Oozie Hortonworks (Sandbox)
Enable EXTJS for Oozie Admin UI.
# Download the ExtJS 2.2 archive.
$ wget http://archive.cloudera.com/gplextras/misc/ext-2.2.zip
# Copy the archive into Oozie's libext directory and hand ownership to oozie:hadoop.
sudo cp ext-2.2.zip /usr/hdp/current/oozie-client/libext/
sudo chown oozie:hadoop /usr/hdp/current/oozie-client/libext/ext-2.2.zip
# Run Oozie's setup script as the oozie user to prepare the WAR
# (presumably this is what picks up the archive from libext - confirm).
sudo -u oozie /usr/hdp/current/oozie-server/bin/oozie-setup.sh prepare-war
Restart Oozie services from Ambari.
Create a local directory and create the following files inside it.
$ mkdir oozie-example
# File: query-employee.hql
-- Defines the external table employee_staging (only if it does not already
-- exist) over comma-delimited text files stored under the given location.
CREATE EXTERNAL TABLE IF NOT EXISTS employee_staging (
  id      STRING,
  name    STRING,
  age     INT,
  deleted BOOLEAN,
  lmd     STRING
)
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION '/staging/mysql/retail_db/employee';
# File: job.properties
# HDFS NameNode URI and YARN ResourceManager address; workflow.xml reads them
# as ${nameNode} and ${jobTracker}.
nameNode = hdfs://sandbox-hdp.hortonworks.com
jobTracker = sandbox-hdp.hortonworks.com:8050
# Make the Oozie share lib (Sqoop and Hive action jars) available to the actions.
oozie.use.system.libpath=true
# HDFS directory that contains workflow.xml.
oozie.wf.application.path=${nameNode}/sqoop/oozie-example
# File: workflow.xml
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Two-step workflow: execute the Sqoop job "import-employee", then run
  query-employee.hql with Hive. An error in either action transitions to the
  kill node, which reports the error message of the last failed node.
-->
<workflow-app name="simple-Workflow" xmlns="uri:oozie:workflow:0.4">
    <start to="incrental_sqoop_import"/>

    <!-- Step 1: Sqoop import; on success continue with the Hive query. -->
    <action name="incrental_sqoop_import">
        <sqoop xmlns="uri:oozie:sqoop-action:0.4">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <command>job --exec import-employee</command>
        </sqoop>
        <ok to="run_hive_query"/>
        <error to="kill_job"/>
    </action>

    <!-- Step 2: run the HiveQL script; on success the workflow ends. -->
    <action name="run_hive_query">
        <hive xmlns="uri:oozie:hive-action:0.4">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <script>/sqoop/oozie-example/query-employee.hql</script>
        </hive>
        <ok to="end"/>
        <error to="kill_job"/>
    </action>

    <!-- Shared failure target for both actions. -->
    <kill name="kill_job">
        <message>Failed, Error Message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

    <end name="end"/>
</workflow-app>
The folder structure
[root@sandbox-hdp oozie-example]# tree .
.
├── job.properties
├── query-employee.hql
└── workflow.xml
Upload the files to HDFS
[root@sandbox-hdp ~]# sudo -u hdfs hadoop fs -mkdir /sqoop
[root@sandbox-hdp ~]# sudo -u hdfs hadoop fs -chmod 777 /sqoop
[root@sandbox-hdp ~]# sudo -u hdfs hadoop fs -chmod 777 /
[root@sandbox-hdp ~]# sudo -u hdfs hadoop fs -chmod 777 /user
[root@sandbox-hdp oozie-example]# sudo -u yarn hadoop fs -mkdir -p /sqoop/oozie-example
[root@sandbox-hdp oozie-example]# echo -n "hortonworks1" > /tmp/.password
[root@sandbox-hdp oozie-example]# sudo -u yarn hadoop fs -put /tmp/.password /sqoop
[root@sandbox-hdp oozie-example]# sudo -u yarn hadoop fs -put -f * /sqoop/oozie-example
Check the status of the Oozie server
[root@sandbox-hdp oozie-example]# oozie admin -oozie http://localhost:11000/oozie -status
System mode: NORMAL
Submit a job
[root@sandbox-hdp oozie-example]# oozie job -oozie http://sandbox-hdp.hortonworks.com:11000/oozie -run -config job.properties
job: 0000000-191112173627164-oozie-oozi-W
Check status of the job
[root@sandbox-hdp oozie-example]# oozie job -oozie http://sandbox-hdp.hortonworks.com:11000/oozie -info 0000000-191112173627164-oozie-oozi-W
Job ID : 0000000-191112173627164-oozie-oozi-W
------------------------------------------------------------------------------------------------------------------------------------
Workflow Name : simple-Workflow
App Path : hdfs://sandbox-hdp.hortonworks.com/sqoop/oozie-example
Status : RUNNING
Run : 0
User : root
Group : -
Created : 2019-11-12 17:46 GMT
Started : 2019-11-12 17:46 GMT
Last Modified : 2019-11-12 17:46 GMT
Ended : -
CoordAction ID: -
Actions
------------------------------------------------------------------------------------------------------------------------------------
ID Status Ext ID Ext Status Err Code
------------------------------------------------------------------------------------------------------------------------------------
0000000-191112173627164-oozie-oozi-W@:start: OK - OK -
------------------------------------------------------------------------------------------------------------------------------------
0000000-191112173627164-oozie-oozi-W@incrental_sqoop_import PREP - - -
------------------------------------------------------------------------------------------------------------------------------------
Scheduling Job
To schedule jobs, Oozie provides a component called the coordinator. Create the following files.
# File: coordinator.properties
# Minutes between runs; coordinator.xml reads this as ${frequency}.
frequency=60
# Scheduling window (UTC, as indicated by the trailing Z).
startTime=2019-11-13T00:00Z
endTime=2019-11-13T12:00Z
nameNode = hdfs://sandbox-hdp.hortonworks.com
jobTracker = sandbox-hdp.hortonworks.com:8050
# Make the Oozie share lib (Sqoop and Hive action jars) available to the workflow actions.
oozie.use.system.libpath=true
# HDFS directory containing workflow.xml; coordinator.xml reads it as ${workflowAppPath}.
# It is deliberately NOT named oozie.wf.application.path: a submission may set only
# one of the oozie.*.application.path properties, and this one is a coordinator.
workflowAppPath=${nameNode}/sqoop/oozie-example
# HDFS path of the coordinator definition (note the key is "coord", not "coor").
oozie.coord.application.path=${nameNode}/sqoop/oozie-example/coordinator.xml
Note: update startTime and endTime based on the current time.
# File: coordinator.xml
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Runs the workflow stored at ${workflowAppPath} every ${frequency} minutes
  between ${startTime} and ${endTime}; all three values come from
  coordinator.properties.
-->
<coordinator-app xmlns="uri:oozie:coordinator:0.4" name="oozie-coordinator-example" frequency="${frequency}" start="${startTime}" end="${endTime}" timezone="America/Los_Angeles">
    <action>
        <workflow>
            <app-path>${workflowAppPath}</app-path>
        </workflow>
    </action>
</coordinator-app>
Here is the dir structure
[root@sandbox-hdp oozie-example]# tree .
.
├── coordinator.properties
├── coordinator.xml
├── job.properties
├── query-employee.hql
└── workflow.xml
Upload all files to HDFS
# -f overwrites the files that were already uploaded to this directory earlier.
$ sudo -u yarn hadoop fs -put -f * /sqoop/oozie-example
Submit the coordinator job.
[root@sandbox-hdp oozie-example]# oozie job -oozie http://sandbox-hdp.hortonworks.com:11000/oozie -run -config coordinator.properties