Oozie Hortonworks (Sandbox)

Enable EXTJS for Oozie Admin UI.

$ wget http://archive.cloudera.com/gplextras/misc/ext-2.2.zip
sudo cp ext-2.2.zip /usr/hdp/current/oozie-client/libext/
sudo chown oozie:hadoop /usr/hdp/current/oozie-client/libext/ext-2.2.zip
sudo -u oozie /usr/hdp/current/oozie-server/bin/oozie-setup.sh prepare-war

Restart Oozie services from Ambari.


Create a local directory and create the following files inside it.

$ mkdir oozie-example


# File: query-employee.hql
create external table if not exists employee_staging(
   id string,
   name string,
   age int,
   deleted boolean,
   lmd string
)
row format delimited
fields terminated by ','
stored as textfile
location '/staging/mysql/retail_db/employee';


# File: job.properties
nameNode = hdfs://sandbox-hdp.hortonworks.com
jobTracker = sandbox-hdp.hortonworks.com:8050
oozie.wf.application.path=${nameNode}/sqoop/oozie-example


# File: workflow.xml
<?xml version="1.0" encoding="UTF-8"?>
<workflow-app xmlns="uri:oozie:workflow:0.4" name="simple-Workflow">
   <start to="incrental_sqoop_import" />
   <action name="incrental_sqoop_import">
      <sqoop xmlns="uri:oozie:sqoop-action:0.4">
         <job-tracker>${jobTracker}</job-tracker>
         <name-node>${nameNode}</name-node>
         <command>job --exec import-employee</command>
      </sqoop>
      <ok to="run_hive_query" />
      <error to="kill_job" />
   </action>
   <action name="run_hive_query">
      <hive xmlns="uri:oozie:hive-action:0.4">
         <job-tracker>${jobTracker}</job-tracker>
         <name-node>${nameNode}</name-node>
         <script>/sqoop/oozie-example/query-employee.hql</script>
      </hive>
      <ok to="end" />
      <error to="kill_job" />
   </action>
   <kill name="kill_job">
      <message>Failed, Error Message[${wf:errorMessage(wf:lastErrorNode())}]</message>
   </kill>
   <end name="end" />
</workflow-app>


The folder structure

[root@sandbox-hdp oozie-example]# tree .
.
├── job.properties
├── query-employee.hql
└── workflow.xml


Upload the files to HDFS


[root@sandbox-hdp ~]# sudo -u hdfs hadoop fs -mkdir /sqoop
[root@sandbox-hdp ~]# sudo -u hdfs hadoop fs -chmod 777 /sqoop
[root@sandbox-hdp ~]# sudo -u hdfs hadoop fs -chmod 777 /
[root@sandbox-hdp ~]# sudo -u hdfs hadoop fs -chmod 777 /user
[root@sandbox-hdp oozie-example]# sudo -u yarn hadoop fs -mkdir -p /sqoop/oozie-example
[root@sandbox-hdp oozie-example]# echo -n "hortonworks1" > /tmp/.password
[root@sandbox-hdp oozie-example]# sudo -u yarn hadoop fs -put /tmp/.password /sqoop
[root@sandbox-hdp oozie-example]# sudo -u yarn hadoop fs -put -f * /sqoop/oozie-example


Check the status of Oozie

[root@sandbox-hdp oozie-example]# oozie admin -oozie http://localhost:11000/oozie -status
System mode: NORMAL


Submit a job

[root@sandbox-hdp oozie-example]# oozie job -oozie http://sandbox-hdp.hortonworks.com:11000/oozie -run -config job.properties
job: 0000000-191112173627164-oozie-oozi-W


Check status of the job

[root@sandbox-hdp oozie-example]# oozie job -oozie http://sandbox-hdp.hortonworks.com:11000/oozie -info  0000000-191112173627164-oozie-oozi-W
Job ID : 0000000-191112173627164-oozie-oozi-W
------------------------------------------------------------------------------------------------------------------------------------
Workflow Name : simple-Workflow
App Path      : hdfs://sandbox-hdp.hortonworks.com/sqoop/oozie-example
Status        : RUNNING
Run           : 0
User          : root
Group         : -
Created       : 2019-11-12 17:46 GMT
Started       : 2019-11-12 17:46 GMT
Last Modified : 2019-11-12 17:46 GMT
Ended         : -
CoordAction ID: -

Actions
------------------------------------------------------------------------------------------------------------------------------------
ID                                                                            Status    Ext ID                 Ext Status Err Code  
------------------------------------------------------------------------------------------------------------------------------------
0000000-191112173627164-oozie-oozi-W@:start:                                  OK        -                      OK         -         
------------------------------------------------------------------------------------------------------------------------------------
0000000-191112173627164-oozie-oozi-W@incrental_sqoop_import                   PREP      -                      -          -         
------------------------------------------------------------------------------------------------------------------------------------


Scheduling Job

To schedule a job, Oozie provides a component called the coordinator. Create the following files.


# File: coordinator.properties

frequency=60
startTime=2019-11-13T00:00Z
endTime=2019-11-13T12:00Z

nameNode = hdfs://sandbox-hdp.hortonworks.com
jobTracker = sandbox-hdp.hortonworks.com:8050
workflowAppUri=${nameNode}/sqoop/oozie-example

oozie.coord.application.path=${nameNode}/sqoop/oozie-example/coordinator.xml

Note: update startTime and endTime based on the current time. The frequency is given in minutes. A coordinator submission must define only oozie.coord.application.path (not oozie.wf.application.path), so the workflow location is passed to coordinator.xml through the custom workflowAppUri property.



# File: coordinator.xml

<?xml version="1.0" encoding="UTF-8"?>
<coordinator-app xmlns="uri:oozie:coordinator:0.4" name="oozie-coordinator-example" frequency="${frequency}" start="${startTime}" end="${endTime}" timezone="America/Los_Angeles">
   <action>
      <workflow>
         <app-path>${workflowAppUri}</app-path>
      </workflow>
   </action>
</coordinator-app>


Here is the directory structure

[root@sandbox-hdp oozie-example]# tree .
.
├── coordinator.properties
├── coordinator.xml
├── job.properties
├── query-employee.hql
└── workflow.xml


Upload all files to HDFS

$ sudo -u yarn hadoop fs -put -f * /sqoop/oozie-example


Submit the coordinator job.

[root@sandbox-hdp oozie-example]# oozie job -oozie http://sandbox-hdp.hortonworks.com:11000/oozie -run -config coordinator.properties