Sensex Log Data Processing (PDF File Processing in Map Reduce) Part 2

Apache Pig Script

SENSEX.pig

-- High-demand market records: read the tab-separated input file, remove
-- duplicate rows, sort by Sid, and store the result as comma-separated text.
rawHM = LOAD '/hdfs/bhavesh/SENSEX/OUTPUT/HighDemandMarket-r-00000'
    USING PigStorage('\t')
    AS (
        Sid:int,
        Sname:chararray,
        Ttrading:chararray,
        Sloc:chararray,
        OBal:int,
        CBal:int,
        Frate:int
    );
uniqueHM = DISTINCT rawHM;
sortedHM = ORDER uniqueHM BY Sid;
STORE sortedHM INTO '/hdfs/bhavesh/SENSEX/HM' USING PigStorage(',');

-- Reliable products records: read the tab-separated input file, remove
-- duplicate rows, sort by Sid, and store the result as comma-separated text.
rawRP = LOAD '/hdfs/bhavesh/SENSEX/OUTPUT/ReliableProducts-r-00000'
    USING PigStorage('\t')
    AS (
        Sid:int,
        Sname:chararray,
        Ttrading:chararray,
        Sloc:chararray,
        OBal:int,
        CBal:int,
        Frate:int
    );
uniqueRP = DISTINCT rawRP;
sortedRP = ORDER uniqueRP BY Sid;
STORE sortedRP INTO '/hdfs/bhavesh/SENSEX/RP' USING PigStorage(',');

-- Other products records: read the tab-separated input file, remove
-- duplicate rows, sort by Sid, and store the result as comma-separated text.
rawOP = LOAD '/hdfs/bhavesh/SENSEX/OUTPUT/OtherProducts-r-00000'
    USING PigStorage('\t')
    AS (
        Sid:int,
        Sname:chararray,
        Ttrading:chararray,
        Sloc:chararray,
        OBal:int,
        CBal:int,
        Frate:int
    );
uniqueOP = DISTINCT rawOP;
sortedOP = ORDER uniqueOP BY Sid;
STORE sortedOP INTO '/hdfs/bhavesh/SENSEX/OP' USING PigStorage(',');

-- Wealthy products records: read the tab-separated input file, remove
-- duplicate rows, sort by Sid, and store the result as comma-separated text.
rawWP = LOAD '/hdfs/bhavesh/SENSEX/OUTPUT/WealthyProducts-r-00000'
    USING PigStorage('\t')
    AS (
        Sid:int,
        Sname:chararray,
        Ttrading:chararray,
        Sloc:chararray,
        OBal:int,
        CBal:int,
        Frate:int
    );
uniqueWP = DISTINCT rawWP;
sortedWP = ORDER uniqueWP BY Sid;
STORE sortedWP INTO '/hdfs/bhavesh/SENSEX/WP' USING PigStorage(',');

-- Ongoing market strategy records: read the tab-separated input file, remove
-- duplicate rows, sort by Sid, and store the result as comma-separated text.
rawOMS = LOAD '/hdfs/bhavesh/SENSEX/OUTPUT/OnGoingMarketStretegy-r-00000'
    USING PigStorage('\t')
    AS (
        Sid:int,
        Sname:chararray,
        Ttrading:chararray,
        Sloc:chararray,
        OBal:int,
        CBal:int,
        Frate:int
    );
uniqueOMS = DISTINCT rawOMS;
sortedOMS = ORDER uniqueOMS BY Sid;
STORE sortedOMS INTO '/hdfs/bhavesh/SENSEX/OMS' USING PigStorage(',');

Shell Script (SENSEX.sh)

#!/bin/bash
###############################################################################
#############################  COMPLETE SCRIPT   ##############################
### HEADER      - PROGRAM NAME - <SENSEX.sh>
### AUTHOR      - BHAVESH BHADRICHA
### DATE        - 27/DEC/2015
### VERSION     - 1.0
### DESCRIPTION - Data: Sensex Log Data Processing
###               (PDF File Processing in Map Reduce)
###############################################################################
###############################################################################
##################################
###DEFINING THE LOCAL VARIABLES###
##################################
# DATE    - start timestamp of this run, formatted YYYYMMDD_HHMMSS.
# LOGFILE - per-run log file; every stage below appends its status lines to it.
DATE=$(date +"%Y%m%d_%H%M%S")
LOGFILE="/home/bhavesh/POC/SENSEX/LOG/${DATE}.log"

####### Removing if any existent directories ##################################
# Delete the per-category result directories so this run's STORE statements
# can recreate them. `hadoop fs -rmr` is deprecated in favour of `-rm -r`;
# `-f` keeps the command quiet (and its exit status clean) when a directory
# does not exist yet, e.g. on the very first run.
for dir in RP WP OP OMS HM; do
    hadoop fs -rm -r -f "/hdfs/bhavesh/SENSEX/${dir}"
done

##################################################################################
############## PDF File Processing USING Map Reduce ##############################
##################################################################################
echo "Mapreduce Program starts here"

echo "PDF File Processing in Map Reduce Started" >> "$LOGFILE"

# Clear the previous run's job output before launching the job again.
# (`-rm -r -f` replaces the deprecated `-rmr` and tolerates a missing path.)
hadoop fs -rm -r -f /hdfs/bhavesh/SENSEX/OUTPUT

# Arguments to the driver: input PDF, then the output directory.
if hadoop jar /home/bhavesh/POC/SENSEX/Mapreduce/SENSEX.jar com.bhavesh.poc.sensex.PdfInputDriver /hdfs/bhavesh/SENSEX/INPUT/sensexinputfile.pdf /hdfs/bhavesh/SENSEX/OUTPUT; then
    echo "Succesfully finished Mapreduce Processing " >> "$LOGFILE"
else
    echo "SENSEX MapReduce Failed Please check the Log " >> "$LOGFILE"
    # Every later stage consumes /hdfs/bhavesh/SENSEX/OUTPUT; carrying on would
    # drop and recreate the downstream tables without any data to fill them.
    exit 1
fi

#################################################################################
############### PIG Processing for SENSEX DATA  #################################
#################################################################################

echo "SENSEX Pig Processing started "

echo "SENSEX PIG Processing Started" >> "$LOGFILE"

# Run the Pig script and branch directly on its exit status.
if pig -f /home/bhavesh/POC/SENSEX/PIG/SENSEX.pig; then
    echo "PIG Succesfully finished SENSEX Processing " >> "$LOGFILE"
else
    echo "PIG SENSEX Processing Failed Please check the Log " >> "$LOGFILE"
    # The export and Hive stages read the files this script stores; stop here
    # rather than rebuilding the downstream tables from missing input.
    exit 1
fi

################################################################################
############# IMPORTING DATA in SQOOP ##########################################
################################################################################

echo "Importing the data to MYSQL  using SQOOP ";

echo "Importing the data to MYSQL " >> "$LOGFILE"

# Runs a single SQL statement through `sqoop eval`.
#   $1 - JDBC connect string
#   $2 - SQL statement to execute
# NOTE(review): the password is passed on the command line, where it is visible
# in the process list and shell history; consider -P or --password-file.
sensex_sql() {
    sqoop eval --connect "$1" --username root --password root --query "$2"
}

SENSEX_SERVER_URL="jdbc:mysql://localhost/"
SENSEX_DB_URL="jdbc:mysql://localhost/SENSEX"
SENSEX_TABLES="HighDemandMarket WealthyProducts OngoingMarketSt ReliableProducts OtherProducts"
# All five tables share one layout, matching the seven fields the Pig script stores.
SENSEX_COLUMNS="(Sid int,Sname varchar(30),TType varchar(20),TLoc varchar(20),OpenBal int,CloseBal int,FlucRate int)"

##### Creating the tables in MySql
# The database is created over a server-level connection: connecting to
# .../SENSEX fails while that database does not exist yet.
# (The former "use SENSEX;" call was dropped: every `sqoop eval` is a separate
# session, so it had no effect on the statements that followed.)
sensex_sql "$SENSEX_SERVER_URL" "create database if not exists  SENSEX;"
# NOTE(review): these grants target a user literally named '%' and the
# anonymous user on localhost - confirm that this is really intended.
sensex_sql "$SENSEX_DB_URL" "grant all privileges on SENSEX.* to '%'@'localhost'"
sensex_sql "$SENSEX_DB_URL" "grant all privileges on SENSEX.* to ''@'localhost'"

# Start from empty tables on every run.
for table in $SENSEX_TABLES; do
    sensex_sql "$SENSEX_DB_URL" "drop table if exists ${table}"
done

echo " MYSQL table creation"

for table in $SENSEX_TABLES; do
    sensex_sql "$SENSEX_DB_URL" "create table ${table} ${SENSEX_COLUMNS}"
done

echo "data exporting";

#### exporting the data into MYSQL
# Each entry is <MySQL table>:<result directory under /hdfs/bhavesh/SENSEX>.
# Every export's status is recorded, so one failure is not masked by a later
# export that succeeds.
export_failed=0
for mapping in HighDemandMarket:HM WealthyProducts:WP OngoingMarketSt:OMS ReliableProducts:RP OtherProducts:OP; do
    table=${mapping%%:*}
    dir=${mapping##*:}
    sqoop export --connect jdbc:mysql://localhost/SENSEX --username root --password root \
        --table "$table" \
        --export-dir "/hdfs/bhavesh/SENSEX/${dir}/part-r-00000" \
        --fields-terminated-by ',' || export_failed=1
done

# The original `if[$? -eq 0]` was not valid shell (no spaces, no then/fi), so
# success was reported unconditionally.
if [ "$export_failed" -eq 0 ]; then
    echo "exporting of data to MYSQL is done";
    echo "exporting of data to MYSQL is done" >> "$LOGFILE"
else
    echo "exporting of data to MYSQL Failed Please check the Log";
    echo "exporting of data to MYSQL Failed Please check the Log " >> "$LOGFILE"
fi

echo "creation of hive tables started";

echo "creation of hive tables started " >> "$LOGFILE"

# Check Hive's exit status the same way the MapReduce and Pig stages are
# checked, instead of reporting success unconditionally.
if hive -f /home/bhavesh/POC/SENSEX/HIVE/SENSEX.hql; then
    echo "Hive process is done";
    echo "HIVE PROCESSING is done" >> "$LOGFILE"
else
    echo "HIVE PROCESSING Failed Please check the Log";
    echo "HIVE PROCESSING Failed Please check the Log " >> "$LOGFILE"
    exit 1
fi
exit 0

Apache Hive (SENSEX.hql)

-- Make sure the Hive database exists before switching to it; `USE` fails on a
-- database that has never been created.
CREATE DATABASE IF NOT EXISTS SENSEX;
USE SENSEX;

-- Remove the tables from any previous run so they can be rebuilt below.
-- IF EXISTS keeps the script re-runnable regardless of Hive's
-- drop-nonexistent-table setting.
DROP TABLE IF EXISTS HDM;
DROP TABLE IF EXISTS WP;
DROP TABLE IF EXISTS RP;
DROP TABLE IF EXISTS OP;
DROP TABLE IF EXISTS OMS;

-- HDM: comma-delimited text table populated from the HM result file.
CREATE TABLE HDM (
    Sid      INT,
    Sname    STRING,
    TTrading STRING,
    Sloc     STRING,
    OpenBal  INT,
    CloseBal INT,
    FlucRate INT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ","
STORED AS TEXTFILE;

-- Without LOCAL the path is an HDFS path; per the Hive manual the file is
-- moved (not copied) into the table's storage location.
LOAD DATA INPATH '/hdfs/bhavesh/SENSEX/HM/part-r-00000' INTO TABLE HDM;

-- WP: comma-delimited text table populated from the WP result file.
CREATE TABLE WP (
    Sid      INT,
    Sname    STRING,
    TTrading STRING,
    Sloc     STRING,
    OpenBal  INT,
    CloseBal INT,
    FlucRate INT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ","
STORED AS TEXTFILE;

-- Without LOCAL the path is an HDFS path; per the Hive manual the file is
-- moved (not copied) into the table's storage location.
LOAD DATA INPATH '/hdfs/bhavesh/SENSEX/WP/part-r-00000' INTO TABLE WP;

-- RP: comma-delimited text table populated from the RP result file.
CREATE TABLE RP (
    Sid      INT,
    Sname    STRING,
    TTrading STRING,
    Sloc     STRING,
    OpenBal  INT,
    CloseBal INT,
    FlucRate INT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ","
STORED AS TEXTFILE;

-- Without LOCAL the path is an HDFS path; per the Hive manual the file is
-- moved (not copied) into the table's storage location.
LOAD DATA INPATH '/hdfs/bhavesh/SENSEX/RP/part-r-00000' INTO TABLE RP;

-- OP: comma-delimited text table populated from the OP result file.
CREATE TABLE OP (
    Sid      INT,
    Sname    STRING,
    TTrading STRING,
    Sloc     STRING,
    OpenBal  INT,
    CloseBal INT,
    FlucRate INT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ","
STORED AS TEXTFILE;

-- Without LOCAL the path is an HDFS path; per the Hive manual the file is
-- moved (not copied) into the table's storage location.
LOAD DATA INPATH '/hdfs/bhavesh/SENSEX/OP/part-r-00000' INTO TABLE OP;

-- OMS: comma-delimited text table populated from the OMS result file.
CREATE TABLE OMS (
    Sid      INT,
    Sname    STRING,
    TTrading STRING,
    Sloc     STRING,
    OpenBal  INT,
    CloseBal INT,
    FlucRate INT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ","
STORED AS TEXTFILE;

-- Without LOCAL the path is an HDFS path; per the Hive manual the file is
-- moved (not copied) into the table's storage location.
LOAD DATA INPATH '/hdfs/bhavesh/SENSEX/OMS/part-r-00000' INTO TABLE OMS;

Project Execution​

Shell Script Run​

Mapreduce Run​

Apache Pig Run​

Apache Sqoop Run​

Apache Hive Run​

Mapreduce Output​

MYSQL Output​

Apache Hive Output​

By Bhavesh