Sensex Log Data Processing (PDF File Processing in Map Reduce) Part 2
Bhavesh
Apache Pig Script
SENSEX.pig
-- HighDemandMarket: read the tab-separated MapReduce output, drop duplicate
-- rows, sort by Sid and write the result comma-separated to /hdfs/bhavesh/SENSEX/HM.
rawHM = LOAD '/hdfs/bhavesh/SENSEX/OUTPUT/HighDemandMarket-r-00000' USING PigStorage('\t') AS (Sid:int,Sname:chararray,Ttrading:chararray,Sloc:chararray,OBal:int,CBal:int,Frate:int);
disHM = DISTINCT rawHM;
-- PARALLEL 1 forces a single reducer so the whole sorted result lands in one
-- part-r-00000 file, which is the only file the Sqoop export and Hive load read.
orHM = ORDER disHM BY Sid PARALLEL 1;
STORE orHM INTO '/hdfs/bhavesh/SENSEX/HM' USING PigStorage(',');
-- ReliableProducts: read the tab-separated MapReduce output, drop duplicate
-- rows, sort by Sid and write the result comma-separated to /hdfs/bhavesh/SENSEX/RP.
rawRP = LOAD '/hdfs/bhavesh/SENSEX/OUTPUT/ReliableProducts-r-00000' USING PigStorage('\t') AS (Sid:int,Sname:chararray,Ttrading:chararray,Sloc:chararray,OBal:int,CBal:int,Frate:int);
disRP = DISTINCT rawRP;
-- PARALLEL 1 forces a single reducer so the whole sorted result lands in one
-- part-r-00000 file, which is the only file the Sqoop export and Hive load read.
orRP = ORDER disRP BY Sid PARALLEL 1;
STORE orRP INTO '/hdfs/bhavesh/SENSEX/RP' USING PigStorage(',');
-- OtherProducts: read the tab-separated MapReduce output, drop duplicate
-- rows, sort by Sid and write the result comma-separated to /hdfs/bhavesh/SENSEX/OP.
rawOP = LOAD '/hdfs/bhavesh/SENSEX/OUTPUT/OtherProducts-r-00000' USING PigStorage('\t') AS (Sid:int,Sname:chararray,Ttrading:chararray,Sloc:chararray,OBal:int,CBal:int,Frate:int);
disOP = DISTINCT rawOP;
-- PARALLEL 1 forces a single reducer so the whole sorted result lands in one
-- part-r-00000 file, which is the only file the Sqoop export and Hive load read.
orOP = ORDER disOP BY Sid PARALLEL 1;
STORE orOP INTO '/hdfs/bhavesh/SENSEX/OP' USING PigStorage(',');
-- WealthyProducts: read the tab-separated MapReduce output, drop duplicate
-- rows, sort by Sid and write the result comma-separated to /hdfs/bhavesh/SENSEX/WP.
rawWP = LOAD '/hdfs/bhavesh/SENSEX/OUTPUT/WealthyProducts-r-00000' USING PigStorage('\t') AS (Sid:int,Sname:chararray,Ttrading:chararray,Sloc:chararray,OBal:int,CBal:int,Frate:int);
disWP = DISTINCT rawWP;
-- PARALLEL 1 forces a single reducer so the whole sorted result lands in one
-- part-r-00000 file, which is the only file the Sqoop export and Hive load read.
orWP = ORDER disWP BY Sid PARALLEL 1;
STORE orWP INTO '/hdfs/bhavesh/SENSEX/WP' USING PigStorage(',');
-- OnGoingMarketStretegy: read the tab-separated MapReduce output, drop duplicate
-- rows, sort by Sid and write the result comma-separated to /hdfs/bhavesh/SENSEX/OMS.
-- (The spelling "Stretegy" is the actual input file name and must stay as is.)
rawOMS = LOAD '/hdfs/bhavesh/SENSEX/OUTPUT/OnGoingMarketStretegy-r-00000' USING PigStorage('\t') AS (Sid:int,Sname:chararray,Ttrading:chararray,Sloc:chararray,OBal:int,CBal:int,Frate:int);
disOMS = DISTINCT rawOMS;
-- PARALLEL 1 forces a single reducer so the whole sorted result lands in one
-- part-r-00000 file, which is the only file the Sqoop export and Hive load read.
orOMS = ORDER disOMS BY Sid PARALLEL 1;
STORE orOMS INTO '/hdfs/bhavesh/SENSEX/OMS' USING PigStorage(',');
Shell Script (SENSEX.sh)
###############################################################################
############################# COMPLETE SCRIPT ##############################
### HEADER - PROGRAM NAME - <SENSEX.sh>
### AUTHOR - BHAVESH BHADRICHA
### DATE - 27/DEC/2015
### VERSION - 1.0
### DESCRIPTION - Data: Sensex Log Data Processing
### (PDF File Processing in Map Reduce)
###############################################################################
###############################################################################
##################################
###DEFINING THE LOCAL VARIABLES###
##################################
# Timestamp (YYYYMMDD_HHMMSS) captured once at start-up; it names this run's log.
DATE="$(date '+%Y%m%d_%H%M%S')"
# Per-run log file that the later steps append their status messages to.
LOGFILE="/home/bhavesh/POC/SENSEX/LOG/${DATE}.log"
####### Remove the HDFS output directories left by a previous run #############
# These are the five directories SENSEX.pig stores into. No exit status is
# checked here, so a removal that fails does not stop the script.
for sensex_dir in RP WP OP OMS HM; do
    hadoop fs -rmr "/hdfs/bhavesh/SENSEX/${sensex_dir}"
done
##################################################################################
############## PDF File Processing USING Map Reduce ##############################
##################################################################################
# Runs the PdfInputDriver job on the input PDF, writing to
# /hdfs/bhavesh/SENSEX/OUTPUT. The previous output directory is removed before
# the job is submitted.
echo "Mapreduce Program starts here"
echo "PDF File Processing in Map Reduce Started" >> "$LOGFILE"
hadoop fs -rmr /hdfs/bhavesh/SENSEX/OUTPUT
# Test the job's own exit status. Every later step (Pig, Sqoop, Hive) reads what
# this job writes, so a failure stops the run instead of continuing on missing
# output.
if hadoop jar /home/bhavesh/POC/SENSEX/Mapreduce/SENSEX.jar com.bhavesh.poc.sensex.PdfInputDriver /hdfs/bhavesh/SENSEX/INPUT/sensexinputfile.pdf /hdfs/bhavesh/SENSEX/OUTPUT; then
    echo "Succesfully finished Mapreduce Processing " >> "$LOGFILE"
else
    echo "SENSEX MapReduce Failed Please check the Log " >> "$LOGFILE"
    exit 1
fi
#################################################################################
############### PIG Processing for SENSEX DATA #################################
#################################################################################
# Runs SENSEX.pig, which de-duplicates and sorts each MapReduce output file and
# stores the results as comma-separated files under /hdfs/bhavesh/SENSEX.
echo "SENSEX Pig Processing started "
echo "SENSEX PIG Processing Started" >> "$LOGFILE"
# Test Pig's own exit status. The Sqoop export and Hive load read the files this
# script stores, so a failure stops the run instead of continuing without them.
if pig -f /home/bhavesh/POC/SENSEX/PIG/SENSEX.pig; then
    echo "PIG Succesfully finished SENSEX Processing " >> "$LOGFILE"
else
    echo "PIG SENSEX Processing Failed Please check the Log " >> "$LOGFILE"
    exit 1
fi
################################################################################
############# EXPORTING DATA TO MYSQL WITH SQOOP ###############################
################################################################################
echo "Importing the data to MYSQL using SQOOP ";
echo "Importing the data to MYSQL " >> "$LOGFILE"

# NOTE(review): the MySQL root password is passed in clear text on the command
# line. Sqoop also offers -P (prompt) and --password-file; consider switching.

# Run one SQL statement against the SENSEX database through "sqoop eval".
#   $1 - the SQL statement to execute
sensex_sql() {
    sqoop eval --connect jdbc:mysql://localhost/SENSEX -username root -password root --query "$1"
}

##### Creating the database and tables in MySql
# The database is created over a connection that names no schema: connecting to
# .../SENSEX in order to create SENSEX cannot work while it does not exist yet.
sqoop eval --connect jdbc:mysql://localhost/ -username root -password root --query "create database if not exists SENSEX;"
# The former standalone "use SENSEX;" call was dropped: every sqoop eval is a
# separate invocation, and the connect string above already selects the schema.
sensex_sql "grant all privileges on SENSEX.* to '%'@'localhost'"
sensex_sql "grant all privileges on SENSEX.* to ''@'localhost'"

# Drop the tables of a previous run so the export starts from empty tables.
for sensex_table in HighDemandMarket WealthyProducts OngoingMarketSt ReliableProducts OtherProducts; do
    sensex_sql "drop table if exists ${sensex_table}"
done

echo " MYSQL table creation"
# All five tables share one column layout, matching the seven comma-separated
# fields the Pig script stores.
sensex_columns="(Sid int,Sname varchar(30),TType varchar(20),TLoc varchar(20),OpenBal int,CloseBal int,FlucRate int)"
sensex_sql "create table HighDemandMarket ${sensex_columns}"
sensex_sql "create table WealthyProducts${sensex_columns}"
sensex_sql "create table OngoingMarketSt${sensex_columns}"
sensex_sql "create table ReliableProducts${sensex_columns}"
sensex_sql "create table OtherProducts${sensex_columns}"
echo "data exporting";
#### exporting the data into MYSQL
# Each entry is <MySQL table>:<HDFS directory written by SENSEX.pig>.
# The status of every export is recorded; the previous "if[$? -eq 0]" line was
# not a valid test (and could only have seen the last export's status), so the
# success messages were printed unconditionally.
sensex_export_failed=0
for sensex_pair in HighDemandMarket:HM WealthyProducts:WP OngoingMarketSt:OMS ReliableProducts:RP OtherProducts:OP; do
    sensex_table="${sensex_pair%%:*}"
    sensex_dir="${sensex_pair##*:}"
    if ! sqoop export --connect jdbc:mysql://localhost/SENSEX -username root -password root --table "$sensex_table" --export-dir "/hdfs/bhavesh/SENSEX/${sensex_dir}/part-r-00000" --fields-terminated-by ','; then
        echo "exporting of ${sensex_table} to MYSQL failed" >> "$LOGFILE"
        sensex_export_failed=1
    fi
done
if [ "$sensex_export_failed" -eq 0 ]; then
    echo "exporting of data to MYSQL is done";
    echo "exporting of data to MYSQL is done" >> "$LOGFILE"
else
    echo "exporting of data to MYSQL failed Please check the Log " >> "$LOGFILE"
    exit 1
fi
# Runs SENSEX.hql to (re)create the Hive tables and load the Pig output into them.
echo "creation of hive tables started";
echo "creation of hive tables started " >> "$LOGFILE"
# Test Hive's own exit status so a failed load is logged and reported through the
# script's exit code instead of being announced as done.
if hive -f /home/bhavesh/POC/SENSEX/HIVE/SENSEX.hql; then
    echo "Hive process is done";
    echo "HIVE PROCESSING is done" >> "$LOGFILE"
else
    echo "HIVE PROCESSING Failed Please check the Log " >> "$LOGFILE"
    exit 1
fi
exit 0
Apache Hive (SENSEX.hql)
-- Create the database on first use so that USE cannot fail on a fresh metastore
CREATE DATABASE IF NOT EXISTS SENSEX;
USE SENSEX;
-- Remove the tables of a previous run, IF EXISTS keeps the first run clean
DROP TABLE IF EXISTS HDM;
DROP TABLE IF EXISTS WP;
DROP TABLE IF EXISTS RP;
DROP TABLE IF EXISTS OP;
DROP TABLE IF EXISTS OMS;
-- HDM: comma-delimited text table, filled from the HM part file
CREATE TABLE HDM (
    Sid      INT,
    Sname    STRING,
    TTrading STRING,
    Sloc     STRING,
    OpenBal  INT,
    CloseBal INT,
    FlucRate INT
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE;
LOAD DATA INPATH '/hdfs/bhavesh/SENSEX/HM/part-r-00000' INTO TABLE HDM;
-- WP: comma-delimited text table, filled from the WP part file
CREATE TABLE WP (
    Sid      INT,
    Sname    STRING,
    TTrading STRING,
    Sloc     STRING,
    OpenBal  INT,
    CloseBal INT,
    FlucRate INT
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE;
LOAD DATA INPATH '/hdfs/bhavesh/SENSEX/WP/part-r-00000' INTO TABLE WP;
-- RP: comma-delimited text table, filled from the RP part file
CREATE TABLE RP (
    Sid      INT,
    Sname    STRING,
    TTrading STRING,
    Sloc     STRING,
    OpenBal  INT,
    CloseBal INT,
    FlucRate INT
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE;
LOAD DATA INPATH '/hdfs/bhavesh/SENSEX/RP/part-r-00000' INTO TABLE RP;
-- OP: comma-delimited text table, filled from the OP part file
CREATE TABLE OP (
    Sid      INT,
    Sname    STRING,
    TTrading STRING,
    Sloc     STRING,
    OpenBal  INT,
    CloseBal INT,
    FlucRate INT
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE;
LOAD DATA INPATH '/hdfs/bhavesh/SENSEX/OP/part-r-00000' INTO TABLE OP;
-- OMS: comma-delimited text table, filled from the OMS part file
CREATE TABLE OMS (
    Sid      INT,
    Sname    STRING,
    TTrading STRING,
    Sloc     STRING,
    OpenBal  INT,
    CloseBal INT,
    FlucRate INT
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE;
LOAD DATA INPATH '/hdfs/bhavesh/SENSEX/OMS/part-r-00000' INTO TABLE OMS;