Setup the VM

Download the following into the ~/Downloads directory, then run the commands below.

  • htop
  • telnet
  • Anaconda python 3.6
  • spark-2.2.0-bin-hadoop2.7
  • zeppelin-0.7.3-bin-all
  • jdk-8u151-linux-x64.tar.gz

Preparation (inside the VM)

# Build a sample Maven project, refresh Anaconda, and pack the directories
# that are later restored from ~/Downloads by the setup commands.

# -p keeps this step re-runnable if ~/sample already exists.
mkdir -p ~/sample

cd ~/sample

# NOTE(review): the repository segment of this URL ("master") looks unusual;
# confirm that the URL still resolves before relying on it.
wget https://raw.githubusercontent.com/abulbasar/master/master/pom.xml

# Presumably run so that Maven fills ~/.m2 with the project's dependencies
# before ~/.m2 is archived below; verify.
mvn package

cd ~
conda update --all -y
conda clean --all -y

# Each archive holds a single top-level directory named after the tool.
tar zcf anaconda3.tar.gz anaconda3
tar zcf .m2.tar.gz .m2
tar zcf eclipse-oxygen.tar.gz eclipse-oxygen
# The original command had no mode flags, so tar failed and R.tar.gz was
# never created. -C stores the tree as a top-level "R" directory instead of
# "usr/lib64/R", so it unpacks like the other archives.
tar zcf R.tar.gz -C /usr/lib64 R

Download from the VM

# Copy the four archives from the VM (ssh on local port 2222) into the
# current directory.
# Each remote file is its own argument; a leading ":" reuses the previous
# host. Packing all names into one quoted, space-separated argument relies on
# remote-shell word splitting, which newer rsync releases no longer perform
# by default.
rsync -avuz --progress --rsh="ssh -p2222" \
  cloudera@localhost:eclipse-oxygen.tar.gz \
  :.m2.tar.gz \
  :anaconda3.tar.gz \
  :R.tar.gz \
  .

Setup inside the VM

# Install yum-utils, Anaconda, htop and telnet from files already present
# in ~/Downloads. Every yum call disables all repositories, so only the
# local rpm files are used.
cd "$HOME/Downloads"

printf '%s\n' "Installing yum utils"
sudo yum localinstall yum-utils*.rpm -y --disablerepo="*"

printf '%s\n' "Installing Anaconda python"
tar -xf anaconda3.tar.gz
mv anaconda3 "$HOME"/

printf '%s\n' "Setting up anaconda path in ~/.bashrc"
# Single quotes keep "~" and "$PATH" literal, so they are resolved each time
# ~/.bashrc is read rather than now.
printf '%s\n' 'export PATH=~/anaconda3/bin:$PATH' >> "$HOME/.bashrc"

# Reload the profile so the current shell sees the new PATH entry.
. "$HOME/.bashrc"

printf '%s\n' "Installing htop and telnet"
for rpm_prefix in htop telnet; do
  sudo yum localinstall "${rpm_prefix}"*.rpm -y --disablerepo="*"
done

# Replace any JDK 1.8 under /usr/java with the 8u151 tarball from
# ~/Downloads and record JAVA_HOME / PATH in ~/.bashrc.
echo "Setting up JDK"

cd ~/Downloads

sudo rm -rf /usr/java/jdk1.8*

tar xf jdk-8u151-linux-x64.tar.gz

# Ensure the parent exists; otherwise mv would rename the JDK directory to
# /usr/java itself and JAVA_HOME below would point at nothing.
sudo mkdir -p /usr/java

sudo mv jdk1.8.0_151 /usr/java/

# Set JAVA_HOME for the current shell, as the original export did.
export JAVA_HOME=/usr/java/jdk1.8.0_151

# Persist it as well. The original redirected the (empty) output of `export`
# into ~/.bashrc, so JAVA_HOME was never written there, while the PATH line
# below depends on it.
echo "export JAVA_HOME=/usr/java/jdk1.8.0_151" >> ~/.bashrc

# "\$" keeps the variables literal so they are expanded when ~/.bashrc runs.
echo "export PATH=\$JAVA_HOME/bin:\$PATH" >> ~/.bashrc

#echo "Installing R"

#tar xf R.tar.gz

#sudo mv R /usr/lib64

#echo "export PATH=/usr/lib64/R/bin:\$PATH" >> ~/.bashrc

# Restore the Maven cache and Eclipse into the home directory and put the
# Eclipse launcher on the desktop. All relative paths are resolved against
# the current directory (the last `cd` before this point is to ~/Downloads).
printf '%s\n' "Setting up local maven repo"
tar -xf .m2.tar.gz
mv .m2 "$HOME"/

printf '%s\n' "Setting up Eclipse"
tar -xf eclipse-oxygen.tar.gz
mv eclipse-oxygen "$HOME"/

# Mark the launcher executable before copying it to the desktop.
chmod +x Eclipse.desktop
cp Eclipse.desktop "$HOME/Desktop/"

# Install Spark 2.2.0 under /usr/lib from the tarball in ~/Downloads, enable
# its template configuration files, and append environment settings to
# spark-env.sh.
echo "Setting up Spark"

cd ~/Downloads

# Remove any earlier Spark directory under /usr/lib.
sudo rm -rf /usr/lib/spark-*

tar xf spark-2.2.0-bin-hadoop2.7.tgz

sudo mv spark-2.2.0-bin-hadoop2.7 /usr/lib/

cd /usr/lib/spark-2.2.0-bin-hadoop2.7/conf

# With no link name given, the symlink is created in the current directory
# as hive-site.xml.
ln -s /etc/hive/conf/hive-site.xml

mv log4j.properties.template log4j.properties

mv spark-env.sh.template spark-env.sh

mv spark-defaults.conf.template spark-defaults.conf

echo "export PYSPARK_PYTHON=python3" >> spark-env.sh

echo "export PYSPARK_DRIVER_PYTHON=ipython" >> spark-env.sh

echo "export JAVA_HOME=/usr/java/jdk1.8.0_151" >> spark-env.sh

echo "export HADOOP_CONF_DIR=/etc/hadoop/conf" >> spark-env.sh

echo "export HADOOP_HOME=/usr/lib/hadoop" >> spark-env.sh

# "\$" writes the variable reference literally, so it resolves against the
# HADOOP_HOME exported on the previous line of spark-env.sh. Unescaped, it
# was expanded by the installing shell and, with HADOOP_HOME unset there,
# produced "/lib/native".
echo "export LD_LIBRARY_PATH=\$HADOOP_HOME/lib/native" >> spark-env.sh

# Install Zeppelin 0.7.3 under /usr/lib from the tarball in ~/Downloads and
# append the Spark location and Python interpreter settings to
# zeppelin-env.sh.
printf '%s\n' "Setting up Zeppelin"

# Remove any earlier Zeppelin directory under /usr/lib.
sudo rm -rf /usr/lib/zeppelin-*

cd "$HOME/Downloads"
tar -xf zeppelin-0.7.3-bin-all.tgz
sudo mv zeppelin-0.7.3-bin-all /usr/lib

cd /usr/lib/zeppelin-0.7.3-bin-all/conf
mv zeppelin-env.sh.template zeppelin-env.sh

# Append all three settings through a single redirection.
{
  printf '%s\n' "export SPARK_HOME=/usr/lib/spark-2.2.0-bin-hadoop2.7"
  printf '%s\n' "export PYSPARK_PYTHON=python3"
  printf '%s\n' "export PYSPARK_DRIVER_PYTHON=python3"
} >> zeppelin-env.sh

printf '%s\n' "Setup is complete"