From f49082d705337bfc301ff71239cb942be5c1655b Mon Sep 17 00:00:00 2001
From: pranavmrane <pmr5279@g.rit.edu>
Date: Fri, 22 Feb 2019 06:40:14 -0500
Subject: [PATCH 01/13] Global Variables Integrated

---
 scripts/cluster/yarn_cluster_setup/README.md  |  4 +-
 .../install_yarn_master_slave.sh              | 40 +++++++++++++++----
 .../yarn_cluster_setup/start_yarn_cluster.sh  | 22 ++++++----
 .../yarn_cluster_setup/stop_yarn_cluster.sh   |  8 ++--
 .../test_scripts/run_spark_test_job_pi.sh     |  8 ++--
 5 files changed, 59 insertions(+), 23 deletions(-)

diff --git a/scripts/cluster/yarn_cluster_setup/README.md b/scripts/cluster/yarn_cluster_setup/README.md
index 5e7b1fc..bc02d71 100644
--- a/scripts/cluster/yarn_cluster_setup/README.md
+++ b/scripts/cluster/yarn_cluster_setup/README.md
@@ -15,7 +15,7 @@ Specify the hostnames of nodes as arguments.
 sudo bash install_yarn_cluster.sh master,worker1,worker2 ...
 ```
 
-# Start Yarn Spark Cluster
+# Start Yarn Spark Cluster and Run Spark Job on Master
 Cluster can only be started on master node after installation is complete on all nodes and configuration files for Yarn and Spark are placed in correct folders.
 ```
 sudo bash start_yarn_cluster.sh
@@ -56,4 +56,4 @@ http://<MASTER_NODE_IP_ADDRESS>:19888/
 cd ..
 sudo bash stop_yarn_job.sh
 ```
-After running this command, the web interfaces will not work.
+After running this command, the web interfaces will not work.
\ No newline at end of file
diff --git a/scripts/cluster/yarn_cluster_setup/install_yarn_master_slave.sh b/scripts/cluster/yarn_cluster_setup/install_yarn_master_slave.sh
index ae806b0..1ed3c47 100644
--- a/scripts/cluster/yarn_cluster_setup/install_yarn_master_slave.sh
+++ b/scripts/cluster/yarn_cluster_setup/install_yarn_master_slave.sh
@@ -7,7 +7,8 @@ HADOOP_SYMLINK=/usr/local/hadoop
 HADOOP_CONFIG_LOCATION=${HADOOP_HOME_INFILE}etc/hadoop/
 HADOOP_VERSION=2.9.2
 HADOOP_WEB_SOURCE=https://www-us.apache.org/dist/hadoop/common/
-GLOBAL_VARIABLES_SOURCE=/etc/environment
+ROOT_VARIABLES_ADDRESS=/root/.bashrc
+USER_VARIABLES_ADDRESS=~/.bashrc
 
 # Install Pre-Reqs
 apt-get update -y
@@ -17,6 +18,22 @@ apt-get install -y python default-jdk wget
 unlink ${HADOOP_SYMLINK} && rm -rf ${HADOOP_DATA}
 rm -rf /usr/local/hadoop-*/
 
+# Remove Global Variables
+sed -i /JAVA_HOME/d $ROOT_VARIABLES_ADDRESS && sed -i /HADOOP_HOME/d $ROOT_VARIABLES_ADDRESS
+sed -i /hadoop/d $ROOT_VARIABLES_ADDRESS && sed -i /default-java/d $ROOT_VARIABLES_ADDRESS
+
+# Make Hadoop Global Variables for User and Root
+echo "export JAVA_HOME="$JAVA_HOME_INFILE >> $ROOT_VARIABLES_ADDRESS
+echo "export PATH=$PATH:"$JAVA_HOME_INFILE"bin/:"$JAVA_HOME_INFILE"sbin/" >> $ROOT_VARIABLES_ADDRESS
+echo "export HADOOP_HOME="$HADOOP_HOME_INFILE >> $ROOT_VARIABLES_ADDRESS
+echo "export HADOOP_MAPRED_HOME="$HADOOP_HOME_INFILE >> $ROOT_VARIABLES_ADDRESS
+echo "export HADOOP_COMMON_HOME="$HADOOP_HOME_INFILE>> $ROOT_VARIABLES_ADDRESS
+echo "export HADOOP_HDFS_HOME="$HADOOP_HOME_INFILE >> $ROOT_VARIABLES_ADDRESS
+echo "export YARN_HOME="$HADOOP_HOME_INFILE >> $ROOT_VARIABLES_ADDRESS
+echo "export HADOOP_COMMON_LIB_NATIVE_DIR="$HADOOP_HOME_INFILE"lib/native" >> $ROOT_VARIABLES_ADDRESS
+echo "export PATH=$PATH:"$HADOOP_HOME_INFILE"bin/:"$HADOOP_HOME_INFILE"sbin/" >> $ROOT_VARIABLES_ADDRESS
+source $ROOT_VARIABLES_ADDRESS
+
 # Make Data Directories for Hadoop
 mkdir -p ${HADOOP_DATA}name
 mkdir -p ${HADOOP_DATA}data
@@ -28,20 +45,18 @@ current_directory=`pwd`
 if [ ! -f "${current_directory}/hadoop-$HADOOP_VERSION.tar.gz" ]; then
 	echo "Downloading Hadoop ${HADOOP_VERSION} ..."
 	sudo wget ${HADOOP_WEB_SOURCE}hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz
-	# wget ${HADOOP_WEB_SOURCE}hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz -P /hadoop-${HADOOP_VERSION}.tar.gz
-    # sudo curl ${HADOOP_WEB_SOURCE}hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz > /hadoop-${HADOOP_VERSION}.tar.gz
     echo "Download of Hadoop ${HADOOP_VERSION} Successful!"
 fi
 
 # Unzip and Install Hadoop Tar
 tar -xzf $current_directory/hadoop-$HADOOP_VERSION.tar.gz -C /usr/local/
-# tar -xzf /hadoop-$HADOOP_VERSION.tar.gz -C /usr/local/
+
+rm $current_directory/hadoop-$HADOOP_VERSION.tar.gz
 
 # Make Symbolic link
 ln -s /usr/local/hadoop-$HADOOP_VERSION/ $HADOOP_SYMLINK
 
-# Copy Config Files
-
+# Copy Hadoop Config Files
 cp -a $current_directory/configs/hadoop/. $HADOOP_CONFIG_LOCATION
 cp $current_directory/configs/master $HADOOP_CONFIG_LOCATION
 cp $current_directory/configs/slaves $HADOOP_CONFIG_LOCATION
@@ -52,10 +67,21 @@ SPARK_HOME_INFILE=`cd ${current_directory}/../../../.. && pwd`
 
 SPARK_CONFIG_LOCATION=$SPARK_HOME_INFILE/conf/
 
+# Remove Spark Global Variables
+sed -i /SPARK_HOME/d $ROOT_VARIABLES_ADDRESS && sed -i /spark/d $ROOT_VARIABLES_ADDRESS
+
+# Make Spark Global Variables for User and Root
+echo "export SPARK_HOME="$SPARK_HOME_INFILE >> $ROOT_VARIABLES_ADDRESS
+echo "export PATH=$PATH:"$SPARK_HOME_INFILE"/bin/" >> $ROOT_VARIABLES_ADDRESS
+source $ROOT_VARIABLES_ADDRESS
+
+# Copy Spark Config Files
 cp -a $current_directory/configs/spark/. $SPARK_CONFIG_LOCATION
 cp -a $current_directory/configs/hadoop/. $SPARK_CONFIG_LOCATION
 cp $current_directory/configs/master $SPARK_CONFIG_LOCATION
 cp $current_directory/configs/slaves $SPARK_CONFIG_LOCATION
 
 # Format Namenode
-/usr/local/hadoop/bin/hdfs namenode -format
\ No newline at end of file
+$HADOOP_HOME_INFILE/bin/hdfs namenode -format
+
+echo "Run the following on master node: sudo bash start_yarn_cluster.sh"
\ No newline at end of file
diff --git a/scripts/cluster/yarn_cluster_setup/start_yarn_cluster.sh b/scripts/cluster/yarn_cluster_setup/start_yarn_cluster.sh
index 3e1c531..2474ac9 100644
--- a/scripts/cluster/yarn_cluster_setup/start_yarn_cluster.sh
+++ b/scripts/cluster/yarn_cluster_setup/start_yarn_cluster.sh
@@ -1,21 +1,29 @@
 #!/bin/bash
 
 echo "STARTING HADOOP SERVICES"
-/usr/local/hadoop/sbin/start-dfs.sh
 
-/usr/local/hadoop/sbin/start-yarn.sh
+$HADOOP_HOME/sbin/start-dfs.sh
 
-/usr/local/hadoop/sbin/mr-jobhistory-daemon.sh start historyserver
+$HADOOP_HOME/sbin/start-yarn.sh
 
-/usr/local/hadoop/bin/hdfs dfsadmin -safemode leave
+$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver
+
+$HADOOP_HOME/bin/hdfs dfsadmin -safemode leave
 
 echo "STARTING SPARK SERVICES"
-/spark-3.0.0-SNAPSHOT-bin-SparkFHE/sbin/start-all.sh
+SPARK_HOME/sbin/start-all.sh
 
 echo "RUN jps - Java Virtual Machine Process Status Tool"
 jps
 
 echo "Get basic filesystem information and statistics."
-/usr/local/hadoop/bin/hdfs dfsadmin -report
+$HADOOP_HOME/bin/hdfs dfsadmin -report
+
+echo "Yarn Cluster is Active"
+
+master_node_ip_address=`hostname -i`
 
-echo "Yarn Cluster is Active"
\ No newline at end of file
+echo "YARN Interface Available At: "$master_node_ip_address":8088/"
+echo "Spark Interface Available At: "$master_node_ip_address":8080/"
+echo "NameNode Interface Available At: "$master_node_ip_address":50070/"
+echo "Job Master Interface Available At: "$master_node_ip_address":19888/"
\ No newline at end of file
diff --git a/scripts/cluster/yarn_cluster_setup/stop_yarn_cluster.sh b/scripts/cluster/yarn_cluster_setup/stop_yarn_cluster.sh
index d1cad90..9e6a5ba 100644
--- a/scripts/cluster/yarn_cluster_setup/stop_yarn_cluster.sh
+++ b/scripts/cluster/yarn_cluster_setup/stop_yarn_cluster.sh
@@ -2,14 +2,14 @@
 
 echo -e "STOPPING SPARK SERVICES"
 
-/spark-3.0.0-SNAPSHOT-bin-SparkFHE/sbin/stop-all.sh
+$SPARK_HOME/sbin/stop-all.sh
 
 echo -e "STOPPING HADOOP SERVICES"
 
-/usr/local/hadoop/sbin/mr-jobhistory-daemon.sh stop historyserver
+$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh stop historyserver
 
-/usr/local/hadoop/sbin/stop-dfs.sh
+$HADOOP_HOME/sbin/stop-dfs.sh
 
-/usr/local/hadoop/sbin/stop-yarn.sh
+$HADOOP_HOME/sbin/stop-yarn.sh
 
 echo "Hadoop Cluster is Inactive Now"
\ No newline at end of file
diff --git a/scripts/cluster/yarn_cluster_setup/test_scripts/run_spark_test_job_pi.sh b/scripts/cluster/yarn_cluster_setup/test_scripts/run_spark_test_job_pi.sh
index 1dcd31d..0adfae4 100644
--- a/scripts/cluster/yarn_cluster_setup/test_scripts/run_spark_test_job_pi.sh
+++ b/scripts/cluster/yarn_cluster_setup/test_scripts/run_spark_test_job_pi.sh
@@ -1,12 +1,14 @@
 #!/bin/bash
 
 echo "SPARK TEST"
-/spark-3.0.0-SNAPSHOT-bin-SparkFHE/bin/spark-submit --class org.apache.spark.examples.SparkPi  \
+$SPARK_HOME/bin/spark-submit --class org.apache.spark.examples.SparkPi  \
     --master yarn \
     --deploy-mode cluster \
     --num-executors 1 \
     --driver-memory 1g \
     --executor-memory 512m \
     --executor-cores 1 \
-    /spark-3.0.0-SNAPSHOT-bin-SparkFHE/examples/jars/spark-examples*.jar \
-    10
\ No newline at end of file
+    $SPARK_HOME/examples/jars/spark-examples*.jar \
+    10
+
+echo "Stop Cluster If not in Use"
\ No newline at end of file

From 917be6fcaf007c412bb94ed4c487c1a1198559be Mon Sep 17 00:00:00 2001
From: pranavmrane <pmr5279@g.rit.edu>
Date: Sun, 24 Feb 2019 15:39:37 -0500
Subject: [PATCH 02/13] Security Features Added

---
 .../configs/hadoop/core-site.xml                   |  5 +++++
 .../configs/hadoop/hadoop-policy.xml               |  8 ++++++++
 .../configs/hadoop/hdfs-site.xml                   |  4 ++++
 .../yarn_cluster_setup/start_yarn_cluster.sh       | 14 ++++++++------
 .../yarn_cluster_setup/stop_yarn_cluster.sh        |  8 ++++----
 .../test_scripts/run_spark_test_job_pi.sh          |  4 ++--
 6 files changed, 31 insertions(+), 12 deletions(-)
 create mode 100644 scripts/cluster/yarn_cluster_setup/configs/hadoop/hadoop-policy.xml

diff --git a/scripts/cluster/yarn_cluster_setup/configs/hadoop/core-site.xml b/scripts/cluster/yarn_cluster_setup/configs/hadoop/core-site.xml
index 7d1a4bb..3f4f6bd 100644
--- a/scripts/cluster/yarn_cluster_setup/configs/hadoop/core-site.xml
+++ b/scripts/cluster/yarn_cluster_setup/configs/hadoop/core-site.xml
@@ -11,4 +11,9 @@
 	<name>dfs.namenode.rpc-bind-host</name>
 	<value>0.0.0.0</value>
     </property>
+    <property>
+    <name>hadoop.security.authorization</name>
+    <value>true</value>
+    <description>Service level authorization params.</description>
+    </property>
 </configuration>
\ No newline at end of file
diff --git a/scripts/cluster/yarn_cluster_setup/configs/hadoop/hadoop-policy.xml b/scripts/cluster/yarn_cluster_setup/configs/hadoop/hadoop-policy.xml
new file mode 100644
index 0000000..e22e187
--- /dev/null
+++ b/scripts/cluster/yarn_cluster_setup/configs/hadoop/hadoop-policy.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+	 <property>
+	     <name>security.job.client.protocol.acl</name>
+	     <value> iotx-PG0</value>
+	</property>
+</configuration>
\ No newline at end of file
diff --git a/scripts/cluster/yarn_cluster_setup/configs/hadoop/hdfs-site.xml b/scripts/cluster/yarn_cluster_setup/configs/hadoop/hdfs-site.xml
index 90ad504..09fccf8 100644
--- a/scripts/cluster/yarn_cluster_setup/configs/hadoop/hdfs-site.xml
+++ b/scripts/cluster/yarn_cluster_setup/configs/hadoop/hdfs-site.xml
@@ -25,4 +25,8 @@
         <name>dfs.datanode.data.dir</name>
         <value>/data/hadoop/data</value>
     </property>
+    <property>
+        <name>dfs.webhdfs.enabled</name>
+        <value>false</value>
+    </property>
 </configuration>
\ No newline at end of file
diff --git a/scripts/cluster/yarn_cluster_setup/start_yarn_cluster.sh b/scripts/cluster/yarn_cluster_setup/start_yarn_cluster.sh
index 2474ac9..643f1e8 100644
--- a/scripts/cluster/yarn_cluster_setup/start_yarn_cluster.sh
+++ b/scripts/cluster/yarn_cluster_setup/start_yarn_cluster.sh
@@ -2,22 +2,24 @@
 
 echo "STARTING HADOOP SERVICES"
 
-$HADOOP_HOME/sbin/start-dfs.sh
+/usr/local/hadoop/sbin/start-dfs.sh
 
-$HADOOP_HOME/sbin/start-yarn.sh
+/usr/local/hadoop/sbin/start-yarn.sh
 
-$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver
+/usr/local/hadoop/sbin/mr-jobhistory-daemon.sh start historyserver
 
-$HADOOP_HOME/bin/hdfs dfsadmin -safemode leave
+# /usr/local/hadoop/bin/hdfs dfsadmin -safemode leave
+
+# /usr/local/hadoop/bin/hdfs dfsadmin
 
 echo "STARTING SPARK SERVICES"
-SPARK_HOME/sbin/start-all.sh
+/spark-3.0.0-SNAPSHOT-bin-SparkFHE/sbin/start-all.sh
 
 echo "RUN jps - Java Virtual Machine Process Status Tool"
 jps
 
 echo "Get basic filesystem information and statistics."
-$HADOOP_HOME/bin/hdfs dfsadmin -report
+/usr/local/hadoop/bin/hdfs dfsadmin -report
 
 echo "Yarn Cluster is Active"
 
diff --git a/scripts/cluster/yarn_cluster_setup/stop_yarn_cluster.sh b/scripts/cluster/yarn_cluster_setup/stop_yarn_cluster.sh
index 9e6a5ba..d1cad90 100644
--- a/scripts/cluster/yarn_cluster_setup/stop_yarn_cluster.sh
+++ b/scripts/cluster/yarn_cluster_setup/stop_yarn_cluster.sh
@@ -2,14 +2,14 @@
 
 echo -e "STOPPING SPARK SERVICES"
 
-$SPARK_HOME/sbin/stop-all.sh
+/spark-3.0.0-SNAPSHOT-bin-SparkFHE/sbin/stop-all.sh
 
 echo -e "STOPPING HADOOP SERVICES"
 
-$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh stop historyserver
+/usr/local/hadoop/sbin/mr-jobhistory-daemon.sh stop historyserver
 
-$HADOOP_HOME/sbin/stop-dfs.sh
+/usr/local/hadoop/sbin/stop-dfs.sh
 
-$HADOOP_HOME/sbin/stop-yarn.sh
+/usr/local/hadoop/sbin/stop-yarn.sh
 
 echo "Hadoop Cluster is Inactive Now"
\ No newline at end of file
diff --git a/scripts/cluster/yarn_cluster_setup/test_scripts/run_spark_test_job_pi.sh b/scripts/cluster/yarn_cluster_setup/test_scripts/run_spark_test_job_pi.sh
index 0adfae4..b6d16ba 100644
--- a/scripts/cluster/yarn_cluster_setup/test_scripts/run_spark_test_job_pi.sh
+++ b/scripts/cluster/yarn_cluster_setup/test_scripts/run_spark_test_job_pi.sh
@@ -1,14 +1,14 @@
 #!/bin/bash
 
 echo "SPARK TEST"
-$SPARK_HOME/bin/spark-submit --class org.apache.spark.examples.SparkPi  \
+/spark-3.0.0-SNAPSHOT-bin-SparkFHE/bin/spark-submit --class org.apache.spark.examples.SparkPi  \
     --master yarn \
     --deploy-mode cluster \
     --num-executors 1 \
     --driver-memory 1g \
     --executor-memory 512m \
     --executor-cores 1 \
-    $SPARK_HOME/examples/jars/spark-examples*.jar \
+    /spark-3.0.0-SNAPSHOT-bin-SparkFHE/examples/jars/spark-examples*.jar \
     10
 
 echo "Stop Cluster If not in Use"
\ No newline at end of file

From 85ea362cabe78df2b4d5aa30cef56b5e799509d6 Mon Sep 17 00:00:00 2001
From: pranavmrane <pmr5279@g.rit.edu>
Date: Sun, 24 Feb 2019 20:21:51 -0500
Subject: [PATCH 03/13] Global Variables Bugs Fixed Using source

---
 scripts/cluster/yarn_cluster_setup/README.md     |  1 +
 .../install_yarn_master_slave.sh                 |  7 +++----
 .../yarn_cluster_setup/start_yarn_cluster.sh     | 16 ++++++++--------
 .../yarn_cluster_setup/stop_yarn_cluster.sh      | 10 ++++++----
 .../test_scripts/run_spark_test_job_pi.sh        |  6 ++++--
 5 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/scripts/cluster/yarn_cluster_setup/README.md b/scripts/cluster/yarn_cluster_setup/README.md
index bc02d71..4a180b5 100644
--- a/scripts/cluster/yarn_cluster_setup/README.md
+++ b/scripts/cluster/yarn_cluster_setup/README.md
@@ -27,6 +27,7 @@ Use the link generated after successful completion of cluster building to view t
 cd test_scripts
 sudo bash run_spark_test_job_pi.sh
 ```
+To view the output open http://<MASTER_NODE_IP_ADDRESS>:8088/, select the latest application, open the logs for that application, and select stdout. This should show the value for Pi calculated on the cluster.
 
 ### Useful Links: 
 Other links can be generated by changing the port number.
diff --git a/scripts/cluster/yarn_cluster_setup/install_yarn_master_slave.sh b/scripts/cluster/yarn_cluster_setup/install_yarn_master_slave.sh
index 1ed3c47..778cb22 100644
--- a/scripts/cluster/yarn_cluster_setup/install_yarn_master_slave.sh
+++ b/scripts/cluster/yarn_cluster_setup/install_yarn_master_slave.sh
@@ -7,8 +7,7 @@ HADOOP_SYMLINK=/usr/local/hadoop
 HADOOP_CONFIG_LOCATION=${HADOOP_HOME_INFILE}etc/hadoop/
 HADOOP_VERSION=2.9.2
 HADOOP_WEB_SOURCE=https://www-us.apache.org/dist/hadoop/common/
-ROOT_VARIABLES_ADDRESS=/root/.bashrc
-USER_VARIABLES_ADDRESS=~/.bashrc
+ROOT_VARIABLES_ADDRESS=/etc/profile
 
 # Install Pre-Reqs
 apt-get update -y
@@ -19,8 +18,8 @@ unlink ${HADOOP_SYMLINK} && rm -rf ${HADOOP_DATA}
 rm -rf /usr/local/hadoop-*/
 
 # Remove Global Variables
-sed -i /JAVA_HOME/d $ROOT_VARIABLES_ADDRESS && sed -i /HADOOP_HOME/d $ROOT_VARIABLES_ADDRESS
-sed -i /hadoop/d $ROOT_VARIABLES_ADDRESS && sed -i /default-java/d $ROOT_VARIABLES_ADDRESS
+sed -i /JAVA_HOME/d $ROOT_VARIABLES_ADDRESS && sed -i /default-java/d $ROOT_VARIABLES_ADDRESS
+sed -i /HADOOP_HOME/d $ROOT_VARIABLES_ADDRESS && sed -i /hadoop/d $ROOT_VARIABLES_ADDRESS
 
 # Make Hadoop Global Variables for User and Root
 echo "export JAVA_HOME="$JAVA_HOME_INFILE >> $ROOT_VARIABLES_ADDRESS
diff --git a/scripts/cluster/yarn_cluster_setup/start_yarn_cluster.sh b/scripts/cluster/yarn_cluster_setup/start_yarn_cluster.sh
index 643f1e8..53a8729 100644
--- a/scripts/cluster/yarn_cluster_setup/start_yarn_cluster.sh
+++ b/scripts/cluster/yarn_cluster_setup/start_yarn_cluster.sh
@@ -1,25 +1,25 @@
 #!/bin/bash
 
-echo "STARTING HADOOP SERVICES"
+source /etc/profile
 
-/usr/local/hadoop/sbin/start-dfs.sh
+echo "STARTING HADOOP SERVICES"
 
-/usr/local/hadoop/sbin/start-yarn.sh
+$HADOOP_HOME/sbin/start-dfs.sh
 
-/usr/local/hadoop/sbin/mr-jobhistory-daemon.sh start historyserver
+$HADOOP_HOME/sbin/start-yarn.sh
 
-# /usr/local/hadoop/bin/hdfs dfsadmin -safemode leave
+$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver
 
-# /usr/local/hadoop/bin/hdfs dfsadmin
+$HADOOP_HOME/bin/hdfs dfsadmin -safemode leave
 
 echo "STARTING SPARK SERVICES"
-/spark-3.0.0-SNAPSHOT-bin-SparkFHE/sbin/start-all.sh
+$SPARK_HOME/sbin/start-all.sh
 
 echo "RUN jps - Java Virtual Machine Process Status Tool"
 jps
 
 echo "Get basic filesystem information and statistics."
-/usr/local/hadoop/bin/hdfs dfsadmin -report
+$HADOOP_HOME/bin/hdfs dfsadmin -report
 
 echo "Yarn Cluster is Active"
 
diff --git a/scripts/cluster/yarn_cluster_setup/stop_yarn_cluster.sh b/scripts/cluster/yarn_cluster_setup/stop_yarn_cluster.sh
index d1cad90..6e00bb5 100644
--- a/scripts/cluster/yarn_cluster_setup/stop_yarn_cluster.sh
+++ b/scripts/cluster/yarn_cluster_setup/stop_yarn_cluster.sh
@@ -1,15 +1,17 @@
 #!/bin/bash
 
+source /etc/profile
+
 echo -e "STOPPING SPARK SERVICES"
 
-/spark-3.0.0-SNAPSHOT-bin-SparkFHE/sbin/stop-all.sh
+$SPARK_HOME/sbin/stop-all.sh
 
 echo -e "STOPPING HADOOP SERVICES"
 
-/usr/local/hadoop/sbin/mr-jobhistory-daemon.sh stop historyserver
+$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh stop historyserver
 
-/usr/local/hadoop/sbin/stop-dfs.sh
+$HADOOP_HOME/sbin/stop-dfs.sh
 
-/usr/local/hadoop/sbin/stop-yarn.sh
+$HADOOP_HOME/sbin/stop-yarn.sh
 
 echo "Hadoop Cluster is Inactive Now"
\ No newline at end of file
diff --git a/scripts/cluster/yarn_cluster_setup/test_scripts/run_spark_test_job_pi.sh b/scripts/cluster/yarn_cluster_setup/test_scripts/run_spark_test_job_pi.sh
index b6d16ba..4768ca2 100644
--- a/scripts/cluster/yarn_cluster_setup/test_scripts/run_spark_test_job_pi.sh
+++ b/scripts/cluster/yarn_cluster_setup/test_scripts/run_spark_test_job_pi.sh
@@ -1,14 +1,16 @@
 #!/bin/bash
 
+source /etc/profile
+
 echo "SPARK TEST"
-/spark-3.0.0-SNAPSHOT-bin-SparkFHE/bin/spark-submit --class org.apache.spark.examples.SparkPi  \
+$SPARK_HOME/bin/spark-submit --class org.apache.spark.examples.SparkPi  \
     --master yarn \
     --deploy-mode cluster \
     --num-executors 1 \
     --driver-memory 1g \
     --executor-memory 512m \
     --executor-cores 1 \
-    /spark-3.0.0-SNAPSHOT-bin-SparkFHE/examples/jars/spark-examples*.jar \
+    $SPARK_HOME/examples/jars/spark-examples*.jar \
     10
 
 echo "Stop Cluster If not in Use"
\ No newline at end of file

From a75c4364bfc022ac502b05a65471935a62b89cd9 Mon Sep 17 00:00:00 2001
From: pranavmrane <pmr5279@g.rit.edu>
Date: Sun, 3 Mar 2019 17:27:26 -0500
Subject: [PATCH 04/13] Security Improvements, Readme Updated with SSH
 Tunneling

---
 scripts/cluster/yarn_cluster_setup/README.md  | 67 +++++++++++++++----
 .../configs/hadoop/core-site.xml              | 14 ++--
 .../configs/hadoop/hadoop-policy.xml          |  8 ---
 .../configs/hadoop/hdfs-site.xml              |  8 ++-
 .../configs/hadoop/yarn-site-capacity.xml     | 10 ++-
 .../configs/hadoop/yarn-site-fair.xml         | 10 ++-
 .../configs/hadoop/yarn-site.xml              | 10 ++-
 .../yarn_cluster_setup/start_yarn_cluster.sh  | 12 ++--
 8 files changed, 96 insertions(+), 43 deletions(-)
 delete mode 100644 scripts/cluster/yarn_cluster_setup/configs/hadoop/hadoop-policy.xml

diff --git a/scripts/cluster/yarn_cluster_setup/README.md b/scripts/cluster/yarn_cluster_setup/README.md
index 4a180b5..ba9553f 100644
--- a/scripts/cluster/yarn_cluster_setup/README.md
+++ b/scripts/cluster/yarn_cluster_setup/README.md
@@ -27,30 +27,73 @@ Use the link generated after successful completion of cluster building to view t
 cd test_scripts
 sudo bash run_spark_test_job_pi.sh
 ```
-To view the output open http://<MASTER_NODE_IP_ADDRESS>:8088/, select the latest application, open the logs for that application, and select stdout. This should show the value for Pi calculated on the cluster.
 
-### Useful Links: 
+## Web Interfaces:
+
+The public IP addresses of all nodes have been closed to bolster security. To view the web Interface, some additional steps will have to be performed.
+
+### Setup SSH Tunneling for nodes
+
+Open a Terminal window on local machine and type the following:
+
+```
+ssh -4 -ND <PORT_NUMBER> <USERNAME@MASTER_NODE_ID.SERVER_AREA.cloudlab.us>
+```
+This step opens a SOCKS proxy on the chosen local port and tunnels its traffic through the Master Node.
+
+### Get Internal IP of Master Node
+
+On the master node run the following to get the internal IP of Master Node:
+```
+hostname -I | awk '{print $1}'
+```
+This same step can be done on any of the worker nodes.
+
+### Configure Browser to open link
+
+Open Mozilla Firefox browser in the local machine. 
+
+Click on three horizontal bars available on the top right hand side.
+
+Select Preferences and look for 'Network Settings' on the page.
+
+Once inside Network Settings, Select Manual Proxy Configuration.
+
+Select Socks_v5 and type in the Port Number chosen in the previous step for SOCKS Host. The IP of SOCKS Host does not need to be changed. Save the Settings.
+
+### List of Web Interfaces
+
 Other links can be generated by changing the port number.
 
-YARN Interface:
+#### YARN Interface:
+
+http://<MASTER_NODE_IP_ADDRESS_INTERNAL>:8088/
+
+The output of test job is available in the link above.  
+
+Select the latest application, open the logs for that application, and select stdout. This should show the value for Pi calculated on the cluster.
+
+#### Spark Interface:
+
+http://<MASTER_NODE_IP_ADDRESS_INTERNAL>:8080/
 
-http://<MASTER_NODE_IP_ADDRESS>:8088/
+#### Namenode Interface:
 
-Spark Interface:
+http://<MASTER_NODE_IP_ADDRESS_INTERNAL>:50070/
 
-http://<MASTER_NODE_IP_ADDRESS>:8080/
+#### JobMaster Interface:
 
-Namenode Interface:
+http://<MASTER_NODE_IP_ADDRESS_INTERNAL>:19888/
 
-http://<MASTER_NODE_IP_ADDRESS>:50070/
+#### Datanode Interface:
 
-Datanode Interface:
+http://<WORKER_NODE_IP_ADDRESS_INTERNAL>:50075/
 
-http://<WORKER_NODE_IP_ADDRESS>:50075/
+### Remove Browser Configuration
 
-JobMaster Interface:
+To use the Mozilla Firefox browser regularly, Select 'No Proxy' in Network Settings and Save.
 
-http://<MASTER_NODE_IP_ADDRESS>:19888/
+Stop the SSH tunneling by Closing the Terminal Window or Hit Ctrl + C in the terminal window.
 
 # Stop the Cluster
 ```
diff --git a/scripts/cluster/yarn_cluster_setup/configs/hadoop/core-site.xml b/scripts/cluster/yarn_cluster_setup/configs/hadoop/core-site.xml
index 3f4f6bd..710fa1e 100644
--- a/scripts/cluster/yarn_cluster_setup/configs/hadoop/core-site.xml
+++ b/scripts/cluster/yarn_cluster_setup/configs/hadoop/core-site.xml
@@ -8,12 +8,12 @@
     </property>
     <!-- default RPC IP，and use 0.0.0.0 to represent all ips-->
     <property>
-	<name>dfs.namenode.rpc-bind-host</name>
-	<value>0.0.0.0</value>
-    </property>
-    <property>
-    <name>hadoop.security.authorization</name>
-    <value>true</value>
-    <description>Service level authorization params.</description>
+	   <name>dfs.namenode.rpc-bind-host</name>
+	   <value>master</value>
     </property>
+    <!-- <property>
+        <name>hadoop.security.authorization</name>
+        <value>true</value>
+        <description>Service level authorization params.</description>
+    </property> -->
 </configuration>
\ No newline at end of file
diff --git a/scripts/cluster/yarn_cluster_setup/configs/hadoop/hadoop-policy.xml b/scripts/cluster/yarn_cluster_setup/configs/hadoop/hadoop-policy.xml
deleted file mode 100644
index e22e187..0000000
--- a/scripts/cluster/yarn_cluster_setup/configs/hadoop/hadoop-policy.xml
+++ /dev/null
@@ -1,8 +0,0 @@
-<?xml version="1.0"?>
-<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
-<configuration>
-	 <property>
-	     <name>security.job.client.protocol.acl</name>
-	     <value> iotx-PG0</value>
-	</property>
-</configuration>
\ No newline at end of file
diff --git a/scripts/cluster/yarn_cluster_setup/configs/hadoop/hdfs-site.xml b/scripts/cluster/yarn_cluster_setup/configs/hadoop/hdfs-site.xml
index 09fccf8..1c8b404 100644
--- a/scripts/cluster/yarn_cluster_setup/configs/hadoop/hdfs-site.xml
+++ b/scripts/cluster/yarn_cluster_setup/configs/hadoop/hdfs-site.xml
@@ -7,11 +7,11 @@
    </property>
     <property>
         <name>dfs.namenode.http-address</name>
-        <value>0.0.0.0:50070</value>
+        <value>master:50070</value>
     </property>
     <property>
         <name>dfs.namenode.secondary.http-address</name>
-        <value>0.0.0.0:50090</value>
+        <value>master:50090</value>
     </property>
     <property>
         <name>dfs.replication</name>
@@ -29,4 +29,8 @@
         <name>dfs.webhdfs.enabled</name>
         <value>false</value>
     </property>
+    <!-- <property>
+        <name>dfs.namenode.acls.enabled</name>
+        <value>true</value>
+    </property> -->
 </configuration>
\ No newline at end of file
diff --git a/scripts/cluster/yarn_cluster_setup/configs/hadoop/yarn-site-capacity.xml b/scripts/cluster/yarn_cluster_setup/configs/hadoop/yarn-site-capacity.xml
index b403f69..10056c7 100755
--- a/scripts/cluster/yarn_cluster_setup/configs/hadoop/yarn-site-capacity.xml
+++ b/scripts/cluster/yarn_cluster_setup/configs/hadoop/yarn-site-capacity.xml
@@ -27,11 +27,11 @@
   </property>
   <property>
      <name>yarn.resourcemanager.admin.address</name>
-     <value>0.0.0.0:8033</value>
+     <value>master:8033</value>
    </property>
    <property>
       <name>yarn.resourcemanager.webapp.address</name>
-      <value>0.0.0.0:8088</value>
+      <value>master:8088</value>
    </property>
    <property>
       <name>mapreduce.jobhistory.address</name>
@@ -39,6 +39,10 @@
    </property>
    <property>
       <name>mapreduce.jobhistory.webapp.address</name>
-      <value>0.0.0.0:19888</value>
+      <value>master:19888</value>
    </property>
+    <!-- <property>
+      <name>yarn.acl.enable</name>
+      <value>true</value>
+   </property> -->
 </configuration>
\ No newline at end of file
diff --git a/scripts/cluster/yarn_cluster_setup/configs/hadoop/yarn-site-fair.xml b/scripts/cluster/yarn_cluster_setup/configs/hadoop/yarn-site-fair.xml
index 2903140..dec55d6 100755
--- a/scripts/cluster/yarn_cluster_setup/configs/hadoop/yarn-site-fair.xml
+++ b/scripts/cluster/yarn_cluster_setup/configs/hadoop/yarn-site-fair.xml
@@ -27,11 +27,11 @@
   </property>
   <property>
      <name>yarn.resourcemanager.admin.address</name>
-     <value>0.0.0.0:8033</value>
+     <value>master:8033</value>
    </property>
    <property>
       <name>yarn.resourcemanager.webapp.address</name>
-      <value>0.0.0.0:8088</value>
+      <value>master:8088</value>
    </property>
    <property>
       <name>mapreduce.jobhistory.address</name>
@@ -39,8 +39,12 @@
    </property>
    <property>
       <name>mapreduce.jobhistory.webapp.address</name>
-      <value>0.0.0.0:19888</value>
+      <value>master:19888</value>
    </property>
+   <!--  <property>
+      <name>yarn.acl.enable</name>
+      <value>true</value>
+   </property> -->
   <property>
     <name>yarn.resourcemanager.scheduler.class</name>
     <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
diff --git a/scripts/cluster/yarn_cluster_setup/configs/hadoop/yarn-site.xml b/scripts/cluster/yarn_cluster_setup/configs/hadoop/yarn-site.xml
index b403f69..e046711 100755
--- a/scripts/cluster/yarn_cluster_setup/configs/hadoop/yarn-site.xml
+++ b/scripts/cluster/yarn_cluster_setup/configs/hadoop/yarn-site.xml
@@ -27,11 +27,11 @@
   </property>
   <property>
      <name>yarn.resourcemanager.admin.address</name>
-     <value>0.0.0.0:8033</value>
+     <value>master:8033</value>
    </property>
    <property>
       <name>yarn.resourcemanager.webapp.address</name>
-      <value>0.0.0.0:8088</value>
+      <value>master:8088</value>
    </property>
    <property>
       <name>mapreduce.jobhistory.address</name>
@@ -39,6 +39,10 @@
    </property>
    <property>
       <name>mapreduce.jobhistory.webapp.address</name>
-      <value>0.0.0.0:19888</value>
+      <value>master:19888</value>
    </property>
+   <!--  <property>
+      <name>yarn.acl.enable</name>
+      <value>true</value>
+   </property> -->
 </configuration>
\ No newline at end of file
diff --git a/scripts/cluster/yarn_cluster_setup/start_yarn_cluster.sh b/scripts/cluster/yarn_cluster_setup/start_yarn_cluster.sh
index 53a8729..10f67e3 100644
--- a/scripts/cluster/yarn_cluster_setup/start_yarn_cluster.sh
+++ b/scripts/cluster/yarn_cluster_setup/start_yarn_cluster.sh
@@ -23,9 +23,11 @@ $HADOOP_HOME/bin/hdfs dfsadmin -report
 
 echo "Yarn Cluster is Active"
 
-master_node_ip_address=`hostname -i`
+echo "Follow the instructions for Web Interfaces specified in the Readme page"
 
-echo "YARN Interface Available At: "$master_node_ip_address":8088/"
-echo "Spark Interface Available At: "$master_node_ip_address":8080/"
-echo "NameNode Interface Available At: "$master_node_ip_address":50070/"
-echo "Job Master Interface Available At: "$master_node_ip_address":19888/"
\ No newline at end of file
+master_node_ip_address_internal=`hostname -I | awk '{print $1}'`
+
+echo "YARN Interface Available At: "$master_node_ip_address_internal":8088/"
+echo "Spark Interface Available At: "$master_node_ip_address_internal":8080/"
+echo "NameNode Interface Available At: "$master_node_ip_address_internal":50070/"
+echo "Job Master Interface Available At: "$master_node_ip_address_internal":19888/"
\ No newline at end of file

From c23b18a161f6e20ac3d60bf96fa1444364217378 Mon Sep 17 00:00:00 2001
From: pranavmrane <pmr5279@g.rit.edu>
Date: Mon, 4 Mar 2019 01:31:08 -0500
Subject: [PATCH 05/13] Client Code Added, Code Refinements Pending

---
 scripts/cluster/yarn_cluster_setup/README.md  | 14 +++---
 .../install_yarn_cluster.sh                   | 45 +++++++------------
 .../install_yarn_master_slave.sh              |  4 +-
 .../test_scripts/run_spark_test_job_pi.sh     |  4 +-
 4 files changed, 24 insertions(+), 43 deletions(-)

diff --git a/scripts/cluster/yarn_cluster_setup/README.md b/scripts/cluster/yarn_cluster_setup/README.md
index ba9553f..23df2a7 100644
--- a/scripts/cluster/yarn_cluster_setup/README.md
+++ b/scripts/cluster/yarn_cluster_setup/README.md
@@ -1,32 +1,30 @@
 
 Setup an experiment on Cloudlab using the SparkFHE-Dist-Ubuntu18.04 image. Use the Wisconsin server.
 
-Please note that the scripts are designed to run on Master Node.
-
-# SSH into Master Node
+# SSH into Client Node
 SSH into the master node and navigate to the address specified below:
 ```
 cd /spark-3.0.0-SNAPSHOT-bin-SparkFHE/SparkFHE-Addon/scripts/cluster/yarn_cluster_setup
 ```
 
-# Install Hadoop and Configure Spark on all nodes through Master Node
+# Install Hadoop and Configure Spark on all nodes through Client Node
 Specify the hostnames of nodes as arguments.
 ```
 sudo bash install_yarn_cluster.sh master,worker1,worker2 ...
 ```
 
-# Start Yarn Spark Cluster and Run Spark Job on Master
+# Start Yarn Spark Cluster and Run Spark Job on Master(Step Automatic For Now)
 Cluster can only be started on master node after installation is complete on all nodes and configuration files for Yarn and Spark are placed in correct folders.
 ```
 sudo bash start_yarn_cluster.sh
 ```
 
-# Run Test Spark Job on Master
-Use the link generated after successful completion of cluster building to view the web interface for Yarn.
+# Run Test Spark Job on Master Through Client(Step Automatic For Now)
 ```
 cd test_scripts
 sudo bash run_spark_test_job_pi.sh
 ```
+If the job is successfulll completed, final status is 'SUCCEEDED'. The links generated can be used by following the guide specified below.
 
 ## Web Interfaces:
 
@@ -95,7 +93,7 @@ To use the Mozilla Firefox browser regularly, Select 'No Proxy' in Network Setti
 
 Stop the SSH tunneling by Closing the Terminal Window or Hit Ctrl + C in the terminal window.
 
-# Stop the Cluster
+# Stop the Cluster(Step Automatic For Now)
 ```
 cd ..
 sudo bash stop_yarn_job.sh
diff --git a/scripts/cluster/yarn_cluster_setup/install_yarn_cluster.sh b/scripts/cluster/yarn_cluster_setup/install_yarn_cluster.sh
index 90ac210..cfd6319 100644
--- a/scripts/cluster/yarn_cluster_setup/install_yarn_cluster.sh
+++ b/scripts/cluster/yarn_cluster_setup/install_yarn_cluster.sh
@@ -13,7 +13,7 @@ eval $(echo $cluster | awk '{split($0, array, ",");for(i in array)print "host_ar
 
 function checkSSH() {
     echo "Checking SSH connections"
-    for(( i=2;i<=${#host_array[@]};i++)) ; do
+    for(( i=1;i<=${#host_array[@]};i++)) ; do
         ssh ${host_array[i]} "hostname"
         if [ $? -eq 0 ]
         then
@@ -27,11 +27,11 @@ function checkSSH() {
 
 checkSSH
 
+current_directory=`pwd`
+
 # Make Master and Slaves File
 # Clear Content from Files
 
-current_directory=`pwd`
-
 rm -rf $current_directory/configs/master || true
 touch $current_directory/configs/master
 rm -rf $current_directory/configs/slaves || true
@@ -46,30 +46,17 @@ for(( i=2;i<=${#host_array[@]};i++)) ; do
     echo ${host_array[i]} >> $current_directory/configs/slaves
 done
 
-echo =========================================================
-echo "Setup Yarn Master"
-echo =========================================================
-echo "Installing Yarn-master"
-# Setup Environment at node
-bash install_yarn_master_slave.sh
-
-echo =========================================================
-echo "Setting up Yarn Slaves"
-echo =========================================================
-
-# Read addresses in slaves file
-cat $current_directory/configs/slaves | while read line
-
-do
-    if [ "$line" = "-" ]; then
-        echo "Skip $line"
-    else
-        # Move master and slaves file to worker nodes
-        scp $current_directory/configs/master root@$line:$current_directory/configs
-        scp $current_directory/configs/slaves root@$line:$current_directory/configs
-        echo "Installing on $line"
-        echo "Installing Yarn-slave"
-        ssh root@$line -n "cd ${current_directory} && sudo bash install_yarn_master_slave.sh"
-        echo "Finished config node $line"
-    fi
+# Move Master and Slaves File on all Nodes
+# Install Cluster on all Nodes
+for(( i=1;i<=${#host_array[@]};i++)) ; do
+    scp $current_directory/configs/master ${host_array[i]}:$current_directory/configs
+    scp $current_directory/configs/slaves ${host_array[i]}:$current_directory/configs
+    echo "Installing on "${host_array[i]}
+    ssh root@${host_array[i]} -n "cd ${current_directory} && sudo bash install_yarn_master_slave.sh"
+    echo "Finished configuration on "${host_array[i]}
 done
+
+# Trigger Scripts on Master Node
+ssh root@${host_array[$master_limit]} -n "cd ${current_directory} && sudo bash start_yarn_cluster.sh"
+ssh root@${host_array[$master_limit]} -n "cd ${current_directory}/test_scripts && sudo bash run_spark_test_job_pi.sh"
+ssh root@${host_array[$master_limit]} -n "cd ${current_directory} && sudo bash stop_yarn_cluster.sh"
\ No newline at end of file
diff --git a/scripts/cluster/yarn_cluster_setup/install_yarn_master_slave.sh b/scripts/cluster/yarn_cluster_setup/install_yarn_master_slave.sh
index 778cb22..4e9d017 100644
--- a/scripts/cluster/yarn_cluster_setup/install_yarn_master_slave.sh
+++ b/scripts/cluster/yarn_cluster_setup/install_yarn_master_slave.sh
@@ -81,6 +81,4 @@ cp $current_directory/configs/master $SPARK_CONFIG_LOCATION
 cp $current_directory/configs/slaves $SPARK_CONFIG_LOCATION
 
 # Format Namenode
-$HADOOP_HOME_INFILE/bin/hdfs namenode -format
-
-echo "Run the following on master node: sudo bash start_yarn_cluster.sh"
\ No newline at end of file
+$HADOOP_HOME_INFILE/bin/hdfs namenode -format
\ No newline at end of file
diff --git a/scripts/cluster/yarn_cluster_setup/test_scripts/run_spark_test_job_pi.sh b/scripts/cluster/yarn_cluster_setup/test_scripts/run_spark_test_job_pi.sh
index 4768ca2..2f37677 100644
--- a/scripts/cluster/yarn_cluster_setup/test_scripts/run_spark_test_job_pi.sh
+++ b/scripts/cluster/yarn_cluster_setup/test_scripts/run_spark_test_job_pi.sh
@@ -11,6 +11,4 @@ $SPARK_HOME/bin/spark-submit --class org.apache.spark.examples.SparkPi  \
     --executor-memory 512m \
     --executor-cores 1 \
     $SPARK_HOME/examples/jars/spark-examples*.jar \
-    10
-
-echo "Stop Cluster If not in Use"
\ No newline at end of file
+    10
\ No newline at end of file

From bce5932b364ce8fbda3f2299c193ca2ece649f5b Mon Sep 17 00:00:00 2001
From: pranavmrane <pmr5279@g.rit.edu>
Date: Tue, 5 Mar 2019 07:43:09 -0500
Subject: [PATCH 06/13] Client Integration completed, webHDFS enabled

---
 scripts/cluster/yarn_cluster_setup/README.md  | 30 +++++----
 .../configs/hadoop/hdfs-site.xml              |  4 +-
 .../install_yarn_cluster.sh                   | 66 +++++++++----------
 .../yarn_cluster_setup/start_yarn_cluster.sh  | 46 ++++++++-----
 .../yarn_cluster_setup/stop_yarn_cluster.sh   | 24 +++++--
 .../test_scripts/run_spark_test_job_pi.sh     | 14 ----
 .../run_spark_test_job_pi_remotely.sh         | 24 +++++++
 7 files changed, 119 insertions(+), 89 deletions(-)
 delete mode 100644 scripts/cluster/yarn_cluster_setup/test_scripts/run_spark_test_job_pi.sh
 create mode 100644 scripts/cluster/yarn_cluster_setup/test_scripts/run_spark_test_job_pi_remotely.sh

diff --git a/scripts/cluster/yarn_cluster_setup/README.md b/scripts/cluster/yarn_cluster_setup/README.md
index 23df2a7..9b02052 100644
--- a/scripts/cluster/yarn_cluster_setup/README.md
+++ b/scripts/cluster/yarn_cluster_setup/README.md
@@ -1,6 +1,8 @@
 
 Setup an experiment on Cloudlab using the SparkFHE-Dist-Ubuntu18.04 image. Use the Wisconsin server.
 
+Please note that all scripts are designed to run on Client node.
+
 # SSH into Client Node
 SSH into the master node and navigate to the address specified below:
 ```
@@ -8,21 +10,21 @@ cd /spark-3.0.0-SNAPSHOT-bin-SparkFHE/SparkFHE-Addon/scripts/cluster/yarn_cluste
 ```
 
 # Install Hadoop and Configure Spark on all nodes through Client Node
-Specify the hostnames of nodes as arguments.
+The hostnames of nodes in cluster will be picked up from etc/hosts
 ```
-sudo bash install_yarn_cluster.sh master,worker1,worker2 ...
+sudo bash install_yarn_cluster.sh
 ```
 
-# Start Yarn Spark Cluster and Run Spark Job on Master(Step Automatic For Now)
+# Start Yarn Spark Cluster
 Cluster can only be started on master node after installation is complete on all nodes and configuration files for Yarn and Spark are placed in correct folders.
 ```
 sudo bash start_yarn_cluster.sh
 ```
 
-# Run Test Spark Job on Master Through Client(Step Automatic For Now)
+# Run Test Spark Job on Master
 ```
 cd test_scripts
-sudo bash run_spark_test_job_pi.sh
+sudo bash run_spark_test_job_pi_remotely.sh
 ```
 If the job is successfulll completed, final status is 'SUCCEEDED'. The links generated can be used by following the guide specified below.
 
@@ -30,6 +32,14 @@ If the job is successfulll completed, final status is 'SUCCEEDED'. The links gen
 
 The public IP addresses of all nodes have been closed to bolster security. To view the web Interface, some additional steps will have to be performed.
 
+### Find Internal IP of Master Node
+
+On the client node run the following to get the internal IP of Master Node:
+```
+sudo ssh master "hostname -I | awk '{print \$1}'"
+```
+This same step can be done on any of the worker nodes.
+
 ### Setup SSH Tunneling for nodes
 
 Open a Terminal window on local machine and type the following:
@@ -39,14 +49,6 @@ ssh -4 -ND <PORT_NUMBER> <USERNAME@MASTER_NODE_ID.SERVER_AREA.cloudlab.us>
 ```
 This step will bind the local machine's port to the IP address of Master Node.
 
-### Get Internal IP of Master Node
-
-On the master node run the following to get the internal IP of Master Node:
-```
-hostname -I | awk '{print $1}'
-```
-This same step can be done on any of the worker nodes.
-
 ### Configure Browser to open link
 
 Open Mozilla Firefox browser in the local machine. 
@@ -61,7 +63,7 @@ Select Socks_v5 and type in the Port Number chosen in the previous step for SOCK
 
 ### List of Web Interfaces
 
-Other links can be generated by changing the port number.
+Different Web Interfaces can be accessed by changing the port number.
 
 #### YARN Interface:
 
diff --git a/scripts/cluster/yarn_cluster_setup/configs/hadoop/hdfs-site.xml b/scripts/cluster/yarn_cluster_setup/configs/hadoop/hdfs-site.xml
index 1c8b404..4cb3516 100644
--- a/scripts/cluster/yarn_cluster_setup/configs/hadoop/hdfs-site.xml
+++ b/scripts/cluster/yarn_cluster_setup/configs/hadoop/hdfs-site.xml
@@ -25,10 +25,10 @@
         <name>dfs.datanode.data.dir</name>
         <value>/data/hadoop/data</value>
     </property>
-    <property>
+<!--     <property>
         <name>dfs.webhdfs.enabled</name>
         <value>false</value>
-    </property>
+    </property> -->
     <!-- <property>
         <name>dfs.namenode.acls.enabled</name>
         <value>true</value>
diff --git a/scripts/cluster/yarn_cluster_setup/install_yarn_cluster.sh b/scripts/cluster/yarn_cluster_setup/install_yarn_cluster.sh
index cfd6319..faca289 100644
--- a/scripts/cluster/yarn_cluster_setup/install_yarn_cluster.sh
+++ b/scripts/cluster/yarn_cluster_setup/install_yarn_cluster.sh
@@ -1,19 +1,36 @@
 #!/bin/sh
 
-# Checking for no arguments passed
-if [[ $# -eq 0 ]] ; then
-    echo "Missing arguments."
-    echo "Usage: bash install_yarn_cluster.bash masterHostname1,workerHostname1,workerHostname2,..."
-    exit 0
-fi
+# Save Master Node Hostname As Global variable
+ROOT_VARIABLES_ADDRESS=/etc/profile
+HOSTS_ADDRESS=/etc/hosts
 
-# Split based on de-limiter as comma
-cluster=$1
-eval $(echo $cluster | awk '{split($0, array, ",");for(i in array)print "host_array["i"]="array[i]}')
+# Assume that master node and worker node contain the phrase master and worker in their names respectively
+master_name=master
+worker_name=worker
+name_index_location=4
 
+master_index_in_host_array=0
+
+current_directory=`pwd`
+
+# Make Master and Slaves File, Clear Older Files
+rm -rf $current_directory/configs/master || true
+touch $current_directory/configs/master
+rm -rf $current_directory/configs/slaves || true
+touch $current_directory/configs/slaves
+
+# Assume that etc/hosts is correctly populated
+# Read hostnames for master and worker nodes
+grep $master_name $HOSTS_ADDRESS | awk -v var="$name_index_location" '{print $var}' >> $current_directory/configs/master
+grep $worker_name $HOSTS_ADDRESS | awk -v var="$name_index_location" '{print $var}' >> $current_directory/configs/slaves
+
+# Save all hostnames in an array
+host_array=($(grep -E "$master_name|$worker_name" /etc/hosts | awk -v var="$name_index_location" '{print $var}'))
+
+# Ping each node from client
 function checkSSH() {
     echo "Checking SSH connections"
-    for(( i=1;i<=${#host_array[@]};i++)) ; do
+    for(( i=0;i<${#host_array[@]};i++)) ; do
         ssh ${host_array[i]} "hostname"
         if [ $? -eq 0 ]
         then
@@ -27,36 +44,17 @@ function checkSSH() {
 
 checkSSH
 
-current_directory=`pwd`
-
-# Make Master and Slaves File
-# Clear Content from Files
-
-rm -rf $current_directory/configs/master || true
-touch $current_directory/configs/master
-rm -rf $current_directory/configs/slaves || true
-touch $current_directory/configs/slaves
-
-# Save 1st argument in master file
-master_limit=1
-echo ${host_array[$master_limit]} >> $current_directory/configs/master
-
-# Save Remaining arguments in slaves file
-for(( i=2;i<=${#host_array[@]};i++)) ; do
-    echo ${host_array[i]} >> $current_directory/configs/slaves
-done
-
 # Move Master and Slaves File on all Nodes
 # Install Cluster on all Nodes
-for(( i=1;i<=${#host_array[@]};i++)) ; do
+for(( i=0;i<${#host_array[@]};i++)) ; do
     scp $current_directory/configs/master ${host_array[i]}:$current_directory/configs
     scp $current_directory/configs/slaves ${host_array[i]}:$current_directory/configs
     echo "Installing on "${host_array[i]}
     ssh root@${host_array[i]} -n "cd ${current_directory} && sudo bash install_yarn_master_slave.sh"
     echo "Finished configuration on "${host_array[i]}
+    echo ""
 done
 
-# Trigger Scripts on Master Node
-ssh root@${host_array[$master_limit]} -n "cd ${current_directory} && sudo bash start_yarn_cluster.sh"
-ssh root@${host_array[$master_limit]} -n "cd ${current_directory}/test_scripts && sudo bash run_spark_test_job_pi.sh"
-ssh root@${host_array[$master_limit]} -n "cd ${current_directory} && sudo bash stop_yarn_cluster.sh"
\ No newline at end of file
+# Save Master Node Address as Global Variable
+sed -i /MASTER_HOSTNAME/d $ROOT_VARIABLES_ADDRESS
+echo "export MASTER_HOSTNAME="${host_array[$master_index_in_host_array]} >> $ROOT_VARIABLES_ADDRESS
\ No newline at end of file
diff --git a/scripts/cluster/yarn_cluster_setup/start_yarn_cluster.sh b/scripts/cluster/yarn_cluster_setup/start_yarn_cluster.sh
index 10f67e3..65d182e 100644
--- a/scripts/cluster/yarn_cluster_setup/start_yarn_cluster.sh
+++ b/scripts/cluster/yarn_cluster_setup/start_yarn_cluster.sh
@@ -1,33 +1,43 @@
 #!/bin/bash
 
+current_hostname=`hostname`
 source /etc/profile
 
-echo "STARTING HADOOP SERVICES"
+if [[ $current_hostname == *"client"* ]]; then
+	echo "Commands running from correct node"
+	ssh $MASTER_HOSTNAME '
+		source /etc/profile
 
-$HADOOP_HOME/sbin/start-dfs.sh
+		echo "STARTING HADOOP SERVICES"
 
-$HADOOP_HOME/sbin/start-yarn.sh
+		$HADOOP_HOME/sbin/start-dfs.sh
 
-$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver
+		$HADOOP_HOME/sbin/start-yarn.sh
 
-$HADOOP_HOME/bin/hdfs dfsadmin -safemode leave
+		$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver
 
-echo "STARTING SPARK SERVICES"
-$SPARK_HOME/sbin/start-all.sh
+		$HADOOP_HOME/bin/hdfs dfsadmin -safemode leave
 
-echo "RUN jps - Java Virtual Machine Process Status Tool"
-jps
+		echo "STARTING SPARK SERVICES"
+		$SPARK_HOME/sbin/start-all.sh
 
-echo "Get basic filesystem information and statistics."
-$HADOOP_HOME/bin/hdfs dfsadmin -report
+		echo "RUN jps - Java Virtual Machine Process Status Tool"
+		jps
 
-echo "Yarn Cluster is Active"
+		echo "Get basic filesystem information and statistics."
+		$HADOOP_HOME/bin/hdfs dfsadmin -report
 
-echo "Follow the instructions for Web Interfaces specified in the Readme page"
+		echo "Yarn Cluster is Active"
 
-master_node_ip_address_internal=`hostname -I | awk '{print $1}'`
+		echo "Follow the instructions for Web Interfaces specified in the Readme page"
 
-echo "YARN Interface Available At: "$master_node_ip_address_internal":8088/"
-echo "Spark Interface Available At: "$master_node_ip_address_internal":8080/"
-echo "NameNode Interface Available At: "$master_node_ip_address_internal":50070/"
-echo "Job Master Interface Available At: "$master_node_ip_address_internal":19888/"
\ No newline at end of file
+		master_node_ip_address_internal=`hostname -I | sed "'"s/\s.*$//"'"`
+
+		echo "YARN Interface Available At: "$master_node_ip_address_internal":8088/"
+		echo "Spark Interface Available At: "$master_node_ip_address_internal":8080/"
+		echo "NameNode Interface Available At: "$master_node_ip_address_internal":50070/"
+		echo "Job Master Interface Available At: "$master_node_ip_address_internal":19888/"
+	'
+else
+	echo "This code can run ONLY on Client Node"
+fi
diff --git a/scripts/cluster/yarn_cluster_setup/stop_yarn_cluster.sh b/scripts/cluster/yarn_cluster_setup/stop_yarn_cluster.sh
index 6e00bb5..2e589a2 100644
--- a/scripts/cluster/yarn_cluster_setup/stop_yarn_cluster.sh
+++ b/scripts/cluster/yarn_cluster_setup/stop_yarn_cluster.sh
@@ -1,17 +1,27 @@
 #!/bin/bash
 
+current_hostname=`hostname`
 source /etc/profile
 
-echo -e "STOPPING SPARK SERVICES"
+if [[ $current_hostname == *"client"* ]]; then
+	echo "Commands running from correct node"
+	ssh $MASTER_HOSTNAME '
+		source /etc/profile
 
-$SPARK_HOME/sbin/stop-all.sh
+		echo -e "STOPPING SPARK SERVICES"
 
-echo -e "STOPPING HADOOP SERVICES"
+		$SPARK_HOME/sbin/stop-all.sh
 
-$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh stop historyserver
+		echo -e "STOPPING HADOOP SERVICES"
 
-$HADOOP_HOME/sbin/stop-dfs.sh
+		$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh stop historyserver
 
-$HADOOP_HOME/sbin/stop-yarn.sh
+		$HADOOP_HOME/sbin/stop-dfs.sh
 
-echo "Hadoop Cluster is Inactive Now"
\ No newline at end of file
+		$HADOOP_HOME/sbin/stop-yarn.sh
+
+		echo "Hadoop Cluster is Inactive Now"
+	'
+else
+	echo "This code can run ONLY on Client Node"
+fi
diff --git a/scripts/cluster/yarn_cluster_setup/test_scripts/run_spark_test_job_pi.sh b/scripts/cluster/yarn_cluster_setup/test_scripts/run_spark_test_job_pi.sh
deleted file mode 100644
index 2f37677..0000000
--- a/scripts/cluster/yarn_cluster_setup/test_scripts/run_spark_test_job_pi.sh
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/bin/bash
-
-source /etc/profile
-
-echo "SPARK TEST"
-$SPARK_HOME/bin/spark-submit --class org.apache.spark.examples.SparkPi  \
-    --master yarn \
-    --deploy-mode cluster \
-    --num-executors 1 \
-    --driver-memory 1g \
-    --executor-memory 512m \
-    --executor-cores 1 \
-    $SPARK_HOME/examples/jars/spark-examples*.jar \
-    10
\ No newline at end of file
diff --git a/scripts/cluster/yarn_cluster_setup/test_scripts/run_spark_test_job_pi_remotely.sh b/scripts/cluster/yarn_cluster_setup/test_scripts/run_spark_test_job_pi_remotely.sh
new file mode 100644
index 0000000..435c331
--- /dev/null
+++ b/scripts/cluster/yarn_cluster_setup/test_scripts/run_spark_test_job_pi_remotely.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+current_hostname=`hostname`
+source /etc/profile
+
+if [[ $current_hostname == *"client"* ]]; then
+	echo "Commands running from correct node"
+	ssh $MASTER_HOSTNAME '
+		source /etc/profile
+
+		echo "SPARK TEST"
+		$SPARK_HOME/bin/spark-submit --class org.apache.spark.examples.SparkPi  \
+		    --master yarn \
+		    --deploy-mode cluster \
+		    --num-executors 1 \
+		    --driver-memory 1g \
+		    --executor-memory 512m \
+		    --executor-cores 1 \
+		    $SPARK_HOME/examples/jars/spark-examples*.jar \
+		    10
+	'
+else
+	echo "This code can run ONLY on Client Node"
+fi

From 5a4c67d30173ade86a8a65f0e03ea70841d4722f Mon Sep 17 00:00:00 2001
From: pranavmrane <pmr5279@g.rit.edu>
Date: Tue, 5 Mar 2019 16:21:43 -0500
Subject: [PATCH 07/13] Code Changes for making Image in Cloudlab

---
 .../yarn_cluster_setup/install_yarn_cluster.sh        | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/scripts/cluster/yarn_cluster_setup/install_yarn_cluster.sh b/scripts/cluster/yarn_cluster_setup/install_yarn_cluster.sh
index faca289..5cf5cd8 100644
--- a/scripts/cluster/yarn_cluster_setup/install_yarn_cluster.sh
+++ b/scripts/cluster/yarn_cluster_setup/install_yarn_cluster.sh
@@ -44,17 +44,20 @@ function checkSSH() {
 
 checkSSH
 
-# Move Master and Slaves File on all Nodes
+# Move Config Files and install_yarn_master_slave.sh
 # Install Cluster on all Nodes
 for(( i=0;i<${#host_array[@]};i++)) ; do
-    scp $current_directory/configs/master ${host_array[i]}:$current_directory/configs
-    scp $current_directory/configs/slaves ${host_array[i]}:$current_directory/configs
+    # Assume that yarn_cluster_setup folder is present in all nodes
+    rsync -a --rsync-path="sudo rsync" $current_directory/configs/ ${host_array[i]}:$current_directory/configs/
+    scp $current_directory/install_yarn_master_slave.sh ${host_array[i]}:$current_directory/
     echo "Installing on "${host_array[i]}
     ssh root@${host_array[i]} -n "cd ${current_directory} && sudo bash install_yarn_master_slave.sh"
     echo "Finished configuration on "${host_array[i]}
     echo ""
 done
 
+actual_master_name=`cat ${current_directory}/configs/master`
+
 # Save Master Node Address as Global Variable
 sed -i /MASTER_HOSTNAME/d $ROOT_VARIABLES_ADDRESS
-echo "export MASTER_HOSTNAME="${host_array[$master_index_in_host_array]} >> $ROOT_VARIABLES_ADDRESS
\ No newline at end of file
+echo "export MASTER_HOSTNAME="$actual_master_name >> $ROOT_VARIABLES_ADDRESS
\ No newline at end of file

From 77d6e26d7b0835752c39043eb68d37239e76252b Mon Sep 17 00:00:00 2001
From: pranavmrane <pmr5279@g.rit.edu>
Date: Tue, 12 Mar 2019 14:58:54 -0400
Subject: [PATCH 08/13] Bug Fixes for making Cloudlab Image

---
 scripts/cluster/yarn_cluster_setup/README.md  | 18 ++++++----
 .../install_yarn_cluster.sh                   | 33 +++++++++++--------
 .../yarn_cluster_setup/start_yarn_cluster.sh  |  6 ++--
 .../yarn_cluster_setup/stop_yarn_cluster.sh   |  6 ++--
 .../run_spark_test_job_pi_remotely.sh         |  6 ++--
 5 files changed, 40 insertions(+), 29 deletions(-)

diff --git a/scripts/cluster/yarn_cluster_setup/README.md b/scripts/cluster/yarn_cluster_setup/README.md
index 9b02052..18c4529 100644
--- a/scripts/cluster/yarn_cluster_setup/README.md
+++ b/scripts/cluster/yarn_cluster_setup/README.md
@@ -1,27 +1,33 @@
 
-Setup an experiment on Cloudlab using the SparkFHE-Dist-Ubuntu18.04 image. Use the Wisconsin server.
+Setup an experiment on Cloudlab using the SparkFHE-YARN-Client-Ubun18.04 image. Use the Wisconsin server.
 
 Please note that all scripts are designed to run on Client node.
 
-# SSH into Client Node
+# SSH into Master Node
 SSH into the master node and navigate to the address specified below:
 ```
 cd /spark-3.0.0-SNAPSHOT-bin-SparkFHE/SparkFHE-Addon/scripts/cluster/yarn_cluster_setup
 ```
 
-# Install Hadoop and Configure Spark on all nodes through Client Node
+# Install Hadoop and Configure Spark on all nodes through Master Node
 The hostnames of nodes in cluster will be picked up from etc/hosts
 ```
 sudo bash install_yarn_cluster.sh
 ```
 
-# Start Yarn Spark Cluster
+# SSH into Client Node
+SSH into the Client node and navigate to the address specified below:
+```
+cd /spark-3.0.0-SNAPSHOT-bin-SparkFHE/SparkFHE-Addon/scripts/cluster/yarn_cluster_setup
+```
+
+# Start Yarn Spark Cluster from Client Node
 Cluster can only be started on master node after installation is complete on all nodes and configuration files for Yarn and Spark are placed in correct folders.
 ```
 sudo bash start_yarn_cluster.sh
 ```
 
-# Run Test Spark Job on Master
+# Run Test Spark Job on Master Through Client
 ```
 cd test_scripts
 sudo bash run_spark_test_job_pi_remotely.sh
@@ -95,7 +101,7 @@ To use the Mozilla Firefox browser regularly, Select 'No Proxy' in Network Setti
 
 Stop the SSH tunneling by Closing the Terminal Window or Hit Ctrl + C in the terminal window.
 
-# Stop the Cluster(Step Automatic For Now)
+# Stop the Cluster Through the client Node
 ```
 cd ..
 sudo bash stop_yarn_job.sh
diff --git a/scripts/cluster/yarn_cluster_setup/install_yarn_cluster.sh b/scripts/cluster/yarn_cluster_setup/install_yarn_cluster.sh
index 5cf5cd8..7625dbc 100644
--- a/scripts/cluster/yarn_cluster_setup/install_yarn_cluster.sh
+++ b/scripts/cluster/yarn_cluster_setup/install_yarn_cluster.sh
@@ -18,20 +18,21 @@ rm -rf $current_directory/configs/master || true
 touch $current_directory/configs/master
 rm -rf $current_directory/configs/slaves || true
 touch $current_directory/configs/slaves
+rm -rf $current_directory/configs/hostnames || true
 
 # Assume that etc/hosts is correctly populated
 # Read hostnames for master and worker nodes
 grep $master_name $HOSTS_ADDRESS | awk -v var="$name_index_location" '{print $var}' >> $current_directory/configs/master
 grep $worker_name $HOSTS_ADDRESS | awk -v var="$name_index_location" '{print $var}' >> $current_directory/configs/slaves
+cat $current_directory/configs/master $current_directory/configs/slaves > $current_directory/configs/hostnames
 
-# Save all hostnames in an array
-host_array=($(grep -E "$master_name|$worker_name" /etc/hosts | awk -v var="$name_index_location" '{print $var}'))
+host_array=($(cat $current_directory/configs/hostnames |tr "\n" " "))
 
-# Ping each node from client
 function checkSSH() {
     echo "Checking SSH connections"
     for(( i=0;i<${#host_array[@]};i++)) ; do
-        ssh ${host_array[i]} "hostname"
+        echo ${host_array[i]}
+        ssh root@${host_array[i]} "hostname"
         if [ $? -eq 0 ]
         then
             echo -e "Can SSH to ${host_array[i]}"
@@ -44,20 +45,24 @@ function checkSSH() {
 
 checkSSH
 
+echo =========================================================
+echo "Setup Yarn Master"
+echo =========================================================
+echo "Installing Yarn-master"
+bash install_yarn_master_slave.sh
+
+
 # Move Config Files and install_yarn_master_slave.sh
-# Install Cluster on all Nodes
-for(( i=0;i<${#host_array[@]};i++)) ; do
-    # Assume that yarn_cluster_setup folder is present in all nodes
+# Install Cluster on all Worker Nodes
+echo =========================================================
+echo "Setting up Yarn Slaves"
+echo =========================================================
+for(( i=1;i<${#host_array[@]};i++)) ; do
+    # ssh root@${host_array[i]} -n "sudo rm -rf ${current_directory} && sudo mkdir -p ${current_directory}"
     rsync -a --rsync-path="sudo rsync" $current_directory/configs/ ${host_array[i]}:$current_directory/configs/
     scp $current_directory/install_yarn_master_slave.sh ${host_array[i]}:$current_directory/
     echo "Installing on "${host_array[i]}
     ssh root@${host_array[i]} -n "cd ${current_directory} && sudo bash install_yarn_master_slave.sh"
     echo "Finished configuration on "${host_array[i]}
     echo ""
-done
-
-actual_master_name=`cat ${current_directory}/configs/master`
-
-# Save Master Node Address as Global Variable
-sed -i /MASTER_HOSTNAME/d $ROOT_VARIABLES_ADDRESS
-echo "export MASTER_HOSTNAME="$actual_master_name >> $ROOT_VARIABLES_ADDRESS
\ No newline at end of file
+done
\ No newline at end of file
diff --git a/scripts/cluster/yarn_cluster_setup/start_yarn_cluster.sh b/scripts/cluster/yarn_cluster_setup/start_yarn_cluster.sh
index 65d182e..15161ac 100644
--- a/scripts/cluster/yarn_cluster_setup/start_yarn_cluster.sh
+++ b/scripts/cluster/yarn_cluster_setup/start_yarn_cluster.sh
@@ -1,9 +1,9 @@
 #!/bin/bash
 
-current_hostname=`hostname`
-source /etc/profile
+MASTER_HOSTNAME=master
+CLIENT_HOSTNAME=client
 
-if [[ $current_hostname == *"client"* ]]; then
+if [[ `hostname` == *${CLIENT_HOSTNAME}* ]]; then
 	echo "Commands running from correct node"
 	ssh $MASTER_HOSTNAME '
 		source /etc/profile
diff --git a/scripts/cluster/yarn_cluster_setup/stop_yarn_cluster.sh b/scripts/cluster/yarn_cluster_setup/stop_yarn_cluster.sh
index 2e589a2..c0fe0b4 100644
--- a/scripts/cluster/yarn_cluster_setup/stop_yarn_cluster.sh
+++ b/scripts/cluster/yarn_cluster_setup/stop_yarn_cluster.sh
@@ -1,9 +1,9 @@
 #!/bin/bash
 
-current_hostname=`hostname`
-source /etc/profile
+MASTER_HOSTNAME=master
+CLIENT_HOSTNAME=client
 
-if [[ $current_hostname == *"client"* ]]; then
+if [[ `hostname` == *${CLIENT_HOSTNAME}* ]]; then
 	echo "Commands running from correct node"
 	ssh $MASTER_HOSTNAME '
 		source /etc/profile
diff --git a/scripts/cluster/yarn_cluster_setup/test_scripts/run_spark_test_job_pi_remotely.sh b/scripts/cluster/yarn_cluster_setup/test_scripts/run_spark_test_job_pi_remotely.sh
index 435c331..b4f7753 100644
--- a/scripts/cluster/yarn_cluster_setup/test_scripts/run_spark_test_job_pi_remotely.sh
+++ b/scripts/cluster/yarn_cluster_setup/test_scripts/run_spark_test_job_pi_remotely.sh
@@ -1,9 +1,9 @@
 #!/bin/bash
 
-current_hostname=`hostname`
-source /etc/profile
+MASTER_HOSTNAME=master
+CLIENT_HOSTNAME=client
 
-if [[ $current_hostname == *"client"* ]]; then
+if [[ `hostname` == *${CLIENT_HOSTNAME}* ]]; then
 	echo "Commands running from correct node"
 	ssh $MASTER_HOSTNAME '
 		source /etc/profile

From 39825d7ffb1c1d93f5f43e85299544f891036259 Mon Sep 17 00:00:00 2001
From: pranavmrane <pmr5279@g.rit.edu>
Date: Sun, 7 Apr 2019 03:40:50 -0400
Subject: [PATCH 09/13] NameNode HTTP bound to 0.0.0.0, webHDFS explicitly disabled

---
 .../cluster/yarn_cluster_setup/configs/hadoop/hdfs-site.xml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/cluster/yarn_cluster_setup/configs/hadoop/hdfs-site.xml b/scripts/cluster/yarn_cluster_setup/configs/hadoop/hdfs-site.xml
index 4cb3516..373204b 100644
--- a/scripts/cluster/yarn_cluster_setup/configs/hadoop/hdfs-site.xml
+++ b/scripts/cluster/yarn_cluster_setup/configs/hadoop/hdfs-site.xml
@@ -7,7 +7,7 @@
    </property>
     <property>
         <name>dfs.namenode.http-address</name>
-        <value>master:50070</value>
+        <value>0.0.0.0:50070</value>
     </property>
     <property>
         <name>dfs.namenode.secondary.http-address</name>
@@ -25,10 +25,10 @@
         <name>dfs.datanode.data.dir</name>
         <value>/data/hadoop/data</value>
     </property>
-<!--     <property>
+    <property>
         <name>dfs.webhdfs.enabled</name>
         <value>false</value>
-    </property> -->
+    </property>
     <!-- <property>
         <name>dfs.namenode.acls.enabled</name>
         <value>true</value>

From 3e4c57fc6bbf92fdc3110263ae7b49099ce7ffa4 Mon Sep 17 00:00:00 2001
From: Pranav Mohan Rane <pmr5279@g.rit.edu>
Date: Sun, 7 Apr 2019 04:09:47 -0400
Subject: [PATCH 10/13] Image Name Corrected in Readme file

---
 scripts/cluster/yarn_cluster_setup/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/cluster/yarn_cluster_setup/README.md b/scripts/cluster/yarn_cluster_setup/README.md
index 18c4529..ea280d5 100644
--- a/scripts/cluster/yarn_cluster_setup/README.md
+++ b/scripts/cluster/yarn_cluster_setup/README.md
@@ -1,5 +1,5 @@
 
-Setup an experiment on Cloudlab using the SparkFHE-YARN-Client-Ubun18.04 image. Use the Wisconsin server.
+Setup an experiment on Cloudlab using the SparkFHE-YARN-Client-Ub18-HDFS image. Use the Wisconsin server.
 
 Please note that all scripts are designed to run on Client node.
 
@@ -106,4 +106,4 @@ Stop the SSH tunneling by Closing the Terminal Window or Hit Ctrl + C in the ter
 cd ..
 sudo bash stop_yarn_job.sh
 ```
-After running this command, the web interfaces will not work.
\ No newline at end of file
+After running this command, the web interfaces will not work.

From 2be52aa0e47382f2c8c776a1a582ede94b1d7b01 Mon Sep 17 00:00:00 2001
From: pranavmrane <pmr5279@g.rit.edu>
Date: Sun, 7 Apr 2019 11:46:03 -0400
Subject: [PATCH 11/13] webHDFS enabled, image name updated

---
 scripts/cluster/yarn_cluster_setup/README.md                  | 4 ++--
 .../cluster/yarn_cluster_setup/configs/hadoop/hdfs-site.xml   | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/cluster/yarn_cluster_setup/README.md b/scripts/cluster/yarn_cluster_setup/README.md
index ea280d5..0257752 100644
--- a/scripts/cluster/yarn_cluster_setup/README.md
+++ b/scripts/cluster/yarn_cluster_setup/README.md
@@ -1,5 +1,5 @@
 
-Setup an experiment on Cloudlab using the SparkFHE-YARN-Client-Ub18-HDFS image. Use the Wisconsin server.
+Setup an experiment on Cloudlab using the SparkFHE-YARN-Client-Ubun18.04 image. Use the Wisconsin server.
 
 Please note that all scripts are designed to run on Client node.
 
@@ -21,7 +21,7 @@ SSH into the Client node and navigate to the address specified below:
 cd /spark-3.0.0-SNAPSHOT-bin-SparkFHE/SparkFHE-Addon/scripts/cluster/yarn_cluster_setup
 ```
 
-# Start Yarn Spark Cluster from Client Node
+# Start Yarn Spark Cluster and HDFS from Client Node
 Cluster can only be started on master node after installation is complete on all nodes and configuration files for Yarn and Spark are placed in correct folders.
 ```
 sudo bash start_yarn_cluster.sh
diff --git a/scripts/cluster/yarn_cluster_setup/configs/hadoop/hdfs-site.xml b/scripts/cluster/yarn_cluster_setup/configs/hadoop/hdfs-site.xml
index 373204b..b52fb77 100644
--- a/scripts/cluster/yarn_cluster_setup/configs/hadoop/hdfs-site.xml
+++ b/scripts/cluster/yarn_cluster_setup/configs/hadoop/hdfs-site.xml
@@ -7,7 +7,7 @@
    </property>
     <property>
         <name>dfs.namenode.http-address</name>
-        <value>0.0.0.0:50070</value>
+        <value>master:50070</value>
     </property>
     <property>
         <name>dfs.namenode.secondary.http-address</name>
@@ -27,7 +27,7 @@
     </property>
     <property>
         <name>dfs.webhdfs.enabled</name>
-        <value>false</value>
+        <value>true</value>
     </property>
     <!-- <property>
         <name>dfs.namenode.acls.enabled</name>

From a38f88307737daceaa2ef56ed65e20b7868420d8 Mon Sep 17 00:00:00 2001
From: pranavmrane <pmr5279@g.rit.edu>
Date: Sat, 20 Apr 2019 22:08:46 -0400
Subject: [PATCH 12/13] HDFS api working from remote IPs

---
 scripts/cluster/yarn_cluster_setup/README.md  | 156 +++++++++++++-----
 .../configs/hadoop/core-site.xml              |  11 +-
 .../configs/hadoop/hdfs-site.xml              |  12 +-
 .../configs/hadoop/yarn-site-capacity.xml     |  20 +--
 .../configs/hadoop/yarn-site-fair.xml         |  20 +--
 .../configs/hadoop/yarn-site-regular.xml      |  44 +++++
 .../configs/hadoop/yarn-site.xml              |  20 +--
 .../yarn_cluster_setup/configs/hostnames      |   3 +
 .../cluster/yarn_cluster_setup/configs/master |   1 +
 .../cluster/yarn_cluster_setup/configs/slaves |   2 +
 .../configs/spark/spark-defaults.conf         |  28 ++++
 .../install_yarn_cluster.sh                   |  12 +-
 .../install_yarn_master_slave.sh              |  39 ++++-
 .../yarn_cluster_setup/start_yarn_cluster.sh  |  15 +-
 .../yarn_cluster_setup/stop_yarn_cluster.sh   |  16 +-
 .../run_spark_test_job_pi_remotely.sh         |  12 +-
 16 files changed, 294 insertions(+), 117 deletions(-)
 create mode 100755 scripts/cluster/yarn_cluster_setup/configs/hadoop/yarn-site-regular.xml
 create mode 100644 scripts/cluster/yarn_cluster_setup/configs/hostnames
 create mode 100644 scripts/cluster/yarn_cluster_setup/configs/master
 create mode 100644 scripts/cluster/yarn_cluster_setup/configs/slaves
 create mode 100644 scripts/cluster/yarn_cluster_setup/configs/spark/spark-defaults.conf

diff --git a/scripts/cluster/yarn_cluster_setup/README.md b/scripts/cluster/yarn_cluster_setup/README.md
index 0257752..89cca83 100644
--- a/scripts/cluster/yarn_cluster_setup/README.md
+++ b/scripts/cluster/yarn_cluster_setup/README.md
@@ -1,7 +1,7 @@
 
-Setup an experiment on Cloudlab using the SparkFHE-YARN-Client-Ubun18.04 image. Use the Wisconsin server.
+Setup an experiment on Cloudlab using the SparkFHE-YARN-Client-Ub18-HDFS image. Use the Wisconsin server.
 
-Please note that all scripts are designed to run on Client node.
+The installation scripts are designed to run from master. The cluster start/stop scripts and example scripts are designed to work from client.
 
 # SSH into Master Node
 SSH into the master node and navigate to the address specified below:
@@ -10,7 +10,7 @@ cd /spark-3.0.0-SNAPSHOT-bin-SparkFHE/SparkFHE-Addon/scripts/cluster/yarn_cluste
 ```
 
 # Install Hadoop and Configure Spark on all nodes through Master Node
-The hostnames of nodes in cluster will be picked up from etc/hosts
+The hostnames of nodes in cluster will be picked up from etc/hosts. Read Appendix for further details about Hostnames.
 ```
 sudo bash install_yarn_cluster.sh
 ```
@@ -22,7 +22,7 @@ cd /spark-3.0.0-SNAPSHOT-bin-SparkFHE/SparkFHE-Addon/scripts/cluster/yarn_cluste
 ```
 
 # Start Yarn Spark Cluster and HDFS from Client Node
-Cluster can only be started on master node after installation is complete on all nodes and configuration files for Yarn and Spark are placed in correct folders.
+Cluster can only be started on master node after installation is complete on all nodes and configuration files for Yarn and Spark are placed in correct folders. Check Appendix for HDFS Commands.
 ```
 sudo bash start_yarn_cluster.sh
 ```
@@ -34,76 +34,142 @@ sudo bash run_spark_test_job_pi_remotely.sh
 ```
 If the job is successfulll completed, final status is 'SUCCEEDED'. The links generated can be used by following the guide specified below.
 
-## Web Interfaces:
+# Web Interfaces:
 
-The public IP addresses of all nodes have been closed to bolster security. To view the web Interface, some additional steps will have to be performed.
+Different Web Interfaces can be accessed by changing the port number. The list is specified directly below.
 
-### Find Internal IP of Master Node
+To view the web Interface, some additional steps will have to be performed. Check Appendix for SSH Tunneling Instructions.
 
-On the client node run the following to get the internal IP of Master Node:
-```
-sudo ssh master "hostname -I | awk '{print \$1}'"
-```
-This same step can be done on any of the worker nodes.
+The public IP addresses of some nodes have been closed to bolster security. Check Appendix for Security for individual aspects of cluster.
 
-### Setup SSH Tunneling for nodes
+## YARN Interface:
 
-Open a Terminal window on local machine and type the following:
+http://<MASTER_NODE_IP_ADDRESS_INTERNAL>:8088/
 
-```
-ssh -4 -ND <PORT_NUMBER> <USERNAME@MASTER_NODE_ID.SERVER_AREA.cloudlab.us>
-```
-This step will bind the local machine's port to the IP address of Master Node.
+The output of test job is available in the link above.  
 
-### Configure Browser to open link
+Select the latest application, open the logs for that application, and select stdout. This should show the value for Pi calculated on the cluster.
 
-Open Mozilla Firefox browser in the local machine. 
+## Spark Interface:
 
-Click on three horizontal bars available on the top right hand side.
+http://<MASTER_NODE_IP_ADDRESS_INTERNAL>:8080/
 
-Select Preferences and look for 'Network Settings' on the page.
+## Namenode Interface:
 
-Once inside Network Settings, Select Manual Proxy Configuration.
+http://<MASTER_NODE_IP_ADDRESS_INTERNAL>:50070/
 
-Select Socks_v5 and type in the Port Number chosen in the previous step for SOCKS Host. The IP of SOCKS Host does not need to be changed. Save the Settings.
+## JobMaster Interface:
 
-### List of Web Interfaces
+http://<MASTER_NODE_IP_ADDRESS_INTERNAL>:19888/
 
-Different Web Interfaces can be accessed by changing the port number.
+## Datanode Interface:
 
-#### YARN Interface:
+http://<WORKER_NODE_IP_ADDRESS_INTERNAL>:50075/
 
-http://<MASTER_NODE_IP_ADDRESS_INTERNAL>:8088/
+# Stop the Cluster Through the client Node
+```
+cd ..
+sudo bash stop_yarn_job.sh
+```
+After running this command, the web interfaces will not work.
 
-The output of test job is available in the link above.  
+# Appendix
 
-Select the latest application, open the logs for that application, and select stdout. This should show the value for Pi calculated on the cluster.
+## Hostnames
+The current process is designed to read worker names from etc/hosts. This might not be the case for 3rd party platforms such as Amazon EC2, where this step will have to be changed: the user would have to manually enter the public IP addresses of the master and worker nodes.
 
-#### Spark Interface:
+## HDFS Commands
+An important condition for HDFS to work is the public IP address. Please make sure that every node in the cluster has a publicly accessible IP address.
 
-http://<MASTER_NODE_IP_ADDRESS_INTERNAL>:8080/
+HDFS is turned on when start_yarn_cluster.sh is executed. The individual command to turn on HDFS is <HADOOP_HOME>/sbin/start-dfs.sh. To close use <HADOOP_HOME>/sbin/stop-dfs.sh. 
 
-#### Namenode Interface:
+### HDFS Commands on cluster nodes
+Once HDFS is on, the following commands can be run from any of the nodes in the cluster.
+```
+# List Folders in HDFS
+hdfs dfs -ls /
+# Make Folder
+hadoop fs -mkdir -p /<DIRECTORY_TO_BE_CREATED>
+# Confirm Folder Creation
+hdfs dfs -ls /
+# Move Local file into HDFS
+hadoop fs -put <LOCAL_FILE_ADDRESS>/<FILE_NAME> /<DIRECTORY_TO_BE_CREATED>/
+# View content of file created in HDFS
+hdfs dfs -cat /<DIRECTORY_TO_BE_CREATED>/<FILE_NAME>
+```
+Additional Information can be found [here](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/FileSystemShell.html)
 
-http://<MASTER_NODE_IP_ADDRESS_INTERNAL>:50070/
+### HDFS Commands from remote machines using the HDFS RPC port i.e. port 9000
+For the most part the HDFS commands here stay similar to the HDFS commands on cluster nodes. The address to access the HDFS needs to be changed in the following manner. The standard address for hdfs/hadoop can be /usr/local/hadoop/etc/hadoop
+```
+# List Folders in HDFS
+hdfs dfs -ls hdfs://<MASTER_NODE_IP_ADDRESS_PUBLIC>:9000/
+# Make Folder
+hadoop fs -mkdir -p hdfs://<MASTER_NODE_IP_ADDRESS_PUBLIC>:9000/<DIRECTORY_TO_BE_CREATED>
+# Confirm Folder Creation
+hdfs dfs -ls hdfs://<MASTER_NODE_IP_ADDRESS_PUBLIC>:9000/
+# Move Local file into HDFS
+hadoop fs -put <FILE_ADDRESS>/<FILE_NAME> hdfs://<MASTER_NODE_IP_ADDRESS_PUBLIC>:9000/<DIRECTORY_TO_BE_CREATED>/
+# View content of file created in HDFS
+hdfs dfs -cat hdfs://<MASTER_NODE_IP_ADDRESS_PUBLIC>:9000/<DIRECTORY_TO_BE_CREATED>/<FILE_NAME>
+```
+Additional Information can be found [here](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/FileSystemShell.html)
 
-#### JobMaster Interface:
+### HDFS Commands from remote machines using webHDFS i.e. port 50070
+To run commands from machines outside the cluster, REST API can be used. Here are a few examples.
+```
+# Make folder
+curl -X PUT "http://<MASTER_NODE_IP_ADDRESS_PUBLIC>:50070/webhdfs/v1/user/<DIRECTORY_TO_BE_CREATED>?user.name=root&op=MKDIRS"
+# Create an empty file
+curl -i -X PUT "http://<MASTER_NODE_IP_ADDRESS_PUBLIC>:50070/webhdfs/v1/user/<DIRECTORY_TO_BE_CREATED>/<FILE_TO_BE_UPLOADED>?user.name=root&op=CREATE"
+# The command above generates a link(specified in quotes) that can be used to upload the file. Use it to append <FILE_TO_BE_UPLOADED> onto HDFS.
+curl -i -T <FILE_TO_BE_UPLOADED> "http://<MASTER_NODE_IP_ADDRESS_PUBLIC>:50075/webhdfs/v1/user/<DIRECTORY_TO_BE_CREATED>/<FILE_TO_BE_UPLOADED>?op=CREATE&user.name=root&namenoderpcaddress=master:9000&createflag=&createparent=true&overwrite=false"
 
-http://<MASTER_NODE_IP_ADDRESS_INTERNAL>:19888/
+```
+Additional Information can be found [here](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/WebHDFS.html)
 
-#### Datanode Interface:
+## SSH Tunneling Instructions
 
-http://<WORKER_NODE_IP_ADDRESS_INTERNAL>:50075/
+### Find Internal IP of Master/Worker Node
 
-### Remove Browser Configuration
+On the client node run the following to get the internal IP of Master Node:
+```
+sudo ssh master "hostname -I | awk '{print \$1}'"
+sudo ssh worker1 "hostname -I | awk '{print \$1}'"
+```
 
-To use the Mozilla Firefox browser regularly, Select 'No Proxy' in Network Settings and Save.
+### Setup SSH Tunneling for nodes
 
-Stop the SSH tunneling by Closing the Terminal Window or Hit Ctrl + C in the terminal window.
+Open a Terminal window on local machine and type the following:
 
-# Stop the Cluster Through the client Node
 ```
-cd ..
-sudo bash stop_yarn_job.sh
+ssh -4 -ND <PORT_NUMBER> <USERNAME@MASTER_NODE_ID.SERVER_AREA.cloudlab.us>
 ```
-After running this command, the web interfaces will not work.
+This step will bind the local machine's port to the IP address of Master Node.
+
+### Configure Browser to open link
+
+* Open Mozilla Firefox browser in the local machine. 
+
+* Click on three horizontal bars available on the top right hand side.
+
+* Select Preferences and look for 'Network Settings' on the page.
+
+* Once inside Network Settings, Select Manual Proxy Configuration.
+
+* Select Socks_v5 and type in the Port Number chosen in the previous step for SOCKS Host. The IP of SOCKS Host does not need to be changed. Select OK.
+
+### Open Weblinks (address-format and port number specified above)
+
+### Stop SSH Tunneling
+
+* To use the Mozilla Firefox browser as usual, Select 'No Proxy' in Network Settings and Select OK.
+
+* Stop the SSH tunneling by Closing the Terminal Window or Hit Ctrl + C in the terminal window.
+
+
+## Security for individual aspects of cluster
+* YARN - Accessible only on internal IP
+* Remote HDFS(Port 9000) - Publicly accessible
+* webHDFS(Port 50070) - Publicly accessible
+* Spark - Publicly accessible
diff --git a/scripts/cluster/yarn_cluster_setup/configs/hadoop/core-site.xml b/scripts/cluster/yarn_cluster_setup/configs/hadoop/core-site.xml
index 710fa1e..5690604 100644
--- a/scripts/cluster/yarn_cluster_setup/configs/hadoop/core-site.xml
+++ b/scripts/cluster/yarn_cluster_setup/configs/hadoop/core-site.xml
@@ -1,19 +1,12 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 <configuration>
-    <!-- Default HDFS ip and port -->
     <property>
          <name>fs.defaultFS</name>
-         <value>hdfs://master:9000</value>
+         <value>hdfs://master-public-ip:9000</value>
     </property>
-    <!-- default RPC IP，and use 0.0.0.0 to represent all ips-->
     <property>
 	   <name>dfs.namenode.rpc-bind-host</name>
-	   <value>master</value>
+	   <value>master-public-ip</value>
     </property>
-    <!-- <property>
-        <name>hadoop.security.authorization</name>
-        <value>true</value>
-        <description>Service level authorization params.</description>
-    </property> -->
 </configuration>
\ No newline at end of file
diff --git a/scripts/cluster/yarn_cluster_setup/configs/hadoop/hdfs-site.xml b/scripts/cluster/yarn_cluster_setup/configs/hadoop/hdfs-site.xml
index b52fb77..7fba571 100644
--- a/scripts/cluster/yarn_cluster_setup/configs/hadoop/hdfs-site.xml
+++ b/scripts/cluster/yarn_cluster_setup/configs/hadoop/hdfs-site.xml
@@ -7,11 +7,15 @@
    </property>
     <property>
         <name>dfs.namenode.http-address</name>
-        <value>master:50070</value>
+        <value>master-variable-ip:50070</value>
     </property>
     <property>
         <name>dfs.namenode.secondary.http-address</name>
-        <value>master:50090</value>
+        <value>master-public-ip:50090</value>
+    </property>
+    <property>
+        <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
+        <value>false</value>
     </property>
     <property>
         <name>dfs.replication</name>
@@ -29,8 +33,4 @@
         <name>dfs.webhdfs.enabled</name>
         <value>true</value>
     </property>
-    <!-- <property>
-        <name>dfs.namenode.acls.enabled</name>
-        <value>true</value>
-    </property> -->
 </configuration>
\ No newline at end of file
diff --git a/scripts/cluster/yarn_cluster_setup/configs/hadoop/yarn-site-capacity.xml b/scripts/cluster/yarn_cluster_setup/configs/hadoop/yarn-site-capacity.xml
index 10056c7..4a3ef7d 100755
--- a/scripts/cluster/yarn_cluster_setup/configs/hadoop/yarn-site-capacity.xml
+++ b/scripts/cluster/yarn_cluster_setup/configs/hadoop/yarn-site-capacity.xml
@@ -3,7 +3,7 @@
 <configuration>
     <property>
         <name>yarn.resourcemanager.hostname</name>
-        <value>master</value>
+        <value>master-internal-ip</value>
     </property>
     <property>
         <name>yarn.nodemanager.aux-services</name>
@@ -15,34 +15,30 @@
     </property>
   <property>
     <name>yarn.resourcemanager.address</name>
-    <value>master:8032</value>
+    <value>master-internal-ip:8032</value>
   </property>
   <property>
      <name>yarn.resourcemanager.scheduler.address</name>
-     <value>master:8030</value>
+     <value>master-internal-ip:8030</value>
   </property>
   <property>
      <name>yarn.resourcemanager.resource-tracker.address</name>
-     <value>master:8031</value>
+     <value>master-internal-ip:8031</value>
   </property>
   <property>
      <name>yarn.resourcemanager.admin.address</name>
-     <value>master:8033</value>
+     <value>master-internal-ip:8033</value>
    </property>
    <property>
       <name>yarn.resourcemanager.webapp.address</name>
-      <value>master:8088</value>
+      <value>master-internal-ip:8088</value>
    </property>
    <property>
       <name>mapreduce.jobhistory.address</name>
-      <value>master:10020</value>
+      <value>master-internal-ip:10020</value>
    </property>
    <property>
       <name>mapreduce.jobhistory.webapp.address</name>
-      <value>master:19888</value>
+      <value>master-internal-ip:19888</value>
    </property>
-    <!-- <property>
-      <name>yarn.acl.enable</name>
-      <value>true</value>
-   </property> -->
 </configuration>
\ No newline at end of file
diff --git a/scripts/cluster/yarn_cluster_setup/configs/hadoop/yarn-site-fair.xml b/scripts/cluster/yarn_cluster_setup/configs/hadoop/yarn-site-fair.xml
index dec55d6..687725d 100755
--- a/scripts/cluster/yarn_cluster_setup/configs/hadoop/yarn-site-fair.xml
+++ b/scripts/cluster/yarn_cluster_setup/configs/hadoop/yarn-site-fair.xml
@@ -3,7 +3,7 @@
 <configuration>
     <property>
         <name>yarn.resourcemanager.hostname</name>
-        <value>master</value>
+        <value>master-internal-ip</value>
     </property>
     <property>
         <name>yarn.nodemanager.aux-services</name>
@@ -15,36 +15,32 @@
     </property>
   <property>
     <name>yarn.resourcemanager.address</name>
-    <value>master:8032</value>
+    <value>master-internal-ip:8032</value>
   </property>
   <property>
      <name>yarn.resourcemanager.scheduler.address</name>
-     <value>master:8030</value>
+     <value>master-internal-ip:8030</value>
   </property>
   <property>
      <name>yarn.resourcemanager.resource-tracker.address</name>
-     <value>master:8031</value>
+     <value>master-internal-ip:8031</value>
   </property>
   <property>
      <name>yarn.resourcemanager.admin.address</name>
-     <value>master:8033</value>
+     <value>master-internal-ip:8033</value>
    </property>
    <property>
       <name>yarn.resourcemanager.webapp.address</name>
-      <value>master:8088</value>
+      <value>master-internal-ip:8088</value>
    </property>
    <property>
       <name>mapreduce.jobhistory.address</name>
-      <value>master:10020</value>
+      <value>master-internal-ip:10020</value>
    </property>
    <property>
       <name>mapreduce.jobhistory.webapp.address</name>
-      <value>master:19888</value>
+      <value>master-internal-ip:19888</value>
    </property>
-   <!--  <property>
-      <name>yarn.acl.enable</name>
-      <value>true</value>
-   </property> -->
   <property>
     <name>yarn.resourcemanager.scheduler.class</name>
     <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
diff --git a/scripts/cluster/yarn_cluster_setup/configs/hadoop/yarn-site-regular.xml b/scripts/cluster/yarn_cluster_setup/configs/hadoop/yarn-site-regular.xml
new file mode 100755
index 0000000..4a3ef7d
--- /dev/null
+++ b/scripts/cluster/yarn_cluster_setup/configs/hadoop/yarn-site-regular.xml
@@ -0,0 +1,44 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+    <property>
+        <name>yarn.resourcemanager.hostname</name>
+        <value>master-internal-ip</value>
+    </property>
+    <property>
+        <name>yarn.nodemanager.aux-services</name>
+        <value>mapreduce_shuffle</value>
+    </property>
+    <property>
+         <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
+         <value>org.apache.hadoop.mapred.ShuffleHandler</value>
+    </property>
+  <property>
+    <name>yarn.resourcemanager.address</name>
+    <value>master-internal-ip:8032</value>
+  </property>
+  <property>
+     <name>yarn.resourcemanager.scheduler.address</name>
+     <value>master-internal-ip:8030</value>
+  </property>
+  <property>
+     <name>yarn.resourcemanager.resource-tracker.address</name>
+     <value>master-internal-ip:8031</value>
+  </property>
+  <property>
+     <name>yarn.resourcemanager.admin.address</name>
+     <value>master-internal-ip:8033</value>
+   </property>
+   <property>
+      <name>yarn.resourcemanager.webapp.address</name>
+      <value>master-internal-ip:8088</value>
+   </property>
+   <property>
+      <name>mapreduce.jobhistory.address</name>
+      <value>master-internal-ip:10020</value>
+   </property>
+   <property>
+      <name>mapreduce.jobhistory.webapp.address</name>
+      <value>master-internal-ip:19888</value>
+   </property>
+</configuration>
\ No newline at end of file
diff --git a/scripts/cluster/yarn_cluster_setup/configs/hadoop/yarn-site.xml b/scripts/cluster/yarn_cluster_setup/configs/hadoop/yarn-site.xml
index e046711..4a3ef7d 100755
--- a/scripts/cluster/yarn_cluster_setup/configs/hadoop/yarn-site.xml
+++ b/scripts/cluster/yarn_cluster_setup/configs/hadoop/yarn-site.xml
@@ -3,7 +3,7 @@
 <configuration>
     <property>
         <name>yarn.resourcemanager.hostname</name>
-        <value>master</value>
+        <value>master-internal-ip</value>
     </property>
     <property>
         <name>yarn.nodemanager.aux-services</name>
@@ -15,34 +15,30 @@
     </property>
   <property>
     <name>yarn.resourcemanager.address</name>
-    <value>master:8032</value>
+    <value>master-internal-ip:8032</value>
   </property>
   <property>
      <name>yarn.resourcemanager.scheduler.address</name>
-     <value>master:8030</value>
+     <value>master-internal-ip:8030</value>
   </property>
   <property>
      <name>yarn.resourcemanager.resource-tracker.address</name>
-     <value>master:8031</value>
+     <value>master-internal-ip:8031</value>
   </property>
   <property>
      <name>yarn.resourcemanager.admin.address</name>
-     <value>master:8033</value>
+     <value>master-internal-ip:8033</value>
    </property>
    <property>
       <name>yarn.resourcemanager.webapp.address</name>
-      <value>master:8088</value>
+      <value>master-internal-ip:8088</value>
    </property>
    <property>
       <name>mapreduce.jobhistory.address</name>
-      <value>master:10020</value>
+      <value>master-internal-ip:10020</value>
    </property>
    <property>
       <name>mapreduce.jobhistory.webapp.address</name>
-      <value>master:19888</value>
+      <value>master-internal-ip:19888</value>
    </property>
-   <!--  <property>
-      <name>yarn.acl.enable</name>
-      <value>true</value>
-   </property> -->
 </configuration>
\ No newline at end of file
diff --git a/scripts/cluster/yarn_cluster_setup/configs/hostnames b/scripts/cluster/yarn_cluster_setup/configs/hostnames
new file mode 100644
index 0000000..821616e
--- /dev/null
+++ b/scripts/cluster/yarn_cluster_setup/configs/hostnames
@@ -0,0 +1,3 @@
+master
+worker1
+worker2
diff --git a/scripts/cluster/yarn_cluster_setup/configs/master b/scripts/cluster/yarn_cluster_setup/configs/master
new file mode 100644
index 0000000..1f7391f
--- /dev/null
+++ b/scripts/cluster/yarn_cluster_setup/configs/master
@@ -0,0 +1 @@
+master
diff --git a/scripts/cluster/yarn_cluster_setup/configs/slaves b/scripts/cluster/yarn_cluster_setup/configs/slaves
new file mode 100644
index 0000000..6e273a2
--- /dev/null
+++ b/scripts/cluster/yarn_cluster_setup/configs/slaves
@@ -0,0 +1,2 @@
+worker1
+worker2
diff --git a/scripts/cluster/yarn_cluster_setup/configs/spark/spark-defaults.conf b/scripts/cluster/yarn_cluster_setup/configs/spark/spark-defaults.conf
new file mode 100644
index 0000000..11e7fbb
--- /dev/null
+++ b/scripts/cluster/yarn_cluster_setup/configs/spark/spark-defaults.conf
@@ -0,0 +1,28 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Default system properties included when running spark-submit.
+# This is useful for setting default environmental settings.
+
+# Example:
+# spark.master                     spark://127.0.0.1:7077
+spark.eventLog.enabled           true
+spark.eventLog.dir               file:///tmp/spark-events
+spark.history.fs.logDirectory               file:///tmp/spark-events
+# spark.serializer                 org.apache.spark.serializer.KryoSerializer
+# spark.driver.memory              5g
+# spark.executor.extraJavaOptions  -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
diff --git a/scripts/cluster/yarn_cluster_setup/install_yarn_cluster.sh b/scripts/cluster/yarn_cluster_setup/install_yarn_cluster.sh
index 7625dbc..77e2049 100644
--- a/scripts/cluster/yarn_cluster_setup/install_yarn_cluster.sh
+++ b/scripts/cluster/yarn_cluster_setup/install_yarn_cluster.sh
@@ -32,7 +32,10 @@ function checkSSH() {
     echo "Checking SSH connections"
     for(( i=0;i<${#host_array[@]};i++)) ; do
         echo ${host_array[i]}
-        ssh root@${host_array[i]} "hostname"
+        PUBLIC_IP=`ssh root@${host_array[i]} "hostname -i"`
+        # Replace internal hostnames with public IP
+        sed -i "s/${host_array[i]}/${PUBLIC_IP}/g" "$current_directory/configs/master"
+        sed -i "s/${host_array[i]}/${PUBLIC_IP}/g" "$current_directory/configs/slaves"
         if [ $? -eq 0 ]
         then
             echo -e "Can SSH to ${host_array[i]}"
@@ -45,12 +48,13 @@ function checkSSH() {
 
 checkSSH
 
+MASTER_PUBLIC_IP=`hostname -i`
+
 echo =========================================================
 echo "Setup Yarn Master"
 echo =========================================================
 echo "Installing Yarn-master"
-bash install_yarn_master_slave.sh
-
+bash install_yarn_master_slave.sh $MASTER_PUBLIC_IP
 
 # Move Config Files and install_yarn_master_slave.sh
 # Install Cluster on all Worker Nodes
@@ -62,7 +66,7 @@ for(( i=1;i<${#host_array[@]};i++)) ; do
     rsync -a --rsync-path="sudo rsync" $current_directory/configs/ ${host_array[i]}:$current_directory/configs/
     scp $current_directory/install_yarn_master_slave.sh ${host_array[i]}:$current_directory/
     echo "Installing on "${host_array[i]}
-    ssh root@${host_array[i]} -n "cd ${current_directory} && sudo bash install_yarn_master_slave.sh"
+    ssh root@${host_array[i]} -n "cd ${current_directory} && sudo bash install_yarn_master_slave.sh ${MASTER_PUBLIC_IP}"
     echo "Finished configuration on "${host_array[i]}
     echo ""
 done
\ No newline at end of file
diff --git a/scripts/cluster/yarn_cluster_setup/install_yarn_master_slave.sh b/scripts/cluster/yarn_cluster_setup/install_yarn_master_slave.sh
index 4e9d017..29f51fb 100644
--- a/scripts/cluster/yarn_cluster_setup/install_yarn_master_slave.sh
+++ b/scripts/cluster/yarn_cluster_setup/install_yarn_master_slave.sh
@@ -1,5 +1,13 @@
 #!/bin/sh
 
+if [ $# -eq 0 ]
+  then
+    echo "No arguments supplied, installation on node terminated"
+    exit 255
+fi
+
+# Accept Public IP of master as a parameter
+MASTER_PUBLIC_IP=$1
 JAVA_HOME_INFILE=/usr/lib/jvm/default-java/
 HADOOP_DATA=/data/hadoop/
 HADOOP_HOME_INFILE=/usr/local/hadoop/
@@ -8,6 +16,12 @@ HADOOP_CONFIG_LOCATION=${HADOOP_HOME_INFILE}etc/hadoop/
 HADOOP_VERSION=2.9.2
 HADOOP_WEB_SOURCE=https://www-us.apache.org/dist/hadoop/common/
 ROOT_VARIABLES_ADDRESS=/etc/profile
+SPARK_HISTORY_DATA=/tmp/spark-events
+
+# These variable values will change as node names change
+MASTER_INTERNAL_NAME=master
+WORKER_INTERNAL_NAME=worker
+current_hostname=`hostname`
 
 # Install Pre-Reqs
 apt-get update -y
@@ -60,10 +74,27 @@ cp -a $current_directory/configs/hadoop/. $HADOOP_CONFIG_LOCATION
 cp $current_directory/configs/master $HADOOP_CONFIG_LOCATION
 cp $current_directory/configs/slaves $HADOOP_CONFIG_LOCATION
 
+# Editing Config Files
+# Making Uniform Changes applicable to all nodes
+sed -i "s/master-public-ip/${MASTER_PUBLIC_IP}/g" "$HADOOP_CONFIG_LOCATION/core-site.xml"
+sed -i "s/master-public-ip/${MASTER_PUBLIC_IP}/g" "$HADOOP_CONFIG_LOCATION/hdfs-site.xml"
+sed -i "s/master-internal-ip/${MASTER_INTERNAL_NAME}/g" "$HADOOP_CONFIG_LOCATION/yarn-site-capacity.xml"
+sed -i "s/master-internal-ip/${MASTER_INTERNAL_NAME}/g" "$HADOOP_CONFIG_LOCATION/yarn-site-fair.xml"
+sed -i "s/master-internal-ip/${MASTER_INTERNAL_NAME}/g" "$HADOOP_CONFIG_LOCATION/yarn-site-regular.xml"
+sed -i "s/master-internal-ip/${MASTER_INTERNAL_NAME}/g" "$HADOOP_CONFIG_LOCATION/yarn-site.xml"
+
+# Following changes are different on master and worker node
+if [[ $current_hostname == *$MASTER_INTERNAL_NAME* ]]; then
+	echo "Changing namenode IP on master"
+	sed -i "s/master-variable-ip/0.0.0.0/g" "$HADOOP_CONFIG_LOCATION/hdfs-site.xml"
+else
+	echo "Changing namenode IP on worker"
+	sed -i "s/master-variable-ip/${MASTER_PUBLIC_IP}/g" "$HADOOP_CONFIG_LOCATION/hdfs-site.xml"
+fi
+
 echo "Hadoop Installation Complete on this node"
 
 SPARK_HOME_INFILE=`cd ${current_directory}/../../../.. && pwd`
-
 SPARK_CONFIG_LOCATION=$SPARK_HOME_INFILE/conf/
 
 # Remove Spark Global Variables
@@ -74,9 +105,13 @@ echo "export SPARK_HOME="$SPARK_HOME_INFILE >> $ROOT_VARIABLES_ADDRESS
 echo "export PATH=$PATH:"$SPARK_HOME_INFILE"/bin/" >> $ROOT_VARIABLES_ADDRESS
 source $ROOT_VARIABLES_ADDRESS
 
+# Make Spark Directory for History Recording
+sudo rm -rf $SPARK_HISTORY_DATA
+mkdir -p $SPARK_HISTORY_DATA
+
 # Copy Spark Config Files
 cp -a $current_directory/configs/spark/. $SPARK_CONFIG_LOCATION
-cp -a $current_directory/configs/hadoop/. $SPARK_CONFIG_LOCATION
+cp -a $HADOOP_CONFIG_LOCATION. $SPARK_CONFIG_LOCATION
 cp $current_directory/configs/master $SPARK_CONFIG_LOCATION
 cp $current_directory/configs/slaves $SPARK_CONFIG_LOCATION
 
diff --git a/scripts/cluster/yarn_cluster_setup/start_yarn_cluster.sh b/scripts/cluster/yarn_cluster_setup/start_yarn_cluster.sh
index 15161ac..fde2de5 100644
--- a/scripts/cluster/yarn_cluster_setup/start_yarn_cluster.sh
+++ b/scripts/cluster/yarn_cluster_setup/start_yarn_cluster.sh
@@ -1,11 +1,15 @@
 #!/bin/bash
 
-MASTER_HOSTNAME=master
-CLIENT_HOSTNAME=client
-
-if [[ `hostname` == *${CLIENT_HOSTNAME}* ]]; then
+# Master, Client Name depends on cluster config
+# If cluster config changes, the variable values should change
+client_name=client
+master_name=master
+MASTER_HOSTNAME=`ssh root@$master_name "hostname -i"`
+current_hostname=`hostname`
+
+if [[ $current_hostname == *$client_name* ]]; then
 	echo "Commands running from correct node"
-	ssh $MASTER_HOSTNAME '
+	ssh root@$MASTER_HOSTNAME '
 		source /etc/profile
 
 		echo "STARTING HADOOP SERVICES"
@@ -19,6 +23,7 @@ if [[ `hostname` == *${CLIENT_HOSTNAME}* ]]; then
 		$HADOOP_HOME/bin/hdfs dfsadmin -safemode leave
 
 		echo "STARTING SPARK SERVICES"
+		$SPARK_HOME/sbin/start-history-server.sh
 		$SPARK_HOME/sbin/start-all.sh
 
 		echo "RUN jps - Java Virtual Machine Process Status Tool"
diff --git a/scripts/cluster/yarn_cluster_setup/stop_yarn_cluster.sh b/scripts/cluster/yarn_cluster_setup/stop_yarn_cluster.sh
index c0fe0b4..921ef4c 100644
--- a/scripts/cluster/yarn_cluster_setup/stop_yarn_cluster.sh
+++ b/scripts/cluster/yarn_cluster_setup/stop_yarn_cluster.sh
@@ -1,15 +1,19 @@
 #!/bin/bash
 
-MASTER_HOSTNAME=master
-CLIENT_HOSTNAME=client
-
-if [[ `hostname` == *${CLIENT_HOSTNAME}* ]]; then
+# The master and client names depend on the cluster configuration.
+# If the cluster configuration changes, update these variable values to match.
+client_name=client
+master_name=master
+MASTER_HOSTNAME=`ssh root@$master_name "hostname -i"`
+current_hostname=`hostname`
+
+if [[ $current_hostname == *$client_name* ]]; then
 	echo "Commands running from correct node"
-	ssh $MASTER_HOSTNAME '
+	ssh root@$MASTER_HOSTNAME '
 		source /etc/profile
 
 		echo -e "STOPPING SPARK SERVICES"
-
+		$SPARK_HOME/sbin/stop-history-server.sh
 		$SPARK_HOME/sbin/stop-all.sh
 
 		echo -e "STOPPING HADOOP SERVICES"
diff --git a/scripts/cluster/yarn_cluster_setup/test_scripts/run_spark_test_job_pi_remotely.sh b/scripts/cluster/yarn_cluster_setup/test_scripts/run_spark_test_job_pi_remotely.sh
index b4f7753..33eeb24 100644
--- a/scripts/cluster/yarn_cluster_setup/test_scripts/run_spark_test_job_pi_remotely.sh
+++ b/scripts/cluster/yarn_cluster_setup/test_scripts/run_spark_test_job_pi_remotely.sh
@@ -1,11 +1,15 @@
 #!/bin/bash
 
-MASTER_HOSTNAME=master
-CLIENT_HOSTNAME=client
+# The master and client names depend on the cluster configuration.
+# If the cluster configuration changes, update these variable values to match.
+client_name=client
+master_name=master
+MASTER_HOSTNAME=`ssh root@$master_name "hostname -i"`
+current_hostname=`hostname`
 
-if [[ `hostname` == *${CLIENT_HOSTNAME}* ]]; then
+if [[ $current_hostname == *"client"* ]]; then
 	echo "Commands running from correct node"
-	ssh $MASTER_HOSTNAME '
+	ssh root@$MASTER_HOSTNAME '
 		source /etc/profile
 
 		echo "SPARK TEST"

From b33c89b611cbc8b17364a34e34f6ac501240f0d2 Mon Sep 17 00:00:00 2001
From: pranavmrane <pmr5279@g.rit.edu>
Date: Sun, 21 Apr 2019 01:10:30 -0400
Subject: [PATCH 13/13] Host Verification Bug Solved by Pinging all Nodes

---
 .../install_yarn_cluster.sh                   | 23 ++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/scripts/cluster/yarn_cluster_setup/install_yarn_cluster.sh b/scripts/cluster/yarn_cluster_setup/install_yarn_cluster.sh
index 77e2049..4504523 100644
--- a/scripts/cluster/yarn_cluster_setup/install_yarn_cluster.sh
+++ b/scripts/cluster/yarn_cluster_setup/install_yarn_cluster.sh
@@ -69,4 +69,25 @@ for(( i=1;i<${#host_array[@]};i++)) ; do
     ssh root@${host_array[i]} -n "cd ${current_directory} && sudo bash install_yarn_master_slave.sh ${MASTER_PUBLIC_IP}"
     echo "Finished configuration on "${host_array[i]}
     echo ""
-done
\ No newline at end of file
+done
+
+
+echo "Starting Cluster to Ping all Nodes"
+
+source /etc/profile
+
+$HADOOP_HOME/sbin/start-dfs.sh
+$HADOOP_HOME/sbin/start-yarn.sh
+$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver
+$HADOOP_HOME/bin/hdfs dfsadmin -safemode leave
+$SPARK_HOME/sbin/start-history-server.sh
+$SPARK_HOME/sbin/start-all.sh
+jps
+$HADOOP_HOME/bin/hdfs dfsadmin -report
+
+echo "Stopping Cluster"
+$SPARK_HOME/sbin/stop-history-server.sh
+$SPARK_HOME/sbin/stop-all.sh
+$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh stop historyserver
+$HADOOP_HOME/sbin/stop-dfs.sh
+$HADOOP_HOME/sbin/stop-yarn.sh
\ No newline at end of file