eubr-atmosphere · giovannidispoto · Oct 15, 2021 · Oct 15, 2021 · Oct 16, 2021 · Oct 16, 2021
diff --git a/README.md b/README.md
@@ -5,11 +5,11 @@ Framework composed of a collection of python script to run, profile and collect
 The framework can be used with different machines and different applicaitons.
 The target architecture already supported by this version are:
 - inhouse server
-- Microsoft Azure VMs
+- local machine
 
 The application already supported by this version are:
 - CNN training with pytorch
-- CNN training with tensorflow
+- CNN and RNN training with tensorflow
 
 The framework can be configured via .ini configuration file.
 An example of configuration file is available in configurations/default.ini.

diff --git a/__pycache__/utility.cpython-36.pyc b/__pycache__/utility.cpython-36.pyc
diff --git a/__pycache__/utility.cpython-37.pyc b/__pycache__/utility.cpython-37.pyc
diff --git a/__pycache__/utility.cpython-39.pyc b/__pycache__/utility.cpython-39.pyc
diff --git a/apps/__pycache__/__init__.cpython-37.pyc b/apps/__pycache__/__init__.cpython-37.pyc
diff --git a/apps/__pycache__/tf.cpython-36.pyc b/apps/__pycache__/tf.cpython-36.pyc
diff --git a/apps/__pycache__/tf.cpython-37.pyc b/apps/__pycache__/tf.cpython-37.pyc
diff --git a/apps/__pycache__/tf.cpython-39.pyc b/apps/__pycache__/tf.cpython-39.pyc
diff --git a/apps/app.py b/apps/app.py
@@ -0,0 +1,75 @@
+"""
+Copyright 2019 Marco Lattuada
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import logging
+import os
+import sys
+
+import xmltodict
+
+def load_xml_configuration(parameters, application, root_tag):
+    """
+    Create a configuration dictionary combining a comma-separated list of parameters and an xml file
+
+    Parameters
+    ----------
+    parameters: str
+        A comma-separated list of parameters in the form parameter=value
+
+    application: str
+        The name of the application
+
+    root_tag: str
+        The name of the root tag of the xml file.
+
+    Return
+    ------
+    dict of str: dict of str: str
+        A dictionary containing the combination of input parameters and default configuration file
+    """
+    configuration_base = "default"
+    #First look for configuration
+    for parameter in parameters.split(","):
+        if len(parameter.split("=")) != 2:
+            logging.error("parameters must be a , seperated list of <parameter>=<value>: %s", parameter)
+            sys.exit(1)
+        if parameter.split("=")[0] == "configuration":
+            configuration_base = parameter.split("=")[1]
+            break
+
+    utility = __import__("utility")
+    root_project = utility.get_project_root()
+
+    #The absolute path of the configuration directory
+    confs_dir = os.path.join(root_project, "apps", application, "confs")
+    logging.info("conf directory is %s", confs_dir)
+
+    #Check the confs_dir exists
+    if not os.path.exists(confs_dir):
+        logging.error("Conf directory %s does not exist", confs_dir)
+        sys.exit(1)
+
+    #Check if the xml file of the configuration exists
+    xml_file_name = os.path.join(confs_dir, configuration_base + ".xml")
+    if not os.path.exists(xml_file_name):
+        logging.error("XML file %s not found", xml_file_name)
+        sys.exit(1)
+
+
+    #Load XML file
+    with open(xml_file_name) as xml_file:
+        doc = xmltodict.parse(xml_file.read(), force_list={'input_class'})
+    return doc[root_tag]
diff --git a/apps/hostname.py b/apps/hostname.py
@@ -18,9 +18,39 @@
 import socket
 
 def compute_configuration_name(cl_parameters):
+    """
+    Compute the configuration name. Since hostname does not take any parameter, there can be only one configuration named "no_parameters"
+
+    Parameters
+    ----------
+    cl_parameters: str
+        Added for generality with respect to other applications.
+
+    Return
+    ------
+    str
+        return "no_parameters"
+    """
     return "no_parameters"
 
 def collect_data(repetition_path, gpu_type, gpu_number, debug):
+    """
+    Append to the csv (creating it if it does not exist) data about the experiment whose output was saved in repetition_path
+
+    Parameters
+    ----------
+    repetition_path: str
+        The path containing the output of the currently analyzed experiment
+
+    gpu_type: str
+        The type of the GPU
+
+    gpu_number: str
+        The number of the GPUs of the VM
+
+    debug: boolean
+        True if debug messages have to be printed
+    """
     csv_file_name = "hostname.csv"
     if os.path.exists(csv_file_name):
         csv_file = open(csv_file_name, "a")

diff --git a/apps/pytorch.py b/apps/pytorch.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python3
+#!/usr/bin/env python3
 """
 Copyright 2018 Marco Lattuada
 
@@ -29,33 +29,49 @@
 
 import dicttoxml
 
-def compute_parameters(cl_parameters):
-    configuration_base = "default"
-    #First look for configuration
-    for cl_parameter in cl_parameters.split(","):
-        if len(cl_parameter.split("=")) != 2:
-            logging.error("parameters must be a , seperated list of <parameter>=<value>: %s", cl_parameter)
-            sys.exit(1)
-        if cl_parameter.split("=")[0] == "configuration":
-            configuration_base = cl_parameter.split("=")[1]
-            break
-
-    #Load configuration
-    parameters = load_xml_configuration(configuration_base + ".xml")["pytorch_configuration"]
+import app
 
+def compute_parameters(cl_parameters):
+    """
+    Combine the parameters of the single experiment with default values
+
+    Parameters
+    ----------
+    cl_parameters: str
+        A comma-separated list of parameter=value characterizing the experiment
+
+    Return
+    ------
+    dict of str: str
+        The dictionary containing the values of all the parameters
+    """
+    parameters = app.load_xml_configuration(cl_parameters, "pytorch", "pytorch_configuration")
     #Overwrite parameters
     for cl_parameter in cl_parameters.split(","):
         tokens = cl_parameter.split("=")
         if len(tokens) != 2:
             logging.error("parameters must be a , seperated list of <parameter>=<value>")
             sys.exit(1)
-        if not tokens[0] in parameters and tokens[0] != "configuration" and tokens[0] != "gpus_number" and tokens[0] != "n" and tokens[0] != "gpus_instances":
-            logging.error("parameter %s is not present in the source configuration", tokens[0])
-            sys.exit(1)
+        #if not tokens[0] in parameters and tokens[0] != "configuration" and tokens[0] != "gpus_number" and tokens[0] != "n" and tokens[0] != "gpus_instances":
+        #    logging.error("parameter %s is not present in the source configuration", tokens[0])
+        #    sys.exit(1)
         parameters[tokens[0]] = tokens[1]
     return parameters
 
 def compute_configuration_name(cl_parameters):
+    """
+    Compute the configuration name on the basis of the values of the experiment parameters
+
+    Parameters
+    ----------
+    cl_parameters: str
+        A comma separated list of parameter=value
+
+    Return
+    ------
+    str
+        The configuration name
+    """
     parameters = compute_parameters(cl_parameters)
     if "gpus_number" in parameters:
         gpus_number = "_gpus_number_" + parameters["gpus_number"]
@@ -75,36 +91,24 @@ def compute_configuration_name(cl_parameters):
     configuration_name = network_type + "_cl_" + parameters["num_classes"] + "_im_" + parameters["images_per_class"] + "_ep_" + parameters["epochs_number"] + "_bs_" + parameters["batch_size"] + "_mo_" + parameters["momentum"] + "_j_" + parameters["j"] + gpus_number + only_load
     return configuration_name
 
-def load_xml_configuration(xml_configuration_file):
-    #The absolute path of the current file
-    abs_script = os.path.realpath(__file__)
-
-    #The root directory of the script
-    abs_root = os.path.dirname(abs_script)
-
-    #The absolute path of the configuration directory
-    confs_dir = os.path.join(abs_root, "pytorch", "confs")
-    logging.info("conf directory is %s", confs_dir)
-
-    #Check the confs_dir exists
-    if not os.path.exists(confs_dir):
-        logging.error("Conf directory %s does not exist", confs_dir)
-        sys.exit(1)
-
-    #Check if xml file of the conf exist
-    xml_file_name = os.path.join(confs_dir, xml_configuration_file)
-    if not os.path.exists(xml_file_name):
-        logging.error("XML file %s not found", xml_file_name)
-        sys.exit(1)
+def collect_data(repetition_path, gpu_type, gpu_number, debug):
+    """
+    Append to the csv (creating it if it does not exist) data about the experiment whose output was saved in repetition_path
 
+    Parameters
+    ----------
+    repetition_path: str
+        The path containing the output of the currently analyzed experiment
 
-    #Load XML file
-    with open(xml_file_name) as xml_file:
-        doc = xmltodict.parse(xml_file.read(), force_list={'input_class'})
-    return doc
+    gpu_type: str
+        The type of the GPU
 
+    gpu_number: str
+        The number of the GPUs of the VM
 
-def collect_data(repetition_path, gpu_type, gpu_number, debug):
+    debug: boolean
+        True if debug messages have to be printed
+    """
     try:
         #The iterations fractions
         iteration_fractions = [0.25, 0.50, 0.75]
@@ -319,10 +323,10 @@ def collect_data(repetition_path, gpu_type, gpu_number, debug):
                 sys.exit(1)
 
         #Preparing csv file with cpu and gpu utilization
-        profile_cpu_output_filename = os.path.join(repetition_path, "profile_cpu_output")
-        profile_gpu_output_filename = os.path.join(repetition_path, "profile_gpu_output")
-        profile_file_name_cpu = os.path.join("pytorch_csvs", "profile_cpu_" + gpu_type.replace(" ", "-") + "_" + str(gpu_number) + "_" + configuration_path + "_" + experiment_path + "_" + str(starting_timestamp) + ".csv")
-        profile_file_name_sum_cpu = os.path.join("pytorch_csvs", "profile_sum_cpu_" + gpu_type.replace(" ", "-") + "_" + str(gpu_number) + "_" + configuration_path + "_" + experiment_path + "_" + str(starting_timestamp) + ".csv")
+        profile_cpu_output_filename = os.path.join(repetition_path, "profile_CPU_output")
+        profile_gpu_output_filename = os.path.join(repetition_path, "profile_GPU_output")
+        profile_file_name_cpu = os.path.join("pytorch_csvs", "profile_CPU_" + gpu_type.replace(" ", "-") + "_" + str(gpu_number) + "_" + configuration_path + "_" + experiment_path + "_" + str(starting_timestamp) + ".csv")
+        profile_file_name_sum_cpu = os.path.join("pytorch_csvs", "profile_sum_CPU_" + gpu_type.replace(" ", "-") + "_" + str(gpu_number) + "_" + configuration_path + "_" + experiment_path + "_" + str(starting_timestamp) + ".csv")
         if os.path.exists(profile_cpu_output_filename) and (not os.path.exists(profile_file_name_cpu) or not os.path.exists(profile_file_name_sum_cpu)):
             #The collected data
             cpu_data = {}
@@ -333,10 +337,10 @@ def collect_data(repetition_path, gpu_type, gpu_number, debug):
             #Analyzing profile_cpu_output
             for line in open(profile_cpu_output_filename, "r"):
                 #New entry
-                if line.find("%cpu %MEM ARGS") != -1:
+                if line.find("%CPU %MEM ARGS") != -1:
                     previous_timestamp = current_timestamp
                     #Old pattern
-                    if line.startswith("%cpu %MEM ARGS"):
+                    if line.startswith("%CPU %MEM ARGS"):
                         split = line.split()
                         if len(split) == 5:
                             read_timestamp = split[3] + " " + split[4][0:7]
@@ -350,7 +354,7 @@ def collect_data(repetition_path, gpu_type, gpu_number, debug):
                             current_timestamp = str(int(current_timestamp_datetime.timestamp()))
                     #New pattern
                     else:
-                        split = line.replace("\\n%cpu %MEM ARGS", "").split()
+                        split = line.replace("\\n%CPU %MEM ARGS", "").split()
                         current_timestamp_readable = split[4] + " " + split[5]
                         current_timestamp = split[1]
                     logging.debug("Found timestamp %s (%s(", current_timestamp, current_timestamp_readable)
@@ -401,8 +405,8 @@ def collect_data(repetition_path, gpu_type, gpu_number, debug):
                 profile_sum_file.write(str(cpu_sum_data["cpu" + str(cpu_number)]))
             profile_sum_file.write("\n")
             profile_sum_file.close()
-        profile_file_name_gpu = os.path.join("pytorch_csvs", "profile_gpu_" + gpu_type.replace(" ", "-") + "_" + str(gpu_number) + "_" + configuration_path + "_" + experiment_path + "_" + str(starting_timestamp) + ".csv")
-        profile_file_name_sum_gpu = os.path.join("pytorch_csvs", "profile_sum_gpu_" + gpu_type.replace(" ", "-") + "_" + str(gpu_number) + "_" + configuration_path + "_" + experiment_path + "_" + str(starting_timestamp) + ".csv")
+        profile_file_name_gpu = os.path.join("pytorch_csvs", "profile_GPU_" + gpu_type.replace(" ", "-") + "_" + str(gpu_number) + "_" + configuration_path + "_" + experiment_path + "_" + str(starting_timestamp) + ".csv")
+        profile_file_name_sum_gpu = os.path.join("pytorch_csvs", "profile_sum_GPU_" + gpu_type.replace(" ", "-") + "_" + str(gpu_number) + "_" + configuration_path + "_" + experiment_path + "_" + str(starting_timestamp) + ".csv")
 
         if os.path.exists(profile_gpu_output_filename) and (not os.path.exists(profile_file_name_gpu) or not os.path.exists(profile_file_name_sum_gpu)):
             #The collected data
@@ -544,10 +548,10 @@ def collect_data(repetition_path, gpu_type, gpu_number, debug):
             only_load = "0"
 
         #Retrieving machine information
-        #Add host_scripts to the directories for python packages search
+        #Add host_scripts to the directories for python modules search
         host_scripts_path = os.path.join(abs_root, "..", "host_scripts")
         sys.path.append(host_scripts_path)
-        collect_data_package = __import__("collect_data")
+        collect_data_module = __import__("collect_data")
 
         mac_address = ""
         system_uuid = ""
@@ -562,7 +566,7 @@ def collect_data(repetition_path, gpu_type, gpu_number, debug):
         if xml_configuration.get("system_UUID"):
             system_uuid = xml_configuration.get("system_UUID")
 
-        machine_information = collect_data_package.get_machine_information(mac_address, machine_name, system_uuid)
+        machine_information = collect_data_module.get_machine_information(mac_address, machine_name, system_uuid)
         mac_address = machine_information["mac_address"]
         system_uuid = machine_information["system_uuid"]
         machine_name = machine_information["machine_name"]
@@ -643,12 +647,24 @@ def collect_data(repetition_path, gpu_type, gpu_number, debug):
         raise
 
 def main():
+    """
+    The wrapper script for training a CNN on ImageNet dataset with PyTorch
+
+    The parameters are:
+        -d, --debug: enables the printing of the debug messages
+        -p, --parameters: a comma-separated list of parameters to be passed to the wrapped application
+        --no-clean: if True, removal of generated files (e.g., dumping of weights) is disabled
+    """
     #The absolute path of the current file
     abs_script = os.path.realpath(__file__)
 
     #The root directory of the script
     abs_root = os.path.dirname(abs_script)
 
+    sys.path.append(os.path.join(abs_root, ".."))
+    utility = __import__("utility")
+
+
     #The return value of the command
     return_value = 0
 
@@ -841,10 +857,7 @@ def main():
         logging.warning("/etc/machine-id does not exists")
     else:
         uuid_line = open("/etc/machine-id", "r").readline()
-        if len(uuid_line.split()) != 2:
-            logging.error("Error in loading uuid: %s", str(uuid_line.split()))
-            sys.exit(1)
-        uuid = uuid_line.split()[1]
+        uuid = uuid_line
 
         root["system_UUID"] = uuid
 

diff --git a/apps/pytorch/confs/remote_ex b/apps/pytorch/confs/remote_ex
@@ -0,0 +1 @@
+configuration=test_small