From 3d7bee908faaea24861d7f00c123a9bc9aaecf6b Mon Sep 17 00:00:00 2001 From: linjsh Date: Thu, 4 Jul 2024 10:52:40 +0800 Subject: [PATCH 1/4] fix ConsolidatedPlacement bug --- placement/consolidated.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/placement/consolidated.py b/placement/consolidated.py index 272adc2..2e12e5b 100644 --- a/placement/consolidated.py +++ b/placement/consolidated.py @@ -108,7 +108,7 @@ def _consolidated_placement( # found a node with more GPUs then needed if min_more_GPUs > len(free_gpus[node]): min_more_GPUs = len(free_gpus[node]) - node_with_min_moRE_gpUs = node + node_with_min_more_GPUs = node if node_with_min_more_GPUs is not None: # only extracting the GPUs we need return (free_gpus[node_with_min_more_GPUs][:numGPUs_needed], True) From cdc8653f4dfac186f0b6cae256281212f74347d9 Mon Sep 17 00:00:00 2001 From: linjsh Date: Thu, 4 Jul 2024 10:55:26 +0800 Subject: [PATCH 2/4] update Readme.md for more clear instructions --- Readme.md | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/Readme.md b/Readme.md index fd8a3d9..14228dd 100644 --- a/Readme.md +++ b/Readme.md @@ -42,6 +42,27 @@ For an example users should look at `las_scheduler.py` which implements Least At ### Running Blox +#### Installation +Blox uses gRPC for communication and Matplotlib to plot the collected metrics. +We suggest that users create a virtual environment to install the dependencies. +``` +pip install grpcio +pip install matplotlib +pip install pandas==1.3.0 +pip install grpcio-tools + +pip install protobuf==4.21.1 +cd blox/deployment +mkdir grpc_stubs +make +``` + +#### Prepare the trace + +Taking the Philly trace as an example, download the trace from [here](https://github.com/msr-fiddle/philly-traces/blob/master/trace-data.tar.gz) and unpack it. + +Unpacking produces a file named `job_cluster_log`, which is used in the following examples. + Blox has two modes for running. 
One real cluster workload and second simulator. ##### Simulation Mode @@ -87,15 +108,6 @@ PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python python node_manager.py --ipaddr ip ### Details for reproducing results for artifacts These are instructions for reproducing artifacts for Blox. -#### Installation -Blox uses gRpc, Matplotlib to communicate and Plot several collected Metric. -We suggest the users to create a virtual environment to install the dependencies. -``` -pip install grpcio -pip install matplotlib -pip install pandas==1.3.0 -pip install grpcio-tools -``` ###### Running Blox Code To perform simulation. From d0a2dd209312b587ec6f3017208f62a1729a9d1f Mon Sep 17 00:00:00 2001 From: linjsh Date: Thu, 4 Jul 2024 10:55:32 +0800 Subject: [PATCH 3/4] fix typo --- schedulers/scheduler_policy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schedulers/scheduler_policy.py b/schedulers/scheduler_policy.py index 792364e..cd9e613 100644 --- a/schedulers/scheduler_policy.py +++ b/schedulers/scheduler_policy.py @@ -48,7 +48,7 @@ def schedule( gpu_df: Contains GPU dataframe. Returns: - "order_job" : Mandatory key, list of dicts of jobs in the + "order_job" : Mandatory key, list of dicts of jobs in the order they are supposed to run. 
"run_all_jobs": Some scheduler will only output the jobs to run which will fit on the GPU or expecting From ce589fbad41c72cab66dd66570dc27d46eddf8e2 Mon Sep 17 00:00:00 2001 From: linjsh Date: Thu, 4 Jul 2024 11:33:23 +0800 Subject: [PATCH 4/4] fix placement bug --- placement/bebop.py | 2 +- placement/consolidated_placement.py | 2 +- placement/first-gpu.py | 2 +- placement/placement.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/placement/bebop.py b/placement/bebop.py index 10487da..d431b36 100644 --- a/placement/bebop.py +++ b/placement/bebop.py @@ -136,7 +136,7 @@ def _consolidated_placement( # found a node with more GPUs then needed if min_more_GPUs > len(free_gpus[node]): min_more_GPUs = len(free_gpus[node]) - node_with_min_moRE_gpUs = node + node_with_min_more_GPUs = node if node_with_min_more_GPUs is not None: # only extracting the GPUs we need return (free_gpus[node_with_min_more_GPUs][:numGPUs_needed], True) diff --git a/placement/consolidated_placement.py b/placement/consolidated_placement.py index 99400c7..96e3e89 100644 --- a/placement/consolidated_placement.py +++ b/placement/consolidated_placement.py @@ -106,7 +106,7 @@ def _consolidated_placement( # found a node with more GPUs then needed if min_more_GPUs > len(free_gpus[node]): min_more_GPUs = len(free_gpus[node]) - node_with_min_moRE_gpUs = node + node_with_min_more_GPUs = node if node_with_min_more_GPUs is not None: # only extracting the GPUs we need return (free_gpus[node_with_min_more_GPUs][:numGPUs_needed], True) diff --git a/placement/first-gpu.py b/placement/first-gpu.py index d4f3410..3801f5c 100644 --- a/placement/first-gpu.py +++ b/placement/first-gpu.py @@ -249,7 +249,7 @@ def _consolidated_placement( # found a node with more GPUs then needed if min_more_GPUs > len(free_gpus[node]): min_more_GPUs = len(free_gpus[node]) - node_with_min_moRE_gpUs = node + node_with_min_more_GPUs = node if node_with_min_more_GPUs is not None: # only extracting the GPUs we need return 
(free_gpus[node_with_min_more_GPUs][:numGPUs_needed], True) diff --git a/placement/placement.py b/placement/placement.py index 7cdaa5f..814bd1b 100644 --- a/placement/placement.py +++ b/placement/placement.py @@ -239,7 +239,7 @@ def _consolidated_placement( # found a node with more GPUs then needed if min_more_GPUs > len(free_gpus[node]): min_more_GPUs = len(free_gpus[node]) - node_with_min_moRE_gpUs = node + node_with_min_more_GPUs = node if node_with_min_more_GPUs is not None: # only extracting the GPUs we need return (free_gpus[node_with_min_more_GPUs][:numGPUs_needed], True)