Skip to content

Commit

Permalink
Update README
Browse files Browse the repository at this point in the history
HS: fix setup for MPS & cleanup
  • Loading branch information
FlorianDeconinck committed Aug 17, 2023
1 parent 86a4a30 commit b221f62
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 13 deletions.
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@ Validation capacities for physics compares OACC and original Fortran on.

Automatic benchmarking is as follows (legend after table)

| Experimentation | Resolutions | Layout | Setup |
| ----------------------------- | ------------|-----------| ---------------------------------------- |
| Held-Suarez | C180-L72 | 1x1 | Discover @ node-to-node (exclusive GPU) |
| | C180-L91 | 1x1 | Discover @ node-to-node (exclusive GPU) |
| | C180-L137 | 1x1 | Discover @ node-to-node (exclusive GPU) |
| Aquaplanet | C180-L72 | 1x1 | Discover @ node-to-node (exclusive GPU) |
| Experimentation | Resolutions | Layout | CPU/GPU |
| ----------------------------- | ----------- | ------ | --------------------------------- |
| Held-Suarez | C180-L72 | 4x4 | 96/8 Node-to-node (sharing GPU) |
| | C180-L137 | 4x4 | 96/8 Node-to-node (sharing GPU) |
| | C360-L72 | 4x4 | 96/8 Node-to-node (sharing GPU) |
| Aquaplanet | C180-L72 | 1x1 | 6/6 Node-to-node (exclusive GPU) |

Legend:

Expand Down
14 changes: 7 additions & 7 deletions geosongpu_ci/pipeline/held_suarez.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,9 @@ def _make_srun_script(
executable_name=executable_name,
)
# Options
options = f""" {'export HARDWARE_SAMPLING=1' if hardware_sampler_on else 'unset HARDWARE_SAMPLING' }
{'export MPS_ON=1' if mps_on else 'unset MPS_ON' }
{f'export LOCAL_REDIRECT_LOG=1' if local_redirect_log else 'unset LOCAL_REDIRECT_LOG' }
options = f"""{'export HARDWARE_SAMPLING=1' if hardware_sampler_on else 'unset HARDWARE_SAMPLING' }
{'export MPS_ON=1' if mps_on else 'unset MPS_ON' }
{f'export LOCAL_REDIRECT_LOG=1' if local_redirect_log else 'unset LOCAL_REDIRECT_LOG' }
"""

if "dace" in gtfv3_config.GTFV3_BACKEND:
Expand Down Expand Up @@ -149,7 +149,7 @@ def _setup_1ts_1node_gtfv3(self, experiment_directory: str) -> ShellScript:

def _setup_1day_1node_gtfv3(self, experiment_directory: str) -> ShellScript:
return ShellScript(
name="_setup_config_1day_1node_gtfv3",
name="setup_config_1day_1node_gtfv3",
working_directory=experiment_directory,
).write(
shell_commands=[
Expand All @@ -175,7 +175,7 @@ def _setup_1day_1node_fortran(self, experiment_directory: str) -> ShellScript:

def _setup_1ts_2nodes_gtfv3(self, experiment_directory: str) -> ShellScript:
return ShellScript(
name="_setup_config_1ts_2nodes_gtfv3",
name="setup_config_1ts_2nodes_gtfv3",
working_directory=experiment_directory,
).write(
shell_commands=[
Expand All @@ -188,7 +188,7 @@ def _setup_1ts_2nodes_gtfv3(self, experiment_directory: str) -> ShellScript:

def _setup_1day_2nodes_gtfv3(self, experiment_directory: str) -> ShellScript:
return ShellScript(
name="_setup_config_1day_2nodes_gtfv3",
name="setup_config_1day_2nodes_gtfv3",
working_directory=experiment_directory,
).write(
shell_commands=[
Expand Down Expand Up @@ -335,7 +335,7 @@ def run_action(
experiment_directory=experiment_directory,
executable_name=self.executable_name,
prolog_scripts=prolog_scripts,
slurm_config=SlurmConfiguration.slurm_6CPUs_6GPUs(
slurm_config=SlurmConfiguration.slurm_96CPUs_8GPUs(
output="benchmark.cache.dacegpu.%t.out"
),
gtfv3_config=GTFV3Config.dace_gpu_32_bit_BAR(),
Expand Down

0 comments on commit b221f62

Please sign in to comment.