diff --git a/README.md b/README.md index c36437c..abd1334 100644 --- a/README.md +++ b/README.md @@ -20,12 +20,12 @@ Validation capacities for physics compares OACC and original Fortran on. Automatic benchmarking are as follow (legends after table) -| Experimentation | Resolutions | Layout | Setup | -| ----------------------------- | ------------|-----------| ---------------------------------------- | -| Held-Suarez | C180-L72 | 1x1 | Discover @ node-to-node (exclusive GPU) | -| | C180-L91 | 1x1 | Discover @ node-to-node (exclusive GPU) | -| | C180-L137 | 1x1 | Discover @ node-to-node (exclusive GPU) | -| Aquaplanet | C180-L72 | 1x1 | Discover @ node-to-node (exclusive GPU) | +| Experimentation | Resolutions | Layout | CPU/GPU | +| ----------------------------- | ----------- | ------ | --------------------------------- | +| Held-Suarez | C180-L72 | 4x4 | 96/8 Node-to-node (sharing GPU) | +| | C180-L137 | 4x4 | 96/8 Node-to-node (sharing GPU) | +| | C360-L72 | 4x4 | 96/8 Node-to-node (sharing GPU) | +| Aquaplanet | C180-L72 | 1x1 | 6/6 Node-to-node (exclusive GPU) | Legend: diff --git a/geosongpu_ci/pipeline/held_suarez.py b/geosongpu_ci/pipeline/held_suarez.py index b5b91f4..e08d6c0 100644 --- a/geosongpu_ci/pipeline/held_suarez.py +++ b/geosongpu_ci/pipeline/held_suarez.py @@ -87,9 +87,9 @@ def _make_srun_script( executable_name=executable_name, ) # Options - options = f""" {'export HARDWARE_SAMPLING=1' if hardware_sampler_on else 'unset HARDWARE_SAMPLING' } - {'export MPS_ON=1' if mps_on else 'unset MPS_ON' } - {f'export LOCAL_REDIRECT_LOG=1' if local_redirect_log else 'unset LOCAL_REDIRECT_LOG' } + options = f"""{'export HARDWARE_SAMPLING=1' if hardware_sampler_on else 'unset HARDWARE_SAMPLING' } +{'export MPS_ON=1' if mps_on else 'unset MPS_ON' } +{f'export LOCAL_REDIRECT_LOG=1' if local_redirect_log else 'unset LOCAL_REDIRECT_LOG' } """ if "dace" in gtfv3_config.GTFV3_BACKEND: @@ -149,7 +149,7 @@ def _setup_1ts_1node_gtfv3(self, experiment_directory: str) -> ShellScript: def _setup_1day_1node_gtfv3(self, experiment_directory: str) -> ShellScript: return ShellScript( - name="_setup_config_1day_1node_gtfv3", + name="setup_config_1day_1node_gtfv3", working_directory=experiment_directory, ).write( shell_commands=[ @@ -175,7 +175,7 @@ def _setup_1day_1node_fortran(self, experiment_directory: str) -> ShellScript: def _setup_1ts_2nodes_gtfv3(self, experiment_directory: str) -> ShellScript: return ShellScript( - name="_setup_config_1ts_2nodes_gtfv3", + name="setup_config_1ts_2nodes_gtfv3", working_directory=experiment_directory, ).write( shell_commands=[ @@ -188,7 +188,7 @@ def _setup_1ts_2nodes_gtfv3(self, experiment_directory: str) -> ShellScript: def _setup_1day_2nodes_gtfv3(self, experiment_directory: str) -> ShellScript: return ShellScript( - name="_setup_config_1day_2nodes_gtfv3", + name="setup_config_1day_2nodes_gtfv3", working_directory=experiment_directory, ).write( shell_commands=[ @@ -335,7 +335,7 @@ def run_action( experiment_directory=experiment_directory, executable_name=self.executable_name, prolog_scripts=prolog_scripts, - slurm_config=SlurmConfiguration.slurm_6CPUs_6GPUs( + slurm_config=SlurmConfiguration.slurm_96CPUs_8GPUs( output="benchmark.cache.dacegpu.%t.out" ), gtfv3_config=GTFV3Config.dace_gpu_32_bit_BAR(),