Merge pull request #25 from GEOS-ESM/feature/hardware_sampelr
Hardware Sampler & MPS
FlorianDeconinck committed Aug 17, 2023
2 parents a0094dc + b221f62 commit ce52517
Showing 16 changed files with 370 additions and 119 deletions.
12 changes: 6 additions & 6 deletions README.md
@@ -20,12 +20,12 @@ Validation capacities for physics compare OACC and the original Fortran.

Automatic benchmarks are as follows (legend after the table)

| Experimentation | Resolutions | Layout | Setup |
| ----------------------------- | ------------|-----------| ---------------------------------------- |
| Held-Suarez | C180-L72 | 1x1 | Discover @ node-to-node (exclusive GPU) |
| | C180-L91 | 1x1 | Discover @ node-to-node (exclusive GPU) |
| | C180-L137 | 1x1 | Discover @ node-to-node (exclusive GPU) |
| Aquaplanet | C180-L72 | 1x1 | Discover @ node-to-node (exclusive GPU) |
| Experimentation | Resolutions | Layout | CPU/GPU |
| ----------------------------- | ----------- | ------ | --------------------------------- |
| Held-Suarez | C180-L72 | 4x4 | 96/8 Node-to-node (sharing GPU) |
| | C180-L137 | 4x4 | 96/8 Node-to-node (sharing GPU) |
| | C360-L72 | 4x4 | 96/8 Node-to-node (sharing GPU) |
| Aquaplanet | C180-L72 | 1x1 | 6/6 Node-to-node (exclusive GPU) |

Legend:

11 changes: 11 additions & 0 deletions geosongpu_ci/actions/slurm.py
@@ -79,3 +79,14 @@ def slurm_96CPUs_8GPUs(cls, output: Optional[str] = None) -> "SlurmConfiguration
mem_per_gpu="40G",
output=output or cls.output,
)

@classmethod
    def slurm_96CPUs(cls, output: Optional[str] = None) -> "SlurmConfiguration":
        """2-node configuration on Discover with Rome EPYC"""
return cls(
nodes=2,
ntasks=96,
ntasks_per_node=48,
sockets_per_node=2,
output=output or cls.output,
)
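For context, a minimal usage sketch of the new 2-node CPU configuration, mirroring the Fortran benchmark call added to run_action later in this diff; the wrapper path and executable name below are illustrative, and srun_bash is assumed to behave as in _make_srun_script:

# Sketch only: consuming SlurmConfiguration.slurm_96CPUs (illustrative paths and names).
from geosongpu_ci.actions.slurm import SlurmConfiguration

slurm_config = SlurmConfiguration.slurm_96CPUs(output="benchmark.1day.MPS.44.fortran.%t.out")
srun_cmd = slurm_config.srun_bash(
    wrapper="./gpu-wrapper-slurm-mps.sh",  # illustrative wrapper path
    executable_name="./GEOSgcm.x",         # illustrative executable name
)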
2 changes: 2 additions & 0 deletions geosongpu_ci/pipeline/gtfv3_config.py
@@ -19,6 +19,8 @@ def sh(self) -> str:
f"export PACE_FLOAT_PRECISION={self.PACE_FLOAT_PRECISION}\n"
f"export PACE_LOGLEVEL={self.PACE_LOGLEVEL}\n"
f"export GTFV3_BACKEND={self.GTFV3_BACKEND}\n"
f"export PER_DEVICE_PROCESS=12\n" # default for Discover
f"export PYTHONOPTIMIZE=1\n"
)

@classmethod
131 changes: 79 additions & 52 deletions geosongpu_ci/pipeline/held_suarez.py
@@ -46,48 +46,11 @@ def __init__(
def _make_gpu_wrapper_script(
self,
experiment_directory: str,
hardware_sampling: bool = False,
) -> None:
script_name = "gpu-wrapper-slurm"
pre_execution = []
post_execution = []
if hardware_sampling:
script_name += "-hws"
pre_execution.append("if [ $SLURM_LOCALID -eq 0 ]; then")
pre_execution.append(" geosongpu_hws server &")
pre_execution.append(" sleep 20")
pre_execution.append(" geosongpu_hws client start")
pre_execution.append("fi")

post_execution.append(
"if [ $SLURM_LOCALID -eq 0 ]; then",
)
post_execution.append(
" geosongpu_hws client dump",
)
post_execution.append(
" geosongpu_hws client stop",
)
post_execution.append(
"fi",
)

cuda_device_setup = [
"export CUDA_VISIBLE_DEVICES=$SLURM_LOCALID",
'echo "Node: $SLURM_NODEID | Rank: $SLURM_PROCID,'
' pinned to GPU: $CUDA_VISIBLE_DEVICES"',
]
execution = ["$*"]
self.gpu_wrapper = ShellScript(
script_name,
"gpu-wrapper-slurm-mps",
working_directory=experiment_directory,
).write(
modules=[],
shell_commands=cuda_device_setup
+ pre_execution
+ execution
+ post_execution,
)
).from_template(template_name="gpu-wrapper-slurm-mps.sh")

def _copy_executable_script(
self,
@@ -114,13 +77,29 @@ def _make_srun_script(
slurm_config: SlurmConfiguration,
gtfv3_config: GTFV3Config,
prolog_scripts: PrologScripts,
hardware_sampler_on: bool = False,
mps_on: bool = False,
local_redirect_log: bool = False,
) -> ShellScript:
# Executing command with the SLURM setup
srun_cmd = slurm_config.srun_bash(
wrapper=prolog_scripts.gpu_wrapper.path,
executable_name=executable_name,
)
srun_script_script = ShellScript(
f"srun_{slurm_config.ntasks}tasks_{gtfv3_config.backend_sanitized()}",
# Options
options = f"""{'export HARDWARE_SAMPLING=1' if hardware_sampler_on else 'unset HARDWARE_SAMPLING' }
{'export MPS_ON=1' if mps_on else 'unset MPS_ON' }
{f'export LOCAL_REDIRECT_LOG=1' if local_redirect_log else 'unset LOCAL_REDIRECT_LOG' }
"""

if "dace" in gtfv3_config.GTFV3_BACKEND:
backend = f"{gtfv3_config.backend_sanitized()}.{gtfv3_config.FV3_DACEMODE}"
else:
backend = f"{gtfv3_config.backend_sanitized()}"
srun_script_name = f"srun_{slurm_config.ntasks}tasks_{backend}"

srun_script = ShellScript(
srun_script_name,
working_directory=experiment_directory,
).write(
env_to_source=[
Expand All @@ -132,13 +111,14 @@ def _make_srun_script(
f"source {prolog_scripts.copy_executable.path}",
"",
f"{gtfv3_config.sh()}",
"export PYTHONOPTIMIZE=1",
f"export CUPY_CACHE_DIR={experiment_directory}/.cupy",
"",
f"{options}",
"",
f"{srun_cmd}",
],
)
return srun_script_script
return srun_script
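To make the new toggles concrete, a small sketch (not the repo's code) of the environment block the options f-string above produces for one flag combination; the wrapper template added below (gpu-wrapper-slurm-mps.sh.tpl) reads these variables at run time:

# Sketch: reproducing the options block for hardware sampling + MPS, no local log redirect.
hardware_sampler_on, mps_on, local_redirect_log = True, True, False
options = f"""{'export HARDWARE_SAMPLING=1' if hardware_sampler_on else 'unset HARDWARE_SAMPLING'}
{'export MPS_ON=1' if mps_on else 'unset MPS_ON'}
{'export LOCAL_REDIRECT_LOG=1' if local_redirect_log else 'unset LOCAL_REDIRECT_LOG'}
"""
print(options)
# export HARDWARE_SAMPLING=1
# export MPS_ON=1
# unset LOCAL_REDIRECT_LOG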


VALIDATION_RESOLUTION = "C180-L72"
@@ -169,7 +149,7 @@ def _setup_1ts_1node_gtfv3(self, experiment_directory: str) -> ShellScript:

def _setup_1day_1node_gtfv3(self, experiment_directory: str) -> ShellScript:
return ShellScript(
name="_setup_config_1day_1node_gtfv3",
name="setup_config_1day_1node_gtfv3",
working_directory=experiment_directory,
).write(
shell_commands=[
@@ -193,6 +173,45 @@ def _setup_1day_1node_fortran(self, experiment_directory: str) -> ShellScript:
],
)

def _setup_1ts_2nodes_gtfv3(self, experiment_directory: str) -> ShellScript:
return ShellScript(
name="setup_config_1ts_2nodes_gtfv3",
working_directory=experiment_directory,
).write(
shell_commands=[
f"cd {experiment_directory}",
"cp -f AgcmSimple.rc.4x24.gtfv3 AgcmSimple.rc",
"cp -f input.nml.4x4 input.nml",
"cp -f CAP.rc.1ts CAP.rc",
],
)

def _setup_1day_2nodes_gtfv3(self, experiment_directory: str) -> ShellScript:
return ShellScript(
name="setup_config_1day_2nodes_gtfv3",
working_directory=experiment_directory,
).write(
shell_commands=[
f"cd {experiment_directory}",
"cp -f AgcmSimple.rc.4x24.gtfv3 AgcmSimple.rc",
"cp -f input.nml.4x4 input.nml",
"cp -f CAP.rc.1day CAP.rc",
],
)

def _setup_1day_2nodes_fortran(self, experiment_directory: str) -> ShellScript:
return ShellScript(
name="setup_config_1day_2nodes_fortran",
working_directory=experiment_directory,
).write(
shell_commands=[
f"cd {experiment_directory}",
"cp -f AgcmSimple.rc.4x24.fortran AgcmSimple.rc",
"cp -f input.nml.4x4 input.nml",
"cp -f CAP.rc.1day CAP.rc",
],
)

def prepare_experiment(
self,
input_directory: str,
@@ -224,13 +243,19 @@ def simulate(
gtfv3_config: GTFV3Config,
setup_script: ShellScript,
setup_only: bool = False,
hardware_sampler_on: bool = False,
mps_on: bool = False,
local_redirect_log: bool = False,
):
srun_script = _make_srun_script(
executable_name=executable_name,
experiment_directory=experiment_directory,
slurm_config=slurm_config,
gtfv3_config=gtfv3_config,
prolog_scripts=prolog_scripts,
hardware_sampler_on=hardware_sampler_on,
mps_on=mps_on,
local_redirect_log=local_redirect_log,
)

setup_script.execute()
@@ -310,11 +335,11 @@ def run_action(
experiment_directory=experiment_directory,
executable_name=self.executable_name,
prolog_scripts=prolog_scripts,
slurm_config=SlurmConfiguration.slurm_6CPUs_6GPUs(
slurm_config=SlurmConfiguration.slurm_96CPUs_8GPUs(
output="benchmark.cache.dacegpu.%t.out"
),
gtfv3_config=GTFV3Config.dace_gpu_32_bit_BAR(),
setup_script=self._setup_1ts_1node_gtfv3(experiment_directory),
setup_script=self._setup_1ts_2nodes_gtfv3(experiment_directory),
setup_only=env.setup_only,
)

@@ -323,25 +348,27 @@
experiment_directory=experiment_directory, # type: ignore
executable_name=self.executable_name,
prolog_scripts=prolog_scripts, # type: ignore
slurm_config=SlurmConfiguration.slurm_6CPUs_6GPUs(
output="benchmark.1day.dacegpu.%t.out"
slurm_config=SlurmConfiguration.slurm_96CPUs_8GPUs(
output="benchmark.1day.MPS.44.dacegpu.%t.out"
),
gtfv3_config=GTFV3Config.dace_gpu_32_bit_BAR(dacemode="Run"),
setup_script=self._setup_1day_1node_gtfv3(experiment_directory), # type: ignore
setup_script=self._setup_1day_2nodes_gtfv3(experiment_directory), # type: ignore
setup_only=env.setup_only,
mps_on=True,
)

# Run 1 day Fortran
self.simulate(
experiment_directory=experiment_directory, # type: ignore
executable_name=self.executable_name,
prolog_scripts=prolog_scripts, # type: ignore
slurm_config=SlurmConfiguration.slurm_72CPUs(
output="benchmark.1day.fortran.%t.out"
slurm_config=SlurmConfiguration.slurm_96CPUs(
output="benchmark.1day.MPS.44.fortran.%t.out"
),
gtfv3_config=GTFV3Config.fortran(),
setup_script=self._setup_1day_1node_fortran(experiment_directory), # type: ignore
setup_script=self._setup_1day_2nodes_fortran(experiment_directory), # type: ignore
setup_only=env.setup_only,
mps_on=True,
)

def check(
22 changes: 22 additions & 0 deletions geosongpu_ci/pipeline/templates/__init__.py
@@ -0,0 +1,22 @@
import os
import sys
import site


def find_template(name: str) -> str:
# pip install geosongpu-ci
candidate = f"{sys.prefix}/geosongpu/templates/{name}.tpl"
if os.path.isfile(candidate):
return candidate
# pip install --user geosongpu-ci
candidate = f"{site.USER_BASE}/geosongpu/templates/{name}.tpl"
if os.path.isfile(candidate):
return candidate
# pip install -e geosongpu-ci
candidate = os.path.join(
os.path.dirname(__file__),
f"{name}.tpl",
)
if os.path.isfile(candidate):
return candidate
raise FileNotFoundError(f"Template: could not locate {name}")
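A hedged usage sketch for the lookup above; ShellScript.from_template (used in _make_gpu_wrapper_script earlier in this diff) is assumed to resolve template names through find_template:

# Sketch only: locating the MPS wrapper template shipped with this PR.
from geosongpu_ci.pipeline.templates import find_template

path = find_template("gpu-wrapper-slurm-mps.sh")
print(path)  # resolves to the site-packages, user-site, or in-tree .tpl copy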
76 changes: 76 additions & 0 deletions geosongpu_ci/pipeline/templates/gpu-wrapper-slurm-mps.sh.tpl
@@ -0,0 +1,76 @@
#!/bin/sh

# We open GPU visibility to full node at first
export CUDA_VISIBLE_DEVICES=0,1,2,3

# Hardware sampling is a Python tool that reads various hardware
# sensors (power, usage, memory load...) at intervals
if [ -z ${HARDWARE_SAMPLING} ]; then
echo "Hardware sampling is OFF"
else
echo "Hardware sampling is ON"
# We restrict usage to (world) rank 0
if [ $SLURM_PROCID -eq 0 ]; then
geosongpu_hws server &
sleep 10
geosongpu_hws client start
fi

fi

if [ -z ${MPS_ON} ]; then
echo "MPS is OFF"
# No MPS, we assume rank==GPU
GPU=$SLURM_LOCALID
export CUDA_VISIBLE_DEVICES=$GPU
else
echo "MPS is ON"
if [ -z ${PER_DEVICE_PROCESS} ]; then
echo "PER_DEVICE_PROCESS needs to be setup on MPS. Exiting."
exit 1
fi
    # All ranks need to know where to look
export CUDA_MPS_PIPE_DIRECTORY=./nvidia-mps/$SLURM_NODEID
export CUDA_MPS_LOG_DIRECTORY=./nvidia-log/$SLURM_NODEID
# Only 1 rank per node (local rank 0) handles the server chatter
if [ $SLURM_LOCALID -eq 0 ]; then
echo "Turn nvidia-cuda-mps-control on for node $SLURM_NODEID"
mkdir -p nvidia-mps
mkdir -p nvidia-log/$SLURM_NODEID
        # sudo nvidia-smi -i 0 -c 3 # Per docs, we should ensure the GPU is in EXCLUSIVE mode, but we might be curtailed by HPC settings
nvidia-cuda-mps-control -d
fi
    # MPS server is socket-based, leave time for the filesystem
sleep 10
    # Server should be spun up; we restrict this rank to a single GPU
GPU=$((SLURM_LOCALID/PER_DEVICE_PROCESS))
export CUDA_VISIBLE_DEVICES=$GPU
fi


echo "Node: $SLURM_NODEID | Rank: $SLURM_PROCID, pinned to GPU: $CUDA_VISIBLE_DEVICES"

# Run program with or without log dump in file
if [ -z ${LOCAL_REDIRECT_LOG} ]; then
$*
else
$* > log.redirect_local.$SLURM_PROCID.out 2>&1
fi

# Clean up of all tools
if [ -z ${HARDWARE_SAMPLING} ]; then
echo ""
else
if [ $SLURM_PROCID -eq 0 ]; then
geosongpu_hws client dump
geosongpu_hws client stop
fi
fi
if [ -z ${MPS_ON} ]; then
echo ""
else
if [ $SLURM_LOCALID -eq 0 ]; then
echo quit | nvidia-cuda-mps-control
        # sudo nvidia-smi -i 0 -c 0 # Per docs, we should ensure the GPU is flipped back to DEFAULT mode, but we might be curtailed by HPC settings
fi
fi
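To illustrate the pinning rule above (GPU = SLURM_LOCALID / PER_DEVICE_PROCESS): assuming the 2-node, 96-task layout with 48 ranks per node and the Discover default PER_DEVICE_PROCESS=12, integer division maps local ranks 0-11 to GPU 0, 12-23 to GPU 1, 24-35 to GPU 2, and 36-47 to GPU 3. A small Python sketch of the same arithmetic:

# Sketch of the wrapper's pinning rule: GPU = SLURM_LOCALID // PER_DEVICE_PROCESS
PER_DEVICE_PROCESS = 12  # Discover default exported by GTFV3Config in this PR
pinning = {local_rank: local_rank // PER_DEVICE_PROCESS for local_rank in range(48)}
assert set(pinning.values()) == {0, 1, 2, 3}  # four GPUs per node, 12 ranks each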
12 changes: 11 additions & 1 deletion geosongpu_ci/tools/benchmark/geos_log_parser.py
@@ -68,7 +68,9 @@ def parse_geos_log(filename: str) -> BenchmarkRawData:
benchmark.fv_dyncore_timings = _extract_numerics(interface_timings)

if "dace" in benchmark.backend:
dycore_timings = _grep(filename, "] Run...", exclude_pattern=True)
dycore_timings = _grep(
filename, "] Run...", exclude_pattern=True, expected=False
)
benchmark.inner_dycore_timings = _extract_numerics(dycore_timings)
else:
dycore_timings = _grep(filename, "0: fv_dynamics", exclude_pattern=True)
@@ -131,8 +133,16 @@ def parse_geos_log(filename: str) -> BenchmarkRawData:

# Model throughput
gloabl_profiler_entry = "Model Throughput"
global_init_time = _grep(
filename, "--Initialize", start_pattern=gloabl_profiler_entry
)
benchmark.global_init_time = _extract_numerics(global_init_time)[1]
global_run_time = _grep(filename, "--Run", start_pattern=gloabl_profiler_entry)
benchmark.global_run_time = _extract_numerics(global_run_time)[1]
global_finalize_time = _grep(
filename, "--Finalize", start_pattern=gloabl_profiler_entry
)
benchmark.global_finalize_time = _extract_numerics(global_finalize_time)[1]

return benchmark
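For intuition, the new throughput parsing greps the "Model Throughput" section of a GEOS log for the --Initialize, --Run, and --Finalize rows and keeps the second numeric field (index [1]). A hedged stand-in for _extract_numerics, run on a made-up log row (the real GEOS output format may differ):

import re

def second_numeric(line: str) -> float:
    # Illustrative stand-in for _extract_numerics(...)[1]: grab all numbers, keep the second.
    numbers = re.findall(r"[-+]?\d*\.?\d+(?:[eE][-+]?\d+)?", line)
    return float(numbers[1])

print(second_numeric("--Run   1234   5678.9"))  # -> 5678.9 (illustrative line, not real GEOS output)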
