generated from GEOS-ESM/geos-template-repo
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
10 changed files
with
433 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" | |
|
||
[project] | ||
name = "tcn" | ||
version = "2023.0.0" | ||
version = "2024.0.0" | ||
authors = [ | ||
{ name = "NASA Advanced Software and Technology Group", email = "[email protected]" }, | ||
] | ||
|
@@ -41,6 +41,7 @@ dependencies = [ | |
"netcdf4==1.6.3", | ||
"f90nml", | ||
"GitPython", | ||
"pandas", | ||
] | ||
|
||
[tool.setuptools] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,181 @@ | ||
{ | ||
"latency": { | ||
"message_size": [ | ||
1, | ||
2, | ||
4, | ||
8, | ||
16, | ||
32, | ||
64, | ||
128, | ||
256, | ||
512, | ||
1024, | ||
2048, | ||
4096, | ||
8192, | ||
16384, | ||
32768, | ||
65536, | ||
131072, | ||
262144, | ||
524288, | ||
1048576, | ||
2097152, | ||
4194304 | ||
], | ||
"sles12": [ | ||
19.32, | ||
19.34, | ||
19.32, | ||
19.28, | ||
19.25, | ||
19.53, | ||
19.66, | ||
19.74, | ||
19.89, | ||
19.93, | ||
14.79, | ||
11.12, | ||
11.13, | ||
11.21, | ||
11.34, | ||
11.54, | ||
12.04, | ||
13, | ||
14.27, | ||
17.01, | ||
22.44, | ||
33.64, | ||
59.6 | ||
], | ||
"sles15": [ | ||
3.21, | ||
3.26, | ||
3.27, | ||
3.28, | ||
3.28, | ||
3.61, | ||
3.72, | ||
3.78, | ||
3.83, | ||
3.99, | ||
13.32, | ||
13.41, | ||
13.78, | ||
13.89, | ||
14.05, | ||
14.12, | ||
14.61, | ||
15.56, | ||
16.96, | ||
19.52, | ||
25.22, | ||
36.2, | ||
57.97 | ||
] | ||
}, | ||
"bandwith_d_d": { | ||
"message_size": [ | ||
1, | ||
2, | ||
4, | ||
8, | ||
16, | ||
32, | ||
64, | ||
128, | ||
256, | ||
512, | ||
1024, | ||
2048, | ||
4096, | ||
8192, | ||
16384, | ||
32768, | ||
65536, | ||
131072, | ||
262144, | ||
524288, | ||
1048576, | ||
2097152, | ||
4194304 | ||
], | ||
"discover": [ | ||
0.8, | ||
1.61, | ||
3.26, | ||
6.61, | ||
12.79, | ||
25.27, | ||
46.96, | ||
93.44, | ||
185.84, | ||
344.61, | ||
698.7, | ||
1287.32, | ||
2531.08, | ||
5334.86, | ||
6660.63, | ||
7549.35, | ||
7921.56, | ||
8141.28, | ||
8280.57, | ||
8231.82, | ||
8238.02, | ||
8254.08, | ||
8262.24 | ||
], | ||
"discover_host": [ | ||
2.48, | ||
4.93, | ||
9.77, | ||
19.63, | ||
39.21, | ||
78.52, | ||
156.75, | ||
303.82, | ||
558.15, | ||
1100.59, | ||
2554.42, | ||
4710.63, | ||
6232.18, | ||
10533.33, | ||
15514.43, | ||
20568.02, | ||
22083.17, | ||
22433.31, | ||
22922.42, | ||
23234.75, | ||
23118, | ||
23169.48, | ||
27117.58 | ||
], | ||
"perlmutter": [ | ||
0.76, | ||
1.01, | ||
2.01, | ||
3.96, | ||
8.08, | ||
16.16, | ||
32.13, | ||
20.7, | ||
469.29, | ||
941.65, | ||
1881.96, | ||
3583.05, | ||
6990.1, | ||
14424.4, | ||
15612.7, | ||
16250.48, | ||
19572.69, | ||
21353.37, | ||
22171.34, | ||
22567.9, | ||
22894.79, | ||
22867.91, | ||
22921.46 | ||
] | ||
} | ||
} |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
import pathlib | ||
import pandas as pd | ||
import plotly.express as px | ||
import json | ||
from tcn.plots.colors import COLORS_RETRO_HIGH_CONTRAST # type:ignore | ||
|
||
# Absolute, symlink-resolved directory that contains this script.
THIS_DIR = pathlib.Path(__file__).parent.resolve()
# One level above THIS_DIR. The ".." segment is appended as-is (pathlib does
# not collapse it), so the path is only normalised by the OS when it is used.
RESULT_DIR = THIS_DIR / "../"
|
||
|
||
def _write_line_plot(section, series, title, value_label, legend_title, out_path):
    """Render one benchmark section as a line plot and save it as an image.

    Args:
        section: Mapping holding a ``"message_size"`` list plus one list of
            measurements per name in ``series``.
        series: Names of the measurement lists to draw, one line each.
        title: Figure title.
        value_label: Axis label for the measured values.
        legend_title: Legend title for the series names.
        out_path: Destination passed to ``Figure.write_image``.

    Raises:
        KeyError: If ``section`` lacks ``"message_size"`` or a series name.
    """
    columns = ["message_size", *series]
    # One row per column name, then transpose so each name becomes a column.
    # Building row-wise keeps pandas' NaN padding if the lists differ in length.
    df = pd.DataFrame([section[name] for name in columns], index=columns).T
    fig = px.line(
        df,
        x="message_size",
        y=list(series),
        log_x=True,
        color_discrete_sequence=list(COLORS_RETRO_HIGH_CONTRAST.values()),
        title=title,
        template="simple_white",
        labels={
            "message_size": "Message size (B)",
            "value": value_label,
            "variable": legend_title,
        },
    )
    fig.write_image(out_path)


def summary(project_dir: str, data_name: str) -> None:
    """Plot the latency and bandwidth results stored in a JSON data file.

    Reads ``{project_dir}/{data_name}`` and writes two images next to it:
    ``latency.png`` (series ``sles12`` and ``sles15``) and ``bandwith.png``
    (series ``discover``, ``discover_host`` and ``perlmutter``).

    Args:
        project_dir: Directory containing the data file; also receives the
            generated images.
        data_name: File name of the JSON data file inside ``project_dir``.

    Raises:
        OSError: If the data file cannot be opened.
        KeyError: If an expected section or series is missing from the data.
    """
    with open(f"{project_dir}/{data_name}", encoding="utf-8") as f:
        data = json.load(f)

    # NOTE(review): the value axis used to be labelled "Bandwidth (B/s)" here,
    # copy-pasted from the bandwidth plot. The data file does not record the
    # latency unit, so none is shown; add it once confirmed.
    _write_line_plot(
        section=data["latency"],
        series=["sles12", "sles15"],
        title="Latency on Discover (lower is better)",
        value_label="Latency",
        legend_title="OS",
        out_path=f"{project_dir}/latency.png",
    )

    # The "bandwith_d_d" key and the "bandwith.png" file name keep their
    # original spelling: the data file and any consumers rely on them.
    _write_line_plot(
        section=data["bandwith_d_d"],
        series=["discover", "discover_host", "perlmutter"],
        title="Peak bandwidth (higher is better)",
        value_label="Bandwidth (B/s)",
        legend_title="Machine",
        out_path=f"{project_dir}/bandwith.png",
    )
|
||
|
||
if __name__ == "__main__":
    # Script entry point: plot the 24W7 results of the mpi_gpu_rdma project,
    # located under RESULT_DIR.
    summary(
        project_dir=str(RESULT_DIR / "mpi_gpu_rdma"),
        data_name="24W7.json",
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
ID Title URL Milestone (NASA) Assignees Status Priority Category xTeam Type Repository Linked pull requests Task Readiness | ||
P0 Planning Mx Backlog Project Sink Ok - Planned | ||
P1 User feedback survey Mx Backlog Project SI/GMAO Sink Ok - Planned | ||
P2 Milestone feedback Mx Backlog Project SI/GMAO Sink Ok - Planned | ||
MW0 Pace split https://github.com/GEOS-ESM/pace/issues/41 M1 FlorianDeconinck Done P0 Middleware NOAA Task GEOS-ESM/pace Ok - Planned | ||
MW1 Versioned release M1 Backlog P1 Middleware NOAA Task Ok - Planned | ||
MW2 MPP/FMS API Backlog Middleware NOAA Parent Ok - Unplanned | ||
MW2.1 Non-square layout M1 Backlog P2 Middleware NOAA Subtask Ok - Planned | ||
MW2.2 All grid capacities Backlog Middleware NOAA Subtask To Subtasks | ||
MW3 CPU optimization M2 Backlog Middleware Parent To Subtasks | ||
MW3.1 Partial expansion M2 Backlog Middleware LLNL Subtask Investigate | ||
MW3.2 Zero-copy on CPU from Fortran align M2 Backlog Middleware Subtask Ok - Planned | ||
MW4 Backend compilation refactor Backlog Middleware Parent To Subtasks | ||
MW4.1 Extend cube-sphere naming to stencil backends Backlog Middleware Subtask Ok - Unplanned | ||
MW4.2 Distributed compilation on many nodes Backlog Middleware Subtask Ok - Unplanned | ||
MW4.3 Better key-ing for the cube-sphere cache Backlog Middleware Subtask Ok - Unplanned | ||
MW4.4 Metadata extension Backlog Middleware Subtask Ok - Unplanned | ||
MW5 GT4Py.cartesian: global fields Backlog Middleware Task Ok - Unplanned | ||
MW6 GT4Py.cartesian: K indirect write Backlog Middleware Task Ok - Unplanned | ||
MW7 GT4Py.cartesian: return/end of loop Backlog Middleware Task Ok - Unplanned | ||
MW8 Fortran <> Python interface generator Backlog Middleware Task To Subtasks | ||
MW8.1 Translate-test-like architecture for timestep level testing Backlog Middleware Subtask Ok - Unplanned | ||
MW8.2 Scientific fortran-python difference testing Backlog Middleware Subtask Ok - Unplanned | ||
MW9 DaCe opt: kernel merge investigation Backlog Middleware Parent To Subtasks | ||
MW9.1 DaCe opt: corners (dace.map and/or stree merge) Backlog Middleware LLNL Subtask Ok - Unplanned | ||
MW10 DaCe opt: auto-tuning Backlog Middleware Parent Ok - Unplanned | ||
MW10.1 Brute force auto-tuning Backlog Middleware Subtask To Subtasks | ||
MW10.2 ML auto-tuning (Daisy) Backlog Middleware Subtask Ok - Unplanned | ||
MW11 User configuration failure feedback Backlog Middleware Task To Subtasks | ||
MW12 JAX backend Backlog Middleware NOAA Parent To Subtasks | ||
MW12.1 `jax` backend: a working jax under-the-hood Backlog Middleware NOAA Subtask Ok - Unplanned | ||
MW12.2 `jax:ad` backend: a differentiating stencil backend Backlog Middleware NOAA Subtask Ok - Unplanned | ||
MW13 Production check Backlog Middleware Parent To Subtasks | ||
MW13.1 Low data on GPU (bad bandwidth) Backlog Middleware Subtask Ok - Unplanned | ||
MW13.2 MPI not CUDA aware Backlog Middleware Subtask Ok - Unplanned | ||
MW13.3 VRAM exceed or low Backlog Middleware Subtask Ok - Unplanned | ||
MW13.4 No GPU or GPU inaccessible Backlog Middleware Subtask Ok - Unplanned | ||
MW14 Initialization performance Backlog Middleware Task To Subtasks | ||
MW15 Halo exchange full program integration Backlog Middleware Task Ok - Unplanned | ||
MW16 GT4Py.cartesian: data dimension parallelism Backlog Middleware Task To Subtasks | ||
MW17 AMD GPU performance Backlog Middleware Task To Subtasks | ||
MW18 GT4Py.cartesian: DaCe v0.15.x M1 Backlog P1 Middleware GT4Py Task Ok - Planned | ||
MW19 Instrumentation Backlog Middleware Parent To Subtasks | ||
MW19.1 DaCe instrumentation: kernel timings Backlog Middleware Subtask Ok - Unplanned | ||
MW20 GPU-CPU node optimal usage Backlog Middleware NOAA Parent To Subtasks | ||
MW20.1 CPU I/O on GPU runs - explore Backlog Middleware Subtask Investigate | ||
S0 Support to middleware users Mx Backlog Support Task To Subtasks | ||
S1 CI/CD Mx Backlog Support Parent To Subtasks | ||
S1.1 Workflow to be seen in github action directly Backlog Support Subtask Ok - Unplanned | ||
S1.2 Make GEOS cachable to speed up CI Backlog Support Subtask Ok - Unplanned | ||
G0 DyCore GEOS-FP on Discover M1 Backlog GEOS SI/GMAO Task To Subtasks | ||
G1 Dynamical core Grid Component M1 Backlog GEOS GMAO Parent To Subtasks | ||
G1.1 Discover config of GEOS-FP M1 Backlog GEOS Subtask Ok - Planned | ||
G1.2 Mixed precision M1 Backlog GEOS Subtask Ok - Planned | ||
G1.3 SubGridZ M1 Backlog GEOS Subtask Ok - Planned | ||
G1.4 Tracer normalization M1 Backlog GEOS Subtask Ok - Planned | ||
G1.5 Wind update M1 Backlog GEOS Subtask Ok - Planned | ||
G2 Moist physics Grid Component M2 Backlog GEOS Parent To Subtasks | ||
G2.1 Compare to OACC port M2 Backlog GEOS Subtask Ok - Planned | ||
G2.2 UW convection port M2 Backlog GEOS Subtask Ok - Planned | ||
G2.3 GF convection port M2 Backlog GEOS Subtask Ok - Planned | ||
G2.4 Microphysics port M2 Backlog GEOS Subtask Ok - Planned | ||
G2.5 Merge all RUN components M2 Backlog GEOS SI Subtask Ok - Planned | ||
G3 Radiation physics Grid Component M2 Backlog GEOS Parent To Subtasks | ||
G3.1 RRTMGP port M2 Backlog GEOS Subtask Investigate | ||
G4 Land surface Grid Component M3 Backlog GEOS Task To Subtasks | ||
G5 Benchmark of GPU-ready GEOS-FP Mx Backlog GEOS Sink Ok - Planned | ||
G5.1 Define operational and HPC metrics M1 Backlog GEOS Subtask Ok - Planned | ||
G5.2 Define an archiving mechanism and baselining M1 Backlog GEOS Subtask Ok - Planned | ||
G5.3 Benchmark GPU-Dynamical Core GEOS-FP M1 Backlog GEOS Subtask Ok - Planned | ||
G5.4 Benchmark GPU-Moist GEOS-FP M2 Backlog GEOS Subtask Ok - Planned | ||
G5.5 Benchmark GPU-Radiation GEOS-FP M2 Backlog GEOS Subtask Ok - Planned | ||
G5.6 Benchmark GPU-Land Surface GEOS-FP M3 Backlog GEOS Subtask Ok - Planned | ||
G6 Validation of the GPU accelerated GEOS-FP Mx Backlog GEOS Task To Subtasks | ||
G7 Multi-grid support for Component level CPU-GPU switch M1 Backlog GEOS Task Investigate | ||
G8 Document and auto-generate VRAM guidelines Backlog GEOS Task To Subtasks | ||
G9 Pipeline recurring GEOS simulation M1 Backlog GEOS Parent To Subtasks | ||
G9.1 Fast scientific validation M1 Backlog GEOS Subtask Ok - Planned | ||
G9.2 Full scientific validation M1 Backlog GEOS Subtask Ok - Planned | ||
G9.3 Per port timestep-level validation Mx Backlog GEOS Parent To Subtasks | ||
D1 DyCore allowed namelist options M1 Backlog P0 Documentation Task Ok - Planned | ||
D2 Training: a step-by-step porting of a subroutine Backlog P2 Documentation Task Ok - Unplanned | ||
D3 Training: how to start writing a model using the DSL Backlog P2 Documentation Task Ok - Unplanned | ||
D4 Docs - GEOS GPU: quickstart Mx Backlog Documentation Sink Ok - Planned | ||
D5 Docs - GEOS GPU: usage & limitation Backlog P1 Documentation Task Ok - Unplanned | ||
D6 Project Mx Backlog Documentation Sink Ok - Planned | ||
D7 Docs - State of software stack supporting GEOS & GPU Mx Backlog Documentation Sink Ok - Planned | ||
D8 Docs - Developing in/around GEOS Mx Backlog Documentation Sink Ok - Planned | ||
D9 Docs - Team goals and means M1 Backlog Documentation Task Ok - Planned | ||
O0 Conference Mx Backlog Outreach Parent To Subtasks | ||
O0.1 PASC 24 M2 Backlog Outreach Subtask Ok - Planned | ||
O0.2 AMS 25 or AGU 24 M2 Backlog Outreach Subtask Ok - Planned | ||
O0.3 SC24 M2 Backlog Outreach Subtask Ok - Planned | ||
O0.4 Paper Mx Backlog Outreach Subtask Ok - Planned | ||
MW21 Better Translate test Backlog Middleware NOAA Parent To Subtasks | ||
MW21.1 Export precise metadata with NetCDF data and/or have an API to query Backlog Middleware Task Investigate | ||
MW4.5 Move orchestration in backend name. Backlog Middleware Subtask Ok - Unplanned | ||
MW4.6 Use DaCe hash system to remove need for FV3_DACEMODE Backlog Middleware Subtask Ok - Unplanned | ||
MW22 DaCe opt: do away with dynamic memlets M1 Backlog Middleware Task Ok - Planned | ||
G1.6 PyFV3: allow N tracers to be advected M1 Backlog GEOS Subtask Ok - Planned | ||
G1.7 PyFV3 `fv_mapz` is substantially different for GEOS M1 Backlog GEOS Subtask Ok - Planned | ||
G1.8 PyFV3: tracer advection is substantially different for GEOS M1 Backlog GEOS Subtask Investigate | ||
G10 Update GEOS-Dycore Mx Backlog GEOS Sink Ok - Planned | ||
MW23 Exposes "boilerplate" NDSL packages Backlog Middleware NOAA Parent To Subtasks | ||
G11 Central `@GMAO_SHARED/geos_shared/*` refactor and porting strategy. M1 Backlog P0 GEOS Task Ok - Planned | ||
MW24 Complete unit tests setup M1 Backlog Middleware NOAA Parent Ok - Planned | ||
MW24.2 Use gt:gpu backend as a base for GPU unit testing M1 Backlog Middleware Subtask Ok - Planned | ||
MW24.1 Find a strategy to run GPU unit tests M1 Backlog Middleware NOAA Subtask Ok - Planned | ||
MW24.3 Orchestration unit tests M1 Backlog Middleware Subtask Ok - Planned |
Oops, something went wrong.