generated from GEOS-ESM/geos-template-repo
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
16 changed files
with
271 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
#!/bin/sh
#
# Per-rank GPU launcher. Run once per MPI rank, it:
#   1. reads the node-local and world rank from the MPI environment,
#   2. optionally starts the hardware sampler (world rank 0 only),
#   3. optionally starts Nvidia MPS (local rank 0 of each node),
#   4. pins the rank to a single GPU through CUDA_VISIBLE_DEVICES,
#   5. runs the command given as arguments,
#   6. shuts the tools down and exits with the command's status.
#
# Environment read:
#   HARDWARE_SAMPLING   non-empty -> start/stop `geosongpu_hws`
#   MPS_ON              non-empty -> use nvidia-cuda-mps-control
#   PER_DEVICE_PROCESS  ranks sharing one GPU (mandatory when MPS_ON is set)
#   LOCAL_REDIRECT_LOG  non-empty -> send the command output to a per-rank file

# For debug of this script
#set -x

HOSTNAME=$(hostname)

# Rank detection. `${VAR:-}` keeps the test POSIX compliant (the substring
# form `${VAR:0}` is a bashism) and safe when the variable is unset.
# The world rank is taken from the same launcher as the local rank: under
# Open MPI's mpirun, SLURM_PROCID does not identify the MPI rank.
if [ -n "${OMPI_COMM_WORLD_LOCAL_RANK:-}" ]; then
    LOCAL_RANK=$OMPI_COMM_WORLD_LOCAL_RANK
    WORLD_RANK=${OMPI_COMM_WORLD_RANK:-${SLURM_PROCID:-$LOCAL_RANK}}
elif [ -n "${SLURM_LOCALID:-}" ]; then
    LOCAL_RANK=$SLURM_LOCALID
    WORLD_RANK=${SLURM_PROCID:-$LOCAL_RANK}
else
    # The local rank is unknown here, so the message cannot be restricted
    # to rank 0: every process reports the failure on stderr.
    echo "Unimplemented MPI environement, can't read local rank. Exiting." >&2
    exit 1
fi

# Hardware sampling is a python tools that reads at intervals
# various hardware sensors (power, usage, memory load...)
if [ -z "${HARDWARE_SAMPLING}" ]; then
    if [ "$LOCAL_RANK" -eq 0 ]; then
        echo "Hardware sampling is OFF"
    fi
else
    if [ "$LOCAL_RANK" -eq 0 ]; then
        echo "Hardware sampling is ON"
    fi
    # We restrict usage to (world) rank 0
    if [ "$WORLD_RANK" -eq 0 ]; then
        geosongpu_hws server &
        # Leave the server time to come up before talking to it
        sleep 10
        geosongpu_hws client start
    fi
fi

# Nvidia's Multi Process Service required to run multiple processed
# at the same time on one GPU

# We open GPU visibility to full node at first
export CUDA_VISIBLE_DEVICES=0,1,2,3

if [ -z "${MPS_ON}" ]; then
    if [ "$LOCAL_RANK" -eq 0 ]; then
        echo "MPS is OFF"
    fi
    # No MPS, we assume rank==GPU
    GPU=$LOCAL_RANK
    export CUDA_VISIBLE_DEVICES=$GPU
else
    if [ "$LOCAL_RANK" -eq 0 ]; then
        echo "MPS is ON"
    fi
    if [ -z "${PER_DEVICE_PROCESS}" ]; then
        if [ "$LOCAL_RANK" -eq 0 ]; then
            echo "PER_DEVICE_PROCESS needs to be setup on MPS. Exiting."
        fi
        exit 1
    fi
    # All ranks needs to know where to look
    export CUDA_MPS_PIPE_DIRECTORY=./nvidia-mps/$HOSTNAME
    export CUDA_MPS_LOG_DIRECTORY=./nvidia-log/$HOSTNAME
    # Only 1 rank per node (local rank 0) handles the server chatter
    if [ "$LOCAL_RANK" -eq 0 ]; then
        echo "Turn nvidia-cuda-mps-control on for node $HOSTNAME"
        mkdir -p "$CUDA_MPS_PIPE_DIRECTORY"
        mkdir -p "$CUDA_MPS_LOG_DIRECTORY"
        # sudo nividia -i 0 -c 3 # Per docs, we should insure GPU is in EXCLUSIVE mode but we might be curtail by HPC settings
        nvidia-cuda-mps-control -d
    fi
    # MPS server is socket base, leave time for the filesystem
    sleep 10
    # Server should be spun, we restrict this rank to a single GPU
    # (integer division: PER_DEVICE_PROCESS consecutive ranks share a GPU)
    GPU=$((LOCAL_RANK/PER_DEVICE_PROCESS))
    export CUDA_VISIBLE_DEVICES=$GPU
fi

echo "Node: $HOSTNAME | Rank: $LOCAL_RANK, pinned to GPU: $CUDA_VISIBLE_DEVICES"

# Run program with or without log dump in file.
# "$@" forwards every argument unchanged, including those holding spaces.
if [ -z "${LOCAL_REDIRECT_LOG}" ]; then
    "$@"
else
    "$@" > "log.redirect_local.$HOSTNAME.$LOCAL_RANK.out" 2>&1
fi
# Remember the program status so the clean up below does not mask a failure
STATUS=$?

# Clean up of all tools
if [ -z "${HARDWARE_SAMPLING}" ]; then
    echo ""
else
    # The sampler was started on world rank 0 only, stop it from there
    if [ "$WORLD_RANK" -eq 0 ]; then
        geosongpu_hws client dump
        geosongpu_hws client stop
    fi
fi
if [ -z "${MPS_ON}" ]; then
    echo ""
else
    if [ "$LOCAL_RANK" -eq 0 ]; then
        echo quit | nvidia-cuda-mps-control
        # sudo nividia -i 0 -c 0 # Per docs, we should insure GPU is flipped back to DEFAULT mode but we might be curtail by HPC settings
    fi
fi

exit $STATUS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# Wrapper around `mpirun` that enables MPS and runs the given command
# through ./gpu-mps-launcher.sh on every rank.

# Turn MPS on
export MPS_ON=1

# Read `np`: the rank count is taken from the second argument, the first
# argument is discarded (presumably the `-np` flag -- confirm with callers).
if [ "$#" -lt 2 ]; then
    echo "Usage: $0 <np-flag> <ranks> <program> [args...]" >&2
    exit 1
fi
NP="$2"
shift 2

# Forward to the launcher; "$@" keeps arguments containing spaces intact
mpirun -np "$NP" ./gpu-mps-launcher.sh "$@"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
!> Fortran declarations for calling the C symbol `python_function`.
!!
!! Exposes the interoperable derived type `data_t` and the explicit
!! interface `python_function_f` bound to that symbol.
module stub_interface_mod

   use, intrinsic :: iso_c_binding, only: c_int, c_float, c_bool

   implicit none

   private
   public :: python_function_f, data_t

   !> Payload handed through the interface; `bind(c)` makes its layout
   !! interoperable with a C struct declaring the same members in order.
   type, bind(c) :: data_t
      real(c_float) :: x
      integer(c_int) :: y
      logical(c_bool) :: b
      ! Magic number: help guaranteeing layout is kept
      ! consistent through the interface. Imperfect.
      integer(c_int) :: i_am_123456789 = 123456789
   end type data_t

   interface

      !> Binding to the C symbol `python_function`.
      !! Neither dummy carries the `value` attribute, so both actual
      !! arguments are passed by reference (the callee receives pointers).
      !! NOTE(review): confirm that the pointee type expected by the C
      !! declaration for the second argument is compatible with `c_int`.
      subroutine python_function_f(data, value) bind(c, name='python_function')
         import :: data_t, c_int

         implicit none
         type(data_t), intent(in) :: data
         integer(kind=c_int), intent(in) :: value

      end subroutine python_function_f

   end interface

end module stub_interface_mod
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# file plugin_build.py
#
# Build script for the `bridge` CFFI embedding module: the declarations of
# data_to_be_transited.h become the embedded API and the Python callback
# `python_function` is registered in the embedding init code.
import cffi

ffibuilder = cffi.FFI()

# Keep every header line that does not start with "#" (drops the
# preprocessor directives) before handing the text to CFFI.
kept_lines = []
with open("data_to_be_transited.h") as header:
    for text in header:
        if text.startswith("#"):
            continue
        kept_lines.append(text)

api_declarations = "".join(kept_lines)
api_declarations = api_declarations.replace("CFFI_DLLEXPORT", "")
ffibuilder.embedding_api(api_declarations)

# C source of the generated module: it only needs the shared header.
ffibuilder.set_source(
    "bridge",
    r"""#include "data_to_be_transited.h" """,
)

# Python code executed when the embedded interpreter initialises; it
# defines the Python implementation behind the exported C function.
ffibuilder.embedding_init_code(
    """
from bridge import ffi
from runtime_code import check_function
@ffi.def_extern()
def python_function(data:"data_t", union_v: "union_t"):
    check_function(data)
"""
)

# Generate the C file and compile it into the shared library.
ffibuilder.compile(target="bridge.so", verbose=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
#!/bin/bash
#
# Build the CFFI bridge, compile the Fortran driver against it and run it.

# Stop at the first failing step: each step consumes the output of the
# previous one, so continuing would only hide the real error.
set -e

# Generates and compiles bridge.so
python bridge.py
# Compile the Fortran sources and link them against the bridge library
gfortran bridge.f90 main.f90 -o test ./bridge.so
# PYTHONPATH=. lets the embedded interpreter import modules from this directory
PYTHONPATH=. ./test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
#!/bin/bash

# Delete the build products; -f keeps rm quiet when a file is absent.
rm -f \
    bridge.so \
    bridge.o \
    stub_interface_mod.mod \
    bridge.c \
    test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
from dataclasses import dataclass | ||
|
||
|
||
@dataclass
class Data_py_t:
    """Python description of the data record: three payload fields plus a magic number."""

    # Payload fields
    x: float
    y: int
    b: bool

    # Magic number: see Fortran
    i_am_123456789: int
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
#pragma once | ||
#include <stdbool.h> | ||
|
||
/* Record exchanged through the interface. */
typedef struct {
    float x;
    int y;
    bool b;
    /* Magic number, see Fortran */
    int i_am_123456789;
} data_t;

/* Second argument of python_function: either a pointer or an integer. */
typedef union {
    void *void_ptr;
    int int_value;
} union_t;

/* Entry point of the interface; both arguments are passed by pointer. */
extern void python_function(data_t *data, union_t *value);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
!> Minimal driver: builds one `data_t` record and passes it, together
!! with an integer, to `python_function_f`.
program test
   use, intrinsic :: iso_c_binding, only: c_int, c_float, c_bool
   use stub_interface_mod, only: python_function_f, data_t

   implicit none

   type(data_t) :: d

   ! Literals carry the C-interoperable kinds of the components/dummies so
   ! the program does not depend on the compiler's default kinds.
   ! The magic-number component is left out and keeps its default value.
   d = data_t(x=42.42_c_float, y=24_c_int, b=.true._c_bool)
   call python_function_f(d, 39_c_int)

   print *, 'test'
end program test
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
from data_desc import Data_py_t | ||
import inspect | ||
|
||
|
||
def check_function(data: Data_py_t):
    """Validate the magic number of ``data`` and print each of its fields.

    Args:
        data: object exposing the attributes declared by ``Data_py_t``.
            NOTE(review): at runtime this is presumably the struct object
            handed over by the bridge rather than a ``Data_py_t``
            instance -- only attribute access is relied upon.

    Raises:
        ValueError: if ``data.i_am_123456789`` is not 123456789.
    """
    # Local import: keeps this function self-contained.
    from dataclasses import fields

    # Check the magic number
    if data.i_am_123456789 != 123456789:
        raise ValueError("Magic number failure")

    print(f"Data comes as {data} of type {type(data)}")
    # dataclasses.fields() is the supported way to list the declared
    # fields, in declaration order.
    for field in fields(Data_py_t):
        print(f"{field.name} of value {getattr(data, field.name)}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
10 changes: 10 additions & 0 deletions
10
sw_stack/discover/sles15/src/2024.03.00/verify_baselibs.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
#!/bin/bash
#
# Run the `verify` target of the baselibs build.

# Presumably defines the DSLSW_* variables used below -- confirm in basics.sh
source ./basics.sh

# Abort if the source tree is missing: running make from the wrong
# directory would fail (or verify something else) in a confusing way.
cd "$DSLSW_BASE/baselibs-$DSLSW_BASELIBS_VER" || exit 1

make ESMF_COMM=openmpi \
    BUILD=ESSENTIALS \
    ALLOW_ARGUMENT_MISMATCH=-fallow-argument-mismatch \
    prefix="$DSLSW_INSTALL_DIR/baselibs-$DSLSW_BASELIBS_VER/install/x86_64-pc-linux-gnu/Linux" \
    verify