Skip to content

Commit

Permalink
Merge pull request #93 from NaderAlAwar/multi_gpu_fix
Browse files Browse the repository at this point in the history
Enable usage of multiple GPUs in PyKokkos
  • Loading branch information
NaderAlAwar authored Sep 28, 2022
2 parents 5ea6a6b + 93ea2c1 commit e3fbe2b
Show file tree
Hide file tree
Showing 12 changed files with 343 additions and 43 deletions.
60 changes: 60 additions & 0 deletions examples/pykokkos/multi_gpu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import pykokkos as pk

import numpy as np
import cupy as cp

# Make Cuda the default PyKokkos execution space for this example.
pk.set_default_space(pk.Cuda)

# Number of elements in every array/view created below.
size = 10000

# Select device 0, then build the first CuPy array.
# NOTE(review): presumably set_device_id also makes device 0 the current
# CuPy device so the array is allocated there; the prints below let the
# reader confirm this.
pk.set_device_id(0)
cp_arr_0 = cp.arange(size).astype(np.int32)

# Same again after selecting device 1.
pk.set_device_id(1)
cp_arr_1 = cp.arange(size).astype(np.int32)

# Show which device each CuPy array reports.
print(cp_arr_0.device)
print(cp_arr_1.device)

# Sum-reduction workunit: for each index i, add cp_arr[i] to the accumulator.
# The decorator tags the cp_arr argument with space=pk.CudaSpace.
@pk.workunit(cp_arr = pk.ViewTypeInfo(space=pk.CudaSpace))
def reduction_cp(i: int, acc: pk.Acc[int], cp_arr: pk.View1D[int]):
    acc += cp_arr[i]

# Reduce each CuPy array after selecting the device with the same index.
# The view/result names carry that index so the labels printed below
# match the array that was actually reduced.
pk.set_device_id(1)
cp_view_1 = pk.from_cupy(cp_arr_1)
result_1 = pk.parallel_reduce(pk.RangePolicy(pk.Cuda, 0, size), reduction_cp, cp_arr=cp_view_1)
print(result_1)

pk.set_device_id(0)
cp_view_0 = pk.from_cupy(cp_arr_0)
result_0 = pk.parallel_reduce(pk.RangePolicy(pk.Cuda, 0, size), reduction_cp, cp_arr=cp_view_0)

print(f"Reducing array 0: {result_0}")
print(f"Reducing array 1: {result_1}")
print(f"Sum: {result_0 + result_1}")

# Create one 1D PyKokkos view per device: select the device first, then
# construct the view.
# NOTE(review): presumably each view is allocated on the device selected
# at construction time; confirm against pk.View.
pk.set_device_id(0)
view_0 = pk.View((size,), dtype=int)

pk.set_device_id(1)
view_1 = pk.View((size,), dtype=int)

# Initialisation workunit: store each index i into view[i].
@pk.workunit
def init_view(i: int, view: pk.View1D[int]):
    view[i] = i

# Sum-reduction workunit: for each index i, add view[i] to the accumulator.
@pk.workunit
def reduce_view(i: int, acc: pk.Acc[int], view: pk.View1D[int]):
    acc += view[i]

# For each device in turn: select it, fill its view with 0..size-1 using
# init_view, then sum the view with reduce_view.
pk.set_device_id(0)
pk.parallel_for(pk.RangePolicy(pk.Cuda, 0, size), init_view, view=view_0)
result_0 = pk.parallel_reduce(pk.RangePolicy(pk.Cuda, 0, size), reduce_view, view=view_0)

pk.set_device_id(1)
pk.parallel_for(pk.RangePolicy(pk.Cuda, 0, size), init_view, view=view_1)
result_1 = pk.parallel_reduce(pk.RangePolicy(pk.Cuda, 0, size), reduce_view, view=view_1)

# Report the per-view results and their total.
print(f"Reducing view 0: {result_0}")
print(f"Reducing view 1: {result_1}")
print(f"Sum: {result_0 + result_1}")
3 changes: 2 additions & 1 deletion pykokkos/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
initialize, finalize,
get_default_space, set_default_space,
get_default_precision, set_default_precision,
is_uvm_enabled, enable_uvm, disable_uvm
is_uvm_enabled, enable_uvm, disable_uvm,
set_device_id
)

initialize()
Expand Down
14 changes: 8 additions & 6 deletions pykokkos/core/compile.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ PK_REAL="${6}"
KOKKOS_LIB_PATH="${7}"
KOKKOS_INCLUDE_PATH="${8}"
COMPUTE_CAPABILITY="${9}"
LIB_SUFFIX="${10}"
COMPILER_PATH="${11}"
SRC=$(find -name "*.cpp")


Expand All @@ -34,11 +36,11 @@ if [ "${COMPILER}" == "g++" ]; then
-shared \
-fopenmp \
"${SRC}".o -o "${MODULE}" \
"${KOKKOS_LIB_PATH}/libkokkoscontainers.so" \
"${KOKKOS_LIB_PATH}/libkokkoscore.so"
"${KOKKOS_LIB_PATH}/libkokkoscontainers${LIB_SUFFIX}.so" \
"${KOKKOS_LIB_PATH}/libkokkoscore${LIB_SUFFIX}.so"

elif [ "${COMPILER}" == "nvcc" ]; then
"${KOKKOS_LIB_PATH}/../bin/nvcc_wrapper" \
"${COMPILER_PATH}" \
`python3 -m pybind11 --includes` \
-I.. \
-O3 \
Expand All @@ -54,14 +56,14 @@ elif [ "${COMPILER}" == "nvcc" ]; then
-Dpk_exec_space="Kokkos::${EXEC_SPACE}" \
-Dpk_real="${PK_REAL}"

"${KOKKOS_LIB_PATH}/../bin/nvcc_wrapper" \
"${COMPILER_PATH}" \
-I.. \
-O3 \
-shared \
-arch="${COMPUTE_CAPABILITY}" \
--expt-extended-lambda \
-fopenmp \
"${SRC}".o -o "${MODULE}" \
"${KOKKOS_LIB_PATH}/libkokkoscontainers.so" \
"${KOKKOS_LIB_PATH}/libkokkoscore.so"
"${KOKKOS_LIB_PATH}/libkokkoscontainers${LIB_SUFFIX}.so" \
"${KOKKOS_LIB_PATH}/libkokkoscore${LIB_SUFFIX}.so"
fi
2 changes: 1 addition & 1 deletion pykokkos/core/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def compile_entity(
if module_setup.is_compiled():
return

cpp_setup = CppSetup(module_setup.module_file, self.functor_file, self.bindings_file)
cpp_setup = CppSetup(module_setup.module_file, module_setup.gpu_module_files, self.functor_file, self.bindings_file)
translator = StaticTranslator(module_setup.name, self.functor_file, members)

t_start: float = time.perf_counter()
Expand Down
118 changes: 100 additions & 18 deletions pykokkos/core/cpp_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@
import shutil
import subprocess
import sys
from types import ModuleType
from typing import List, Tuple


from pykokkos.interface import ExecutionSpace, get_default_layout, get_default_memory_space
from pykokkos.interface import (
ExecutionSpace, get_default_layout, get_default_memory_space,
is_host_execution_space
)
import pykokkos.kokkos_manager as km


Expand All @@ -15,16 +18,18 @@ class CppSetup:
Creates the directory to hold the translation and invokes the compiler
"""

def __init__(self, module_file: str, functor: str, bindings: str):
def __init__(self, module_file: str, gpu_module_files: List[str], functor: str, bindings: str):
"""
CppSetup constructor
:param module: the name of the file containing the compiled Python module
:param gpu_module_files: the list of names of files containing for each gpu module
:param functor: the name of the generated functor file
:param bindings: the name of the generated bindings file
"""

self.module_file: str = module_file
self.gpu_module_files: List[str] = gpu_module_files
self.functor_file: str = functor
self.bindings_file: str = bindings

Expand Down Expand Up @@ -58,6 +63,8 @@ def compile(
self.write_source(output_dir, functor, bindings)
self.copy_script(output_dir)
self.invoke_script(output_dir, space, enable_uvm, compiler)
if space is ExecutionSpace.Cuda and km.is_multi_gpu_enabled():
self.copy_multi_gpu_kernel(output_dir)


def initialize_directory(self, name: Path) -> None:
Expand Down Expand Up @@ -115,15 +122,17 @@ def copy_script(self, output_dir: Path) -> None:
print(f"Exception while copying views and makefile: {ex}")
sys.exit(1)

def get_kokkos_paths(self) -> Tuple[Path, Path]:
def get_kokkos_paths(self, space: ExecutionSpace, compiler: str) -> Tuple[Path, Path, Path]:
"""
Get the paths of the Kokkos instal lib and include
directories. If the environment variable is set, use that
Kokkos install. If not, fall back to installed pykokkos-base
package.
Kokkos install. If not, fall back to the installed
pykokkos-base package.
:returns: a tuple of paths to the Kokkos lib/ and include/
directories respectively
:param space: the execution space to compile for
:param compiler: what compiler to use
:returns: a tuple of paths to the Kokkos lib/, include/,
and compiler to be used
"""

lib_path: Path
Expand All @@ -139,20 +148,46 @@ def get_kokkos_paths(self) -> Tuple[Path, Path]:

return lib_path, include_path

from pykokkos.bindings import kokkos
install_path = Path(kokkos.__path__[0]).parent
is_cpu: bool = is_host_execution_space(space)
kokkos_lib: ModuleType = km.get_kokkos_module(is_cpu)
install_path = Path(kokkos_lib.__path__[0])
lib_parent_path: Path
if km.is_multi_gpu_enabled():
lib_parent_path = install_path
else:
lib_parent_path = install_path.parent

if (install_path / "lib").is_dir():
lib_path = install_path / "lib"
elif (install_path / "lib64").is_dir():
lib_path = install_path / "lib64"
if (lib_parent_path / "lib").is_dir():
lib_path = lib_parent_path / "lib"
elif (lib_parent_path / "lib64").is_dir():
lib_path = lib_parent_path / "lib64"
else:
raise RuntimeError("lib/ or lib64/ directories not found in installed pykokkos-base package."
f" Try setting {self.lib_path_env} instead.")

include_path = lib_path.parent / "include/kokkos"
include_path = install_path.parent / "include/kokkos"

compiler_path: Path
if compiler != "nvcc":
compiler_path = Path("g++")
else:
compiler_path = install_path.parent / "bin/nvcc_wrapper"

return lib_path, include_path, compiler_path

def get_kokkos_lib_suffix(self, space: ExecutionSpace) -> str:
    """
    Get the suffix of the libkokkoscore and libkokkoscontainers
    libraries corresponding to the enabled device

    :param space: the execution space to compile for
    :returns: the suffix as a string; empty for host execution
        spaces or when multi-GPU support is not enabled
    """

    # No per-device suffix applies to host builds or single-GPU builds
    if is_host_execution_space(space) or not km.is_multi_gpu_enabled():
        return ""

    # Otherwise the suffix is derived from the currently selected device id
    return f"_{km.get_device_id()}"

def invoke_script(self, output_dir: Path, space: ExecutionSpace, enable_uvm: bool, compiler: str) -> None:
"""
Expand All @@ -176,8 +211,10 @@ def invoke_script(self, output_dir: Path, space: ExecutionSpace, enable_uvm: boo
precision: str = km.get_default_precision().__name__.split(".")[-1]
lib_path: Path
include_path: Path
lib_path, include_path = self.get_kokkos_paths()
compiler_path: Path
lib_path, include_path, compiler_path = self.get_kokkos_paths(space, compiler)
compute_capability: str = self.get_cuda_compute_capability(compiler)
lib_suffix: str = self.get_kokkos_lib_suffix(space)

command: List[str] = [f"./{self.script}",
compiler, # What compiler to use
Expand All @@ -188,7 +225,9 @@ def invoke_script(self, output_dir: Path, space: ExecutionSpace, enable_uvm: boo
precision, # Default real precision
str(lib_path), # Path to Kokkos install lib/ directory
str(include_path), # Path to Kokkos install include/ directory
compute_capability] # Device compute capability
compute_capability, # Device compute capability
lib_suffix, # The libkokkos* suffix identifying the gpu
str(compiler_path)] # The path to the compiler to use
compile_result = subprocess.run(command, cwd=output_dir, capture_output=True, check=False)

if compile_result.returncode != 0:
Expand All @@ -207,6 +246,49 @@ def invoke_script(self, output_dir: Path, space: ExecutionSpace, enable_uvm: boo
print(f"patchelf failed")
sys.exit(1)

def copy_multi_gpu_kernel(self, output_dir: Path) -> None:
    """
    Copy the kernel .so file once for each device and run patchelf
    to point to the right library. Exits the process with status 1
    if a copy or any patchelf invocation fails.

    :param output_dir: the base directory
    """

    def run_patchelf(*args: str) -> str:
        # Run patchelf from output_dir and return its decoded stdout.
        # Every invocation is checked: a failed rewrite would otherwise
        # silently leave a kernel linked against the wrong libraries.
        result = subprocess.run(["patchelf", *args], cwd=output_dir, capture_output=True, check=False)
        if result.returncode != 0:
            print(result.stderr.decode("utf-8"))
            print("patchelf failed")
            sys.exit(1)

        return result.stdout.decode("utf-8")

    original_module: Path = output_dir / self.module_file
    device_modules = zip(self.gpu_module_files, km.get_kokkos_gpu_modules())

    # The position in the list is used as the device id of each copy
    for device_id, (kernel_filename, kokkos_gpu_module) in enumerate(device_modules):
        kernel_path: Path = output_dir / kernel_filename

        try:
            shutil.copy(original_module, kernel_path)
        except Exception as ex:
            print(f"Exception while copying kernel: {ex}")
            sys.exit(1)

        # NOTE(review): only lib/ is considered here, whereas
        # get_kokkos_paths also accepts lib64/ -- confirm this is intended
        lib_path: Path = Path(kokkos_gpu_module.__path__[0]) / "lib"
        run_patchelf("--set-rpath", str(lib_path), kernel_filename)

        # Now replace the needed libkokkos* libraries with the correct version
        needed_libraries: str = run_patchelf("--print-needed", kernel_filename)

        for line in needed_libraries.splitlines():
            if "libkokkoscore" not in line and "libkokkoscontainers" not in line:
                continue

            # Line will be of the form f"libkokkoscore_{id}.so.3.4"
            # This will extract id
            current_id: int = int(line.split("_")[1].split(".")[0])
            to_add: str = line.replace(f"_{current_id}", f"_{device_id}")

            # Nothing to rewrite when the copy already names this device
            if to_add != line:
                run_patchelf("--replace-needed", line, to_add, kernel_filename)

def get_cuda_compute_capability(self, compiler: str) -> str:
"""
Get the compute capability of an Nvidia GPU
Expand Down
8 changes: 7 additions & 1 deletion pykokkos/core/module_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import sys
import sysconfig
import time
from typing import Callable, Optional, Union
from typing import Callable, List, Optional, Union

from pykokkos.interface import ExecutionSpace
import pykokkos.kokkos_manager as km
Expand Down Expand Up @@ -105,9 +105,15 @@ def __init__(

self.main: Path = self.get_main_path()
self.output_dir: Optional[Path] = self.get_output_dir(self.main, self.metadata, space)
self.gpu_module_files: List[str] = []
if km.is_multi_gpu_enabled():
self.gpu_module_files = [f"kernel{device_id}{suffix}" for device_id in range(km.get_num_gpus())]

if self.output_dir is not None:
self.path: str = os.path.join(self.output_dir, self.module_file)
if km.is_multi_gpu_enabled():
self.gpu_module_paths: str = [os.path.join(self.output_dir, module_file) for module_file in self.gpu_module_files]

self.name: str = self.path.replace("/", "_")
self.name: str = self.name.replace("-", "_")
self.name: str = self.name.replace(".", "_")
Expand Down
Loading

0 comments on commit e3fbe2b

Please sign in to comment.