From 6acb0833101c0bdcbab3b2f7ac036948d1033b05 Mon Sep 17 00:00:00 2001 From: Christoph Schranz Date: Tue, 26 Nov 2024 14:31:18 +0100 Subject: [PATCH 1/3] updated INVALID config --- .build/Dockerfile | 45 +++++++++++++++++------------------- .build/docker-stacks | 2 +- .build/docker_healthcheck.py | 4 ++-- .build/start.sh | 14 +++++++---- custom/gpulibs.Dockerfile | 13 ++++++----- generate-Dockerfile.sh | 2 +- 6 files changed, 41 insertions(+), 39 deletions(-) diff --git a/.build/Dockerfile b/.build/Dockerfile index dd07a47..f8fd1d0 100755 --- a/.build/Dockerfile +++ b/.build/Dockerfile @@ -41,7 +41,7 @@ USER root # Install all OS dependencies for the Server that starts # but lacks all features (e.g., download as all possible file formats) -ENV DEBIAN_FRONTEND noninteractive +ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update --yes && \ # - `apt-get upgrade` is run to patch known vulnerabilities in system packages # as the Ubuntu base image is rebuilt too seldom sometimes (less than once a month) @@ -51,6 +51,10 @@ RUN apt-get update --yes && \ bzip2 \ ca-certificates \ locales \ + # - `netbase` provides /etc/{protocols,rpc,services}, part of POSIX + # and required by various C functions like getservbyname and getprotobyname + # https://github.com/jupyter/docker-stacks/pull/2129 + netbase \ sudo \ # - `tini` is installed as a helpful container entrypoint, # that reaps zombie processes and such of the actual executable we want to start @@ -85,7 +89,7 @@ RUN sed -i 's/^#force_color_prompt=yes/force_color_prompt=yes/' /etc/skel/.bashr # and docs: https://docs.conda.io/projects/conda/en/latest/dev-guide/deep-dives/activation.html echo 'eval "$(conda shell.bash hook)"' >> /etc/skel/.bashrc -# Create NB_USER with name jovyan user with UID=1000 and in the 'users' group +# Create "${NB_USER}" user (`jovyan` by default) with UID="${NB_UID}" (`1000` by default) and in the 'users' group # and make sure these dirs are writable by the `users` group. 
RUN echo "auth requisite pam_deny.so" >> /etc/pam.d/su && \ sed -i.bak -e 's/^%admin/#%admin/' /etc/sudoers && \ @@ -134,9 +138,9 @@ RUN set -x && \ --root-prefix="${CONDA_DIR}" \ --prefix="${CONDA_DIR}" \ --yes \ - "${PYTHON_SPECIFIER}" \ + 'jupyter_core' \ 'mamba' \ - 'jupyter_core' && \ + "${PYTHON_SPECIFIER}" && \ rm -rf /tmp/bin/ && \ # Pin major.minor version of python # https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-pkgs.html#preventing-packages-from-updating-pinning @@ -199,7 +203,7 @@ RUN apt-get update --yes && \ USER ${NB_UID} -# Install JupyterLab, Jupyter Notebook, JupyterHub and NBClassic +# Install JupyterHub, JupyterLab, NBClassic and Jupyter Notebook # Generate a Jupyter Server config # Cleanup temporary files # Correct permissions @@ -207,10 +211,10 @@ USER ${NB_UID} # files across image layers when the permissions change WORKDIR /tmp RUN mamba install --yes \ - 'jupyterlab' \ - 'notebook' \ 'jupyterhub' \ - 'nbclassic' && \ + 'jupyterlab' \ + 'nbclassic' \ + 'notebook' && \ jupyter server --generate-config && \ mamba clean --all -f -y && \ npm cache clean --force && \ @@ -339,7 +343,7 @@ RUN mamba install --yes \ 'dask' \ 'dill' \ 'h5py' \ - 'ipympl'\ + 'ipympl' \ 'ipywidgets' \ 'jupyterlab-git' \ 'matplotlib-base' \ @@ -357,7 +361,7 @@ RUN mamba install --yes \ 'sqlalchemy' \ 'statsmodels' \ 'sympy' \ - 'widgetsnbextension'\ + 'widgetsnbextension' \ 'xlrd' && \ mamba clean --all -f -y && \ fix-permissions "${CONDA_DIR}" && \ @@ -399,7 +403,7 @@ RUN mamba install --quiet --yes \ # using device_lib.list_local_devices() the cudNN version is shown, adapt version to tested compat USER ${NB_UID} RUN pip install --upgrade pip && \ - pip install --no-cache-dir tensorflow==2.16.1 keras==3.1.1 && \ + pip install --no-cache-dir tensorflow[and-cuda]==2.17.0 keras==3.6.0 && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" @@ -410,11 +414,11 @@ RUN pip install --upgrade pip && \ # && torchviz==0.0.2 --extra-index-url 
https://download.pytorch.org/whl/cu121 RUN set -ex \ && buildDeps=' \ - torch==2.2.2 \ - torchvision==0.17.2 \ - torchaudio==2.2.2 \ + torch==2.5.1 \ + torchvision==0.20.1 \ + torchaudio==2.5.1 \ ' \ - && pip install --no-cache-dir $buildDeps --extra-index-url https://download.pytorch.org/whl/cu121 \ + && pip install --no-cache-dir $buildDeps --extra-index-url https://download.pytorch.org/whl/cu124 \ && fix-permissions "${CONDA_DIR}" \ && fix-permissions "/home/${NB_USER}" @@ -426,14 +430,6 @@ RUN apt-get update && \ apt-get install -y --no-install-recommends cmake libncurses5-dev libncursesw5-dev git && \ apt-get clean && rm -rf /var/lib/apt/lists/* -USER $NB_UID -# These need to be two separate pip install commands, otherwise it will throw an error -# attempting to resolve the nvidia-cuda-nvcc package at the same time as nvidia-pyindex -RUN pip install --no-cache-dir nvidia-pyindex && \ - pip install --no-cache-dir nvidia-cuda-nvcc && \ - fix-permissions "${CONDA_DIR}" && \ - fix-permissions "/home/${NB_USER}" - # reinstall nvcc with cuda-nvcc to install ptax USER $NB_UID # These need to be two separate pip install commands, otherwise it will throw an error @@ -443,7 +439,8 @@ RUN pip install --no-cache-dir nvidia-pyindex && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" -# Install cuda-nvcc with sepecific version, see here: https://anaconda.org/nvidia/cuda-nvcc/labels +# Install cuda-nvcc with sepecific version, see here: +# https://anaconda.org/nvidia/cuda-nvcc/labels RUN mamba install -c nvidia cuda-nvcc=12.3.107 -y && \ mamba clean --all -f -y && \ fix-permissions $CONDA_DIR && \ diff --git a/.build/docker-stacks b/.build/docker-stacks index e838ff3..0098788 160000 --- a/.build/docker-stacks +++ b/.build/docker-stacks @@ -1 +1 @@ -Subproject commit e838ff397a2d9c2ad0faae051ef0ec4f20732320 +Subproject commit 00987883e58d139b5ed01f803f95e639c59bf340 diff --git a/.build/docker_healthcheck.py b/.build/docker_healthcheck.py index 
7dd3de0..b0db1a8 100755 --- a/.build/docker_healthcheck.py +++ b/.build/docker_healthcheck.py @@ -9,9 +9,9 @@ import requests # Several operations below deliberately don't check for possible errors -# As this is a healthcheck, it should succeed or raise an exception on error +# As this is a health check, it should succeed or raise an exception on error -# Docker runs healtchecks using an exec +# Docker runs health checks using an exec # It uses the default user configured when running the image: root for the case of a custom NB_USER or jovyan for the case of the default image user. # We manually change HOME to make `jupyter --runtime-dir` report a correct path # More information: diff --git a/.build/start.sh b/.build/start.sh index 33d12d8..295ee26 100755 --- a/.build/start.sh +++ b/.build/start.sh @@ -155,11 +155,14 @@ if [ "$(id -u)" == 0 ]; then unset_explicit_env_vars _log "Running as ${NB_USER}:" "${cmd[@]}" - exec sudo --preserve-env --set-home --user "${NB_USER}" \ - LD_LIBRARY_PATH="${LD_LIBRARY_PATH}" \ - PATH="${PATH}" \ - PYTHONPATH="${PYTHONPATH:-}" \ - "${cmd[@]}" + if [ "${NB_USER}" = "root" ] && [ "${NB_UID}" = "$(id -u "${NB_USER}")" ] && [ "${NB_GID}" = "$(id -g "${NB_USER}")" ]; then + HOME="/home/root" exec "${cmd[@]}" + else + exec sudo --preserve-env --set-home --user "${NB_USER}" \ + LD_LIBRARY_PATH="${LD_LIBRARY_PATH}" \ + PATH="${PATH}" \ + PYTHONPATH="${PYTHONPATH:-}" \ + "${cmd[@]}" # Notes on how we ensure that the environment that this container is started # with is preserved (except vars listed in JUPYTER_ENV_VARS_TO_UNSET) when # we transition from running as root to running as NB_USER. @@ -187,6 +190,7 @@ if [ "$(id -u)" == 0 ]; then # above in /etc/sudoers.d/path. Thus PATH is irrelevant to how the above # sudo command resolves the path of `${cmd[@]}`. The PATH will be relevant # for resolving paths of any subprocesses spawned by `${cmd[@]}`. 
+ fi # The container didn't start as the root user, so we will have to act as the # user we started as. diff --git a/custom/gpulibs.Dockerfile b/custom/gpulibs.Dockerfile index e789019..371ffbb 100644 --- a/custom/gpulibs.Dockerfile +++ b/custom/gpulibs.Dockerfile @@ -14,7 +14,7 @@ RUN mamba install --quiet --yes \ # using device_lib.list_local_devices() the cudNN version is shown, adapt version to tested compat USER ${NB_UID} RUN pip install --upgrade pip && \ - pip install --no-cache-dir tensorflow==2.16.1 keras==3.1.1 && \ + pip install --no-cache-dir tensorflow[and-cuda]==2.17.0 keras==3.6.0 && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" @@ -25,11 +25,11 @@ RUN pip install --upgrade pip && \ # && torchviz==0.0.2 --extra-index-url https://download.pytorch.org/whl/cu121 RUN set -ex \ && buildDeps=' \ - torch==2.2.2 \ - torchvision==0.17.2 \ - torchaudio==2.2.2 \ + torch==2.5.1 \ + torchvision==0.20.1 \ + torchaudio==2.5.1 \ ' \ - && pip install --no-cache-dir $buildDeps --extra-index-url https://download.pytorch.org/whl/cu121 \ + && pip install --no-cache-dir $buildDeps --extra-index-url https://download.pytorch.org/whl/cu124 \ && fix-permissions "${CONDA_DIR}" \ && fix-permissions "/home/${NB_USER}" @@ -50,7 +50,8 @@ RUN pip install --no-cache-dir nvidia-pyindex && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" -# Install cuda-nvcc with sepecific version, see here: https://anaconda.org/nvidia/cuda-nvcc/labels +# Install cuda-nvcc with sepecific version, see here: +# https://anaconda.org/nvidia/cuda-nvcc/labels RUN mamba install -c nvidia cuda-nvcc=12.3.107 -y && \ mamba clean --all -f -y && \ fix-permissions $CONDA_DIR && \ diff --git a/generate-Dockerfile.sh b/generate-Dockerfile.sh index a4ef0d6..4d77962 100755 --- a/generate-Dockerfile.sh +++ b/generate-Dockerfile.sh @@ -5,7 +5,7 @@ cd $(cd -P -- "$(dirname -- "$0")" && pwd -P) export DOCKERFILE=".build/Dockerfile" export STACKS_DIR=".build/docker-stacks" 
# please test the build of the commit in https://github.com/jupyter/docker-stacks/commits/main in advance -export HEAD_COMMIT="e838ff397a2d9c2ad0faae051ef0ec4f20732320" +export HEAD_COMMIT="00987883e58d139b5ed01f803f95e639c59bf340" while [[ "$#" -gt 0 ]]; do case $1 in -p|--pw|--password) PASSWORD="$2" && USE_PASSWORD=1; shift;; From abd6ecd5d0baaba14b2b6c3b74a1fb327e06f8b9 Mon Sep 17 00:00:00 2001 From: Christoph Schranz Date: Tue, 26 Nov 2024 14:57:17 +0100 Subject: [PATCH 2/3] updated to cuda 12.5 --- .build/Dockerfile | 28 ++++++++++++++-------------- custom/gpulibs.Dockerfile | 6 +++--- custom/header.Dockerfile | 4 ++-- custom/usefulpackages.Dockerfile | 18 +++++++++--------- 4 files changed, 28 insertions(+), 28 deletions(-) diff --git a/.build/Dockerfile b/.build/Dockerfile index f8fd1d0..6076194 100755 --- a/.build/Dockerfile +++ b/.build/Dockerfile @@ -7,8 +7,8 @@ # Use NVIDIA CUDA as base image and run the same installation as in the other packages. # The version of cuda must match those of the packages installed in src/Dockerfile.gpulibs -FROM nvidia/cuda:12.3.2-cudnn9-runtime-ubuntu22.04 -LABEL authors="Christoph Schranz , Mathematical Michael " +FROM nvidia/cuda:12.5.1-cudnn-runtime-ubuntu22.04 +LABEL authors="Christoph Schranz " # This is a concatenated Dockerfile, the maintainers of subsequent sections may vary. RUN chmod 1777 /tmp && chmod 1777 /var/tmp @@ -387,7 +387,7 @@ WORKDIR "${HOME}" ########################## Dependency: gpulibs ############################# ############################################################################ -LABEL maintainer="Christoph Schranz , Mathematical Michael " +LABEL authors="Christoph Schranz " # Install dependencies for e.g. 
PyTorch RUN mamba install --quiet --yes \ @@ -403,7 +403,7 @@ RUN mamba install --quiet --yes \ # using device_lib.list_local_devices() the cudNN version is shown, adapt version to tested compat USER ${NB_UID} RUN pip install --upgrade pip && \ - pip install --no-cache-dir tensorflow[and-cuda]==2.17.0 keras==3.6.0 && \ + pip install --no-cache-dir tensorflow==2.18.0 keras==3.6.0 && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" @@ -441,7 +441,7 @@ RUN pip install --no-cache-dir nvidia-pyindex && \ # Install cuda-nvcc with sepecific version, see here: # https://anaconda.org/nvidia/cuda-nvcc/labels -RUN mamba install -c nvidia cuda-nvcc=12.3.107 -y && \ +RUN mamba install -c nvidia cuda-nvcc=12.5.82 -y && \ mamba clean --all -f -y && \ fix-permissions $CONDA_DIR && \ fix-permissions /home/$NB_USER @@ -455,7 +455,7 @@ USER $NB_UID ############################ Useful packages ############################### ############################################################################ -LABEL authors="Christoph Schranz , Mathematical Michael " +LABEL authors="Christoph Schranz " USER root @@ -468,7 +468,7 @@ USER $NB_UID RUN set -ex \ && buildDeps=' \ graphviz==0.20.3 \ - pytest==8.1.1 \ + pytest==8.3.3 \ ' \ && pip install --no-cache-dir $buildDeps \ && fix-permissions "${CONDA_DIR}" \ @@ -477,8 +477,8 @@ RUN set -ex \ # upgrade jupyter-server for compatibility RUN set -ex \ && buildDeps=' \ - distributed==2024.4.1 \ - jupyter-server==2.13 \ + distributed==2024.11.2 \ + jupyter-server==2.14.2 \ ' \ && pip install --no-cache-dir $buildDeps \ && fix-permissions "${CONDA_DIR}" \ @@ -488,15 +488,15 @@ RUN set -ex \ && buildDeps=' \ # install extension manager jupyter_contrib_nbextensions==0.7.0 \ - jupyter_nbextensions_configurator==0.6.3 \ + jupyter_nbextensions_configurator==0.6.4 \ # install git extension - jupyterlab-git==0.50.0 \ + jupyterlab-git==0.50.2 \ # install plotly extension - plotly==5.20.0 \ + plotly==5.24.1 \ # install drawio and 
graphical extensions, not compatible with Jupyterlab 4.X yet # ipydrawio==1.3.0 \ - ipyleaflet==0.18.2 \ - ipywidgets==8.1.2 \ + ipyleaflet==0.19.2 \ + ipywidgets==8.1.5 \ # install spell checker jupyterlab-spellchecker==0.8.4 \ ' \ diff --git a/custom/gpulibs.Dockerfile b/custom/gpulibs.Dockerfile index 371ffbb..b7b2309 100644 --- a/custom/gpulibs.Dockerfile +++ b/custom/gpulibs.Dockerfile @@ -1,4 +1,4 @@ -LABEL maintainer="Christoph Schranz , Mathematical Michael " +LABEL authors="Christoph Schranz " # Install dependencies for e.g. PyTorch RUN mamba install --quiet --yes \ @@ -14,7 +14,7 @@ RUN mamba install --quiet --yes \ # using device_lib.list_local_devices() the cudNN version is shown, adapt version to tested compat USER ${NB_UID} RUN pip install --upgrade pip && \ - pip install --no-cache-dir tensorflow[and-cuda]==2.17.0 keras==3.6.0 && \ + pip install --no-cache-dir tensorflow==2.18.0 keras==3.6.0 && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" @@ -52,7 +52,7 @@ RUN pip install --no-cache-dir nvidia-pyindex && \ # Install cuda-nvcc with sepecific version, see here: # https://anaconda.org/nvidia/cuda-nvcc/labels -RUN mamba install -c nvidia cuda-nvcc=12.3.107 -y && \ +RUN mamba install -c nvidia cuda-nvcc=12.5.82 -y && \ mamba clean --all -f -y && \ fix-permissions $CONDA_DIR && \ fix-permissions /home/$NB_USER diff --git a/custom/header.Dockerfile b/custom/header.Dockerfile index 3d6ef22..b12636c 100644 --- a/custom/header.Dockerfile +++ b/custom/header.Dockerfile @@ -1,7 +1,7 @@ # Use NVIDIA CUDA as base image and run the same installation as in the other packages. 
# The version of cuda must match those of the packages installed in src/Dockerfile.gpulibs -FROM nvidia/cuda:12.3.2-cudnn9-runtime-ubuntu22.04 -LABEL authors="Christoph Schranz , Mathematical Michael " +FROM nvidia/cuda:12.5.1-cudnn-runtime-ubuntu22.04 +LABEL authors="Christoph Schranz " # This is a concatenated Dockerfile, the maintainers of subsequent sections may vary. RUN chmod 1777 /tmp && chmod 1777 /var/tmp diff --git a/custom/usefulpackages.Dockerfile b/custom/usefulpackages.Dockerfile index d7d2727..789b221 100644 --- a/custom/usefulpackages.Dockerfile +++ b/custom/usefulpackages.Dockerfile @@ -1,4 +1,4 @@ -LABEL authors="Christoph Schranz , Mathematical Michael " +LABEL authors="Christoph Schranz " USER root @@ -11,7 +11,7 @@ USER $NB_UID RUN set -ex \ && buildDeps=' \ graphviz==0.20.3 \ - pytest==8.1.1 \ + pytest==8.3.3 \ ' \ && pip install --no-cache-dir $buildDeps \ && fix-permissions "${CONDA_DIR}" \ @@ -20,8 +20,8 @@ RUN set -ex \ # upgrade jupyter-server for compatibility RUN set -ex \ && buildDeps=' \ - distributed==2024.4.1 \ - jupyter-server==2.13 \ + distributed==2024.11.2 \ + jupyter-server==2.14.2 \ ' \ && pip install --no-cache-dir $buildDeps \ && fix-permissions "${CONDA_DIR}" \ @@ -31,15 +31,15 @@ RUN set -ex \ && buildDeps=' \ # install extension manager jupyter_contrib_nbextensions==0.7.0 \ - jupyter_nbextensions_configurator==0.6.3 \ + jupyter_nbextensions_configurator==0.6.4 \ # install git extension - jupyterlab-git==0.50.0 \ + jupyterlab-git==0.50.2 \ # install plotly extension - plotly==5.20.0 \ + plotly==5.24.1 \ # install drawio and graphical extensions, not compatible with Jupyterlab 4.X yet # ipydrawio==1.3.0 \ - ipyleaflet==0.18.2 \ - ipywidgets==8.1.2 \ + ipyleaflet==0.19.2 \ + ipywidgets==8.1.5 \ # install spell checker jupyterlab-spellchecker==0.8.4 \ ' \ From feda82078163eafa5266f5229442caf081f1b7ec Mon Sep 17 00:00:00 2001 From: Christoph Schranz Date: Tue, 26 Nov 2024 16:31:03 +0100 Subject: [PATCH 3/3] end of swarm 
support --- README.md | 130 ++++++++++----------------------------- add-to-swarm.sh | 58 ----------------- docker-compose-swarm.yml | 35 ----------- remove-from-swarm.sh | 5 -- 4 files changed, 34 insertions(+), 194 deletions(-) delete mode 100755 add-to-swarm.sh delete mode 100644 docker-compose-swarm.yml delete mode 100755 remove-from-swarm.sh diff --git a/README.md b/README.md index 3229869..cbc8572 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,6 @@ for creating and maintaining a robust Python, R, and Julia toolstack for Data Sc 2. [Build Your image](#build-your-image) 3. [Tracing](#tracing) 4. [Configuration](#configuration) -5. [Deployment](#deployment-in-the-docker-swarm) 6. [Issues and Contributing](#issues-and-contributing) 7. [Support](#support) @@ -45,29 +44,29 @@ for creating and maintaining a robust Python, R, and Julia toolstack for Data Sc 3. Get access to your GPU via CUDA drivers within Docker containers. For this, follow the installation steps in this [Medium article](https://medium.com/@christoph.schranz/set-up-your-own-gpu-based-jupyterlab-e0d45fcacf43). You can confirm that you can access your GPU within Docker if the command below returns a result similar to this one: ```bash - docker run --gpus all nvidia/cuda:12.3.2-cudnn9-runtime-ubuntu22.04 nvidia-smi + docker run --gpus all nvidia/cuda:12.5.1-cudnn-runtime-ubuntu22.04 nvidia-smi ``` ```bash - Mon Apr 8 16:19:10 2024 - +---------------------------------------------------------------------------------------+ - | NVIDIA-SMI 545.23.05 Driver Version: 545.84 CUDA Version: 12.3 | - |-----------------------------------------+----------------------+----------------------+ - | GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC | - | Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. | - | | | MIG M. | - |=========================================+======================+======================| - | 0 NVIDIA GeForce RTX 3060 ... 
On | 00000000:01:00.0 Off | N/A | - | N/A 46C P8 10W / 60W | 105MiB / 6144MiB | 0% Default | - | | | N/A | - +-----------------------------------------+----------------------+----------------------+ - - +---------------------------------------------------------------------------------------+ - | Processes: | - | GPU GI CI PID Type Process name GPU Memory | - | ID ID Usage | - |=======================================================================================| - | No running processes found | - +---------------------------------------------------------------------------------------+ + Tue Nov 26 15:13:37 2024 + +-----------------------------------------------------------------------------------------+ + | NVIDIA-SMI 555.42.03 Driver Version: 555.85 CUDA Version: 12.5 | + |-----------------------------------------+------------------------+----------------------+ + | GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC | + | Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. | + | | | MIG M. | + |=========================================+========================+======================| + | 0 NVIDIA GeForce RTX 3060 ... On | 00000000:01:00.0 Off | N/A | + | N/A 43C P8 12W / 60W | 4569MiB / 6144MiB | 0% Default | + | | | N/A | + +-----------------------------------------+------------------------+----------------------+ + + +-----------------------------------------------------------------------------------------+ + | Processes: | + | GPU GI CI PID Type Process name GPU Memory | + | ID ID Usage | + |=========================================================================================| + | 0 N/A N/A 231 C /python3.11 N/A | + +-----------------------------------------------------------------------------------------+ ``` **It is important to keep your installed CUDA version in mind when you pull images. 
Note that you can't run images based on `nvidia/cuda:11.2` if you have only CUDA version 10.1 installed, use `nvcc --version` to get the correct cuda version. Additionally, a NVIDIA driver version of at least 520 is suggested, as the images are built and tested using this and later versions.** @@ -76,9 +75,9 @@ for creating and maintaining a robust Python, R, and Julia toolstack for Data Sc ```bash cd your-working-directory ll data # this path will be mounted by default - docker run --gpus all -d -it -p 8848:8888 -v $(pwd)/data:/home/jovyan/work -e GRANT_SUDO=yes -e JUPYTER_ENABLE_LAB=yes --user root cschranz/gpu-jupyter:v1.7_cuda-12.3_ubuntu-22.04 + docker run --gpus all -d -it -p 8848:8888 -v $(pwd)/data:/home/jovyan/work -e GRANT_SUDO=yes -e JUPYTER_ENABLE_LAB=yes --user root cschranz/gpu-jupyter:v1.8_cuda-12.5_ubuntu-22.04 ``` - This starts an instance of *GPU-Jupyter* with the tag `v1.7_cuda-12.3_ubuntu-22.04` at [http://localhost:8848](http://localhost:8848) (port `8848`). + This starts an instance of *GPU-Jupyter* with the tag `v1.8_cuda-12.5_ubuntu-22.04` at [http://localhost:8848](http://localhost:8848) (port `8848`). To log into Jupyterlab, you have to specify a token that you get from: ```bash docker exec -it [container-ID/name] jupyter server list @@ -89,6 +88,9 @@ for creating and maintaining a robust Python, R, and Julia toolstack for Data Sc Additionally, data within the host's `data` directory is shared with the container. 
The following images of GPU-Jupyter are available on [Dockerhub](https://hub.docker.com/r/cschranz/gpu-jupyter): + - `v1.8_cuda-12.5_ubuntu-22.04` (full image) + - `v1.8_cuda-12.5_ubuntu-22.04_python-only` (only with a python interpreter and without Julia and R) + - `v1.8_cuda-12.5_ubuntu-22.04_slim` (only with a python interpreter and without additional packages) - `v1.7_cuda-12.3_ubuntu-22.04` (full image) - `v1.7_cuda-12.3_ubuntu-22.04_python-only` (only with a python interpreter and without Julia and R) - `v1.7_cuda-12.3_ubuntu-22.04_slim` (only with a python interpreter and without additional packages) @@ -128,11 +130,11 @@ Additionally, data within the host's `data` directory is shared with the contain - `v1.4_cuda-10.1_ubuntu-18.04_slim` (only with a python interpreter and without additional packages) - The version, e.g. `v1.7`, declares the version of the generator setup. - The Cuda version, e.g. `cuda-12.3`, must match the CUDA driver version and be supported by the GPU libraries. + The version, e.g. `v1.8`, declares the version of the generator setup. + The Cuda version, e.g. `cuda-12.5`, must match the CUDA driver version and be supported by the GPU libraries. These and older versions of GPU-Jupyter are listed on [Dockerhub](https://hub.docker.com/r/cschranz/gpu-jupyter/tags?page=1&ordering=last_updated). In case you are using another version or the GPU libraries don't work on your hardware, please try to build the image on your own as described below. - Note that the images built for Ubuntu 20.04 LTS work also on Ubuntu 22.04 LTS is currently not supported. + Note that the images built for Ubuntu 20.04 LTS work also on Ubuntu 22.04 LTS. Within the Jupyterlab UI, ensure you can access your GPU by opening a new Terminal window and running `nvidia-smi`. Here, you can also install additional packages on top of the built image. 
@@ -143,16 +145,16 @@ we recommend checking out this [tutorial](https://www.youtube.com/watch?v=7wfPqA ## Build Your Image -Building a custom Docker image is the recommended option if you have a different GPU architecture or if you want to customize the pre-installed packages. The Dockerfiles in `custom/` can be modified to achieve this. To use a custom base image, modify `custom/header.Dockerfile`. To install specific GPU-related libraries, modify `custom/gpulibs.Dockerfile`, and to add specific libraries, append them to `custom/usefulpackages.Dockerfile`. +Building a custom Docker image is the recommended option if you have a different GPU architecture or if you want to customize the pre-installed packages. The Dockerfiles in `custom/` can be modified to achieve this. To use a custom base image, modify `custom/header.Dockerfile`. To install specific GPU-related libraries, modify `custom/gpulibs.Dockerfile`, and to add specific libraries, append them to `custom/usefulpackages.Dockerfile`. Moreover, this offers the option for a **static token** or password which does not change with a container's restart. -After making the necessary modifications, regenerate the `Dockerfile` in `/.build`. Once you have confirmed that your GPU is accessible within Docker containers by running `docker run --gpus all nvidia/cuda:12.3.2-cudnn9-runtime-ubuntu22.04 nvidia-smi` and seeing the GPU statistics, you can generate, build, and run the Docker image. +After making the necessary modifications, regenerate the `Dockerfile` in `/.build`. Once you have confirmed that your GPU is accessible within Docker containers by running `docker run --gpus all nvidia/cuda:12.5.1-cudnn-runtime-ubuntu22.04 nvidia-smi` and seeing the GPU statistics, you can generate, build, and run the Docker image. The following commands will start *GPU-Jupyter* on [localhost:8848](http://localhost:8848) with the default password `gpu-jupyter`. 
```bash git clone https://github.com/iot-salzburg/gpu-jupyter.git cd gpu-jupyter git branch # Check for extisting branches -git checkout v1.7_cuda-12.3_ubuntu-22.04 # select or create a new version +git checkout v1.8_cuda-12.5_ubuntu-22.04 # select or create a new version # generate the Dockerfile with python and without Julia and R (see options: --help) ./generate-Dockerfile.sh --python-only docker build -t gpu-jupyter .build/ # will take a while @@ -380,29 +382,7 @@ a version conflict, as some files have to be adapted. Here are some examples of -## Deployment in the Docker Swarm - -A Jupyter instance often requires data from other services. -If that data source is containerized in Docker and sharing a port for communication shouldn't be allowed, e.g., for security reasons, -then connecting the data source with *GPU-Jupyter* within a Docker Swarm is a great option! - -### Set up Docker Swarm and Registry - -This step requires a running [Docker Swarm](https://www.youtube.com/watch?v=x843GyFRIIY) on a cluster or at least on this node. -In order to register custom images in a local Docker Swarm cluster, -a registry instance must be deployed in advance. -Note that we are using port 5001, as many services use the default port 5000. - -```bash -sudo docker service create --name registry --publish published=5001,target=5000 registry:2 -curl 127.0.0.1:5001/v2/ -``` -This should output `{}`. \ - -Afterward, check if the registry service is available using `docker service ls`. - - -### Configure the shared Docker network +### Configure a shared Docker network Additionally, *GPU-Jupyter* is connected to the data source via the same *docker-network*. Therefore, This network must be set to **attachable** in the source's `docker-compose.yml`: @@ -419,49 +399,7 @@ networks: driver: overlay attachable: true ``` - In this example, - * The docker stack was deployed in Docker swarm with the name **elk** (`docker stack deploy ... 
elk`), - * The docker network has the name **datastack** within the `docker-compose.yml` file, - * This network is configured to be attachable in the `docker-compose.yml` file - * and the docker network has the name **elk_datastack**, see the following output: - ```bash - sudo docker network ls - # ... - # [UID] elk_datastack overlay swarm - # ... - ``` - The docker network name **elk_datastack** is used in the next step as a parameter. - -### Start GPU-Jupyter in Docker Swarm - -Finally, *GPU-Jupyter* can be deployed in the Docker Swarm with the shared network, using: - -```bash -./generate-Dockerfile.sh -./add-to-swarm.sh -p [port] -n [docker-network] -r [registry-port] -# e.g. ./add-to-swarm.sh -p 8848 -n elk_datastack -r 5001 -``` -where: -* **-p:** port specifies the port on which the service will be available. -* **-n:** docker-network is the name of the attachable network from the previous step, -e.g., here it is **elk_datastack**. -* **-r:** registry port is the port that is published by the registry service, the default is `5000`. - -Now, *GPU-jupyter* will be accessible here on [localhost:8848](http://localhost:8848) -with the default password `gpu-jupyter` and shares the network with the other data source, i.e., -all ports of the data source will be accessible within *GPU-Jupyter*, -even if they aren't routed it the source's `docker-compose` file. - -Check if everything works well using: -```bash -sudo docker service ps gpu_gpu-jupyter -docker service ps gpu_gpu-jupyter -``` - -To remove the service from the swarm, use: -```bash -./remove-from-swarm.sh -``` + In this example, the docker network has the name **datastack** as defined within the `docker-compose.yml` file and is configured to be attachable. 
## Issues and Contributing diff --git a/add-to-swarm.sh b/add-to-swarm.sh deleted file mode 100755 index 7865a87..0000000 --- a/add-to-swarm.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env bash -cd $(cd -P -- "$(dirname -- "$0")" && pwd -P) - -# Fetching port and network as input -PORT=8888 -REGISTRY=5000 -while [[ "$#" -gt 0 ]]; do case $1 in - -p|--port) PORT="$2"; shift;; - -r|--registry) REGISTRY="$2"; shift;; - -n|--network) NETWORK="$2"; shift;; -# -u|--uglify) uglify=1;; - *) echo "Unknown parameter passed: $1"; exit 1;; -esac; shift; done - -# Check if arguments are valid -if [[ $PORT != [0-9][0-9][0-9][0-9]* ]]; then - echo "Given port is not valid." - echo "Usage: $0 -p [port] -n [docker-network] -r [registry-port] # ports must be an integer with 4 or more digits." - exit 21 -fi - -if [[ $REGISTRY != [0-9][0-9][0-9][0-9]* ]]; then - echo "Given registry port is not valid." - echo "Usage: $0 -p [port] -n [docker-network] -r [registry-port] # ports must be an integer with 4 or more digits." - exit 21 -fi - -if [[ $NETWORK == "" ]]; then - echo "No docker network was provided to which this gpu-jupyter should be added to." - echo "Usage: $0 -p [port] -n [docker-network] -r [registry-port] # ports must be an integer with 4 or more digits." - exit 22 -fi -result=$(docker network ls) -if [[ "$result" != *" $NETWORK "* ]]; then - echo "Could not find network $NETWORK. Please provide a valid docker network." - echo "Please select a network:" - docker network ls - exit 23 -fi - -# starting in swarm -export HOSTNAME=$(hostname) -export JUPYTER_PORT=$PORT -export REGISTRY_PORT=$REGISTRY -export JUPYTER_NETWORK=$NETWORK -echo "Adding gpu-jupyter to the swarm on the node $HOSTNAME in the network $NETWORK on port $PORT and registry to port $REGISTRY." - -# substitute the blueprint docker-compose-swarm with the environment variables and stack deploy it. 
-envsubst < docker-compose-swarm.yml > .docker-compose-swarm.yml.envsubst -docker-compose -f .docker-compose-swarm.yml.envsubst build -docker-compose -f .docker-compose-swarm.yml.envsubst push -docker stack deploy --compose-file .docker-compose-swarm.yml.envsubst gpu -rm .docker-compose-swarm.yml.envsubst - -echo -echo "Added gpu-jupyter to docker swarm $NETWORK on port $JUPYTER_PORT." -echo "See 'docker service ps gpu_gpu-jupyter' for status info." -echo "See 'docker service logs -f gpu_gpu-jupyter' for logs." diff --git a/docker-compose-swarm.yml b/docker-compose-swarm.yml deleted file mode 100644 index 3fbdc56..0000000 --- a/docker-compose-swarm.yml +++ /dev/null @@ -1,35 +0,0 @@ -version: "3.4" -services: - gpu-jupyter: - image: 127.0.0.1:$REGISTRY_PORT/gpu-jupyter - build: .build - ports: - - $JUPYTER_PORT:8888 - volumes: - - ./data:/home/jovyan/work - environment: - GRANT_SUDO: "yes" - JUPYTER_ENABLE_LAB: "yes" - NB_UID: ${JUPYTER_UID:-1000} - NB_GID: ${JUPYTER_GID:-1000} - JUPYTER_TOKEN: ${JUPYTER_TOKEN} - # enable sudo permissions - user: - "root" - networks: - - default - - $JUPYTER_NETWORK - deploy: - placement: - constraints: [node.hostname == $HOSTNAME] - replicas: 1 - update_config: - parallelism: 2 - delay: 10s - restart_policy: - condition: on-failure - -networks: - $JUPYTER_NETWORK: - external: - name: $JUPYTER_NETWORK diff --git a/remove-from-swarm.sh b/remove-from-swarm.sh deleted file mode 100755 index 28671fd..0000000 --- a/remove-from-swarm.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env bash -cd $(cd -P -- "$(dirname -- "$0")" && pwd -P) - -echo "Removing gpu-jupyter from docker swarm." -docker stack rm gpu