From 12216d112f772bc2acdcbf1ddbdce55822bfddcd Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Thu, 24 Aug 2023 20:44:36 -0700 Subject: [PATCH] Update ml-slurm base image --- examples/README.md | 5 ++--- examples/ml-slurm.yaml | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/examples/README.md b/examples/README.md index 681ef2a5eb..9e026cfd77 100644 --- a/examples/README.md +++ b/examples/README.md @@ -258,7 +258,7 @@ to 256 ### [ml-slurm.yaml] ![core-badge] This blueprint provisions an HPC cluster running the Slurm scheduler with the -machine learning frameworks [PyTorch] and [TensorFlow] pre-installed on every +machine learning frameworks PyTorch and TensorFlow pre-installed on every VM. The cluster has 2 partitions: * [A2 family VMs][a2] with the NVIDIA A100 GPU accelerator @@ -295,8 +295,7 @@ sbatch -N 1 torch_test.sh When you are done, clean up the resources in reverse order of creation: ```text -terraform -chdir=ml-example/cluster destroy -terraform -chdir=ml-example/primary destroy +./ghpc destroy ml-example ``` Finally, browse to the [Cloud Console][console-images] to delete your custom diff --git a/examples/ml-slurm.yaml b/examples/ml-slurm.yaml index f2dfe6ca9a..23f732832e 100644 --- a/examples/ml-slurm.yaml +++ b/examples/ml-slurm.yaml @@ -134,7 +134,7 @@ deployment_groups: omit_external_ip: false source_image_project_id: [schedmd-slurm-public] # see latest in https://github.com/SchedMD/slurm-gcp/blob/master/docs/images.md#published-image-family - source_image_family: schedmd-v5-slurm-22-05-9-debian-11 + source_image_family: slurm-gcp-5-7-debian-11 # You can find size of source image by using following command # gcloud compute images describe-from-family --project schedmd-slurm-public disk_size: $(vars.disk_size_gb)