-
Notifications
You must be signed in to change notification settings - Fork 0
/
batch_compress_juwels.sh
41 lines (31 loc) · 1.46 KB
/
batch_compress_juwels.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#!/bin/bash
#SBATCH --partition=batch
#SBATCH --account= #NEED TO SET ACCOUNT FOR COMPUTE TIME
#SBATCH --nodes=1
#SBATCH --time=12:00:00
#SBATCH --job-name=compress
#SBATCH --output=%x_%j.out
#SBATCH --error=%x_%j.err
# NOTE: no quotes around the output/error patterns — sbatch does not perform
# shell quoting on #SBATCH directives, so quotes may end up in the filename.

# Begin of section with executable commands
#
# Purpose: submit a compression job on JUWELS that runs compress_data.sh
# over a data directory, writing compressed netCDF output elsewhere.
# Prerequisite: a conda environment named "compression" (see README.md).

# Fail fast: abort on any error, on use of unset variables, and if any
# stage of a pipeline fails.
set -euo pipefail

# Load parallel and netcdf modules FIRST: purging after activating conda
# could strip PATH/LD_LIBRARY_PATH entries the environment depends on.
module --force purge
module load Stages/2024
module load GCC/12.3.0
module load OpenMPI/4.1.5
module load netCDF/4.9.2
module load parallel/20230722

# Load conda environment -> check README.md to find out how to install.
# 'conda activate' needs the conda shell hook, which is not set up in
# non-interactive batch shells — source it explicitly before activating.
source "$(conda info --base)/etc/profile.d/conda.sh"
conda activate compression

# Go to folder where you keep compress_data.sh.
# Abort immediately if the (placeholder) path has not been set correctly,
# instead of running compress_data.sh from the wrong directory.
cd /p/home/jusers/....... || exit 1

# make sure HDF5_PLUGIN_PATH is correctly defined
export HDF5_PLUGIN_PATH='/p/project/chhb19/mueller29/miniconda3/envs/compression/lib/python3.10/site-packages/hdf5plugin/plugins/' #check README.md to find out the correct path

# probably not necessary, but the script was throwing warnings otherwise
export LANGUAGE=en_US.UTF-8
export LC_ALL=en_US.UTF-8
export LC_CTYPE=UTF-8
export LANG=en_US.UTF-8

data_path='/p/scratch/...' #put the path to the data you want to compress here
out_path='/p/scratch/...' #put the path to the directory where you want to save the compressed data here
tmp_path='/p/scratch/.../tmp/' #Juwels has limitation for the /tmp directory, this helps to avoid quota problems

# Quote all path variables so paths containing spaces are passed intact.
# Use 'zstd' or 'lz4' as compression method; 40 = number of parallel cores.
./compress_data.sh "${data_path}" "${out_path}" zstd 40 "${tmp_path}"