diff --git a/.github/workflows/CI-full.yml b/.github/workflows/CI-full.yml
index ebfc08f9a1..cb5318ffea 100644
--- a/.github/workflows/CI-full.yml
+++ b/.github/workflows/CI-full.yml
@@ -94,10 +94,6 @@ jobs:
           java-version: '17'
           cache: 'maven'
-      - name: Install Singularity # to make singularity image for cluster
-        uses: eWaterCycle/setup-singularity@v6
-        with:
-          singularity-version: 3.7.1
       - name: build and publish all images
         shell: bash
         run: |
@@ -105,23 +101,15 @@ jobs:
           sudo docker login -u ${{ secrets.ACTION_USER }} -p ${{ secrets.ACTION_TOKEN }} ghcr.io
           docker login -u ${{ secrets.ACTION_USER }} -p ${{ secrets.ACTION_TOKEN }} ghcr.io
           ./build.sh all ${{ env.VCELL_REPO_NAMESPACE }} ${{ env.VCELL_TAG }}
-          cd singularity-vm
-          singularity remote login -u ${{ secrets.ACTION_USER }} -p ${{ secrets.ACTION_TOKEN }} oras://ghcr.io
-      - name: tag as latest and push to registry # (jcs) are explicit singularity push commands redundant? (see ./build.sh)
+
+      - name: tag as latest and push to registry
         shell: bash
         run: |
           for CONTAINER in vcell-api vcell-rest vcell-webapp-prod vcell-webapp-dev vcell-webapp-stage vcell-webapp-island vcell-batch vcell-opt vcell-clientgen vcell-data vcell-db vcell-mongo vcell-sched vcell-submit vcell-admin;\
-          do docker tag ${VCELL_REPO_NAMESPACE}/$CONTAINER:${VCELL_TAG} ${VCELL_REPO_NAMESPACE}/$CONTAINER:latest;\
-          docker tag ${VCELL_REPO_NAMESPACE}/$CONTAINER:${VCELL_TAG} ${VCELL_REPO_NAMESPACE}/$CONTAINER:${{ steps.version.outputs.tag }};\
-          docker push --all-tags ${VCELL_REPO_NAMESPACE}/$CONTAINER;\
+            do docker tag ${VCELL_REPO_NAMESPACE}/$CONTAINER:${VCELL_TAG} ${VCELL_REPO_NAMESPACE}/$CONTAINER:latest;\
+            docker tag ${VCELL_REPO_NAMESPACE}/$CONTAINER:${VCELL_TAG} ${VCELL_REPO_NAMESPACE}/$CONTAINER:${{ steps.version.outputs.tag }};\
+            docker push --all-tags ${VCELL_REPO_NAMESPACE}/$CONTAINER;\
           done
-          cd docker/build/singularity-vm
-          singularity push -U $(ls *batch*img) oras://${VCELL_REPO_NAMESPACE}/vcell-batch-singularity:${VCELL_TAG}
-          singularity push -U $(ls *batch*img) oras://${VCELL_REPO_NAMESPACE}/vcell-batch-singularity:${{ steps.version.outputs.tag }}
-          singularity push -U $(ls *batch*img) oras://${VCELL_REPO_NAMESPACE}/vcell-batch-singularity:latest
-          singularity push -U $(ls *opt*img) oras://${VCELL_REPO_NAMESPACE}/vcell-opt-singularity:${VCELL_TAG}
-          singularity push -U $(ls *opt*img) oras://${VCELL_REPO_NAMESPACE}/vcell-opt-singularity:${{ steps.version.outputs.tag }}
-          singularity push -U $(ls *opt*img) oras://${VCELL_REPO_NAMESPACE}/vcell-opt-singularity:latest
       - name: Setup tmate session
         uses: mxschmitt/action-tmate@v3
diff --git a/.github/workflows/site_deploy.yml b/.github/workflows/site_deploy.yml
index 5a5895f274..8c7cf0cf90 100644
--- a/.github/workflows/site_deploy.yml
+++ b/.github/workflows/site_deploy.yml
@@ -182,20 +182,6 @@ jobs:
           ssh-keyscan $VCELL_MANAGER_NODE >> ~/.ssh/known_hosts
           cd docker/swarm
           scp ${{ secrets.CD_FULL_USER }}@${VCELL_MANAGER_NODE}:${VCELL_DEPLOY_REMOTE_DIR}/${VCELL_CONFIG_FILE_NAME} .
-      - name: install singularity
-        uses: eWaterCycle/setup-singularity@v6
-        with:
-          singularity-version: 3.7.1
-      - name: retrieve batch and opt singularity images
-        run: |
-          set -ux
-          cd docker/swarm
-          export BATCH_SINGULARITY_FILENAME=`cat $VCELL_CONFIG_FILE_NAME | grep VCELL_BATCH_SINGULARITY_FILENAME | cut -d"=" -f2`
-          export OPT_SINGULARITY_FILENAME=`cat $VCELL_CONFIG_FILE_NAME | grep VCELL_OPT_SINGULARITY_FILENAME | cut -d"=" -f2`
-          cd ../build/singularity-vm
-          singularity remote login -u ${{ secrets.ACTION_USER }} -p ${{ secrets.ACTION_TOKEN }} oras://ghcr.io
-          singularity pull $BATCH_SINGULARITY_FILENAME oras://${VCELL_REPO_NAMESPACE}/vcell-batch-singularity:${{ github.event.inputs.vcell_version }}.${{ github.event.inputs.vcell_build }}
-          singularity pull $OPT_SINGULARITY_FILENAME oras://${VCELL_REPO_NAMESPACE}/vcell-opt-singularity:${{ github.event.inputs.vcell_version }}.${{ github.event.inputs.vcell_build }}
       - name: setup java 17 with maven cache (for documentation build)
         uses: actions/setup-java@v4
         with:
@@ -207,16 +193,15 @@ jobs:
         run: |
           set -ux
           mvn clean install -DskipTests
-      - name: deploy installers and singularity to kubernetes site and web help to vcell.org
+      - name: deploy installers and web help to vcell.org
        run: |
          set -ux
          cd docker/swarm
          ssh -t ${{ secrets.CD_FULL_USER }}@${VCELL_MANAGER_NODE} sudo docker login -u ${{ secrets.ACTION_USER }} -p ${{ secrets.ACTION_TOKEN }} ghcr.io
          if ${{ github.event.inputs.server_only != 'true' }}; then
-            # build and install the client installers, the singularity images, and the web help (kubernetes cluster deployments are separate)
+            # build and install the client installers, and the web help (kubernetes cluster deployments are separate)
             ./deploy-action-kubernetes.sh \
               --ssh-user ${{ secrets.CD_FULL_USER }} \
-              --install-singularity \
               --build-installers \
               --installer-deploy-dir $VCELL_INSTALLER_REMOTE_DIR \
               --webhelp-local-dir ../../vcell-client/target/classes/vcellDoc \
@@ -227,13 +212,6 @@ jobs:
             ssh ${{ secrets.CD_FULL_USER }}@${VCELL_MANAGER_NODE} \
               installer_deploy_dir=$VCELL_INSTALLER_REMOTE_DIR vcell_siteCamel=$VCELL_SITE_CAMEL vcell_version=$VCELL_VERSION vcell_build=$VCELL_BUILD \
               'bash -s' < link-installers.sh
-          else
-            # build and install only the singularity images (kubernetes cluster deployments are separate)
-            ./deploy-action-kubernetes.sh \
-              --ssh-user ${{ secrets.CD_FULL_USER }} \
-              --install-singularity \
-              ${VCELL_MANAGER_NODE} \
-              ./${VCELL_CONFIG_FILE_NAME}
          fi
       - name: Capitalize first character of site name
         id: capitalize
diff --git a/docker/build/Dockerfile-submit-dev b/docker/build/Dockerfile-submit-dev
index 17090260bd..9df249c39b 100644
--- a/docker/build/Dockerfile-submit-dev
+++ b/docker/build/Dockerfile-submit-dev
@@ -64,9 +64,13 @@ ENV softwareVersion=SOFTWARE-VERSION-NOT-SET \
     htclogdir_external=/path/to/external/htclogs/ \
     nativesolverdir_external=/path/to/external/nativesolvers/ \
     htcnodelist="batch-host-not-set" \
-    batch_singularity_imagefile=/path/to/external/batch/singularity.img \
-    opt_singularity_imagefile=/path/to/external/opt/singularity_opt.img \
-    docker_name="repo/namespace/vcell-batch:tag" \
+    htc_vcellfvsolver_docker_name="htc-vcellfvsolver-docker-name-not-set" \
+    htc_vcellfvsolver_solver_list="htc-vcellfvsolver-solver-list-not-set" \
+    htc_vcellsolvers_docker_name="htc-vcellsolvers-docker-name-not-set" \
+    htc_vcellsolvers_solver_list="htc-vcellsolvers-solver-list-not-set" \
+    htc_vcellbatch_docker_name="htc-vcellbatch-docker-name-not-set" \
+    htc_vcellbatch_solver_list="htc-vcellbatch-solver-list-not-set" \
+    htc_vcellopt_docker_name="htc-vcellopt-docker-name-not-set" \
     batchhost="batch-host-not-set" \
     batchuser="batch-user-not-set" \
     slurm_cmd_sbatch=sbatch \
@@ -80,8 +84,8 @@ ENV softwareVersion=SOFTWARE-VERSION-NOT-SET \
     slurm_reservation_pu="slurm_reservation_pu-not-set" \
     slurm_qos_pu="slurm_qos_pu-not-set" \
     slurm_tmpdir="slurm-tmpdir-not-set" \
-    slurm_local_singularity_dir="slurm_local_singularity_dir-not-set" \
-    slurm_central_singularity_dir="slurm_central_singularity_dir-not-set" \
+    slurm_singularity_cachedir="slurm_singularity_cachedir-not-set" \
+    slurm_singularity_pullfolder="slurm_singularity_pullfolder-not-set" \
     slurm_singularity_module_name="slurm_singularity_module_name-not-set" \
     jmsblob_minsize=100000 \
     vcell_ssh_cmd_cmdtimeout="cmdSrvcSshCmdTimeoutMS-not-set" \
@@ -131,12 +135,16 @@ ENTRYPOINT java \
     -Dvcell.slurm.reservationpu="${slurm_reservation_pu}" \
     -Dvcell.slurm.qospu="${slurm_qos_pu}" \
     -Dvcell.slurm.tmpdir="${slurm_tmpdir}" \
-    -Dvcell.slurm.local.singularity.dir="${slurm_local_singularity_dir}" \
-    -Dvcell.slurm.central.singularity.dir="${slurm_central_singularity_dir}" \
+    -Dvcell.slurm.singularity.cachedir="${slurm_singularity_cachedir}" \
+    -Dvcell.slurm.singularity.pullfolder="${slurm_singularity_pullfolder}" \
     -Dvcell.slurm.singularity.module.name="${slurm_singularity_module_name}" \
-    -Dvcell.batch.singularity.image="${batch_singularity_imagefile}" \
-    -Dvcell.opt.singularity.image="${opt_singularity_imagefile}" \
-    -Dvcell.batch.docker.name="${docker_name}" \
+    -Dvcell.htc.vcellfvsolver.docker.name="${htc_vcellfvsolver_docker_name}" \
+    -Dvcell.htc.vcellfvsolver.solver.list="${htc_vcellfvsolver_solver_list}" \
+    -Dvcell.htc.vcellsolvers.docker.name="${htc_vcellsolvers_docker_name}" \
+    -Dvcell.htc.vcellsolvers.solver.list="${htc_vcellsolvers_solver_list}" \
+    -Dvcell.htc.vcellbatch.docker.name="${htc_vcellbatch_docker_name}" \
+    -Dvcell.htc.vcellbatch.solver.list="${htc_vcellbatch_solver_list}" \
+    -Dvcell.htc.vcellopt.docker.name="${htc_vcellopt_docker_name}" \
     -Dvcell.simulation.postprocessor=JavaPostprocessor64 \
     -Dvcell.simulation.preprocessor=JavaPreprocessor64 \
     -Dvcell.javaSimulation.executable=JavaSimExe64 \
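Note how the configuration stays in sync across layers: each new htc_* ENV default above is forwarded by the ENTRYPOINT as a vcell.htc.* system property, which the Java side resolves through PropertyLoader (the constants are registered in a later diff in this patch). A minimal sketch of the consuming side, assuming only the PropertyLoader constants and getRequiredProperty() calls that appear below (the class name here is hypothetical, not part of the patch):

import java.util.List;
import cbit.vcell.resource.PropertyLoader;

// Illustration only: resolving one image / solver-list pair.
public class HtcContainerConfigSketch {
	public static void main(String[] args) {
		// getRequiredProperty is expected to fail fast if the corresponding -D flag was not supplied
		String fvImage = PropertyLoader.getRequiredProperty(PropertyLoader.htc_vcellfvsolver_docker_name);
		List<String> fvSolvers = List.of(PropertyLoader.getRequiredProperty(PropertyLoader.htc_vcellfvsolver_solver_list).split(","));
		System.out.println(fvImage + " runs " + fvSolvers);
		// e.g. "ghcr.io/virtualcell/vcell-fvsolver:v0.9.4 runs [Smoldyn, SundialsPDE]"
	}
}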
diff --git a/docker/build/build.sh b/docker/build/build.sh
index a5c5b39872..600c90beca 100755
--- a/docker/build/build.sh
+++ b/docker/build/build.sh
@@ -6,7 +6,6 @@ ssh_user=$(whoami)
 ssh_key=
 skip_push=false
 skip_maven=false
-skip_singularity=false
 SUDO_CMD=sudo
 mvn_repo=$HOME/.m2
 
@@ -29,8 +28,6 @@ show_help() {
 	echo ""
 	echo "    --ssh-key keyfile    ssh key for passwordless ssh to node"
 	echo ""
-	echo "    --skip-singularity   skip build of Singularity image for vcell-batch and vcell-opt containers (stored in ./singularity/)"
-	echo ""
 	echo "    --skip-maven         skip vcell software build prior to building containers"
 	echo ""
 	echo "    --skip-push          skip pushing containers to repository"
@@ -72,9 +69,6 @@ while :; do
 		--skip-sudo)
 			SUDO_CMD=
 			;;
-		--skip-singularity)
-			skip_singularity=true
-			;;
 		-?*)
 			printf 'ERROR: Unknown option: %s\n' "$1" >&2
 			echo ""
@@ -229,127 +223,6 @@ build_mongo() {
 	fi
 }
 
-build_batch_singularity() {
-	# turn on logging of commands
-	set -x
-	if [ "$skip_singularity" == "false" ]; then
-		if [ -x "$(command -v singularity)" ]; then
-			build_batch_singularity_direct
-			if [[ $? -ne 0 ]]; then echo "failed to build singularity image using singularity commands"; exit 1; fi
-		else
-			echo "singularity not found, cannot build singularity image"
-			exit 1
-		fi
-	fi
-	# turn off logging of commands
-	set +x
-}
-
-build_batch_singularity_direct() {
-
-	echo ""
-	cmd="cd singularity-vm"
-	cd singularity-vm
-	echo ""
-	echo "CURRENT DIRECTORY IS $PWD"
-
-	#
-	# create temporary Singularity file which imports existing docker image from registry and adds a custom entrypoint
-	#
-	_vcell_batch_docker_name="${repo}/vcell-batch:${tag}"
-	_singularity_image_file="${_vcell_batch_docker_name//[\/:]/_}.img"
-	_singularity_file="Singularity_${_vcell_batch_docker_name//[\/:]/_}"
-
-cat <<EOF >$_singularity_file
-Bootstrap: docker
-From: $_vcell_batch_docker_name
-
-%runscript
-
-    exec /vcellscripts/entrypoint.sh "\$@"
-
-%labels
-
-AUTHOR jcschaff
-EOF
-
-	echo ""
-	echo "wrote Singularity file $_singularity_file"
-	cat $_singularity_file
-
-	#
-	# build the singularity image and place in singularity-vm directory
-	#
-	echo ""
-	remote_cmd1="sudo singularity build $_singularity_image_file $_singularity_file"
-	remote_cmd2="singularity build --fakeroot $_singularity_image_file $_singularity_file"
-	echo "$remote_cmd1"
-	($remote_cmd1)
-	if [[ $? -ne 0 ]]
-	then
-		echo "failed to build singularity image with sudo, will try fakeroot"
-		echo "$remote_cmd2"
-		($remote_cmd2)
-		if [[ $? -ne 0 ]]; then echo "failed to build singularity image with fakeroot"; exit 1; fi
-	fi
-
-	echo ""
-	echo "created Singularity image for vcell-bash ./$_singularity_image_file locally (in ./singularity-vm folder), can be pushed to remote server during deploy"
-	echo ""
-	echo "cd .."
-	cd ..
-}
-
-
-build_opt_singularity() {
-	if [ "$skip_singularity" == "false" ]; then
-		if [ -x "$(command -v singularity)" ]; then
-			build_opt_singularity_direct
-			if [[ $? -ne 0 ]]; then echo "failed to build opt singularity image using singularity commands"; exit 1; fi
-		else
-			echo "singularity not found, cannot build opt singularity image"
-			exit 1
-		fi
-	fi
-}
-
-build_opt_singularity_direct() {
-
-	echo ""
-	cmd="cd singularity-vm"
-	cd singularity-vm
-	echo ""
-	echo "CURRENT DIRECTORY IS $PWD"
-
-	#
-	# create temporary Singularity file which imports existing docker image from registry and adds a custom entrypoint
-	#
-	_vcell_opt_docker_name="${repo}/vcell-opt:${tag}"
-	_singularity_image_file="${_vcell_opt_docker_name//[\/:]/_}.img"
-	_singularity_file="Singularity_${_vcell_opt_docker_name//[\/:]/_}"
-
-	#
-	# build the singularity image and place in singularity-vm directory
-	#
-	echo ""
-	remote_cmd1="sudo singularity build ${_singularity_image_file} docker://${_vcell_opt_docker_name}"
-	remote_cmd2="singularity build --fakeroot ${_singularity_image_file} docker://${_vcell_opt_docker_name}"
-	echo "$remote_cmd1"
-	($remote_cmd1)
-	if [[ $? -ne 0 ]]
-	then
-		echo "failed to build opt singularity image with sudo, will try fakeroot"
-		echo "$remote_cmd2"
-		($remote_cmd2)
-		if [[ $? -ne 0 ]]; then echo "failed to build opt singularity image with fakeroot"; exit 1; fi
-	fi
-
-	echo ""
-	echo "created Singularity image for vcell-opt ./$_singularity_image_file locally (in ./singularity-vm folder), can be pushed to remote server during deploy"
-	echo ""
-	echo "cd .."
-	cd ..
-}
 
 shift
 
@@ -362,11 +235,11 @@ fi
 
 case $target in
 	batch)
-		build_batch && build_batch_singularity
+		build_batch
 		exit $?
 		;;
 	opt)
-		build_opt && build_opt_singularity
+		build_opt
 		exit $?
 		;;
 	api)
@@ -414,8 +287,8 @@ case $target in
 		exit $?
 		;;
 	all)
-#		build_api && build_rest && build_db && build_sched && build_submit && build_data && build_mongo && build_batch && build_opt && build_clientgen && build_batch_singularity && build_opt_singularity && build_admin
-		build_api && build_rest && build_webapp && build_db && build_sched && build_submit && build_data && build_mongo && build_batch && build_opt && build_clientgen && build_batch_singularity && build_opt_singularity && build_admin
+#		build_api && build_rest && build_db && build_sched && build_submit && build_data && build_mongo && build_batch && build_opt && build_clientgen && build_admin
+		build_api && build_rest && build_webapp && build_db && build_sched && build_submit && build_data && build_mongo && build_batch && build_opt && build_clientgen && build_admin
 		exit $?
 		;;
	appservices)
diff --git a/docker/swarm/deploy-action-kubernetes.sh b/docker/swarm/deploy-action-kubernetes.sh
index 7ecce0a8a1..0d9ffd258a 100755
--- a/docker/swarm/deploy-action-kubernetes.sh
+++ b/docker/swarm/deploy-action-kubernetes.sh
@@ -3,7 +3,7 @@ set -ux
 
 show_help() {
-	echo "Deploys vcell client installers, webhelp and singularity images for a Kubernetes deploy"
+	echo "Deploys vcell client installers and webhelp for a Kubernetes deploy"
 	echo ""
 	echo "usage: deploy-action-kubernetes.sh [OPTIONS] REQUIRED-ARGUMENTS"
 	echo ""
@@ -30,14 +30,11 @@ show_help() {
 	echo "    --webhelp-deploy-dir  /remote/path/to/web/VCell_Help"
 	echo "                          directory for deployed html webhelp published on web server"
 	echo ""
-	echo "    --install-singularity optionally install batch and opt singularity images on each compute node in 'vcell' SLURM partition"
-	echo ""
 	echo ""
 	echo "example:"
 	echo ""
 	echo "deploy-action-kubernetes.sh \\"
 	echo "  --ssh-user vcell \\"
-	echo "  --install_singularity \\"
 	echo "  --build_installers --installer_deploy_dir /share/apps/vcell3/apache_webroot/htdocs/webstart/Alpha \\"
 	echo "  --webhelp_local_dir ../../vcell-client/target/classes/vcellDoc \\"
 	echo "  --webhelp_deploy_dir /share/apps/vcell3/apache_webroot/htdocs/webstart/VCell_Tutorials/VCell_Help \\"
@@ -55,7 +52,6 @@ installer_deploy_dir=
 webhelp_local_dir=
 webhelp_deploy_dir=
 build_installers=false
-install_singularity=false
 while :; do
 	case $1 in
 		-h|--help)
@@ -78,9 +74,6 @@ while :; do
 			shift
 			webhelp_deploy_dir=$1
 			;;
-		--install-singularity)
-			install_singularity=true
-			;;
 		--build-installers)
 			build_installers=true
 			;;
@@ -106,50 +99,6 @@ local_config_file=$2
 vcell_siteCamel=$(grep VCELL_SITE_CAMEL "$local_config_file" | cut -d"=" -f2)
 vcell_version=$(grep VCELL_VERSION_NUMBER "$local_config_file" | cut -d"=" -f2)
 vcell_build=$(grep VCELL_BUILD_NUMBER "$local_config_file" | cut -d"=" -f2)
-batch_singularity_filename=$(grep VCELL_BATCH_SINGULARITY_FILENAME "$local_config_file" | cut -d"=" -f2)
-opt_singularity_filename=$(grep VCELL_OPT_SINGULARITY_FILENAME "$local_config_file" | cut -d"=" -f2)
-slurm_singularity_central_dir=$(grep VCELL_SLURM_CENTRAL_SINGULARITY_DIR "$local_config_file" | cut -d"=" -f2)
-
-
-#
-# install the singularity images on the cluster nodes
-#
-if [ "$install_singularity" == "true" ]; then
-
-	echo ""
-	pushd ../build/singularity-vm || (echo "pushd ../build/singularity-vm failed"; exit 1)
-	echo ""
-	echo "CURRENT DIRECTORY IS $PWD"
-
-	#
-	# get configuration from config file and load into current bash environment
-	#
-	echo ""
-
-	if [ ! -e "./${batch_singularity_filename}" ]; then
-		echo "failed to find local batch singularity image file $batch_singularity_filename in ./singularity-vm directory"
-		exit 1
-	fi
-
-	if ! scp "./${batch_singularity_filename}" "$ssh_user@$manager_node:${slurm_singularity_central_dir}"; then
-		echo "failed to copy batch singularity image to server"
-		exit 1
-	fi
-
-	if [ ! -e "./${opt_singularity_filename}" ]; then
-		echo "failed to find local opt singularity image file $opt_singularity_filename in ./singularity-vm directory"
-		exit 1
-	fi
-
-	if ! scp "./${opt_singularity_filename}" "$ssh_user@$manager_node:${slurm_singularity_central_dir}"; then
-		echo "failed to copy opt singularity image to server"
-		exit 1
-	fi
-
-	echo "popd"
-	popd || (echo "popd failed"; exit 1)
-fi
-
 
 #
 # if --build-installers, then generate client installers, placing then in ./generated_installers
diff --git a/vcell-core/src/main/java/cbit/vcell/resource/PropertyLoader.java b/vcell-core/src/main/java/cbit/vcell/resource/PropertyLoader.java
index 1897bb9dde..a8d6c5243c 100644
--- a/vcell-core/src/main/java/cbit/vcell/resource/PropertyLoader.java
+++ b/vcell-core/src/main/java/cbit/vcell/resource/PropertyLoader.java
@@ -80,6 +80,15 @@ public static void setConfigProvider(VCellConfigProvider configProvider) {
 	public static final String htcPbsHome = record("vcell.htc.pbs.home",ValueType.GEN);
 	public static final String htcSgeHome = record("vcell.htc.sge.home",ValueType.GEN);
 	public static final String htcNodeList = record("vcell.htc.nodelist",ValueType.GEN);
+
+	public static final String htc_vcellfvsolver_docker_name = record("vcell.htc.vcellfvsolver.docker.name",ValueType.GEN);
+	public static final String htc_vcellfvsolver_solver_list = record("vcell.htc.vcellfvsolver.solver.list",ValueType.GEN);
+	public static final String htc_vcellsolvers_docker_name = record("vcell.htc.vcellsolvers.docker.name",ValueType.GEN);
+	public static final String htc_vcellsolvers_solver_list = record("vcell.htc.vcellsolvers.solver.list",ValueType.GEN);
+	public static final String htc_vcellbatch_docker_name = record("vcell.htc.vcellbatch.docker.name",ValueType.GEN);
+	public static final String htc_vcellbatch_solver_list = record("vcell.htc.vcellbatch.solver.list",ValueType.GEN);
+	public static final String htc_vcellopt_docker_name = record("vcell.htc.vcellopt.docker.name",ValueType.GEN);
+
 	public static final String slurm_cmd_sbatch = record("vcell.slurm.cmd.sbatch",ValueType.GEN);
 	public static final String slurm_cmd_scancel = record("vcell.slurm.cmd.scancel",ValueType.GEN);
 	public static final String slurm_cmd_sacct = record("vcell.slurm.cmd.sacct",ValueType.GEN);
@@ -93,8 +102,8 @@ public static void setConfigProvider(VCellConfigProvider configProvider) {
 	public static final String slurm_reservation_pu = record("vcell.slurm.reservationpu",ValueType.GEN);
 	public static final String slurm_qos_pu = record("vcell.slurm.qospu",ValueType.GEN);
 	public static final String slurm_tmpdir = record("vcell.slurm.tmpdir",ValueType.GEN);
-	public static final String slurm_local_singularity_dir = record("vcell.slurm.local.singularity.dir",ValueType.GEN);
-	public static final String slurm_central_singularity_dir= record("vcell.slurm.central.singularity.dir",ValueType.GEN);
+	public static final String slurm_singularity_cachedir = record("vcell.slurm.singularity.cachedir",ValueType.GEN);
+	public static final String slurm_singularity_pullfolder= record("vcell.slurm.singularity.pullfolder",ValueType.GEN);
 	public static final String slurm_singularity_module_name= record("vcell.slurm.singularity.module.name",ValueType.GEN);
 	public static final String sgeModulePath = record("vcell.htc.sge.module",ValueType.GEN);
 	public static final String pbsModulePath = record("vcell.htc.pbs.module",ValueType.GEN);
@@ -237,10 +246,7 @@ public static void setConfigProvider(VCellConfigProvider configProvider) {
 	public static final String vcellSMTPPort = record("vcell.smtp.port",ValueType.GEN);
 	public static final String vcellSMTPEmailAddress = record("vcell.smtp.emailAddress",ValueType.GEN);
 
-	public static final String vcellbatch_docker_name = record("vcell.batch.docker.name",ValueType.GEN);
 	public static final String vcellsubmit_service_host = record("vcell.submit.service.host",ValueType.GEN);
-	public static final String vcellbatch_singularity_image = record("vcell.batch.singularity.image",ValueType.GEN);
-	public static final String vcellopt_singularity_image = record("vcell.opt.singularity.image",ValueType.GEN);
 	public static final String javaSimulationExecutable = record("vcell.javaSimulation.executable",ValueType.GEN);
 	public static final String simulationPreprocessor = record("vcell.simulation.preprocessor",ValueType.GEN);
 	public static final String simulationPostprocessor = record("vcell.simulation.postprocessor",ValueType.GEN);
diff --git a/vcell-server/src/main/java/cbit/vcell/message/server/batch/sim/HtcSimulationWorker.java b/vcell-server/src/main/java/cbit/vcell/message/server/batch/sim/HtcSimulationWorker.java
index 1229185e8e..1ab3d64dfd 100644
--- a/vcell-server/src/main/java/cbit/vcell/message/server/batch/sim/HtcSimulationWorker.java
+++ b/vcell-server/src/main/java/cbit/vcell/message/server/batch/sim/HtcSimulationWorker.java
@@ -666,11 +666,13 @@ public static void main(String[] args) throws IOException {
 				PropertyLoader.simulationPostprocessor,
 				PropertyLoader.simulationPreprocessor,
 				PropertyLoader.slurm_partition,
-				PropertyLoader.vcellbatch_singularity_image,
-				PropertyLoader.vcellopt_singularity_image,
-				PropertyLoader.vcellbatch_docker_name,
-				PropertyLoader.slurm_local_singularity_dir,
-				PropertyLoader.slurm_central_singularity_dir,
+				PropertyLoader.htc_vcellfvsolver_docker_name,
+				PropertyLoader.htc_vcellfvsolver_solver_list,
+				PropertyLoader.htc_vcellsolvers_docker_name,
+				PropertyLoader.htc_vcellsolvers_solver_list,
+				PropertyLoader.htc_vcellbatch_docker_name,
+				PropertyLoader.htc_vcellbatch_solver_list,
+				PropertyLoader.htc_vcellopt_docker_name,
 				PropertyLoader.slurm_singularity_module_name,
 				PropertyLoader.slurm_reservation,
 				PropertyLoader.slurm_qos,
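The consumer of these seven properties is SlurmProxy.generateScript in the next diff: the simulation's solver name is looked up in the three comma-separated solver lists, and the first list that contains it decides which container image runs the job. Since the lists overlap (e.g. Smoldyn and SundialsPDE appear in more than one), that ordering is a real precedence rule. A standalone sketch of the selection, mirroring the patch's own logic (class and method names here are hypothetical):

import java.util.List;
import cbit.vcell.resource.PropertyLoader;

// Sketch of the container-selection rule in SlurmProxy.generateScript below.
public class SolverImageSketch {
	static String solverImageFor(String solverName) {
		List<String> fv    = List.of(PropertyLoader.getRequiredProperty(PropertyLoader.htc_vcellfvsolver_solver_list).split(","));
		List<String> full  = List.of(PropertyLoader.getRequiredProperty(PropertyLoader.htc_vcellsolvers_solver_list).split(","));
		List<String> batch = List.of(PropertyLoader.getRequiredProperty(PropertyLoader.htc_vcellbatch_solver_list).split(","));
		// precedence: fvsolver list first, then vcellsolvers, then vcellbatch
		if (fv.contains(solverName))    return PropertyLoader.getRequiredProperty(PropertyLoader.htc_vcellfvsolver_docker_name);
		if (full.contains(solverName))  return PropertyLoader.getRequiredProperty(PropertyLoader.htc_vcellsolvers_docker_name);
		if (batch.contains(solverName)) return PropertyLoader.getRequiredProperty(PropertyLoader.htc_vcellbatch_docker_name);
		throw new RuntimeException("no container image configured for solver " + solverName);
	}
}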
diff --git a/vcell-server/src/main/java/cbit/vcell/message/server/htc/slurm/SlurmProxy.java b/vcell-server/src/main/java/cbit/vcell/message/server/htc/slurm/SlurmProxy.java
index 18d1accc64..85c0a086a8 100644
--- a/vcell-server/src/main/java/cbit/vcell/message/server/htc/slurm/SlurmProxy.java
+++ b/vcell-server/src/main/java/cbit/vcell/message/server/htc/slurm/SlurmProxy.java
@@ -33,6 +33,25 @@ public class SlurmProxy extends HtcProxy {
 	private final static int SCANCEL_JOB_NOT_FOUND_RETURN_CODE = 1;
 	private final static String SCANCEL_UNKNOWN_JOB_RESPONSE = "does not exist";
 	protected final static String SLURM_SUBMISSION_FILE_EXT = ".slurm.sub";
+
+	private enum CommandContainer {
+		SOLVER("${solver_container_prefix} "),
+		BATCH("${batch_container_prefix} ");
+
+		private final String prefix;
+		CommandContainer(String prefix) {
+			this.prefix = prefix;
+		}
+		public String getPrefix() {
+			return prefix;
+		}
+	}
+
+	private record SingularityBinding(String hostPath, String containerPath) {
+		public String getBinding() {
+			return "--bind "+hostPath+":"+containerPath;
+		}
+	}
 
 	public SlurmProxy(CommandService commandService, String htcUser) {
 		super(commandService, htcUser);
@@ -156,29 +175,6 @@ public void killJobs(String jobNameSubstring) {
 		}
 	}
 
-
-	/**
-	 * adding MPICH command if necessary
-	 * @param ncpus if != 1, {MPI_HOME} command prepended
-	 * @param command command set
-	 * @return new String
-	 */
-	private final String buildExeCommand(int ncpus,String command) {
-		if (ncpus == 1) {
-			return command;
-		}
-		String MPI_HOME_EXTERNAL= PropertyLoader.getProperty(PropertyLoader.MPI_HOME_EXTERNAL,"");
-
-		final char SPACE = ' ';
-		StringBuilder sb = new StringBuilder( );
-		sb.append(MPI_HOME_EXTERNAL);
-		sb.append("/bin/mpiexec -np ");
-		sb.append(ncpus);
-		sb.append(SPACE);
-		sb.append(command);
-		return sb.toString().trim( );
-	}
-
 	@Override
 	public HtcProxy cloneThreadsafe() {
 		return new SlurmProxy(getCommandService().clone(), getHtcUser());
@@ -420,8 +416,7 @@ public String getPostProcessCommands() {
 	}
 
-	SbatchSolverComponents generateScript(String jobName, ExecutableCommand.Container commandSet, int ncpus, double memSizeMB, Collection<PortableCommand> postProcessingCommands, SimulationTask simTask) {
-		final boolean isParallel = ncpus > 1;
+	SbatchSolverComponents generateScript(String jobName, ExecutableCommand.Container commandSet, double memSizeMB, Collection<PortableCommand> postProcessingCommands, SimulationTask simTask) {
 		//SlurmProxy ultimately instantiated from {vcellroot}/docker/build/Dockerfile-submit-dev by way of cbit.vcell.message.server.batch.sim.HtcSimulationWorker
 		String vcellUserid = simTask.getUser().getName();
@@ -432,7 +427,6 @@ SbatchSolverComponents generateScript(String jobName, ExecutableCommand.Containe
 
 		LineStringBuilder slurmCommands = new LineStringBuilder();
 		slurmScriptInit(jobName, simTask.isPowerUser(), memoryMBAllowed, slurmCommands);
-		LineStringBuilder lsb = new LineStringBuilder();
 
 		String primaryDataDirExternal = PropertyLoader.getRequiredProperty(PropertyLoader.primarySimDataDirExternalProperty);
 		String secondaryDataDirExternal = PropertyLoader.getRequiredProperty(PropertyLoader.secondarySimDataDirExternalProperty);
@@ -448,17 +442,30 @@ SbatchSolverComponents generateScript(String jobName, ExecutableCommand.Containe
 		String mongodb_database = PropertyLoader.getRequiredProperty(PropertyLoader.mongodbDatabase);
 		String serverid=PropertyLoader.getRequiredProperty(PropertyLoader.vcellServerIDProperty);
 		String softwareVersion=PropertyLoader.getRequiredProperty(PropertyLoader.vcellSoftwareVersion);
-		String remote_singularity_image = PropertyLoader.getRequiredProperty(PropertyLoader.vcellbatch_singularity_image);
-		String slurm_singularity_local_image_filepath = remote_singularity_image;
-//		String docker_image = PropertyLoader.getRequiredProperty(PropertyLoader.vcellbatch_docker_name);
 		String slurm_tmpdir = PropertyLoader.getRequiredProperty(PropertyLoader.slurm_tmpdir);
-		String slurm_central_singularity_dir = PropertyLoader.getRequiredProperty(PropertyLoader.slurm_central_singularity_dir);
-		String slurm_local_singularity_dir = PropertyLoader.getRequiredProperty(PropertyLoader.slurm_local_singularity_dir);
+		String slurm_singularity_cachedir = PropertyLoader.getRequiredProperty(PropertyLoader.slurm_singularity_cachedir);
+		String slurm_singularity_pullfolder = PropertyLoader.getRequiredProperty(PropertyLoader.slurm_singularity_pullfolder);
 		String slurm_singularity_module_name = PropertyLoader.getRequiredProperty(PropertyLoader.slurm_singularity_module_name);
 		String simDataDirArchiveExternal = PropertyLoader.getRequiredProperty(PropertyLoader.simDataDirArchiveExternal);
 		String simDataDirArchiveInternal = PropertyLoader.getRequiredProperty(PropertyLoader.simDataDirArchiveInternal);
-		File slurm_singularity_central_filepath = new File(slurm_central_singularity_dir,new File(slurm_singularity_local_image_filepath).getName());
-
+
+		String solverName = simTask.getSimulation().getSolverTaskDescription().getSolverDescription().name();
+		List<String> vcellfvsolver_solverList = List.of(PropertyLoader.getRequiredProperty(PropertyLoader.htc_vcellfvsolver_solver_list).split(","));
+		List<String> vcellsolvers_solverList = List.of(PropertyLoader.getRequiredProperty(PropertyLoader.htc_vcellsolvers_solver_list).split(","));
+		List<String> vcellbatch_solverList = List.of(PropertyLoader.getRequiredProperty(PropertyLoader.htc_vcellbatch_solver_list).split(","));
+
+		final String solverDockerName;
+		final String batchDockerName = PropertyLoader.getRequiredProperty(PropertyLoader.htc_vcellbatch_docker_name);
+		if (vcellfvsolver_solverList.contains(solverName)) {
+			solverDockerName = PropertyLoader.getRequiredProperty(PropertyLoader.htc_vcellfvsolver_docker_name);
+		} else if (vcellsolvers_solverList.contains(solverName)) {
+			solverDockerName = PropertyLoader.getRequiredProperty(PropertyLoader.htc_vcellsolvers_docker_name);
+		} else if (vcellbatch_solverList.contains(solverName)) {
+			solverDockerName = batchDockerName;
+		} else {
+			throw new RuntimeException("solverName="+solverName+" not in vcellfvsolver_solverList="+vcellfvsolver_solverList+" or vcellsolvers_solverList="+vcellsolvers_solverList+" or vcellbatch_solverList="+vcellbatch_solverList);
+		}
+
 		String[] environmentVars = new String[] {
 				"java_mem_Xmx="+memoryMBAllowed.getMemLimit()+"M",
 				"jmshost_sim_internal="+jmshost_sim_external,
@@ -476,36 +483,29 @@ SbatchSolverComponents generateScript(String jobName, ExecutableCommand.Containe
 				"softwareVersion="+softwareVersion,
 				"serverid="+serverid
 		};
-		lsb.write("echo \"1 date=`date`\"");
+
+		List<SingularityBinding> bindings = List.of(
+				new SingularityBinding(primaryDataDirExternal, "/simdata"),
+				new SingularityBinding(secondaryDataDirExternal, "/simdata_secondary"),
+				new SingularityBinding(simDataDirArchiveExternal, simDataDirArchiveInternal),
+				new SingularityBinding(htclogdir_external, "/htclogs"),
+				new SingularityBinding(slurm_tmpdir, "/solvertmp")
+		);
+
+		LineStringBuilder lsb = new LineStringBuilder();
 		LineStringBuilder singularityLSB = new LineStringBuilder();
-		slurmInitSingularity(singularityLSB, primaryDataDirExternal, Optional.of(secondaryDataDirExternal), htclogdir_external, softwareVersion,
-				slurm_singularity_local_image_filepath, slurm_tmpdir, slurm_central_singularity_dir,
-				slurm_local_singularity_dir, simDataDirArchiveExternal, simDataDirArchiveInternal, slurm_singularity_central_filepath,
+		slurmInitSingularity(singularityLSB,
+				solverDockerName, Optional.of(batchDockerName), bindings,
+				slurm_tmpdir, slurm_singularity_cachedir, slurm_singularity_pullfolder,
 				slurm_singularity_module_name, environmentVars);
 
 		LineStringBuilder sendFailMsgLSB = new LineStringBuilder();
 		sendFailMsgScript(simTask, sendFailMsgLSB, jmshost_sim_external, jmsport_sim_external, jmsuser, jmspswd);
-
-		if (isParallel) {
-			lsb.write("#BEGIN---------SlurmProxy.generateScript():isParallel----------");
-			String MPI_HOME_EXTERNAL= PropertyLoader.getProperty(PropertyLoader.MPI_HOME_EXTERNAL,"");
-
-			// #SBATCH
-//			lsb.append("#$ -pe mpich ");
-//			lsb.append(ncpus);
-//			lsb.newline();
-
-			lsb.append("#SBATCH -n " + ncpus);
-			lsb.newline();
-
-			lsb.append("#$ -v LD_LIBRARY_PATH=");
-			lsb.append(MPI_HOME_EXTERNAL+"/lib");
-			lsb.write(":"+primaryDataDirExternal);
-			lsb.write("#END---------SlurmProxy.generateScript():isParallel----------");
+		for (SingularityBinding binding : bindings) {
+			commandSet.translatePaths(new File(binding.hostPath), new File(binding.containerPath));
 		}
-		lsb.newline();
-
+
 		final boolean hasExitProcessor = commandSet.hasExitCodeCommand();
 //		lsb.write("run_in_container=\"singularity /path/to/data:/simdata /path/to/image/vcell-batch.img);
 		LineStringBuilder callExitLSB = new LineStringBuilder();
@@ -515,10 +515,11 @@ SbatchSolverComponents generateScript(String jobName, ExecutableCommand.Containe
 
 		LineStringBuilder preProcessLSB = new LineStringBuilder();
 		for (ExecutableCommand ec: commandSet.getExecCommands()) {
+			ExecutableCommand.Container commandSet2 = new ExecutableCommand.Container();
 			if(ec.getCommands().get(0).equals("JavaPreprocessor64")) {
-				execCommandScript(ncpus, isParallel, preProcessLSB, hasExitProcessor, ec);
+				execCommandScript(preProcessLSB, hasExitProcessor, ec, "${batch_container_prefix}");
 			}else {
-				execCommandScript(ncpus, isParallel, lsb, hasExitProcessor, ec);
+				execCommandScript(lsb, hasExitProcessor, ec, "${solver_container_prefix}");
 			}
 		}
@@ -538,37 +539,23 @@ SbatchSolverComponents generateScript(String jobName, ExecutableCommand.Containe
 
 
 	private void callExitScript(ExecutableCommand.Container commandSet, LineStringBuilder lsb) {
-		lsb.write("#BEGIN---------SlurmProxy.generateScript():hasExitProcessor----------");
+		lsb.newline();
 		ExecutableCommand exitCmd = commandSet.getExitCodeCommand();
 		exitCmd.stripPathFromCommand();
 		lsb.write("callExitProcessor( ) {");
-		lsb.append("\techo exitCommand = ");
-		lsb.write("${container_prefix}" + exitCmd.getJoinedCommands("$1"));
 		lsb.append('\t');
-		lsb.write("${container_prefix}" + exitCmd.getJoinedCommands());
+		lsb.write("${batch_container_prefix} " + exitCmd.getJoinedCommands("$1").trim());
 		lsb.write("}");
-		lsb.write("#END---------SlurmProxy.generateScript():hasExitProcessor----------");
-		lsb.write("echo");
+		lsb.newline();
+		lsb.newline();
 	}
 
 	private void sendFailMsgScript(SimulationTask simTask, LineStringBuilder lsb, String jmshost_sim_external,
 			String jmsport_sim_external, String jmsuser, String jmspswd) {
-		lsb.write("#BEGIN---------SlurmProxy.generateScript():sendFailureMsg----------");
+		lsb.newline();
 		lsb.write("sendFailureMsg() {");
-		lsb.write("  echo ${container_prefix} " +
-				" --msg-userid "+jmsuser+
-				" --msg-password "+jmspswd+
-				" --msg-host "+jmshost_sim_external+
-				" --msg-port "+jmsport_sim_external+
-				" --msg-job-host `hostname`"+
-				" --msg-job-userid "+simTask.getUserName()+
-				" --msg-job-simkey "+simTask.getSimKey()+
-				" --msg-job-jobindex "+simTask.getSimulationJob().getJobIndex() +
-				" --msg-job-taskid "+simTask.getTaskID() +
-				" --msg-job-errmsg \"$1\"" +
-				" SendErrorMsg");
-		lsb.write("  ${container_prefix} " +
+		lsb.write("  ${batch_container_prefix} " +
 				" --msg-userid "+jmsuser+
 				" --msg-password "+jmspswd+
 				" --msg-host "+jmshost_sim_external+
@@ -587,79 +574,17 @@ private void sendFailMsgScript(SimulationTask simTask, LineStringBuilder lsb, St
 		lsb.write("  echo 'sent failure message'");
 		lsb.write("  fi");
 		lsb.write("}");
-		lsb.write("#END---------SlurmProxy.generateScript():sendFailureMsg----------");
+		lsb.newline();
 	}
 
-	private void execCommandScript(int ncpus, final boolean isParallel, LineStringBuilder lsb,final boolean hasExitProcessor, ExecutableCommand ec) {
-		lsb.write("echo");
-		lsb.write("#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------"+ec.getCommands().get(0));
+	private void execCommandScript(LineStringBuilder lsb,final boolean hasExitProcessor, ExecutableCommand ec, String container_prefix) {
 		ec.stripPathFromCommand();
-		//
-		// The first token in the command list is always the name of the executable.
-		// if an executable with that name exists in the nativesolvers directory, then use that instead.
-		//
 		String cmd= ec.getJoinedCommands();
-		String exeName= ec.getCommands().get(0);
-		File nativeSolverDir = new File(PropertyLoader.getRequiredProperty(PropertyLoader.nativeSolverDir_External));
-		File nativeExe = new File(nativeSolverDir,exeName);
-		lsb.write("echo \"testing existance of native exe '"+nativeExe.getAbsolutePath()+"' which overrides container invocations\"");
-		lsb.write("nativeExe="+nativeExe.getAbsolutePath());
-		lsb.write("if [ -e \"${nativeExe}\" ]; then");
-		lsb.write("   cmd_prefix=\""+nativeSolverDir.getAbsolutePath()+"/"+"\"");
-		lsb.write("else");
-		lsb.write("   cmd_prefix=\"$container_prefix\"");
-		lsb.write("fi");
-		lsb.write("echo \"cmd_prefix is '${cmd_prefix}'\"");
-		lsb.write("echo \"5 date=`date`\"");
-		if (ec.isParallel()) {
-			if (isParallel) {
-				cmd = buildExeCommand(ncpus, cmd);
-			}
-			else {
-				throw new UnsupportedOperationException("parallel command " + ec.getJoinedCommands() + " called in non-parallel submit");
-			}
-		}
-		lsb.append("echo command = ");
-		lsb.write("${cmd_prefix}" + cmd);
-
-//		lsb.write("(");
-		if (ec.getLdLibraryPath()!=null){
-			lsb.write("if [ -z ${LD_LIBRARY_PATH+x} ]; then");
-			lsb.write("   export LD_LIBRARY_PATH=" + ec.getLdLibraryPath().path);
-			lsb.write("else");
-			lsb.write("   export LD_LIBRARY_PATH=" + ec.getLdLibraryPath().path + ":$LD_LIBRARY_PATH");
-			lsb.write("fi");
-		}
-		//	lsb.write("singdevlooperr=\"Failed to mount squashfs image in (read only)\"");
-		//	lsb.write("let c=0");
-		//	lsb.write("while [ true ]");
-		//	lsb.write("	do");
-		//	lsb.write("	   cmdstdout=$("+"${cmd_prefix}" + cmd+" 2>&1)");
-		//	lsb.write("	   innerstate=$?");
-		//	lsb.write("	   if [[ $cmdstdout != *$singdevlooperr* ]]");
-		//	lsb.write("	   then");
-		//	lsb.write("	       exit $innerstate");
-		//	lsb.write("	   fi");
-		//	lsb.write("	   sleep 6");
-		//	lsb.write("	   let c=c+1");
-		//	lsb.write("	   if [ $c -eq 10 ]");
-		//	lsb.write("	   then");
-		//	lsb.write("	       echo \"Exceeded retry for singularity mount squashfs error\"");
-		//	lsb.write("	       exit $innerstate");
-		//	lsb.write("	   fi");
-		//	lsb.write("	   echo retrying $c of 10...");
-		//	lsb.write("	done");
-		lsb.write("    command=\"${cmd_prefix}" + cmd + "\"");
-		lsb.write("    $command");
-//		lsb.write(")");  // This line needs to stay
-
+		lsb.write(container_prefix + " " + cmd.trim());
 		lsb.write("stat=$?");
-
-		lsb.append("echo ");
-		lsb.append("${cmd_prefix}" + cmd);
-		lsb.write("returned $stat");
-
+		lsb.append("echo returned $stat");
+		lsb.newline();
 		lsb.write("if [ $stat -ne 0 ]; then");
 		if (hasExitProcessor) {
 			lsb.write("\tcallExitProcessor $stat");
@@ -667,111 +592,64 @@ private void execCommandScript(int ncpus, final boolean isParallel, LineStringBu
 		lsb.write("\techo returning $stat to Slurm");
 		lsb.write("\texit $stat");
 		lsb.write("fi");
-		lsb.write("#END---------SlurmProxy.generateScript():ExecutableCommand----------"+ec.getCommands().get(0));
+		lsb.newline();
 	}
 
-	private void slurmInitSingularity(LineStringBuilder lsb, String primaryDataDirExternal,
-			Optional<String> secondaryDataDirExternal, String htclogdir_external, String softwareVersion,
-			String slurm_singularity_local_image_filepath, String slurm_tmpdir,
-			String slurm_central_singularity_dir, String slurm_local_singularity_dir,
-			String simDataDirArchiveExternal, String simDataDirArchiveInternal,
-			File slurm_singularity_central_filepath, String singularity_module_name, String[] environmentVars) {
-		lsb.write("#BEGIN---------SlurmProxy.generateScript():slurmInitSingularity----------");
-		lsb.write("set -x");
-		lsb.newline();
+	private void slurmInitSingularity(LineStringBuilder lsb,
+			String solverDockerName, Optional<String> batchDockerName, List<SingularityBinding> bindings,
+			String slurm_tmpdir, String slurm_singularity_cachedir, String slurm_singularity_pullfolder,
+			String singularity_module_name, String[] environmentVars) {
 		lsb.write("TMPDIR="+slurm_tmpdir);
-		lsb.write("echo \"using TMPDIR=$TMPDIR\"");
 		lsb.write("if [ ! -e $TMPDIR ]; then mkdir -p $TMPDIR ; fi");
 
 		//
 		// Initialize Singularity
 		//
-		lsb.write("echo `hostname`\n");
-		lsb.write("export MODULEPATH=/isg/shared/modulefiles:/tgcapps/modulefiles\n");
-		lsb.write("source /usr/share/Modules/init/bash\n");
-		lsb.write("module load "+singularity_module_name+"\n");
+		lsb.write("echo `hostname`");
+		lsb.write("export MODULEPATH=/isg/shared/modulefiles:/tgcapps/modulefiles");
+		lsb.write("source /usr/share/Modules/init/bash");
+		lsb.write("module load "+singularity_module_name);
+		lsb.write("export SINGULARITY_CACHEDIR="+slurm_singularity_cachedir);
+		lsb.write("export SINGULARITY_PULLFOLDER="+slurm_singularity_pullfolder);
+//		lsb.write("TEMP_DIRNAME=$(mktemp --directory --tmpdir=/local)");
 		lsb.write("echo \"job running on host `hostname -f`\"");
-		lsb.newline();
 		lsb.write("echo \"id is `id`\"");
-		lsb.newline();
-		lsb.write("echo \"bash version is `bash --version`\"");
-		lsb.write("date");
-		lsb.newline();
 		lsb.write("echo ENVIRONMENT");
 		lsb.write("env");
 		lsb.newline();
-
-		lsb.write("container_prefix=");
-		lsb.write("if command -v singularity >/dev/null 2>&1; then");
-		lsb.write("   #");
-		lsb.write("   # Copy of singularity image will be downloaded if not found in "+slurm_singularity_local_image_filepath);
-		lsb.write("   #");
-		lsb.write("   localSingularityImage="+slurm_singularity_local_image_filepath);
-		lsb.write("   if [ ! -e \"$localSingularityImage\" ]; then");
-		lsb.write("       echo \"local singularity image $localSingularityImage not found, trying to download to hpc from \""+slurm_singularity_central_filepath.getAbsolutePath());
-		lsb.write("       mkdir -p "+slurm_local_singularity_dir);
-		lsb.write("       singularitytempfile=$(mktemp -up "+slurm_central_singularity_dir+")");
-		// Copy using locking so when new deployments occur and singularity has to be copied to compute host
-		// and multiple parameter scan land on same compute host at same time and all try to download the singularity image
-		// they won't interfere with each other
-		lsb.write("       flock -E 100 -n /tmp/vcellSingularityLock_"+softwareVersion+".lock sh -c \"cp "+slurm_singularity_central_filepath.getAbsolutePath()+" ${singularitytempfile}"+" ; mv -n ${singularitytempfile} "+slurm_singularity_local_image_filepath+"\"");
-		lsb.write("       theStatus=$?");
-		lsb.write("       if [ $theStatus -eq 100 ]");
-		lsb.write("       then");
-		lsb.write("          echo \"lock in use, waiting for lock owner to copy singularityImage\"");
-		lsb.write("          let c=0");
-		lsb.write("          until [ -f $localSingularityImage ]");
-		lsb.write("          do");
-		lsb.write("             sleep 3");
-		lsb.write("             let c=c+1");
-		lsb.write("             if [ $c -eq 20 ]");
-		lsb.write("             then");
-		lsb.write("                echo \"Exceeded wait time for lock owner to copy singularityImage\"");
-		lsb.write("                break");
-		lsb.write("             fi");
-		lsb.write("          done");
-		lsb.write("       else");
-		lsb.write("          if [ $theStatus -eq 0 ]");
-		lsb.write("          then");
-		lsb.write("             echo copy succeeded");
-		lsb.write("          else");
-		lsb.write("             echo copy failed");
-		lsb.write("          fi");
-		lsb.write("       fi");
-		lsb.write("       rm -f ${singularitytempfile}");
-		lsb.write("       if [ ! -e \"$localSingularityImage\" ]; then");
-		lsb.write("          echo \"Failed to copy $localSingularityImage to hpc from central\"");
-		lsb.write("          exit 1");
-		lsb.write("       else");
-		lsb.write("          echo successful copy from "+slurm_singularity_central_filepath.getAbsolutePath()+" to "+slurm_singularity_local_image_filepath);
-		lsb.write("       fi");
-		lsb.write("    fi");
		StringBuffer singularityEnvironmentVars = new StringBuffer();
+
+		boolean bFirstBind=true;
+		for (SingularityBinding binding : bindings) {
+			if (bFirstBind) {
+				lsb.write("container_bindings=\"--bind " + binding.hostPath + ":" + binding.containerPath + " \"");
+				bFirstBind = false;
+			}else{
+				lsb.write("container_bindings+=\"--bind " + binding.hostPath + ":" + binding.containerPath + " \"");
+			}
+		}
+		boolean bFirstEnv=true;
 		for (String envVar : environmentVars) {
-			singularityEnvironmentVars.append(" --env "+envVar);
-		}
-		lsb.write("    container_prefix=\"singularity run --containall " +
-				"--bind "+primaryDataDirExternal+":/simdata " +
				((secondaryDataDirExternal.isPresent()) ? "--bind "+secondaryDataDirExternal.get()+":/simdata_secondary " : "") +
-				"--bind "+simDataDirArchiveExternal+":"+simDataDirArchiveInternal+" " +
-				"--bind "+htclogdir_external+":/htclogs " +
-				"--bind "+slurm_tmpdir+":/solvertmp " +
-				"$localSingularityImage "+singularityEnvironmentVars+" \"");
-		lsb.write("else");
-		lsb.write("    echo \"Required singularity command not found (maybe 'module load "+singularity_module_name+"' command didn't work) \"");
-		lsb.write("    exit 1");
-//		StringBuffer dockerEnvironmentVars = new StringBuffer();
-//		for (String envVar : environmentVars) {
-//			dockerEnvironmentVars.append(" -e "+envVar);
-//		}
-//		lsb.write("    container_prefix=\"docker run --rm -v "+primaryDataDirExternal+":/simdata -v "+htclogdir_external+":/htclogs -v "+slurm_tmpdir+":/solvertmp "+dockerEnvironmentVars+" "+docker_image+" \"");
-		lsb.write("fi");
-		lsb.write("echo \"container_prefix is '${container_prefix}'\"");
-		lsb.write("echo \"3 date=`date`\"");
-		lsb.write("#END---------SlurmProxy.generateScript():slurmInitSingularity----------");
+			if (bFirstEnv) {
+				lsb.write("container_env=\"--env " + envVar + " \"");
+				bFirstEnv = false;
+			}else {
+				lsb.write("container_env+=\"--env " + envVar + " \"");
+			}
+		}
+		lsb.write("solver_docker_name="+solverDockerName);
+		lsb.write("solver_container_prefix=\"singularity run --containall " +
+				"${container_bindings} " +
+				"${container_env} " +
+				"docker://${solver_docker_name}\"");
+		if (batchDockerName.isPresent()) {
+			lsb.write("batch_docker_name="+batchDockerName.get());
+			lsb.write("batch_container_prefix=\"singularity run --containall " +
+					"${container_bindings} " +
					"${container_env} " +
+					"docker://${batch_docker_name}\"");
+		}
 		lsb.newline();
 	}
@@ -805,8 +683,7 @@ private void slurmScriptInit(String jobName, boolean bPowerUser, MemLimitResults
 		if (nodelist!=null && nodelist.trim().length()>0) {
 			lsb.write("#SBATCH --nodelist="+nodelist);
 		}
-//		lsb.write("echo \"1 date=`date`\"");
-		lsb.write("# VCell SlurmProxy memory limit source="+memoryMBAllowed.getMemLimitSource());
+		lsb.write("# VCell SlurmProxy memory limit source='"+memoryMBAllowed.getMemLimitSource()+"'");
 	}
 
 	@Override
@@ -820,7 +697,7 @@ String createJobScriptText(String jobName, ExecutableCommand.Container commandSe
 		if (LG.isDebugEnabled()) {
 			LG.debug("generating local SLURM submit script for jobName="+jobName);
 		}
-		SlurmProxy.SbatchSolverComponents sbatchSolverComponents = generateScript(jobName, commandSet, ncpus, memSizeMB, postProcessingCommands, simTask);
+		SlurmProxy.SbatchSolverComponents sbatchSolverComponents = generateScript(jobName, commandSet, memSizeMB, postProcessingCommands, simTask);
 
 		StringBuilder scriptContent = new StringBuilder();
 		scriptContent.append(sbatchSolverComponents.getSingularityCommands());
@@ -880,21 +757,17 @@ public HtcJobID submitOptimizationJob(String jobName, File sub_file_internal, Fi
 	}
 
 	String createOptJobScript(String jobName, File optProblemInputFile, File optProblemOutputFile, File optReportFile) throws IOException {
-		String primaryDataDirInternal = PropertyLoader.getRequiredProperty(PropertyLoader.primarySimDataDirInternalProperty);
-		String primaryDataDirExternal = PropertyLoader.getRequiredProperty(PropertyLoader.primarySimDataDirExternalProperty);
-		String htclogdir_external = PropertyLoader.getRequiredProperty(PropertyLoader.htcLogDirExternal);
-		String serverid=PropertyLoader.getRequiredProperty(PropertyLoader.vcellServerIDProperty);
-		String softwareVersion=PropertyLoader.getRequiredProperty(PropertyLoader.vcellSoftwareVersion);
-		String remote_singularity_image = PropertyLoader.getRequiredProperty(PropertyLoader.vcellopt_singularity_image);
-		String slurm_singularity_local_image_filepath = remote_singularity_image;
-//		String docker_image = PropertyLoader.getRequiredProperty(PropertyLoader.vcellbatch_docker_name);
-		String slurm_tmpdir = PropertyLoader.getRequiredProperty(PropertyLoader.slurm_tmpdir);
-		String slurm_central_singularity_dir = PropertyLoader.getRequiredProperty(PropertyLoader.slurm_central_singularity_dir);
-		String slurm_local_singularity_dir = PropertyLoader.getRequiredProperty(PropertyLoader.slurm_local_singularity_dir);
-		String slurm_singularity_module_name = PropertyLoader.getRequiredProperty(PropertyLoader.slurm_singularity_module_name);
-		String simDataDirArchiveExternal = PropertyLoader.getRequiredProperty(PropertyLoader.simDataDirArchiveExternal);
-		String simDataDirArchiveInternal = PropertyLoader.getRequiredProperty(PropertyLoader.simDataDirArchiveInternal);
-		File slurm_singularity_central_filepath = new File(slurm_central_singularity_dir,new File(slurm_singularity_local_image_filepath).getName());
+		final String primaryDataDirInternal = PropertyLoader.getRequiredProperty(PropertyLoader.primarySimDataDirInternalProperty);
+		final String primaryDataDirExternal = PropertyLoader.getRequiredProperty(PropertyLoader.primarySimDataDirExternalProperty);
+		final String htclogdir_external = PropertyLoader.getRequiredProperty(PropertyLoader.htcLogDirExternal);
+		final String slurm_tmpdir = PropertyLoader.getRequiredProperty(PropertyLoader.slurm_tmpdir);
+		final String slurm_singularity_cachedir = PropertyLoader.getRequiredProperty(PropertyLoader.slurm_singularity_cachedir);
+		final String slurm_singularity_pullfolder = PropertyLoader.getRequiredProperty(PropertyLoader.slurm_singularity_pullfolder);
+		final String slurm_singularity_module_name = PropertyLoader.getRequiredProperty(PropertyLoader.slurm_singularity_module_name);
+		final String simDataDirArchiveExternal = PropertyLoader.getRequiredProperty(PropertyLoader.simDataDirArchiveExternal);
+		final String simDataDirArchiveInternal = PropertyLoader.getRequiredProperty(PropertyLoader.simDataDirArchiveInternal);
+		final String solverDockerName = PropertyLoader.getRequiredProperty(PropertyLoader.htc_vcellopt_docker_name);
+		final Optional<String> batchDockerName = Optional.empty();
 
 		MemLimitResults memoryMBAllowed = new MemLimitResults(256, "Optimization Default");
 		String[] environmentVars = new String[] {
@@ -908,13 +781,19 @@ String createOptJobScript(String jobName, File optProblemInputFile, File optProb
 		if (!optDataDirExternal.exists() && !optDataDirExternal.mkdir()){
 			LG.error("failed to make optimization data directory "+optDataDir.getAbsolutePath());
 		}
-//		if (optDataDirExternal.setWritable(true,false))
-		slurmInitSingularity(lsb, optDataDirExternal.getAbsolutePath(), Optional.empty(), htclogdir_external, softwareVersion,
-				slurm_singularity_local_image_filepath, slurm_tmpdir, slurm_central_singularity_dir,
-				slurm_local_singularity_dir, simDataDirArchiveExternal, simDataDirArchiveInternal,
-				slurm_singularity_central_filepath, slurm_singularity_module_name, environmentVars);
-		lsb.write("   cmd_prefix=\"$container_prefix\"");
+		List<SingularityBinding> bindings = List.of(
+				new SingularityBinding(optDataDirExternal.getAbsolutePath(), "/simdata"),
+				new SingularityBinding(slurm_tmpdir, "/solvertmp"),
+				new SingularityBinding(htclogdir_external, "/htclogs"),
+				new SingularityBinding(simDataDirArchiveExternal, simDataDirArchiveInternal)
+		);
+
+		slurmInitSingularity(lsb, solverDockerName, batchDockerName,
+				bindings, slurm_tmpdir, slurm_singularity_cachedir, slurm_singularity_pullfolder,
+				slurm_singularity_module_name, environmentVars);
+
+		lsb.write("cmd_prefix=\"$solver_container_prefix\"");
 		lsb.write("echo \"cmd_prefix is '${cmd_prefix}'\"");
 		lsb.append("echo command = ");
 		lsb.write("${cmd_prefix}" + "");
diff --git a/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java b/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java
index 9cf9f78d67..7b7955546f 100644
--- a/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java
+++ b/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java
@@ -59,10 +59,9 @@ public void setup()
 		setProperty(PropertyLoader.mongodbPortExternal, "30019");
 		setProperty(PropertyLoader.mongodbDatabase, "test");
 		setProperty(PropertyLoader.vcellSoftwareVersion, "Rel_Version_7.6.0_build_28");
-		setProperty(PropertyLoader.vcellbatch_singularity_image, "/state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img");
 		setProperty(PropertyLoader.slurm_tmpdir, "/scratch/vcell");
-		setProperty(PropertyLoader.slurm_central_singularity_dir, "/share/apps/vcell3/singularityImages");
-		setProperty(PropertyLoader.slurm_local_singularity_dir, "/state/partition1/singularityImages");
+		setProperty(PropertyLoader.slurm_singularity_cachedir, "/share/apps/vcell3/singularity/cachdir");
+		setProperty(PropertyLoader.slurm_singularity_pullfolder, "/share/apps/vcell3/singularity/pullfolder");
 		setProperty(PropertyLoader.slurm_singularity_module_name, "singularity/vcell-3.10.0");
 		setProperty(PropertyLoader.simDataDirArchiveExternal, "/share/apps/vcell12/users");
 		setProperty(PropertyLoader.simDataDirArchiveInternal, "/share/apps/vcell12/users");
@@ -72,7 +71,14 @@ public void setup()
 		setProperty(PropertyLoader.simulationPreprocessor, "JavaPreprocessor64");
 		setProperty(PropertyLoader.primarySimDataDirInternalProperty, "/share/apps/vcell3/users");
 
-		setProperty(PropertyLoader.vcellopt_singularity_image, "/state/partition1/singularityImages/ghcr.io_virtualcell_vcell-opt_d6825f4.img");
+		setProperty(PropertyLoader.htc_vcellopt_docker_name, "ghcr.io/virtualcell/vcell-opt:7.6.0.43");
+
+		setProperty(PropertyLoader.htc_vcellfvsolver_solver_list, "Smoldyn,SundialsPDE");
+		setProperty(PropertyLoader.htc_vcellfvsolver_docker_name, "ghcr.io/virtualcell/vcell-fvsolver:v0.9.4");
+		setProperty(PropertyLoader.htc_vcellsolvers_solver_list, "HybridMilstein,StochGibson,Smoldyn,MovingBoundary,SundialsPDE,CombinedSundials,NFSim");
+		setProperty(PropertyLoader.htc_vcellsolvers_docker_name, "ghcr.io/virtualcell/vcell-solvers:v0.8.1.2");
+		setProperty(PropertyLoader.htc_vcellbatch_solver_list, "RungeKuttaFehlberg,HybridMilstein,StochGibson,Langevin,AdamsMoulton,Smoldyn,MovingBoundary,SundialsPDE,CombinedSundials,NFSim");
+		setProperty(PropertyLoader.htc_vcellbatch_docker_name, "ghcr.io/virtualcell/vcell-batch:7.6.0.43");
 	}
 
 	@AfterEach
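The regenerated fixtures below show the net effect on the submit scripts. One consequence of the new bindings plus commandSet.translatePaths(...) is that solver arguments are rewritten from host paths to container paths, which is why JavaSimExe64 now receives /simdata/... instead of /share/apps/vcell3/users/.... A sketch of that mapping under the test's property values (hypothetical helper; translatePaths is assumed to do a prefix rewrite like this):

import java.io.File;

// Sketch: host -> container path translation, one binding at a time.
public class TranslatePathsSketch {
	static String translate(String arg, File hostDir, File containerDir) {
		String hostPrefix = hostDir.getAbsolutePath();
		return arg.startsWith(hostPrefix)
				? containerDir.getAbsolutePath() + arg.substring(hostPrefix.length())
				: arg;
	}

	public static void main(String[] args) {
		// /share/apps/vcell3/users is bound to /simdata in the fixtures below
		System.out.println(translate(
				"/share/apps/vcell3/users/schaff/SimID_274633859_0__0.simtask.xml",
				new File("/share/apps/vcell3/users"), new File("/simdata")));
		// -> /simdata/schaff/SimID_274633859_0__0.simtask.xml
	}
}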
limit source=Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB
+# VCell SlurmProxy memory limit source='Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB'
 
-#BEGIN---------SlurmProxy.generateScript():slurmInitSingularity----------
-set -x
-
 TMPDIR=/scratch/vcell
-echo "using TMPDIR=$TMPDIR"
 if [ ! -e $TMPDIR ]; then mkdir -p $TMPDIR ; fi
 echo `hostname`
-
 export MODULEPATH=/isg/shared/modulefiles:/tgcapps/modulefiles
- source /usr/share/Modules/init/bash
- module load singularity/vcell-3.10.0
-
+export SINGULARITY_CACHEDIR=/share/apps/vcell3/singularity/cachdir
+export SINGULARITY_PULLFOLDER=/share/apps/vcell3/singularity/pullfolder
 echo "job running on host `hostname -f`"
-
 echo "id is `id`"
-
-echo "bash version is `bash --version`"
-date
-
 echo ENVIRONMENT
 env
-container_prefix=
-if command -v singularity >/dev/null 2>&1; then
- #
- # Copy of singularity image will be downloaded if not found in /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img
- #
- localSingularityImage=/state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img
- if [ ! -e "$localSingularityImage" ]; then
- echo "local singularity image $localSingularityImage not found, trying to download to hpc from "/share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img
- mkdir -p /state/partition1/singularityImages
- singularitytempfile=$(mktemp -up /share/apps/vcell3/singularityImages)
- flock -E 100 -n /tmp/vcellSingularityLock_Rel_Version_7.6.0_build_28.lock sh -c "cp /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img ${singularitytempfile} ; mv -n ${singularitytempfile} /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img"
- theStatus=$?
- if [ $theStatus -eq 100 ]
- then
- echo "lock in use, waiting for lock owner to copy singularityImage"
- let c=0
- until [ -f $localSingularityImage ]
- do
- sleep 3
- let c=c+1
- if [ $c -eq 20 ]
- then
- echo "Exceeded wait time for lock owner to copy singularityImage"
- break
- fi
- done
- else
- if [ $theStatus -eq 0 ]
- then
- echo copy succeeded
- else
- echo copy failed
- fi
- fi
- rm -f ${singularitytempfile}
- if [ ! -e "$localSingularityImage" ]; then
- echo "Failed to copy $localSingularityImage to hpc from central"
- exit 1
- else
- echo successful copy from /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img to /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img
- fi
- fi
- container_prefix="singularity run --containall --bind /share/apps/vcell3/users:/simdata --bind /share/apps/vcell7/users:/simdata_secondary --bind /share/apps/vcell12/users:/share/apps/vcell12/users --bind /share/apps/vcell3/htclogs:/htclogs --bind /scratch/vcell:/solvertmp $localSingularityImage --env java_mem_Xmx=4096M --env jmshost_sim_internal=rke-wn-01.cam.uchc.edu --env jmsport_sim_internal=31618 --env jmsrestport_sim_internal=30163 --env jmsuser=clientUser --env jmspswd=dummy --env jmsblob_minsize=100000 --env mongodbhost_internal=rke-wn-01.cam.uchc.edu --env mongodbport_internal=30019 --env mongodb_database=test --env primary_datadir_external=/share/apps/vcell3/users --env secondary_datadir_external=/share/apps/vcell7/users --env htclogdir_external=/share/apps/vcell3/htclogs --env softwareVersion=Rel_Version_7.6.0_build_28 --env serverid=REL "
-else
- echo "Required singularity command not found (maybe 'module load singularity/vcell-3.10.0' command didn't work) "
- exit 1
-fi
-echo "container_prefix is '${container_prefix}'"
-echo "3 date=`date`"
-#END---------SlurmProxy.generateScript():slurmInitSingularity----------
+container_bindings="--bind /share/apps/vcell3/users:/simdata "
+container_bindings+="--bind /share/apps/vcell7/users:/simdata_secondary "
+container_bindings+="--bind /share/apps/vcell12/users:/share/apps/vcell12/users "
+container_bindings+="--bind /share/apps/vcell3/htclogs:/htclogs "
+container_bindings+="--bind /scratch/vcell:/solvertmp "
+container_env="--env java_mem_Xmx=4096M "
+container_env+="--env jmshost_sim_internal=rke-wn-01.cam.uchc.edu "
+container_env+="--env jmsport_sim_internal=31618 "
+container_env+="--env jmsrestport_sim_internal=30163 "
+container_env+="--env jmsuser=clientUser "
+container_env+="--env jmspswd=dummy "
+container_env+="--env jmsblob_minsize=100000 "
+container_env+="--env mongodbhost_internal=rke-wn-01.cam.uchc.edu "
+container_env+="--env mongodbport_internal=30019 "
+container_env+="--env mongodb_database=test "
+container_env+="--env primary_datadir_external=/share/apps/vcell3/users "
+container_env+="--env secondary_datadir_external=/share/apps/vcell7/users "
+container_env+="--env htclogdir_external=/share/apps/vcell3/htclogs "
+container_env+="--env softwareVersion=Rel_Version_7.6.0_build_28 "
+container_env+="--env serverid=REL "
+solver_docker_name=ghcr.io/virtualcell/vcell-batch:7.6.0.43
+solver_container_prefix="singularity run --containall ${container_bindings} ${container_env} docker://${solver_docker_name}"
+batch_docker_name=ghcr.io/virtualcell/vcell-batch:7.6.0.43
+batch_container_prefix="singularity run --containall ${container_bindings} ${container_env} docker://${batch_docker_name}"
+
-#BEGIN---------SlurmProxy.generateScript():sendFailureMsg----------
 sendFailureMsg() {
- echo ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274633859 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg
- ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274633859 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg
+ ${batch_container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274633859 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg
 stat=$?
 if [[ $stat -ne 0 ]]; then
 echo 'failed to send error message, retcode=$stat'
@@ -97,41 +59,23 @@ sendFailureMsg() {
 echo 'sent failure message'
 fi
 }
-#END---------SlurmProxy.generateScript():sendFailureMsg----------
-#BEGIN---------SlurmProxy.generateScript():hasExitProcessor----------
+
+
 callExitProcessor( ) {
- echo exitCommand = ${container_prefix}JavaPostprocessor64 274633859 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274633859_0_0.slurm.sub
- ${container_prefix}JavaPostprocessor64 274633859 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274633859_0_0.slurm.sub
+ ${batch_container_prefix} JavaPostprocessor64 274633859 schaff 17 0 0 $1 /htclogs/V_REL_274633859_0_0.slurm.sub
 }
-#END---------SlurmProxy.generateScript():hasExitProcessor----------
-echo
-echo "1 date=`date`"
-echo
-#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------JavaSimExe64
-echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/JavaSimExe64' which overrides container invocations"
-nativeExe=/share/apps/vcell3/nativesolvers/JavaSimExe64
-if [ -e "${nativeExe}" ]; then
- cmd_prefix="/share/apps/vcell3/nativesolvers/"
-else
- cmd_prefix="$container_prefix"
-fi
-echo "cmd_prefix is '${cmd_prefix}'"
-echo "5 date=`date`"
-echo command = ${cmd_prefix}JavaSimExe64 /share/apps/vcell3/users/schaff/SimID_274633859_0__0.simtask.xml /share/apps/vcell3/users/schaff
- command="${cmd_prefix}JavaSimExe64 /share/apps/vcell3/users/schaff/SimID_274633859_0__0.simtask.xml /share/apps/vcell3/users/schaff "
- $command
+
+${solver_container_prefix} JavaSimExe64 /simdata/schaff/SimID_274633859_0__0.simtask.xml /simdata/schaff
 stat=$?
-echo ${cmd_prefix}JavaSimExe64 /share/apps/vcell3/users/schaff/SimID_274633859_0__0.simtask.xml /share/apps/vcell3/users/schaff returned $stat
+echo returned $stat
 if [ $stat -ne 0 ]; then
 callExitProcessor $stat
 echo returning $stat to Slurm
 exit $stat
 fi
-#END---------SlurmProxy.generateScript():ExecutableCommand----------JavaSimExe64
-callExitProcessor 0
-
-#Following commands (if any) are read by JavaPostProcessor64
+callExitProcessor 0
+#Following commands (if any) are read by JavaPostProcessor64
\ No newline at end of file
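All of the fixture scripts in this patch build the same kind of `singularity run` prefix and point it at a `docker://` URI instead of a pre-staged local `.img` file, with SINGULARITY_CACHEDIR and SINGULARITY_PULLFOLDER directing where the pulled image is converted and cached. A minimal way to exercise that invocation style by hand, outside Slurm; the /tmp paths, the use of `singularity exec`, and /bin/ls as a probe command are illustrative assumptions, not part of the generated scripts:

    # throwaway cache locations; the real scripts point these at /share/apps/vcell3/singularity
    export SINGULARITY_CACHEDIR=/tmp/singularity/cachedir
    export SINGULARITY_PULLFOLDER=/tmp/singularity/pullfolder
    mkdir -p "$SINGULARITY_CACHEDIR" "$SINGULARITY_PULLFOLDER" /tmp/simdata
    # the first run pulls the OCI image and converts it to a cached SIF; later runs reuse it
    singularity exec --containall --bind /tmp/simdata:/simdata \
        docker://ghcr.io/virtualcell/vcell-batch:7.6.0.43 /bin/ls /simdata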
diff --git a/vcell-server/src/test/resources/slurm_fixtures/cvode/V_REL_274630682_0_0.slurm.sub b/vcell-server/src/test/resources/slurm_fixtures/cvode/V_REL_274630682_0_0.slurm.sub
index e3e41bf6a5..3e4a33399d 100644
--- a/vcell-server/src/test/resources/slurm_fixtures/cvode/V_REL_274630682_0_0.slurm.sub
+++ b/vcell-server/src/test/resources/slurm_fixtures/cvode/V_REL_274630682_0_0.slurm.sub
@@ -8,88 +8,50 @@
 #SBATCH --mem=4096M
 #SBATCH --no-kill
 #SBATCH --no-requeue
-# VCell SlurmProxy memory limit source=Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB
+# VCell SlurmProxy memory limit source='Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB'
 
-#BEGIN---------SlurmProxy.generateScript():slurmInitSingularity----------
-set -x
-
 TMPDIR=/scratch/vcell
-echo "using TMPDIR=$TMPDIR"
 if [ ! -e $TMPDIR ]; then mkdir -p $TMPDIR ; fi
 echo `hostname`
-
 export MODULEPATH=/isg/shared/modulefiles:/tgcapps/modulefiles
- source /usr/share/Modules/init/bash
- module load singularity/vcell-3.10.0
-
+export SINGULARITY_CACHEDIR=/share/apps/vcell3/singularity/cachdir
+export SINGULARITY_PULLFOLDER=/share/apps/vcell3/singularity/pullfolder
 echo "job running on host `hostname -f`"
-
 echo "id is `id`"
-
-echo "bash version is `bash --version`"
-date
-
 echo ENVIRONMENT
 env
-container_prefix=
-if command -v singularity >/dev/null 2>&1; then
- #
- # Copy of singularity image will be downloaded if not found in /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img
- #
- localSingularityImage=/state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img
- if [ ! -e "$localSingularityImage" ]; then
- echo "local singularity image $localSingularityImage not found, trying to download to hpc from "/share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img
- mkdir -p /state/partition1/singularityImages
- singularitytempfile=$(mktemp -up /share/apps/vcell3/singularityImages)
- flock -E 100 -n /tmp/vcellSingularityLock_Rel_Version_7.6.0_build_28.lock sh -c "cp /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img ${singularitytempfile} ; mv -n ${singularitytempfile} /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img"
- theStatus=$?
- if [ $theStatus -eq 100 ]
- then
- echo "lock in use, waiting for lock owner to copy singularityImage"
- let c=0
- until [ -f $localSingularityImage ]
- do
- sleep 3
- let c=c+1
- if [ $c -eq 20 ]
- then
- echo "Exceeded wait time for lock owner to copy singularityImage"
- break
- fi
- done
- else
- if [ $theStatus -eq 0 ]
- then
- echo copy succeeded
- else
- echo copy failed
- fi
- fi
- rm -f ${singularitytempfile}
- if [ ! -e "$localSingularityImage" ]; then
- echo "Failed to copy $localSingularityImage to hpc from central"
- exit 1
- else
- echo successful copy from /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img to /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img
- fi
- fi
- container_prefix="singularity run --containall --bind /share/apps/vcell3/users:/simdata --bind /share/apps/vcell7/users:/simdata_secondary --bind /share/apps/vcell12/users:/share/apps/vcell12/users --bind /share/apps/vcell3/htclogs:/htclogs --bind /scratch/vcell:/solvertmp $localSingularityImage --env java_mem_Xmx=4096M --env jmshost_sim_internal=rke-wn-01.cam.uchc.edu --env jmsport_sim_internal=31618 --env jmsrestport_sim_internal=30163 --env jmsuser=clientUser --env jmspswd=dummy --env jmsblob_minsize=100000 --env mongodbhost_internal=rke-wn-01.cam.uchc.edu --env mongodbport_internal=30019 --env mongodb_database=test --env primary_datadir_external=/share/apps/vcell3/users --env secondary_datadir_external=/share/apps/vcell7/users --env htclogdir_external=/share/apps/vcell3/htclogs --env softwareVersion=Rel_Version_7.6.0_build_28 --env serverid=REL "
-else
- echo "Required singularity command not found (maybe 'module load singularity/vcell-3.10.0' command didn't work) "
- exit 1
-fi
-echo "container_prefix is '${container_prefix}'"
-echo "3 date=`date`"
-#END---------SlurmProxy.generateScript():slurmInitSingularity----------
+container_bindings="--bind /share/apps/vcell3/users:/simdata "
+container_bindings+="--bind /share/apps/vcell7/users:/simdata_secondary "
+container_bindings+="--bind /share/apps/vcell12/users:/share/apps/vcell12/users "
+container_bindings+="--bind /share/apps/vcell3/htclogs:/htclogs "
+container_bindings+="--bind /scratch/vcell:/solvertmp "
+container_env="--env java_mem_Xmx=4096M "
+container_env+="--env jmshost_sim_internal=rke-wn-01.cam.uchc.edu "
+container_env+="--env jmsport_sim_internal=31618 "
+container_env+="--env jmsrestport_sim_internal=30163 "
+container_env+="--env jmsuser=clientUser "
+container_env+="--env jmspswd=dummy "
+container_env+="--env jmsblob_minsize=100000 "
+container_env+="--env mongodbhost_internal=rke-wn-01.cam.uchc.edu "
+container_env+="--env mongodbport_internal=30019 "
+container_env+="--env mongodb_database=test "
+container_env+="--env primary_datadir_external=/share/apps/vcell3/users "
+container_env+="--env secondary_datadir_external=/share/apps/vcell7/users "
+container_env+="--env htclogdir_external=/share/apps/vcell3/htclogs "
+container_env+="--env softwareVersion=Rel_Version_7.6.0_build_28 "
+container_env+="--env serverid=REL "
+solver_docker_name=ghcr.io/virtualcell/vcell-solvers:v0.8.1.2
+solver_container_prefix="singularity run --containall ${container_bindings} ${container_env} docker://${solver_docker_name}"
+batch_docker_name=ghcr.io/virtualcell/vcell-batch:7.6.0.43
+batch_container_prefix="singularity run --containall ${container_bindings} ${container_env} docker://${batch_docker_name}"
+
-#BEGIN---------SlurmProxy.generateScript():sendFailureMsg----------
 sendFailureMsg() {
- echo ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274630682 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg
- ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274630682 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg
+ ${batch_container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274630682 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg
 stat=$?
 if [[ $stat -ne 0 ]]; then
 echo 'failed to send error message, retcode=$stat'
@@ -97,68 +59,32 @@ sendFailureMsg() {
 echo 'sent failure message'
 fi
 }
-#END---------SlurmProxy.generateScript():sendFailureMsg----------
-#BEGIN---------SlurmProxy.generateScript():hasExitProcessor----------
+
+
 callExitProcessor( ) {
- echo exitCommand = ${container_prefix}JavaPostprocessor64 274630682 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274630682_0_0.slurm.sub
- ${container_prefix}JavaPostprocessor64 274630682 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274630682_0_0.slurm.sub
+ ${batch_container_prefix} JavaPostprocessor64 274630682 schaff 17 0 0 $1 /htclogs/V_REL_274630682_0_0.slurm.sub
 }
-#END---------SlurmProxy.generateScript():hasExitProcessor----------
-echo
-echo
-#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------JavaPreprocessor64
-echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/JavaPreprocessor64' which overrides container invocations"
-nativeExe=/share/apps/vcell3/nativesolvers/JavaPreprocessor64
-if [ -e "${nativeExe}" ]; then
- cmd_prefix="/share/apps/vcell3/nativesolvers/"
-else
- cmd_prefix="$container_prefix"
-fi
-echo "cmd_prefix is '${cmd_prefix}'"
-echo "5 date=`date`"
-echo command = ${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274630682_0__0.simtask.xml /share/apps/vcell3/users/schaff
- command="${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274630682_0__0.simtask.xml /share/apps/vcell3/users/schaff "
- $command
+
+
+${batch_container_prefix} JavaPreprocessor64 /simdata/schaff/SimID_274630682_0__0.simtask.xml /simdata/schaff
 stat=$?
-echo ${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274630682_0__0.simtask.xml /share/apps/vcell3/users/schaff returned $stat
+echo returned $stat
 if [ $stat -ne 0 ]; then
 callExitProcessor $stat
 echo returning $stat to Slurm
 exit $stat
 fi
-#END---------SlurmProxy.generateScript():ExecutableCommand----------JavaPreprocessor64
-echo "1 date=`date`"
-echo
-#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------/usr/local/app/localsolvers/linux64/SundialsSolverStandalone_x64
-echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/SundialsSolverStandalone_x64' which overrides container invocations"
-nativeExe=/share/apps/vcell3/nativesolvers/SundialsSolverStandalone_x64
-if [ -e "${nativeExe}" ]; then
- cmd_prefix="/share/apps/vcell3/nativesolvers/"
-else
- cmd_prefix="$container_prefix"
-fi
-echo "cmd_prefix is '${cmd_prefix}'"
-echo "5 date=`date`"
-echo command = ${cmd_prefix}SundialsSolverStandalone_x64 /share/apps/vcell3/users/schaff/SimID_274630682_0_.cvodeInput /share/apps/vcell3/users/schaff/SimID_274630682_0_.ida -tid 0
-if [ -z ${LD_LIBRARY_PATH+x} ]; then
- export LD_LIBRARY_PATH=/usr/local/app/localsolvers/linux64
-else
- export LD_LIBRARY_PATH=/usr/local/app/localsolvers/linux64:$LD_LIBRARY_PATH
-fi
- command="${cmd_prefix}SundialsSolverStandalone_x64 /share/apps/vcell3/users/schaff/SimID_274630682_0_.cvodeInput /share/apps/vcell3/users/schaff/SimID_274630682_0_.ida -tid 0 "
- $command
+${solver_container_prefix} SundialsSolverStandalone_x64 /simdata/schaff/SimID_274630682_0_.cvodeInput /simdata/schaff/SimID_274630682_0_.ida -tid 0
 stat=$?
-echo ${cmd_prefix}SundialsSolverStandalone_x64 /share/apps/vcell3/users/schaff/SimID_274630682_0_.cvodeInput /share/apps/vcell3/users/schaff/SimID_274630682_0_.ida -tid 0 returned $stat
+echo returned $stat
 if [ $stat -ne 0 ]; then
 callExitProcessor $stat
 echo returning $stat to Slurm
 exit $stat
 fi
-#END---------SlurmProxy.generateScript():ExecutableCommand----------SundialsSolverStandalone_x64
-callExitProcessor 0
-
-#Following commands (if any) are read by JavaPostProcessor64
+callExitProcessor 0
+#Following commands (if any) are read by JavaPostProcessor64
\ No newline at end of file
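As in every fixture below, `container_bindings` and `container_env` are accumulated as flat strings and later expanded unquoted inside the two prefixes, so shell word splitting turns each `--bind` and `--env` back into its own argument; the scheme relies on none of the values containing spaces. A generic sketch of the same idiom (the image name and host paths here are placeholders):

    bindings="--bind /host/data:/simdata "
    bindings+="--bind /host/logs:/htclogs "
    envflags="--env serverid=REL "
    # the expansions are deliberately left unquoted: quoting them would glue
    # all of the flags into a single argument and break the invocation
    singularity exec --containall ${bindings} ${envflags} docker://alpine:3.19 /bin/true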
diff --git a/vcell-server/src/test/resources/slurm_fixtures/finite_volume/V_REL_274514696_0_0.slurm.sub b/vcell-server/src/test/resources/slurm_fixtures/finite_volume/V_REL_274514696_0_0.slurm.sub
index 8356af8b47..4ac98ad675 100644
--- a/vcell-server/src/test/resources/slurm_fixtures/finite_volume/V_REL_274514696_0_0.slurm.sub
+++ b/vcell-server/src/test/resources/slurm_fixtures/finite_volume/V_REL_274514696_0_0.slurm.sub
@@ -8,88 +8,50 @@
 #SBATCH --mem=4096M
 #SBATCH --no-kill
 #SBATCH --no-requeue
-# VCell SlurmProxy memory limit source=Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB
+# VCell SlurmProxy memory limit source='Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB'
 
-#BEGIN---------SlurmProxy.generateScript():slurmInitSingularity----------
-set -x
-
 TMPDIR=/scratch/vcell
-echo "using TMPDIR=$TMPDIR"
 if [ ! -e $TMPDIR ]; then mkdir -p $TMPDIR ; fi
 echo `hostname`
-
 export MODULEPATH=/isg/shared/modulefiles:/tgcapps/modulefiles
- source /usr/share/Modules/init/bash
- module load singularity/vcell-3.10.0
-
+export SINGULARITY_CACHEDIR=/share/apps/vcell3/singularity/cachdir
+export SINGULARITY_PULLFOLDER=/share/apps/vcell3/singularity/pullfolder
 echo "job running on host `hostname -f`"
-
 echo "id is `id`"
-
-echo "bash version is `bash --version`"
-date
-
 echo ENVIRONMENT
 env
-container_prefix=
-if command -v singularity >/dev/null 2>&1; then
- #
- # Copy of singularity image will be downloaded if not found in /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img
- #
- localSingularityImage=/state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img
- if [ ! -e "$localSingularityImage" ]; then
- echo "local singularity image $localSingularityImage not found, trying to download to hpc from "/share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img
- mkdir -p /state/partition1/singularityImages
- singularitytempfile=$(mktemp -up /share/apps/vcell3/singularityImages)
- flock -E 100 -n /tmp/vcellSingularityLock_Rel_Version_7.6.0_build_28.lock sh -c "cp /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img ${singularitytempfile} ; mv -n ${singularitytempfile} /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img"
- theStatus=$?
- if [ $theStatus -eq 100 ]
- then
- echo "lock in use, waiting for lock owner to copy singularityImage"
- let c=0
- until [ -f $localSingularityImage ]
- do
- sleep 3
- let c=c+1
- if [ $c -eq 20 ]
- then
- echo "Exceeded wait time for lock owner to copy singularityImage"
- break
- fi
- done
- else
- if [ $theStatus -eq 0 ]
- then
- echo copy succeeded
- else
- echo copy failed
- fi
- fi
- rm -f ${singularitytempfile}
- if [ ! -e "$localSingularityImage" ]; then
- echo "Failed to copy $localSingularityImage to hpc from central"
- exit 1
- else
- echo successful copy from /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img to /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img
- fi
- fi
- container_prefix="singularity run --containall --bind /share/apps/vcell3/users:/simdata --bind /share/apps/vcell7/users:/simdata_secondary --bind /share/apps/vcell12/users:/share/apps/vcell12/users --bind /share/apps/vcell3/htclogs:/htclogs --bind /scratch/vcell:/solvertmp $localSingularityImage --env java_mem_Xmx=4096M --env jmshost_sim_internal=rke-wn-01.cam.uchc.edu --env jmsport_sim_internal=31618 --env jmsrestport_sim_internal=30163 --env jmsuser=clientUser --env jmspswd=dummy --env jmsblob_minsize=100000 --env mongodbhost_internal=rke-wn-01.cam.uchc.edu --env mongodbport_internal=30019 --env mongodb_database=test --env primary_datadir_external=/share/apps/vcell3/users --env secondary_datadir_external=/share/apps/vcell7/users --env htclogdir_external=/share/apps/vcell3/htclogs --env softwareVersion=Rel_Version_7.6.0_build_28 --env serverid=REL "
-else
- echo "Required singularity command not found (maybe 'module load singularity/vcell-3.10.0' command didn't work) "
- exit 1
-fi
-echo "container_prefix is '${container_prefix}'"
-echo "3 date=`date`"
-#END---------SlurmProxy.generateScript():slurmInitSingularity----------
+container_bindings="--bind /share/apps/vcell3/users:/simdata "
+container_bindings+="--bind /share/apps/vcell7/users:/simdata_secondary "
+container_bindings+="--bind /share/apps/vcell12/users:/share/apps/vcell12/users "
+container_bindings+="--bind /share/apps/vcell3/htclogs:/htclogs "
+container_bindings+="--bind /scratch/vcell:/solvertmp "
+container_env="--env java_mem_Xmx=4096M "
+container_env+="--env jmshost_sim_internal=rke-wn-01.cam.uchc.edu "
+container_env+="--env jmsport_sim_internal=31618 "
+container_env+="--env jmsrestport_sim_internal=30163 "
+container_env+="--env jmsuser=clientUser "
+container_env+="--env jmspswd=dummy "
+container_env+="--env jmsblob_minsize=100000 "
+container_env+="--env mongodbhost_internal=rke-wn-01.cam.uchc.edu "
+container_env+="--env mongodbport_internal=30019 "
+container_env+="--env mongodb_database=test "
+container_env+="--env primary_datadir_external=/share/apps/vcell3/users "
+container_env+="--env secondary_datadir_external=/share/apps/vcell7/users "
+container_env+="--env htclogdir_external=/share/apps/vcell3/htclogs "
+container_env+="--env softwareVersion=Rel_Version_7.6.0_build_28 "
+container_env+="--env serverid=REL "
+solver_docker_name=ghcr.io/virtualcell/vcell-fvsolver:v0.9.4
+solver_container_prefix="singularity run --containall ${container_bindings} ${container_env} docker://${solver_docker_name}"
+batch_docker_name=ghcr.io/virtualcell/vcell-batch:7.6.0.43
+batch_container_prefix="singularity run --containall ${container_bindings} ${container_env} docker://${batch_docker_name}"
+
-#BEGIN---------SlurmProxy.generateScript():sendFailureMsg----------
 sendFailureMsg() {
- echo ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274514696 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg
- ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274514696 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg
+ ${batch_container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274514696 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg
 stat=$?
 if [[ $stat -ne 0 ]]; then
 echo 'failed to send error message, retcode=$stat'
@@ -97,68 +59,32 @@ sendFailureMsg() {
 echo 'sent failure message'
 fi
 }
-#END---------SlurmProxy.generateScript():sendFailureMsg----------
-#BEGIN---------SlurmProxy.generateScript():hasExitProcessor----------
+
+
 callExitProcessor( ) {
- echo exitCommand = ${container_prefix}JavaPostprocessor64 274514696 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274514696_0_0.slurm.sub
- ${container_prefix}JavaPostprocessor64 274514696 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274514696_0_0.slurm.sub
+ ${batch_container_prefix} JavaPostprocessor64 274514696 schaff 17 0 0 $1 /htclogs/V_REL_274514696_0_0.slurm.sub
 }
-#END---------SlurmProxy.generateScript():hasExitProcessor----------
-echo
-echo
-#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------JavaPreprocessor64
-echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/JavaPreprocessor64' which overrides container invocations"
-nativeExe=/share/apps/vcell3/nativesolvers/JavaPreprocessor64
-if [ -e "${nativeExe}" ]; then
- cmd_prefix="/share/apps/vcell3/nativesolvers/"
-else
- cmd_prefix="$container_prefix"
-fi
-echo "cmd_prefix is '${cmd_prefix}'"
-echo "5 date=`date`"
-echo command = ${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274514696_0__0.simtask.xml /share/apps/vcell3/users/schaff
- command="${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274514696_0__0.simtask.xml /share/apps/vcell3/users/schaff "
- $command
+
+
+${batch_container_prefix} JavaPreprocessor64 /simdata/schaff/SimID_274514696_0__0.simtask.xml /simdata/schaff
 stat=$?
-echo ${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274514696_0__0.simtask.xml /share/apps/vcell3/users/schaff returned $stat
+echo returned $stat
 if [ $stat -ne 0 ]; then
 callExitProcessor $stat
 echo returning $stat to Slurm
 exit $stat
 fi
-#END---------SlurmProxy.generateScript():ExecutableCommand----------JavaPreprocessor64
-echo "1 date=`date`"
-echo
-#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------/usr/local/app/localsolvers/linux64/FiniteVolume_x64
-echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/FiniteVolume_x64' which overrides container invocations"
-nativeExe=/share/apps/vcell3/nativesolvers/FiniteVolume_x64
-if [ -e "${nativeExe}" ]; then
- cmd_prefix="/share/apps/vcell3/nativesolvers/"
-else
- cmd_prefix="$container_prefix"
-fi
-echo "cmd_prefix is '${cmd_prefix}'"
-echo "5 date=`date`"
-echo command = ${cmd_prefix}FiniteVolume_x64 /share/apps/vcell3/users/schaff/SimID_274514696_0_.fvinput -tid 0
-if [ -z ${LD_LIBRARY_PATH+x} ]; then
- export LD_LIBRARY_PATH=/usr/local/app/localsolvers/linux64
-else
- export LD_LIBRARY_PATH=/usr/local/app/localsolvers/linux64:$LD_LIBRARY_PATH
-fi
- command="${cmd_prefix}FiniteVolume_x64 /share/apps/vcell3/users/schaff/SimID_274514696_0_.fvinput -tid 0 "
- $command
+${solver_container_prefix} FiniteVolume_x64 /simdata/schaff/SimID_274514696_0_.fvinput -tid 0
 stat=$?
-echo ${cmd_prefix}FiniteVolume_x64 /share/apps/vcell3/users/schaff/SimID_274514696_0_.fvinput -tid 0 returned $stat
+echo returned $stat
 if [ $stat -ne 0 ]; then
 callExitProcessor $stat
 echo returning $stat to Slurm
 exit $stat
 fi
-#END---------SlurmProxy.generateScript():ExecutableCommand----------FiniteVolume_x64
-callExitProcessor 0
-
-#Following commands (if any) are read by JavaPostProcessor64
+callExitProcessor 0
+#Following commands (if any) are read by JavaPostProcessor64
\ No newline at end of file
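Every executable step in these generated scripts repeats the same status-propagation idiom: capture `$?`, hand a non-zero status to `callExitProcessor` so VCell records the failure, then return the same status to Slurm. A hypothetical helper that captures that control flow; `run_step` is not part of the generated scripts and is shown only to document the pattern:

    run_step() {
        "$@"
        local stat=$?
        if [ $stat -ne 0 ]; then
            callExitProcessor $stat   # report the failure to VCell before exiting
            echo returning $stat to Slurm
            exit $stat
        fi
    }
    # usage sketch: run_step ${solver_container_prefix} FiniteVolume_x64 /simdata/schaff/SimID_274514696_0_.fvinput -tid 0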
diff --git a/vcell-server/src/test/resources/slurm_fixtures/gibson/V_REL_274635122_0_0.slurm.sub b/vcell-server/src/test/resources/slurm_fixtures/gibson/V_REL_274635122_0_0.slurm.sub
index 66efc3e6b9..742352b14c 100644
--- a/vcell-server/src/test/resources/slurm_fixtures/gibson/V_REL_274635122_0_0.slurm.sub
+++ b/vcell-server/src/test/resources/slurm_fixtures/gibson/V_REL_274635122_0_0.slurm.sub
@@ -8,88 +8,50 @@
 #SBATCH --mem=4096M
 #SBATCH --no-kill
 #SBATCH --no-requeue
-# VCell SlurmProxy memory limit source=Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB
+# VCell SlurmProxy memory limit source='Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB'
 
-#BEGIN---------SlurmProxy.generateScript():slurmInitSingularity----------
-set -x
-
 TMPDIR=/scratch/vcell
-echo "using TMPDIR=$TMPDIR"
 if [ ! -e $TMPDIR ]; then mkdir -p $TMPDIR ; fi
 echo `hostname`
-
 export MODULEPATH=/isg/shared/modulefiles:/tgcapps/modulefiles
- source /usr/share/Modules/init/bash
- module load singularity/vcell-3.10.0
-
+export SINGULARITY_CACHEDIR=/share/apps/vcell3/singularity/cachdir
+export SINGULARITY_PULLFOLDER=/share/apps/vcell3/singularity/pullfolder
 echo "job running on host `hostname -f`"
-
 echo "id is `id`"
-
-echo "bash version is `bash --version`"
-date
-
 echo ENVIRONMENT
 env
-container_prefix=
-if command -v singularity >/dev/null 2>&1; then
- #
- # Copy of singularity image will be downloaded if not found in /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img
- #
- localSingularityImage=/state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img
- if [ ! -e "$localSingularityImage" ]; then
- echo "local singularity image $localSingularityImage not found, trying to download to hpc from "/share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img
- mkdir -p /state/partition1/singularityImages
- singularitytempfile=$(mktemp -up /share/apps/vcell3/singularityImages)
- flock -E 100 -n /tmp/vcellSingularityLock_Rel_Version_7.6.0_build_28.lock sh -c "cp /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img ${singularitytempfile} ; mv -n ${singularitytempfile} /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img"
- theStatus=$?
- if [ $theStatus -eq 100 ]
- then
- echo "lock in use, waiting for lock owner to copy singularityImage"
- let c=0
- until [ -f $localSingularityImage ]
- do
- sleep 3
- let c=c+1
- if [ $c -eq 20 ]
- then
- echo "Exceeded wait time for lock owner to copy singularityImage"
- break
- fi
- done
- else
- if [ $theStatus -eq 0 ]
- then
- echo copy succeeded
- else
- echo copy failed
- fi
- fi
- rm -f ${singularitytempfile}
- if [ ! -e "$localSingularityImage" ]; then
- echo "Failed to copy $localSingularityImage to hpc from central"
- exit 1
- else
- echo successful copy from /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img to /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img
- fi
- fi
- container_prefix="singularity run --containall --bind /share/apps/vcell3/users:/simdata --bind /share/apps/vcell7/users:/simdata_secondary --bind /share/apps/vcell12/users:/share/apps/vcell12/users --bind /share/apps/vcell3/htclogs:/htclogs --bind /scratch/vcell:/solvertmp $localSingularityImage --env java_mem_Xmx=4096M --env jmshost_sim_internal=rke-wn-01.cam.uchc.edu --env jmsport_sim_internal=31618 --env jmsrestport_sim_internal=30163 --env jmsuser=clientUser --env jmspswd=dummy --env jmsblob_minsize=100000 --env mongodbhost_internal=rke-wn-01.cam.uchc.edu --env mongodbport_internal=30019 --env mongodb_database=test --env primary_datadir_external=/share/apps/vcell3/users --env secondary_datadir_external=/share/apps/vcell7/users --env htclogdir_external=/share/apps/vcell3/htclogs --env softwareVersion=Rel_Version_7.6.0_build_28 --env serverid=REL "
-else
- echo "Required singularity command not found (maybe 'module load singularity/vcell-3.10.0' command didn't work) "
- exit 1
-fi
-echo "container_prefix is '${container_prefix}'"
-echo "3 date=`date`"
-#END---------SlurmProxy.generateScript():slurmInitSingularity----------
+container_bindings="--bind /share/apps/vcell3/users:/simdata "
+container_bindings+="--bind /share/apps/vcell7/users:/simdata_secondary "
+container_bindings+="--bind /share/apps/vcell12/users:/share/apps/vcell12/users "
+container_bindings+="--bind /share/apps/vcell3/htclogs:/htclogs "
+container_bindings+="--bind /scratch/vcell:/solvertmp "
+container_env="--env java_mem_Xmx=4096M "
+container_env+="--env jmshost_sim_internal=rke-wn-01.cam.uchc.edu "
+container_env+="--env jmsport_sim_internal=31618 "
+container_env+="--env jmsrestport_sim_internal=30163 "
+container_env+="--env jmsuser=clientUser "
+container_env+="--env jmspswd=dummy "
+container_env+="--env jmsblob_minsize=100000 "
+container_env+="--env mongodbhost_internal=rke-wn-01.cam.uchc.edu "
+container_env+="--env mongodbport_internal=30019 "
+container_env+="--env mongodb_database=test "
+container_env+="--env primary_datadir_external=/share/apps/vcell3/users "
+container_env+="--env secondary_datadir_external=/share/apps/vcell7/users "
+container_env+="--env htclogdir_external=/share/apps/vcell3/htclogs "
+container_env+="--env softwareVersion=Rel_Version_7.6.0_build_28 "
+container_env+="--env serverid=REL "
+solver_docker_name=ghcr.io/virtualcell/vcell-solvers:v0.8.1.2
+solver_container_prefix="singularity run --containall ${container_bindings} ${container_env} docker://${solver_docker_name}"
+batch_docker_name=ghcr.io/virtualcell/vcell-batch:7.6.0.43
+batch_container_prefix="singularity run --containall ${container_bindings} ${container_env} docker://${batch_docker_name}"
+
-#BEGIN---------SlurmProxy.generateScript():sendFailureMsg----------
 sendFailureMsg() {
- echo ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274635122 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg
- ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274635122 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg
+ ${batch_container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274635122 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg
 stat=$?
 if [[ $stat -ne 0 ]]; then
 echo 'failed to send error message, retcode=$stat'
@@ -97,68 +59,32 @@ sendFailureMsg() {
 echo 'sent failure message'
 fi
 }
-#END---------SlurmProxy.generateScript():sendFailureMsg----------
-#BEGIN---------SlurmProxy.generateScript():hasExitProcessor----------
+
+
 callExitProcessor( ) {
- echo exitCommand = ${container_prefix}JavaPostprocessor64 274635122 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274635122_0_0.slurm.sub
- ${container_prefix}JavaPostprocessor64 274635122 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274635122_0_0.slurm.sub
+ ${batch_container_prefix} JavaPostprocessor64 274635122 schaff 17 0 0 $1 /htclogs/V_REL_274635122_0_0.slurm.sub
 }
-#END---------SlurmProxy.generateScript():hasExitProcessor----------
-echo
-echo
-#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------JavaPreprocessor64
-echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/JavaPreprocessor64' which overrides container invocations"
-nativeExe=/share/apps/vcell3/nativesolvers/JavaPreprocessor64
-if [ -e "${nativeExe}" ]; then
- cmd_prefix="/share/apps/vcell3/nativesolvers/"
-else
- cmd_prefix="$container_prefix"
-fi
-echo "cmd_prefix is '${cmd_prefix}'"
-echo "5 date=`date`"
-echo command = ${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274635122_0__0.simtask.xml /share/apps/vcell3/users/schaff
- command="${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274635122_0__0.simtask.xml /share/apps/vcell3/users/schaff "
- $command
+
+
+${batch_container_prefix} JavaPreprocessor64 /simdata/schaff/SimID_274635122_0__0.simtask.xml /simdata/schaff
 stat=$?
-echo ${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274635122_0__0.simtask.xml /share/apps/vcell3/users/schaff returned $stat
+echo returned $stat
 if [ $stat -ne 0 ]; then
 callExitProcessor $stat
 echo returning $stat to Slurm
 exit $stat
 fi
-#END---------SlurmProxy.generateScript():ExecutableCommand----------JavaPreprocessor64
-echo "1 date=`date`"
-echo
-#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------/usr/local/app/localsolvers/linux64/VCellStoch_x64
-echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/VCellStoch_x64' which overrides container invocations"
-nativeExe=/share/apps/vcell3/nativesolvers/VCellStoch_x64
-if [ -e "${nativeExe}" ]; then
- cmd_prefix="/share/apps/vcell3/nativesolvers/"
-else
- cmd_prefix="$container_prefix"
-fi
-echo "cmd_prefix is '${cmd_prefix}'"
-echo "5 date=`date`"
-echo command = ${cmd_prefix}VCellStoch_x64 gibson /share/apps/vcell3/users/schaff/SimID_274635122_0_.stochInput /share/apps/vcell3/users/schaff/SimID_274635122_0_.ida -tid 0
-if [ -z ${LD_LIBRARY_PATH+x} ]; then
- export LD_LIBRARY_PATH=/usr/local/app/localsolvers/linux64
-else
- export LD_LIBRARY_PATH=/usr/local/app/localsolvers/linux64:$LD_LIBRARY_PATH
-fi
- command="${cmd_prefix}VCellStoch_x64 gibson /share/apps/vcell3/users/schaff/SimID_274635122_0_.stochInput /share/apps/vcell3/users/schaff/SimID_274635122_0_.ida -tid 0 "
- $command
+${solver_container_prefix} VCellStoch_x64 gibson /simdata/schaff/SimID_274635122_0_.stochInput /simdata/schaff/SimID_274635122_0_.ida -tid 0
 stat=$?
-echo ${cmd_prefix}VCellStoch_x64 gibson /share/apps/vcell3/users/schaff/SimID_274635122_0_.stochInput /share/apps/vcell3/users/schaff/SimID_274635122_0_.ida -tid 0 returned $stat
+echo returned $stat
 if [ $stat -ne 0 ]; then
 callExitProcessor $stat
 echo returning $stat to Slurm
 exit $stat
 fi
-#END---------SlurmProxy.generateScript():ExecutableCommand----------VCellStoch_x64
-callExitProcessor 0
-
-#Following commands (if any) are read by JavaPostProcessor64
+callExitProcessor 0
+#Following commands (if any) are read by JavaPostProcessor64
\ No newline at end of file
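Because SINGULARITY_PULLFOLDER now lives on shared storage, the image staging that the deleted `flock` block managed by hand can instead happen once, ahead of time, and every node then reuses the cached SIF on later `docker://` runs. A sketch of that one-time pre-pull, assuming the operator can reach ghcr.io from a cluster node (concurrent first pulls into the same pull folder may still race):

    export SINGULARITY_CACHEDIR=/share/apps/vcell3/singularity/cachdir
    export SINGULARITY_PULLFOLDER=/share/apps/vcell3/singularity/pullfolder
    singularity pull docker://ghcr.io/virtualcell/vcell-batch:7.6.0.43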
diff --git a/vcell-server/src/test/resources/slurm_fixtures/gibson_milstein/V_REL_274641698_0_0.slurm.sub b/vcell-server/src/test/resources/slurm_fixtures/gibson_milstein/V_REL_274641698_0_0.slurm.sub
index 944e116d98..c44c6bbb6d 100644
--- a/vcell-server/src/test/resources/slurm_fixtures/gibson_milstein/V_REL_274641698_0_0.slurm.sub
+++ b/vcell-server/src/test/resources/slurm_fixtures/gibson_milstein/V_REL_274641698_0_0.slurm.sub
@@ -8,88 +8,50 @@
 #SBATCH --mem=4096M
 #SBATCH --no-kill
 #SBATCH --no-requeue
-# VCell SlurmProxy memory limit source=Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB
+# VCell SlurmProxy memory limit source='Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB'
 
-#BEGIN---------SlurmProxy.generateScript():slurmInitSingularity----------
-set -x
-
 TMPDIR=/scratch/vcell
-echo "using TMPDIR=$TMPDIR"
 if [ ! -e $TMPDIR ]; then mkdir -p $TMPDIR ; fi
 echo `hostname`
-
 export MODULEPATH=/isg/shared/modulefiles:/tgcapps/modulefiles
- source /usr/share/Modules/init/bash
- module load singularity/vcell-3.10.0
-
+export SINGULARITY_CACHEDIR=/share/apps/vcell3/singularity/cachdir
+export SINGULARITY_PULLFOLDER=/share/apps/vcell3/singularity/pullfolder
 echo "job running on host `hostname -f`"
-
 echo "id is `id`"
-
-echo "bash version is `bash --version`"
-date
-
 echo ENVIRONMENT
 env
-container_prefix=
-if command -v singularity >/dev/null 2>&1; then
- #
- # Copy of singularity image will be downloaded if not found in /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img
- #
- localSingularityImage=/state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img
- if [ ! -e "$localSingularityImage" ]; then
- echo "local singularity image $localSingularityImage not found, trying to download to hpc from "/share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img
- mkdir -p /state/partition1/singularityImages
- singularitytempfile=$(mktemp -up /share/apps/vcell3/singularityImages)
- flock -E 100 -n /tmp/vcellSingularityLock_Rel_Version_7.6.0_build_28.lock sh -c "cp /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img ${singularitytempfile} ; mv -n ${singularitytempfile} /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img"
- theStatus=$?
- if [ $theStatus -eq 100 ]
- then
- echo "lock in use, waiting for lock owner to copy singularityImage"
- let c=0
- until [ -f $localSingularityImage ]
- do
- sleep 3
- let c=c+1
- if [ $c -eq 20 ]
- then
- echo "Exceeded wait time for lock owner to copy singularityImage"
- break
- fi
- done
- else
- if [ $theStatus -eq 0 ]
- then
- echo copy succeeded
- else
- echo copy failed
- fi
- fi
- rm -f ${singularitytempfile}
- if [ ! -e "$localSingularityImage" ]; then
- echo "Failed to copy $localSingularityImage to hpc from central"
- exit 1
- else
- echo successful copy from /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img to /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img
- fi
- fi
- container_prefix="singularity run --containall --bind /share/apps/vcell3/users:/simdata --bind /share/apps/vcell7/users:/simdata_secondary --bind /share/apps/vcell12/users:/share/apps/vcell12/users --bind /share/apps/vcell3/htclogs:/htclogs --bind /scratch/vcell:/solvertmp $localSingularityImage --env java_mem_Xmx=4096M --env jmshost_sim_internal=rke-wn-01.cam.uchc.edu --env jmsport_sim_internal=31618 --env jmsrestport_sim_internal=30163 --env jmsuser=clientUser --env jmspswd=dummy --env jmsblob_minsize=100000 --env mongodbhost_internal=rke-wn-01.cam.uchc.edu --env mongodbport_internal=30019 --env mongodb_database=test --env primary_datadir_external=/share/apps/vcell3/users --env secondary_datadir_external=/share/apps/vcell7/users --env htclogdir_external=/share/apps/vcell3/htclogs --env softwareVersion=Rel_Version_7.6.0_build_28 --env serverid=REL "
-else
- echo "Required singularity command not found (maybe 'module load singularity/vcell-3.10.0' command didn't work) "
- exit 1
-fi
-echo "container_prefix is '${container_prefix}'"
-echo "3 date=`date`"
-#END---------SlurmProxy.generateScript():slurmInitSingularity----------
+container_bindings="--bind /share/apps/vcell3/users:/simdata "
+container_bindings+="--bind /share/apps/vcell7/users:/simdata_secondary "
+container_bindings+="--bind /share/apps/vcell12/users:/share/apps/vcell12/users "
+container_bindings+="--bind /share/apps/vcell3/htclogs:/htclogs "
+container_bindings+="--bind /scratch/vcell:/solvertmp "
+container_env="--env java_mem_Xmx=4096M "
+container_env+="--env jmshost_sim_internal=rke-wn-01.cam.uchc.edu "
+container_env+="--env jmsport_sim_internal=31618 "
+container_env+="--env jmsrestport_sim_internal=30163 "
+container_env+="--env jmsuser=clientUser "
+container_env+="--env jmspswd=dummy "
+container_env+="--env jmsblob_minsize=100000 "
+container_env+="--env mongodbhost_internal=rke-wn-01.cam.uchc.edu "
+container_env+="--env mongodbport_internal=30019 "
+container_env+="--env mongodb_database=test "
+container_env+="--env primary_datadir_external=/share/apps/vcell3/users "
+container_env+="--env secondary_datadir_external=/share/apps/vcell7/users "
+container_env+="--env htclogdir_external=/share/apps/vcell3/htclogs "
+container_env+="--env softwareVersion=Rel_Version_7.6.0_build_28 "
+container_env+="--env serverid=REL "
+solver_docker_name=ghcr.io/virtualcell/vcell-solvers:v0.8.1.2
+solver_container_prefix="singularity run --containall ${container_bindings} ${container_env} docker://${solver_docker_name}"
+batch_docker_name=ghcr.io/virtualcell/vcell-batch:7.6.0.43
+batch_container_prefix="singularity run --containall ${container_bindings} ${container_env} docker://${batch_docker_name}"
+
-#BEGIN---------SlurmProxy.generateScript():sendFailureMsg----------
 sendFailureMsg() {
- echo ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274641698 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg
- ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274641698 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg
+ ${batch_container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274641698 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg
 stat=$?
 if [[ $stat -ne 0 ]]; then
 echo 'failed to send error message, retcode=$stat'
@@ -97,68 +59,32 @@ sendFailureMsg() {
 echo 'sent failure message'
 fi
 }
-#END---------SlurmProxy.generateScript():sendFailureMsg----------
-#BEGIN---------SlurmProxy.generateScript():hasExitProcessor----------
+
+
 callExitProcessor( ) {
- echo exitCommand = ${container_prefix}JavaPostprocessor64 274641698 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274641698_0_0.slurm.sub
- ${container_prefix}JavaPostprocessor64 274641698 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274641698_0_0.slurm.sub
+ ${batch_container_prefix} JavaPostprocessor64 274641698 schaff 17 0 0 $1 /htclogs/V_REL_274641698_0_0.slurm.sub
 }
-#END---------SlurmProxy.generateScript():hasExitProcessor----------
-echo
-echo
-#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------JavaPreprocessor64
-echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/JavaPreprocessor64' which overrides container invocations"
-nativeExe=/share/apps/vcell3/nativesolvers/JavaPreprocessor64
-if [ -e "${nativeExe}" ]; then
- cmd_prefix="/share/apps/vcell3/nativesolvers/"
-else
- cmd_prefix="$container_prefix"
-fi
-echo "cmd_prefix is '${cmd_prefix}'"
-echo "5 date=`date`"
-echo command = ${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274641698_0__0.simtask.xml /share/apps/vcell3/users/schaff
- command="${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274641698_0__0.simtask.xml /share/apps/vcell3/users/schaff "
- $command
+
+
+${batch_container_prefix} JavaPreprocessor64 /simdata/schaff/SimID_274641698_0__0.simtask.xml /simdata/schaff
 stat=$?
-echo ${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274641698_0__0.simtask.xml /share/apps/vcell3/users/schaff returned $stat
+echo returned $stat
 if [ $stat -ne 0 ]; then
 callExitProcessor $stat
 echo returning $stat to Slurm
 exit $stat
 fi
-#END---------SlurmProxy.generateScript():ExecutableCommand----------JavaPreprocessor64
-echo "1 date=`date`"
-echo
-#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------/usr/local/app/localsolvers/linux64/Hybrid_EM_x64
-echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/Hybrid_EM_x64' which overrides container invocations"
-nativeExe=/share/apps/vcell3/nativesolvers/Hybrid_EM_x64
-if [ -e "${nativeExe}" ]; then
- cmd_prefix="/share/apps/vcell3/nativesolvers/"
-else
- cmd_prefix="$container_prefix"
-fi
-echo "cmd_prefix is '${cmd_prefix}'"
-echo "5 date=`date`"
-echo command = ${cmd_prefix}Hybrid_EM_x64 /share/apps/vcell3/users/schaff/SimID_274641698_0_.nc 100.0 10.0 0.01 0.1 -OV -tid 0
-if [ -z ${LD_LIBRARY_PATH+x} ]; then
- export LD_LIBRARY_PATH=/usr/local/app/localsolvers/linux64
-else
- export LD_LIBRARY_PATH=/usr/local/app/localsolvers/linux64:$LD_LIBRARY_PATH
-fi
- command="${cmd_prefix}Hybrid_EM_x64 /share/apps/vcell3/users/schaff/SimID_274641698_0_.nc 100.0 10.0 0.01 0.1 -OV -tid 0 "
- $command
+${solver_container_prefix} Hybrid_EM_x64 /simdata/schaff/SimID_274641698_0_.nc 100.0 10.0 0.01 0.1 -OV -tid 0
 stat=$?
-echo ${cmd_prefix}Hybrid_EM_x64 /share/apps/vcell3/users/schaff/SimID_274641698_0_.nc 100.0 10.0 0.01 0.1 -OV -tid 0 returned $stat
+echo returned $stat
 if [ $stat -ne 0 ]; then
 callExitProcessor $stat
 echo returning $stat to Slurm
 exit $stat
 fi
-#END---------SlurmProxy.generateScript():ExecutableCommand----------Hybrid_EM_x64
-callExitProcessor 0
-
-#Following commands (if any) are read by JavaPostProcessor64
+callExitProcessor 0
+#Following commands (if any) are read by JavaPostProcessor64
\ No newline at end of file
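These `.slurm.sub` files are golden fixtures for the SlurmProxy script generator, so any regenerated script can be checked against its fixture with a plain diff; the /tmp/generated directory below is a hypothetical output location, not something the tests create:

    diff -u vcell-server/src/test/resources/slurm_fixtures/gibson_milstein/V_REL_274641698_0_0.slurm.sub \
        /tmp/generated/V_REL_274641698_0_0.slurm.sub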
diff --git a/vcell-server/src/test/resources/slurm_fixtures/langevin/V_REL_274672135_0_0.slurm.sub b/vcell-server/src/test/resources/slurm_fixtures/langevin/V_REL_274672135_0_0.slurm.sub
index 064eaa8abe..f61745b830 100644
--- a/vcell-server/src/test/resources/slurm_fixtures/langevin/V_REL_274672135_0_0.slurm.sub
+++ b/vcell-server/src/test/resources/slurm_fixtures/langevin/V_REL_274672135_0_0.slurm.sub
@@ -8,88 +8,50 @@
 #SBATCH --mem=4096M
 #SBATCH --no-kill
 #SBATCH --no-requeue
-# VCell SlurmProxy memory limit source=Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB
+# VCell SlurmProxy memory limit source='Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB'
 
-#BEGIN---------SlurmProxy.generateScript():slurmInitSingularity----------
-set -x
-
 TMPDIR=/scratch/vcell
-echo "using TMPDIR=$TMPDIR"
 if [ ! -e $TMPDIR ]; then mkdir -p $TMPDIR ; fi
 echo `hostname`
-
 export MODULEPATH=/isg/shared/modulefiles:/tgcapps/modulefiles
- source /usr/share/Modules/init/bash
- module load singularity/vcell-3.10.0
-
+export SINGULARITY_CACHEDIR=/share/apps/vcell3/singularity/cachdir
+export SINGULARITY_PULLFOLDER=/share/apps/vcell3/singularity/pullfolder
 echo "job running on host `hostname -f`"
-
 echo "id is `id`"
-
-echo "bash version is `bash --version`"
-date
-
 echo ENVIRONMENT
 env
-container_prefix=
-if command -v singularity >/dev/null 2>&1; then
- #
- # Copy of singularity image will be downloaded if not found in /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img
- #
- localSingularityImage=/state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img
- if [ ! -e "$localSingularityImage" ]; then
- echo "local singularity image $localSingularityImage not found, trying to download to hpc from "/share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img
- mkdir -p /state/partition1/singularityImages
- singularitytempfile=$(mktemp -up /share/apps/vcell3/singularityImages)
- flock -E 100 -n /tmp/vcellSingularityLock_Rel_Version_7.6.0_build_28.lock sh -c "cp /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img ${singularitytempfile} ; mv -n ${singularitytempfile} /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img"
- theStatus=$?
- if [ $theStatus -eq 100 ]
- then
- echo "lock in use, waiting for lock owner to copy singularityImage"
- let c=0
- until [ -f $localSingularityImage ]
- do
- sleep 3
- let c=c+1
- if [ $c -eq 20 ]
- then
- echo "Exceeded wait time for lock owner to copy singularityImage"
- break
- fi
- done
- else
- if [ $theStatus -eq 0 ]
- then
- echo copy succeeded
- else
- echo copy failed
- fi
- fi
- rm -f ${singularitytempfile}
- if [ ! -e "$localSingularityImage" ]; then
- echo "Failed to copy $localSingularityImage to hpc from central"
- exit 1
- else
- echo successful copy from /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img to /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img
- fi
- fi
- container_prefix="singularity run --containall --bind /share/apps/vcell3/users:/simdata --bind /share/apps/vcell7/users:/simdata_secondary --bind /share/apps/vcell12/users:/share/apps/vcell12/users --bind /share/apps/vcell3/htclogs:/htclogs --bind /scratch/vcell:/solvertmp $localSingularityImage --env java_mem_Xmx=4096M --env jmshost_sim_internal=rke-wn-01.cam.uchc.edu --env jmsport_sim_internal=31618 --env jmsrestport_sim_internal=30163 --env jmsuser=clientUser --env jmspswd=dummy --env jmsblob_minsize=100000 --env mongodbhost_internal=rke-wn-01.cam.uchc.edu --env mongodbport_internal=30019 --env mongodb_database=test --env primary_datadir_external=/share/apps/vcell3/users --env secondary_datadir_external=/share/apps/vcell7/users --env htclogdir_external=/share/apps/vcell3/htclogs --env softwareVersion=Rel_Version_7.6.0_build_28 --env serverid=REL "
-else
- echo "Required singularity command not found (maybe 'module load singularity/vcell-3.10.0' command didn't work) "
- exit 1
-fi
-echo "container_prefix is '${container_prefix}'"
-echo "3 date=`date`"
-#END---------SlurmProxy.generateScript():slurmInitSingularity----------
+container_bindings="--bind /share/apps/vcell3/users:/simdata "
+container_bindings+="--bind /share/apps/vcell7/users:/simdata_secondary "
+container_bindings+="--bind /share/apps/vcell12/users:/share/apps/vcell12/users "
+container_bindings+="--bind /share/apps/vcell3/htclogs:/htclogs "
+container_bindings+="--bind /scratch/vcell:/solvertmp "
+container_env="--env java_mem_Xmx=4096M "
+container_env+="--env jmshost_sim_internal=rke-wn-01.cam.uchc.edu "
+container_env+="--env jmsport_sim_internal=31618 "
+container_env+="--env jmsrestport_sim_internal=30163 "
+container_env+="--env jmsuser=clientUser "
+container_env+="--env jmspswd=dummy "
+container_env+="--env jmsblob_minsize=100000 "
+container_env+="--env mongodbhost_internal=rke-wn-01.cam.uchc.edu "
+container_env+="--env mongodbport_internal=30019 "
+container_env+="--env mongodb_database=test "
+container_env+="--env primary_datadir_external=/share/apps/vcell3/users "
+container_env+="--env secondary_datadir_external=/share/apps/vcell7/users "
+container_env+="--env htclogdir_external=/share/apps/vcell3/htclogs "
+container_env+="--env softwareVersion=Rel_Version_7.6.0_build_28 "
+container_env+="--env serverid=REL "
+solver_docker_name=ghcr.io/virtualcell/vcell-batch:7.6.0.43
+solver_container_prefix="singularity run --containall ${container_bindings} ${container_env} docker://${solver_docker_name}"
+batch_docker_name=ghcr.io/virtualcell/vcell-batch:7.6.0.43
+batch_container_prefix="singularity run --containall ${container_bindings} ${container_env} docker://${batch_docker_name}"
+
-#BEGIN---------SlurmProxy.generateScript():sendFailureMsg----------
 sendFailureMsg() {
- echo ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274672135 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg
- ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274672135 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg
+ ${batch_container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274672135 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg
 stat=$?
 if [[ $stat -ne 0 ]]; then
 echo 'failed to send error message, retcode=$stat'
@@ -97,67 +59,32 @@ sendFailureMsg() {
 echo 'sent failure message'
 fi
 }
-#END---------SlurmProxy.generateScript():sendFailureMsg----------
-#BEGIN---------SlurmProxy.generateScript():hasExitProcessor----------
+
+
 callExitProcessor( ) {
- echo exitCommand = ${container_prefix}JavaPostprocessor64 274672135 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274672135_0_0.slurm.sub
- ${container_prefix}JavaPostprocessor64 274672135 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274672135_0_0.slurm.sub
+ ${batch_container_prefix} JavaPostprocessor64 274672135 schaff 17 0 0 $1 /htclogs/V_REL_274672135_0_0.slurm.sub
 }
-#END---------SlurmProxy.generateScript():hasExitProcessor----------
-echo
-echo
-#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------JavaPreprocessor64
-echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/JavaPreprocessor64' which overrides container invocations"
-nativeExe=/share/apps/vcell3/nativesolvers/JavaPreprocessor64
-if [ -e "${nativeExe}" ]; then
- cmd_prefix="/share/apps/vcell3/nativesolvers/"
-else
- cmd_prefix="$container_prefix"
-fi
-echo "cmd_prefix is '${cmd_prefix}'"
-echo "5 date=`date`"
-echo command = ${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274672135_0__0.simtask.xml /share/apps/vcell3/users/schaff
- command="${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274672135_0__0.simtask.xml /share/apps/vcell3/users/schaff "
- $command
+
+
+${batch_container_prefix} JavaPreprocessor64 /simdata/schaff/SimID_274672135_0__0.simtask.xml /simdata/schaff
 stat=$?
-echo ${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274672135_0__0.simtask.xml /share/apps/vcell3/users/schaff returned $stat
+echo returned $stat
 if [ $stat -ne 0 ]; then
 callExitProcessor $stat
 echo returning $stat to Slurm
 exit $stat
 fi
-#END---------SlurmProxy.generateScript():ExecutableCommand----------JavaPreprocessor64
-echo "1 date=`date`"
-echo
-#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------/usr/local/app/localsolvers/linux64/langevin_x64
-echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/langevin_x64' which overrides container invocations"
-nativeExe=/share/apps/vcell3/nativesolvers/langevin_x64
-if [ -e "${nativeExe}" ]; then
- cmd_prefix="/share/apps/vcell3/nativesolvers/"
-else
- cmd_prefix="$container_prefix"
-fi
-echo "cmd_prefix is '${cmd_prefix}'"
-echo "5 date=`date`"
-echo command = ${cmd_prefix}langevin_x64 simulate --output-log=/share/apps/vcell3/users/schaff/SimID_274672135_0_.log --vc-send-status-config=/share/apps/vcell3/users/schaff/SimID_274672135_0_.langevinMessagingConfig /share/apps/vcell3/users/schaff/SimID_274672135_0_.langevinInput 0 -tid 0
-if [ -z ${LD_LIBRARY_PATH+x} ]; then
- export LD_LIBRARY_PATH=/usr/local/app/localsolvers/linux64
-else
- export LD_LIBRARY_PATH=/usr/local/app/localsolvers/linux64:$LD_LIBRARY_PATH
-fi
- command="${cmd_prefix}langevin_x64 simulate --output-log=/share/apps/vcell3/users/schaff/SimID_274672135_0_.log --vc-send-status-config=/share/apps/vcell3/users/schaff/SimID_274672135_0_.langevinMessagingConfig /share/apps/vcell3/users/schaff/SimID_274672135_0_.langevinInput 0 -tid 0 "
- $command
+${solver_container_prefix} langevin_x64 simulate --output-log=/simdata/schaff/SimID_274672135_0_.log --vc-send-status-config=/simdata/schaff/SimID_274672135_0_.langevinMessagingConfig /simdata/schaff/SimID_274672135_0_.langevinInput 0 -tid 0
 stat=$?
-echo ${cmd_prefix}langevin_x64 simulate --output-log=/share/apps/vcell3/users/schaff/SimID_274672135_0_.log --vc-send-status-config=/share/apps/vcell3/users/schaff/SimID_274672135_0_.langevinMessagingConfig /share/apps/vcell3/users/schaff/SimID_274672135_0_.langevinInput 0 -tid 0 returned $stat
+echo returned $stat
 if [ $stat -ne 0 ]; then
 callExitProcessor $stat
 echo returning $stat to Slurm
 exit $stat
 fi
-#END---------SlurmProxy.generateScript():ExecutableCommand----------langevin_x64
-callExitProcessor 0
+callExitProcessor 0
-
-#Following commands (if any) are read by JavaPostProcessor64
+#Following commands (if any) are read by JavaPostProcessor64
\ No newline at end of file
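Note that the langevin fixture above sets `solver_docker_name` to the vcell-batch image rather than one of the solver images: across these fixtures, FiniteVolume_x64 runs from vcell-fvsolver, the other native solvers (SundialsSolverStandalone_x64, VCellStoch_x64, Hybrid_EM_x64, MovingBoundary_x64) from vcell-solvers, and langevin_x64 plus the Java pre/post-processors from vcell-batch. A sketch of that mapping as a shell case statement; the real selection happens in SlurmProxy's Java code, not in the generated script:

    case "$solver" in
        FiniteVolume_x64) image=ghcr.io/virtualcell/vcell-fvsolver:v0.9.4 ;;
        SundialsSolverStandalone_x64|VCellStoch_x64|Hybrid_EM_x64|MovingBoundary_x64)
            image=ghcr.io/virtualcell/vcell-solvers:v0.8.1.2 ;;
        langevin_x64|JavaSimExe64|JavaPreprocessor64|JavaPostprocessor64)
            image=ghcr.io/virtualcell/vcell-batch:7.6.0.43 ;;
    esac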
-e $TMPDIR ]; then mkdir -p $TMPDIR ; fi echo `hostname` - export MODULEPATH=/isg/shared/modulefiles:/tgcapps/modulefiles - source /usr/share/Modules/init/bash - module load singularity/vcell-3.10.0 - +export SINGULARITY_CACHEDIR=/share/apps/vcell3/singularity/cachdir +export SINGULARITY_PULLFOLDER=/share/apps/vcell3/singularity/pullfolder echo "job running on host `hostname -f`" - echo "id is `id`" - -echo "bash version is `bash --version`" -date - echo ENVIRONMENT env -container_prefix= -if command -v singularity >/dev/null 2>&1; then - # - # Copy of singularity image will be downloaded if not found in /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img - # - localSingularityImage=/state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img - if [ ! -e "$localSingularityImage" ]; then - echo "local singularity image $localSingularityImage not found, trying to download to hpc from "/share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img - mkdir -p /state/partition1/singularityImages - singularitytempfile=$(mktemp -up /share/apps/vcell3/singularityImages) - flock -E 100 -n /tmp/vcellSingularityLock_Rel_Version_7.6.0_build_28.lock sh -c "cp /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img ${singularitytempfile} ; mv -n ${singularitytempfile} /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img" - theStatus=$? - if [ $theStatus -eq 100 ] - then - echo "lock in use, waiting for lock owner to copy singularityImage" - let c=0 - until [ -f $localSingularityImage ] - do - sleep 3 - let c=c+1 - if [ $c -eq 20 ] - then - echo "Exceeded wait time for lock owner to copy singularityImage" - break - fi - done - else - if [ $theStatus -eq 0 ] - then - echo copy succeeded - else - echo copy failed - fi - fi - rm -f ${singularitytempfile} - if [ ! 
-e "$localSingularityImage" ]; then - echo "Failed to copy $localSingularityImage to hpc from central" - exit 1 - else - echo successful copy from /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img to /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img - fi - fi - container_prefix="singularity run --containall --bind /share/apps/vcell3/users:/simdata --bind /share/apps/vcell7/users:/simdata_secondary --bind /share/apps/vcell12/users:/share/apps/vcell12/users --bind /share/apps/vcell3/htclogs:/htclogs --bind /scratch/vcell:/solvertmp $localSingularityImage --env java_mem_Xmx=4096M --env jmshost_sim_internal=rke-wn-01.cam.uchc.edu --env jmsport_sim_internal=31618 --env jmsrestport_sim_internal=30163 --env jmsuser=clientUser --env jmspswd=dummy --env jmsblob_minsize=100000 --env mongodbhost_internal=rke-wn-01.cam.uchc.edu --env mongodbport_internal=30019 --env mongodb_database=test --env primary_datadir_external=/share/apps/vcell3/users --env secondary_datadir_external=/share/apps/vcell7/users --env htclogdir_external=/share/apps/vcell3/htclogs --env softwareVersion=Rel_Version_7.6.0_build_28 --env serverid=REL " -else - echo "Required singularity command not found (maybe 'module load singularity/vcell-3.10.0' command didn't work) " - exit 1 -fi -echo "container_prefix is '${container_prefix}'" -echo "3 date=`date`" -#END---------SlurmProxy.generateScript():slurmInitSingularity---------- +container_bindings="--bind /share/apps/vcell3/users:/simdata " +container_bindings+="--bind /share/apps/vcell7/users:/simdata_secondary " +container_bindings+="--bind /share/apps/vcell12/users:/share/apps/vcell12/users " +container_bindings+="--bind /share/apps/vcell3/htclogs:/htclogs " +container_bindings+="--bind /scratch/vcell:/solvertmp " +container_env="--env java_mem_Xmx=4096M " +container_env+="--env jmshost_sim_internal=rke-wn-01.cam.uchc.edu " +container_env+="--env jmsport_sim_internal=31618 " +container_env+="--env jmsrestport_sim_internal=30163 " +container_env+="--env jmsuser=clientUser " +container_env+="--env jmspswd=dummy " +container_env+="--env jmsblob_minsize=100000 " +container_env+="--env mongodbhost_internal=rke-wn-01.cam.uchc.edu " +container_env+="--env mongodbport_internal=30019 " +container_env+="--env mongodb_database=test " +container_env+="--env primary_datadir_external=/share/apps/vcell3/users " +container_env+="--env secondary_datadir_external=/share/apps/vcell7/users " +container_env+="--env htclogdir_external=/share/apps/vcell3/htclogs " +container_env+="--env softwareVersion=Rel_Version_7.6.0_build_28 " +container_env+="--env serverid=REL " +solver_docker_name=ghcr.io/virtualcell/vcell-solvers:v0.8.1.2 +solver_container_prefix="singularity run --containall ${container_bindings} ${container_env} docker://${solver_docker_name}" +batch_docker_name=ghcr.io/virtualcell/vcell-batch:7.6.0.43 +batch_container_prefix="singularity run --containall ${container_bindings} ${container_env} docker://${batch_docker_name}" + -#BEGIN---------SlurmProxy.generateScript():sendFailureMsg---------- sendFailureMsg() { - echo ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274641196 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg - ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` 
--msg-job-userid schaff --msg-job-simkey 274641196 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg + ${batch_container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274641196 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg stat=$? if [[ $stat -ne 0 ]]; then echo 'failed to send error message, retcode=$stat' @@ -97,68 +59,32 @@ sendFailureMsg() { echo 'sent failure message' fi } -#END---------SlurmProxy.generateScript():sendFailureMsg---------- -#BEGIN---------SlurmProxy.generateScript():hasExitProcessor---------- + + callExitProcessor( ) { - echo exitCommand = ${container_prefix}JavaPostprocessor64 274641196 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274641196_0_0.slurm.sub - ${container_prefix}JavaPostprocessor64 274641196 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274641196_0_0.slurm.sub + ${batch_container_prefix} JavaPostprocessor64 274641196 schaff 17 0 0 $1 /htclogs/V_REL_274641196_0_0.slurm.sub } -#END---------SlurmProxy.generateScript():hasExitProcessor---------- -echo -echo -#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------JavaPreprocessor64 -echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/JavaPreprocessor64' which overrides container invocations" -nativeExe=/share/apps/vcell3/nativesolvers/JavaPreprocessor64 -if [ -e "${nativeExe}" ]; then - cmd_prefix="/share/apps/vcell3/nativesolvers/" -else - cmd_prefix="$container_prefix" -fi -echo "cmd_prefix is '${cmd_prefix}'" -echo "5 date=`date`" -echo command = ${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274641196_0__0.simtask.xml /share/apps/vcell3/users/schaff - command="${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274641196_0__0.simtask.xml /share/apps/vcell3/users/schaff " - $command + + +${batch_container_prefix} JavaPreprocessor64 /simdata/schaff/SimID_274641196_0__0.simtask.xml /simdata/schaff stat=$? -echo ${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274641196_0__0.simtask.xml /share/apps/vcell3/users/schaff returned $stat +echo returned $stat if [ $stat -ne 0 ]; then callExitProcessor $stat echo returning $stat to Slurm exit $stat fi -#END---------SlurmProxy.generateScript():ExecutableCommand----------JavaPreprocessor64 -echo "1 date=`date`" -echo -#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------/usr/local/app/localsolvers/linux64/MovingBoundary_x64 -echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/MovingBoundary_x64' which overrides container invocations" -nativeExe=/share/apps/vcell3/nativesolvers/MovingBoundary_x64 -if [ -e "${nativeExe}" ]; then - cmd_prefix="/share/apps/vcell3/nativesolvers/" -else - cmd_prefix="$container_prefix" -fi -echo "cmd_prefix is '${cmd_prefix}'" -echo "5 date=`date`" -echo command = ${cmd_prefix}MovingBoundary_x64 --config /share/apps/vcell3/users/schaff/SimID_274641196_0_mb.xml -tid 0 -if [ -z ${LD_LIBRARY_PATH+x} ]; then - export LD_LIBRARY_PATH=/usr/local/app/localsolvers/linux64 -else - export LD_LIBRARY_PATH=/usr/local/app/localsolvers/linux64:$LD_LIBRARY_PATH -fi - command="${cmd_prefix}MovingBoundary_x64 --config /share/apps/vcell3/users/schaff/SimID_274641196_0_mb.xml -tid 0 " - $command +${solver_container_prefix} MovingBoundary_x64 --config /simdata/schaff/SimID_274641196_0_mb.xml -tid 0 stat=$? 
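# With the variables assembled above, this solver invocation expands to roughly
# the following (illustrative only; bindings and env lists abbreviated):
#   singularity run --containall \
#     --bind /share/apps/vcell3/users:/simdata ... --bind /scratch/vcell:/solvertmp \
#     --env java_mem_Xmx=4096M ... --env serverid=REL \
#     docker://ghcr.io/virtualcell/vcell-solvers:v0.8.1.2 \
#     MovingBoundary_x64 --config /simdata/schaff/SimID_274641196_0_mb.xml -tid 0
# Singularity pulls and converts the OCI image on first use, caching it under the
# SINGULARITY_CACHEDIR and SINGULARITY_PULLFOLDER locations exported earlier.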
-echo ${cmd_prefix}MovingBoundary_x64 --config /share/apps/vcell3/users/schaff/SimID_274641196_0_mb.xml -tid 0 returned $stat +echo returned $stat if [ $stat -ne 0 ]; then callExitProcessor $stat echo returning $stat to Slurm exit $stat fi -#END---------SlurmProxy.generateScript():ExecutableCommand----------MovingBoundary_x64 -callExitProcessor 0 - -#Following commands (if any) are read by JavaPostProcessor64 +callExitProcessor 0 +#Following commands (if any) are read by JavaPostProcessor64 \ No newline at end of file diff --git a/vcell-server/src/test/resources/slurm_fixtures/nfsim/V_REL_274642453_0_0.slurm.sub b/vcell-server/src/test/resources/slurm_fixtures/nfsim/V_REL_274642453_0_0.slurm.sub index 2b2a6103d3..a16c0ffde0 100644 --- a/vcell-server/src/test/resources/slurm_fixtures/nfsim/V_REL_274642453_0_0.slurm.sub +++ b/vcell-server/src/test/resources/slurm_fixtures/nfsim/V_REL_274642453_0_0.slurm.sub @@ -8,88 +8,50 @@ #SBATCH --mem=4096M #SBATCH --no-kill #SBATCH --no-requeue -# VCell SlurmProxy memory limit source=Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB +# VCell SlurmProxy memory limit source='Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB' -#BEGIN---------SlurmProxy.generateScript():slurmInitSingularity---------- -set -x - TMPDIR=/scratch/vcell -echo "using TMPDIR=$TMPDIR" if [ ! -e $TMPDIR ]; then mkdir -p $TMPDIR ; fi echo `hostname` - export MODULEPATH=/isg/shared/modulefiles:/tgcapps/modulefiles - source /usr/share/Modules/init/bash - module load singularity/vcell-3.10.0 - +export SINGULARITY_CACHEDIR=/share/apps/vcell3/singularity/cachdir +export SINGULARITY_PULLFOLDER=/share/apps/vcell3/singularity/pullfolder echo "job running on host `hostname -f`" - echo "id is `id`" - -echo "bash version is `bash --version`" -date - echo ENVIRONMENT env -container_prefix= -if command -v singularity >/dev/null 2>&1; then - # - # Copy of singularity image will be downloaded if not found in /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img - # - localSingularityImage=/state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img - if [ ! -e "$localSingularityImage" ]; then - echo "local singularity image $localSingularityImage not found, trying to download to hpc from "/share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img - mkdir -p /state/partition1/singularityImages - singularitytempfile=$(mktemp -up /share/apps/vcell3/singularityImages) - flock -E 100 -n /tmp/vcellSingularityLock_Rel_Version_7.6.0_build_28.lock sh -c "cp /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img ${singularitytempfile} ; mv -n ${singularitytempfile} /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img" - theStatus=$? - if [ $theStatus -eq 100 ] - then - echo "lock in use, waiting for lock owner to copy singularityImage" - let c=0 - until [ -f $localSingularityImage ] - do - sleep 3 - let c=c+1 - if [ $c -eq 20 ] - then - echo "Exceeded wait time for lock owner to copy singularityImage" - break - fi - done - else - if [ $theStatus -eq 0 ] - then - echo copy succeeded - else - echo copy failed - fi - fi - rm -f ${singularitytempfile} - if [ ! 
-e "$localSingularityImage" ]; then - echo "Failed to copy $localSingularityImage to hpc from central" - exit 1 - else - echo successful copy from /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img to /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img - fi - fi - container_prefix="singularity run --containall --bind /share/apps/vcell3/users:/simdata --bind /share/apps/vcell7/users:/simdata_secondary --bind /share/apps/vcell12/users:/share/apps/vcell12/users --bind /share/apps/vcell3/htclogs:/htclogs --bind /scratch/vcell:/solvertmp $localSingularityImage --env java_mem_Xmx=4096M --env jmshost_sim_internal=rke-wn-01.cam.uchc.edu --env jmsport_sim_internal=31618 --env jmsrestport_sim_internal=30163 --env jmsuser=clientUser --env jmspswd=dummy --env jmsblob_minsize=100000 --env mongodbhost_internal=rke-wn-01.cam.uchc.edu --env mongodbport_internal=30019 --env mongodb_database=test --env primary_datadir_external=/share/apps/vcell3/users --env secondary_datadir_external=/share/apps/vcell7/users --env htclogdir_external=/share/apps/vcell3/htclogs --env softwareVersion=Rel_Version_7.6.0_build_28 --env serverid=REL " -else - echo "Required singularity command not found (maybe 'module load singularity/vcell-3.10.0' command didn't work) " - exit 1 -fi -echo "container_prefix is '${container_prefix}'" -echo "3 date=`date`" -#END---------SlurmProxy.generateScript():slurmInitSingularity---------- +container_bindings="--bind /share/apps/vcell3/users:/simdata " +container_bindings+="--bind /share/apps/vcell7/users:/simdata_secondary " +container_bindings+="--bind /share/apps/vcell12/users:/share/apps/vcell12/users " +container_bindings+="--bind /share/apps/vcell3/htclogs:/htclogs " +container_bindings+="--bind /scratch/vcell:/solvertmp " +container_env="--env java_mem_Xmx=4096M " +container_env+="--env jmshost_sim_internal=rke-wn-01.cam.uchc.edu " +container_env+="--env jmsport_sim_internal=31618 " +container_env+="--env jmsrestport_sim_internal=30163 " +container_env+="--env jmsuser=clientUser " +container_env+="--env jmspswd=dummy " +container_env+="--env jmsblob_minsize=100000 " +container_env+="--env mongodbhost_internal=rke-wn-01.cam.uchc.edu " +container_env+="--env mongodbport_internal=30019 " +container_env+="--env mongodb_database=test " +container_env+="--env primary_datadir_external=/share/apps/vcell3/users " +container_env+="--env secondary_datadir_external=/share/apps/vcell7/users " +container_env+="--env htclogdir_external=/share/apps/vcell3/htclogs " +container_env+="--env softwareVersion=Rel_Version_7.6.0_build_28 " +container_env+="--env serverid=REL " +solver_docker_name=ghcr.io/virtualcell/vcell-solvers:v0.8.1.2 +solver_container_prefix="singularity run --containall ${container_bindings} ${container_env} docker://${solver_docker_name}" +batch_docker_name=ghcr.io/virtualcell/vcell-batch:7.6.0.43 +batch_container_prefix="singularity run --containall ${container_bindings} ${container_env} docker://${batch_docker_name}" + -#BEGIN---------SlurmProxy.generateScript():sendFailureMsg---------- sendFailureMsg() { - echo ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274642453 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg - ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` 
--msg-job-userid schaff --msg-job-simkey 274642453 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg + ${batch_container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274642453 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg stat=$? if [[ $stat -ne 0 ]]; then echo 'failed to send error message, retcode=$stat' @@ -97,68 +59,32 @@ sendFailureMsg() { echo 'sent failure message' fi } -#END---------SlurmProxy.generateScript():sendFailureMsg---------- -#BEGIN---------SlurmProxy.generateScript():hasExitProcessor---------- + + callExitProcessor( ) { - echo exitCommand = ${container_prefix}JavaPostprocessor64 274642453 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274642453_0_0.slurm.sub - ${container_prefix}JavaPostprocessor64 274642453 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274642453_0_0.slurm.sub + ${batch_container_prefix} JavaPostprocessor64 274642453 schaff 17 0 0 $1 /htclogs/V_REL_274642453_0_0.slurm.sub } -#END---------SlurmProxy.generateScript():hasExitProcessor---------- -echo -echo -#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------JavaPreprocessor64 -echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/JavaPreprocessor64' which overrides container invocations" -nativeExe=/share/apps/vcell3/nativesolvers/JavaPreprocessor64 -if [ -e "${nativeExe}" ]; then - cmd_prefix="/share/apps/vcell3/nativesolvers/" -else - cmd_prefix="$container_prefix" -fi -echo "cmd_prefix is '${cmd_prefix}'" -echo "5 date=`date`" -echo command = ${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274642453_0__0.simtask.xml /share/apps/vcell3/users/schaff - command="${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274642453_0__0.simtask.xml /share/apps/vcell3/users/schaff " - $command + + +${batch_container_prefix} JavaPreprocessor64 /simdata/schaff/SimID_274642453_0__0.simtask.xml /simdata/schaff stat=$? 
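# Failure-handling pattern (identical in every fixture): a nonzero preprocessor
# or solver status is passed to callExitProcessor, which runs JavaPostprocessor64
# in the vcell-batch container so the job's final state is recorded, and the same
# status is then returned to Slurm as the job's exit code.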
-echo ${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274642453_0__0.simtask.xml /share/apps/vcell3/users/schaff returned $stat +echo returned $stat if [ $stat -ne 0 ]; then callExitProcessor $stat echo returning $stat to Slurm exit $stat fi -#END---------SlurmProxy.generateScript():ExecutableCommand----------JavaPreprocessor64 -echo "1 date=`date`" -echo -#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------/usr/local/app/localsolvers/linux64/NFsim_x64 -echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/NFsim_x64' which overrides container invocations" -nativeExe=/share/apps/vcell3/nativesolvers/NFsim_x64 -if [ -e "${nativeExe}" ]; then - cmd_prefix="/share/apps/vcell3/nativesolvers/" -else - cmd_prefix="$container_prefix" -fi -echo "cmd_prefix is '${cmd_prefix}'" -echo "5 date=`date`" -echo command = ${cmd_prefix}NFsim_x64 -seed 716746135 -vcell -xml /share/apps/vcell3/users/schaff/SimID_274642453_0_.nfsimInput -o /share/apps/vcell3/users/schaff/SimID_274642453_0_.gdat -sim 1.0 -ss /share/apps/vcell3/users/schaff/SimID_274642453_0_.species -oSteps 20 -notf -utl 1000 -cb -pcmatch -tid 0 -if [ -z ${LD_LIBRARY_PATH+x} ]; then - export LD_LIBRARY_PATH=/usr/local/app/localsolvers/linux64 -else - export LD_LIBRARY_PATH=/usr/local/app/localsolvers/linux64:$LD_LIBRARY_PATH -fi - command="${cmd_prefix}NFsim_x64 -seed 716746135 -vcell -xml /share/apps/vcell3/users/schaff/SimID_274642453_0_.nfsimInput -o /share/apps/vcell3/users/schaff/SimID_274642453_0_.gdat -sim 1.0 -ss /share/apps/vcell3/users/schaff/SimID_274642453_0_.species -oSteps 20 -notf -utl 1000 -cb -pcmatch -tid 0 " - $command +${solver_container_prefix} NFsim_x64 -seed 716746135 -vcell -xml /simdata/schaff/SimID_274642453_0_.nfsimInput -o /simdata/schaff/SimID_274642453_0_.gdat -sim 1.0 -ss /simdata/schaff/SimID_274642453_0_.species -oSteps 20 -notf -utl 1000 -cb -pcmatch -tid 0 stat=$? -echo ${cmd_prefix}NFsim_x64 -seed 716746135 -vcell -xml /share/apps/vcell3/users/schaff/SimID_274642453_0_.nfsimInput -o /share/apps/vcell3/users/schaff/SimID_274642453_0_.gdat -sim 1.0 -ss /share/apps/vcell3/users/schaff/SimID_274642453_0_.species -oSteps 20 -notf -utl 1000 -cb -pcmatch -tid 0 returned $stat +echo returned $stat if [ $stat -ne 0 ]; then callExitProcessor $stat echo returning $stat to Slurm exit $stat fi -#END---------SlurmProxy.generateScript():ExecutableCommand----------NFsim_x64 -callExitProcessor 0 - -#Following commands (if any) are read by JavaPostProcessor64 +callExitProcessor 0 +#Following commands (if any) are read by JavaPostProcessor64 \ No newline at end of file diff --git a/vcell-server/src/test/resources/slurm_fixtures/opt/CopasiParest_152878.sub b/vcell-server/src/test/resources/slurm_fixtures/opt/CopasiParest_152878.sub index 22391dfaa6..06570938d7 100644 --- a/vcell-server/src/test/resources/slurm_fixtures/opt/CopasiParest_152878.sub +++ b/vcell-server/src/test/resources/slurm_fixtures/opt/CopasiParest_152878.sub @@ -8,83 +8,29 @@ #SBATCH --mem=256M #SBATCH --no-kill #SBATCH --no-requeue -# VCell SlurmProxy memory limit source=Optimization Default -#BEGIN---------SlurmProxy.generateScript():slurmInitSingularity---------- -set -x - +# VCell SlurmProxy memory limit source='Optimization Default' TMPDIR=/scratch/vcell -echo "using TMPDIR=$TMPDIR" if [ ! 
-e $TMPDIR ]; then mkdir -p $TMPDIR ; fi echo `hostname` - export MODULEPATH=/isg/shared/modulefiles:/tgcapps/modulefiles - source /usr/share/Modules/init/bash - module load singularity/vcell-3.10.0 - +export SINGULARITY_CACHEDIR=/share/apps/vcell3/singularity/cachdir +export SINGULARITY_PULLFOLDER=/share/apps/vcell3/singularity/pullfolder echo "job running on host `hostname -f`" - echo "id is `id`" - -echo "bash version is `bash --version`" -date - echo ENVIRONMENT env -container_prefix= -if command -v singularity >/dev/null 2>&1; then - # - # Copy of singularity image will be downloaded if not found in /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-opt_d6825f4.img - # - localSingularityImage=/state/partition1/singularityImages/ghcr.io_virtualcell_vcell-opt_d6825f4.img - if [ ! -e "$localSingularityImage" ]; then - echo "local singularity image $localSingularityImage not found, trying to download to hpc from "/share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-opt_d6825f4.img - mkdir -p /state/partition1/singularityImages - singularitytempfile=$(mktemp -up /share/apps/vcell3/singularityImages) - flock -E 100 -n /tmp/vcellSingularityLock_Rel_Version_7.6.0_build_28.lock sh -c "cp /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-opt_d6825f4.img ${singularitytempfile} ; mv -n ${singularitytempfile} /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-opt_d6825f4.img" - theStatus=$? - if [ $theStatus -eq 100 ] - then - echo "lock in use, waiting for lock owner to copy singularityImage" - let c=0 - until [ -f $localSingularityImage ] - do - sleep 3 - let c=c+1 - if [ $c -eq 20 ] - then - echo "Exceeded wait time for lock owner to copy singularityImage" - break - fi - done - else - if [ $theStatus -eq 0 ] - then - echo copy succeeded - else - echo copy failed - fi - fi - rm -f ${singularitytempfile} - if [ ! 
-e "$localSingularityImage" ]; then - echo "Failed to copy $localSingularityImage to hpc from central" - exit 1 - else - echo successful copy from /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-opt_d6825f4.img to /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-opt_d6825f4.img - fi - fi - container_prefix="singularity run --containall --bind /share/apps/vcell3/users/parest_data:/simdata --bind /share/apps/vcell12/users:/share/apps/vcell12/users --bind /share/apps/vcell3/htclogs:/htclogs --bind /scratch/vcell:/solvertmp $localSingularityImage --env datadir_external=/share/apps/vcell3/users " -else - echo "Required singularity command not found (maybe 'module load singularity/vcell-3.10.0' command didn't work) " - exit 1 -fi -echo "container_prefix is '${container_prefix}'" -echo "3 date=`date`" -#END---------SlurmProxy.generateScript():slurmInitSingularity---------- +container_bindings="--bind /share/apps/vcell3/users/parest_data:/simdata " +container_bindings+="--bind /scratch/vcell:/solvertmp " +container_bindings+="--bind /share/apps/vcell3/htclogs:/htclogs " +container_bindings+="--bind /share/apps/vcell12/users:/share/apps/vcell12/users " +container_env="--env datadir_external=/share/apps/vcell3/users " +solver_docker_name=ghcr.io/virtualcell/vcell-opt:7.6.0.43 +solver_container_prefix="singularity run --containall ${container_bindings} ${container_env} docker://${solver_docker_name}" - cmd_prefix="$container_prefix" +cmd_prefix="$solver_container_prefix" echo "cmd_prefix is '${cmd_prefix}'" echo command = ${cmd_prefix} -${cmd_prefix} /simdata/CopasiParest_152878_optProblem.json /simdata/CopasiParest_152878_optRun.json /simdata/CopasiParest_152878_optReport.txt +${cmd_prefix} /simdata/CopasiParest_152878_optProblem.json /simdata/CopasiParest_152878_optRun.json /simdata/CopasiParest_152878_optReport.txt \ No newline at end of file diff --git a/vcell-server/src/test/resources/slurm_fixtures/runge_kutta_fehlberg/V_REL_274631114_0_0.slurm.sub b/vcell-server/src/test/resources/slurm_fixtures/runge_kutta_fehlberg/V_REL_274631114_0_0.slurm.sub index f7e55e5650..ada20fa438 100644 --- a/vcell-server/src/test/resources/slurm_fixtures/runge_kutta_fehlberg/V_REL_274631114_0_0.slurm.sub +++ b/vcell-server/src/test/resources/slurm_fixtures/runge_kutta_fehlberg/V_REL_274631114_0_0.slurm.sub @@ -8,88 +8,50 @@ #SBATCH --mem=4096M #SBATCH --no-kill #SBATCH --no-requeue -# VCell SlurmProxy memory limit source=Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB +# VCell SlurmProxy memory limit source='Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB' -#BEGIN---------SlurmProxy.generateScript():slurmInitSingularity---------- -set -x - TMPDIR=/scratch/vcell -echo "using TMPDIR=$TMPDIR" if [ ! 
-e $TMPDIR ]; then mkdir -p $TMPDIR ; fi echo `hostname` - export MODULEPATH=/isg/shared/modulefiles:/tgcapps/modulefiles - source /usr/share/Modules/init/bash - module load singularity/vcell-3.10.0 - +export SINGULARITY_CACHEDIR=/share/apps/vcell3/singularity/cachdir +export SINGULARITY_PULLFOLDER=/share/apps/vcell3/singularity/pullfolder echo "job running on host `hostname -f`" - echo "id is `id`" - -echo "bash version is `bash --version`" -date - echo ENVIRONMENT env -container_prefix= -if command -v singularity >/dev/null 2>&1; then - # - # Copy of singularity image will be downloaded if not found in /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img - # - localSingularityImage=/state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img - if [ ! -e "$localSingularityImage" ]; then - echo "local singularity image $localSingularityImage not found, trying to download to hpc from "/share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img - mkdir -p /state/partition1/singularityImages - singularitytempfile=$(mktemp -up /share/apps/vcell3/singularityImages) - flock -E 100 -n /tmp/vcellSingularityLock_Rel_Version_7.6.0_build_28.lock sh -c "cp /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img ${singularitytempfile} ; mv -n ${singularitytempfile} /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img" - theStatus=$? - if [ $theStatus -eq 100 ] - then - echo "lock in use, waiting for lock owner to copy singularityImage" - let c=0 - until [ -f $localSingularityImage ] - do - sleep 3 - let c=c+1 - if [ $c -eq 20 ] - then - echo "Exceeded wait time for lock owner to copy singularityImage" - break - fi - done - else - if [ $theStatus -eq 0 ] - then - echo copy succeeded - else - echo copy failed - fi - fi - rm -f ${singularitytempfile} - if [ ! 
-e "$localSingularityImage" ]; then - echo "Failed to copy $localSingularityImage to hpc from central" - exit 1 - else - echo successful copy from /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img to /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img - fi - fi - container_prefix="singularity run --containall --bind /share/apps/vcell3/users:/simdata --bind /share/apps/vcell7/users:/simdata_secondary --bind /share/apps/vcell12/users:/share/apps/vcell12/users --bind /share/apps/vcell3/htclogs:/htclogs --bind /scratch/vcell:/solvertmp $localSingularityImage --env java_mem_Xmx=4096M --env jmshost_sim_internal=rke-wn-01.cam.uchc.edu --env jmsport_sim_internal=31618 --env jmsrestport_sim_internal=30163 --env jmsuser=clientUser --env jmspswd=dummy --env jmsblob_minsize=100000 --env mongodbhost_internal=rke-wn-01.cam.uchc.edu --env mongodbport_internal=30019 --env mongodb_database=test --env primary_datadir_external=/share/apps/vcell3/users --env secondary_datadir_external=/share/apps/vcell7/users --env htclogdir_external=/share/apps/vcell3/htclogs --env softwareVersion=Rel_Version_7.6.0_build_28 --env serverid=REL " -else - echo "Required singularity command not found (maybe 'module load singularity/vcell-3.10.0' command didn't work) " - exit 1 -fi -echo "container_prefix is '${container_prefix}'" -echo "3 date=`date`" -#END---------SlurmProxy.generateScript():slurmInitSingularity---------- +container_bindings="--bind /share/apps/vcell3/users:/simdata " +container_bindings+="--bind /share/apps/vcell7/users:/simdata_secondary " +container_bindings+="--bind /share/apps/vcell12/users:/share/apps/vcell12/users " +container_bindings+="--bind /share/apps/vcell3/htclogs:/htclogs " +container_bindings+="--bind /scratch/vcell:/solvertmp " +container_env="--env java_mem_Xmx=4096M " +container_env+="--env jmshost_sim_internal=rke-wn-01.cam.uchc.edu " +container_env+="--env jmsport_sim_internal=31618 " +container_env+="--env jmsrestport_sim_internal=30163 " +container_env+="--env jmsuser=clientUser " +container_env+="--env jmspswd=dummy " +container_env+="--env jmsblob_minsize=100000 " +container_env+="--env mongodbhost_internal=rke-wn-01.cam.uchc.edu " +container_env+="--env mongodbport_internal=30019 " +container_env+="--env mongodb_database=test " +container_env+="--env primary_datadir_external=/share/apps/vcell3/users " +container_env+="--env secondary_datadir_external=/share/apps/vcell7/users " +container_env+="--env htclogdir_external=/share/apps/vcell3/htclogs " +container_env+="--env softwareVersion=Rel_Version_7.6.0_build_28 " +container_env+="--env serverid=REL " +solver_docker_name=ghcr.io/virtualcell/vcell-batch:7.6.0.43 +solver_container_prefix="singularity run --containall ${container_bindings} ${container_env} docker://${solver_docker_name}" +batch_docker_name=ghcr.io/virtualcell/vcell-batch:7.6.0.43 +batch_container_prefix="singularity run --containall ${container_bindings} ${container_env} docker://${batch_docker_name}" + -#BEGIN---------SlurmProxy.generateScript():sendFailureMsg---------- sendFailureMsg() { - echo ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274631114 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg - ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` 
--msg-job-userid schaff --msg-job-simkey 274631114 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg + ${batch_container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274631114 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg stat=$? if [[ $stat -ne 0 ]]; then echo 'failed to send error message, retcode=$stat' @@ -97,41 +59,23 @@ sendFailureMsg() { echo 'sent failure message' fi } -#END---------SlurmProxy.generateScript():sendFailureMsg---------- -#BEGIN---------SlurmProxy.generateScript():hasExitProcessor---------- + + callExitProcessor( ) { - echo exitCommand = ${container_prefix}JavaPostprocessor64 274631114 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274631114_0_0.slurm.sub - ${container_prefix}JavaPostprocessor64 274631114 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274631114_0_0.slurm.sub + ${batch_container_prefix} JavaPostprocessor64 274631114 schaff 17 0 0 $1 /htclogs/V_REL_274631114_0_0.slurm.sub } -#END---------SlurmProxy.generateScript():hasExitProcessor---------- -echo -echo "1 date=`date`" -echo -#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------JavaSimExe64 -echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/JavaSimExe64' which overrides container invocations" -nativeExe=/share/apps/vcell3/nativesolvers/JavaSimExe64 -if [ -e "${nativeExe}" ]; then - cmd_prefix="/share/apps/vcell3/nativesolvers/" -else - cmd_prefix="$container_prefix" -fi -echo "cmd_prefix is '${cmd_prefix}'" -echo "5 date=`date`" -echo command = ${cmd_prefix}JavaSimExe64 /share/apps/vcell3/users/schaff/SimID_274631114_0__0.simtask.xml /share/apps/vcell3/users/schaff - command="${cmd_prefix}JavaSimExe64 /share/apps/vcell3/users/schaff/SimID_274631114_0__0.simtask.xml /share/apps/vcell3/users/schaff " - $command + +${solver_container_prefix} JavaSimExe64 /simdata/schaff/SimID_274631114_0__0.simtask.xml /simdata/schaff stat=$? -echo ${cmd_prefix}JavaSimExe64 /share/apps/vcell3/users/schaff/SimID_274631114_0__0.simtask.xml /share/apps/vcell3/users/schaff returned $stat +echo returned $stat if [ $stat -ne 0 ]; then callExitProcessor $stat echo returning $stat to Slurm exit $stat fi -#END---------SlurmProxy.generateScript():ExecutableCommand----------JavaSimExe64 -callExitProcessor 0 - -#Following commands (if any) are read by JavaPostProcessor64 +callExitProcessor 0 +#Following commands (if any) are read by JavaPostProcessor64 \ No newline at end of file diff --git a/vcell-server/src/test/resources/slurm_fixtures/smoldyn/V_REL_274630052_0_0.slurm.sub b/vcell-server/src/test/resources/slurm_fixtures/smoldyn/V_REL_274630052_0_0.slurm.sub index 4b4b0aefea..8fb09c12e8 100644 --- a/vcell-server/src/test/resources/slurm_fixtures/smoldyn/V_REL_274630052_0_0.slurm.sub +++ b/vcell-server/src/test/resources/slurm_fixtures/smoldyn/V_REL_274630052_0_0.slurm.sub @@ -8,88 +8,50 @@ #SBATCH --mem=4096M #SBATCH --no-kill #SBATCH --no-requeue -# VCell SlurmProxy memory limit source=Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB +# VCell SlurmProxy memory limit source='Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB' -#BEGIN---------SlurmProxy.generateScript():slurmInitSingularity---------- -set -x - TMPDIR=/scratch/vcell -echo "using TMPDIR=$TMPDIR" if [ ! 
-e $TMPDIR ]; then mkdir -p $TMPDIR ; fi echo `hostname` - export MODULEPATH=/isg/shared/modulefiles:/tgcapps/modulefiles - source /usr/share/Modules/init/bash - module load singularity/vcell-3.10.0 - +export SINGULARITY_CACHEDIR=/share/apps/vcell3/singularity/cachdir +export SINGULARITY_PULLFOLDER=/share/apps/vcell3/singularity/pullfolder echo "job running on host `hostname -f`" - echo "id is `id`" - -echo "bash version is `bash --version`" -date - echo ENVIRONMENT env -container_prefix= -if command -v singularity >/dev/null 2>&1; then - # - # Copy of singularity image will be downloaded if not found in /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img - # - localSingularityImage=/state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img - if [ ! -e "$localSingularityImage" ]; then - echo "local singularity image $localSingularityImage not found, trying to download to hpc from "/share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img - mkdir -p /state/partition1/singularityImages - singularitytempfile=$(mktemp -up /share/apps/vcell3/singularityImages) - flock -E 100 -n /tmp/vcellSingularityLock_Rel_Version_7.6.0_build_28.lock sh -c "cp /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img ${singularitytempfile} ; mv -n ${singularitytempfile} /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img" - theStatus=$? - if [ $theStatus -eq 100 ] - then - echo "lock in use, waiting for lock owner to copy singularityImage" - let c=0 - until [ -f $localSingularityImage ] - do - sleep 3 - let c=c+1 - if [ $c -eq 20 ] - then - echo "Exceeded wait time for lock owner to copy singularityImage" - break - fi - done - else - if [ $theStatus -eq 0 ] - then - echo copy succeeded - else - echo copy failed - fi - fi - rm -f ${singularitytempfile} - if [ ! 
-e "$localSingularityImage" ]; then - echo "Failed to copy $localSingularityImage to hpc from central" - exit 1 - else - echo successful copy from /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img to /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img - fi - fi - container_prefix="singularity run --containall --bind /share/apps/vcell3/users:/simdata --bind /share/apps/vcell7/users:/simdata_secondary --bind /share/apps/vcell12/users:/share/apps/vcell12/users --bind /share/apps/vcell3/htclogs:/htclogs --bind /scratch/vcell:/solvertmp $localSingularityImage --env java_mem_Xmx=4096M --env jmshost_sim_internal=rke-wn-01.cam.uchc.edu --env jmsport_sim_internal=31618 --env jmsrestport_sim_internal=30163 --env jmsuser=clientUser --env jmspswd=dummy --env jmsblob_minsize=100000 --env mongodbhost_internal=rke-wn-01.cam.uchc.edu --env mongodbport_internal=30019 --env mongodb_database=test --env primary_datadir_external=/share/apps/vcell3/users --env secondary_datadir_external=/share/apps/vcell7/users --env htclogdir_external=/share/apps/vcell3/htclogs --env softwareVersion=Rel_Version_7.6.0_build_28 --env serverid=REL " -else - echo "Required singularity command not found (maybe 'module load singularity/vcell-3.10.0' command didn't work) " - exit 1 -fi -echo "container_prefix is '${container_prefix}'" -echo "3 date=`date`" -#END---------SlurmProxy.generateScript():slurmInitSingularity---------- +container_bindings="--bind /share/apps/vcell3/users:/simdata " +container_bindings+="--bind /share/apps/vcell7/users:/simdata_secondary " +container_bindings+="--bind /share/apps/vcell12/users:/share/apps/vcell12/users " +container_bindings+="--bind /share/apps/vcell3/htclogs:/htclogs " +container_bindings+="--bind /scratch/vcell:/solvertmp " +container_env="--env java_mem_Xmx=4096M " +container_env+="--env jmshost_sim_internal=rke-wn-01.cam.uchc.edu " +container_env+="--env jmsport_sim_internal=31618 " +container_env+="--env jmsrestport_sim_internal=30163 " +container_env+="--env jmsuser=clientUser " +container_env+="--env jmspswd=dummy " +container_env+="--env jmsblob_minsize=100000 " +container_env+="--env mongodbhost_internal=rke-wn-01.cam.uchc.edu " +container_env+="--env mongodbport_internal=30019 " +container_env+="--env mongodb_database=test " +container_env+="--env primary_datadir_external=/share/apps/vcell3/users " +container_env+="--env secondary_datadir_external=/share/apps/vcell7/users " +container_env+="--env htclogdir_external=/share/apps/vcell3/htclogs " +container_env+="--env softwareVersion=Rel_Version_7.6.0_build_28 " +container_env+="--env serverid=REL " +solver_docker_name=ghcr.io/virtualcell/vcell-fvsolver:v0.9.4 +solver_container_prefix="singularity run --containall ${container_bindings} ${container_env} docker://${solver_docker_name}" +batch_docker_name=ghcr.io/virtualcell/vcell-batch:7.6.0.43 +batch_container_prefix="singularity run --containall ${container_bindings} ${container_env} docker://${batch_docker_name}" + -#BEGIN---------SlurmProxy.generateScript():sendFailureMsg---------- sendFailureMsg() { - echo ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274630052 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg - ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` 
--msg-job-userid schaff --msg-job-simkey 274630052 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg + ${batch_container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274630052 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg stat=$? if [[ $stat -ne 0 ]]; then echo 'failed to send error message, retcode=$stat' @@ -97,68 +59,32 @@ sendFailureMsg() { echo 'sent failure message' fi } -#END---------SlurmProxy.generateScript():sendFailureMsg---------- -#BEGIN---------SlurmProxy.generateScript():hasExitProcessor---------- + + callExitProcessor( ) { - echo exitCommand = ${container_prefix}JavaPostprocessor64 274630052 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274630052_0_0.slurm.sub - ${container_prefix}JavaPostprocessor64 274630052 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274630052_0_0.slurm.sub + ${batch_container_prefix} JavaPostprocessor64 274630052 schaff 17 0 0 $1 /htclogs/V_REL_274630052_0_0.slurm.sub } -#END---------SlurmProxy.generateScript():hasExitProcessor---------- -echo -echo -#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------JavaPreprocessor64 -echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/JavaPreprocessor64' which overrides container invocations" -nativeExe=/share/apps/vcell3/nativesolvers/JavaPreprocessor64 -if [ -e "${nativeExe}" ]; then - cmd_prefix="/share/apps/vcell3/nativesolvers/" -else - cmd_prefix="$container_prefix" -fi -echo "cmd_prefix is '${cmd_prefix}'" -echo "5 date=`date`" -echo command = ${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274630052_0__0.simtask.xml /share/apps/vcell3/users/schaff - command="${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274630052_0__0.simtask.xml /share/apps/vcell3/users/schaff " - $command + + +${batch_container_prefix} JavaPreprocessor64 /simdata/schaff/SimID_274630052_0__0.simtask.xml /simdata/schaff stat=$? -echo ${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274630052_0__0.simtask.xml /share/apps/vcell3/users/schaff returned $stat +echo returned $stat if [ $stat -ne 0 ]; then callExitProcessor $stat echo returning $stat to Slurm exit $stat fi -#END---------SlurmProxy.generateScript():ExecutableCommand----------JavaPreprocessor64 -echo "1 date=`date`" -echo -#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------/usr/local/app/localsolvers/linux64/smoldyn_x64 -echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/smoldyn_x64' which overrides container invocations" -nativeExe=/share/apps/vcell3/nativesolvers/smoldyn_x64 -if [ -e "${nativeExe}" ]; then - cmd_prefix="/share/apps/vcell3/nativesolvers/" -else - cmd_prefix="$container_prefix" -fi -echo "cmd_prefix is '${cmd_prefix}'" -echo "5 date=`date`" -echo command = ${cmd_prefix}smoldyn_x64 /share/apps/vcell3/users/schaff/SimID_274630052_0_.smoldynInput -tid 0 -if [ -z ${LD_LIBRARY_PATH+x} ]; then - export LD_LIBRARY_PATH=/usr/local/app/localsolvers/linux64 -else - export LD_LIBRARY_PATH=/usr/local/app/localsolvers/linux64:$LD_LIBRARY_PATH -fi - command="${cmd_prefix}smoldyn_x64 /share/apps/vcell3/users/schaff/SimID_274630052_0_.smoldynInput -tid 0 " - $command +${solver_container_prefix} smoldyn_x64 /simdata/schaff/SimID_274630052_0_.smoldynInput -tid 0 stat=$? 
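# Note the two-image split introduced by this change: the native smoldyn_x64
# binary runs from the vcell-fvsolver image (solver_container_prefix), while
# JavaPreprocessor64/JavaPostprocessor64 run from the vcell-batch image
# (batch_container_prefix). Both prefixes share the same bindings and env.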
-echo ${cmd_prefix}smoldyn_x64 /share/apps/vcell3/users/schaff/SimID_274630052_0_.smoldynInput -tid 0 returned $stat +echo returned $stat if [ $stat -ne 0 ]; then callExitProcessor $stat echo returning $stat to Slurm exit $stat fi -#END---------SlurmProxy.generateScript():ExecutableCommand----------smoldyn_x64 -callExitProcessor 0 - -#Following commands (if any) are read by JavaPostProcessor64 +callExitProcessor 0 +#Following commands (if any) are read by JavaPostProcessor64 \ No newline at end of file
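
# A minimal sketch (not part of the fixtures; the SimID and user below are
# hypothetical) of exercising the new docker:// pull path interactively on a
# cluster node:
#
#   export SINGULARITY_CACHEDIR=/share/apps/vcell3/singularity/cachdir
#   export SINGULARITY_PULLFOLDER=/share/apps/vcell3/singularity/pullfolder
#   singularity run --containall \
#     --bind /share/apps/vcell3/users:/simdata \
#     docker://ghcr.io/virtualcell/vcell-batch:7.6.0.43 \
#     JavaPreprocessor64 /simdata/someuser/SimID_000000000_0__0.simtask.xml /simdata/someuser
#
# The first run pulls and converts the OCI image into SINGULARITY_PULLFOLDER;
# later runs reuse the cached image, replacing the old flock-guarded copy of a
# prebuilt .img from /share/apps/vcell3/singularityImages. Note also that the
# runge_kutta_fehlberg fixture points solver_docker_name at the vcell-batch
# image, presumably because JavaSimExe64 is a Java-based solver carried there.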