Commit 074a8a8

Merge pull request #128 from afermg/fix-ci
ci: change zenodo root id, fix hash check.
shntnu authored Oct 4, 2024
2 parents 1c2e38f + d96e4e1 commit 074a8a8
Showing 2 changed files with 9 additions and 5 deletions.
2 changes: 2 additions & 0 deletions README.md
@@ -24,6 +24,8 @@ This collection comprises 4 datasets:
- Different subsets of data in the principal dataset, assembled into single parquet files. The URLs to the subsets are [here](https://github.com/jump-cellpainting/datasets/blob/main/manifests/profile_index.csv). The corresponding folder for each subset contains all the data levels (e.g. this [folder](https://cellpainting-gallery.s3.amazonaws.com/index.html#cpg0016-jump-assembled/source_all/workspace/profiles/jump-profiling-recipe_2024_a917fa7/ORF/profiles_wellpos_cc_var_mad_outlier_featselect_sphering_harmony/)). Snakemake workflows for producing these assembled profiles are available [here](https://github.com/broadinstitute/jump-profiling-recipe/releases/tag/v0.1.0). We recommend working with the `all` or `all_interpretable` subsets -- they contain all three data modalities in a single dataframe. Note that cross-modality matching (ORF-CRISPR, COMPOUND-CRISPR, COMPOUND-ORF) is still poor, but within-modality matching generally works well.
- A [tutorial](https://broadinstitute.github.io/2023_12_JUMP_data_only_vignettes/howto/1_retrieve_profiles.html) to load these subsets of data (a minimal shell sketch for fetching one subset also follows this file's diff).
- Other [tutorials](https://broad.io/jump) to work with `cpg0016`.
+- The datasets and their DOIs can be found in this [Zenodo](https://zenodo.org/records/13892061/latest) record.
+- Multiple datasets of interest for JUMP are available in our [Zenodo](https://zenodo.org/communities/broad-imaging/records?q=&l=list&p=1&s=10&sort=newest) community.

### What’s coming up

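To try the assembled subsets listed in the README hunk above, here is a minimal shell sketch. It assumes the raw GitHub URL for `manifests/profile_index.csv` on the `main` branch, a header row, the subset label in the first column and the profile URL in the second (the upload script below reads URLs from column 2), and that `all_interpretable` is one of the labels; adjust to the actual column layout.

```sh
#!/usr/bin/env bash
# Sketch: download one assembled profile subset listed in profile_index.csv.
# Assumes column 1 = subset label, column 2 = profile URL (column layout hypothetical).
set -euo pipefail

INDEX_URL="https://raw.githubusercontent.com/jump-cellpainting/datasets/main/manifests/profile_index.csv"
SUBSET="all_interpretable"   # assumed label; check the index for the exact names

# Skip the header, keep the row whose first field matches the subset label,
# and take its URL from the second field.
URL=$(curl --silent "${INDEX_URL}" | tail -n +2 | awk -F',' -v s="${SUBSET}" '$1 ~ s {print $2; exit}')
[ -n "${URL}" ] || { echo "Subset ${SUBSET} not found in index"; exit 1; }

echo "Downloading ${SUBSET} profiles from ${URL}"
curl --location --output "${SUBSET}.parquet" "${URL}"
```

The downloaded parquet file can then be read with any parquet-aware tool.
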
12 changes: 7 additions & 5 deletions manifests/src/upload_index.sh
@@ -1,8 +1,9 @@
# Find the latest version of the dataset
ZENODO_ENDPOINT="https://zenodo.org"
DEPOSITION_PREFIX="${ZENODO_ENDPOINT}/api/deposit/depositions"
ORIGINAL_ID="13146273"
ORIGINAL_ID="13892061"
FILE_TO_VERSION="manifests/profile_index.csv"
+FILENAME=$(echo ${FILE_TO_VERSION} | sed 's+.*/++g')

echo "Checking that S3 ETags match their local counterpart"
S3_ETAGS=$(cat ${FILE_TO_VERSION} | tail -n +2 | cut -f2 -d',' | xargs -I {} -- curl -I --silent "{}" | grep ETag | awk '{print $2}' | sed 's/\r$//' | md5sum | cut -f1 -d" ")
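
The pipeline above hashes the concatenation of all remote ETags; as a point of reference, a single-file version of the same check might look like the sketch below (placeholder URL and path; it assumes a single-part S3 upload, where the ETag equals the object's MD5).

```sh
# Hypothetical single-file version of the ETag check (placeholder URL and path).
# For single-part S3 uploads the ETag is the object's MD5, so the two values should match.
URL="https://cellpainting-gallery.s3.amazonaws.com/example/profiles.parquet"  # placeholder
LOCAL_FILE="profiles.parquet"                                                 # placeholder

REMOTE_ETAG=$(curl -I --silent "${URL}" | grep -i '^etag' | awk '{print $2}' | tr -d '"\r')
LOCAL_MD5=$(md5sum "${LOCAL_FILE}" | cut -f1 -d' ')

if [ "${REMOTE_ETAG}" = "${LOCAL_MD5}" ]; then
    echo "ETag matches local MD5"
else
    echo "Mismatch: remote ${REMOTE_ETAG} vs local ${LOCAL_MD5}"
fi
```
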
@@ -27,7 +28,7 @@ else # Update existing dataset
LOCAL_HASH=$(md5sum ${FILE_TO_VERSION} | cut -f1 -d" ")

echo "Checking for changes in file contents: Remote ${REMOTE_HASH} vs Local ${LOCAL_HASH}"
if [ "${REMOTE_HASH}" = "${LOCAL_HASH}" ]; then
if [ "${REMOTE_HASH}" == "${LOCAL_HASH}" ]; then
echo "The urls and md5sums have not changed"
exit 0
fi
@@ -54,7 +55,7 @@ DEPOSITION=$(curl -H "Content-Type: application/json" \
echo "New deposition ID is ${DEPOSITION}"

# Variables
-BUCKET_DATA=$(curl "${DEPOSITION_PREFIX}/$DEPOSITION?access_token=$ZENODO_TOKEN")
+BUCKET_DATA=$(curl "${DEPOSITION_PREFIX}/${DEPOSITION}?access_token=${ZENODO_TOKEN}")
BUCKET=$(echo "${BUCKET_DATA}" | jq --raw-output .links.bucket)

if [ "${BUCKET}" = "null" ]; then
@@ -64,10 +65,11 @@ if [ "${BUCKET}" = "null" ]; then
fi

# Upload file
echo "Uploading file to bucket ${BUCKET}"
echo "Uploading file ${FILE_TO_VERSION} to bucket ${BUCKET}"
+cat ${FILE_TO_VERSION}
curl -o /dev/null \
--upload-file ${FILE_TO_VERSION} \
-${BUCKET}/${FILE_TO_VERSION}?access_token="${ZENODO_TOKEN}"
+${BUCKET}/${FILENAME}?access_token="${ZENODO_TOKEN}"


# Upload Metadata
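
For context, here is a minimal sketch of the Zenodo flow this script follows: create a new version of an existing deposition, locate its file bucket, upload the file, and publish. It uses Zenodo's deposit REST API endpoints; `ZENODO_TOKEN` is assumed to be set, error handling is minimal, and the real script additionally performs the hash checks and metadata upload shown above.

```sh
#!/usr/bin/env bash
# Sketch of the Zenodo "new version" flow (assumes ZENODO_TOKEN is set; minimal error handling).
set -euo pipefail

ZENODO_ENDPOINT="https://zenodo.org"
DEPOSITION_PREFIX="${ZENODO_ENDPOINT}/api/deposit/depositions"
ORIGINAL_ID="13892061"
FILE_TO_VERSION="manifests/profile_index.csv"
FILENAME=$(basename "${FILE_TO_VERSION}")

# 1. Create a draft for a new version of the existing deposition.
DRAFT_URL=$(curl --silent -X POST \
  "${DEPOSITION_PREFIX}/${ORIGINAL_ID}/actions/newversion?access_token=${ZENODO_TOKEN}" \
  | jq --raw-output .links.latest_draft)
DEPOSITION=$(basename "${DRAFT_URL}")

# 2. Fetch the draft deposition and pull out its file bucket.
BUCKET=$(curl --silent "${DEPOSITION_PREFIX}/${DEPOSITION}?access_token=${ZENODO_TOKEN}" \
  | jq --raw-output .links.bucket)

# 3. Upload the file into the bucket (one object per filename).
curl --silent --upload-file "${FILE_TO_VERSION}" \
  "${BUCKET}/${FILENAME}?access_token=${ZENODO_TOKEN}"

# 4. Publish the new version.
curl --silent -X POST \
  "${DEPOSITION_PREFIX}/${DEPOSITION}/actions/publish?access_token=${ZENODO_TOKEN}"
```
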
