From 6b0c590722dc2054cac0190d85cdc1acc2150d74 Mon Sep 17 00:00:00 2001
From: matthew-a-dunlap
Date: Thu, 1 Nov 2018 16:27:38 -0400
Subject: [PATCH 1/4] Zip package and create package sha

---
 scn/post_upload_s3.bash | 35 +++++++++++++++++++++++++++++------
 1 file changed, 29 insertions(+), 6 deletions(-)

diff --git a/scn/post_upload_s3.bash b/scn/post_upload_s3.bash
index 8fb3b3b..026cad1 100755
--- a/scn/post_upload_s3.bash
+++ b/scn/post_upload_s3.bash
@@ -68,24 +68,47 @@ do
     #move to HOLD location
     if [ ! `aws s3 ls s3://${S3HOLD}/${ulidFromJson}/`]; then
         #this check is different than normal post_upload, we don't use the extra folder level
-        aws s3 cp --recursive ${DEPOSIT}/${ulidFolder}/${ulidFolder}/ s3://${S3HOLD}/${ulidFromJson}/ #this does not copy empty folders from DEPOSIT as folders do not actually exist in s3
+        #MAD: Here we'll need to make the package a zip and upload it
+        #This probably will need a new checksum, I forget how we're doing that...
+        #will need to install zip
+        #yum install zip #not here
+
+        #MAD: Should we be zipping to a different temp location?
+        packageName="package_$ulidFolder"
+        packageExt="zip"
+        #Am i correctly cleaning up my new files?
+        echo "beginning zip of ${DEPOSIT}/${ulidFolder}/${ulidFolder}/"
+        cd ${DEPOSIT}/${ulidFolder}/
+
+        #It would be awesome to someday zip everything while it is being streamed.
+        zip -r $packageName ${ulidFolder}/ #There are two layers of ${ulidFolder}
+
+
+
+        #shasum $(find . -type f) > $checksumfile
+        shasum ${packageName}.${packageExt} > ${packageName}.sha
+
+        echo "test: ${DEPOSIT}/${ulidFolder}/$packageName"
+        aws s3 cp ${packageName}.${packageExt} s3://${S3HOLD}/${ulidFromJson}/ #this does not copy empty folders from DEPOSIT as folders do not actually exist in s3
+        aws s3 cp ${packageName}.sha s3://${S3HOLD}/${ulidFromJson}/ #this does not copy empty folders from DEPOSIT as folders do not actually exist in s3
 
         err=$?
         if (( $err != 0 )) ; then
             echo "dcm: file move $ulid"
             break
         fi
-        rm -rf ${DEPOSIT}/${ulidFolder}/${ulidFolder}
+        rm -rf ${ulidFolder}
+        rm -rf $packageName
         echo "data moved"
 
         tmpfile=/tmp/dcm_fail-$$.json # not caring that the success tmp file has "fail" in the name
-        #This may prove to be slow with large datasets
-        sz=`aws s3 ls --summarize --human-readable --recursive s3://${S3HOLD}/${ulidFromJson}/ | grep "Total Size: " | cut -d' ' -f 6`
+        sz=`aws s3 ls --summarize --human-readable s3://${S3HOLD}/${ulidFromJson}/$packageName | grep "Total Size: " | cut -d' ' -f 6`
         echo "{\"status\":\"validation passed\",\"uploadFolder\":\"${ulidFromJson}\",\"totalSize\":$sz}" > $tmpfile
+
         dvr=`curl -s --insecure -H "X-Dataverse-key: ${DVAPIKEY}" -H "Content-type: application/json" -X POST --upload-file $tmpfile "${DVHOST}/api/datasets/:persistentId/dataCaptureModule/checksumValidation?persistentId=doi:${DOI_SHOULDER}/${ulidFromJson}"`
-        dvst=`echo $dvr | jq -r .status`
+        dvst=`echo $dvr | jq -r .status` #jq errors when dataverse returns a 500 because it returns it as html
 
         if [ "OK" != "$dvst" ]; then
             #TODO - this block should email alerts queue
@@ -114,7 +137,7 @@ do
 
     #Dataverse does not actually need this file so we aren't going to store it to s3
     #aws s3 cp $DEPOSIT/processed/${ulidFolder}.json s3://${S3HOLD}/stage/
-
+    cd ../..
     rm $DEPOSIT/processed/${ulidFolder}.json
     #de-activate key (still in id_dsa.pub if we need it)
     rm ${DEPOSIT}/${ulidFolder}/.ssh/authorized_keys

From b51a6c64322d9935433bc859584e8486e527ac8f Mon Sep 17 00:00:00 2001
From: matthew-a-dunlap
Date: Wed, 28 Nov 2018 13:29:17 -0500
Subject: [PATCH 2/4] install zip for s3package, comment clean

---
 rpm/dep/c6build.dockerfile |  2 +-
 rpm/dep/c7build.dockerfile |  2 +-
 scn/post_upload_s3.bash    | 17 ++++-------------
 3 files changed, 6 insertions(+), 15 deletions(-)

diff --git a/rpm/dep/c6build.dockerfile b/rpm/dep/c6build.dockerfile
index 69ef586..d60eb94 100644
--- a/rpm/dep/c6build.dockerfile
+++ b/rpm/dep/c6build.dockerfile
@@ -4,7 +4,7 @@ FROM centos:6
 
 # starting centos 6 build image for DCM
 
-RUN yum install -y rpm-build python-setuptools wget rpmdevtools
+RUN yum install -y rpm-build python-setuptools wget rpmdevtools zip
 
 # need to match the jenkins uid/gid for this to work properly.
 # FIXME - this shouldn't be hard-coded
diff --git a/rpm/dep/c7build.dockerfile b/rpm/dep/c7build.dockerfile
index 99e99df..8a829d5 100644
--- a/rpm/dep/c7build.dockerfile
+++ b/rpm/dep/c7build.dockerfile
@@ -2,7 +2,7 @@ FROM centos:7
 
 # starting centos 7 build image for DCM
 
-RUN yum install -y rpm-build python-setuptools wget
+RUN yum install -y rpm-build python-setuptools wget zip
 
 RUN useradd builder
 #USER builder
diff --git a/scn/post_upload_s3.bash b/scn/post_upload_s3.bash
index 026cad1..8bbd56a 100755
--- a/scn/post_upload_s3.bash
+++ b/scn/post_upload_s3.bash
@@ -68,29 +68,20 @@ do
     #move to HOLD location
     if [ ! `aws s3 ls s3://${S3HOLD}/${ulidFromJson}/`]; then
         #this check is different than normal post_upload, we don't use the extra folder level
-        #MAD: Here we'll need to make the package a zip and upload it
-        #This probably will need a new checksum, I forget how we're doing that...
-        #will need to install zip
-        #yum install zip #not here
-
-        #MAD: Should we be zipping to a different temp location?
         packageName="package_$ulidFolder"
         packageExt="zip"
-        #Am i correctly cleaning up my new files?
-        echo "beginning zip of ${DEPOSIT}/${ulidFolder}/${ulidFolder}/"
+
         cd ${DEPOSIT}/${ulidFolder}/
 
         #It would be awesome to someday zip everything while it is being streamed.
+        echo "beginning zip of ${DEPOSIT}/${ulidFolder}/${ulidFolder}/"
         zip -r $packageName ${ulidFolder}/ #There are two layers of ${ulidFolder}
-
-
-
         #shasum $(find . -type f) > $checksumfile
         shasum ${packageName}.${packageExt} > ${packageName}.sha
 
         echo "test: ${DEPOSIT}/${ulidFolder}/$packageName"
-        aws s3 cp ${packageName}.${packageExt} s3://${S3HOLD}/${ulidFromJson}/ #this does not copy empty folders from DEPOSIT as folders do not actually exist in s3
-        aws s3 cp ${packageName}.sha s3://${S3HOLD}/${ulidFromJson}/ #this does not copy empty folders from DEPOSIT as folders do not actually exist in s3
+        aws s3 cp ${packageName}.${packageExt} s3://${S3HOLD}/${ulidFromJson}/
+        aws s3 cp ${packageName}.sha s3://${S3HOLD}/${ulidFromJson}/
 
         err=$?
         if (( $err != 0 )) ; then

From 18b2a10a3e65afe1093aaf0cb9d4f7b741feb090 Mon Sep 17 00:00:00 2001
From: matthew-a-dunlap
Date: Tue, 4 Dec 2018 14:51:13 -0500
Subject: [PATCH 3/4] S3 PR Fixes, error handling etc

---
 rpm/dep/c6build.dockerfile |  2 +-
 rpm/dep/c7build.dockerfile |  2 +-
 scn/post_upload_s3.bash    | 17 ++++++++++-------
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/rpm/dep/c6build.dockerfile b/rpm/dep/c6build.dockerfile
index d60eb94..69ef586 100644
--- a/rpm/dep/c6build.dockerfile
+++ b/rpm/dep/c6build.dockerfile
@@ -4,7 +4,7 @@ FROM centos:6
 
 # starting centos 6 build image for DCM
 
-RUN yum install -y rpm-build python-setuptools wget rpmdevtools zip
+RUN yum install -y rpm-build python-setuptools wget rpmdevtools
 
 # need to match the jenkins uid/gid for this to work properly.
 # FIXME - this shouldn't be hard-coded
diff --git a/rpm/dep/c7build.dockerfile b/rpm/dep/c7build.dockerfile
index 8a829d5..99e99df 100644
--- a/rpm/dep/c7build.dockerfile
+++ b/rpm/dep/c7build.dockerfile
@@ -2,7 +2,7 @@ FROM centos:7
 
 # starting centos 7 build image for DCM
 
-RUN yum install -y rpm-build python-setuptools wget zip
+RUN yum install -y rpm-build python-setuptools wget
 
 RUN useradd builder
 #USER builder
diff --git a/scn/post_upload_s3.bash b/scn/post_upload_s3.bash
index 8bbd56a..c58ac75 100755
--- a/scn/post_upload_s3.bash
+++ b/scn/post_upload_s3.bash
@@ -26,6 +26,7 @@ DEPOSIT=/deposit
 HOLD=/hold
 retry_delay=60
 SRC=/opt/dcm/
+packageExt="zip"
 
 S3HOLD=test-dcm
 
@@ -69,23 +70,25 @@ do
     if [ ! `aws s3 ls s3://${S3HOLD}/${ulidFromJson}/`]; then
         #this check is different than normal post_upload, we don't use the extra folder level
         packageName="package_$ulidFolder"
-        packageExt="zip"
+
         cd ${DEPOSIT}/${ulidFolder}/
 
         #It would be awesome to someday zip everything while it is being streamed.
         echo "beginning zip of ${DEPOSIT}/${ulidFolder}/${ulidFolder}/"
         zip -r $packageName ${ulidFolder}/ #There are two layers of ${ulidFolder}
+        err=$?
+        if (( $err != 0 )) ; then
+            echo "dcm: zip package $ulid"
+            break
+        fi
         shasum ${packageName}.${packageExt} > ${packageName}.sha
 
         echo "test: ${DEPOSIT}/${ulidFolder}/$packageName"
-        aws s3 cp ${packageName}.${packageExt} s3://${S3HOLD}/${ulidFromJson}/
-        aws s3 cp ${packageName}.sha s3://${S3HOLD}/${ulidFromJson}/
-
+        aws s3 cp ${packageName}.${packageExt} s3://${S3HOLD}/${ulidFromJson}/ && aws s3 cp ${packageName}.sha s3://${S3HOLD}/${ulidFromJson}/
         err=$?
         if (( $err != 0 )) ; then
-            echo "dcm: file move $ulid"
+            echo "dcm: aws file move $ulid"
             break
         fi
         rm -rf ${ulidFolder}
@@ -93,7 +96,7 @@ do
         echo "data moved"
 
         tmpfile=/tmp/dcm_fail-$$.json # not caring that the success tmp file has "fail" in the name
-        sz=`aws s3 ls --summarize --human-readable s3://${S3HOLD}/${ulidFromJson}/$packageName | grep "Total Size: " | cut -d' ' -f 6`
+        sz=`aws s3 ls --summarize --human-readable s3://${S3HOLD}/${ulidFromJson}/${packageName}.${packageExt} | grep "Total Size: " | cut -d' ' -f 6`
         echo "{\"status\":\"validation passed\",\"uploadFolder\":\"${ulidFromJson}\",\"totalSize\":$sz}" > $tmpfile

From 925f3a20ec1a9454a5896909723eb7e925381f54 Mon Sep 17 00:00:00 2001
From: matthew-a-dunlap
Date: Tue, 4 Dec 2018 15:48:03 -0500
Subject: [PATCH 4/4] Forgotten rpm.spec for s3

---
 rpm/dcm.spec | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rpm/dcm.spec b/rpm/dcm.spec
index c15c238..a2e7d57 100644
--- a/rpm/dcm.spec
+++ b/rpm/dcm.spec
@@ -6,7 +6,7 @@ Source: dcm-%{version}.tar.gz
 BuildArch: noarch
 BuildRoot: %{_tmppath}/%{name}-${version}
 License: proprietary
-Requires: python python-pip python-dateutil redis lighttpd openssh-server openssh-clients rsync perl-Digest-SHA m4 jq rssh sudo
+Requires: python python-pip python-dateutil redis lighttpd openssh-server openssh-clients rsync perl-Digest-SHA m4 jq rssh sudo zip
 
 %description
 data capture module, deposition protocol rsync+ssh protocol
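
Note (illustrative, not part of the patch series): the standalone sketch below restates the packaged-upload flow this series builds up in scn/post_upload_s3.bash: zip the deposited folder, write a shasum sidecar, copy both objects to the S3 hold bucket, then report the stored package size to Dataverse's checksumValidation endpoint. It is only an approximation under assumed example values; DEPOSIT, S3HOLD, ulidFolder, ulidFromJson, DVHOST, DVAPIKEY, and DOI_SHOULDER are placeholders here (the real script gets them from its configuration and queue input), and errors simply exit rather than using the script's loop-level break.

    #!/bin/bash
    # Assumed example values; the production script derives these elsewhere.
    DEPOSIT=/deposit
    S3HOLD=test-dcm
    ulidFolder=EXAMPLEULID
    ulidFromJson=EXAMPLEULID
    DVHOST=https://dataverse.example.edu
    DVAPIKEY=xxxxxxxx
    DOI_SHOULDER=10.5072/FK2

    packageName="package_${ulidFolder}"
    packageExt="zip"

    cd "${DEPOSIT}/${ulidFolder}/" || exit 1

    # Zip the doubly nested upload folder; stop before uploading if packaging fails.
    zip -r "${packageName}" "${ulidFolder}/" || { echo "dcm: zip package failed"; exit 1; }

    # Write the checksum sidecar for the package.
    shasum "${packageName}.${packageExt}" > "${packageName}.sha"

    # Copy package and checksum to the hold bucket; && mirrors PATCH 3's combined copy.
    aws s3 cp "${packageName}.${packageExt}" "s3://${S3HOLD}/${ulidFromJson}/" \
        && aws s3 cp "${packageName}.sha" "s3://${S3HOLD}/${ulidFromJson}/" \
        || { echo "dcm: aws file move failed"; exit 1; }

    # Look up the stored package size, then tell Dataverse that validation passed.
    sz=$(aws s3 ls --summarize --human-readable "s3://${S3HOLD}/${ulidFromJson}/${packageName}.${packageExt}" \
        | grep "Total Size: " | cut -d' ' -f 6)
    tmpfile=$(mktemp)
    echo "{\"status\":\"validation passed\",\"uploadFolder\":\"${ulidFromJson}\",\"totalSize\":${sz}}" > "${tmpfile}"
    curl -s --insecure -H "X-Dataverse-key: ${DVAPIKEY}" -H "Content-type: application/json" \
        -X POST --upload-file "${tmpfile}" \
        "${DVHOST}/api/datasets/:persistentId/dataCaptureModule/checksumValidation?persistentId=doi:${DOI_SHOULDER}/${ulidFromJson}"

Chaining the two aws s3 cp calls with && (as PATCH 3 does) means the single err=$? check that follows covers a failure in either copy, and checking zip's exit status before any upload keeps a broken package out of the hold bucket.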