Commit

Merge branch 's3_package_zip'
pameyer committed Dec 5, 2018
2 parents 2381df4 + 925f3a2 commit 928c3df
Showing 2 changed files with 25 additions and 8 deletions.
2 changes: 1 addition & 1 deletion rpm/dcm.spec
@@ -6,7 +6,7 @@ Source: dcm-%{version}.tar.gz
BuildArch: noarch
BuildRoot: %{_tmppath}/%{name}-%{version}
License: proprietary
-Requires: python python-pip python-dateutil redis lighttpd openssh-server openssh-clients rsync perl-Digest-SHA m4 jq rssh sudo
+Requires: python python-pip python-dateutil redis lighttpd openssh-server openssh-clients rsync perl-Digest-SHA m4 jq rssh sudo zip
%description
data capture module for the rsync+ssh deposition protocol

31 changes: 24 additions & 7 deletions scn/post_upload_s3.bash
@@ -26,6 +26,7 @@ DEPOSIT=/deposit
HOLD=/hold
retry_delay=60
SRC=/opt/dcm/
+packageExt="zip"

S3HOLD=test-dcm

@@ -68,24 +69,40 @@ do
#move to HOLD location

if [ ! `aws s3 ls s3://${S3HOLD}/${ulidFromJson}/` ]; then #this check differs from the normal post_upload: we don't use the extra folder level
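# Hedged aside (not part of this commit): `aws s3 ls` typically exits nonzero
# when nothing matches the prefix, so the emptiness test above could rely on
# the exit status instead of word-splitting command output, e.g.:
#   if ! aws s3 ls s3://${S3HOLD}/${ulidFromJson}/ > /dev/null 2>&1; then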
-aws s3 cp --recursive ${DEPOSIT}/${ulidFolder}/${ulidFolder}/ s3://${S3HOLD}/${ulidFromJson}/ #this does not copy empty folders from DEPOSIT, as folders do not actually exist in s3
+packageName="package_$ulidFolder"


+cd ${DEPOSIT}/${ulidFolder}/

+#It would be awesome to someday zip everything while it is being streamed.
+echo "beginning zip of ${DEPOSIT}/${ulidFolder}/${ulidFolder}/"
+zip -r $packageName ${ulidFolder}/ #There are two layers of ${ulidFolder}
err=$?
if (( $err != 0 )) ; then
echo "dcm: file move $ulid"
echo "dcm: zip package $ulid"
break
fi
rm -rf ${DEPOSIT}/${ulidFolder}/${ulidFolder}
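# Sketch of the streaming idea mentioned above (an assumption, not this
# commit's code): zip writes the archive to stdout when the archive name is
# "-", and `aws s3 cp - <dest>` uploads from stdin, so the package could in
# principle be streamed to S3 without a local copy (the local shasum step
# below would then need rethinking):
#   zip -r - ${ulidFolder}/ | aws s3 cp - s3://${S3HOLD}/${ulidFromJson}/${packageName}.${packageExt}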

+shasum ${packageName}.${packageExt} > ${packageName}.sha

+aws s3 cp ${packageName}.${packageExt} s3://${S3HOLD}/${ulidFromJson}/ && aws s3 cp ${packageName}.sha s3://${S3HOLD}/${ulidFromJson}/
err=$?
if (( $err != 0 )) ; then
echo "dcm: aws file move $ulid"
break
fi
rm -rf ${ulidFolder}
+rm -f ${packageName}.${packageExt} ${packageName}.sha #zip appended the extension, so remove the actual artifacts
echo "data moved"
tmpfile=/tmp/dcm_fail-$$.json # not caring that the success tmp file has "fail" in the name

#This may prove to be slow with large datasets
-sz=`aws s3 ls --summarize --human-readable --recursive s3://${S3HOLD}/${ulidFromJson}/ | grep "Total Size: " | cut -d' ' -f 6`
+sz=`aws s3 ls --summarize --human-readable s3://${S3HOLD}/${ulidFromJson}/${packageName}.${packageExt} | grep "Total Size: " | cut -d' ' -f 6`
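# A likely faster alternative (sketch only; note that head-object returns the
# size as raw bytes in ContentLength, not the human-readable units parsed
# above):
#   sz=`aws s3api head-object --bucket ${S3HOLD} --key ${ulidFromJson}/${packageName}.${packageExt} --query ContentLength --output text`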

echo "{\"status\":\"validation passed\",\"uploadFolder\":\"${ulidFromJson}\",\"totalSize\":$sz}" > $tmpfile


dvr=`curl -s --insecure -H "X-Dataverse-key: ${DVAPIKEY}" -H "Content-type: application/json" -X POST --upload-file $tmpfile "${DVHOST}/api/datasets/:persistentId/dataCaptureModule/checksumValidation?persistentId=doi:${DOI_SHOULDER}/${ulidFromJson}"`
-dvst=`echo $dvr | jq -r .status`
+dvst=`echo $dvr | jq -r .status` #jq errors when Dataverse returns a 500, because the body comes back as HTML
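# Hedged sketch for the failure mode noted above: silence jq's parse error
# and fall back to a sentinel so the "OK" comparison below still takes the
# failure branch:
#   dvst=`echo $dvr | jq -r .status 2>/dev/null || echo "PARSE_ERROR"`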

if [ "OK" != "$dvst" ]; then
#TODO - this block should email alerts queue
@@ -114,7 +131,7 @@ do

#Dataverse does not actually need this file so we aren't going to store it to s3
#aws s3 cp $DEPOSIT/processed/${ulidFolder}.json s3://${S3HOLD}/stage/

+cd ../..
rm $DEPOSIT/processed/${ulidFolder}.json
#de-activate key (still in id_dsa.pub if we need it)
rm ${DEPOSIT}/${ulidFolder}/.ssh/authorized_keys
