Skip to content

Commit

Permalink
Forgotten mets (#2647)
Browse files Browse the repository at this point in the history
* add script to push METS records mentioned in a CSV

* Apply auto-formatting rules

* add source id script

---------

Co-authored-by: Buildkite on behalf of Wellcome Collection <[email protected]>
  • Loading branch information
paul-butcher and weco-bot authored May 17, 2024
1 parent f7893cc commit 3f740ca
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 0 deletions.
25 changes: 25 additions & 0 deletions mets_adapter/populate_mets/populate_mets_from_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import csv
import sys
from populate_mets import specific


# Push each Born Digital bag mentioned in a CSV through the adapter.
# The CSV should have a row corresponding to each Bag (Item) thus:
# Item, MY/BAG/1/2/3
# Item, MY/BAG/4/5/6
# More columns can be added, and rows starting with other values are ignored.


def main(csv_path):
    """Read the CSV at ``csv_path`` and pass its bag identifiers to ``specific``.

    ``specific`` is called while the file is still open because
    ``extract_bag_ids`` is a generator and only reads rows as they are consumed.
    """
    # The csv module asks for files to be opened with newline="" so that it can
    # handle line endings (including newlines inside quoted fields) itself.
    with open(csv_path, "r", newline="") as csv_file:
        specific(extract_bag_ids(csv.reader(csv_file)))


def extract_bag_ids(csv_reader):
    """Yield a ``born-digital/``-prefixed identifier for each "Item" row.

    ``csv_reader`` is any iterable of rows (sequences of strings), such as a
    ``csv.reader``. Only rows whose first column is exactly ``Item`` are used;
    the identifier is taken from the second column and any further columns are
    ignored. Blank rows are skipped.

    Raises ``IndexError`` if an "Item" row has no second column.
    """
    for row in csv_reader:
        # csv.reader yields an empty list for a blank line, so check the row is
        # non-empty before looking at its first column.
        if row and row[0] == "Item":
            # Strip surrounding whitespace so that "Item, MY/BAG/1/2/3" (with a
            # space after the comma) does not leak the space into the identifier.
            yield f"born-digital/{row[1].strip()}"


if __name__ == "__main__":
    # Exit with a usage hint rather than an IndexError traceback when the CSV
    # path has not been supplied.
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <path-to-csv>")
    main(sys.argv[1])
12 changes: 12 additions & 0 deletions scripts/es_source_identifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Given the index date, source system name and identifier (in that order), print a
# GET request that can be used in the Elasticsearch console to fetch the record from
# the works-source index.

# It's basically just URL Encoding, but it can be a bit of a faff compared to ids in other indices
import sys
from urllib.parse import quote_plus

# Exit with a usage hint rather than an IndexError traceback when any of the
# three arguments is missing.
if len(sys.argv) < 4:
    sys.exit(f"Usage: {sys.argv[0]} <index-date> <source-system> <identifier>")

date = sys.argv[1]  # suffix of the works-source index name
source_system = sys.argv[2]
work_id = sys.argv[3]
# The document id has the form Work[<source system>/<lowercased identifier>];
# it is URL-encoded so that it can be used in the request path.
doc_id = quote_plus(f"Work[{source_system}/{work_id.lower()}]")
print(f"GET works-source-{date}/_doc/{doc_id}")

0 comments on commit 3f740ca

Please sign in to comment.