Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BibAuthority: people collection #162

Open
wants to merge 3 commits into
base: prod
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions modules/bibauthority/bin/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#
# This file is part of Invenio.
# Copyright (C) 2011 CERN.
# Copyright (C) 2011, 2012, 2013, 2014, 2015 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
Expand All @@ -16,4 +15,8 @@
# along with Invenio; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

bin_SCRIPTS = bibauthority_people

EXTRA_DIST = bibauthority_people.in

CLEANFILES = *~ *.tmp
166 changes: 166 additions & 0 deletions modules/bibauthority/bin/bibauthority_people
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
#!/usr/bin/env python

# This file is part of Invenio.
# Copyright (C) 2015 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# Invenio is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

"""BibAuthority CERN people collection command line interface.

Map all CERN LDAP records to MARC 21 authority records, write to XML files and
upload to CDS.

See bibsched tasklet bst_bibauthority_updater for updating the collection.

Usage:
$ python bibauthority_people -h
"""

import argparse
import sys
from glob import glob
from os.path import isdir, join

from invenio.bibauthority_people_config import (
CFG_BIBAUTHORITY_LDAP_ATTRLIST, CFG_BIBAUTHORITY_LDAP_SEARCHFILTER)
from invenio.bibauthority_people_mapper import Mapper, MapperError
from invenio.bibauthority_people_utils import (
bibupload, export_json, get_data_from_json, UtilsError)
from invenio.ldap_cern import get_users_records_data, LDAPError


def load_json(parser, json_file):
    """Read ``json_file`` and return the data it contains.

    Intended as an argparse ``type=`` callable: when the file cannot be
    loaded, the failure is reported through ``parser.error`` instead of
    propagating the exception.

    :param parser: argparse parser used to report a loading failure
    :param json_file: path of the JSON file to read
    :return: whatever ``get_data_from_json`` returns for ``json_file``
    """
    try:
        data = get_data_from_json(json_file)
    except UtilsError as err:
        parser.error(err)
    else:
        return data


def get_records(ldap_searchfilter=CFG_BIBAUTHORITY_LDAP_SEARCHFILTER,
                ldap_attrlist=CFG_BIBAUTHORITY_LDAP_ATTRLIST):
    """Fetch user records from LDAP.

    :param ldap_searchfilter: LDAP search filter passed to the query
    :param ldap_attrlist: list of LDAP attributes to request per record
    :return: the records returned by ``get_users_records_data``; an empty
        list when the query raises ``LDAPError`` (the error is printed)
    """
    try:
        return get_users_records_data(
            ldap_searchfilter, ldap_attrlist, "utf-8")
    except LDAPError as err:
        # Best effort: report the problem and let the caller continue with
        # no records.
        print(err)
        return []

# Hand-written usage line. It must list exactly the options registered
# below: the mapping option is '-m' (there is no '-l' option).
usage = ("bibauthority_people.py [-h] [[-r RECORDSIZE] [-x FILE [-m FILE] "
         "[-j FILE]]] [-i FILE [FILE ...]] [-c]")

parser = argparse.ArgumentParser(
    description="Command line interface for the CERN people collection. Map "
                "all CERN LDAP records to MARC 21 authority records, write "
                "to XML files and upload to CDS.",
    usage=usage)

# Options are grouped by purpose so that '-h' prints them in sections.
group1 = parser.add_argument_group("Export")
group2 = parser.add_argument_group("Insertion to CDS")
group3 = parser.add_argument_group("Information")

group1.add_argument(
    "-r",
    "--recordsize",
    dest="recordsize",
    type=int,
    default=500,
    help="limit number of record elements for each XML file and has to be "
         "used together with '-x' [default: %(default)d]. For unlimited "
         "records use 0")
group1.add_argument(
    "-x",
    "--exportxml",
    dest="exportxml",
    type=str,
    metavar="FILE",
    help="export mapped CERN LDAP records to XML FILE(s). Number of records "
         "each FILE is based on RECORDSIZE")
group1.add_argument(
    "-m",
    "--mapping",
    dest="inspireids",
    # The file is loaded while parsing, so an unreadable mapping file is
    # reported as a regular command line error.
    type=lambda f: load_json(parser, f),
    metavar="FILE",
    help="mapping dictionary {'CERN-ID': 'Inspire-ID', ...} stored in FILE "
         "used for mapping the CERN-ID to Inspire-ID instead of using "
         "ATLAS GLANCE. Works together with '-x'")
group1.add_argument(
    "-j",
    "--exportjson",
    dest="exportjson",
    type=str,
    metavar="FILE",
    help="export CERN LDAP records to a JSON-formatted FILE, recommended "
         "using it together with '-x'")
group2.add_argument(
    "-i",
    "--insert",
    dest="insert",
    type=str,
    nargs="+",
    metavar="FILE",
    help="insert XML FILE(s) to CDS using invenio.bibupload -i. "
         "'-i DIRECTORY' will upload all XML files in the given DIRECTORY")
group3.add_argument(
    "-c",
    "--count",
    dest="count",
    action="store_true",
    help="count all primary CERN LDAP records")

args = parser.parse_args()

# Both export modes work on the same LDAP result, so fetch it only once.
if args.exportxml or args.exportjson:
    records = get_records()
    print("{0} records fetched from CERN LDAP".format(len(records)))

# Map the LDAP records to MARC 21 authority records and write them as XML.
if args.exportxml:
    try:
        mapper = Mapper(mapping_inspire_ids=args.inspireids or None)
        mapper.map_ldap_records(records)
        mapper.write_marcxml(args.exportxml, args.recordsize)
    except MapperError as e:
        # write() needs a string; passing the exception object itself raises
        # TypeError and hides the real error message.
        sys.stderr.write("{0}\n".format(e))
        sys.exit(1)

# Dump the raw LDAP records to a JSON file.
if args.exportjson:
    try:
        export_json(records, args.exportjson)
    except UtilsError as e:
        sys.stderr.write("{0}\n".format(e))
        sys.exit(1)

# Upload XML files to CDS, one bibupload task per file.
if args.insert:
    arg0 = args.insert[0]
    if isdir(arg0):
        # '-i DIRECTORY': upload every XML file located in the directory
        xml_files = glob(join(arg0, "*.xml"))
    else:
        xml_files = args.insert
    if not xml_files:
        print("Error: no XML files found in {0}".format(arg0))
    for f in xml_files:
        task_id = bibupload(f, "-i", "bibauthority-people-insert")
        # Report the outcome for every file, not only for the last one.
        if task_id:
            print("Task (identifier: {0}) is correctly enqueued".format(
                task_id))
        else:
            print("Error: failed to enqueue task")

# Only the employeeID attribute is requested, as counting needs no more.
if args.count:
    records = get_records(ldap_attrlist=['employeeID'])
    print("{0} records found on CERN LDAP".format(len(records)))
80 changes: 80 additions & 0 deletions modules/bibauthority/lib/bibauthority_people_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# This file is part of Invenio.
# Copyright (C) 2015 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# Invenio is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

"""BibAuthority CERN people collection configuration file."""

from os.path import join

from invenio.config import CFG_CACHEDIR, CFG_TMPSHAREDDIR
from invenio.containerutils import LazyDict
from invenio.websubmit_author_sources.atlas_glance import query_author_source


def _atlas_glance_to_inspire_id():
    """Build the mapping from "cernccid" to "inspireid" values.

    Iterates over the authors returned by ``query_author_source("")`` and
    keeps only those for which both values are present and truthy.

    :return: dictionary ``{cernccid: inspireid, ...}``
    """
    mapping = {}
    for author in query_author_source(""):
        cern_id = author.get("cernccid")
        inspire_id = author.get("inspireid")
        if cern_id and inspire_id:
            mapping[cern_id] = inspire_id
    return mapping


# LDAP search filter: matches entries of any objectClass whose employeeType
# is "Primary"
CFG_BIBAUTHORITY_LDAP_SEARCHFILTER = \
r"(&(objectClass=*)(employeeType=Primary))"

# LDAP attributes requested for each record
# bibauthority_people_mapper contains the same attributes
# NOTE(review): presumably both lists have to be kept in sync - confirm
CFG_BIBAUTHORITY_LDAP_ATTRLIST = [
"employeeID",
"givenName",
"sn",
"displayName",
"facsimileTelephoneNumber",
"telephoneNumber",
"mobile",
"mail",
"department",
"cernGroup",
"description",
"division",
"cernInstituteName",
"extensionAttribute11"]

# Path of the JSON file (inside CFG_CACHEDIR) that stores CERN LDAP records
CFG_BIBAUTHORITY_RECORDS_JSON_FILE = join(CFG_CACHEDIR, "records.json")

# Path of the XML file (inside CFG_TMPSHAREDDIR) that stores updated MARC 21
# authority records
CFG_BIBAUTHORITY_RECORDS_UPDATES_FILE = join(
CFG_TMPSHAREDDIR, "records_updates.xml")

# Prefix for CDS author identifiers
# NOTE(review): the MARC field this prefix is used in is not visible here
CFG_BIBAUTHORITY_AUTHOR_CDS = "AUTHOR|(CDS)"

# Prefix for CERN author identifiers, used in MARC field 035__a
CFG_BIBAUTHORITY_AUTHOR_CERN = "AUTHOR|(SzGeCERN)"

# Prefix for Inspire author identifiers, used in MARC field 035__a
CFG_BIBAUTHORITY_AUTHOR_INSPIRE = "AUTHOR|(INSPIRE)"

# Dictionary containing the mapping CERN-ID: Inspire-ID, built from
# _atlas_glance_to_inspire_id
# NOTE(review): wrapped in LazyDict, presumably so that the ATLAS GLANCE
# query runs on first access rather than at import time - confirm
CFG_BIBAUTHORITY_ATLAS_GLANCE_CERN_ID_TO_INSPIRE_ID_MAPPING = LazyDict(
_atlas_glance_to_inspire_id)

# Sender address of the email sent if duplicate Inspire-IDs are found
# NOTE(review): the value looks like an obfuscated placeholder - confirm
CFG_BIBAUTHORITY_ATLAS_GLANCE_EMAIL_FROM = "[email protected]"

# Recipient address of the email sent if duplicate Inspire-IDs are found
# NOTE(review): the value looks like an obfuscated placeholder - confirm
CFG_BIBAUTHORITY_ATLAS_GLANCE_EMAIL_TO = "[email protected]"
Loading