-
Notifications
You must be signed in to change notification settings - Fork 0
/
test.py
37 lines (32 loc) · 1.36 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from nltk import *  # star import stays first so the explicit imports below win any name clash
from collections import Counter
import itertools
import os
import re
from xml.etree.ElementTree import fromstring

import requests
# NCBI E-utilities API key appended to every request made by this module.
# Set the NCBI_API_KEY environment variable to supply a key without editing
# this file; an unset or empty variable falls back to the literal below.
# NOTE(security): the fallback is a credential committed to source control.
# Treat it as exposed, rotate it, and remove the literal once every run
# provides the key through the environment.
ncbi_api = os.environ.get('NCBI_API_KEY') or '9893ad891eedcd3802a273ea252798721e08'
def _extract_abstracts(report_text):
    """Return the abstract text of every record in a MEDLINE-format report.

    Records are delimited by the literal 'PMC - PMC'; whatever precedes the
    first delimiter is not a record and is ignored. A record with no
    abstract field contributes an empty string, so the result always has one
    entry per record.
    """
    # The original code skipped 6 characters from 'AB', i.e. the full field
    # tag 'AB  - '. Searching for the whole tag avoids matching a stray "AB"
    # elsewhere in the record.
    abstract_tag = 'AB  - '
    abstracts = []
    for record in report_text.split('PMC - PMC')[1:]:
        tag_at = record.find(abstract_tag)
        if tag_at == -1:
            abstracts.append('')
            continue
        start = tag_at + len(abstract_tag)
        # Only look for the terminating 'FAU' *after* the abstract starts;
        # if it is absent, keep everything up to the end of the record.
        end = record.find('FAU', start)
        abstracts.append(record[start:] if end == -1 else record[start:end])
    return abstracts


def text_grab_multiple(pmcs):
    """Fetch the MEDLINE report for several PMC ids and print each abstract.

    Args:
        pmcs: Iterable of PMC id strings. When it is empty nothing is
            requested and nothing is printed.

    Returns:
        None. Each abstract is written to stdout with ``print``.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status from the server.
    """
    pmcs_string = ','.join(pmcs)
    if not pmcs_string:
        return

    # Passing the query as `params` lets requests escape each value, so an
    # id containing '&' or '=' cannot inject extra query parameters.
    response = requests.get(
        'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi',
        params={
            'db': 'pmc',
            'id': pmcs_string,
            'retmode': 'report_type',
            'rettype': 'medline',
            'api_key': ncbi_api,
        },
        timeout=30,  # never hang forever on an unresponsive server
    )
    # Fail loudly instead of slicing an HTML/plain-text error page.
    response.raise_for_status()

    for abstract in _extract_abstracts(response.text):
        print(abstract)
def record_grab_multiple(pmcs):
    """Fetch summary records for several PMC ids and print their metadata.

    Prints a dict that maps every id in ``pmcs`` to a dict with the keys
    'Date', 'Authors', 'Title' and 'DOI'. An id for which no usable record
    came back stays mapped to ``None``.

    Args:
        pmcs: Iterable of PMC id strings; duplicates collapse to one key.

    Returns:
        None. The resulting dict is written to stdout with ``print``.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status from the server.
        xml.etree.ElementTree.ParseError: If the response body is not
            well-formed XML.
    """
    # Build the dict first so the request uses the same de-duplicated id
    # list (and order) that the positional pairing below relies on.
    pmc_data = dict.fromkeys(pmcs)
    if not pmc_data:
        # Nothing to look up: skip the request but still print the result.
        print(pmc_data)
        return

    response = requests.get(
        'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi',
        params={
            'db': 'pmc',
            'id': ','.join(pmc_data),
            'retmode': 'report_type',
            'rettype': 'xml',
            'api_key': ncbi_api,
        },
        timeout=30,  # never hang forever on an unresponsive server
    )
    response.raise_for_status()
    tree = fromstring(response.content)

    # Child positions of the fields inside one record, as used by the
    # original code.
    # NOTE(review): both these positions and the id-to-record pairing are
    # positional; confirm against the ESummary response schema.
    date_at, authors_at, title_at, doi_at = 1, 4, 5, 10

    # zip() stops at the shorter side, so a response with fewer records than
    # requested ids no longer raises IndexError.
    for pmc_id, record in zip(pmc_data, tree):
        if len(record) <= doi_at:
            # Not a full record (presumably an error element); leave None.
            continue
        title = record[title_at].text
        pmc_data[pmc_id] = {
            'Date': record[date_at].text,
            'Authors': [author.text for author in record[authors_at]],
            # An empty title element has text None; keep it as None.
            'Title': title.title() if title is not None else None,
            'DOI': record[doi_at].text,
        }

    print(pmc_data)
# Example run: fetch and print the summary metadata for two PMC ids.
record_grab_multiple(pmcs=['2805706', '2805708'])