Main.py
import json
from multiprocessing import Pool

import Scraper


def keyword_search(query, amount):
    # Look up PMC article IDs for the query, pull the article text,
    # and extract candidate keyword phrases from the combined text.
    pmc_ids = list(Scraper.esearch(query, amount))
    all_text = list(Scraper.text_grab_multiple(pmc_ids))
    keywords = [kw.lower() for kw in Scraper.get_continuous_chunks(" ".join(all_text), query)]
    return keywords[:7]


def sub_search(keyword):
    # Smaller follow-up search run for each top-level keyword.
    return keyword_search(keyword, '20')


def json_networker(keyword_map):
    # Flatten the {keyword: [related keywords]} mapping into the
    # node/link structure consumed by the network visualisation.
    nodes, links = set(), set()
    for source, related in keyword_map.items():
        nodes.add(source)
        for target in related:
            nodes.add(target)
            links.add((source, target))
    data = {
        'nodes': [{"id": node} for node in nodes],
        'links': [{"source": s, "target": t, "value": 10} for s, t in links],
    }
    with open('static/networks/network.json', 'w') as f:
        json.dump(data, f)


def main_scraper(topic):
    data = {}
    data[topic] = keyword_search(topic, '25')
    # Run the keyword sub-searches concurrently.
    with Pool() as p:
        results = p.map(sub_search, data[topic])
    for keyword, related in zip(data[topic], results):
        data[keyword] = related
    # Earlier sequential version, kept for reference:
    # for i in data[topic]:
    #     data[i] = [kw for kw in keyword_search(i, '10')]
    #     all_nodes = ' '.join([k + ' ' + ' '.join(v) for k, v in data.items()])
    #     for j in data[i]:
    #         if (j in all_nodes) or (j in all_nodes + 's'):
    #             data[i].remove(j)
    json_networker(data)
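
For context, here is a minimal usage sketch. It is not part of the repository: it assumes Main.py sits next to a working Scraper module, that the static/networks/ directory already exists, and the topic string 'malaria' (and the keyword 'plasmodium' in the comment) are purely illustrative values, not outputs taken from the project.

# Hypothetical driver script (assumption, not from the repository):
# builds the keyword network for an example topic and inspects the JSON it writes.
import json

import Main

if __name__ == '__main__':
    # The __main__ guard matters because main_scraper creates a multiprocessing
    # Pool; on platforms using the spawn start method, workers re-import this module.
    Main.main_scraper('malaria')  # example topic, chosen only for illustration
    with open('static/networks/network.json') as f:
        network = json.load(f)
    # Expected shape, based on json_networker above (values are illustrative):
    # {"nodes": [{"id": "malaria"}, ...],
    #  "links": [{"source": "malaria", "target": "plasmodium", "value": 10}, ...]}
    print(len(network['nodes']), 'nodes,', len(network['links']), 'links')

The nodes/links layout with "source", "target", and "value" keys is the shape commonly fed to a force-directed graph (e.g. a D3 force layout), which appears to be what the static/networks/network.json file is for.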