This repository has been archived by the owner on Jan 27, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 5
/
task_cal_influence.py
80 lines (62 loc) · 2.14 KB
/
task_cal_influence.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# -*- coding: utf-8 -*-
from underscore import _ as us
from pymongo import MongoClient
import operator
from multiprocessing import Pool
import gc
# Switch off automatic garbage collection as soon as the module is loaded.
# NOTE(review): presumably a speed-up while the large lists of pagerank
# documents are built and sorted -- confirm it is still wanted.
gc.disable()
def cal_influence_ranking(actor_records):
    """Return ``(actor, influence)`` for one actor's pagerank records.

    The influence score is the sum of every ``'pagerank'`` value that is
    strictly greater than 1; values of 1 or less do not contribute.

    Args:
        actor_records: A two-item sequence ``(actor, records)`` where
            ``records`` is an iterable of mappings that may carry a
            ``'pagerank'`` entry.  A single argument is taken (instead of
            two) so the function can be handed directly to ``Pool.map``.

    Returns:
        A tuple ``(actor, score)``.  ``score`` is ``0`` when no record
        qualifies.
    """
    # Unpack in the body: tuple parameters in the signature were removed
    # from the language by PEP 3113 and are a SyntaxError on Python 3.
    actor, records = actor_records
    scores = (record.get('pagerank') for record in records)
    # Records without a score are skipped explicitly, because comparing
    # None with a number raises TypeError on Python 3.
    return (actor, sum(s for s in scores if s is not None and s > 1))
def main():
    """Compute, cache and print the influence ranking for every field.

    Opens a default ``MongoClient()`` connection and works on the
    ``github`` database.  For each entry of ``influence_specs`` it looks
    for a cached document in the ``influences`` collection:

    * if none exists, the ranking is computed from the ``pageranks``
      collection and its first 1000 entries are stored under that field;
    * otherwise the stored ``ranks`` are reused as-is.

    The first 25 entries are then printed twice per field: once with the
    score and once with the actor name only.
    """
    client = MongoClient()
    db = client['github']
    pageranks = db['pageranks']
    influences = db['influences']

    def influence_ranks(spec):
        """Return ``(actor, score)`` pairs for the pagerank documents that
        match the query *spec*, ordered by descending score."""
        pool = Pool(15)
        try:
            grouped = us.groupBy(list(pageranks.find(spec)), 'actor')
            scored = pool.map(cal_influence_ranking, grouped.items())
        finally:
            # Always release the worker processes, even when the query or
            # the parallel map raises.
            pool.close()
            pool.join()
        return sorted(scored, key=operator.itemgetter(1), reverse=True)

    # Maps the display name of a field to the query that selects its
    # pagerank documents; the empty query selects every document.
    influence_specs = {
        'General': {},
        'JavaScript': {'language': 'JavaScript'},
        'CSS': {'language': 'CSS'},
        'Python': {'language': 'Python'},
        'Ruby': {'language': 'Ruby'},
        'Go': {'language': 'Go'},
        'PHP': {'language': 'PHP'},
        'Shell': {'language': 'Shell'},
        'Perl': {'language': 'Perl'},
        'Objective-C': {'language': 'Objective-C'},
        'Swift': {'language': 'Swift'},
        'Java': {'language': 'Java'},
        'C++': {'language': 'C++'},
        'C#': {'language': 'C#'},
        'C': {'language': 'C'},
        'Haskell': {'language': 'Haskell'},
        'Scala': {'language': 'Scala'},
        'Erlang': {'language': 'Erlang'},
        'Clojure': {'language': 'Clojure'},
    }

    for field, spec in influence_specs.items():
        influence = influences.find_one({'field': field})
        if not influence:
            ranks = influence_ranks(spec)
            # NOTE(review): Collection.insert is deprecated in PyMongo 3
            # and removed in PyMongo 4; move to insert_one once the
            # installed driver version is confirmed.
            influences.insert({
                'field': field,
                'ranks': ranks[:1000]
            })
        else:
            ranks = influence['ranks']

        top = ranks[:25]

        # Each print call takes one pre-formatted string so the output is
        # the same whether print is a statement (Python 2) or a function.
        print('%s top 25:' % field)
        for position, (actor, pagerank) in enumerate(top, 1):
            print('%d. %s %s' % (position, actor, pagerank))

        print('%s top 25(hide score):' % field)
        for position, (actor, _score) in enumerate(top, 1):
            print('%d. %s' % (position, actor))
# Run the ranking job only when this file is executed as a script.
if __name__ == "__main__":
    main()