-
-
Notifications
You must be signed in to change notification settings - Fork 815
/
global_scraper.py
67 lines (63 loc) · 2.27 KB
/
global_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from parsers import *
from cleaners import *
from getters import *
from collector import collect_gw, merge_gw
from understat import parse_epl_data
import csv
def parse_data():
    """Parse and store all the data for the hard-coded season.

    Runs the whole scrape in a fixed order, handing each project helper the
    season directory ``data/<season>/`` (or a sub-path of it):

    1. Fetch the summary payload with ``get_data`` and pass its
       ``"elements"`` to ``parse_players``.
    2. Record each element's ``ep_this`` value as its expected points and
       find the id of the event flagged ``is_current``.
    3. Run ``clean_players``, ``fixtures``, ``parse_team_data`` and
       ``id_players``.
    4. For every id returned by ``get_player_ids``, fetch the individual
       player payload and pass its ``"history_past"`` and ``"history"``
       entries to the per-player parsers.
    5. If a current gameweek was found, write ``gws/xP<gw>.csv`` and then
       call ``collect_gw`` and ``merge_gw``.
    6. Call ``parse_epl_data`` with the ``understat`` path.

    Takes no arguments and returns ``None``; progress is reported with
    ``print``.
    """
    # Imported locally so the os calls below do not depend on one of the
    # star-imported project modules happening to re-export ``os``.
    import os

    season = '2024-25'
    base_filename = 'data/' + season + '/'
    players_raw_filename = base_filename + 'players_raw.csv'
    player_base_filename = base_filename + 'players/'
    gw_base_filename = base_filename + 'gws/'

    print("Getting data")
    data = get_data()

    print("Parsing summary data")
    parse_players(data["elements"], base_filename)

    # One row per element; these become the rows of the xP CSV written below.
    xPoints = [{'id': e['id'], 'xP': e['ep_this']} for e in data["elements"]]

    # 0 means "no event is flagged as current"; the gameweek-specific steps
    # are skipped in that case. If several events are flagged, the last wins.
    gw_num = 0
    for event in data["events"]:
        if event["is_current"]:
            gw_num = event["id"]

    print("Cleaning summary data")
    clean_players(players_raw_filename, base_filename)
    print("Getting fixtures data")
    fixtures(base_filename)
    print("Getting teams data")
    parse_team_data(data["teams"], base_filename)
    print("Extracting player ids")
    id_players(players_raw_filename, base_filename)
    player_ids = get_player_ids(base_filename)

    print("Extracting player specific data")
    for i, name in player_ids.items():
        player_data = get_individual_player_data(i)
        parse_player_history(player_data["history_past"], player_base_filename, name, i)
        parse_player_gw_history(player_data["history"], player_base_filename, name, i)

    if gw_num > 0:
        print("Writing expected points")
        # open() does not create missing parent directories.
        os.makedirs(gw_base_filename, exist_ok=True)
        xp_filename = os.path.join(gw_base_filename, 'xP' + str(gw_num) + '.csv')
        # newline='' is required by the csv module so that the writer's own
        # line terminator is not translated again by the text layer.
        with open(xp_filename, 'w', newline='') as outf:
            w = csv.DictWriter(outf, ['id', 'xP'])
            w.writeheader()
            w.writerows(xPoints)
        print("Collecting gw scores")
        collect_gw(gw_num, player_base_filename, gw_base_filename, base_filename)
        print("Merging gw scores")
        merge_gw(gw_num, gw_base_filename)

    # NOTE(review): this step does not use gw_num, so it is run regardless of
    # whether a current gameweek was found -- confirm against the intended
    # nesting.
    understat_filename = base_filename + 'understat'
    parse_epl_data(understat_filename)
def fixtures(base_filename):
    """Fetch the fixtures payload and hand it to ``parse_fixtures``.

    ``base_filename`` is forwarded unchanged to ``parse_fixtures`` together
    with whatever ``get_fixtures_data`` returns. Returns ``None``.
    """
    parse_fixtures(get_fixtures_data(), base_filename)
def main():
    """Script entry point: run the full scrape by calling ``parse_data``."""
    parse_data()
# Only start the scrape when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()