-
Notifications
You must be signed in to change notification settings - Fork 0
/
stats_by_game.py
44 lines (32 loc) · 1.17 KB
/
stats_by_game.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import scrapy
import collections
class CricbaySpider(scrapy.Spider):
    """Crawl a Cricbay team roster and record which players appear in each game.

    ``parse`` reads the roster page listed in ``start_urls`` and schedules one
    request per player link. ``parse_player_page`` reads the game rows on each
    player page and files the player's name under every game title it finds.
    """

    name = 'cricbay_spider'
    start_urls = [
        'http://www.cricbay.com/a_teamroster.asp?teamId=192&seasonEventId=61&cbPID='
    ]

    # NOTE: both dicts are class attributes, so they are shared by every
    # instance of this spider and accumulate across all player pages.
    # Maps player name -> number of titled game rows found on that player's page.
    players = dict()
    # Maps game title -> list of player names recorded for that game.
    games = {}

    def parse_player_page(self, response):
        """Record the games listed on a single player's page.

        Args:
            response: The response for one player page.

        Yields:
            ``self.games``, the shared and cumulative title -> player names
            mapping. The same dict object is yielded for every player page.
        """
        player_name = response.css('h1::text').extract_first()
        if player_name is None:
            # Pages without an <h1> text are filed under the placeholder 'None'.
            player_name = 'None'

        no_of_matches = 0
        # Odd rows first, then even rows; list() keeps the concatenation a
        # plain list even when one of the selections is empty.
        game_rows = (list(response.css('tr.cb-oddRow'))
                     + list(response.css('tr.cb-evenRow')))
        for row in game_rows:
            cells = row.css('td')
            if len(cells) < 4:
                # The title is read from the fourth cell; skip shorter rows
                # instead of letting an IndexError abort the whole page.
                continue
            title = cells[3].css('a::text').extract_first()
            if title is None:
                # No link text in the fourth cell: nothing to record.
                continue
            self.games.setdefault(title, []).append(player_name)
            no_of_matches += 1

        self.players[player_name] = no_of_matches
        yield self.games

    def parse(self, response):
        """Schedule a request for every player linked from the roster page.

        Args:
            response: The response for the roster page in ``start_urls``.

        Yields:
            One ``scrapy.Request`` per player link, handled by
            ``parse_player_page``.
        """
        player_links = response.css(
            'div.cb-team-roster-grid > ul > li > h4 > a::attr(href)')
        for player_link in player_links:
            # Call form of print behaves the same on Python 2 and Python 3.
            print(player_link)
            player_page = player_link.extract()
            if player_page is not None:
                # Links may be relative; resolve against the roster page URL.
                player_page = response.urljoin(player_page)
                yield scrapy.Request(player_page, callback=self.parse_player_page)