Skip to content

Commit

Permalink
yes
Browse files (browse the repository at this point in the history)
  • Loading branch information
oldshensheep committed Jul 22, 2023
1 parent 0201dba commit bc0a145
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 15 deletions.
26 changes: 15 additions & 11 deletions v2ex_scrapy/spiders/V2exMemberSpider.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import scrapy
import scrapy.http.response.html
from scrapy.spidermiddlewares.httperror import HttpError

from v2ex_scrapy import v2ex_parser
from v2ex_scrapy.DB import DB
@@ -9,11 +10,11 @@
class V2exTopicSpider(scrapy.Spider):
name = "v2ex-member"

def __init__(self, name=None, **kwargs):
super().__init__(name, **kwargs)
def __init__(self, start_id=1, end_id=635000, *args, **kwargs):
super().__init__(*args, **kwargs)
self.db = DB()
self.start_id = 1
self.end_id = 635000
self.start_id = start_id
self.end_id = end_id
self.logger.info(f"start from topic id {self.start_id}, end at {self.end_id}")

def start_requests(self):
@@ -25,17 +26,20 @@ def start_requests(self):
errback=self.member_err,
cb_kwargs={"uid": i},
)
else:
self.logger.info(f"skip member id:{i}, because it exists")

def parse(self, response: scrapy.http.response.html.HtmlResponse, uid: int):
for i in v2ex_parser.parse_member(response):
i.uid = uid
yield i

def member_err(self, failure):
yield MemberItem(
username="",
avatar_url="",
create_at=0,
social_link=[],
uid=failure.request.cb_kwargs["uid"],
)
if failure.check(HttpError):
yield MemberItem(
username="",
avatar_url="",
create_at=0,
social_link=[],
uid=failure.request.cb_kwargs["uid"],
)
2 changes: 1 addition & 1 deletion v2ex_scrapy/spiders/V2exNodeTopicSpider.py
Original file line number Diff line number Diff line change
@@ -10,7 +10,7 @@
from v2ex_scrapy import utils


class V2exTopicSpider(scrapy.Spider):
class V2exNodeTopicSpider(scrapy.Spider):
name = "v2ex-node"

UPDATE_TOPIC_WHEN_REPLY_CHANGE = True
Expand Down
6 changes: 3 additions & 3 deletions v2ex_scrapy/spiders/V2exSpider.py
Original file line number Diff line number Diff line change
@@ -6,13 +6,13 @@
from v2ex_scrapy.spiders.CommonSpider import CommonSpider


class V2exTopicSpider(scrapy.Spider):
class V2exSpider(scrapy.Spider):
name = "v2ex"
FORCE_UPDATE_TOPIC = False
UPDATE_COMMENT = True

def __init__(self, name=None, **kwargs):
super().__init__(name, **kwargs)
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.db = DB()
self.start_id = 1
self.end_id = 1000000
Expand Down

0 comments on commit bc0a145

Please sign in to comment.