# Qiushibaike-py3

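A tiny command-line reader for the hot jokes (段子) on qiushibaike.com, written for Python 3. The whole project is the single script below: it fetches a page of the "hot" section, extracts each joke with a regular expression, and prints text-only jokes one at a time until you quit.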
```python
# -*- coding: utf-8 -*-
__author__ = 'hzwer'

import re
from urllib.request import Request, urlopen
from urllib.error import URLError


class Spider:
    def __init__(self):
        self.page = 1  # page number to fetch next
        # Spoof a desktop browser so the site serves the normal page
        self.user_agent = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_3) '
                           'AppleWebKit/537.36 (KHTML, like Gecko) '
                           'Chrome/46.0.2490.71 Safari/537.36')
        self.headers = {'User-Agent': self.user_agent}
        self.stories = []  # jokes fetched from the current page

    def get_stories(self):
        try:
            url = 'http://www.qiushibaike.com/hot/page/' + str(self.page)
            request = Request(url, headers=self.headers)  # build the request
            self.page += 1  # advance to the next page
            response = urlopen(request)
            content = response.read().decode("UTF-8").replace("<br/>", "\n")
            # Capture groups: author, joke text, possible image markup,
            # upvote count ("好笑" is the literal "funny" label in the page)
            pattern = re.compile(
                'alt="(.*?)".*?"content">\n(.*?)<!--(.*?)"number">(.*?)</i> 好笑',
                re.S)
            self.stories = re.findall(pattern, content)
        except URLError as e:
            if hasattr(e, "reason"):
                print("Fetch failed, reason:", e.reason)
                return None

    def start(self):
        print("{:^70}".format('Reading Qiushibaike...'))
        while True:
            self.get_stories()  # fetch one page of jokes
            for story in self.stories:
                if not re.search('img', story[2]):  # skip jokes with images
                    key = input("{:^70}".format(
                        'Press Enter for the next joke, Q to quit\n'))
                    if key.lower() == 'q':
                        print("{:^70}".format('Goodbye'))
                        return
                    print('{:^70}'.format(
                        'Page {}  Author: {}  Upvotes: {}'.format(
                            self.page - 1, story[0], story[3])))
                    print('{}\n'.format(story[1]))
            print("{:^70}".format('Next page TwT'))


spider = Spider()
spider.start()
```
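The extraction hinges on the single regular expression in `get_stories`. Here is a minimal sketch of how its four capture groups line up, run against a hypothetical snippet (the markup below is illustrative only, not the site's exact HTML):

```python
import re

# Hypothetical fragment shaped like the markup the pattern expects;
# the real page HTML may differ.
sample = ('alt="somebody" ... "content">\n'
          'A short joke<!-- no image here --"number">123</i> 好笑')

pattern = re.compile(
    'alt="(.*?)".*?"content">\n(.*?)<!--(.*?)"number">(.*?)</i> 好笑',
    re.S)

for author, text, img_info, votes in re.findall(pattern, sample):
    print(author, votes)  # -> somebody 123
    print(text)           # -> A short joke
```

The `img_info` group is what `start` checks with `re.search('img', story[2])` to drop image-only jokes, since images cannot be shown in a terminal.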

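To try it, save the script (e.g. as `spider.py`; the repository's actual filename may differ) and run `python3 spider.py` in a terminal. Press Enter to print the next text-only joke, or `q` to exit. Note that the scraper targets qiushibaike.com's markup as it looked when the script was written (see the Chrome 46 User-Agent), so the regular expression may need adjusting against the site's current HTML.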