crawl.py
import json

import aiohttp
from bs4 import BeautifulSoup as bfs

user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36'
accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9'
async def get(url: str) -> str | None:
    # Short per-socket timeouts keep the crawler from hanging on slow hosts.
    stimeout = aiohttp.ClientTimeout(total=None, sock_connect=0.2, sock_read=0.2)
    try:
        async with aiohttp.ClientSession(timeout=stimeout) as session:
            async with session.get(url, headers={
                'user-agent': user_agent,
                'accept': accept,
            }) as res:
                data = await res.read()
                return data.decode(encoding='utf-8')
    except Exception:
        return None
async def expand_collection(url: str) -> str | list[str]:
    # Expand a collection page into the URLs of the resources it lists.
    try:
        root_url = 'https://www.planetminecraft.com'
        data = await get(url)
        root = bfs(data, 'html.parser')
        root = root.find('ul', class_='resource_list grid')
        root = root.find_all('a', class_='r-title')
        return [root_url + atag['href'] for atag in root]
    except Exception:
        print(f'[Debug] Failed to crawl {url}')
        return url
async def expand_datapack(url: str) -> str | tuple[list[str], list[str]]:
    # Collect the datapack download links plus any companion texture-pack links.
    try:
        root_url = 'https://www.planetminecraft.com'
        data = await get(url)
        root = bfs(data, 'html.parser')
        temp = root.find('ul', class_='content-actions').find_all('a')
        data_packs = [root_url + d['href'] for d in temp]
        temp = root.find('div', class_='content-actions')
        if temp is not None:
            temp = temp.find_all('a')
            texture_packs = [root_url + d['href'] for d in temp]
        else:
            texture_packs = []
        return data_packs, texture_packs
    except Exception:
        print(f'[Debug] Failed to crawl {url}')
        return url
async def expand_texturepack(url: str) -> str | list[str]:
    # Extract the download links from a texture-pack page's action bar.
    try:
        root_url = 'https://www.planetminecraft.com'
        data = await get(url)
        root = bfs(data, 'html.parser')
        temp = root.find('ul', class_='content-actions').find_all('a')
        return [root_url + d['href'] for d in temp]
    except Exception:
        print(f'[Debug] Failed to crawl {url}')
        return url
async def expand_mirror(url: str) -> str | list[str]:
    # Mirror endpoints respond with JSON whose 'forward_url' is the real target.
    try:
        data = await get(url)
        return [json.loads(data)['forward_url']]
    except Exception:
        print(f'[Debug] Failed to crawl {url}')
        return url
async def expand_mediafire(url: str) -> str | list[str]:
    # The MediaFire download button carries the direct file URL.
    try:
        data = await get(url)
        root = bfs(data, 'html.parser')
        return [root.find('a', class_='input popsok')['href']]
    except Exception:
        print(f'[Debug] Failed to crawl {url}')
        return url
async def expand_adfoc(url: str) -> str | list[str]:
    # Follow the 'skip' link on the interstitial page to the destination.
    try:
        data = await get(url)
        root = bfs(data, 'html.parser')
        return [root.find('a', class_='skip')['href']]
    except Exception:
        print(f'[Debug] Failed to crawl {url}')
        return url
async def expand_curseforge(url: str) -> list[str]:
    # CurseForge project pages expose their download under the '/file' path.
    url += 'file' if url.endswith('/') else '/file'
    return [url]
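

# A minimal sketch of how these coroutines might be driven end to end:
# expand a collection into resource pages, then resolve each page into its
# download links. The collection URL below is a hypothetical placeholder,
# not one taken from this module.
if __name__ == '__main__':
    import asyncio

    async def main() -> None:
        resources = await expand_collection(
            'https://www.planetminecraft.com/collection/example/')  # hypothetical URL
        if isinstance(resources, str):
            # The expand_* helpers return the input URL unchanged on failure.
            return
        for page in resources:
            result = await expand_datapack(page)
            if isinstance(result, tuple):
                data_packs, texture_packs = result
                print(page, data_packs, texture_packs)

    asyncio.run(main())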