Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed search failure due to unexpected parser state #300

Merged
merged 2 commits on Oct 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 9 additions & 11 deletions nova3/engines/limetorrents.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#VERSION: 4.8
#VERSION: 4.9
# AUTHORS: Lima66
# CONTRIBUTORS: Diego de las Heras ([email protected])

Expand Down Expand Up @@ -38,7 +38,7 @@ def __init__(self, url):
HTMLParser.__init__(self)
self.url = url
self.current_item = {} # dict for found item
self.page_empty = 22000
self.page_items = 0
self.inside_table = False
self.inside_tr = False
self.column_index = -1
Expand Down Expand Up @@ -112,6 +112,7 @@ def handle_endtag(self, tag):
self.column_name = None
if "link" in self.current_item:
prettyPrinter(self.current_item)
self.page_items += 1

def download_torrent(self, info):
# since limetorrents provides torrent links in itorrent (cloudflare protected),
Expand All @@ -128,14 +129,11 @@ def search(self, query, cat='all'):
query = query.replace("%20", "-")
category = self.supported_categories[cat]

parser = self.MyHtmlParser(self.url)
page = 1
while True:
page_url = "{0}/search/{1}/{2}/seeds/{3}/".format(self.url, category, query, page)
for page in range(1, 5):
page_url = f"{self.url}/search/{category}/{query}/seeds/{page}/"
html = retrieve_url(page_url)
lunghezza_html = len(html)
if page > 6 or lunghezza_html <= parser.page_empty:
return
parser = self.MyHtmlParser(self.url)
parser.feed(html)
page += 1
parser.close()
parser.close()
if parser.page_items < 20:
break
30 changes: 7 additions & 23 deletions nova3/engines/solidtorrents.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# VERSION: 2.3
# VERSION: 2.4
# AUTHORS: nKlido

# LICENSING INFORMATION
Expand All @@ -24,7 +24,6 @@
from novaprinter import prettyPrinter
from html.parser import HTMLParser
from datetime import datetime
import math


class solidtorrents(object):
Expand All @@ -47,8 +46,6 @@ def __init__(self, url):
self.parseDate = False
self.column = 0
self.torrentReady = False
self.foundSearchStats = False
self.parseTotalResults = False
self.totalResults = 0

self.torrent_info = self.empty_torrent_info()
Expand All @@ -68,13 +65,6 @@ def empty_torrent_info(self):
def handle_starttag(self, tag, attrs):
params = dict(attrs)

if 'search-stats' in params.get('class', ''):
self.foundSearchStats = True

if (self.foundSearchStats and tag == 'b'):
self.parseTotalResults = True
self.foundSearchStats = False

if 'search-result' in params.get('class', ''):
self.foundResult = True
return
Expand Down Expand Up @@ -115,13 +105,10 @@ def handle_endtag(self, tag):
prettyPrinter(self.torrent_info)
self.torrentReady = False
self.torrent_info = self.empty_torrent_info()
self.totalResults += 1

def handle_data(self, data):

if (self.parseTotalResults):
self.totalResults = int(data.strip())
self.parseTotalResults = False

if (self.parseTitle):
if (bool(data.strip()) and data != '\n'):
self.torrent_info['name'] = data
Expand Down Expand Up @@ -161,12 +148,9 @@ def request(self, searchTerm, category, page=1):
def search(self, what, cat='all'):
category = self.supported_categories[cat]

parser = self.TorrentInfoParser(self.url)
parser.feed(self.request(what, category, 1))

totalPages = min(math.ceil(parser.totalResults / 20), 5)

for page in range(2, totalPages + 1):
for page in range(1, 5):
parser = self.TorrentInfoParser(self.url)
parser.feed(self.request(what, category, page))

parser.close()
parser.close()
if parser.totalResults < 15:
break
27 changes: 10 additions & 17 deletions nova3/engines/torlock.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
#VERSION: 2.23
#VERSION: 2.24
# AUTHORS: Douman ([email protected])
# CONTRIBUTORS: Diego de las Heras ([email protected])

from re import compile as re_compile
from html.parser import HTMLParser
from datetime import datetime, timedelta

Expand Down Expand Up @@ -35,6 +34,7 @@ def __init__(self, url):
self.item_bad = False # set to True for malicious links
self.current_item = None # dict for found item
self.item_name = None # key's name in current_item dict
self.page_items = 0
self.parser_class = {"td": "pub_date",
"ts": "size",
"tul": "seeds",
Expand Down Expand Up @@ -91,26 +91,19 @@ def handle_endtag(self, tag):
except Exception:
self.current_item["pub_date"] = -1
prettyPrinter(self.current_item)
self.page_items += 1
self.current_item = {}

def search(self, query, cat='all'):
""" Performs search """
query = query.replace("%20", "-")
category = self.supported_categories[cat]

parser = self.MyHtmlParser(self.url)
page = "".join((self.url, "/", self.supported_categories[cat],
"/torrents/", query, ".html?sort=seeds&page=1"))
html = retrieve_url(page)
parser.feed(html)

counter = 1
additional_pages = re_compile(r"/{0}/torrents/{1}.html\?sort=seeds&page=[0-9]+"
.format(self.supported_categories[cat], query))
list_searches = additional_pages.findall(html)[:-1] # last link is next(i.e. second)
for page in map(lambda link: "".join((self.url, link)), list_searches):
html = retrieve_url(page)
for page in range(1, 5):
parser = self.MyHtmlParser(self.url)
page_url = f"{self.url}/{category}/torrents/{query}.html?sort=seeds&page={page}"
html = retrieve_url(page_url)
parser.feed(html)
counter += 1
if counter > 3:
parser.close()
if parser.page_items < 20:
break
parser.close()
16 changes: 4 additions & 12 deletions nova3/engines/torrentproject.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#VERSION: 1.4
#VERSION: 1.5
#AUTHORS: mauricci

from helpers import retrieve_url
Expand Down Expand Up @@ -102,26 +102,18 @@ def handle_data(self, data):
elif curr_key != 'name':
self.singleResData[curr_key] += data.strip()

def feed(self, html):
HTMLParser.feed(self, html)
self.pageComplete = False
self.insideResults = False
self.insideDataDiv = False
self.spanCount = -1

def search(self, what, cat='all'):
# curr_cat = self.supported_categories[cat]
parser = self.MyHTMLParser(self.url)
what = what.replace('%20', '+')
# analyze first 5 pages of results
for currPage in range(0, 5):
url = self.url + '/browse?t={0}&p={1}'.format(what, currPage)
html = retrieve_url(url)
parser = self.MyHTMLParser(self.url)
parser.feed(html)
if len(parser.pageRes) <= 0:
parser.close()
if len(parser.pageRes) < 20:
break
del parser.pageRes[:]
parser.close()

def download_torrent(self, info):
""" Downloader """
Expand Down
8 changes: 4 additions & 4 deletions nova3/engines/versions.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
eztv: 1.16
jackett: 4.0
limetorrents: 4.8
limetorrents: 4.9
piratebay: 3.3
solidtorrents: 2.3
torlock: 2.23
torrentproject: 1.4
solidtorrents: 2.4
torlock: 2.24
torrentproject: 1.5
torrentscsv: 1.4