From 360bb867b22a2bbafd2260ceff1a206ffb643cfe Mon Sep 17 00:00:00 2001
From: Alex Duchesne <ducalex007@gmail.com>
Date: Thu, 12 Sep 2024 12:45:14 -0400
Subject: [PATCH] Fixed search failure due to unexpected parser state

In many plugins the parser's state wasn't reset between pages.

This meant that if a page left the parser in an unexpected state (truncated response, temporary error, or unexpected HTML), all following pages would fail to find results.

The torrentproject plugin had already worked around the issue by overriding feed() to reset some of its state between pages.

But creating a new parser for each page is simpler. I have updated all plugins with this issue.
---
 nova3/engines/limetorrents.py   | 19 ++++++++-----------
 nova3/engines/solidtorrents.py  | 30 +++++++-----------------------
 nova3/engines/torlock.py        | 27 ++++++++++-----------------
 nova3/engines/torrentproject.py | 16 ++++------------
 nova3/engines/versions.txt      |  8 ++++----
 5 files changed, 33 insertions(+), 67 deletions(-)

diff --git a/nova3/engines/limetorrents.py b/nova3/engines/limetorrents.py
index 37d8c5a..248aeda 100644
--- a/nova3/engines/limetorrents.py
+++ b/nova3/engines/limetorrents.py
@@ -1,4 +1,4 @@
-#VERSION: 4.7
+#VERSION: 4.8
 # AUTHORS: Lima66
 # CONTRIBUTORS: Diego de las Heras (ngosang@hotmail.es)
 
@@ -38,7 +38,7 @@ def __init__(self, url):
             self.url = url
             self.current_item = {}  # dict for found item
             self.item_name = None  # key's name in current_item dict
-            self.page_empty = 22000
+            self.page_items = 0
             self.inside_tr = False
             self.findTable = False
             self.parser_class = {"tdnormal": "size",  # class
@@ -113,14 +113,11 @@ def search(self, query, cat='all'):
         query = query.replace("%20", "-")
         category = self.supported_categories[cat]
 
-        parser = self.MyHtmlParser(self.url)
-        page = 1
-        while True:
-            page_url = "{0}/search/{1}/{2}/seeds/{3}/".format(self.url, category, query, page)
+        for page in range(1, 5):
+            page_url = f"{self.url}/search/{category}/{query}/seeds/{page}/"
             html = retrieve_url(page_url)
-            lunghezza_html = len(html)
-            if page > 6 or lunghezza_html <= parser.page_empty:
-                return
+            parser = self.MyHtmlParser(self.url)
             parser.feed(html)
-            page += 1
-        parser.close()
+            parser.close()
+            if parser.page_items < 20:
+                break
diff --git a/nova3/engines/solidtorrents.py b/nova3/engines/solidtorrents.py
index 5dfccd6..3a46f6a 100644
--- a/nova3/engines/solidtorrents.py
+++ b/nova3/engines/solidtorrents.py
@@ -1,4 +1,4 @@
-# VERSION: 2.3
+# VERSION: 2.4
 # AUTHORS: nKlido
 
 # LICENSING INFORMATION
@@ -24,7 +24,6 @@
 from novaprinter import prettyPrinter
 from html.parser import HTMLParser
 from datetime import datetime
-import math
 
 
 class solidtorrents(object):
@@ -47,8 +46,6 @@ def __init__(self, url):
             self.parseDate = False
             self.column = 0
             self.torrentReady = False
-            self.foundSearchStats = False
-            self.parseTotalResults = False
             self.totalResults = 0
 
             self.torrent_info = self.empty_torrent_info()
@@ -68,13 +65,6 @@ def empty_torrent_info(self):
         def handle_starttag(self, tag, attrs):
             params = dict(attrs)
 
-            if 'search-stats' in params.get('class', ''):
-                self.foundSearchStats = True
-
-            if (self.foundSearchStats and tag == 'b'):
-                self.parseTotalResults = True
-                self.foundSearchStats = False
-
             if 'search-result' in params.get('class', ''):
                 self.foundResult = True
                 return
@@ -115,13 +105,10 @@ def handle_endtag(self, tag):
                 prettyPrinter(self.torrent_info)
                 self.torrentReady = False
                 self.torrent_info = self.empty_torrent_info()
+                self.totalResults += 1
 
         def handle_data(self, data):
 
-            if (self.parseTotalResults):
-                self.totalResults = int(data.strip())
-                self.parseTotalResults = False
-
             if (self.parseTitle):
                 if (bool(data.strip()) and data != '\n'):
                     self.torrent_info['name'] = data
@@ -161,12 +148,9 @@ def request(self, searchTerm, category, page=1):
     def search(self, what, cat='all'):
         category = self.supported_categories[cat]
 
-        parser = self.TorrentInfoParser(self.url)
-        parser.feed(self.request(what, category, 1))
-
-        totalPages = min(math.ceil(parser.totalResults / 20), 5)
-
-        for page in range(2, totalPages + 1):
+        for page in range(1, 5):
+            parser = self.TorrentInfoParser(self.url)
             parser.feed(self.request(what, category, page))
-
-        parser.close()
+            parser.close()
+            if parser.totalResults < 15:
+                break
diff --git a/nova3/engines/torlock.py b/nova3/engines/torlock.py
index 7b60263..6aa6a9d 100644
--- a/nova3/engines/torlock.py
+++ b/nova3/engines/torlock.py
@@ -1,8 +1,7 @@
-#VERSION: 2.23
+#VERSION: 2.24
 # AUTHORS: Douman (custparasite@gmx.se)
 # CONTRIBUTORS: Diego de las Heras (ngosang@hotmail.es)
 
-from re import compile as re_compile
 from html.parser import HTMLParser
 from datetime import datetime, timedelta
 
@@ -35,6 +34,7 @@ def __init__(self, url):
             self.item_bad = False  # set to True for malicious links
             self.current_item = None  # dict for found item
             self.item_name = None  # key's name in current_item dict
+            self.page_items = 0
             self.parser_class = {"td": "pub_date",
                                  "ts": "size",
                                  "tul": "seeds",
@@ -91,26 +91,19 @@ def handle_endtag(self, tag):
                     except Exception:
                         self.current_item["pub_date"] = -1
                     prettyPrinter(self.current_item)
+                    self.page_items += 1
                 self.current_item = {}
 
     def search(self, query, cat='all'):
         """ Performs search """
         query = query.replace("%20", "-")
+        category = self.supported_categories[cat]
 
-        parser = self.MyHtmlParser(self.url)
-        page = "".join((self.url, "/", self.supported_categories[cat],
-                        "/torrents/", query, ".html?sort=seeds&page=1"))
-        html = retrieve_url(page)
-        parser.feed(html)
-
-        counter = 1
-        additional_pages = re_compile(r"/{0}/torrents/{1}.html\?sort=seeds&page=[0-9]+"
-                                      .format(self.supported_categories[cat], query))
-        list_searches = additional_pages.findall(html)[:-1]  # last link is next(i.e. second)
-        for page in map(lambda link: "".join((self.url, link)), list_searches):
-            html = retrieve_url(page)
+        for page in range(1, 5):
+            parser = self.MyHtmlParser(self.url)
+            page_url = f"{self.url}/{category}/torrents/{query}.html?sort=seeds&page={page}"
+            html = retrieve_url(page_url)
             parser.feed(html)
-            counter += 1
-            if counter > 3:
+            parser.close()
+            if parser.page_items < 20:
                 break
-        parser.close()
diff --git a/nova3/engines/torrentproject.py b/nova3/engines/torrentproject.py
index e736871..2db3b8d 100644
--- a/nova3/engines/torrentproject.py
+++ b/nova3/engines/torrentproject.py
@@ -1,4 +1,4 @@
-#VERSION: 1.4
+#VERSION: 1.5
 #AUTHORS: mauricci
 
 from helpers import retrieve_url
@@ -102,26 +102,18 @@ def handle_data(self, data):
                             elif curr_key != 'name':
                                 self.singleResData[curr_key] += data.strip()
 
-        def feed(self, html):
-            HTMLParser.feed(self, html)
-            self.pageComplete = False
-            self.insideResults = False
-            self.insideDataDiv = False
-            self.spanCount = -1
-
     def search(self, what, cat='all'):
         # curr_cat = self.supported_categories[cat]
-        parser = self.MyHTMLParser(self.url)
         what = what.replace('%20', '+')
         # analyze first 5 pages of results
         for currPage in range(0, 5):
             url = self.url + '/browse?t={0}&p={1}'.format(what, currPage)
             html = retrieve_url(url)
+            parser = self.MyHTMLParser(self.url)
             parser.feed(html)
-            if len(parser.pageRes) <= 0:
+            parser.close()
+            if len(parser.pageRes) < 20:
                 break
-            del parser.pageRes[:]
-        parser.close()
 
     def download_torrent(self, info):
         """ Downloader """
diff --git a/nova3/engines/versions.txt b/nova3/engines/versions.txt
index 672def0..65fc148 100644
--- a/nova3/engines/versions.txt
+++ b/nova3/engines/versions.txt
@@ -1,8 +1,8 @@
 eztv: 1.16
 jackett: 4.0
-limetorrents: 4.7
+limetorrents: 4.8
 piratebay: 3.3
-solidtorrents: 2.3
-torlock: 2.23
-torrentproject: 1.4
+solidtorrents: 2.4
+torlock: 2.24
+torrentproject: 1.5
 torrentscsv: 1.4