Skip to content

Commit

Permalink
Add pub_date support to limetorrents
Browse files Browse the repository at this point in the history
I had to overhaul the parser a little because the old way of using td classes didn't work for our purpose.
  • Loading branch information
ducalex authored and xavier2k6 committed Oct 1, 2024
1 parent 3a88c6f commit 9363598
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 36 deletions.
85 changes: 50 additions & 35 deletions nova3/engines/limetorrents.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
#VERSION: 4.7
#VERSION: 4.8
# AUTHORS: Lima66
# CONTRIBUTORS: Diego de las Heras ([email protected])

import re
from datetime import datetime, timedelta
from html.parser import HTMLParser
from urllib.parse import quote

Expand Down Expand Up @@ -37,38 +38,49 @@ def __init__(self, url):
HTMLParser.__init__(self)
self.url = url
self.current_item = {} # dict for found item
self.item_name = None # key's name in current_item dict
self.page_empty = 22000
self.inside_table = False
self.inside_tr = False
self.findTable = False
self.parser_class = {"tdnormal": "size", # class
"tdseed": "seeds",
"tdleech": "leech"}
self.column_index = -1
self.column_name = None # key's name in current_item dict
self.columns = ["name", "pub_date", "size", "seeds", "leech"]

now = datetime.now()
self.date_parsers = {
r"yesterday": lambda m: now - timedelta(days=1),
r"last\s+month": lambda m: now - timedelta(days=30),
r"(\d+)\s+years?": lambda m: now - timedelta(days=int(m[1]) * 365),
r"(\d+)\s+months?": lambda m: now - timedelta(days=int(m[1]) * 30),
r"(\d+)\s+days?": lambda m: now - timedelta(days=int(m[1])),
r"(\d+)\s+hours?": lambda m: now - timedelta(hours=int(m[1])),
r"(\d+)\s+minutes?": lambda m: now - timedelta(minutes=int(m[1])),
}

def handle_starttag(self, tag, attrs):

params = dict(attrs)

if params.get('class') == 'table2':
self.findTable = True
self.inside_table = True
elif not self.inside_table:
return

if tag == self.TR and self.findTable and (params.get('bgcolor') == '#F4F4F4' or params.get('bgcolor') == '#FFFFFF'): # noqa
if tag == self.TR and (params.get('bgcolor') == '#F4F4F4' or params.get('bgcolor') == '#FFFFFF'): # noqa
self.inside_tr = True
self.current_item = {}
if not self.inside_tr:
self.column_index = -1
self.current_item = {"engine_url": self.url}
elif not self.inside_tr:
return

if self.inside_tr and tag == self.TD:
if "class" in params:
self.item_name = self.parser_class.get(params["class"], None)
if self.item_name:
self.current_item[self.item_name] = -1
if tag == self.TD:
self.column_index += 1
if self.column_index < len(self.columns):
self.column_name = self.columns[self.column_index]
else:
self.column_name = None

if self.inside_tr and tag == self.A and self.HREF in params:
if self.column_name == "name" and tag == self.A and self.HREF in params:
link = params["href"]
if link.startswith("http://itorrents.org/torrent/"):
self.current_item["engine_url"] = self.url
self.item_name = "name"
elif link.endswith(".html"):
if link.endswith(".html"):
try:
safe_link = quote(self.url + link, safe='/:')
except KeyError:
Expand All @@ -77,26 +89,29 @@ def handle_starttag(self, tag, attrs):
self.current_item["desc_link"] = safe_link

def handle_data(self, data):
if self.inside_tr and self.item_name:
if self.item_name == 'size' and (data.endswith('MB') or data.endswith('GB')):
self.current_item[self.item_name] = data.strip().replace(',', '')
elif not self.item_name == 'size':
self.current_item[self.item_name] = data.strip().replace(',', '')

self.item_name = None
if self.column_name:
if self.column_name in ["size", "seeds", "leech"]:
data = data.replace(',', '')
elif self.column_name == "pub_date":
timestamp = -1
for pattern, calc in self.date_parsers.items():
m = re.match(pattern, data, re.IGNORECASE)
if m:
timestamp = int(calc(m).timestamp())
break
data = str(timestamp)
self.current_item[self.column_name] = data.strip()
self.column_name = None

def handle_endtag(self, tag):
if tag == 'table':
self.findTable = False
self.inside_table = False

if self.inside_tr and tag == self.TR:
self.inside_tr = False
self.item_name = None
array_length = len(self.current_item)
if array_length < 1:
return
prettyPrinter(self.current_item)
self.current_item = {}
self.column_name = None
if "link" in self.current_item:
prettyPrinter(self.current_item)

def download_torrent(self, info):
# since limetorrents provides torrent links in itorrent (cloudflare protected),
Expand Down
2 changes: 1 addition & 1 deletion nova3/engines/versions.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
eztv: 1.16
jackett: 4.0
limetorrents: 4.7
limetorrents: 4.8
piratebay: 3.3
solidtorrents: 2.3
torlock: 2.23
Expand Down

0 comments on commit 9363598

Please sign in to comment.