From 653dbabd675fd960470b397446528f69799b1b56 Mon Sep 17 00:00:00 2001 From: dteviot Date: Sat, 23 Mar 2024 10:20:03 +1300 Subject: [PATCH] Add site https://www.wuxiabox.com/ See: https://github.com/dteviot/WebToEpub/issues/1263 Looks like readwn changed its name yet again. So, try using a rule that examines the site layout. Hopefully we will not have to update quite so often. Note: also cleaned up the use of registerRule --- plugin/js/parsers/AdultfanfictionParser.js | 6 ++---- plugin/js/parsers/BlogspotParser.js | 2 +- plugin/js/parsers/MadaraParser.js | 7 ++----- plugin/js/parsers/ReadwnParser.js | 17 +++++++++++++++-- plugin/js/parsers/WordpressBaseParser.js | 2 +- 5 files changed, 21 insertions(+), 13 deletions(-) diff --git a/plugin/js/parsers/AdultfanfictionParser.js b/plugin/js/parsers/AdultfanfictionParser.js index fa4e1e60..0bb36e22 100644 --- a/plugin/js/parsers/AdultfanfictionParser.js +++ b/plugin/js/parsers/AdultfanfictionParser.js @@ -1,10 +1,8 @@ "use strict"; parserFactory.registerRule( - function(url) { - return AdultfanfictionParser.isAdultFanFiction(url) * 0.9; - }, - function() { return new AdultfanfictionParser() } + (url) => AdultfanfictionParser.isAdultFanFiction(url) * 0.9, + () => new AdultfanfictionParser() ); class AdultfanfictionParser extends Parser{ diff --git a/plugin/js/parsers/BlogspotParser.js b/plugin/js/parsers/BlogspotParser.js index 71dc3ccf..6c5ad127 100644 --- a/plugin/js/parsers/BlogspotParser.js +++ b/plugin/js/parsers/BlogspotParser.js @@ -16,7 +16,7 @@ parserFactory.registerRule( return (util.extractHostName(url).indexOf(".blogspot.") != -1) || ((BlogspotParser.findContentElement(dom) != null) * 0.5); }, - function() { return new BlogspotParser() } + () => new BlogspotParser() ); parserFactory.registerManualSelect( diff --git a/plugin/js/parsers/MadaraParser.js b/plugin/js/parsers/MadaraParser.js index f5eb8967..4b4d3ad3 100644 --- a/plugin/js/parsers/MadaraParser.js +++ b/plugin/js/parsers/MadaraParser.js @@ -13,11 +13,8 @@ 
parserFactory.register("mangabob.com", function() { return new MadaraParser() }) parserFactory.registerRule( - // return probability (0.0 to 1.0) web page is a Madara Themed page - function(url, dom) { - return MadaraParser.isMadaraTheme(dom) * 0.6; - }, - function() { return new MadaraParser() } + (url, dom) => MadaraParser.isMadaraTheme(dom) * 0.6, + () => new MadaraParser() ); class MadaraParser extends WordpressBaseParser{ diff --git a/plugin/js/parsers/ReadwnParser.js b/plugin/js/parsers/ReadwnParser.js index 125d8886..adcbb752 100644 --- a/plugin/js/parsers/ReadwnParser.js +++ b/plugin/js/parsers/ReadwnParser.js @@ -22,11 +22,21 @@ parserFactory.register("wuxiar.com", () => new ReadwnParser()); parserFactory.register("wuxiau.com", () => new ReadwnParser()); parserFactory.register("wuxiazone.com", () => new ReadwnParser()); +parserFactory.registerRule( + (url, dom) => ReadwnParser.isReadwn(dom) * 0.8, + () => new ReadwnParser() +); + class ReadwnParser extends Parser{ constructor() { super(); } + static isReadwn(dom) { + return (dom.querySelector(ReadwnParser.CoverSelector) !== null) + && (dom.querySelector(ReadwnParser.AuthorSelector) !== null) + } + async getChapterUrls(dom, chapterUrlsUI) { return this.getChapterUrlsFromMultipleTocPages(dom, ReadwnParser.extractPartialChapterList, @@ -77,7 +87,7 @@ class ReadwnParser extends Parser{ } extractAuthor(dom) { - let authorLabel = dom.querySelector("span[itemprop='author']"); + let authorLabel = dom.querySelector(ReadwnParser.AuthorSelector); return authorLabel?.textContent ?? 
super.extractAuthor(dom); } @@ -91,10 +101,13 @@ class ReadwnParser extends Parser{ } findCoverImageUrl(dom) { - return util.getFirstImgSrc(dom, "figure.cover"); + return util.getFirstImgSrc(dom, ReadwnParser.CoverSelector); } getInformationEpubItemChildNodes(dom) { return [...dom.querySelectorAll(".summary .content")]; } } + +ReadwnParser.CoverSelector = "figure.cover"; +ReadwnParser.AuthorSelector = "span[itemprop='author']"; diff --git a/plugin/js/parsers/WordpressBaseParser.js b/plugin/js/parsers/WordpressBaseParser.js index 0479302d..972766c0 100644 --- a/plugin/js/parsers/WordpressBaseParser.js +++ b/plugin/js/parsers/WordpressBaseParser.js @@ -21,7 +21,7 @@ parserFactory.registerRule( return ((WordpressBaseParser.findContentElement(dom) != null) && (WordpressBaseParser.findChapterTitleElement(dom) != null)) * 0.5; }, - function() { return new WordpressBaseParser() } + () => new WordpressBaseParser() ); parserFactory.registerManualSelect(