Skip to content

Commit

Permalink
去除掉廣告。
Browse files Browse the repository at this point in the history
  • Loading branch information
kanasimi committed Dec 21, 2019
1 parent dc758b6 commit ab43588
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 6 deletions.
11 changes: 10 additions & 1 deletion novel.cmn-Hans-CN/huaxiangju.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,18 @@ var crawler = CeL.jieqi_article({

// 去掉前後網站廣告。
remove_ads : function(text) {
return text.replace(
text = text.replace(
// 花香居提供女生言情小说在线阅读,言情小说免费阅读,言情小说TXT下载,言情小说阅读之家。https://www.huaxiangju.com/
/花香居.+?.com//g, '');

// e.g., https://www.huaxiangju.com/25087/6323179.html
text = text.replace(/^(?:热门|熱門|&#160;|&amp;|&nbsp;|<br[^<>]*>|[&:;])+/,
'');
if (!text.includes('<div'))
text = text.replace(/<\/div>/g, '').trim();

// console.log(text);
return text;
}
});

Expand Down
4 changes: 3 additions & 1 deletion novel.cmn-Hans-CN/luoxia.js
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,9 @@ crawler = new CeL.work_crawler({

// 解析 作品名稱 → 作品id get_work()
search_URL : '?s=',
parse_search_result : CeL.work_crawler.extract_work_id_from_search_result_link.bind(null,
parse_search_result :
//
CeL.work_crawler.extract_work_id_from_search_result_link.bind(null,
/<li class="cat-search-item">([\s\S]+?)<\/li>/g),

// 取得作品的章節資料。 get_work_data()
Expand Down
14 changes: 10 additions & 4 deletions novel.cmn-Hans-CN/piaotian.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ crawler = new CeL.work_crawler({
// recheck : 'changed',

site_name : '飘天文学',
base_URL : 'http://www.piaotian.com/',
// 2018: http://www.piaotian.com/
// 2019/11/23 前改為: https://www.ptwxz.com/
base_URL : 'https://www.ptwxz.com/',
charset : 'gbk',

// 解析 作品名稱 → 作品id get_work()
Expand All @@ -39,6 +41,7 @@ crawler = new CeL.work_crawler({
} ];
},
parse_search_result : function(html, get_label) {
// console.log(html);
if (html.includes('<span class="hottext">最新章节:</span>')) {
// 只有一個作品完全符合,引導到了作品資訊頁面。
var matched = html.match(/ href="[^<>"]+?\/\d{1,2}\/(\d{1,5})\/"/);
Expand Down Expand Up @@ -157,6 +160,7 @@ crawler = new CeL.work_crawler({

// 取得每一個章節的各個影像內容資料。 get_chapter_data()
parse_chapter_data : function(html, work_data, get_label, chapter_NO) {
// console.log(html);
// 在取得小說章節內容的時候,若發現有章節被目錄漏掉,則將之補上。
this.check_next_chapter(work_data, chapter_NO, html);

Expand All @@ -172,14 +176,16 @@ crawler = new CeL.work_crawler({
|| text.between('<div class="ad_content">', '<div class="bottomlink">')
//
.between('</div>', '</div>');
text = text.replace(/<script[^<>]*>[^<>]*<\/script>/g, '')
// 去除掉廣告。
.replace(PATTERN_AD, '');
// console.log(text);

this.add_ebook_chapter(work_data, chapter_NO, {
title : chapter_data.part_title,
sub_title : chapter_data.title
|| get_label(html.between('<H1>', '</H1>')),
text : text.replace(/<script[^<>]*>[^<>]*<\/script>/g, '')
// 去除掉廣告。
.replace(PATTERN_AD, '')
text : text
});
}
});
Expand Down
10 changes: 10 additions & 0 deletions novel.cmn-Hans-CN/x81zw.js
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,16 @@ crawler = CeL.PTCMS({
// 取得包含章節列表的文字範圍。
get_chapter_list_contents : function(html) {
return html.between('<div id="list">', '</div>');
},
pre_add_ebook_chapter : function(data) {
// console.log(data.text);
data.text = data.text.replace(/([\s\S]+?)<首发[\s\S]+?(?:<br>|$)/g, function(all, front) {
var index = front.lastIndexOf('<br>');
if (index >= 0)
front = front.slice(0, index);
return front;
});
// console.log(data.text);
}
});

Expand Down

0 comments on commit ab43588

Please sign in to comment.