Skip to content

Commit

Permalink
feat(route): add koreaherald route (#18008)
Browse files Browse the repository at this point in the history
* feat(route): add koreaherald route

* codefactor fix
  • Loading branch information
quiniapiezoelectricity authored Dec 31, 2024
1 parent 42c88d1 commit 336464d
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 0 deletions.
70 changes: 70 additions & 0 deletions lib/routes/koreaherald/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import { Route } from '@/types';
import cache from '@/utils/cache';
import got from '@/utils/got';
import { load } from 'cheerio';
import { parseDate } from '@/utils/parse-date';
import timezone from '@/utils/timezone';

/**
 * Route definition for the Korea Herald "News" feed.
 *
 * Declares the route path, its single optional `category` parameter, the
 * feature flags, user-facing documentation, and a radar rule, and wires the
 * route to `handler`.
 */
export const route: Route = {
// `category` is optional (trailing `?`); the `.+` pattern also accepts slashes,
// so nested categories such as `Business/Market` (see `description`) fit in one parameter.
path: '/:category{.+}?',
categories: ['traditional-media'],
example: '/koreaherald/National',
parameters: {
category: 'Category from the path of the URL of the corresponding site, `National` by default',
},
// Every optional capability flag is switched off for this route.
features: {
requirePuppeteer: false,
antiCrawler: false,
supportBT: false,
supportPodcast: false,
supportScihub: false,
requireConfig: false,
},
name: 'News',
maintainers: ['quiniapiezoelectricity'],
handler,
// User-facing help text; the template literal below is emitted verbatim, including its line breaks.
description: `
:::tip
For example, the category for the page https://www.koreaherald.com/Business and https://www.koreaherald.com/Business/Market would be \`/Business\` and \`/Business/Market\` respectively.
:::
`,
// Maps a site URL pattern to this route's target path.
// NOTE(review): `:category` in `source` looks like it covers a single path segment only,
// while the route itself accepts nested categories — confirm whether that is intended.
radar: [
{
source: ['www.koreaherald.com/:category'],
target: '/:category',
},
],
};

/** Subset of an article's JSON-LD (`application/ld+json`) payload that the feed reads. */
interface ArticleMetadata {
    headline: string;
    datePublished?: string;
    author?: unknown;
}

/**
 * Resolves a user-supplied category against the site root.
 *
 * `new URL(input, base)` lets an absolute (`https://host/…`), protocol-relative
 * (`//host/…`) or backslash-prefixed (`\\host\…`) input replace the host, so the
 * resolved origin is compared with the base origin before anything is fetched.
 *
 * @param category - Category path taken from the request.
 * @param baseUrl - Site root the category must stay under.
 * @returns The absolute URL of the category page.
 * @throws Error when the resolved URL points outside the site's origin.
 */
function resolveCategoryUrl(category: string, baseUrl: string): string {
    const resolved = new URL(category, baseUrl);
    if (resolved.origin !== new URL(baseUrl).origin) {
        throw new Error(`Invalid category: ${category}`);
    }
    return resolved.href;
}

/**
 * Picks the first JSON-LD object that carries a string `headline`.
 *
 * Each script block is parsed on its own (a page may embed several, and
 * concatenating them is not valid JSON); a block may hold one object or an array.
 *
 * @param rawBlocks - Text content of every `application/ld+json` script on the page.
 * @returns The matching metadata object, or `undefined` when no block qualifies.
 */
function findArticleMetadata(rawBlocks: readonly string[]): ArticleMetadata | undefined {
    for (const raw of rawBlocks) {
        let parsed: unknown;
        try {
            parsed = JSON.parse(raw);
        } catch {
            // Empty or malformed block: try the next one; the caller reports when none is usable.
            continue;
        }
        const candidates: unknown[] = Array.isArray(parsed) ? parsed : [parsed];
        for (const candidate of candidates) {
            if (typeof candidate === 'object' && candidate !== null && typeof (candidate as { headline?: unknown }).headline === 'string') {
                return candidate as ArticleMetadata;
            }
        }
    }
    return undefined;
}

/**
 * Turns a JSON-LD `author` value into a display string.
 *
 * @param author - A string, an object with a `name`, an array of either, or nothing.
 * @returns The author names joined with `, `, or `undefined` when none is present.
 */
function formatAuthor(author: unknown): string | undefined {
    const entries: unknown[] = Array.isArray(author) ? author : [author];
    const names = entries
        .map((entry) => (typeof entry === 'string' ? entry : (entry as { name?: unknown } | null | undefined)?.name))
        .filter((name): name is string => typeof name === 'string' && name !== '');
    return names.length > 0 ? names.join(', ') : undefined;
}

/**
 * Builds the feed for one Korea Herald category page.
 *
 * Fetches the category page (`National` when no category is given), collects the
 * article links under `article.recent_news > ul.news_list > li`, then loads each
 * article through `cache.tryGet` and reads its title, date and author from the
 * page's JSON-LD metadata and its body from `article.article-body`.
 *
 * @param ctx - Request context; only `ctx.req.param('category')` is read.
 * @returns The feed title, the category link and the list of article items.
 * @throws Error when the category resolves outside the site, or when an article
 *         page has no JSON-LD block with a headline.
 */
async function handler(ctx) {
    const category = ctx.req.param('category') ?? 'National';
    const baseUrl = 'https://www.koreaherald.com/';
    // Validate before any request is made: `category` comes straight from the request path.
    const categoryUrl = resolveCategoryUrl(category, baseUrl);

    const response = await got(categoryUrl);
    const $ = load(response.data);

    // Prefer the highlighted top-level nav entry; fall back to the category link in the nav area.
    const activeNav = $('ul.gnb').find('[class="on"]');
    const title = activeNav.length > 0 ? activeNav.text() : $('div.nav_area > a.category').text();

    const hrefs = $('article.recent_news > ul.news_list > li')
        .toArray()
        .map((item) => $(item).find('a').attr('href'))
        // Skip entries without a link instead of resolving `undefined` into a bogus URL.
        .filter((href): href is string => typeof href === 'string' && href !== '');
    // De-duplicate so an article listed twice is fetched and emitted once.
    const list = [...new Set(hrefs.map((href) => new URL(href, baseUrl).href))];

    const items = await Promise.all(
        list.map((url) =>
            cache.tryGet(url, async () => {
                const response = await got(url);
                const $ = load(response.data);
                const metadata = findArticleMetadata(
                    $('[type="application/ld+json"]')
                        .toArray()
                        .map((script) => $(script).text())
                );
                if (!metadata) {
                    throw new Error(`No article metadata found at ${url}`);
                }
                return {
                    title: metadata.headline,
                    link: url,
                    // NOTE(review): the +9 offset presumably reflects Korea Standard Time — confirm the
                    // published timestamp carries no offset of its own.
                    pubDate: metadata.datePublished ? timezone(parseDate(metadata.datePublished), +9) : undefined,
                    author: formatAuthor(metadata.author),
                    description: $('article.article-body').html(),
                };
            })
        )
    );

    return {
        title: `The Korea Herald - ${title}`,
        link: categoryUrl,
        item: items,
    };
}
6 changes: 6 additions & 0 deletions lib/routes/koreaherald/namespace.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import type { Namespace } from '@/types';

/**
 * Namespace metadata for the Korea Herald routes: the display name and the
 * site's domain.
 */
export const namespace: Namespace = {
name: 'The Korea Herald',
url: 'koreaherald.com',
};

0 comments on commit 336464d

Please sign in to comment.