diff --git a/README.md b/README.md index f850823..66ff11e 100644 --- a/README.md +++ b/README.md @@ -21,14 +21,29 @@ $ npm i hexo-filter-nofollow --save nofollow: enable: true field: site + elements: + - 'a' + - 'img' exclude: - - 'exclude1.com' - - 'exclude2.com' + - '*.exclude1.com' + - 'exclude2.com/path/*' + rel: + - 'external' + - 'noreferrer' + - 'nofollow' + - 'noopener' + referrerpolicy: 'no-referrer' ``` - **enable** - Enable the plugin. Default value is `true`. - **field** - The scope you want the plugin to proceed, can be 'site' or 'post'. Default value is `site`. - 'post' - Only add nofollow attribute to external links in your post content - 'site' - Add nofollow attribute to external links of whole sites -- **exclude** - Exclude hostname. Specify subdomain when applicable, including `www`. - - 'exclude1.com' does not apply to `www.exclude1.com` nor `en.exclude1.com`. +- **elements** - The tags to be processed; currently only `<a>` and `<img>` are supported. +- **include** - Include hostname. You can use `*` or `?` glob wildcards. If include is configured, other external links will not be processed. +- **exclude** - Exclude hostname. You can use `*` or `?` glob wildcards. + - `exclude1.com` does not apply to `www.exclude1.com` nor `en.exclude1.com`. + - `*.exclude1.com` applies to `www.exclude1.com` and `en.exclude1.com`. +- **minimatch** - Glob wildcards are implemented by [minimatch](https://github.com/isaacs/minimatch); use this field to pass options to it. +- **rel** - Configurable rel attribute value. +- **referrerpolicy** - Configurable referrerpolicy attribute value. 
diff --git a/index.js b/index.js index 9f05fd5..ea34bba 100755 --- a/index.js +++ b/index.js @@ -5,7 +5,11 @@ hexo.config.nofollow = Object.assign({ enable: true, field: 'site', - exclude: [] + elements: ['a'], + include: [], + exclude: [], + rel: ['noopener', 'external', 'nofollow', 'noreferrer'], + referrerpolicy: 'no-referrer' }, hexo.config.nofollow); const config = hexo.config.nofollow; diff --git a/lib/filter.js b/lib/filter.js index ba020b9..846c501 100755 --- a/lib/filter.js +++ b/lib/filter.js @@ -1,56 +1,116 @@ 'use strict'; const { parse } = require('url'); +const { Minimatch } = require('minimatch'); +/** + * Check whether the url is an external link + * @param {string} url + * @param {object} config + * @returns boolean + */ function isExternal(url, config) { - const exclude = config.nofollow.exclude; + const { includeGlobs, excludeGlobs } = config.nofollow; const data = parse(url); - const host = data.hostname; + const { hostname, path } = data; const sitehost = parse(config.url).hostname || config.url; - if (!data.protocol || !sitehost) return false; + if (!data.protocol || !hostname || !sitehost) return false; - if (exclude && exclude.length) { - for (const i of exclude) { - if (host === i) return false; + const target = hostname + path; + + if (excludeGlobs && excludeGlobs.length) { + for (const glob of excludeGlobs) { + if (glob.match(target)) return false; } } - if (host !== sitehost) return true; + if (includeGlobs && includeGlobs.length) { + for (const glob of includeGlobs) { + if (glob.match(target)) return true; + } + } - return false; + if (includeGlobs && includeGlobs.length) { + // If include is configured, other links will not be treated as external links + return false; + } + return hostname != sitehost; } -module.exports = function(data) { - const hexo = this; - const config = hexo.config; +/** + * Add attribute to the tag + * @param {string} source tag string + * @param {string} attribute string containing the url + * @param {string} 
new attribute key + * @param {string | array} new attribute value + * @returns new tag string + */ +function addAttr(tagStr, urlAttrStr, attrKey, attrValue) { + const value = [...toArray(attrValue)]; + const regexKey = new RegExp(`${attrKey}=`, 'gi'); + const attrRegex = new RegExp(`\\s${attrKey}="(.*?)"`, 'gi'); + if (regexKey.test(tagStr)) { + tagStr = tagStr.replace(attrRegex, (attrStr, attrStrValue) => { + value.push(...attrStrValue.split(' ')); + return ''; + }); + } + // De-duplicate + const uniqValue = [...new Set(value)]; + return tagStr.replace(urlAttrStr, `${urlAttrStr} ${attrKey}="${uniqValue.join(' ')}"`); +} - const exclude = config.nofollow.exclude; - if (exclude && !Array.isArray(exclude)) { - config.nofollow.exclude = [exclude]; +function toArray(data) { + return data && !Array.isArray(data) ? [data] : data; +} + +function addSitePath(sitePattern) { + if (!sitePattern || sitePattern.indexOf('/') >= 0) { + return sitePattern; } + // default wildcard under the site + return sitePattern + '/**'; +} - const filterExternal = data => { - return data.replace(//gi, (str, hrefStr, href) => { - if (!isExternal(href, config)) return str; +module.exports = function nofollow(data) { + const hexo = this; + const config = hexo.config; - let noFollow = ['noopener', 'external', 'nofollow', 'noreferrer']; + const { elements, include, exclude, minimatch } = config.nofollow; + config.nofollow.elements = toArray(elements); + config.nofollow.include = toArray(include); + config.nofollow.exclude = toArray(exclude); - if (/rel=/gi.test(str)) { - str = str.replace(/\srel="(.*?)"/gi, (relStr, rel) => { - rel = rel.split(' '); - noFollow.push(...rel); - // De-duplicate - noFollow = [...new Set(noFollow)]; + config.nofollow.includeGlobs = config.nofollow.include.map(pattern => new Minimatch(addSitePath(pattern), minimatch)); + config.nofollow.excludeGlobs = config.nofollow.exclude.map(pattern => new Minimatch(addSitePath(pattern), minimatch)); - return ''; - }); - } + const 
filterATagHrefExternal = data => { + return data.replace(//gi, (aTagRaw, hrefAttrRaw, href) => { + if (!isExternal(href, config)) return aTagRaw; + aTagRaw = addAttr(aTagRaw, hrefAttrRaw, 'referrerpolicy', config.nofollow.referrerpolicy); + return addAttr(aTagRaw, hrefAttrRaw, 'rel', config.nofollow.rel); + }); + }; - return str.replace(hrefStr, `${hrefStr} rel="${noFollow.join(' ')}"`); + const filterImgTagSrcExternal = data => { + return data.replace(//gi, (imgTagRaw, srcAttrRaw, src) => { + if (!isExternal(src, config)) return imgTagRaw; + imgTagRaw = addAttr(imgTagRaw, srcAttrRaw, 'referrerpolicy', config.nofollow.referrerpolicy); + return addAttr(imgTagRaw, srcAttrRaw, 'rel', config.nofollow.rel); }); }; + const filterExternal = data => { + if (config.nofollow.elements.includes('a')) { + data = filterATagHrefExternal(data); + } + if (config.nofollow.elements.includes('img')) { + data = filterImgTagSrcExternal(data); + } + return data; + }; + if (config.nofollow.field === 'post') { data.content = filterExternal(data.content); } else { @@ -59,3 +119,5 @@ module.exports = function(data) { return data; }; + + diff --git a/package.json b/package.json index 01213a4..4a47e24 100644 --- a/package.json +++ b/package.json @@ -37,5 +37,8 @@ "eslint-config-hexo": "^4.1.0", "hexo": "hexojs/hexo", "mocha": "^8.0.1" + }, + "dependencies": { + "minimatch": "^3.0.4" } } diff --git a/test/index.js b/test/index.js index 161b609..3fb43fe 100755 --- a/test/index.js +++ b/test/index.js @@ -9,49 +9,71 @@ describe('hexo-filter-nofollow', () => { const nofollowFilter = require('../lib/filter').bind(hexo); hexo.config.url = 'https://example.com'; - hexo.config.nofollow = {}; + hexo.config.nofollow = { include: [], exclude: [], elements: ['a', 'img'], rel: ['noopener', 'external', 'nofollow', 'noreferrer'], referrerpolicy: 'no-referrer' }; describe('Default', () => { const content = [ '# External link test', '1. External link', 'Hexo', + 'Hexo', '2. 
External link with existed "rel" Attribute', - 'Hexo', - 'Hexo', + 'Hexo', + 'Hexo', + 'Hexo', + 'Hexo', '3. External link with existing "rel=noopener", "rel=external" or "rel=noreferrer"', 'Hexo', 'Hexo', 'Hexo', 'Hexo', + 'Hexo', + 'Hexo', + 'Hexo', + 'Hexo', '4. External link with Other Attributes', 'Hexo', 'Hexo', + 'Hexo', + 'Hexo', '5. Internal link', 'Link', + 'Link', '6. Ignore links don\'t have "href" attribute', - 'Anchor' + 'Anchor', + 'Anchor' ].join('\n'); const expected = [ '# External link test', '1. External link', - 'Hexo', + 'Hexo', + 'Hexo', '2. External link with existed "rel" Attribute', - 'Hexo', - 'Hexo', + 'Hexo', + 'Hexo', + 'Hexo', + 'Hexo', '3. External link with existing "rel=noopener", "rel=external" or "rel=noreferrer"', - 'Hexo', - 'Hexo', - 'Hexo', - 'Hexo', + 'Hexo', + 'Hexo', + 'Hexo', + 'Hexo', + 'Hexo', + 'Hexo', + 'Hexo', + 'Hexo', '4. External link with Other Attributes', - 'Hexo', - 'Hexo', + 'Hexo', + 'Hexo', + 'Hexo', + 'Hexo', '5. Internal link', 'Link', + 'Link', '6. Ignore links don\'t have "href" attribute', - 'Anchor' + 'Anchor', + 'Anchor' ].join('\n'); it('Default to field = "site"', () => { @@ -72,6 +94,64 @@ describe('hexo-filter-nofollow', () => { }); }); + describe('Include & Pattern', () => { + const content = [ + '# Include & Pattern link test', + '1. External link', + 'Hexo', + '2. External links whose hostname is not match glob pattern', + 'Example Domain', + '3. External links whose hostname is included in glob pattern', + 'Example Domain', + 'Example Domain', + '4. External links whose hostname is included in glob pattern with path', + 'Example Domain', + 'Example Domain' + ].join('\n'); + + it('String', () => { + hexo.config.nofollow.include = ['hexo.io', '*.example.org']; + + const result = nofollowFilter(content); + + result.should.eql([ + '# Include & Pattern link test', + '1. External link', + 'Hexo', + '2. External links whose hostname is not match glob pattern', + 'Example Domain', + '3. 
External links whose hostname is included in glob pattern', + 'Example Domain', + 'Example Domain', + '4. External links whose hostname is included in glob pattern with path', + 'Example Domain', + 'Example Domain' + ].join('\n')); + }); + + it('Array', () => { + hexo.config.nofollow.include = 'hexo.io'; + hexo.config.nofollow.exclude = ['example.org', '*.example.org', 'path.example.org/**']; + + const result = nofollowFilter(content); + + result.should.eql([ + '# Include & Pattern link test', + '1. External link', + 'Hexo', + '2. External links whose hostname is not match glob pattern', + 'Example Domain', + '3. External links whose hostname is included in glob pattern', + 'Example Domain', + 'Example Domain', + '4. External links whose hostname is included in glob pattern with path', + 'Example Domain', + 'Example Domain' + ].join('\n')); + }); + }); + + describe('Exclude', () => { const content = [ '# Exclude link test', @@ -85,6 +165,7 @@ describe('hexo-filter-nofollow', () => { ].join('\n'); it('String', () => { + hexo.config.nofollow.include = []; hexo.config.nofollow.exclude = 'example.org'; const result = nofollowFilter(content); @@ -92,12 +173,12 @@ describe('hexo-filter-nofollow', () => { result.should.eql([ '# Exclude link test', '1. External link', - 'Hexo', + 'Hexo', '2. Ignore links whose hostname is same as config', 'Example Domain', '3. Ignore links whose hostname is included in exclude', 'Example Domain', - 'Example Domain' + 'Example Domain' ].join('\n')); }); @@ -109,7 +190,7 @@ describe('hexo-filter-nofollow', () => { result.should.eql([ '# Exclude link test', '1. External link', - 'Hexo', + 'Hexo', '2. Ignore links whose hostname is same as config', 'Example Domain', '3. Ignore links whose hostname is included in exclude',