2022-09-19 18:12:54 +03:00
|
|
|
class LinkReplacer {
|
|
|
|
/**
|
|
|
|
* Replaces the links in the provided HTML
|
|
|
|
* @param {string} html
|
2023-08-08 14:22:56 +03:00
|
|
|
* @param {(url: URL, originalPath: string): Promise<URL|string|false>} replaceLink
|
|
|
|
* @param {object} options
|
|
|
|
* @param {string} [options.base] If you want to replace relative links, this will replace them to an absolute link and call the replaceLink method too
|
2022-09-19 18:12:54 +03:00
|
|
|
* @returns {Promise<string>}
|
|
|
|
*/
|
2023-08-08 14:22:56 +03:00
|
|
|
async replace(html, replaceLink, options = {}) {
|
2024-03-06 12:11:49 +03:00
|
|
|
const {tokenize} = require('html5parser');
|
2023-03-28 13:29:15 +03:00
|
|
|
const entities = require('entities');
|
2024-03-06 12:11:49 +03:00
|
|
|
|
2023-02-16 13:26:35 +03:00
|
|
|
try {
|
2024-03-06 12:11:49 +03:00
|
|
|
const tokens = tokenize(html); // IToken[]
|
|
|
|
const replacements = [];
|
|
|
|
|
|
|
|
let inAnchor = false;
|
|
|
|
let inHref = false;
|
|
|
|
|
|
|
|
// interface IToken {
|
|
|
|
// start: number;
|
|
|
|
// end: number;
|
|
|
|
// value: string;
|
|
|
|
// type: TokenKind;
|
|
|
|
// }
|
|
|
|
|
|
|
|
// const enum TokenKind {
|
|
|
|
// 0 Literal,
|
|
|
|
// 1 OpenTag, // trim leading '<'
|
|
|
|
// 2 OpenTagEnd, // trim tailing '>', only could be '/' or ''
|
|
|
|
// 3 CloseTag, // trim leading '</' and tailing '>'
|
|
|
|
// 4 Whitespace, // the whitespace between attributes
|
|
|
|
// 5 AttrValueEq,
|
|
|
|
// 6 AttrValueNq,
|
|
|
|
// 7 AttrValueSq,
|
|
|
|
// 8 AttrValueDq,
|
|
|
|
// }
|
|
|
|
|
|
|
|
for (const token of tokens) {
|
|
|
|
if (token.type === 1 && token.value === 'a') {
|
|
|
|
inAnchor = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (inAnchor) {
|
|
|
|
if (token.type === 2) {
|
|
|
|
inAnchor = false;
|
|
|
|
inHref = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (token.type === 6 && token.value === 'href') {
|
|
|
|
inHref = true;
|
2023-02-16 13:26:35 +03:00
|
|
|
}
|
2024-03-06 12:11:49 +03:00
|
|
|
|
|
|
|
if (inHref && token.type === 8) {
|
|
|
|
const path = entities.decode(token.value.substring(1, token.value.length - 1));
|
|
|
|
let url;
|
|
|
|
try {
|
|
|
|
url = new URL(path, options.base);
|
|
|
|
} catch (e) {
|
|
|
|
// Ignore invalid URLs
|
|
|
|
}
|
|
|
|
if (url) {
|
|
|
|
url = await replaceLink(url, path);
|
|
|
|
const str = url.toString();
|
|
|
|
replacements.push({url: str, start: token.start + 1, end: token.end - 1});
|
|
|
|
}
|
|
|
|
|
|
|
|
inHref = false;
|
2023-02-16 13:26:35 +03:00
|
|
|
}
|
2022-09-19 18:12:54 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-03-06 12:11:49 +03:00
|
|
|
let offsetAdjustment = 0;
|
|
|
|
|
|
|
|
replacements.forEach(({url, start, end}) => {
|
|
|
|
const originalLength = end - start;
|
|
|
|
const replacementLength = url.length;
|
|
|
|
|
|
|
|
html = html.slice(0, start + offsetAdjustment) + url + html.slice(end + offsetAdjustment);
|
|
|
|
|
|
|
|
offsetAdjustment += replacementLength - originalLength;
|
|
|
|
});
|
|
|
|
|
|
|
|
return html;
|
2023-02-16 13:26:35 +03:00
|
|
|
} catch (e) {
|
2024-03-06 12:11:49 +03:00
|
|
|
// do nothing in case of error,
|
|
|
|
// we don't want to break the content for the sake of member attribution
|
2023-02-16 13:26:35 +03:00
|
|
|
return html;
|
|
|
|
}
|
2022-09-19 18:12:54 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Export a ready-made LinkReplacer instance rather than the class itself
module.exports = new LinkReplacer();
|