Ghost/ghost/link-replacer/lib/LinkReplacer.js
Simon Backx 4184b279d2
🐛 Fixed HTML escaping when using outbound link tagging (#16380)
fixes https://github.com/TryGhost/Team/issues/2666

- Somehow occurrences of `&map_` got replaced with `↦`
- Disables escaping &, ', " and other HTML characters when not needed
(escaping is already handled by mobiledoc/lexical)
- Bumps unit test coverage of link replacer to 100%
2023-03-08 16:30:54 +01:00

46 lines
1.4 KiB
JavaScript

class LinkReplacer {
/**
* Replaces the links in the provided HTML
* @param {string} html
* @param {(url: URL): Promise<URL|string>} replaceLink
* @returns {Promise<string>}
*/
async replace(html, replaceLink) {
const cheerio = require('cheerio');
try {
const $ = cheerio.load(html, {
xml: {
// This makes sure we use the faster and less destructive htmlparser2 parser
xmlMode: false
},
// Do not replace &, ', " and others with HTML entities (is bugged because it replaces &map_ with something weird (&#x21A6;))
decodeEntities: false
}, false);
for (const el of $('a').toArray()) {
const href = $(el).attr('href');
if (href) {
let url;
try {
url = new URL(href);
} catch (e) {
// Ignore invalid URLs
}
if (url) {
url = await replaceLink(url);
const str = url.toString();
$(el).attr('href', str);
}
}
}
return $.html();
} catch (e) {
// Catch errors from cheerio
return html;
}
}
}
// Export a single, already-constructed LinkReplacer instance rather than the class itself.
module.exports = new LinkReplacer();