Added naive domain matching when inlining media

refs https://github.com/TryGhost/Toolbox/issues/524

- Mobiledoc from migrated content could contain html/markdown cards with a variety of different resource <> URL pairs. To avoid complex logic and parsing of html/md, we go with the simplest approach: matching external content URLs purely based on the provided domains. This gives a useful enough tool to migrate external content for a specific service (e.g. Revue)
- In cases where the content is not supported, the fetch will fail with a message and the inliner will move on to the next match, which is reasonable behavior for a migration tool
This commit is contained in:
Naz 2023-03-08 15:22:51 +08:00
parent 93ea9a2976
commit 6fa00faaa2
No known key found for this signature in database
2 changed files with 54 additions and 1 deletions

View File

@ -108,7 +108,10 @@ class ExternalMediaInliner {
async #inlineMibiledoc(mobiledoc, domains) {
for (const domain of domains) {
const regex = new RegExp(`"src":"(${domain}.*?)"`, 'igm');
// NOTE: the src could end with a quote, apostrophe or double-backslash. Backslashes are added to mobiledoc
// as an escape character
const srcTerminationSymbols = `"|'|\\\\`;
const regex = new RegExp(`(${domain}.*?)(${srcTerminationSymbols})`, 'igm');
const matches = mobiledoc.matchAll(regex);
for (const [,src] of matches) {

View File

@ -110,6 +110,56 @@ describe('ExternalMediaInliner', function () {
}));
});
it('inlines the image from post\'s mobiledoc containing html card', async function () {
    // External host and path of the image referenced from inside the html card.
    const mediaHost = 'https://bucketeer-e05bbc84-baa3-437e-9518-adb32be77984.s3.amazonaws.com';
    const mediaPath = '/public/images/39719fcb-5af0-4764-bf8b-d375f37a09e5_1141x860';
    const externalSrc = `${mediaHost}${mediaPath}`;

    // Builds the serialized mobiledoc used in this test: a single html card whose <img> points at `src`.
    const mobiledocWithImage = src => `{"version":"0.3.1","atoms":[],"cards":[["html",{"html":"<img src="${src}" alt="Lorem ipsum">"}]],"markups":[],"sections":[[10,0],[1,"p",[]]],"ghostVersion":"4.0"}`;

    // The inliner is expected to download the image exactly once from the external host.
    const downloadScope = nock(mediaHost)
        .get(mediaPath)
        .reply(200, GIF1x1);

    // Post model instance whose `get` stub hands back the mobiledoc containing the external image.
    const mobiledocGetter = sinon.stub()
        .withArgs('mobiledoc')
        .returns(mobiledocWithImage(externalSrc));
    const postStub = {
        id: 'inlined-post-with-htmlcard-id',
        get: mobiledocGetter
    };

    postModelStub = {
        findPage: sinon.stub().returns({data: [postStub]}),
        edit: sinon.stub().resolves()
    };

    sinon.stub(path, 'relative')
        .withArgs('/content/images', '/content/images/unique-image.jpg')
        .returns('unique-image.jpg');

    // Fake media storage that always reports the same stored file location.
    const storageStub = {
        getTargetDir: () => '/content/images',
        getUniqueFileName: () => '/content/images/unique-image.jpg',
        saveRaw: () => '/content/images/unique-image.jpg'
    };

    const mediaInliner = new ExternalMediaInliner({
        PostModel: postModelStub,
        PostMetaModel: postMetaModelStub,
        TagModel: tagModelStub,
        UserModel: userModelStub,
        getMediaStorage: sinon.stub().withArgs('.jpg').returns(storageStub)
    });

    await mediaInliner.inline([mediaHost]);

    // The external image was requested and the post was saved exactly once.
    assert.ok(downloadScope.isDone());
    assert.ok(postModelStub.edit.calledOnce);

    // The saved mobiledoc references the stored image; the edit targets the same post internally.
    const [savedAttributes, savedOptions] = postModelStub.edit.args[0];
    assert.deepEqual(savedAttributes, {
        mobiledoc: mobiledocWithImage('__GHOST_URL__/content/images/unique-image.jpg')
    });
    assert.deepEqual(savedOptions, {
        id: 'inlined-post-with-htmlcard-id',
        context: {
            internal: true
        }
    });
});
it('logs an error when fetching an external media fails', async function () {
const imageURL = 'https://img.stockfresh.com/files/f/image.jpg';
const requestMock = nock('https://img.stockfresh.com')