From 6d4dcdf45d94ec5a4504c607f25017bd2c256ead Mon Sep 17 00:00:00 2001 From: Naz Date: Mon, 6 Mar 2023 22:50:24 +0800 Subject: [PATCH] Extended external media inlining refs https://github.com/TryGhost/Toolbox/issues/524 - We need to be able to inline external media in all internal resources: tags, users, post's meta fields. - This change adds media inlining logic to all these resources --- .../server/services/media-inliner/service.js | 3 + .../lib/ExternalMediaInliner.js | 176 ++++++++- .../test/ExternalMediaInliner.test.js | 338 ++++++++++++++++-- 3 files changed, 478 insertions(+), 39 deletions(-) diff --git a/ghost/core/core/server/services/media-inliner/service.js b/ghost/core/core/server/services/media-inliner/service.js index 5ed7bd1a1c..7095f47652 100644 --- a/ghost/core/core/server/services/media-inliner/service.js +++ b/ghost/core/core/server/services/media-inliner/service.js @@ -13,6 +13,9 @@ module.exports = { const mediaInliner = new MediaInliner({ PostModel: models.Post, + TagModel: models.Tag, + UserModel: models.User, + PostMetaModel: models.PostsMeta, getMediaStorage: (extension) => { if (config.get('uploads').images.extensions.includes(extension)) { return imageStorage; diff --git a/ghost/external-media-inliner/lib/ExternalMediaInliner.js b/ghost/external-media-inliner/lib/ExternalMediaInliner.js index 23c7252659..5584fd0ea4 100644 --- a/ghost/external-media-inliner/lib/ExternalMediaInliner.js +++ b/ghost/external-media-inliner/lib/ExternalMediaInliner.js @@ -7,14 +7,29 @@ class ExternalMediaInliner { /** @type {object} */ #PostModel; + /** @type {object} */ + #PostMetaModel; + + /** @type {object} */ + #TagModel; + + /** @type {object} */ + #UserModel; + /** * * @param {Object} deps * @param {Object} deps.PostModel - Post model + * @param {Object} deps.PostMetaModel - PostMeta model + * @param {Object} deps.TagModel - Tag model + * @param {Object} deps.UserModel - User model * @param {(extension) => import('ghost-storage-base')} deps.getMediaStorage - getMediaStorage */ constructor(deps) { this.#PostModel = deps.PostModel; + this.#PostMetaModel = deps.PostMetaModel; + this.#TagModel = deps.TagModel; + this.#UserModel = deps.UserModel; this.getMediaStorage = deps.getMediaStorage; } @@ -64,7 +79,28 @@ class ExternalMediaInliner { }; } - async #inlinePost(mobiledoc, domains) { + /** + * + * @param {Object} media - media to store locally + * @returns {Promise} - path to stored media + */ + async #storeMediaLocally(media) { + const storage = this.getMediaStorage(media.extension); + + if (!storage) { + logging.warn(`No storage adapter found for file extension: ${media.extension}`); + return null; + } else { + const targetDir = storage.getTargetDir(storage.storagePath); + const uniqueFileName = await storage.getUniqueFileName({ + name: media.filename + }, targetDir); + const filePath = await storage.saveRaw(media.fileBuffer, uniqueFileName); + return filePath; + } + } + + async #inlineMibiledoc(mobiledoc, domains) { for (const domain of domains) { const regex = new RegExp(`"src":"(${domain}.*?)"`, 'igm'); const matches = mobiledoc.matchAll(regex); @@ -78,22 +114,15 @@ class ExternalMediaInliner { } if (media) { - const storage = this.getMediaStorage(media.extension); + const filePath = await this.#storeMediaLocally(media); - if (!storage) { - logging.warn(`No storage adapter found for file extension: ${media.extension}`); - } else { - const targetDir = storage.getTargetDir(storage.storagePath); - const uniqueFileName = await storage.getUniqueFileName({ - name: media.filename - }, targetDir); - const filePath = await storage.saveRaw(media.fileBuffer, uniqueFileName); + if (filePath) { const inlinedSrc = `__GHOST_URL__${filePath}`; // NOTE: does not account for duplicate images in mobiledoc // in those cases would be processed twice mobiledoc = mobiledoc.replace(src, inlinedSrc); - logging.info('Inlined media: ', src, ' -> ', inlinedSrc); + logging.info(`Inlined media: ${src} -> ${inlinedSrc}`); } } } @@ -102,6 +131,76 @@ class ExternalMediaInliner { return mobiledoc; } + /** + * + * @param {Object} resourceModel - one of PostModel, TagModel, UserModel instances + * @param {String[]} fields - fields to inline + * @param {String[]} domains - domains to inline media from + * @returns Promise - updated fields map with local media paths + */ + async #inlineFields(resourceModel, fields, domains) { + const updatedFields = {}; + + for (const field of fields) { + for (const domain of domains) { + const src = resourceModel.get(field); + + if (src && src.startsWith(domain)) { + const response = await this.#getRemoteMedia(src); + + let media; + if (response) { + media = this.#extractFileDataFromResponse(src, response); + } + + if (media) { + const filePath = await this.#storeMediaLocally(media); + + if (filePath) { + const inlinedSrc = `__GHOST_URL__${filePath}`; + + updatedFields[field] = inlinedSrc; + logging.info(`Added media to inline: ${src} -> ${inlinedSrc}`); + } + } + } + } + } + + return updatedFields; + } + + /** + * + * @param {Object[]} resources - array of model instances + * @param {Object} model - resource model + * @param {string[]} fields - fields to inline + * @param {string[]} domains - domains to inline media from + */ + async #inlineSimpleFields(resources, model, fields, domains) { + logging.info(`Starting inlining external media for ${resources?.length} ${model.tableName}`); + + for (const resource of resources) { + try { + const updatedFields = await this.#inlineFields(resource, fields, domains); + + if (Object.keys(updatedFields).length > 0) { + await model.edit(updatedFields, { + id: resource.id, + context: { + internal: true + } + }); + } + } catch (err) { + logging.error(`Error inlining media for ${model.tableName}: ${resource.id}`); + logging.error(new errors.DataImportError({ + err + })); + } + } + } + /** * * @param {string[]} domains domains to inline media from @@ -111,17 +210,27 @@ class ExternalMediaInliner { limit: 'all', status: 'all' }); + const postsInilingFields = [ + 'feature_image' + ]; + + logging.info(`Starting inlining external media for posts: ${posts?.length}`); - logging.info('Starting inlining external media for posts: ', posts?.length); for (const post of posts) { try { - const inlinedMobiledoc = await this.#inlinePost(post.get('mobiledoc'), domains); + const inlinedMobiledoc = await this.#inlineMibiledoc(post.get('mobiledoc'), domains); + const updatedFields = await this.#inlineFields(post, postsInilingFields, domains); if (inlinedMobiledoc !== post.get('mobiledoc')) { - await this.#PostModel.edit({ - mobiledoc: inlinedMobiledoc - }, { - id: post.id + updatedFields.mobiledoc = inlinedMobiledoc; + } + + if (Object.keys(updatedFields).length > 0) { + await this.#PostModel.edit(updatedFields, { + id: post.id, + context: { + internal: true + } }); } } catch (err) { @@ -132,7 +241,38 @@ class ExternalMediaInliner { } } - logging.info('Finished inlining external media'); + const {data: postsMetas} = await this.#PostMetaModel.findPage({ + limit: 'all' + }); + const postsMetaInilingFields = [ + 'og_image', + 'twitter_image' + ]; + + await this.#inlineSimpleFields(postsMetas, this.#PostMetaModel, postsMetaInilingFields, domains); + + const {data: tags} = await this.#TagModel.findPage({ + limit: 'all' + }); + const tagInliningFields = [ + 'feature_image', + 'og_image', + 'twitter_image' + ]; + + await this.#inlineSimpleFields(tags, this.#TagModel, tagInliningFields, domains); + + const {data: users} = await this.#UserModel.findPage({ + limit: 'all' + }); + const userInliningFields = [ + 'profile_image', + 'cover_image' + ]; + + await this.#inlineSimpleFields(users, this.#UserModel, userInliningFields, domains); + + logging.info('Finished inlining external media for posts, tags, and users'); } } diff --git a/ghost/external-media-inliner/test/ExternalMediaInliner.test.js b/ghost/external-media-inliner/test/ExternalMediaInliner.test.js index ca18eba9cb..b7e1c916b7 100644 --- a/ghost/external-media-inliner/test/ExternalMediaInliner.test.js +++ b/ghost/external-media-inliner/test/ExternalMediaInliner.test.js @@ -7,6 +7,10 @@ const ExternalMediaInliner = require('../index'); describe('ExternalMediaInliner', function () { let logging; let GIF1x1; + let postModelStub; + let postMetaModelStub; + let tagModelStub; + let userModelStub; beforeEach(function () { // use a 1x1 gif in nock responses because it's really small and easy to work with @@ -16,6 +20,35 @@ describe('ExternalMediaInliner', function () { error: sinon.stub(loggingLib, 'error'), warn: sinon.stub(loggingLib, 'warn') }; + + postModelStub = { + tableName: 'posts', + findPage: sinon.stub().resolves({ + data: [] + }), + edit: sinon.stub().resolves() + }; + postMetaModelStub = { + tableName: 'posts_meta', + findPage: sinon.stub().resolves({ + data: [] + }), + edit: sinon.stub().resolves() + }; + tagModelStub = { + tableName: 'tags', + findPage: sinon.stub().resolves({ + data: [] + }), + edit: sinon.stub().resolves() + }; + userModelStub = { + tableName: 'users', + findPage: sinon.stub().resolves({ + data: [] + }), + edit: sinon.stub().resolves() + }; }); afterEach(function () { @@ -34,20 +67,24 @@ describe('ExternalMediaInliner', function () { .get('/files/f/image.jpg') .reply(200, GIF1x1); - const postModelStub = { + const postModelInstanceStub = { id: 'inlined-post-id', get: sinon.stub() .withArgs('mobiledoc') .returns(`{"version":"0.3.1","atoms":[],"cards":[["image",{"src":"${imageURL}"}]]}`) }; - const postModelMock = { + postModelStub = { findPage: sinon.stub().returns({ - data: [postModelStub] + data: [postModelInstanceStub] }), edit: sinon.stub().resolves() }; + const inliner = new ExternalMediaInliner({ - PostModel: postModelMock, + PostModel: postModelStub, + PostMetaModel: postMetaModelStub, + TagModel: tagModelStub, + UserModel: userModelStub, getMediaStorage: sinon.stub().withArgs('.jpg').returns({ getTargetDir: () => '/content/images', getUniqueFileName: () => '/content/images/unique-image.jpg', @@ -58,11 +95,14 @@ describe('ExternalMediaInliner', function () { await inliner.inline(['https://img.stockfresh.com']); assert.ok(requestMock.isDone()); - assert.ok(postModelMock.edit.calledOnce); - assert.ok(postModelMock.edit.calledWith({ + assert.ok(postModelStub.edit.calledOnce); + assert.ok(postModelStub.edit.calledWith({ mobiledoc: '{"version":"0.3.1","atoms":[],"cards":[["image",{"src":"__GHOST_URL__/content/images/unique-image.jpg"}]]}' }, { - id: 'inlined-post-id' + id: 'inlined-post-id', + context: { + internal: true + } })); }); @@ -71,50 +111,86 @@ describe('ExternalMediaInliner', function () { const requestMock = nock('https://img.stockfresh.com') .get('/files/f/image.jpg') .reply(404); - const postModelStub = { + const postModelInstanceStub = { id: 'inlined-post-id', get: sinon.stub() .withArgs('mobiledoc') .returns(`{"version":"0.3.1","atoms":[],"cards":[["image",{"src":"${imageURL}"}]]}`) }; - const postModelMock = { + postModelStub = { findPage: sinon.stub().returns({ - data: [postModelStub] + data: [postModelInstanceStub] }) }; const inliner = new ExternalMediaInliner({ - PostModel: postModelMock + PostModel: postModelStub, + PostMetaModel: postMetaModelStub, + TagModel: tagModelStub, + UserModel: userModelStub }); await inliner.inline(['https://img.stockfresh.com']); assert.ok(requestMock.isDone()); - assert.ok(logging.error.calledTwice); assert.equal(logging.error.args[0][0], 'Error downloading remote media: https://img.stockfresh.com/files/f/image.jpg'); }); + it('logs an error when fetching an external media for simple fields fails', async function () { + const imageURL = 'https://img.stockfresh.com/files/f/simple-image.jpg'; + const requestMock = nock('https://img.stockfresh.com') + .get('/files/f/simple-image.jpg') + .reply(500); + const userModelInstanceStub = { + id: 'inlined-user-id', + get: sinon.stub() + .withArgs('profile_image') + .returns(imageURL) + }; + + userModelStub = { + findPage: sinon.stub().returns({ + data: [userModelInstanceStub] + }) + }; + + const inliner = new ExternalMediaInliner({ + PostModel: postModelStub, + PostMetaModel: postMetaModelStub, + TagModel: tagModelStub, + UserModel: userModelStub + }); + + await inliner.inline(['https://img.stockfresh.com']); + + assert.ok(requestMock.isDone()); + assert.equal(logging.error.args[0][0], 'Error downloading remote media: https://img.stockfresh.com/files/f/simple-image.jpg'); + }); + it('logs a warning when no suitable storage adapter found for inlined media extension', async function () { const fileURL = 'https://img.stockfresh.com/files/f/inlined.exe'; const requestMock = nock('https://img.stockfresh.com') .get('/files/f/inlined.exe') .reply(200, GIF1x1); - const postModelStub = { + const postModelInstanceStub = { id: 'inlined-post-id', get: sinon.stub() .withArgs('mobiledoc') .returns(`{"version":"0.3.1","atoms":[],"cards":[["image",{"src":"${fileURL}"}]]}`) }; - const postModelMock = { + postModelStub = { findPage: sinon.stub().returns({ - data: [postModelStub] + data: [postModelInstanceStub] }), edit: sinon.stub().resolves() }; const inliner = new ExternalMediaInliner({ - PostModel: postModelMock, + PostModel: postModelStub, + PostMetaModel: postMetaModelStub, + TagModel: tagModelStub, + UserModel: userModelStub, getMediaStorage: sinon.stub().withArgs('.exe').returns(null) }); @@ -131,20 +207,23 @@ describe('ExternalMediaInliner', function () { .get('/files/f/image.jpg') .reply(200, GIF1x1); - const postModelStub = { + postModelStub = { id: 'errored-post-id', get: sinon.stub() .withArgs('mobiledoc') .returns(`{"version":"0.3.1","atoms":[],"cards":[["image",{"src":"${imageURL}"}]]}`) }; - const postModelMock = { + postModelStub = { findPage: sinon.stub().returns({ data: [postModelStub] }), edit: sinon.stub().throws(new Error('Error saving the post')) }; const inliner = new ExternalMediaInliner({ - PostModel: postModelMock, + PostModel: postModelStub, + PostMetaModel: postMetaModelStub, + TagModel: tagModelStub, + UserModel: userModelStub, getMediaStorage: sinon.stub().withArgs('.jpg').returns({ getTargetDir: () => '/content/images', getUniqueFileName: () => '/content/images/unique-image.jpg', @@ -155,9 +234,226 @@ describe('ExternalMediaInliner', function () { await inliner.inline(['https://img.stockfresh.com']); assert.ok(requestMock.isDone()); - assert.ok(postModelMock.edit.calledOnce); - assert.ok(logging.error.calledTwice); + assert.ok(postModelStub.edit.calledOnce); assert.equal(logging.error.args[0][0], 'Error inlining media for post: errored-post-id'); }); + + it('logs an error when handling tag simple fields inlining throws an error', async function (){ + const imageURL = 'https://img.stockfresh.com/files/f/simple-image.jpg'; + const requestMock = nock('https://img.stockfresh.com') + .get('/files/f/simple-image.jpg') + .reply(200, GIF1x1); + + const getMethodStub = sinon.stub(); + getMethodStub.withArgs('feature_image').returns(imageURL); + getMethodStub.withArgs('og_image').returns(null); + getMethodStub.withArgs('twitter_image').returns(null); + + const tagModelInstanceStub = { + id: 'errored-tag-id', + get: getMethodStub + }; + tagModelStub.findPage = sinon.stub().returns({ + data: [tagModelInstanceStub] + }); + tagModelStub.edit = sinon.stub().throws(new Error('Error saving the tag')); + + const inliner = new ExternalMediaInliner({ + PostModel: postModelStub, + PostMetaModel: postMetaModelStub, + TagModel: tagModelStub, + UserModel: userModelStub, + getMediaStorage: sinon.stub().withArgs('.jpg').returns({ + getTargetDir: () => '/content/images', + getUniqueFileName: () => '/content/images/unique-image.jpg', + saveRaw: () => '/content/images/unique-image.jpg' + }) + }); + + await inliner.inline(['https://img.stockfresh.com']); + + assert.ok(requestMock.isDone()); + assert.ok(tagModelStub.edit.calledOnce); + assert.equal(logging.error.args[0][0], 'Error inlining media for tags: errored-tag-id'); + }); + + it('inlines image in the post\'s feature_image field', async function () { + const imageURL = 'https://img.stockfresh.com/files/f/posts_feature_image.jpg'; + const requestMock = nock('https://img.stockfresh.com') + .get('/files/f/posts_feature_image.jpg') + .reply(200, GIF1x1); + + postModelStub = { + id: 'inlined-post-id', + get: sinon.stub() + .withArgs('feature_image') + .returns(imageURL) + }; + const postModelMock = { + findPage: sinon.stub().returns({ + data: [postModelStub] + }), + edit: sinon.stub().resolves() + }; + const inliner = new ExternalMediaInliner({ + PostModel: postModelMock, + PostMetaModel: postMetaModelStub, + TagModel: tagModelStub, + UserModel: userModelStub, + getMediaStorage: sinon.stub().withArgs('.jpg').returns({ + getTargetDir: () => '/content/images', + getUniqueFileName: () => '/content/images/unique-feature-image.jpg', + saveRaw: () => '/content/images/unique-feature-image.jpg' + }) + }); + + await inliner.inline(['https://img.stockfresh.com']); + + assert.ok(requestMock.isDone()); + assert.ok(postModelMock.edit.calledOnce); + assert.ok(postModelMock.edit.calledWith({ + feature_image: '__GHOST_URL__/content/images/unique-feature-image.jpg' + }, { + id: 'inlined-post-id', + context: { + internal: true + } + })); + }); + + it('inlines og_image image in posts_meta table', async function () { + const imageURL = 'https://img.stockfresh.com/files/f/posts_meta_image.jpg'; + const requestMock = nock('https://img.stockfresh.com') + .get('/files/f/posts_meta_image.jpg') + .reply(200, GIF1x1); + + const getMethodStub = sinon.stub(); + getMethodStub.withArgs('og_image').returns(imageURL); + getMethodStub.withArgs('twitter_image').returns(null); + const postsMetaModelInstanceStub = { + id: 'inlined-post-meta-id', + get: getMethodStub + }; + + postMetaModelStub.findPage = sinon.stub().resolves({ + data: [postsMetaModelInstanceStub] + }); + + const inliner = new ExternalMediaInliner({ + PostModel: postModelStub, + PostMetaModel: postMetaModelStub, + TagModel: tagModelStub, + UserModel: userModelStub, + getMediaStorage: sinon.stub().withArgs('.jpg').returns({ + getTargetDir: () => '/content/images', + getUniqueFileName: () => '/content/images/unique-posts-meta-image.jpg', + saveRaw: () => '/content/images/unique-posts-meta-image.jpg' + }) + }); + + await inliner.inline(['https://img.stockfresh.com']); + + assert.ok(requestMock.isDone()); + assert.ok(postMetaModelStub.edit.calledOnce); + assert.deepEqual(postMetaModelStub.edit.args[0][0], { + og_image: '__GHOST_URL__/content/images/unique-posts-meta-image.jpg' + }); + assert.deepEqual(postMetaModelStub.edit.args[0][1], { + id: 'inlined-post-meta-id', + context: { + internal: true + } + }); + }); + + it('inlines twitter_image image in tags table', async function () { + const imageURL = 'https://img.stockfresh.com/files/f/tag_twitter_image.jpg'; + const requestMock = nock('https://img.stockfresh.com') + .get('/files/f/tag_twitter_image.jpg') + .reply(200, GIF1x1); + + const getMethodStub = sinon.stub(); + getMethodStub.withArgs('twitter_image').returns(imageURL); + getMethodStub.returns(null); + const tagModelInstanceStub = { + id: 'inlined-tag-id', + get: getMethodStub + }; + + tagModelStub.findPage = sinon.stub().resolves({ + data: [tagModelInstanceStub] + }); + + const inliner = new ExternalMediaInliner({ + PostModel: postModelStub, + PostMetaModel: postMetaModelStub, + TagModel: tagModelStub, + UserModel: userModelStub, + getMediaStorage: sinon.stub().withArgs('.jpg').returns({ + getTargetDir: () => '/content/images', + getUniqueFileName: () => '/content/images/unique-tag-twitter-image.jpg', + saveRaw: () => '/content/images/unique-tag-twitter-image.jpg' + }) + }); + + await inliner.inline(['https://img.stockfresh.com']); + + assert.ok(requestMock.isDone()); + assert.ok(tagModelStub.edit.calledOnce); + assert.deepEqual(tagModelStub.edit.args[0][0], { + twitter_image: '__GHOST_URL__/content/images/unique-tag-twitter-image.jpg' + }); + assert.deepEqual(tagModelStub.edit.args[0][1], { + id: 'inlined-tag-id', + context: { + internal: true + } + }); + }); + + it('inlines cover_image image in users table', async function () { + const imageURL = 'https://img.stockfresh.com/files/f/user_cover_image.jpg'; + const requestMock = nock('https://img.stockfresh.com') + .get('/files/f/user_cover_image.jpg') + .reply(200, GIF1x1); + + const getMethodStub = sinon.stub(); + getMethodStub.withArgs('cover_image').returns(imageURL); + getMethodStub.returns(null); + const userModelInstanceStub = { + id: 'inlined-user-id', + get: getMethodStub + }; + + userModelStub.findPage = sinon.stub().resolves({ + data: [userModelInstanceStub] + }); + + const inliner = new ExternalMediaInliner({ + PostModel: postModelStub, + PostMetaModel: postMetaModelStub, + TagModel: tagModelStub, + UserModel: userModelStub, + getMediaStorage: sinon.stub().withArgs('.jpg').returns({ + getTargetDir: () => '/content/images', + getUniqueFileName: () => '/content/images/user-cover-image.jpg', + saveRaw: () => '/content/images/user-cover-image.jpg' + }) + }); + + await inliner.inline(['https://img.stockfresh.com']); + + assert.ok(requestMock.isDone()); + assert.ok(userModelStub.edit.calledOnce); + assert.deepEqual(userModelStub.edit.args[0][0], { + cover_image: '__GHOST_URL__/content/images/user-cover-image.jpg' + }); + assert.deepEqual(userModelStub.edit.args[0][1], { + id: 'inlined-user-id', + context: { + internal: true + } + }); + }); }); });