From 2ce992ed003bb977cdfe727ce3c6b0784ea702da Mon Sep 17 00:00:00 2001
From: Naz
Date: Fri, 3 Mar 2023 19:08:18 +0800
Subject: [PATCH] Added media inliner for mobiledoc content

refs https://github.com/TryGhost/Toolbox/issues/523

- This is a first pass media inliner going through all posts and checking to inline media from specified domains
- As a working copy the inliner looks for image content from Revue and Substack
- Media referenced more than once in a post is downloaded and stored once, and every reference to it is rewritten
- Configured domains are matched literally (regex-escaped), posts without mobiledoc are skipped and a failed inlining run is logged instead of surfacing as an unhandled rejection
---
 .../server/services/media-inliner/service.js  |  43 +++-
 ghost/core/package.json                       |   1 +
 .../lib/ExternalMediaInliner.js               | 165 ++++++++++++++-
 ghost/external-media-inliner/package.json     |   4 +-
 .../test/ExternalMediaInliner.test.js         | 199 +++++++++++++++++-
 yarn.lock                                     |   2 +-
 6 files changed, 407 insertions(+), 7 deletions(-)

diff --git a/ghost/core/core/server/services/media-inliner/service.js b/ghost/core/core/server/services/media-inliner/service.js
index 4d60f39ae6..47a6d90c63 100644
--- a/ghost/core/core/server/services/media-inliner/service.js
+++ b/ghost/core/core/server/services/media-inliner/service.js
@@ -1,12 +1,51 @@
 module.exports = {
     async init() {
         const debug = require('@tryghost/debug')('mediaInliner');
+        const logging = require('@tryghost/logging');
+        const MediaInliner = require('@tryghost/external-media-inliner');
+        const models = require('../../models');
+
+        const mediaStorage = require('../../adapters/storage').getStorage('media');
+        const imageStorage = require('../../adapters/storage').getStorage('images');
+        const fileStorage = require('../../adapters/storage').getStorage('files');
+
+        const config = require('../../../shared/config');
+
+        const mediaInliner = new MediaInliner({
+            PostModel: models.Post,
+            getMediaStorage: (extension) => {
+                if (config.get('uploads').images.extensions.includes(extension)) {
+                    return imageStorage;
+                } else if (config.get('uploads').media.extensions.includes(extension)) {
+                    return mediaStorage;
+                } else if (config.get('uploads').files.extensions.includes(extension)) {
+                    return fileStorage;
+                } else {
+                    return null;
+                }
+            }
+        });
 
         this.api = {
-            // @NOTE: the inlining should become an offloaded job
-            // startMediaInliner: mediaInliner.inlineMedia
             startMediaInliner: (domains) => {
+                if (!domains || !domains.length) {
+                    // default domains to inline from if none are provided
+                    domains = [
+                        'https://s3.amazonaws.com/revue',
+                        'https://substackcdn.com'
+                    ];
+                }
+
                 debug('[Inliner] Starting media inlining job for domains: ', domains);
+
+                // @NOTE: the inlining should become an offloaded job
+                // startMediaInliner: mediaInliner.inlineMedia
+                // The job is deliberately not awaited, so a rejection has to be handled
+                // here or it would surface as an unhandled promise rejection
+                mediaInliner.inline(domains).catch((err) => {
+                    logging.error(err);
+                });
+
                 return {
                     status: 'success'
                 };
diff --git a/ghost/core/package.json b/ghost/core/package.json
index fdc79d53b0..c29126c4cc 100644
--- a/ghost/core/package.json
+++ b/ghost/core/package.json
@@ -84,6 +84,7 @@
     "@tryghost/errors": "1.2.21",
     "@tryghost/event-aware-cache-wrapper": "0.0.0",
     "@tryghost/express-dynamic-redirects": "0.0.0",
+    "@tryghost/external-media-inliner": "0.0.0",
     "@tryghost/helpers": "1.1.75",
     "@tryghost/html-to-plaintext": "0.0.0",
     "@tryghost/http-cache-utils": "0.1.7",
diff --git a/ghost/external-media-inliner/lib/ExternalMediaInliner.js b/ghost/external-media-inliner/lib/ExternalMediaInliner.js
index f88e7798c0..23c7252659 100644
--- a/ghost/external-media-inliner/lib/ExternalMediaInliner.js
+++ b/ghost/external-media-inliner/lib/ExternalMediaInliner.js
@@ -1,5 +1,168 @@
-class ExternalMediaInliner {
+const mime = require('mime-types');
+const request = require('@tryghost/request');
+const errors = require('@tryghost/errors');
+const logging = require('@tryghost/logging');
 
+class ExternalMediaInliner {
+    /** @type {object} */
+    #PostModel;
+
+    /**
+     *
+     * @param {Object} deps
+     * @param {Object} deps.PostModel - Post model
+     * @param {(extension) => import('ghost-storage-base')} deps.getMediaStorage - getMediaStorage
+     */
+    constructor(deps) {
+        this.#PostModel = deps.PostModel;
+        this.getMediaStorage = deps.getMediaStorage;
+    }
+
+    /**
+     * Downloads remote media, following redirects. Failures are logged, not thrown
+     *
+     * @param {string} requestURL - url of remote media
+     * @returns {Promise<Object|null>} response, or null when the download failed
+     */
+    async #getRemoteMedia(requestURL) {
+        try {
+            return await request(requestURL, {
+                followRedirect: true,
+                encoding: null
+            });
+        } catch (error) {
+            // NOTE: add special case for 404s
+            logging.error(`Error downloading remote media: ${requestURL}`);
+            logging.error(new errors.DataImportError({
+                err: error
+            }));
+
+            return null;
+        }
+    }
+
+    /**
+     * Derives the file name and extension of downloaded media
+     *
+     * @param {string} requestURL - url the media was requested from
+     * @param {Object} response - response from request
+     * @returns {{fileBuffer: *, filename: string, extension: string}}
+     */
+    #extractFileDataFromResponse(requestURL, response) {
+        const headers = response.headers;
+        const contentType = headers['content-type'];
+
+        const filename = requestURL
+            .split('/')
+            .pop()
+            .split('#')[0]
+            .split('?')[0];
+
+        const extension = mime.extension(contentType) || filename.split('.').pop();
+
+        return {
+            fileBuffer: response.body,
+            filename: filename,
+            extension: `.${extension}`
+        };
+    }
+
+    /**
+     * Escapes RegExp special characters so that the value is matched literally
+     *
+     * @param {string} value
+     * @returns {string}
+     */
+    #escapeRegExp(value) {
+        return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+    }
+
+    /**
+     * Downloads and stores media referenced from the given domains and points the mobiledoc at the stored copies
+     *
+     * @param {string} mobiledoc - serialized mobiledoc
+     * @param {string[]} domains - domains to inline media from
+     * @returns {Promise<string>} mobiledoc with references to inlined media replaced
+     */
+    async #inlinePost(mobiledoc, domains) {
+        for (const domain of domains) {
+            // domains are literal URL prefixes - escape them so characters like "." only match themselves
+            const regex = new RegExp(`"src":"(${this.#escapeRegExp(domain)}.*?)"`, 'igm');
+            // collect unique sources so media referenced more than once is downloaded and stored only once
+            const sources = new Set(Array.from(mobiledoc.matchAll(regex), ([, src]) => src));
+
+            for (const src of sources) {
+                const response = await this.#getRemoteMedia(src);
+
+                let media;
+                if (response) {
+                    media = this.#extractFileDataFromResponse(src, response);
+                }
+
+                if (media) {
+                    const storage = this.getMediaStorage(media.extension);
+
+                    if (!storage) {
+                        logging.warn(`No storage adapter found for file extension: ${media.extension}`);
+                    } else {
+                        const targetDir = storage.getTargetDir(storage.storagePath);
+                        const uniqueFileName = await storage.getUniqueFileName({
+                            name: media.filename
+                        }, targetDir);
+                        const filePath = await storage.saveRaw(media.fileBuffer, uniqueFileName);
+                        const inlinedSrc = `__GHOST_URL__${filePath}`;
+
+                        // replace every reference to the source, not only the first one, matching the
+                        // whole quoted value so longer URLs sharing the same prefix stay intact
+                        mobiledoc = mobiledoc.split(`"${src}"`).join(`"${inlinedSrc}"`);
+                        logging.info('Inlined media: ', src, ' -> ', inlinedSrc);
+                    }
+                }
+            }
+        }
+
+        return mobiledoc;
+    }
+
+    /**
+     *
+     * @param {string[]} domains domains to inline media from
+     */
+    async inline(domains) {
+        const {data: posts} = await this.#PostModel.findPage({
+            limit: 'all',
+            status: 'all'
+        });
+
+        logging.info('Starting inlining external media for posts: ', posts?.length);
+        for (const post of posts) {
+            try {
+                const mobiledoc = post.get('mobiledoc');
+
+                // posts without mobiledoc content have nothing to inline
+                if (!mobiledoc) {
+                    continue;
+                }
+
+                const inlinedMobiledoc = await this.#inlinePost(mobiledoc, domains);
+
+                if (inlinedMobiledoc !== mobiledoc) {
+                    await this.#PostModel.edit({
+                        mobiledoc: inlinedMobiledoc
+                    }, {
+                        id: post.id
+                    });
+                }
+            } catch (err) {
+                logging.error(`Error inlining media for post: ${post.id}`);
+                logging.error(new errors.DataImportError({
+                    err
+                }));
+            }
+        }
+
+        logging.info('Finished inlining external media');
+    }
 }
 
 module.exports = ExternalMediaInliner;
diff --git a/ghost/external-media-inliner/package.json b/ghost/external-media-inliner/package.json
index d839b6fa70..59044c0dd8 100644
--- a/ghost/external-media-inliner/package.json
+++ b/ghost/external-media-inliner/package.json
@@ -22,5 +22,7 @@
     "mocha": "10.2.0",
     "sinon": "15.0.1"
   },
-  "dependencies": {}
+  "dependencies": {
+    "mime-types": "2.1.35"
+  }
 }
diff --git a/ghost/external-media-inliner/test/ExternalMediaInliner.test.js b/ghost/external-media-inliner/test/ExternalMediaInliner.test.js
index cc0b295798..ca18eba9cb 100644
--- a/ghost/external-media-inliner/test/ExternalMediaInliner.test.js
+++ b/ghost/external-media-inliner/test/ExternalMediaInliner.test.js
@@ -1,8 +1,203 @@
 const assert = require('assert');
+const sinon = require('sinon');
+const nock = require('nock');
+const loggingLib = require('@tryghost/logging');
 const ExternalMediaInliner = require('../index');
 
 describe('ExternalMediaInliner', function () {
-    it('Creates an instance', function () {
-        assert.ok(new ExternalMediaInliner());
+    let logging;
+    let GIF1x1;
+
+    beforeEach(function () {
+        // use a 1x1 gif in nock responses because it's really small and easy to work with
+        GIF1x1 = Buffer.from('R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==', 'base64');
+        logging = {
+            info: sinon.stub(loggingLib, 'info'),
+            error: sinon.stub(loggingLib, 'error'),
+            warn: sinon.stub(loggingLib, 'warn')
+        };
+    });
+
+    afterEach(function () {
+        sinon.restore();
+        nock.cleanAll();
+    });
+
+    it('Creates an External Media Inliner instance', function () {
+        assert.ok(new ExternalMediaInliner({}));
+    });
+
+    describe('inline', function () {
+        it('inlines image in the post\'s mobiledoc content', async function () {
+            const imageURL = 'https://img.stockfresh.com/files/f/image.jpg';
+            const requestMock = nock('https://img.stockfresh.com')
+                .get('/files/f/image.jpg')
+                .reply(200, GIF1x1);
+
+            const postModelStub = {
+                id: 'inlined-post-id',
+                get: sinon.stub()
+                    .withArgs('mobiledoc')
+                    .returns(`{"version":"0.3.1","atoms":[],"cards":[["image",{"src":"${imageURL}"}]]}`)
+            };
+            const postModelMock = {
+                findPage: sinon.stub().returns({
+                    data: [postModelStub]
+                }),
+                edit: sinon.stub().resolves()
+            };
+            const inliner = new ExternalMediaInliner({
+                PostModel: postModelMock,
+                getMediaStorage: sinon.stub().withArgs('.jpg').returns({
+                    getTargetDir: () => '/content/images',
+                    getUniqueFileName: () => '/content/images/unique-image.jpg',
+                    saveRaw: () => '/content/images/unique-image.jpg'
+                })
+            });
+
+            await inliner.inline(['https://img.stockfresh.com']);
+
+            assert.ok(requestMock.isDone());
+            assert.ok(postModelMock.edit.calledOnce);
+            assert.ok(postModelMock.edit.calledWith({
+                mobiledoc: '{"version":"0.3.1","atoms":[],"cards":[["image",{"src":"__GHOST_URL__/content/images/unique-image.jpg"}]]}'
+            }, {
+                id: 'inlined-post-id'
+            }));
+        });
+
+        it('stores media referenced multiple times once and rewrites every reference', async function () {
+            const imageURL = 'https://img.stockfresh.com/files/f/image.jpg';
+            // the interceptor is registered once on purpose: repeated references must not trigger another download
+            const requestMock = nock('https://img.stockfresh.com')
+                .get('/files/f/image.jpg')
+                .reply(200, GIF1x1);
+
+            const postModelStub = {
+                id: 'inlined-post-id',
+                get: sinon.stub()
+                    .withArgs('mobiledoc')
+                    .returns(`{"version":"0.3.1","atoms":[],"cards":[["image",{"src":"${imageURL}"}],["image",{"src":"${imageURL}"}]]}`)
+            };
+            const postModelMock = {
+                findPage: sinon.stub().returns({
+                    data: [postModelStub]
+                }),
+                edit: sinon.stub().resolves()
+            };
+            const saveRaw = sinon.stub().returns('/content/images/unique-image.jpg');
+            const inliner = new ExternalMediaInliner({
+                PostModel: postModelMock,
+                getMediaStorage: sinon.stub().withArgs('.jpg').returns({
+                    getTargetDir: () => '/content/images',
+                    getUniqueFileName: () => '/content/images/unique-image.jpg',
+                    saveRaw
+                })
+            });
+
+            await inliner.inline(['https://img.stockfresh.com']);
+
+            assert.ok(requestMock.isDone());
+            assert.ok(saveRaw.calledOnce);
+            assert.ok(postModelMock.edit.calledWith({
+                mobiledoc: '{"version":"0.3.1","atoms":[],"cards":[["image",{"src":"__GHOST_URL__/content/images/unique-image.jpg"}],["image",{"src":"__GHOST_URL__/content/images/unique-image.jpg"}]]}'
+            }, {
+                id: 'inlined-post-id'
+            }));
+        });
+
+        it('logs an error when fetching an external media fails', async function () {
+            const imageURL = 'https://img.stockfresh.com/files/f/image.jpg';
+            const requestMock = nock('https://img.stockfresh.com')
+                .get('/files/f/image.jpg')
+                .reply(404);
+            const postModelStub = {
+                id: 'inlined-post-id',
+                get: sinon.stub()
+                    .withArgs('mobiledoc')
+                    .returns(`{"version":"0.3.1","atoms":[],"cards":[["image",{"src":"${imageURL}"}]]}`)
+            };
+
+            const postModelMock = {
+                findPage: sinon.stub().returns({
+                    data: [postModelStub]
+                })
+            };
+
+            const inliner = new ExternalMediaInliner({
+                PostModel: postModelMock
+            });
+
+            await inliner.inline(['https://img.stockfresh.com']);
+
+            assert.ok(requestMock.isDone());
+            assert.ok(logging.error.calledTwice);
+            assert.equal(logging.error.args[0][0], 'Error downloading remote media: https://img.stockfresh.com/files/f/image.jpg');
+        });
+
+        it('logs a warning when no suitable storage adapter found for inlined media extension', async function () {
+            const fileURL = 'https://img.stockfresh.com/files/f/inlined.exe';
+            const requestMock = nock('https://img.stockfresh.com')
+                .get('/files/f/inlined.exe')
+                .reply(200, GIF1x1);
+
+            const postModelStub = {
+                id: 'inlined-post-id',
+                get: sinon.stub()
+                    .withArgs('mobiledoc')
+                    .returns(`{"version":"0.3.1","atoms":[],"cards":[["image",{"src":"${fileURL}"}]]}`)
+            };
+            const postModelMock = {
+                findPage: sinon.stub().returns({
+                    data: [postModelStub]
+                }),
+                edit: sinon.stub().resolves()
+            };
+            const inliner = new ExternalMediaInliner({
+                PostModel: postModelMock,
+                getMediaStorage: sinon.stub().withArgs('.exe').returns(null)
+            });
+
+            await inliner.inline(['https://img.stockfresh.com']);
+
+            assert.ok(requestMock.isDone());
+            assert.ok(logging.warn.calledOnce);
+            assert.equal(logging.warn.args[0][0], 'No storage adapter found for file extension: .exe');
+        });
+
+        it('logs an error when handling post inlining throws an error', async function (){
+            const imageURL = 'https://img.stockfresh.com/files/f/image.jpg';
+            const requestMock = nock('https://img.stockfresh.com')
+                .get('/files/f/image.jpg')
+                .reply(200, GIF1x1);
+
+            const postModelStub = {
+                id: 'errored-post-id',
+                get: sinon.stub()
+                    .withArgs('mobiledoc')
+                    .returns(`{"version":"0.3.1","atoms":[],"cards":[["image",{"src":"${imageURL}"}]]}`)
+            };
+            const postModelMock = {
+                findPage: sinon.stub().returns({
+                    data: [postModelStub]
+                }),
+                edit: sinon.stub().throws(new Error('Error saving the post'))
+            };
+            const inliner = new ExternalMediaInliner({
+                PostModel: postModelMock,
+                getMediaStorage: sinon.stub().withArgs('.jpg').returns({
+                    getTargetDir: () => '/content/images',
+                    getUniqueFileName: () => '/content/images/unique-image.jpg',
+                    saveRaw: () => '/content/images/unique-image.jpg'
+                })
+            });
+
+            await inliner.inline(['https://img.stockfresh.com']);
+
+            assert.ok(requestMock.isDone());
+            assert.ok(postModelMock.edit.calledOnce);
+            assert.ok(logging.error.calledTwice);
+            assert.equal(logging.error.args[0][0], 'Error inlining media for post: errored-post-id');
+        });
     });
 });
diff --git a/yarn.lock b/yarn.lock
index 9fd1b40a72..9d2ad09318 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -19455,7 +19455,7 @@ mime-types@2.1.18:
   dependencies:
     mime-db "~1.33.0"
 
-mime-types@^2.1.12, mime-types@^2.1.18, mime-types@^2.1.26, mime-types@^2.1.27, mime-types@^2.1.31, mime-types@~2.1.17, mime-types@~2.1.19, mime-types@~2.1.24, mime-types@~2.1.34:
+mime-types@2.1.35, mime-types@^2.1.12, mime-types@^2.1.18, mime-types@^2.1.26, mime-types@^2.1.27, mime-types@^2.1.31, mime-types@~2.1.17, mime-types@~2.1.19, mime-types@~2.1.24, mime-types@~2.1.34:
   version "2.1.35"
   resolved "https://registry.yarnpkg.com/mime-types/-/mime-types-2.1.35.tgz#381a871b62a734450660ae3deee44813f70d959a"
   integrity sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==