Added media inliner for mobiledoc content

refs https://github.com/TryGhost/Toolbox/issues/523

- This is a first pass media inliner going through all posts and checking to inline media from specified domains
- As a working copy, the inliner looks for image content from Revue and Substack
This commit is contained in:
Naz 2023-03-03 19:08:18 +08:00
parent 0b2f88c100
commit 2ce992ed00
No known key found for this signature in database
6 changed files with 334 additions and 7 deletions

View File

@ -1,12 +1,47 @@
module.exports = {
async init() {
const debug = require('@tryghost/debug')('mediaInliner');
const MediaInliner = require('@tryghost/external-media-inliner');
const models = require('../../models');
const mediaStorage = require('../../adapters/storage').getStorage('media');
const imageStorage = require('../../adapters/storage').getStorage('images');
const fileStorage = require('../../adapters/storage').getStorage('files');
const config = require('../../../shared/config');
// Wire the inliner to the Post model and to a resolver that selects a storage
// adapter based on the extension of the file being inlined.
const mediaInliner = new MediaInliner({
    PostModel: models.Post,
    getMediaStorage: (extension) => {
        // Upload types are checked in this order; the first one whose configured
        // extension list contains the extension decides the adapter.
        const adaptersByUploadType = [
            ['images', imageStorage],
            ['media', mediaStorage],
            ['files', fileStorage]
        ];

        for (const [uploadType, adapter] of adaptersByUploadType) {
            if (config.get('uploads')[uploadType].extensions.includes(extension)) {
                return adapter;
            }
        }

        // No configured upload type accepts this extension
        return null;
    }
});
this.api = {
// @NOTE: the inlining should become an offloaded job
// startMediaInliner: mediaInliner.inlineMedia
startMediaInliner: (domains) => {
if (!domains || !domains.length) {
// default domains to inline from if none are provided
domains = [
'https://s3.amazonaws.com/revue',
'https://substackcdn.com'
];
}
debug('[Inliner] Starting media inlining job for domains: ', domains);
// @NOTE: the inlining should become an offloaded job
// startMediaInliner: mediaInliner.inlineMedia
mediaInliner.inline(domains);
return {
status: 'success'
};

View File

@ -84,6 +84,7 @@
"@tryghost/errors": "1.2.21",
"@tryghost/event-aware-cache-wrapper": "0.0.0",
"@tryghost/express-dynamic-redirects": "0.0.0",
"@tryghost/external-media-inliner": "0.0.0",
"@tryghost/helpers": "1.1.75",
"@tryghost/html-to-plaintext": "0.0.0",
"@tryghost/http-cache-utils": "0.1.7",

View File

@ -1,5 +1,139 @@
class ExternalMediaInliner {
const mime = require('mime-types');
const request = require('@tryghost/request');
const errors = require('@tryghost/errors');
const logging = require('@tryghost/logging');
/**
 * Downloads media referenced by posts' mobiledoc from a set of external
 * domains, stores it through a storage adapter and rewrites the mobiledoc
 * so that it points at the stored copy.
 */
class ExternalMediaInliner {
    /** @type {object} */
    #PostModel;

    /**
     *
     * @param {Object} deps
     * @param {Object} deps.PostModel - Post model
     * @param {(extension) => import('ghost-storage-base')} deps.getMediaStorage - getMediaStorage
     */
    constructor(deps) {
        this.#PostModel = deps.PostModel;
        this.getMediaStorage = deps.getMediaStorage;
    }

    /**
     * Escapes regular expression metacharacters so the text is matched literally.
     *
     * @param {string} text
     * @returns {string}
     */
    static #escapeRegExp(text) {
        return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    }

    /**
     * Downloads the remote media. Failures are logged and reported as `null`
     * so a single broken URL does not abort processing of the whole post.
     *
     * @param {string} requestURL - url of remote media
     * @returns {Promise<Object|null>} response, or null when the download failed
     */
    async #getRemoteMedia(requestURL) {
        try {
            return await request(requestURL, {
                followRedirect: true,
                encoding: null
            });
        } catch (error) {
            // NOTE: add special case for 404s
            logging.error(`Error downloading remote media: ${requestURL}`);
            logging.error(new errors.DataImportError({
                err: error
            }));

            return null;
        }
    }

    /**
     * Derives the file name, extension and content of a downloaded media file.
     *
     * @param {string} requestURL - url the media was downloaded from
     * @param {Object} response - response from request
     * @returns {{fileBuffer: any, filename: string, extension: string}}
     */
    #extractFileDataFromResponse(requestURL, response) {
        const contentType = response.headers?.['content-type'];

        // Strip the fragment and the query BEFORE taking the last path segment,
        // otherwise a "/" inside the query string would be mistaken for a path separator
        const filename = requestURL
            .split('#')[0]
            .split('?')[0]
            .split('/')
            .pop();

        // Prefer the extension implied by the content type, fall back to the file name
        const extension = mime.extension(contentType) || filename.split('.').pop();

        return {
            fileBuffer: response.body,
            filename: filename,
            extension: `.${extension}`
        };
    }

    /**
     * Saves the media through the storage adapter matching its extension.
     *
     * @param {{fileBuffer: any, filename: string, extension: string}} media
     * @returns {Promise<string|null>} `__GHOST_URL__`-prefixed path of the stored file,
     *                                 or null when no storage adapter accepts the extension
     */
    async #storeMedia(media) {
        const storage = this.getMediaStorage(media.extension);

        if (!storage) {
            logging.warn(`No storage adapter found for file extension: ${media.extension}`);
            return null;
        }

        const targetDir = storage.getTargetDir(storage.storagePath);
        const uniqueFileName = await storage.getUniqueFileName({
            name: media.filename
        }, targetDir);
        const filePath = await storage.saveRaw(media.fileBuffer, uniqueFileName);

        return `__GHOST_URL__${filePath}`;
    }

    /**
     * Inlines every `"src"` value of the mobiledoc that starts with one of the domains.
     *
     * @param {string} mobiledoc - serialized mobiledoc of a post
     * @param {string[]} domains - URL prefixes to inline media from
     * @returns {Promise<string>} mobiledoc with the inlined sources (unchanged when nothing was inlined)
     */
    async #inlinePost(mobiledoc, domains) {
        // Posts without mobiledoc content have nothing to inline
        if (typeof mobiledoc !== 'string' || mobiledoc === '') {
            return mobiledoc;
        }

        let result = mobiledoc;

        for (const domain of domains) {
            // The domain is escaped so that e.g. "." only matches a literal dot
            const regex = new RegExp(`"src":"(${ExternalMediaInliner.#escapeRegExp(domain)}.*?)"`, 'gi');

            // De-duplicate sources so a file used several times is downloaded and stored once
            const sources = new Set(Array.from(result.matchAll(regex), ([, src]) => src));
            const inlinedSources = new Map();

            for (const src of sources) {
                const response = await this.#getRemoteMedia(src);

                if (!response) {
                    continue;
                }

                const media = this.#extractFileDataFromResponse(src, response);
                const inlinedSrc = await this.#storeMedia(media);

                if (inlinedSrc) {
                    inlinedSources.set(src, inlinedSrc);
                    logging.info('Inlined media: ', src, ' -> ', inlinedSrc);
                }
            }

            if (inlinedSources.size > 0) {
                // A replacer function is used so that "$" sequences in the stored path are
                // never interpreted as replacement patterns, and every occurrence is updated
                result = result.replace(regex, (match, src) => {
                    if (!inlinedSources.has(src)) {
                        return match;
                    }

                    // Keep the matched `"src":"` prefix, swap only the captured URL
                    const prefix = match.slice(0, match.length - src.length - 1);
                    return `${prefix}${inlinedSources.get(src)}"`;
                });
            }
        }

        return result;
    }

    /**
     * Goes through all posts and inlines media hosted on the given domains.
     * Errors are logged per post and do not stop the processing of other posts.
     *
     * @param {string[]} domains domains to inline media from
     */
    async inline(domains) {
        const {data: posts} = await this.#PostModel.findPage({
            limit: 'all',
            status: 'all'
        });

        logging.info('Starting inlining external media for posts: ', posts?.length);

        for (const post of posts ?? []) {
            try {
                const mobiledoc = post.get('mobiledoc');
                const inlinedMobiledoc = await this.#inlinePost(mobiledoc, domains);

                // Only persist posts whose content actually changed
                if (inlinedMobiledoc !== mobiledoc) {
                    await this.#PostModel.edit({
                        mobiledoc: inlinedMobiledoc
                    }, {
                        id: post.id
                    });
                }
            } catch (err) {
                logging.error(`Error inlining media for post: ${post.id}`);
                logging.error(new errors.DataImportError({
                    err
                }));
            }
        }

        logging.info('Finished inlining external media');
    }
}
module.exports = ExternalMediaInliner;

View File

@ -22,5 +22,7 @@
"mocha": "10.2.0",
"sinon": "15.0.1"
},
"dependencies": {}
"dependencies": {
"mime-types": "2.1.35"
}
}

View File

@ -1,8 +1,163 @@
const assert = require('assert');
const sinon = require('sinon');
const nock = require('nock');
const loggingLib = require('@tryghost/logging');
const ExternalMediaInliner = require('../index');
describe('ExternalMediaInliner', function () {
it('Creates an instance', function () {
assert.ok(new ExternalMediaInliner());
let logging;
let GIF1x1;
beforeEach(function () {
    // A 1x1 GIF serves as the body of mocked responses: tiny and trivial to handle
    const encodedPixel = 'R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==';
    GIF1x1 = Buffer.from(encodedPixel, 'base64');

    // Replace the logging methods with stubs so the tests can assert on them
    logging = {};
    for (const level of ['info', 'error', 'warn']) {
        logging[level] = sinon.stub(loggingLib, level);
    }
});

afterEach(function () {
    // Undo the logging stubs and drop any nock interceptors left over from the test
    sinon.restore();
    nock.cleanAll();
});

it('Creates an External Media Inliner instance', function () {
    const inliner = new ExternalMediaInliner({});

    assert.ok(inliner);
});
describe('inline', function () {
    const REMOTE_HOST = 'https://img.stockfresh.com';

    // Serialized mobiledoc holding a single image card pointing at the given source
    const mobiledocWithImage = src => `{"version":"0.3.1","atoms":[],"cards":[["image",{"src":"${src}"}]]}`;

    // Minimal stand-in for a post model instance
    const createPostStub = (id, src) => ({
        id,
        get: sinon.stub()
            .withArgs('mobiledoc')
            .returns(mobiledocWithImage(src))
    });

    // Minimal stand-in for the Post model returning a single post from findPage
    const createPostModelMock = (post, extras = {}) => ({
        findPage: sinon.stub().returns({
            data: [post]
        }),
        ...extras
    });

    // Storage resolver handing out an adapter that always stores to the same path
    const createImageStorageResolver = () => sinon.stub().withArgs('.jpg').returns({
        getTargetDir: () => '/content/images',
        getUniqueFileName: () => '/content/images/unique-image.jpg',
        saveRaw: () => '/content/images/unique-image.jpg'
    });

    // Registers a mocked response for a path on the remote host
    const mockRemote = (path, ...replyArgs) => nock(REMOTE_HOST)
        .get(path)
        .reply(...replyArgs);

    it('inlines image in the post\'s mobiledoc content', async function () {
        const requestMock = mockRemote('/files/f/image.jpg', 200, GIF1x1);
        const postModelMock = createPostModelMock(
            createPostStub('inlined-post-id', 'https://img.stockfresh.com/files/f/image.jpg'),
            {edit: sinon.stub().resolves()}
        );
        const inliner = new ExternalMediaInliner({
            PostModel: postModelMock,
            getMediaStorage: createImageStorageResolver()
        });

        await inliner.inline(['https://img.stockfresh.com']);

        assert.ok(requestMock.isDone());
        assert.ok(postModelMock.edit.calledOnce);
        assert.ok(postModelMock.edit.calledWith({
            mobiledoc: '{"version":"0.3.1","atoms":[],"cards":[["image",{"src":"__GHOST_URL__/content/images/unique-image.jpg"}]]}'
        }, {
            id: 'inlined-post-id'
        }));
    });

    it('logs an error when fetching an external media fails', async function () {
        const requestMock = mockRemote('/files/f/image.jpg', 404);
        const postModelMock = createPostModelMock(
            createPostStub('inlined-post-id', 'https://img.stockfresh.com/files/f/image.jpg')
        );
        const inliner = new ExternalMediaInliner({
            PostModel: postModelMock
        });

        await inliner.inline(['https://img.stockfresh.com']);

        assert.ok(requestMock.isDone());
        assert.ok(logging.error.calledTwice);
        assert.equal(logging.error.args[0][0], 'Error downloading remote media: https://img.stockfresh.com/files/f/image.jpg');
    });

    it('logs a warning when no suitable storage adapter found for inlined media extension', async function () {
        const requestMock = mockRemote('/files/f/inlined.exe', 200, GIF1x1);
        const postModelMock = createPostModelMock(
            createPostStub('inlined-post-id', 'https://img.stockfresh.com/files/f/inlined.exe'),
            {edit: sinon.stub().resolves()}
        );
        const inliner = new ExternalMediaInliner({
            PostModel: postModelMock,
            getMediaStorage: sinon.stub().withArgs('.exe').returns(null)
        });

        await inliner.inline(['https://img.stockfresh.com']);

        assert.ok(requestMock.isDone());
        assert.ok(logging.warn.calledOnce);
        assert.equal(logging.warn.args[0][0], 'No storage adapter found for file extension: .exe');
    });

    it('logs an error when handling post inlining throws an error', async function (){
        const requestMock = mockRemote('/files/f/image.jpg', 200, GIF1x1);
        const postModelMock = createPostModelMock(
            createPostStub('errored-post-id', 'https://img.stockfresh.com/files/f/image.jpg'),
            {edit: sinon.stub().throws(new Error('Error saving the post'))}
        );
        const inliner = new ExternalMediaInliner({
            PostModel: postModelMock,
            getMediaStorage: createImageStorageResolver()
        });

        await inliner.inline(['https://img.stockfresh.com']);

        assert.ok(requestMock.isDone());
        assert.ok(postModelMock.edit.calledOnce);
        assert.ok(logging.error.calledTwice);
        assert.equal(logging.error.args[0][0], 'Error inlining media for post: errored-post-id');
    });
});
});

View File

@ -19455,7 +19455,7 @@ mime-types@2.1.18:
dependencies:
mime-db "~1.33.0"
mime-types@^2.1.12, mime-types@^2.1.18, mime-types@^2.1.26, mime-types@^2.1.27, mime-types@^2.1.31, mime-types@~2.1.17, mime-types@~2.1.19, mime-types@~2.1.24, mime-types@~2.1.34:
mime-types@2.1.35, mime-types@^2.1.12, mime-types@^2.1.18, mime-types@^2.1.26, mime-types@^2.1.27, mime-types@^2.1.31, mime-types@~2.1.17, mime-types@~2.1.19, mime-types@~2.1.24, mime-types@~2.1.34:
version "2.1.35"
resolved "https://registry.yarnpkg.com/mime-types/-/mime-types-2.1.35.tgz#381a871b62a734450660ae3deee44813f70d959a"
integrity sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==