Added media inliner for mobiledoc content
refs https://github.com/TryGhost/Toolbox/issues/523 - This is a first-pass media inliner that goes through all posts and inlines media hosted on the specified domains. - As a working default, the inliner looks for image content from Revue and Substack.
This commit is contained in:
parent
0b2f88c100
commit
2ce992ed00
@ -1,12 +1,47 @@
|
||||
module.exports = {
|
||||
async init() {
|
||||
const debug = require('@tryghost/debug')('mediaInliner');
|
||||
const MediaInliner = require('@tryghost/external-media-inliner');
|
||||
const models = require('../../models');
|
||||
|
||||
const mediaStorage = require('../../adapters/storage').getStorage('media');
|
||||
const imageStorage = require('../../adapters/storage').getStorage('images');
|
||||
const fileStorage = require('../../adapters/storage').getStorage('files');
|
||||
|
||||
const config = require('../../../shared/config');
|
||||
|
||||
const mediaInliner = new MediaInliner({
    PostModel: models.Post,
    /**
     * Picks the storage adapter whose configured upload extensions include
     * the given one. Upload types are checked in order: images, media, files.
     *
     * @param {string} extension - file extension including the leading dot
     * @returns {Object|null} matching storage adapter, or null when no upload type accepts the extension
     */
    getMediaStorage: (extension) => {
        const candidates = [
            ['images', imageStorage],
            ['media', mediaStorage],
            ['files', fileStorage]
        ];

        for (const [uploadType, storage] of candidates) {
            // config is read lazily per check, so runtime config changes are picked up
            if (config.get('uploads')[uploadType].extensions.includes(extension)) {
                return storage;
            }
        }

        return null;
    }
});
|
||||
|
||||
this.api = {
|
||||
// @NOTE: the inlining should become an offloaded job
|
||||
// startMediaInliner: mediaInliner.inlineMedia
|
||||
|
||||
startMediaInliner: (domains) => {
|
||||
if (!domains || !domains.length) {
|
||||
// default domains to inline from if none are provided
|
||||
domains = [
|
||||
'https://s3.amazonaws.com/revue',
|
||||
'https://substackcdn.com'
|
||||
];
|
||||
}
|
||||
|
||||
debug('[Inliner] Starting media inlining job for domains: ', domains);
|
||||
|
||||
// @NOTE: the inlining should become an offloaded job
|
||||
// startMediaInliner: mediaInliner.inlineMedia
|
||||
mediaInliner.inline(domains);
|
||||
|
||||
return {
|
||||
status: 'success'
|
||||
};
|
||||
|
@ -84,6 +84,7 @@
|
||||
"@tryghost/errors": "1.2.21",
|
||||
"@tryghost/event-aware-cache-wrapper": "0.0.0",
|
||||
"@tryghost/express-dynamic-redirects": "0.0.0",
|
||||
"@tryghost/external-media-inliner": "0.0.0",
|
||||
"@tryghost/helpers": "1.1.75",
|
||||
"@tryghost/html-to-plaintext": "0.0.0",
|
||||
"@tryghost/http-cache-utils": "0.1.7",
|
||||
|
@ -1,5 +1,139 @@
|
||||
class ExternalMediaInliner {
|
||||
const mime = require('mime-types');
|
||||
const request = require('@tryghost/request');
|
||||
const errors = require('@tryghost/errors');
|
||||
const logging = require('@tryghost/logging');
|
||||
|
||||
/**
 * Downloads media referenced by posts' mobiledoc from a set of external
 * domains, stores it through the configured storage adapters and rewrites the
 * mobiledoc `src` values to point at the stored copies.
 */
class ExternalMediaInliner {
    /** @type {object} */
    #PostModel;

    /**
     *
     * @param {Object} deps
     * @param {Object} deps.PostModel - Post model
     * @param {(extension) => import('ghost-storage-base')} deps.getMediaStorage - returns the storage adapter for a file extension (with leading dot), or a falsy value when none applies
     */
    constructor(deps) {
        this.#PostModel = deps.PostModel;
        this.getMediaStorage = deps.getMediaStorage;
    }

    /**
     * Fetches the remote media. Failures are logged and reported as `null`
     * so that one broken URL does not abort the whole post.
     *
     * @param {string} requestURL - url of remote media
     * @returns {Promise<Object|null>} response from request, or null when the download failed
     */
    async #getRemoteMedia(requestURL) {
        try {
            return await request(requestURL, {
                followRedirect: true,
                encoding: null
            });
        } catch (error) {
            // NOTE: add special case for 404s
            logging.error(`Error downloading remote media: ${requestURL}`);
            logging.error(new errors.DataImportError({
                err: error
            }));

            return null;
        }
    }

    /**
     * Derives the file name and extension for a downloaded media file.
     *
     * @param {string} requestURL - url the media was downloaded from
     * @param {Object} response - response from request
     * @returns {{fileBuffer: any, filename: string, extension: string}} extension is lower-cased
     *          and includes the leading dot, or is an empty string when it cannot be determined
     */
    #extractFileDataFromResponse(requestURL, response) {
        const contentType = response.headers?.['content-type'];

        // Strip fragment and query BEFORE taking the last path segment, so that
        // a "/" inside a query string is not mistaken for a path separator
        const filename = requestURL
            .split('#')[0]
            .split('?')[0]
            .split('/')
            .pop();

        // Only treat the part after the last dot as an extension when there is a dot;
        // otherwise the whole file name would be reported as the extension
        const dotIndex = filename.lastIndexOf('.');
        const filenameExtension = dotIndex === -1 ? '' : filename.slice(dotIndex + 1);

        const extension = ((contentType && mime.extension(contentType)) || filenameExtension).toLowerCase();

        return {
            fileBuffer: response.body,
            filename: filename,
            extension: extension ? `.${extension}` : ''
        };
    }

    /**
     * Downloads a single remote media file and saves it via the matching storage adapter.
     *
     * @param {string} src - url of remote media
     * @returns {Promise<string|null>} the `__GHOST_URL__`-prefixed location of the stored file,
     *          or null when the download failed or no storage adapter handles the extension
     */
    async #storeRemoteMedia(src) {
        const response = await this.#getRemoteMedia(src);

        if (!response) {
            return null;
        }

        const media = this.#extractFileDataFromResponse(src, response);
        const storage = this.getMediaStorage(media.extension);

        if (!storage) {
            logging.warn(`No storage adapter found for file extension: ${media.extension}`);
            return null;
        }

        // NOTE(review): confirm that saveRaw accepts the path exactly as returned by
        // getUniqueFileName (absolute vs. relative to storage.storagePath)
        const targetDir = storage.getTargetDir(storage.storagePath);
        const uniqueFileName = await storage.getUniqueFileName({
            name: media.filename
        }, targetDir);
        const filePath = await storage.saveRaw(media.fileBuffer, uniqueFileName);
        const inlinedSrc = `__GHOST_URL__${filePath}`;

        logging.info('Inlined media: ', src, ' -> ', inlinedSrc);

        return inlinedSrc;
    }

    /**
     * Inlines all `"src"` values in the mobiledoc that start with one of the domains.
     *
     * @param {string} mobiledoc - serialized mobiledoc
     * @param {string[]} domains - URL prefixes to inline media from
     * @returns {Promise<string>} mobiledoc with inlined sources (returned untouched when it is not a string)
     */
    async #inlinePost(mobiledoc, domains) {
        // Posts without mobiledoc content have nothing to inline
        if (typeof mobiledoc !== 'string') {
            return mobiledoc;
        }

        for (const domain of domains) {
            // The domain is matched literally: escape regex metacharacters so that
            // e.g. "." does not match any character
            const escapedDomain = domain.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
            const regex = new RegExp(`"src":"(${escapedDomain}.*?)"`, 'igm');

            // De-duplicate so that media used several times is downloaded and stored once
            const sources = new Set(Array.from(mobiledoc.matchAll(regex), ([, src]) => src));
            const inlinedSources = new Map();

            for (const src of sources) {
                const inlinedSrc = await this.#storeRemoteMedia(src);

                if (inlinedSrc) {
                    inlinedSources.set(src, inlinedSrc);
                }
            }

            if (inlinedSources.size > 0) {
                // Rewrite only the matched "src" values (all occurrences). A replacer function
                // is used so that "$" sequences in the stored path are not interpreted
                mobiledoc = mobiledoc.replace(regex, (match, src) => {
                    const inlinedSrc = inlinedSources.get(src);

                    if (inlinedSrc === undefined) {
                        return match;
                    }

                    // match is `<key prefix><src>"` - keep the prefix, swap the url
                    return `${match.slice(0, -(src.length + 1))}${inlinedSrc}"`;
                });
            }
        }

        return mobiledoc;
    }

    /**
     * Goes through all posts and inlines media hosted on the given domains.
     * A failure while processing one post is logged and does not stop the remaining posts.
     *
     * @param {string[]} domains domains to inline media from
     */
    async inline(domains) {
        const {data: posts} = await this.#PostModel.findPage({
            limit: 'all',
            status: 'all'
        });

        logging.info('Starting inlining external media for posts: ', posts?.length);

        for (const post of posts ?? []) {
            try {
                const mobiledoc = post.get('mobiledoc');
                const inlinedMobiledoc = await this.#inlinePost(mobiledoc, domains);

                // Only write to the database when something was actually inlined
                if (inlinedMobiledoc !== mobiledoc) {
                    await this.#PostModel.edit({
                        mobiledoc: inlinedMobiledoc
                    }, {
                        id: post.id
                    });
                }
            } catch (err) {
                logging.error(`Error inlining media for post: ${post.id}`);
                logging.error(new errors.DataImportError({
                    err
                }));
            }
        }

        logging.info('Finished inlining external media');
    }
}
|
||||
|
||||
module.exports = ExternalMediaInliner;
|
||||
|
@ -22,5 +22,7 @@
|
||||
"mocha": "10.2.0",
|
||||
"sinon": "15.0.1"
|
||||
},
|
||||
"dependencies": {}
|
||||
"dependencies": {
|
||||
"mime-types": "2.1.35"
|
||||
}
|
||||
}
|
||||
|
@ -1,8 +1,163 @@
|
||||
const assert = require('assert');
|
||||
const sinon = require('sinon');
|
||||
const nock = require('nock');
|
||||
const loggingLib = require('@tryghost/logging');
|
||||
const ExternalMediaInliner = require('../index');
|
||||
|
||||
// Unit tests for ExternalMediaInliner. Remote downloads are intercepted with nock,
// and the Post model and storage adapters are replaced with sinon stubs.
describe('ExternalMediaInliner', function () {
    const MEDIA_HOST = 'https://img.stockfresh.com';

    let logging;
    let GIF1x1;

    // Serialized mobiledoc containing a single image card pointing at `src`
    const mobiledocWithImage = src => `{"version":"0.3.1","atoms":[],"cards":[["image",{"src":"${src}"}]]}`;

    // Post stub whose `get` answers only for 'mobiledoc'. The parent stub is handed out
    // (not the `withArgs` fake) so that other attribute lookups yield undefined.
    const createPost = (id, src) => {
        const get = sinon.stub();
        get.withArgs('mobiledoc').returns(mobiledocWithImage(src));

        return {id, get};
    };

    // Post model mock returning a single post; `edit` is only attached when provided
    const createPostModel = (post, edit) => {
        const postModel = {
            findPage: sinon.stub().returns({
                data: [post]
            })
        };

        if (edit) {
            postModel.edit = edit;
        }

        return postModel;
    };

    // getMediaStorage stub that answers only for the expected extension
    const createStorageLookup = (extension, storage) => {
        const getMediaStorage = sinon.stub();
        getMediaStorage.withArgs(extension).returns(storage);

        return getMediaStorage;
    };

    const createImageStorage = () => ({
        getTargetDir: () => '/content/images',
        getUniqueFileName: () => '/content/images/unique-image.jpg',
        saveRaw: () => '/content/images/unique-image.jpg'
    });

    beforeEach(function () {
        // use a 1x1 gif in nock responses because it's really small and easy to work with
        GIF1x1 = Buffer.from('R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==', 'base64');
        logging = {
            info: sinon.stub(loggingLib, 'info'),
            error: sinon.stub(loggingLib, 'error'),
            warn: sinon.stub(loggingLib, 'warn')
        };
    });

    afterEach(function () {
        sinon.restore();
        nock.cleanAll();
    });

    it('Creates an External Media Inliner instance', function () {
        assert.ok(new ExternalMediaInliner({}));
    });

    describe('inline', function () {
        it('inlines image in the post\'s mobiledoc content', async function () {
            const requestMock = nock(MEDIA_HOST)
                .get('/files/f/image.jpg')
                .reply(200, GIF1x1);

            const postModelMock = createPostModel(
                createPost('inlined-post-id', `${MEDIA_HOST}/files/f/image.jpg`),
                sinon.stub().resolves()
            );
            const inliner = new ExternalMediaInliner({
                PostModel: postModelMock,
                getMediaStorage: createStorageLookup('.jpg', createImageStorage())
            });

            await inliner.inline([MEDIA_HOST]);

            assert.ok(requestMock.isDone());
            assert.ok(postModelMock.edit.calledOnce);
            assert.ok(postModelMock.edit.calledWith({
                mobiledoc: '{"version":"0.3.1","atoms":[],"cards":[["image",{"src":"__GHOST_URL__/content/images/unique-image.jpg"}]]}'
            }, {
                id: 'inlined-post-id'
            }));
        });

        it('logs an error when fetching an external media fails', async function () {
            const requestMock = nock(MEDIA_HOST)
                .get('/files/f/image.jpg')
                .reply(404);

            const inliner = new ExternalMediaInliner({
                PostModel: createPostModel(createPost('inlined-post-id', `${MEDIA_HOST}/files/f/image.jpg`))
            });

            await inliner.inline([MEDIA_HOST]);

            assert.ok(requestMock.isDone());
            assert.ok(logging.error.calledTwice);
            assert.equal(logging.error.args[0][0], 'Error downloading remote media: https://img.stockfresh.com/files/f/image.jpg');
        });

        it('logs a warning when no suitable storage adapter found for inlined media extension', async function () {
            const requestMock = nock(MEDIA_HOST)
                .get('/files/f/inlined.exe')
                .reply(200, GIF1x1);

            const inliner = new ExternalMediaInliner({
                PostModel: createPostModel(
                    createPost('inlined-post-id', `${MEDIA_HOST}/files/f/inlined.exe`),
                    sinon.stub().resolves()
                ),
                getMediaStorage: createStorageLookup('.exe', null)
            });

            await inliner.inline([MEDIA_HOST]);

            assert.ok(requestMock.isDone());
            assert.ok(logging.warn.calledOnce);
            assert.equal(logging.warn.args[0][0], 'No storage adapter found for file extension: .exe');
        });

        it('logs an error when handling post inlining throws an error', async function () {
            const requestMock = nock(MEDIA_HOST)
                .get('/files/f/image.jpg')
                .reply(200, GIF1x1);

            const postModelMock = createPostModel(
                createPost('errored-post-id', `${MEDIA_HOST}/files/f/image.jpg`),
                sinon.stub().throws(new Error('Error saving the post'))
            );
            const inliner = new ExternalMediaInliner({
                PostModel: postModelMock,
                getMediaStorage: createStorageLookup('.jpg', createImageStorage())
            });

            await inliner.inline([MEDIA_HOST]);

            assert.ok(requestMock.isDone());
            assert.ok(postModelMock.edit.calledOnce);
            assert.ok(logging.error.calledTwice);
            assert.equal(logging.error.args[0][0], 'Error inlining media for post: errored-post-id');
        });
    });
});
|
||||
|
@ -19455,7 +19455,7 @@ mime-types@2.1.18:
|
||||
dependencies:
|
||||
mime-db "~1.33.0"
|
||||
|
||||
mime-types@^2.1.12, mime-types@^2.1.18, mime-types@^2.1.26, mime-types@^2.1.27, mime-types@^2.1.31, mime-types@~2.1.17, mime-types@~2.1.19, mime-types@~2.1.24, mime-types@~2.1.34:
|
||||
mime-types@2.1.35, mime-types@^2.1.12, mime-types@^2.1.18, mime-types@^2.1.26, mime-types@^2.1.27, mime-types@^2.1.31, mime-types@~2.1.17, mime-types@~2.1.19, mime-types@~2.1.24, mime-types@~2.1.34:
|
||||
version "2.1.35"
|
||||
resolved "https://registry.yarnpkg.com/mime-types/-/mime-types-2.1.35.tgz#381a871b62a734450660ae3deee44813f70d959a"
|
||||
integrity sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==
|
||||
|
Loading…
Reference in New Issue
Block a user