Extended external media inlining

refs https://github.com/TryGhost/Toolbox/issues/524

- We need to be able to inline external media in all internal resources: tags, users, and posts' meta fields.
- This change adds media inlining logic to all of these resources.
This commit is contained in:
Naz 2023-03-06 22:50:24 +08:00
parent 47b6c0b61d
commit 6d4dcdf45d
No known key found for this signature in database
3 changed files with 478 additions and 39 deletions

View File

@ -13,6 +13,9 @@ module.exports = {
const mediaInliner = new MediaInliner({
PostModel: models.Post,
TagModel: models.Tag,
UserModel: models.User,
PostMetaModel: models.PostsMeta,
getMediaStorage: (extension) => {
if (config.get('uploads').images.extensions.includes(extension)) {
return imageStorage;

View File

@ -7,14 +7,29 @@ class ExternalMediaInliner {
/** @type {object} */
#PostModel;
/** @type {object} */
#PostMetaModel;
/** @type {object} */
#TagModel;
/** @type {object} */
#UserModel;
/**
*
* @param {Object} deps
* @param {Object} deps.PostModel - Post model
* @param {Object} deps.PostMetaModel - PostMeta model
* @param {Object} deps.TagModel - Tag model
* @param {Object} deps.UserModel - User model
* @param {(extension) => import('ghost-storage-base')} deps.getMediaStorage - getMediaStorage
*/
constructor(deps) {
this.#PostModel = deps.PostModel;
this.#PostMetaModel = deps.PostMetaModel;
this.#TagModel = deps.TagModel;
this.#UserModel = deps.UserModel;
this.getMediaStorage = deps.getMediaStorage;
}
@ -64,7 +79,28 @@ class ExternalMediaInliner {
};
}
async #inlinePost(mobiledoc, domains) {
/**
*
* @param {Object} media - media to store locally
* @returns {Promise<string>} - path to stored media
*/
async #storeMediaLocally(media) {
const storage = this.getMediaStorage(media.extension);
if (!storage) {
logging.warn(`No storage adapter found for file extension: ${media.extension}`);
return null;
} else {
const targetDir = storage.getTargetDir(storage.storagePath);
const uniqueFileName = await storage.getUniqueFileName({
name: media.filename
}, targetDir);
const filePath = await storage.saveRaw(media.fileBuffer, uniqueFileName);
return filePath;
}
}
async #inlineMibiledoc(mobiledoc, domains) {
for (const domain of domains) {
const regex = new RegExp(`"src":"(${domain}.*?)"`, 'igm');
const matches = mobiledoc.matchAll(regex);
@ -78,22 +114,15 @@ class ExternalMediaInliner {
}
if (media) {
const storage = this.getMediaStorage(media.extension);
const filePath = await this.#storeMediaLocally(media);
if (!storage) {
logging.warn(`No storage adapter found for file extension: ${media.extension}`);
} else {
const targetDir = storage.getTargetDir(storage.storagePath);
const uniqueFileName = await storage.getUniqueFileName({
name: media.filename
}, targetDir);
const filePath = await storage.saveRaw(media.fileBuffer, uniqueFileName);
if (filePath) {
const inlinedSrc = `__GHOST_URL__${filePath}`;
// NOTE: does not account for duplicate images in mobiledoc
// in those cases would be processed twice
mobiledoc = mobiledoc.replace(src, inlinedSrc);
logging.info('Inlined media: ', src, ' -> ', inlinedSrc);
logging.info(`Inlined media: ${src} -> ${inlinedSrc}`);
}
}
}
@ -102,6 +131,76 @@ class ExternalMediaInliner {
return mobiledoc;
}
/**
*
* @param {Object} resourceModel - one of PostModel, TagModel, UserModel instances
* @param {String[]} fields - fields to inline
* @param {String[]} domains - domains to inline media from
* @returns Promise<Object> - updated fields map with local media paths
*/
async #inlineFields(resourceModel, fields, domains) {
const updatedFields = {};
for (const field of fields) {
for (const domain of domains) {
const src = resourceModel.get(field);
if (src && src.startsWith(domain)) {
const response = await this.#getRemoteMedia(src);
let media;
if (response) {
media = this.#extractFileDataFromResponse(src, response);
}
if (media) {
const filePath = await this.#storeMediaLocally(media);
if (filePath) {
const inlinedSrc = `__GHOST_URL__${filePath}`;
updatedFields[field] = inlinedSrc;
logging.info(`Added media to inline: ${src} -> ${inlinedSrc}`);
}
}
}
}
}
return updatedFields;
}
/**
*
* @param {Object[]} resources - array of model instances
* @param {Object} model - resource model
* @param {string[]} fields - fields to inline
* @param {string[]} domains - domains to inline media from
*/
async #inlineSimpleFields(resources, model, fields, domains) {
logging.info(`Starting inlining external media for ${resources?.length} ${model.tableName}`);
for (const resource of resources) {
try {
const updatedFields = await this.#inlineFields(resource, fields, domains);
if (Object.keys(updatedFields).length > 0) {
await model.edit(updatedFields, {
id: resource.id,
context: {
internal: true
}
});
}
} catch (err) {
logging.error(`Error inlining media for ${model.tableName}: ${resource.id}`);
logging.error(new errors.DataImportError({
err
}));
}
}
}
/**
*
* @param {string[]} domains domains to inline media from
@ -111,17 +210,27 @@ class ExternalMediaInliner {
limit: 'all',
status: 'all'
});
const postsInilingFields = [
'feature_image'
];
logging.info(`Starting inlining external media for posts: ${posts?.length}`);
logging.info('Starting inlining external media for posts: ', posts?.length);
for (const post of posts) {
try {
const inlinedMobiledoc = await this.#inlinePost(post.get('mobiledoc'), domains);
const inlinedMobiledoc = await this.#inlineMibiledoc(post.get('mobiledoc'), domains);
const updatedFields = await this.#inlineFields(post, postsInilingFields, domains);
if (inlinedMobiledoc !== post.get('mobiledoc')) {
await this.#PostModel.edit({
mobiledoc: inlinedMobiledoc
}, {
id: post.id
updatedFields.mobiledoc = inlinedMobiledoc;
}
if (Object.keys(updatedFields).length > 0) {
await this.#PostModel.edit(updatedFields, {
id: post.id,
context: {
internal: true
}
});
}
} catch (err) {
@ -132,7 +241,38 @@ class ExternalMediaInliner {
}
}
logging.info('Finished inlining external media');
const {data: postsMetas} = await this.#PostMetaModel.findPage({
limit: 'all'
});
const postsMetaInilingFields = [
'og_image',
'twitter_image'
];
await this.#inlineSimpleFields(postsMetas, this.#PostMetaModel, postsMetaInilingFields, domains);
const {data: tags} = await this.#TagModel.findPage({
limit: 'all'
});
const tagInliningFields = [
'feature_image',
'og_image',
'twitter_image'
];
await this.#inlineSimpleFields(tags, this.#TagModel, tagInliningFields, domains);
const {data: users} = await this.#UserModel.findPage({
limit: 'all'
});
const userInliningFields = [
'profile_image',
'cover_image'
];
await this.#inlineSimpleFields(users, this.#UserModel, userInliningFields, domains);
logging.info('Finished inlining external media for posts, tags, and users');
}
}

View File

@ -7,6 +7,10 @@ const ExternalMediaInliner = require('../index');
describe('ExternalMediaInliner', function () {
let logging;
let GIF1x1;
let postModelStub;
let postMetaModelStub;
let tagModelStub;
let userModelStub;
beforeEach(function () {
// use a 1x1 gif in nock responses because it's really small and easy to work with
@ -16,6 +20,35 @@ describe('ExternalMediaInliner', function () {
error: sinon.stub(loggingLib, 'error'),
warn: sinon.stub(loggingLib, 'warn')
};
postModelStub = {
tableName: 'posts',
findPage: sinon.stub().resolves({
data: []
}),
edit: sinon.stub().resolves()
};
postMetaModelStub = {
tableName: 'posts_meta',
findPage: sinon.stub().resolves({
data: []
}),
edit: sinon.stub().resolves()
};
tagModelStub = {
tableName: 'tags',
findPage: sinon.stub().resolves({
data: []
}),
edit: sinon.stub().resolves()
};
userModelStub = {
tableName: 'users',
findPage: sinon.stub().resolves({
data: []
}),
edit: sinon.stub().resolves()
};
});
afterEach(function () {
@ -34,20 +67,24 @@ describe('ExternalMediaInliner', function () {
.get('/files/f/image.jpg')
.reply(200, GIF1x1);
const postModelStub = {
const postModelInstanceStub = {
id: 'inlined-post-id',
get: sinon.stub()
.withArgs('mobiledoc')
.returns(`{"version":"0.3.1","atoms":[],"cards":[["image",{"src":"${imageURL}"}]]}`)
};
const postModelMock = {
postModelStub = {
findPage: sinon.stub().returns({
data: [postModelStub]
data: [postModelInstanceStub]
}),
edit: sinon.stub().resolves()
};
const inliner = new ExternalMediaInliner({
PostModel: postModelMock,
PostModel: postModelStub,
PostMetaModel: postMetaModelStub,
TagModel: tagModelStub,
UserModel: userModelStub,
getMediaStorage: sinon.stub().withArgs('.jpg').returns({
getTargetDir: () => '/content/images',
getUniqueFileName: () => '/content/images/unique-image.jpg',
@ -58,11 +95,14 @@ describe('ExternalMediaInliner', function () {
await inliner.inline(['https://img.stockfresh.com']);
assert.ok(requestMock.isDone());
assert.ok(postModelMock.edit.calledOnce);
assert.ok(postModelMock.edit.calledWith({
assert.ok(postModelStub.edit.calledOnce);
assert.ok(postModelStub.edit.calledWith({
mobiledoc: '{"version":"0.3.1","atoms":[],"cards":[["image",{"src":"__GHOST_URL__/content/images/unique-image.jpg"}]]}'
}, {
id: 'inlined-post-id'
id: 'inlined-post-id',
context: {
internal: true
}
}));
});
@ -71,50 +111,86 @@ describe('ExternalMediaInliner', function () {
const requestMock = nock('https://img.stockfresh.com')
.get('/files/f/image.jpg')
.reply(404);
const postModelStub = {
const postModelInstanceStub = {
id: 'inlined-post-id',
get: sinon.stub()
.withArgs('mobiledoc')
.returns(`{"version":"0.3.1","atoms":[],"cards":[["image",{"src":"${imageURL}"}]]}`)
};
const postModelMock = {
postModelStub = {
findPage: sinon.stub().returns({
data: [postModelStub]
data: [postModelInstanceStub]
})
};
const inliner = new ExternalMediaInliner({
PostModel: postModelMock
PostModel: postModelStub,
PostMetaModel: postMetaModelStub,
TagModel: tagModelStub,
UserModel: userModelStub
});
await inliner.inline(['https://img.stockfresh.com']);
assert.ok(requestMock.isDone());
assert.ok(logging.error.calledTwice);
assert.equal(logging.error.args[0][0], 'Error downloading remote media: https://img.stockfresh.com/files/f/image.jpg');
});
// A failed download (HTTP 500) of media referenced by a simple field must be reported through logging.error.
it('logs an error when fetching an external media for simple fields fails', async function () {
const imageURL = 'https://img.stockfresh.com/files/f/simple-image.jpg';
// The remote host answers the image request with a server error
const requestMock = nock('https://img.stockfresh.com')
.get('/files/f/simple-image.jpg')
.reply(500);
// User instance stub configured to hand back the external image URL
const userModelInstanceStub = {
id: 'inlined-user-id',
get: sinon.stub()
.withArgs('profile_image')
.returns(imageURL)
};
// Replace the shared user model stub so the lookup yields the instance above
userModelStub = {
findPage: sinon.stub().returns({
data: [userModelInstanceStub]
})
};
// No getMediaStorage is supplied here
const inliner = new ExternalMediaInliner({
PostModel: postModelStub,
PostMetaModel: postMetaModelStub,
TagModel: tagModelStub,
UserModel: userModelStub
});
await inliner.inline(['https://img.stockfresh.com']);
// The request was made and the first logged error names the failed URL
assert.ok(requestMock.isDone());
assert.equal(logging.error.args[0][0], 'Error downloading remote media: https://img.stockfresh.com/files/f/simple-image.jpg');
});
it('logs a warning when no suitable storage adapter found for inlined media extension', async function () {
const fileURL = 'https://img.stockfresh.com/files/f/inlined.exe';
const requestMock = nock('https://img.stockfresh.com')
.get('/files/f/inlined.exe')
.reply(200, GIF1x1);
const postModelStub = {
const postModelInstanceStub = {
id: 'inlined-post-id',
get: sinon.stub()
.withArgs('mobiledoc')
.returns(`{"version":"0.3.1","atoms":[],"cards":[["image",{"src":"${fileURL}"}]]}`)
};
const postModelMock = {
postModelStub = {
findPage: sinon.stub().returns({
data: [postModelStub]
data: [postModelInstanceStub]
}),
edit: sinon.stub().resolves()
};
const inliner = new ExternalMediaInliner({
PostModel: postModelMock,
PostModel: postModelStub,
PostMetaModel: postMetaModelStub,
TagModel: tagModelStub,
UserModel: userModelStub,
getMediaStorage: sinon.stub().withArgs('.exe').returns(null)
});
@ -131,20 +207,23 @@ describe('ExternalMediaInliner', function () {
.get('/files/f/image.jpg')
.reply(200, GIF1x1);
const postModelStub = {
postModelStub = {
id: 'errored-post-id',
get: sinon.stub()
.withArgs('mobiledoc')
.returns(`{"version":"0.3.1","atoms":[],"cards":[["image",{"src":"${imageURL}"}]]}`)
};
const postModelMock = {
postModelStub = {
findPage: sinon.stub().returns({
data: [postModelStub]
}),
edit: sinon.stub().throws(new Error('Error saving the post'))
};
const inliner = new ExternalMediaInliner({
PostModel: postModelMock,
PostModel: postModelStub,
PostMetaModel: postMetaModelStub,
TagModel: tagModelStub,
UserModel: userModelStub,
getMediaStorage: sinon.stub().withArgs('.jpg').returns({
getTargetDir: () => '/content/images',
getUniqueFileName: () => '/content/images/unique-image.jpg',
@ -155,9 +234,226 @@ describe('ExternalMediaInliner', function () {
await inliner.inline(['https://img.stockfresh.com']);
assert.ok(requestMock.isDone());
assert.ok(postModelMock.edit.calledOnce);
assert.ok(logging.error.calledTwice);
assert.ok(postModelStub.edit.calledOnce);
assert.equal(logging.error.args[0][0], 'Error inlining media for post: errored-post-id');
});
// When saving an inlined tag throws, the error must be logged with the tag's table name and id.
it('logs an error when handling tag simple fields inlining throws an error', async function (){
const imageURL = 'https://img.stockfresh.com/files/f/simple-image.jpg';
const requestMock = nock('https://img.stockfresh.com')
.get('/files/f/simple-image.jpg')
.reply(200, GIF1x1);
// Only feature_image points at external media; the other tag image fields are empty
const getMethodStub = sinon.stub();
getMethodStub.withArgs('feature_image').returns(imageURL);
getMethodStub.withArgs('og_image').returns(null);
getMethodStub.withArgs('twitter_image').returns(null);
const tagModelInstanceStub = {
id: 'errored-tag-id',
get: getMethodStub
};
tagModelStub.findPage = sinon.stub().returns({
data: [tagModelInstanceStub]
});
// Force the save step to fail
tagModelStub.edit = sinon.stub().throws(new Error('Error saving the tag'));
const inliner = new ExternalMediaInliner({
PostModel: postModelStub,
PostMetaModel: postMetaModelStub,
TagModel: tagModelStub,
UserModel: userModelStub,
getMediaStorage: sinon.stub().withArgs('.jpg').returns({
getTargetDir: () => '/content/images',
getUniqueFileName: () => '/content/images/unique-image.jpg',
saveRaw: () => '/content/images/unique-image.jpg'
})
});
await inliner.inline(['https://img.stockfresh.com']);
// The download happened, one save was attempted, and its failure was logged
assert.ok(requestMock.isDone());
assert.ok(tagModelStub.edit.calledOnce);
assert.equal(logging.error.args[0][0], 'Error inlining media for tags: errored-tag-id');
});
// An external feature_image on a post is downloaded, stored, and saved back as a __GHOST_URL__ path.
it('inlines image in the post\'s feature_image field', async function () {
    const imageURL = 'https://img.stockfresh.com/files/f/posts_feature_image.jpg';
    const requestMock = nock('https://img.stockfresh.com')
        .get('/files/f/posts_feature_image.jpg')
        .reply(200, GIF1x1);

    // Same naming as the sibling tests: the *InstanceStub is the record,
    // the shared postModelStub is the model handed to the inliner
    const postModelInstanceStub = {
        id: 'inlined-post-id',
        get: sinon.stub()
            .withArgs('feature_image')
            .returns(imageURL)
    };
    postModelStub = {
        findPage: sinon.stub().returns({
            data: [postModelInstanceStub]
        }),
        edit: sinon.stub().resolves()
    };

    const inliner = new ExternalMediaInliner({
        PostModel: postModelStub,
        PostMetaModel: postMetaModelStub,
        TagModel: tagModelStub,
        UserModel: userModelStub,
        getMediaStorage: sinon.stub().withArgs('.jpg').returns({
            getTargetDir: () => '/content/images',
            getUniqueFileName: () => '/content/images/unique-feature-image.jpg',
            saveRaw: () => '/content/images/unique-feature-image.jpg'
        })
    });

    await inliner.inline(['https://img.stockfresh.com']);

    // The image was fetched and the post saved once with the local path
    assert.ok(requestMock.isDone());
    assert.ok(postModelStub.edit.calledOnce);
    assert.ok(postModelStub.edit.calledWith({
        feature_image: '__GHOST_URL__/content/images/unique-feature-image.jpg'
    }, {
        id: 'inlined-post-id',
        context: {
            internal: true
        }
    }));
});
// An external og_image on a posts_meta record is downloaded, stored, and saved back as a __GHOST_URL__ path.
it('inlines og_image image in posts_meta table', async function () {
const imageURL = 'https://img.stockfresh.com/files/f/posts_meta_image.jpg';
const requestMock = nock('https://img.stockfresh.com')
.get('/files/f/posts_meta_image.jpg')
.reply(200, GIF1x1);
// Only og_image points at external media; twitter_image is empty
const getMethodStub = sinon.stub();
getMethodStub.withArgs('og_image').returns(imageURL);
getMethodStub.withArgs('twitter_image').returns(null);
const postsMetaModelInstanceStub = {
id: 'inlined-post-meta-id',
get: getMethodStub
};
postMetaModelStub.findPage = sinon.stub().resolves({
data: [postsMetaModelInstanceStub]
});
const inliner = new ExternalMediaInliner({
PostModel: postModelStub,
PostMetaModel: postMetaModelStub,
TagModel: tagModelStub,
UserModel: userModelStub,
getMediaStorage: sinon.stub().withArgs('.jpg').returns({
getTargetDir: () => '/content/images',
getUniqueFileName: () => '/content/images/unique-posts-meta-image.jpg',
saveRaw: () => '/content/images/unique-posts-meta-image.jpg'
})
});
await inliner.inline(['https://img.stockfresh.com']);
// One save, containing only the changed field, issued with an internal context
assert.ok(requestMock.isDone());
assert.ok(postMetaModelStub.edit.calledOnce);
assert.deepEqual(postMetaModelStub.edit.args[0][0], {
og_image: '__GHOST_URL__/content/images/unique-posts-meta-image.jpg'
});
assert.deepEqual(postMetaModelStub.edit.args[0][1], {
id: 'inlined-post-meta-id',
context: {
internal: true
}
});
});
// An external twitter_image on a tag is downloaded, stored, and saved back as a __GHOST_URL__ path.
it('inlines twitter_image image in tags table', async function () {
const imageURL = 'https://img.stockfresh.com/files/f/tag_twitter_image.jpg';
const requestMock = nock('https://img.stockfresh.com')
.get('/files/f/tag_twitter_image.jpg')
.reply(200, GIF1x1);
// twitter_image points at external media; every other lookup yields null
const getMethodStub = sinon.stub();
getMethodStub.withArgs('twitter_image').returns(imageURL);
getMethodStub.returns(null);
const tagModelInstanceStub = {
id: 'inlined-tag-id',
get: getMethodStub
};
tagModelStub.findPage = sinon.stub().resolves({
data: [tagModelInstanceStub]
});
const inliner = new ExternalMediaInliner({
PostModel: postModelStub,
PostMetaModel: postMetaModelStub,
TagModel: tagModelStub,
UserModel: userModelStub,
getMediaStorage: sinon.stub().withArgs('.jpg').returns({
getTargetDir: () => '/content/images',
getUniqueFileName: () => '/content/images/unique-tag-twitter-image.jpg',
saveRaw: () => '/content/images/unique-tag-twitter-image.jpg'
})
});
await inliner.inline(['https://img.stockfresh.com']);
// One save, containing only the changed field, issued with an internal context
assert.ok(requestMock.isDone());
assert.ok(tagModelStub.edit.calledOnce);
assert.deepEqual(tagModelStub.edit.args[0][0], {
twitter_image: '__GHOST_URL__/content/images/unique-tag-twitter-image.jpg'
});
assert.deepEqual(tagModelStub.edit.args[0][1], {
id: 'inlined-tag-id',
context: {
internal: true
}
});
});
// An external cover_image on a user is downloaded, stored, and saved back as a __GHOST_URL__ path.
it('inlines cover_image image in users table', async function () {
const imageURL = 'https://img.stockfresh.com/files/f/user_cover_image.jpg';
const requestMock = nock('https://img.stockfresh.com')
.get('/files/f/user_cover_image.jpg')
.reply(200, GIF1x1);
// cover_image points at external media; every other lookup yields null
const getMethodStub = sinon.stub();
getMethodStub.withArgs('cover_image').returns(imageURL);
getMethodStub.returns(null);
const userModelInstanceStub = {
id: 'inlined-user-id',
get: getMethodStub
};
userModelStub.findPage = sinon.stub().resolves({
data: [userModelInstanceStub]
});
const inliner = new ExternalMediaInliner({
PostModel: postModelStub,
PostMetaModel: postMetaModelStub,
TagModel: tagModelStub,
UserModel: userModelStub,
getMediaStorage: sinon.stub().withArgs('.jpg').returns({
getTargetDir: () => '/content/images',
getUniqueFileName: () => '/content/images/user-cover-image.jpg',
saveRaw: () => '/content/images/user-cover-image.jpg'
})
});
await inliner.inline(['https://img.stockfresh.com']);
// One save, containing only the changed field, issued with an internal context
assert.ok(requestMock.isDone());
assert.ok(userModelStub.edit.calledOnce);
assert.deepEqual(userModelStub.edit.args[0][0], {
cover_image: '__GHOST_URL__/content/images/user-cover-image.jpg'
});
assert.deepEqual(userModelStub.edit.args[0][1], {
id: 'inlined-user-id',
context: {
internal: true
}
});
});
});
});