From 0b4e249037388586e978b8e34c889552ebd72d61 Mon Sep 17 00:00:00 2001 From: Kevin Ansfield Date: Thu, 20 Jun 2024 22:06:53 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Fixed=20bookmark=20creation=20fo?= =?UTF-8?q?r=20sites=20that=20block=20some=20user=20agents?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit closes https://linear.app/tryghost/issue/ENG-762 - nytimes.com and other sites return 403 responses when requests do not match typical browser user-agents - our bookmark fetching requests were using `Ghost(https://github.com/TryGhost/Ghost)` meaning bookmark creation failed for these user-agent-blocking sites - switched to using a standard browser user-agent string to avoid such blocks --- ghost/oembed-service/lib/OEmbedService.js | 12 +++++++++++- ghost/oembed-service/test/oembed-service.test.js | 15 +++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/ghost/oembed-service/lib/OEmbedService.js b/ghost/oembed-service/lib/OEmbedService.js index 977a67edd6..83eee585cf 100644 --- a/ghost/oembed-service/lib/OEmbedService.js +++ b/ghost/oembed-service/lib/OEmbedService.js @@ -6,6 +6,9 @@ const _ = require('lodash'); const charset = require('charset'); const iconv = require('iconv-lite'); +// Some sites block non-standard user agents so we need to mimic a typical browser +const USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/601.3.9 (KHTML, like Gecko) Version/9.0.2 Safari/601.3.9'; + const messages = { noUrlProvided: 'No url provided.', insufficientMetadata: 'URL contains insufficient metadata.', @@ -126,6 +129,9 @@ class OEmbedService { return this.externalRequest( url, { + headers: { + 'user-agent': USER_AGENT + }, timeout: 2000, followRedirect: true, ...options @@ -205,7 +211,11 @@ class OEmbedService { * @returns {Promise} */ async fetchBookmarkData(url, html) { - const gotOpts = {}; + const gotOpts = { + headers: { + 'User-Agent': USER_AGENT + } + }; if (process.env.NODE_ENV?.startsWith('test')) { gotOpts.retry = 0; diff --git a/ghost/oembed-service/test/oembed-service.test.js b/ghost/oembed-service/test/oembed-service.test.js index d87d5fcdb9..b2d72aa3c5 100644 --- a/ghost/oembed-service/test/oembed-service.test.js +++ b/ghost/oembed-service/test/oembed-service.test.js @@ -131,5 +131,20 @@ describe('oembed-service', function () { assert.equal(response.author_url, 'https://example.com/user/testauthor'); assert.equal(response.html, ''); }); + + it('uses a known user-agent for bookmark requests', async function () { + nock('https://www.example.com') + .get('/') + .query(true) + .matchHeader('User-Agent', /Mozilla\/.*/) + .reply(200, `Example`); + + const response = await oembedService.fetchOembedDataFromUrl('https://www.example.com', 'bookmark'); + + assert.equal(response.version, '1.0'); + assert.equal(response.type, 'bookmark'); + assert.equal(response.url, 'https://www.example.com'); + assert.equal(response.metadata.title, 'Example'); + }); }); });