From 1eab73c76d2d48cfa3ce15d2ec6505b37e9335e7 Mon Sep 17 00:00:00 2001 From: Kevin Ansfield Date: Thu, 1 Aug 2024 16:50:31 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Fixed=20YouTube=20live=20embeds?= =?UTF-8?q?=20failing=20in=20some=20situations?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ref https://linear.app/tryghost/issue/ONC-197 - YouTube has started responding to video page requests with localised content when requested from certain IPs, with that localised content not containing the required `<link>` tag pointing to the oembed endpoint - we were fetching video pages rather than the oembed endpoint for YouTube Live URLs because they are not recognised by the oembed extraction library we use - by modifying the URL from a live URL to a watch URL before we perform oembed lookup/extraction we are able to bypass the (localised) page fetch and instead grab the oembed content directly --- ghost/oembed-service/lib/OEmbedService.js | 12 +++++++++ .../test/oembed-service.test.js | 26 +++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/ghost/oembed-service/lib/OEmbedService.js b/ghost/oembed-service/lib/OEmbedService.js index c9876027bb..dc7b7cce48 100644 --- a/ghost/oembed-service/lib/OEmbedService.js +++ b/ghost/oembed-service/lib/OEmbedService.js @@ -373,6 +373,18 @@ class OEmbedService { try { const urlObject = new URL(url); + // YouTube has started not returning oembed tags for some live URLs + // when fetched from an IP address that's in a non-EN region. + // We convert live URLs to watch URLs so we can go straight to the + // oembed request via a known provider rather than going through the page fetch routine. 
+ const ytLiveRegex = /^\/live\/([a-zA-Z0-9_-]+)$/; + if (urlObject.hostname === 'www.youtube.com' && ytLiveRegex.test(urlObject.pathname)) { + const videoId = ytLiveRegex.exec(urlObject.pathname)[1]; + urlObject.pathname = '/watch'; + urlObject.searchParams.set('v', videoId); + url = urlObject.toString(); + } + // Trimming solves the difference of url validation between `new URL(url)` // and metascraper. url = url.trim(); diff --git a/ghost/oembed-service/test/oembed-service.test.js b/ghost/oembed-service/test/oembed-service.test.js index 18adf678e2..2e4d45984f 100644 --- a/ghost/oembed-service/test/oembed-service.test.js +++ b/ghost/oembed-service/test/oembed-service.test.js @@ -172,5 +172,31 @@ describe('oembed-service', function () { assert.equal(response.url, 'https://www.example.com'); assert.equal(response.metadata.title, 'Example'); }); + + it('converts YT live URLs to watch URLs', async function () { + nock('https://www.youtube.com') + .get('/oembed') + .query((query) => { + // Ensure the URL is converted to a watch URL and retains existing query params. + const actual = query.url; + const expected = 'https://youtube.com/watch?param=existing&v=1234'; + + assert.equal(actual, expected, 'URL passed to oembed endpoint is incorrect'); + + return actual === expected; + }) + .reply(200, { + type: 'rich', + version: '1.0', + title: 'Test Title', + author_name: 'Test Author', + author_url: 'https://example.com/user/testauthor', + html: '', + width: 640, + height: null + }); + + await oembedService.fetchOembedDataFromUrl('https://www.youtube.com/live/1234?param=existing'); + }); }); });