From 86343f028bd049a4b32a2c6b9b2ff72b15857c90 Mon Sep 17 00:00:00 2001
From: Kevin Ansfield
Date: Mon, 12 Aug 2019 08:57:13 +0100
Subject: [PATCH 1/2] Refactored url-utils (#141)

no issue

- extracted most standalone util functions into separate files+tests
- switched the main `UrlUtils` function into a class (we had mixed uses of the returned object previously as both `urlUtils()` and `new UrlUtils()` - the class makes it obvious that the `new` keyword is required)
- removed some unnecessary use of lodash and switched to using `URL` in place of `url` so that the package can have parity across node+browser without needing an additional package bundled in
---
 .../utils/deduplicate-subdirectory.test.js    | 131 ++++++++++++++++++
 .../lib/utils/deduplicate-subdirectory.js     |  31 +++++
 2 files changed, 162 insertions(+)
 create mode 100644 ghost/config-url-helpers/test/unit/utils/deduplicate-subdirectory.test.js
 create mode 100644 ghost/config-url-helpers/lib/utils/deduplicate-subdirectory.js

diff --git a/ghost/config-url-helpers/test/unit/utils/deduplicate-subdirectory.test.js b/ghost/config-url-helpers/test/unit/utils/deduplicate-subdirectory.test.js
new file mode 100644
index 0000000000..42fd94df09
--- /dev/null
+++ b/ghost/config-url-helpers/test/unit/utils/deduplicate-subdirectory.test.js
@@ -0,0 +1,131 @@
+// Switch these lines once there are useful utils
+// const testUtils = require('./utils');
+require('../../utils');
+
+const deduplicateSubdirectory = require('../../../lib/utils/deduplicate-subdirectory');
+
+describe('utils: deduplicateSubdirectory()', function () {
+    describe('with url', function () {
+        it('ignores rootUrl with no subdirectory', function () {
+            let url = 'http://example.com/my/my/path.png';
+
+            deduplicateSubdirectory(url, 'https://example.com')
+                .should.eql('http://example.com/my/my/path.png', 'without root trailing-slash');
+
+            deduplicateSubdirectory(url, 'https://example.com/')
+                .should.eql('http://example.com/my/my/path.png', 'with root trailing-slash');
+        });
+
+        it('deduplicates single directory', function () {
+            let url = 'http://example.com/subdir/subdir/my/path.png';
+
+            deduplicateSubdirectory(url, 'http://example.com/subdir')
+                .should.eql('http://example.com/subdir/my/path.png', 'without root trailing-slash');
+
+            deduplicateSubdirectory(url, 'http://example.com/subdir/')
+                .should.eql('http://example.com/subdir/my/path.png', 'with root trailing-slash');
+        });
+
+        it('deduplicates multiple directories', function () {
+            let url = 'http://example.com/my/subdir/my/subdir/my/path.png';
+
+            deduplicateSubdirectory(url, 'http://example.com/my/subdir')
+                .should.eql('http://example.com/my/subdir/my/path.png', 'without root trailing-slash');
+
+            deduplicateSubdirectory(url, 'http://example.com/my/subdir/')
+                .should.eql('http://example.com/my/subdir/my/path.png', 'with root trailing-slash');
+        });
+
+        it('handles file that matches subdirectory', function () {
+            let url = 'http://example.com/my/path/my/path.png';
+
+            deduplicateSubdirectory(url, 'http://example.com/my/path')
+                .should.eql('http://example.com/my/path/my/path.png', 'without root trailing-slash');
+
+            deduplicateSubdirectory(url, 'http://example.com/my/path/')
+                .should.eql('http://example.com/my/path/my/path.png', 'with root trailing-slash');
+        });
+
+        it('handles subdirectory that matches tld', function () {
+            let url = 'http://example.blog/blog/file.png';
+
+            deduplicateSubdirectory(url, 'http://example.blog/blog/subdir')
+                .should.eql('http://example.blog/blog/file.png', 'without root trailing-slash');
+
+            deduplicateSubdirectory(url, 'http://example.blog/blog/subdir/')
+                .should.eql('http://example.blog/blog/file.png', 'with root trailing-slash');
+        });
+
+        it('keeps query and hash params', function () {
+            let url = 'http://example.blog/blog/blog/file.png?test=true#testing';
+
+            deduplicateSubdirectory(url, 'http://example.blog/blog/subdir')
+                .should.eql('http://example.blog/blog/blog/file.png?test=true#testing', 'without root trailing-slash');
+
+            deduplicateSubdirectory(url, 'http://example.blog/blog/subdir/')
+                .should.eql('http://example.blog/blog/blog/file.png?test=true#testing', 'with root trailing-slash');
+        });
+    });
+
+    describe('with path', function () {
+        it('ignores rootUrl with no subdirectory', function () {
+            let path = '/my/my/path.png';
+
+            deduplicateSubdirectory(path, 'https://example.com')
+                .should.eql('/my/my/path.png', 'without root trailing-slash');
+
+            deduplicateSubdirectory(path, 'https://example.com/')
+                .should.eql('/my/my/path.png', 'with root trailing-slash');
+        });
+
+        it('deduplicates single directory', function () {
+            let path = '/subdir/subdir/my/path.png';
+
+            deduplicateSubdirectory(path, 'https://example.com/subdir')
+                .should.eql('/subdir/my/path.png', 'without root trailing-slash');
+
+            deduplicateSubdirectory(path, 'https://example.com/subdir/')
+                .should.eql('/subdir/my/path.png', 'with root trailing-slash');
+        });
+
+        it('deduplicates multiple directories', function () {
+            let path = '/my/subdir/my/subdir/my/path.png';
+
+            deduplicateSubdirectory(path, 'http://example.com/my/subdir')
+                .should.eql('/my/subdir/my/path.png', 'without root trailing-slash');
+
+            deduplicateSubdirectory(path, 'http://example.com/my/subdir/')
+                .should.eql('/my/subdir/my/path.png', 'with root trailing-slash');
+        });
+
+        it('handles file that matches subdirectory', function () {
+            let path = '/my/path/my/path.png';
+
+            deduplicateSubdirectory(path, 'http://example.com/my/path')
+                .should.eql('/my/path/my/path.png', 'without root trailing-slash');
+
+            deduplicateSubdirectory(path, 'http://example.com/my/path/')
+                .should.eql('/my/path/my/path.png', 'with root trailing-slash');
+        });
+
+        it('handles subdirectory that matches tld', function () {
+            let path = '/blog/file.png';
+
+            deduplicateSubdirectory(path, 'http://example.blog/blog/subdir')
+                .should.eql('/blog/file.png', 'without root trailing-slash');
+
+            deduplicateSubdirectory(path, 'http://example.blog/blog/subdir/')
+                .should.eql('/blog/file.png', 'with root trailing-slash');
+        });
+
+        it('keeps query and hash params', function () {
+            let path = '/blog/blog/file.png?test=true#testing';
+
+            deduplicateSubdirectory(path, 'http://example.blog/blog/subdir')
+                .should.eql('/blog/blog/file.png?test=true#testing', 'without root trailing-slash');
+
+            deduplicateSubdirectory(path, 'http://example.blog/blog/subdir/')
+                .should.eql('/blog/blog/file.png?test=true#testing', 'with root trailing-slash');
+        });
+    });
+});
diff --git a/ghost/config-url-helpers/lib/utils/deduplicate-subdirectory.js b/ghost/config-url-helpers/lib/utils/deduplicate-subdirectory.js
new file mode 100644
index 0000000000..520f932a1e
--- /dev/null
+++ b/ghost/config-url-helpers/lib/utils/deduplicate-subdirectory.js
@@ -0,0 +1,31 @@
+const {URL} = require('url');
+
+/**
+ * Remove duplicated directories from the start of a path or url's path
+ *
+ * @param {string} url URL or pathname with possible duplicate subdirectory
+ * @param {string} rootUrl Root URL with an optional subdirectory
+ * @returns {string} URL or pathname with any duplicated subdirectory removed
+ */
+const deduplicateSubdirectory = function deduplicateSubdirectory(url, rootUrl) {
+    // force root url to always have a trailing-slash for consistent behaviour
+    if (!rootUrl.endsWith('/')) {
+        rootUrl = `${rootUrl}/`;
+    }
+
+    const parsedRoot = new URL(rootUrl);
+
+    // do nothing if rootUrl does not have a subdirectory
+    if (parsedRoot.pathname === '/') {
+        return url;
+    }
+
+    const subdir = parsedRoot.pathname.replace(/(^\/|\/$)+/g, '');
+    // we can have subdirs that match TLDs so we need to restrict matches to
+    // duplicates that start with a / or the beginning of the url
+    const subdirRegex = new RegExp(`(^|/)${subdir}/${subdir}/`);
+
+    return url.replace(subdirRegex, `$1${subdir}/`);
+};
+
+module.exports = deduplicateSubdirectory;

From 523278b295ffe82bb7bd54757b499294bb4e45e3 Mon Sep 17 00:00:00 2001
From: Kevin Ansfield
Date: Wed, 2 Oct 2019 12:33:34 +0100
Subject: [PATCH 2/2] =?UTF-8?q?=F0=9F=90=9B=20Fixed=20subdirectory=20handl?=
 =?UTF-8?q?ing=20and=20deduplication?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

no issue

Ghost's relative->absolute handling is a little strange when the rootUrl includes a subdirectory. Root-relative paths such as /content/image.jpg are actually treated as subdirectory-relative. This means that it's possible to migrate from a root config to a subdirectory config without migrating data in the database, _however_ that means that the database will now have a mix of path styles (/content/image.png and /subdir/content/image.png). To handle this when all root-relative paths are treated as subdir-relative we have to rely on subdirectory deduplication.

- updates tests to reflect correct subdirectory handling according to the above rules
- fixes missing subdirectories when root urls contain subdirectories but relative paths do not
- fixes subdirectory deduplication when the supplied url/path does not have a trailing slash but matches the root url's subdirectory
---
 .../test/unit/utils/deduplicate-subdirectory.test.js      | 8 ++++++++
 .../lib/utils/deduplicate-subdirectory.js                 | 2 +-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/ghost/config-url-helpers/test/unit/utils/deduplicate-subdirectory.test.js b/ghost/config-url-helpers/test/unit/utils/deduplicate-subdirectory.test.js
index 42fd94df09..4eb62c3f70 100644
--- a/ghost/config-url-helpers/test/unit/utils/deduplicate-subdirectory.test.js
+++ b/ghost/config-url-helpers/test/unit/utils/deduplicate-subdirectory.test.js
@@ -127,5 +127,13 @@ describe('utils: deduplicateSubdirectory()', function () {
             deduplicateSubdirectory(path, 'http://example.blog/blog/subdir/')
                 .should.eql('/blog/blog/file.png?test=true#testing', 'with root trailing-slash');
         });
+
+        it('deduplicates path with no trailing slash that matches subdir', function () {
+            deduplicateSubdirectory('/blog/blog', 'http://example.com/blog')
+                .should.equal('/blog/', 'without root trailing-slash');
+
+            deduplicateSubdirectory('/blog/blog', 'http://example.com/blog/')
+                .should.equal('/blog/', 'with root trailing-slash');
+        });
     });
 });
diff --git a/ghost/config-url-helpers/lib/utils/deduplicate-subdirectory.js b/ghost/config-url-helpers/lib/utils/deduplicate-subdirectory.js
index 520f932a1e..d86ee4a744 100644
--- a/ghost/config-url-helpers/lib/utils/deduplicate-subdirectory.js
+++ b/ghost/config-url-helpers/lib/utils/deduplicate-subdirectory.js
@@ -23,7 +23,7 @@ const deduplicateSubdirectory = function deduplicateSubdirectory(url, rootUrl) {
     const subdir = parsedRoot.pathname.replace(/(^\/|\/$)+/g, '');
     // we can have subdirs that match TLDs so we need to restrict matches to
     // duplicates that start with a / or the beginning of the url
-    const subdirRegex = new RegExp(`(^|/)${subdir}/${subdir}/`);
+    const subdirRegex = new RegExp(`(^|/)${subdir}/${subdir}(/|$)`);
 
     return url.replace(subdirRegex, `$1${subdir}/`);
 };