From c0a415e0e1f161247ac41eb63d31f8591507b831 Mon Sep 17 00:00:00 2001 From: Naz Date: Tue, 18 Oct 2022 13:05:31 +0800 Subject: [PATCH] Added strict header mapping parsing refs https://github.com/TryGhost/Toolbox/issues/430 - Previously the CSV parser had "map whatever you can and pass on unknown properties further" approach to CSV parsing. This logic has led to unwanted fields leaking through CSV imports - messy, dangerous. - The strict mapping rules act as a "validator" to the user input, only passing through the fields we expect explicitly - safer clean cut solution with no unintended side-effects. --- ghost/members-csv/lib/parse.js | 21 ++++++++++---- ghost/members-csv/test/parse.test.js | 41 +++++++++++----------------- 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/ghost/members-csv/lib/parse.js b/ghost/members-csv/lib/parse.js index 3ef8d75977..16add0b831 100644 --- a/ghost/members-csv/lib/parse.js +++ b/ghost/members-csv/lib/parse.js @@ -10,18 +10,17 @@ const fs = require('fs-extra'); * @param {Array} [defaultLabels] - A list of labels to apply to every parsed member row * @returns */ -module.exports = (path, headerMapping = {}, defaultLabels = []) => { +module.exports = (path, headerMapping, defaultLabels = []) => { return new Promise(function (resolve, reject) { const csvFileStream = fs.createReadStream(path); const csvParserStream = papaparse.parse(papaparse.NODE_STREAM_INPUT, { header: true, transformHeader(_header) { - let header = _header; - if (headerMapping && Reflect.has(headerMapping, _header)) { - header = headerMapping[_header]; + if (!headerMapping || !Reflect.has(headerMapping, _header)) { + return undefined; } - return header; + return headerMapping[_header]; }, transform(value, header) { if (header === 'labels') { @@ -73,11 +72,23 @@ module.exports = (path, headerMapping = {}, defaultLabels = []) => { }); parsedCSVStream.on('data', (row) => { + // unmapped columns end up being assigned to 'undefined' property + // in the transformHeader stage, those should be removed completely + if (Reflect.has(row, 'undefined')) { + delete row.undefined; + } + + // skip a rows with no data + if (!Object.keys(row).length){ + return; + } + if (row.labels) { row.labels = row.labels.concat(defaultLabels); } else { row.labels = defaultLabels; } + rows.push(row); }); }); diff --git a/ghost/members-csv/test/parse.test.js b/ghost/members-csv/test/parse.test.js index 57930e4a5d..8c801dc7d2 100644 --- a/ghost/members-csv/test/parse.test.js +++ b/ghost/members-csv/test/parse.test.js @@ -5,9 +5,14 @@ const {parse} = require('../index'); const csvPath = path.join(__dirname, '/fixtures/'); describe('parse', function () { + const DEFAULT_HEADER_MAPPING = { + email: 'email', + name: 'name' + }; + it('empty file', async function () { const filePath = csvPath + 'empty.csv'; - const result = await parse(filePath); + const result = await parse(filePath, DEFAULT_HEADER_MAPPING); should.exist(result); result.length.should.eql(0); @@ -15,7 +20,7 @@ describe('parse', function () { it('one column', async function () { const filePath = csvPath + 'single-column-with-header.csv'; - const result = await parse(filePath); + const result = await parse(filePath, DEFAULT_HEADER_MAPPING); should.exist(result); result.length.should.eql(2); @@ -33,7 +38,7 @@ describe('parse', function () { it('two columns, 1 filter', async function () { const filePath = csvPath + 'two-columns-with-header.csv'; - const result = await parse(filePath); + const result = await parse(filePath, DEFAULT_HEADER_MAPPING); should.exist(result); result.length.should.eql(2); @@ -44,6 +49,7 @@ describe('parse', function () { it('two columns, 2 filters', async function () { const filePath = csvPath + 'two-columns-obscure-header.csv'; const mapping = { + id: 'id', 'Email Address': 'email' }; const result = await parse(filePath, mapping); @@ -59,6 +65,7 @@ describe('parse', function () { it('two columns with mapping', async function () { const filePath = csvPath + 'two-columns-mapping-header.csv'; const mapping = { + id: 'id', correo_electronico: 'email', nombre: 'name' }; @@ -78,6 +85,7 @@ describe('parse', function () { it('two columns with partial mapping', async function () { const filePath = csvPath + 'two-columns-mapping-header.csv'; const mapping = { + id: 'id', correo_electronico: 'email' }; const result = await parse(filePath, mapping); @@ -85,33 +93,15 @@ describe('parse', function () { should.exist(result); result.length.should.eql(2); result[0].email.should.eql('jbloggs@example.com'); - result[0].nombre.should.eql('joe'); result[0].id.should.eql('1'); result[1].email.should.eql('test@example.com'); - result[1].nombre.should.eql('test'); - result[1].id.should.eql('2'); - }); - - it('two columns with empty mapping', async function () { - const filePath = csvPath + 'two-columns-mapping-header.csv'; - const mapping = {}; - const result = await parse(filePath, mapping); - - should.exist(result); - result.length.should.eql(2); - result[0].correo_electronico.should.eql('jbloggs@example.com'); - result[0].nombre.should.eql('joe'); - result[0].id.should.eql('1'); - - result[1].correo_electronico.should.eql('test@example.com'); - result[1].nombre.should.eql('test'); result[1].id.should.eql('2'); }); it('transforms empty values to nulls', async function () { const filePath = csvPath + 'multiple-records-with-empty-values.csv'; - const result = await parse(filePath); + const result = await parse(filePath, DEFAULT_HEADER_MAPPING); should.exist(result); result.length.should.eql(2); @@ -125,6 +115,7 @@ describe('parse', function () { it(' transforms "subscribed_to_emails" column to "subscribed" property when the mapping is passed in', async function () { const filePath = csvPath + 'subscribed-to-emails-header.csv'; const mapping = { + email: 'email', subscribed_to_emails: 'subscribed' }; const result = await parse(filePath, mapping); @@ -140,14 +131,14 @@ describe('parse', function () { it('DOES NOT transforms "subscribed_to_emails" column to "subscribed" property when the WITHOUT mapping', async function () { const filePath = csvPath + 'subscribed-to-emails-header.csv'; - const result = await parse(filePath); + const result = await parse(filePath, DEFAULT_HEADER_MAPPING); assert.ok(result); assert.equal(result.length, 2); assert.equal(result[0].email, 'jbloggs@example.com'); - assert.ok(result[0].subscribed_to_emails); + assert.equal(result[0].subscribed_to_emails, undefined, 'property not present in the mapping should not be defined'); assert.equal(result[1].email, 'test@example.com'); - assert.equal(result[1].subscribed_to_emails, false); + assert.equal(result[1].subscribed_to_emails, undefined, 'property not present in the mapping should not be defined'); }); });