Added strict header mapping parsing

refs https://github.com/TryGhost/Toolbox/issues/430

- Previously the CSV parser had "map whatever you can and pass on unknown properties further" approach to CSV parsing. This logic has led to unwanted fields leaking through CSV imports - messy, dangerous.
- The strict mapping rules act as a "validator" to the user input, only passing through the fields we expect explicitly - safer clean cut solution with no unintended side-effects.
This commit is contained in:
Naz 2022-10-18 13:05:31 +08:00
parent eca4b142d2
commit c0a415e0e1
No known key found for this signature in database
2 changed files with 32 additions and 30 deletions

View File

@ -10,18 +10,17 @@ const fs = require('fs-extra');
* @param {Array<string>} [defaultLabels] - A list of labels to apply to every parsed member row
* @returns
*/
module.exports = (path, headerMapping = {}, defaultLabels = []) => {
module.exports = (path, headerMapping, defaultLabels = []) => {
return new Promise(function (resolve, reject) {
const csvFileStream = fs.createReadStream(path);
const csvParserStream = papaparse.parse(papaparse.NODE_STREAM_INPUT, {
header: true,
transformHeader(_header) {
let header = _header;
if (headerMapping && Reflect.has(headerMapping, _header)) {
header = headerMapping[_header];
if (!headerMapping || !Reflect.has(headerMapping, _header)) {
return undefined;
}
return header;
return headerMapping[_header];
},
transform(value, header) {
if (header === 'labels') {
@ -73,11 +72,23 @@ module.exports = (path, headerMapping = {}, defaultLabels = []) => {
});
parsedCSVStream.on('data', (row) => {
// unmapped columns end up being assigned to 'undefined' property
// in the transformHeader stage, those should be removed completely
if (Reflect.has(row, 'undefined')) {
delete row.undefined;
}
// skip a rows with no data
if (!Object.keys(row).length){
return;
}
if (row.labels) {
row.labels = row.labels.concat(defaultLabels);
} else {
row.labels = defaultLabels;
}
rows.push(row);
});
});

View File

@ -5,9 +5,14 @@ const {parse} = require('../index');
const csvPath = path.join(__dirname, '/fixtures/');
describe('parse', function () {
const DEFAULT_HEADER_MAPPING = {
email: 'email',
name: 'name'
};
it('empty file', async function () {
const filePath = csvPath + 'empty.csv';
const result = await parse(filePath);
const result = await parse(filePath, DEFAULT_HEADER_MAPPING);
should.exist(result);
result.length.should.eql(0);
@ -15,7 +20,7 @@ describe('parse', function () {
it('one column', async function () {
const filePath = csvPath + 'single-column-with-header.csv';
const result = await parse(filePath);
const result = await parse(filePath, DEFAULT_HEADER_MAPPING);
should.exist(result);
result.length.should.eql(2);
@ -33,7 +38,7 @@ describe('parse', function () {
it('two columns, 1 filter', async function () {
const filePath = csvPath + 'two-columns-with-header.csv';
const result = await parse(filePath);
const result = await parse(filePath, DEFAULT_HEADER_MAPPING);
should.exist(result);
result.length.should.eql(2);
@ -44,6 +49,7 @@ describe('parse', function () {
it('two columns, 2 filters', async function () {
const filePath = csvPath + 'two-columns-obscure-header.csv';
const mapping = {
id: 'id',
'Email Address': 'email'
};
const result = await parse(filePath, mapping);
@ -59,6 +65,7 @@ describe('parse', function () {
it('two columns with mapping', async function () {
const filePath = csvPath + 'two-columns-mapping-header.csv';
const mapping = {
id: 'id',
correo_electronico: 'email',
nombre: 'name'
};
@ -78,6 +85,7 @@ describe('parse', function () {
it('two columns with partial mapping', async function () {
const filePath = csvPath + 'two-columns-mapping-header.csv';
const mapping = {
id: 'id',
correo_electronico: 'email'
};
const result = await parse(filePath, mapping);
@ -85,33 +93,15 @@ describe('parse', function () {
should.exist(result);
result.length.should.eql(2);
result[0].email.should.eql('jbloggs@example.com');
result[0].nombre.should.eql('joe');
result[0].id.should.eql('1');
result[1].email.should.eql('test@example.com');
result[1].nombre.should.eql('test');
result[1].id.should.eql('2');
});
it('two columns with empty mapping', async function () {
const filePath = csvPath + 'two-columns-mapping-header.csv';
const mapping = {};
const result = await parse(filePath, mapping);
should.exist(result);
result.length.should.eql(2);
result[0].correo_electronico.should.eql('jbloggs@example.com');
result[0].nombre.should.eql('joe');
result[0].id.should.eql('1');
result[1].correo_electronico.should.eql('test@example.com');
result[1].nombre.should.eql('test');
result[1].id.should.eql('2');
});
it('transforms empty values to nulls', async function () {
const filePath = csvPath + 'multiple-records-with-empty-values.csv';
const result = await parse(filePath);
const result = await parse(filePath, DEFAULT_HEADER_MAPPING);
should.exist(result);
result.length.should.eql(2);
@ -125,6 +115,7 @@ describe('parse', function () {
it(' transforms "subscribed_to_emails" column to "subscribed" property when the mapping is passed in', async function () {
const filePath = csvPath + 'subscribed-to-emails-header.csv';
const mapping = {
email: 'email',
subscribed_to_emails: 'subscribed'
};
const result = await parse(filePath, mapping);
@ -140,14 +131,14 @@ describe('parse', function () {
it('DOES NOT transforms "subscribed_to_emails" column to "subscribed" property when the WITHOUT mapping', async function () {
const filePath = csvPath + 'subscribed-to-emails-header.csv';
const result = await parse(filePath);
const result = await parse(filePath, DEFAULT_HEADER_MAPPING);
assert.ok(result);
assert.equal(result.length, 2);
assert.equal(result[0].email, 'jbloggs@example.com');
assert.ok(result[0].subscribed_to_emails);
assert.equal(result[0].subscribed_to_emails, undefined, 'property not present in the mapping should not be defined');
assert.equal(result[1].email, 'test@example.com');
assert.equal(result[1].subscribed_to_emails, false);
assert.equal(result[1].subscribed_to_emails, undefined, 'property not present in the mapping should not be defined');
});
});