Added strict header mapping parsing
refs https://github.com/TryGhost/Toolbox/issues/430 - Previously the CSV parser had "map whatever you can and pass on unknown properties further" approach to CSV parsing. This logic has led to unwanted fields leaking through CSV imports - messy, dangerous. - The strict mapping rules act as a "validator" to the user input, only passing through the fields we expect explicitly - safer clean cut solution with no unintended side-effects.
This commit is contained in:
parent
eca4b142d2
commit
c0a415e0e1
@ -10,18 +10,17 @@ const fs = require('fs-extra');
|
||||
* @param {Array<string>} [defaultLabels] - A list of labels to apply to every parsed member row
|
||||
* @returns
|
||||
*/
|
||||
module.exports = (path, headerMapping = {}, defaultLabels = []) => {
|
||||
module.exports = (path, headerMapping, defaultLabels = []) => {
|
||||
return new Promise(function (resolve, reject) {
|
||||
const csvFileStream = fs.createReadStream(path);
|
||||
const csvParserStream = papaparse.parse(papaparse.NODE_STREAM_INPUT, {
|
||||
header: true,
|
||||
transformHeader(_header) {
|
||||
let header = _header;
|
||||
if (headerMapping && Reflect.has(headerMapping, _header)) {
|
||||
header = headerMapping[_header];
|
||||
if (!headerMapping || !Reflect.has(headerMapping, _header)) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
return header;
|
||||
return headerMapping[_header];
|
||||
},
|
||||
transform(value, header) {
|
||||
if (header === 'labels') {
|
||||
@ -73,11 +72,23 @@ module.exports = (path, headerMapping = {}, defaultLabels = []) => {
|
||||
});
|
||||
|
||||
parsedCSVStream.on('data', (row) => {
|
||||
// unmapped columns end up being assigned to 'undefined' property
|
||||
// in the transformHeader stage, those should be removed completely
|
||||
if (Reflect.has(row, 'undefined')) {
|
||||
delete row.undefined;
|
||||
}
|
||||
|
||||
// skip a rows with no data
|
||||
if (!Object.keys(row).length){
|
||||
return;
|
||||
}
|
||||
|
||||
if (row.labels) {
|
||||
row.labels = row.labels.concat(defaultLabels);
|
||||
} else {
|
||||
row.labels = defaultLabels;
|
||||
}
|
||||
|
||||
rows.push(row);
|
||||
});
|
||||
});
|
||||
|
@ -5,9 +5,14 @@ const {parse} = require('../index');
|
||||
const csvPath = path.join(__dirname, '/fixtures/');
|
||||
|
||||
describe('parse', function () {
|
||||
const DEFAULT_HEADER_MAPPING = {
|
||||
email: 'email',
|
||||
name: 'name'
|
||||
};
|
||||
|
||||
it('empty file', async function () {
|
||||
const filePath = csvPath + 'empty.csv';
|
||||
const result = await parse(filePath);
|
||||
const result = await parse(filePath, DEFAULT_HEADER_MAPPING);
|
||||
|
||||
should.exist(result);
|
||||
result.length.should.eql(0);
|
||||
@ -15,7 +20,7 @@ describe('parse', function () {
|
||||
|
||||
it('one column', async function () {
|
||||
const filePath = csvPath + 'single-column-with-header.csv';
|
||||
const result = await parse(filePath);
|
||||
const result = await parse(filePath, DEFAULT_HEADER_MAPPING);
|
||||
|
||||
should.exist(result);
|
||||
result.length.should.eql(2);
|
||||
@ -33,7 +38,7 @@ describe('parse', function () {
|
||||
|
||||
it('two columns, 1 filter', async function () {
|
||||
const filePath = csvPath + 'two-columns-with-header.csv';
|
||||
const result = await parse(filePath);
|
||||
const result = await parse(filePath, DEFAULT_HEADER_MAPPING);
|
||||
|
||||
should.exist(result);
|
||||
result.length.should.eql(2);
|
||||
@ -44,6 +49,7 @@ describe('parse', function () {
|
||||
it('two columns, 2 filters', async function () {
|
||||
const filePath = csvPath + 'two-columns-obscure-header.csv';
|
||||
const mapping = {
|
||||
id: 'id',
|
||||
'Email Address': 'email'
|
||||
};
|
||||
const result = await parse(filePath, mapping);
|
||||
@ -59,6 +65,7 @@ describe('parse', function () {
|
||||
it('two columns with mapping', async function () {
|
||||
const filePath = csvPath + 'two-columns-mapping-header.csv';
|
||||
const mapping = {
|
||||
id: 'id',
|
||||
correo_electronico: 'email',
|
||||
nombre: 'name'
|
||||
};
|
||||
@ -78,6 +85,7 @@ describe('parse', function () {
|
||||
it('two columns with partial mapping', async function () {
|
||||
const filePath = csvPath + 'two-columns-mapping-header.csv';
|
||||
const mapping = {
|
||||
id: 'id',
|
||||
correo_electronico: 'email'
|
||||
};
|
||||
const result = await parse(filePath, mapping);
|
||||
@ -85,33 +93,15 @@ describe('parse', function () {
|
||||
should.exist(result);
|
||||
result.length.should.eql(2);
|
||||
result[0].email.should.eql('jbloggs@example.com');
|
||||
result[0].nombre.should.eql('joe');
|
||||
result[0].id.should.eql('1');
|
||||
|
||||
result[1].email.should.eql('test@example.com');
|
||||
result[1].nombre.should.eql('test');
|
||||
result[1].id.should.eql('2');
|
||||
});
|
||||
|
||||
it('two columns with empty mapping', async function () {
|
||||
const filePath = csvPath + 'two-columns-mapping-header.csv';
|
||||
const mapping = {};
|
||||
const result = await parse(filePath, mapping);
|
||||
|
||||
should.exist(result);
|
||||
result.length.should.eql(2);
|
||||
result[0].correo_electronico.should.eql('jbloggs@example.com');
|
||||
result[0].nombre.should.eql('joe');
|
||||
result[0].id.should.eql('1');
|
||||
|
||||
result[1].correo_electronico.should.eql('test@example.com');
|
||||
result[1].nombre.should.eql('test');
|
||||
result[1].id.should.eql('2');
|
||||
});
|
||||
|
||||
it('transforms empty values to nulls', async function () {
|
||||
const filePath = csvPath + 'multiple-records-with-empty-values.csv';
|
||||
const result = await parse(filePath);
|
||||
const result = await parse(filePath, DEFAULT_HEADER_MAPPING);
|
||||
|
||||
should.exist(result);
|
||||
result.length.should.eql(2);
|
||||
@ -125,6 +115,7 @@ describe('parse', function () {
|
||||
it(' transforms "subscribed_to_emails" column to "subscribed" property when the mapping is passed in', async function () {
|
||||
const filePath = csvPath + 'subscribed-to-emails-header.csv';
|
||||
const mapping = {
|
||||
email: 'email',
|
||||
subscribed_to_emails: 'subscribed'
|
||||
};
|
||||
const result = await parse(filePath, mapping);
|
||||
@ -140,14 +131,14 @@ describe('parse', function () {
|
||||
|
||||
it('DOES NOT transforms "subscribed_to_emails" column to "subscribed" property when the WITHOUT mapping', async function () {
|
||||
const filePath = csvPath + 'subscribed-to-emails-header.csv';
|
||||
const result = await parse(filePath);
|
||||
const result = await parse(filePath, DEFAULT_HEADER_MAPPING);
|
||||
|
||||
assert.ok(result);
|
||||
assert.equal(result.length, 2);
|
||||
assert.equal(result[0].email, 'jbloggs@example.com');
|
||||
assert.ok(result[0].subscribed_to_emails);
|
||||
assert.equal(result[0].subscribed_to_emails, undefined, 'property not present in the mapping should not be defined');
|
||||
|
||||
assert.equal(result[1].email, 'test@example.com');
|
||||
assert.equal(result[1].subscribed_to_emails, false);
|
||||
assert.equal(result[1].subscribed_to_emails, undefined, 'property not present in the mapping should not be defined');
|
||||
});
|
||||
});
|
||||
|
Loading…
Reference in New Issue
Block a user