Merge pull request #4646 from ErisDS/importer-refactor

Refactor importer with tests
This commit is contained in:
Jason Williams 2014-12-18 13:24:11 -06:00
commit 68df133d65
5 changed files with 524 additions and 95 deletions

View File

@ -1,57 +1,19 @@
// # DB API
// API for DB operations
var dataExport = require('../data/export'),
dataImport = require('../data/import'),
dataProvider = require('../models'),
fs = require('fs-extra'),
var _ = require('lodash'),
Promise = require('bluebird'),
_ = require('lodash'),
path = require('path'),
os = require('os'),
glob = require('glob'),
uuid = require('node-uuid'),
extract = require('extract-zip'),
errors = require('../../server/errors'),
dataExport = require('../data/export'),
importer = require('../data/importer'),
models = require('../models'),
errors = require('../errors'),
canThis = require('../permissions').canThis,
utils = require('./utils'),
api = {},
db,
types = ['application/octet-stream', 'application/json', 'application/zip', 'application/x-zip-compressed'],
extensions = ['.json', '.zip'];
db;
api.settings = require('./settings');
// TODO refactor this out of here
function isJSON(ext) {
return ext === '.json';
}
function isZip(ext) {
return ext === '.zip';
}
function getJSONFileContents(filepath, ext) {
if (isJSON(ext)) {
// if it's just a JSON file, read it
return Promise.promisify(fs.readFile)(filepath);
} else if (isZip(ext)) {
var tmpdir = path.join(os.tmpdir(), uuid.v4());
return Promise.promisify(extract)(filepath, {dir: tmpdir}).then(function () {
return Promise.promisify(glob)('**/*.json', {cwd: tmpdir}).then(function (files) {
if (files[0]) {
// @TODO: handle multiple JSON files
return Promise.promisify(fs.readFile)(path.join(tmpdir, files[0]));
} else {
return Promise.reject(new errors.UnsupportedMediaTypeError(
'Zip did not include any content to import.'
));
}
});
});
}
}
/**
* ## DB API Methods
*
@ -90,9 +52,6 @@ db = {
*/
importContent: function (options) {
options = options || {};
var databaseVersion,
ext,
filepath;
// Check if a file was provided
if (!utils.checkFileExists(options, 'importfile')) {
@ -100,58 +59,20 @@ db = {
}
// Check if the file is valid
if (!utils.checkFileIsValid(options.importfile, types, extensions)) {
if (!utils.checkFileIsValid(options.importfile, importer.getTypes(), importer.getExtensions())) {
return Promise.reject(new errors.UnsupportedMediaTypeError(
'Please select either a .json or .zip file to import.'
'Unsupported file. Please try any of the following formats: ' +
_.reduce(importer.getExtensions(), function (memo, ext) {
return memo ? memo + ', ' + ext : ext;
})
));
}
// TODO refactor this out of here
filepath = options.importfile.path;
ext = path.extname(options.importfile.name).toLowerCase();
// Permissions check
return canThis(options.context).importContent.db().then(function () {
return api.settings.read(
{key: 'databaseVersion', context: {internal: true}}
).then(function (response) {
var setting = response.settings[0];
return setting.value;
}).then(function (version) {
databaseVersion = version;
// Read the file contents
return getJSONFileContents(filepath, ext);
}).then(function (fileContents) {
var importData;
// Parse the json data
try {
importData = JSON.parse(fileContents);
// if importData follows JSON-API format `{ db: [exportedData] }`
if (_.keys(importData).length === 1 && Array.isArray(importData.db)) {
importData = importData.db[0];
}
} catch (e) {
errors.logError(e, 'API DB import content', 'check that the import file is valid JSON.');
return Promise.reject(new errors.BadRequestError('Failed to parse the import JSON file.'));
}
if (!importData.meta || !importData.meta.version) {
return Promise.reject(
new errors.ValidationError('Import data does not specify version', 'meta.version')
);
}
// Import for the current version
return dataImport(databaseVersion, importData);
}).then(api.settings.updateSettingsCache)
.return({db: []})
.finally(function () {
// Unlink the file after import
return Promise.promisify(fs.unlink)(filepath);
});
return importer.importFromFile(options.importfile)
.then(api.settings.updateSettingsCache)
.return({db: []});
}, function () {
return Promise.reject(new errors.NoPermissionError('You do not have permission to import data (no rights).'));
});
@ -168,7 +89,7 @@ db = {
options = options || {};
return canThis(options.context).deleteAllContent.db().then(function () {
return Promise.resolve(dataProvider.deleteAllContent())
return Promise.resolve(models.deleteAllContent())
.return({db: []})
.catch(function (error) {
return Promise.reject(new errors.InternalServerError(error.message || error));

View File

@ -0,0 +1,36 @@
var _ = require('lodash'),
    Promise = require('bluebird'),
    fs = require('fs-extra'),
    errors = require('../../../errors'),
    JSONHandler;

JSONHandler = {
    // Handles the 'data' portion of an import: plain JSON export files
    type: 'data',
    extensions: ['.json'],
    types: ['application/octet-stream', 'application/json'],

    /**
     * Read the first provided JSON file from disk and parse it into import data.
     * @param {Array} files - file descriptors ({name, path}); only the first is read
     * @param {String} startDir - unused; part of the common handler signature
     * @returns {Promise} resolves with the parsed data object,
     *                    rejects with BadRequestError when the file is not valid JSON
     */
    loadFile: function (files, startDir) {
        /*jshint unused:false */
        // @TODO: Handle multiple JSON files
        var readFile = Promise.promisify(fs.readFile);

        return readFile(files[0].path).then(function (contents) {
            try {
                var parsed = JSON.parse(contents);

                // Exports in JSON-API format wrap everything as `{db: [exportedData]}` - unwrap it
                if (_.keys(parsed).length === 1 && Array.isArray(parsed.db)) {
                    parsed = parsed.db[0];
                }

                return parsed;
            } catch (e) {
                errors.logError(e, 'API DB import content', 'check that the import file is valid JSON.');
                return Promise.reject(new errors.BadRequestError('Failed to parse the import JSON file.'));
            }
        });
    }
};

module.exports = JSONHandler;

View File

@ -0,0 +1,15 @@
var importer = require('../../import'),
DataImporter;
DataImporter = {
type: 'data',
preProcess: function (importData) {
importData.preProcessedByData = true;
return importData;
},
doImport: function (importData) {
return importer('003', importData);
}
};
module.exports = DataImporter;

View File

@ -0,0 +1,266 @@
var _ = require('lodash'),
    Promise = require('bluebird'),
    sequence = require('../../utils/sequence'),
    pipeline = require('../../utils/pipeline'),
    fs = require('fs-extra'),
    path = require('path'),
    os = require('os'),
    glob = require('glob'),
    uuid = require('node-uuid'),
    extract = require('extract-zip'),
    errors = require('../../errors'),
    JSONHandler = require('./handlers/json'),
    DataImporter = require('./importers/data'),
    defaults;

// Zip handling is built into the manager itself, rather than being provided by a handler
defaults = {
    extensions: ['.zip'],
    types: ['application/zip', 'application/x-zip-compressed']
};

/**
 * The ImportManager knows which handlers can load which files, and which importers can
 * import the resulting data. It orchestrates the four step import process:
 * loadFile -> preProcess -> doImport -> generateReport
 */
function ImportManager() {
    this.importers = [DataImporter];
    this.handlers = [JSONHandler];
}

/**
 * The intermediate structure produced by handlers and consumed by importers
 * @typedef {Object} ImportData
 * @property [Object] data
 * @property [Array] images
 */

_.extend(ImportManager.prototype, {
    /**
     * Get an array of all the file extensions for which we have handlers
     * @returns []
     */
    getExtensions: function () {
        return _.flatten(_.union(_.pluck(this.handlers, 'extensions'), defaults.extensions));
    },
    /**
     * Get an array of all the mime types for which we have handlers
     * @returns []
     */
    getTypes: function () {
        return _.flatten(_.union(_.pluck(this.handlers, 'types'), defaults.types));
    },
    /**
     * Convert the extensions supported by a given handler into a glob string
     * e.g. ['.json'] becomes '**\/*+(.json)'
     * @returns String
     */
    getGlobPattern: function (handler) {
        return '**/*+(' + _.reduce(handler.extensions, function (memo, ext) {
            return memo !== '' ? memo + '|' + ext : ext;
        }, '') + ')';
    },
    /**
     * Remove a file or directory after we're done (abstracted into a function for easier testing)
     * @param {File} file
     * @returns {Function} pass-through handler suitable for use with .finally()
     */
    cleanUp: function (file) {
        var fileToDelete = file;
        return function (result) {
            try {
                fs.remove(fileToDelete);
            } catch (err) {
                // Best effort only - a stray temp file shouldn't fail the import
                errors.logError(err, 'Import could not clean up file', 'Your blog will continue to work as expected');
            }
            return result;
        };
    },
    /**
     * Return true if the given extension is a Zip
     * @returns Boolean
     */
    isZip: function (ext) {
        return _.contains(defaults.extensions, ext);
    },
    /**
     * Use the extract module to extract the given zip file to a temp directory & return the temp directory path
     * @param {String} filePath
     * @returns {Promise(String)} path of the temp directory
     */
    extractZip: function (filePath) {
        var tmpDir = path.join(os.tmpdir(), uuid.v4());
        return Promise.promisify(extract)(filePath, {dir: tmpDir}).then(function () {
            return tmpDir;
        });
    },
    /**
     * Use the handler extensions to get a globbing pattern, then use that to fetch all the files from the zip which
     * are relevant to the given handler, and return them as a name and path combo
     * @param {Object} handler
     * @param {String} directory
     * @returns [] Files
     */
    getFilesFromZip: function (handler, directory) {
        var globPattern = this.getGlobPattern(handler);
        return _.map(glob.sync(globPattern, {cwd: directory}), function (file) {
            return {name: file, path: path.join(directory, file)};
        });
    },
    /**
     * Process Zip
     * Takes a reference to a zip file, extracts it, sends any relevant files from inside to the right handler, and
     * returns an object in the importData format: {data: {}, images: []}
     * The data key contains JSON representing any data that should be imported
     * The image key contains references to images that will be stored (and where they will be stored)
     * @param {File} file
     * @returns {Promise(ImportData)}
     */
    processZip: function (file) {
        var self = this;

        return this.extractZip(file.path).then(function (directory) {
            var ops = [],
                importData = {},
                matchedTypes = {},
                duplicateType = false,
                startDir = glob.sync(file.name.replace('.zip', ''), {cwd: directory});

            startDir = startDir[0] || false;

            _.each(self.handlers, function (handler) {
                var files = self.getFilesFromZip(handler, directory);

                if (files.length > 0) {
                    // Returning a rejected promise from inside _.each would be silently
                    // discarded, so record the condition and reject after the loop.
                    // This limitation is here to reduce the complexity of the importer for now
                    if (matchedTypes.hasOwnProperty(handler.type)) {
                        duplicateType = true;
                        return false;
                    }
                    matchedTypes[handler.type] = true;

                    ops.push(function () {
                        return handler.loadFile(files, startDir).then(function (data) {
                            importData[handler.type] = data;
                        });
                    });
                }
            });

            if (duplicateType) {
                return Promise.reject(new errors.UnsupportedMediaTypeError(
                    'Zip file contains too many types of import data. Please split it up and import separately.'
                )).finally(self.cleanUp(directory));
            }

            if (ops.length === 0) {
                // Clean up the extracted temp directory even on the error path
                return Promise.reject(new errors.UnsupportedMediaTypeError(
                    'Zip did not include any content to import.'
                )).finally(self.cleanUp(directory));
            }

            return sequence(ops).then(function () {
                return importData;
            }).finally(self.cleanUp(directory));
        });
    },
    /**
     * Process File
     * Takes a reference to a single file, sends it to the relevant handler to be loaded and returns an object in the
     * importData format: {data: {}, images: []}
     * The data key contains JSON representing any data that should be imported
     * The image key contains references to images that will be stored (and where they will be stored)
     * @param {File} file
     * @param {String} ext - lowercased file extension including the dot
     * @returns {Promise(ImportData)}
     */
    processFile: function (file, ext) {
        var fileHandler = _.find(this.handlers, function (handler) {
            return _.contains(handler.extensions, ext);
        });

        // Guard against a TypeError when no handler supports this extension
        if (!fileHandler) {
            return Promise.reject(new errors.UnsupportedMediaTypeError(
                'Unsupported file extension: ' + ext
            ));
        }

        return fileHandler.loadFile([_.pick(file, 'name', 'path')]).then(function (loadedData) {
            // normalize the returned data so it is keyed by the handler's type
            var importData = {};
            importData[fileHandler.type] = loadedData;
            return importData;
        });
    },
    /**
     * Import Step 1:
     * Load the given file into usable importData in the format: {data: {}, images: []}, regardless of
     * whether the file is a single importable file like a JSON file, or a zip file containing loads of files.
     * @param {File} file
     * @returns {Promise}
     */
    loadFile: function (file) {
        var self = this,
            ext = path.extname(file.name).toLowerCase();

        return Promise.resolve(this.isZip(ext)).then(function (isZip) {
            if (isZip) {
                // If it's a zip, process the zip file
                return self.processZip(file);
            } else {
                // Else process the file
                return self.processFile(file, ext);
            }
        }).finally(self.cleanUp(file.path));
    },
    /**
     * Import Step 2:
     * Pass the prepared importData through the preProcess function of the various importers, so that the importers can
     * make any adjustments to the data based on relationships between it
     * @param {ImportData} importData
     * @returns {Promise(ImportData)}
     */
    preProcess: function (importData) {
        var ops = [];
        _.each(this.importers, function (importer) {
            ops.push(function () {
                return importer.preProcess(importData);
            });
        });

        return pipeline(ops);
    },
    /**
     * Import Step 3:
     * Each importer gets passed the data from importData which has the key matching its type - i.e. it only gets the
     * data that it should import. Each importer then handles actually importing that data into Ghost
     * @param {ImportData} importData
     * @returns {Promise} resolves with an array of the importers' results
     */
    doImport: function (importData) {
        var ops = [];
        _.each(this.importers, function (importer) {
            if (importData.hasOwnProperty(importer.type)) {
                ops.push(function () {
                    return importer.doImport(importData[importer.type]);
                });
            }
        });

        return sequence(ops);
    },
    /**
     * Import Step 4:
     * Report on what was imported, currently a no-op
     * @param {ImportData} importData
     * @returns {Promise(ImportData)}
     */
    generateReport: function (importData) {
        return Promise.resolve(importData);
    },
    /**
     * Import From File
     * The main method of the ImportManager, call this to kick everything off!
     * @param {File} file
     * @returns {*}
     */
    importFromFile: function (file) {
        var self = this;

        // Step 1: Handle converting the file to usable data
        return this.loadFile(file).then(function (importData) {
            // Step 2: Let the importers pre-process the data
            return self.preProcess(importData);
        }).then(function (importData) {
            // Step 3: Actually do the import
            // @TODO: It would be cool to have some sort of dry run flag here
            return self.doImport(importData);
        }).then(function (importData) {
            // Step 4: Finally, report on the import
            return self.generateReport(importData);
        });
    }
});

module.exports = new ImportManager();

View File

@ -0,0 +1,191 @@
/*globals describe, afterEach, it*/
/*jshint expr:true*/
var should = require('should'),
    sinon = require('sinon'),
    Promise = require('bluebird'),
    _ = require('lodash'),

    // Stuff we are testing
    ImportManager = require('../../server/data/importer'),
    JSONHandler = require('../../server/data/importer/handlers/json'),
    DataImporter = require('../../server/data/importer/importers/data'),

    sandbox = sinon.sandbox.create();

// To stop jshint complaining
should.equal(true, true);

describe('Importer', function () {
    afterEach(function () {
        sandbox.restore();
    });

    describe('ImportManager', function () {
        it('has the correct interface', function () {
            ImportManager.handlers.should.be.instanceof(Array).and.have.lengthOf(1);
            ImportManager.importers.should.be.instanceof(Array).and.have.lengthOf(1);
            ImportManager.loadFile.should.be.instanceof(Function);
            ImportManager.preProcess.should.be.instanceof(Function);
            ImportManager.doImport.should.be.instanceof(Function);
            ImportManager.generateReport.should.be.instanceof(Function);
        });

        it('gets the correct extensions', function () {
            ImportManager.getExtensions().should.be.instanceof(Array).and.have.lengthOf(2);
            ImportManager.getExtensions().should.containEql('.json');
            ImportManager.getExtensions().should.containEql('.zip');
        });

        it('gets the correct types', function () {
            ImportManager.getTypes().should.be.instanceof(Array).and.have.lengthOf(4);
            ImportManager.getTypes().should.containEql('application/octet-stream');
            ImportManager.getTypes().should.containEql('application/json');
            ImportManager.getTypes().should.containEql('application/zip');
            ImportManager.getTypes().should.containEql('application/x-zip-compressed');
        });

        it('globs extensions correctly', function () {
            ImportManager.getGlobPattern(JSONHandler).should.equal('**/*+(.json)');
        });

        // Step 1 of importing is loadFile
        describe('loadFile', function () {
            it('knows when to process a file', function (done) {
                var testFile = {name: 'myFile.json', path: '/my/path/myFile.json'},
                    zipSpy = sandbox.stub(ImportManager, 'processZip').returns(Promise.resolve()),
                    fileSpy = sandbox.stub(ImportManager, 'processFile').returns(Promise.resolve()),
                    cleanSpy = sandbox.stub(ImportManager, 'cleanUp').returns(Promise.resolve());

                // pass failures to done via .catch so they are reported instead of timing out
                ImportManager.loadFile(testFile).then(function () {
                    zipSpy.calledOnce.should.be.false;
                    fileSpy.calledOnce.should.be.true;
                    cleanSpy.calledOnce.should.be.true;
                    done();
                }).catch(done);
            });

            // We need to make sure we don't actually extract a zip and leave temporary files everywhere!
            it('knows when to process a zip', function (done) {
                var testZip = {name: 'myFile.zip', path: '/my/path/myFile.zip'},
                    zipSpy = sandbox.stub(ImportManager, 'processZip').returns(Promise.resolve()),
                    fileSpy = sandbox.stub(ImportManager, 'processFile').returns(Promise.resolve()),
                    cleanSpy = sandbox.stub(ImportManager, 'cleanUp').returns(Promise.resolve());

                ImportManager.loadFile(testZip).then(function () {
                    zipSpy.calledOnce.should.be.true;
                    fileSpy.calledOnce.should.be.false;
                    cleanSpy.calledOnce.should.be.true;
                    done();
                }).catch(done);
            });

            it('has same result for zips and files', function (done) {
                var testFile = {name: 'myFile.json', path: '/my/path/myFile.json'},
                    testZip = {name: 'myFile.zip', path: '/my/path/myFile.zip'},
                    // need to stub out the extract and glob function for zip
                    extractSpy = sandbox.stub(ImportManager, 'extractZip').returns(Promise.resolve('/tmp/dir/')),
                    getFileSpy = sandbox.stub(ImportManager, 'getFilesFromZip').returns(['/tmp/dir/myFile.json']),
                    jsonSpy = sandbox.stub(JSONHandler, 'loadFile').returns(Promise.resolve({posts: []})),
                    cleanSpy = sandbox.stub(ImportManager, 'cleanUp').returns(Promise.resolve());

                // chain the inner promise into the outer one so any failure reaches .catch(done)
                ImportManager.processZip(testZip).then(function (zipResult) {
                    extractSpy.calledOnce.should.be.true;
                    getFileSpy.calledOnce.should.be.true;
                    jsonSpy.calledOnce.should.be.true;
                    cleanSpy.calledOnce.should.be.true;

                    return ImportManager.processFile(testFile, '.json').then(function (fileResult) {
                        jsonSpy.calledTwice.should.be.true;

                        // They should both have data keys, and they should be equivalent
                        zipResult.should.have.property('data');
                        fileResult.should.have.property('data');
                        zipResult.should.eql(fileResult);
                        done();
                    });
                }).catch(done);
            });
        });

        // Step 2 of importing is preProcess
        describe('preProcess', function () {
            // preProcess can modify the data prior to importing
            it('calls the DataImporter preProcess method', function (done) {
                var input = {data: {}, images: []},
                    // pass a copy so that input doesn't get modified
                    inputCopy = _.cloneDeep(input),
                    dataSpy = sandbox.spy(DataImporter, 'preProcess');

                ImportManager.preProcess(inputCopy).then(function (output) {
                    dataSpy.calledOnce.should.be.true;
                    dataSpy.calledWith(inputCopy).should.be.true;
                    // eql checks for equality
                    // equal checks the references are for the same object
                    output.should.not.equal(input);
                    output.should.have.property('preProcessedByData', true);
                    done();
                }).catch(done);
            });
        });

        // Step 3 of importing is doImport
        describe('doImport', function () {
            // doImport calls the real importers and has an effect on the DB. We don't want any of those calls to be made,
            // but to test that the right calls would be made
            it('calls the DataImporter doImport method with the data object', function (done) {
                var input = {data: {posts: []}, images: []},
                    // pass a copy so that input doesn't get modified
                    inputCopy = _.cloneDeep(input),
                    dataSpy = sandbox.stub(DataImporter, 'doImport', function (i) {
                        return Promise.resolve(i);
                    }),

                    // The data importer should get the data object
                    expect = input.data;

                ImportManager.doImport(inputCopy).then(function (output) {
                    // eql checks for equality
                    // equal checks the references are for the same object
                    dataSpy.calledOnce.should.be.true;
                    dataSpy.getCall(0).args[0].should.eql(expect);

                    // we stubbed this as a noop but ImportManager calls with sequence, so we should get an array
                    output.should.eql([expect]);
                    done();
                }).catch(done);
            });
        });

        // Step 4 of importing is generateReport
        describe('generateReport', function () {
            // generateReport is intended to create a message to show to the user about what has been imported
            // it is currently a noop
            it('is currently a noop', function (done) {
                var input = {data: {}, images: []};

                ImportManager.generateReport(input).then(function (output) {
                    output.should.equal(input);
                    done();
                }).catch(done);
            });
        });
    });

    describe('JSONHandler', function () {
        it('has the correct interface', function () {
            JSONHandler.type.should.eql('data');
            JSONHandler.extensions.should.be.instanceof(Array).and.have.lengthOf(1);
            JSONHandler.extensions.should.containEql('.json');
            JSONHandler.types.should.be.instanceof(Array).and.have.lengthOf(2);
            JSONHandler.types.should.containEql('application/octet-stream');
            JSONHandler.types.should.containEql('application/json');
            JSONHandler.loadFile.should.be.instanceof(Function);
        });
    });

    describe('DataImporter', function () {
        it('has the correct interface', function () {
            DataImporter.type.should.eql('data');
            DataImporter.preProcess.should.be.instanceof(Function);
            DataImporter.doImport.should.be.instanceof(Function);
        });
    });
});