I have an application that allows csv upload, converts to JSON and then imports into mongo. Once imported it is possible to update records to assign it to a category which works fine.
However what I would now like to implement is a check on import of new csv data to see if there are any names in the data that have previously been assigned to a category, and if there are to automatically set to the same category.
Here is my POST route using multer:
router.post('/',
multer({
storage: storage,
fileFilter: function(req, file, callback) {
var ext = path.extname(file.originalname)
if (ext !== '.csv') {
return callback(res.end('Only csvs are allowed'), null)
}
callback(null, true)
}
})
.single('statement'),
function(req, res) {
console.log(req.body.bank, 'Body');
var filename = req.file.filename;
statementImport.statementSeeder(statements, function (err) {
if (err) throw err;
console.log('finished running some-script.js');
});
res.redirect('/upload/today');
});
statementImport.statementSeeder
is a function I call in which persists the data:
var statementSeeder = function statementSeeder(statements) {
var Statement = require('../models/statement');
var mongoose = require('mongoose');
var parseDate = require('./parseDate');
var splitName = require('./splitName');
var done = 0;
// set substring to three space. This will be used to check for those
spaces in the name values in the source data
substring = " ";
for( var i = 0; i < statements.length; i++ ) {
// create new statement object
var newStatement = new Statement();
//use the stringToDate helper function to cast the string in imput file to a Date
newStatement.date = parseDate.stringToDate(statements[i].date);
//check the name field to see it includes the spaces defined in substring var above
if(statements[i].vendor.includes(substring)){
//if it does contain spaces then we want to get the last three chars of the entire string
//and store those in method var which will then go into db.
//we trim to remove any white space for methods that are only 2 chars long
var method = statements[i].vendor.substr(statements[i].vendor.length - 3).trim();
//for some reason HSBC often appends ))) to the end of strings. We want to chuck away those away!
if(method !==')))'){
//if the value isn't ))) and meets the criteria above we can inset into the db
newStatement.method = statements[i].vendor.substr(statements[i].vendor.length - 3).trim();
}
}
newStatement.name = splitName.splitName(statements[i].vendor);
newStatement.amount = Number(statements[i].amount.replace(/,/g, ''));
newStatement.save(function(err, result){
done++;
if(done === statements.length){
exit();
}
else {
console.log('imported '+ statements[done].vendor)
}
});
}
function exit(){
mongoose.disconnect();
};
};
module.exports = {
statementSeeder: statementSeeder
};
splitName.splitName(statements[i].vendor)
is the name of the value I want to check to see already has a category assigned (the splitName
function just breaks a load of white space and uneccesary chars from the value in the csv).
I have some code which goes part way to doing what I want but I can't quite figure out how to integrate it.
var categoriesCallback = function(docs){
for(i=0; i < docs.length; i++){
var needle = "AMERICAN EXP 3717";
var isInArray = docs[i].name.includes(needle);
if(isInArray){
console.log(docs[i]._id);
return docs[i]._id;
}
}
};
var getCategories = function(categoriesCallback){
// find all the distinct catgories in the db
Statement.aggregate(
[
{ $group : { _id : "$category", name: { $push: "$name" } } }
]
, function(err, docs){
categoriesCallback(docs);
});
};
You can see that I have hardcoded the value of needle
here to be AMERICAN EXP 3717
while I test but in reality I think this should be the value of splitName.splitName(statements[i].vendor)
to check for any assigned category.
Running getCategories(categoriesCallback);
correctly goes through the records from Mongo, checks whether AMERICAN EXP 3717
has already been assigned a category and then document.logs the the correct category name console.log(docs[i]._id);
.
However I don't know how to make my code go through the names of all the records in another upload and then write the correct category name to mongo (schema has category: {type:String, default:'Not Set'},
) . I'm not sure returning the category value as I am doing is the right way.
Any help much appreciated!
via Stuart Brown
No comments:
Post a Comment