Tuesday, 23 May 2017

NodeJS and Mongo - checking for already assigned values

I have an application that allows csv upload, converts to JSON and then imports into mongo. Once imported it is possible to update records to assign it to a category which works fine.

However what I would now like to implement is a check on import of new csv data to see if there are any names in the data that have previously been assigned to a category, and if there are to automatically set to the same category.

Here is my POST route using multer:

router.post('/',
  multer({
    storage: storage,
    fileFilter: function(req, file, callback) {
        var ext = path.extname(file.originalname)
        if (ext !== '.csv') {
            return callback(res.end('Only csvs are allowed'), null)
        }
        callback(null, true)
    }
})
    .single('statement'),
function(req, res) {
    console.log(req.body.bank, 'Body');
    var filename = req.file.filename;

    statementImport.statementSeeder(statements, function (err) {
        if (err) throw err;
        console.log('finished running some-script.js');
    });

    res.redirect('/upload/today');

});

statementImport.statementSeeder is a function I call in which persists the data:

var statementSeeder = function statementSeeder(statements) {
var Statement = require('../models/statement');
var mongoose = require('mongoose');


var parseDate = require('./parseDate');
var splitName = require('./splitName');


var done = 0;
// set substring to three space.  This will be used to check for those 
spaces in the name values in the source data
substring = "   ";
for( var i = 0; i < statements.length; i++ ) {
    // create new statement object
    var newStatement = new Statement();
    //use the stringToDate helper function to cast the string in imput file to a Date
    newStatement.date = parseDate.stringToDate(statements[i].date);

    //check the name field to see it includes the spaces defined in substring var above
    if(statements[i].vendor.includes(substring)){
        //if it does contain spaces then we want to get the last three chars of the entire string
        //and store those in method var which will then go into db.
        //we trim to remove any white space for methods that are only 2 chars long
        var method = statements[i].vendor.substr(statements[i].vendor.length - 3).trim();

        //for some reason HSBC often appends ))) to the end of strings. We want to chuck away those away!
        if(method !==')))'){

            //if the value isn't ))) and meets the criteria above we can inset into the db
            newStatement.method = statements[i].vendor.substr(statements[i].vendor.length - 3).trim();
        }
    }
    newStatement.name = splitName.splitName(statements[i].vendor);
    newStatement.amount = Number(statements[i].amount.replace(/,/g, ''));

    newStatement.save(function(err, result){
        done++;
        if(done === statements.length){
            exit();
        }
        else {
            console.log('imported '+ statements[done].vendor)
        }
    });

}

function exit(){
    mongoose.disconnect();
};

};


module.exports = {
  statementSeeder: statementSeeder
};

splitName.splitName(statements[i].vendor) is the name of the value I want to check to see already has a category assigned (the splitName function just breaks a load of white space and uneccesary chars from the value in the csv).

I have some code which goes part way to doing what I want but I can't quite figure out how to integrate it.

var categoriesCallback = function(docs){
    for(i=0; i < docs.length; i++){
        var needle = "AMERICAN EXP 3717";
        var isInArray = docs[i].name.includes(needle);
        if(isInArray){
            console.log(docs[i]._id);
            return docs[i]._id;
        }
    }
};

var getCategories = function(categoriesCallback){
    // find all the distinct catgories in the db
    Statement.aggregate(
        [
            { $group : { _id : "$category", name: { $push: "$name" } } }
        ]
        , function(err, docs){
            categoriesCallback(docs);
        });
};

You can see that I have hardcoded the value of needle here to be AMERICAN EXP 3717 while I test but in reality I think this should be the value of splitName.splitName(statements[i].vendor) to check for any assigned category.

Running getCategories(categoriesCallback); correctly goes through the records from Mongo, checks whether AMERICAN EXP 3717 has already been assigned a category and then document.logs the the correct category name console.log(docs[i]._id);.

However I don't know how to make my code go through the names of all the records in another upload and then write the correct category name to mongo (schema has category: {type:String, default:'Not Set'},) . I'm not sure returning the category value as I am doing is the right way.

Any help much appreciated!



via Stuart Brown

No comments:

Post a Comment