Friday, 2 June 2017

How can I safely perform operations on, and then insert 250,000+ words from a .txt file asynchronously without causing a stack overflow?

What I'm trying to do, is read in a .txt file of words, slit them at newlines, and then for each word in the constructed array, perform operations on the word (that matches the Word Schema I'm using) to determine the letter count for each word like for "word (0 A's, 0 B's, 1 W, 1 O, 1 R, 1 D, O Z's etc...), and then insert each Word into the database.

Here is the mongoose schema for a Word "shape" for database entries (models/words.js)

var restful = require('node-restful');
var mongoose = restful.mongoose;

// MongoDB Schema
var wordSchema = new mongoose.Schema({
    code: String,
    word: String,
    lettersCount: {
        'a': Number,
        'b': Number,
        'c': Number,
        'd': Number,
        'e': Number,
        'f': Number,
        'g': Number,
        'h': Number,
        'i': Number,
        'j': Number,
        'k': Number,
        'l': Number,
        'm': Number,
        'n': Number,
        'o': Number,
        'p': Number,
        'q': Number,
        'r': Number,
        's': Number,
        't': Number,
        'u': Number,
        'v': Number,
        'w': Number,
        'x': Number,
        'y': Number,
        'z': Number
    }
});

// Return model
module.exports = restful.model(
    'Words',
    wordSchema
);

Now, my data is in the file dictionaries/words.txt.

In the main file called server.js, I'm calling this function:

populateDictionary();

The tasks/populateDictionary.js file has the following function to do the database entries:

var populateDictionary = function(dict) {
    Word.remove().exec();
    fs.readFileAsync('dictionaries/words.txt', 'utf8').then(function(data, err) {
        if (err) throw err;
        var dictionary = data.split('\n');
        for (var i = 0; i < dictionary.length; i++) {
            var entry = new Word({
                word: dictionary[i],
                lettersCount: {
                    'a': 0, 'b': 0, 'c': 0, 'd': 0,
                    'e': 0, 'f': 0, 'g': 0, 'h': 0,
                    'i': 0, 'j': 0, 'k': 0, 'l': 0,
                    'm': 0, 'n': 0, 'o': 0, 'p': 0,
                    'q': 0, 'r': 0, 's': 0, 't': 0,
                    'u': 0, 'v': 0, 'w': 0, 'x': 0,
                    'y': 0, 'z': 0
                }
            });
            for (var j = 0; j < entry.word.length; j++) {
                entry.lettersCount[entry.word[j]]++;
            }
            console.log(entry);
            entry.save();
        }
    });
};

So, I'm fairly new to databases, but think there's a good solution out there to this, just not sure what... I'm basically making a huge call stack, and it's crashing my computer. I'm looking for the right way to do this kind of thing. Thanks!



via Joshua Michael

No comments:

Post a Comment