I am trying to replicate http://pin1yin1.com/ (https://github.com/Pin1yin1/pin1yin1), which takes a string of Chinese characters, groups it into bands of 5 characters, and tries to look up each band in a database. If no match is found, it removes the last character and searches for the 4-character string, and so on, until it finds a match.
For example 大猫头鹰 (big owl) will do the following:
- Search 大猫头鹰 - no results
- Search 大猫头 - no results
- Search 大猫 - no results
- Search 大 - result big
- Search 猫头鹰 - result owl
I have this working successfully using async waterfalls; however, my solution is quite a bit slower than pin1yin1. Pin1yin1 returns results almost instantaneously, even for strings with 50+ characters, whereas mine takes 15 seconds for 50+ characters.
Have I misunderstood pin1yin1's algorithm, or is my approach inefficient?
// Greedy longest-match segmentation of `str` against `dictionary`.
//
// Starting at offset `i`, the window str.substring(i, j) (at most
// `maxWordLength` characters) is looked up. On a hit, the matching rows are
// appended to `returnArray` as { r: [{ s, p, d }, ...] } and the window
// restarts right after the matched word. On a miss, the window is shortened
// by one character and the lookup is retried.
//
// Expects from the enclosing scope:
//   str        - the string to segment
//   dictionary - queryable model; where('s', word).fetchAll() must return a
//                promise for a collection exposing `.length` and `.models`,
//                each model carrying `.attributes.{s,p,d}`
//                (presumably a Bookshelf model - confirm against its definition)
//   async      - the `async` control-flow library (synchronous `whilst` test,
//                i.e. the pre-v3 calling convention used by the original code)
//   resolve    - completion function, called exactly once as
//                resolve(returnArray, null) on success or resolve(null, err)
//                on a lookup failure
var maxWordLength = 5;
var returnArray = [];
var charArray = str.split('');
var i = 0;
// Clamp the first window to the input length; otherwise inputs shorter than
// maxWordLength would issue several identical queries while j counts down.
var j = Math.min(maxWordLength, charArray.length);
var searchWord = str.substring(i, j);

// Restart the lookup window at `start`, never reaching past the end of input.
function startWindowAt(start) {
  i = start;
  j = Math.min(start + maxWordLength, charArray.length);
  searchWord = str.substring(i, j);
}

async.whilst(function() { return i <= charArray.length - 1; },
  function(sentenceCallback) {
    async.waterfall([
      function(callback) {
        // Two-argument then(): a failure raised downstream of callback() must
        // not be routed back into callback() a second time.
        dictionary.where('s', searchWord).fetchAll().then(function(words) {
          callback(null, words);
        }, function(err) {
          callback(err);
        });
      },
    ], function(err, result) {
      if (err) {
        // Abort the loop; the final handler below reports the error.
        sentenceCallback(err);
        return;
      }

      if (result.length > 0) {
        var answers = result.models.map(function(model) {
          return {
            s: model.attributes.s,
            p: model.attributes.p,
            d: model.attributes.d
          };
        });
        returnArray.push({ r: answers });
        startWindowAt(i + searchWord.length);
      } else {
        j--;
        if (j > i) {
          searchWord = str.substring(i, j);
        } else {
          // Not even the single character at `i` is in the dictionary.
          // Skip it; leaving i and searchWord unchanged would repeat the
          // same failing query forever.
          startWindowAt(i + 1);
        }
      }
      sentenceCallback();
    });
  }, function(err) {
    if (err) {
      resolve(null, err);
      return;
    }
    resolve(returnArray, null);
  });
via Starchand
No comments:
Post a Comment