i am using crawler package
in nodejs
and i am able to get link of next page using jquery but i am stuck in automation. so i want to automate the process by running same script again and again so i can scrape entire website.
var Crawler = require('Crawler');
var c = new Crawler({
maxConnactions:10,
callback: function(err,rs,done){
if (err) {throw err;}
else{
var $ = rs.$;
var tag = $('span.next-button');
tag.each(function(index,item){
//targetting next page url
var target = $(this).find('a').attr('href');
//stored in db...
db.push(target);
console.log(target);
})
}
}
})
c.queue('https://www.reddit.com/r/fullmoviesongoogle/');
//so i want this link right here but its not possible beacouse of asysc code..
//and i also want to run this same function when i get this new link..
c.queue(db[0]);
so basically i want to build crawler sort of thing that scrape entire website by scraping next page link...
thanks in advance :)
via Kartik Garasia
No comments:
Post a Comment