Tuesday, 2 May 2017

Recursive NodeJS HTTP Request on Failure until Success

I'm trying to build a web scraper using Crawlera. I'm a bit new to this, so I tried creating a wrapper function:

const request = require('request')

var crawlera = {
    apikey: 'asdfasdfasdfasdf',
    limit: 10,
    count: 0
}

crawlera.get = (url) => {
    console.log('count: ' + crawlera.count + ' limit: ' + crawlera.limit)
    if (crawlera.count < crawlera.limit) {
        var promise = new Promise((resolve, reject) => {
            let options = {
                url: url,
                proxy: 'http://' + crawlera.apikey + ':@proxy.crawlera.com:8010',
                rejectUnauthorized: false
            }
            request(options, (err, res, html) => {
                crawlera.count -= 1
                if (err) {
                    console.log('Proxy Error. Retrying..')
                    // retry on failure (this is the call that isn't working)
                    crawlera.get(url)
                } else {
                    console.log('Crawlera: ' + url)
                    let results = {}
                    results.html = html
                    results.url = options.url
                    resolve(results)
                }
            })
            crawlera.count += 1 // track in-flight requests
        })
        return promise
    } else {
        return Promise.resolve('concurrency count maxed')
    }
}

module.exports = crawlera

I then call this in my main app with:

crawlera.get('http://someurl').then(res => console.log('do something with the result.'))

The request above sometimes fails with a proxy timeout. When that happens, I want to retry the request until it succeeds and then continue down the .then chain, but the recursive crawlera.get(url) call in the error branch isn't achieving this.
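From what I've read, resolve() adopts the state of a promise passed to it, so I suspect the error branch needs to resolve with the retry's promise rather than just calling it. Here is a minimal, untested sketch of that idea as a standalone helper (getWithRetry is a hypothetical name, not part of my code above, and it leaves out the concurrency bookkeeping):

const request = require('request')

function getWithRetry(options) {
    return new Promise((resolve, reject) => {
        request(options, (err, res, html) => {
            if (err) {
                console.log('Proxy Error. Retrying..')
                // Resolve with the retry's promise: the outer promise
                // then settles with whatever the retry eventually
                // produces, so the original .then chain continues
                // once a retry finally succeeds.
                resolve(getWithRetry(options))
            } else {
                resolve({ html: html, url: options.url })
            }
        })
    })
}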



