I'm trying to build a web scraper using Crawlera. I'm fairly new to this, so I tried creating a wrapper function:
const request = require('request')

const crawlera = {
  apikey: 'asdfasdfasdfasdf',
  limit: 10,  // max concurrent requests
  count: 0    // requests currently in flight
}

crawlera.get = (url) => {
  console.log('count: ' + crawlera.count + ' limit: ' + crawlera.limit)
  if (crawlera.count < crawlera.limit) {
    return new Promise((resolve, reject) => {
      const options = {
        url: url,
        proxy: 'http://' + crawlera.apikey + ':@proxy.crawlera.com:8010',
        rejectUnauthorized: false
      }
      crawlera.count += 1
      request(options, (err, res, html) => {
        crawlera.count -= 1
        if (err) {
          console.log('Proxy Error. Retrying..')
          crawlera.get(url) // retry -- but this new promise never reaches the caller
        } else {
          console.log('Crawlera: ' + url)
          resolve({ html: html, url: options.url })
        }
      })
    })
  } else {
    return Promise.resolve('concurrency count maxed')
  }
}

module.exports = crawlera
I then call this in my main app with:
crawlera.get('http://someurl').then(res => console.log('do something with the result.'))
The request above sometimes fails with a proxy timeout. When that happens I want to retry the request until it succeeds and then continue down the .then chain, but the recursive crawlera.get(url) call in the error branch is not achieving this.
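To make the goal concrete, here is a rough sketch of the behavior I'm after (getWithRetry is just a placeholder name, and I've left out the proxy options and the concurrency limit):

const request = require('request')

// Sketch only: retry by reusing the same resolve, so the promise
// handed back to the caller settles only once a request succeeds.
const getWithRetry = (url) =>
  new Promise((resolve, reject) => {
    const attempt = () => {
      request({ url: url }, (err, res, html) => {
        if (err) {
          console.log('Proxy Error. Retrying..')
          attempt() // same promise, so the caller keeps waiting
        } else {
          resolve({ html: html, url: url })
        }
      })
    }
    attempt()
  })

// the .then fires only after a successful attempt
getWithRetry('http://someurl').then(res => console.log(res.url))

Is reusing the same resolve like this the right way to wire the retry back into the original promise?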