I'm going to be honest. I'm way in over my head here.
I need to scrape data from a dynamic site for my employer. Before the data is visible on the page, there are some clicks and waits necessary. Simple PHP scraping won't do. So I found out about this NodeJS + PhantomJS combo. Quite a pain to set up, but I did manage to load a site, run some code and get a result.
I wrote a piece of jQuery which uses timeout loops to wait for some data to be loaded. Eventually I get a js object that I want to write to a file (JSON).
The issue I'm facing. I build up the the js object inside the PhantomJS .evaluate scope, which runs in a headerless browser, so not directly in my Node.JS server scope. How do I send the variable I built up inside evaluate back to my server so I can write it to my file?
Some example code (I know it's ugly, but it's for illustrative purposes). I use node-phantom-simple as a bridge between Phantom and Node
var phantom = require('node-phantom-simple'),
fs = require('fs'),
webPage = 'https://www.imagemedia.com/printing/business-card-printing/'
phantom.create(function(err, ph) {
return ph.createPage(function(err, page) {
return page.open(webPage, function(err, status) {
page.onConsoleMessage = function(msg) {
console.log(msg);
};
console.log("opened site? ", status);
page.evaluate(function() {
setTimeout(function() {
$('.price-select-cnt').eq(0).find('select').val('1266').change()
timeOutLoop()
function timeOutLoop() {
console.log('looping')
setTimeout(function() {
if ($('#ajax_price_tool div').length != 6) {
timeOutLoop()
} else {
$('.price-select-cnt').eq(1).find('select').val('25')
$('.price-select-cnt').eq(2).find('select').val('Premium Card Stock')
$('.price-select-cnt').eq(3).find('select').val('Standard').change()
timeOutLoop2()
}
}, 100)
}
function timeOutLoop2() {
console.log('looping2')
setTimeout(function() {
if ($('.pricing-cost-cnt').text() == '$0' || $('.pricing-cost-cnt').text() == '') {
timeOutLoop2()
} else {
var price = $('.pricing-cost-cnt').text()
console.log(price)
}
}, 100)
}
}, 4000)
});
});
});
});
function writeJSON(plsWrite) {
var key = 'file'
fs.writeFile('./results/' + key + '.json', plsWrite, 'utf8', function() {
console.log('The JSON file is saved as');
console.log('results/' + key + '.json');
});
}
So do do I write the price this code takes from the website, get it out of the evaluate scope and write it to a file?
via Chanan Ippel
No comments:
Post a Comment