Thursday, 13 April 2017

Calling external function from within Phantomjs+node.js

I'm going to be honest. I'm way in over my head here.

I need to scrape data from a dynamic site for my employer. Before the data is visible on the page, there are some clicks and waits necessary. Simple PHP scraping won't do. So I found out about this NodeJS + PhantomJS combo. Quite a pain to set up, but I did manage to load a site, run some code and get a result.

I wrote a piece of jQuery which uses timeout loops to wait for some data to be loaded. Eventually I get a js object that I want to write to a file (JSON).

The issue I'm facing. I build up the the js object inside the PhantomJS .evaluate scope, which runs in a headerless browser, so not directly in my Node.JS server scope. How do I send the variable I built up inside evaluate back to my server so I can write it to my file?

Some example code (I know it's ugly, but it's for illustrative purposes). I use node-phantom-simple as a bridge between Phantom and Node

var phantom = require('node-phantom-simple'),
  fs = require('fs'),
  webPage = 'https://www.imagemedia.com/printing/business-card-printing/'

phantom.create(function(err, ph) {
  return ph.createPage(function(err, page) {
    return page.open(webPage, function(err, status) {
      page.onConsoleMessage = function(msg) {
        console.log(msg);
      };
      console.log("opened site? ", status);
      page.evaluate(function() {
        setTimeout(function() {
          $('.price-select-cnt').eq(0).find('select').val('1266').change()
          timeOutLoop()

          function timeOutLoop() {
            console.log('looping')
            setTimeout(function() {
              if ($('#ajax_price_tool div').length != 6) {
                timeOutLoop()
              } else {
                $('.price-select-cnt').eq(1).find('select').val('25')
                $('.price-select-cnt').eq(2).find('select').val('Premium Card Stock')
                $('.price-select-cnt').eq(3).find('select').val('Standard').change()
                timeOutLoop2()
              }
            }, 100)
          }

          function timeOutLoop2() {
            console.log('looping2')
            setTimeout(function() {
              if ($('.pricing-cost-cnt').text() == '$0' || $('.pricing-cost-cnt').text() == '') {
                timeOutLoop2()
              } else {
                var price = $('.pricing-cost-cnt').text()
                console.log(price)
              }
            }, 100)
          }
        }, 4000)
      });
    });
  });
});

function writeJSON(plsWrite) {
  var key = 'file'
  fs.writeFile('./results/' + key + '.json', plsWrite, 'utf8', function() {
    console.log('The JSON file is saved as');
    console.log('results/' + key + '.json');
  });
}

So do do I write the price this code takes from the website, get it out of the evaluate scope and write it to a file?



via Chanan Ippel

No comments:

Post a Comment