I am running a PhantomJS script from node, using a child process and execFile() function. I am passing a url from my node server to the script as an argument. Sometimes the url results in a 404, and no script tags are found within the page. This results in the PhantomJS script crashing, and eventually my app. How can I take care of null values when calling document.getElementsByTagName('script')[i].innerHTML in the following code ?
"use strict";
var page = require('webpage').create();
var system = require('system');
var args = system.args;
page.open(args[1], function () {
var scriptCtnt = page.evaluate(function() {
for (var i = 0; i < 20; i++) {
var scriptStr = [].map.call(document.getElementsByTagName('script')[i].innerHTML, function(data) {
return data;
}).join('');
if (scriptStr.indexOf('some.field') !== -1) {
return scriptStr;
}
}
});
var str = scriptCtnt;
str = str.replace('//<![CDATA[','{');
str = str.replace(/}},/g,'}}');
var jsonStr = JSON.stringify(JSON.parse(str));
console.log(jsonStr);
phantom.exit();
});
N.B : I tried to make a variable and to check for undefined values, but it didn't work. I also can't deal with this issue in my node api, because neither error nor stderr are called.
The node code calling the PhantomJS script :
const execFile = require('child_process').execFile;
let bin = "phantomjs"
let args = ['./my.service.js', urlToScrape];
let jsonStr;
execFile(bin, args , (error, stdout, stderr) => {
if (error) {
return;
}
console.log(`stdout: ${stdout}`);
if (stdout) {
res.json(JSON.parse(stdout));
} else {
res.json(stderr);
}
});
via Stanislasdrg
No comments:
Post a Comment