I'm practicing scraping and I am trying to scrape a list of agents into a JSON array. My code currently only scrapes the last person 4 times. I'm wondering how to iterate through each class that repeats.
var express = require('express');
var fs = require('fs');
var request = require('request');
var cheerio = require('cheerio');
var app = express();
app.get('/scrape', function(req, res){
char = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x',
'y','z']
url = 'http://www.nhlpa.com/inside-nhlpa/certified-player-agents/find-an-agent?ln=A';
request(url, function(error, response, html){
if(!error){
var $ = cheerio.load(html);
var agent, agency, address, street, city, state, country, zip, deskphone, fax, email, cell;
var json = { agent : "", agency : "", street : "", city : "", state : "", country : "", zip : "", deskphone : "", fax : "", email : "", cell : ""};
var jsonarry = []
$('.inBox').each(function(i, elem) {
$('.inBodyText').filter(function(){
var data = $(this);
agent = data.children().first().text();
//agency = data.children().last().children().text();
json.agent = agent;
})
$('.inCaption').filter(function(){
var data = $(this);
agency = data.children().children().first().next().text();
json.agency = agency;
street = data.children().children().first().next().next().text();
json.street = street;
address = data.children().children().first().next().next().next().text().replace(/ /g,'');
address = address.split(",");
json.city = address[0];
json.state = address[1]
json.country = address[2]
zip = data.children().children().first().next().next().next().next().text();
json.zip = zip
deskphone = data.children().children().last().prev().prev().prev().text();
json.deskphone = deskphone
fax = data.children().children().last().prev().prev().text();
json.fax = fax
email = data.children().children().last().prev().text();
json.email = email
cell = data.children().children().last().text();
json.cell = cell
})
jsonarry.push(json)
});
}
fs.writeFile('output.json', JSON.stringify(jsonarry, null, 4), function(err){
console.log('File successfully written! - Check your project directory for the output.json file');
})
// Finally, we'll just send out a message to the browser reminding you that this app does not have a UI.
res.send(html)
}) ;
})
app.listen('8081')
console.log('Magic happens on port 8081');
exports = module.exports = app;
via user1093111
No comments:
Post a Comment