Wednesday, 26 April 2017

Downloading images off a website with node.js

I'm trying to download every image on a website using a node script.

I wrote it and for the most part it seems to be working, however, it's only downloading the first image. It downloads it equal to the number of images on the site.

Here is my code.

const http = require('http'),
cheerio = require('cheerio'),
fs = require('fs');
var document = '';
var imageData = '';
http.get('http://www.wikihow.com/Start-a-Blog', function(res){
    res.on('data', function(chunk){
        document+=chunk;
    })
res.on('end', function(){
    let $ = cheerio.load(document); 
    var array = [];
    var array = $("img").toArray();
    var data = [];
    array.forEach(function (ele) {
        if (ele.attribs.src !== undefined)
        data.push(ele.attribs.src);
    })
    var counter = 0;
    data.forEach(function (ele) {
        ripImage(ele, counter);
        counter ++;
    })
})
});

function ripImage(ele, counter){

http.get(ele, function(res){
    console.log(res);
    res.setEncoding('binary')
    res.on('data', function(chunk){
        imageData += chunk;
    })
    res.on('end', function(){
        //console.log(ele);
            fs.writeFile("dump/file" + counter + ".jpg", imageData, 'binary', function(err){
            if (err) throw err
            //console.log('File saved.')
        });
    //res.pipe(file);
    })
});
}

I think the problem lies somewhere in the ripImage() function. If you guys can see the problem, and help me fix it, that'd be really appreciated.

Thanks guys.



via Gwinert

No comments:

Post a Comment