I am using this NodeJS wrapper for PhantomJS with PhantomJS Prebuilt to be able to compile my program into one executable later.
My goal is to log into a website that requires JavaScript, access the downloads page and download the correct file. I cannot do this directly because the download URL consists of unknown hashes and requires authentication that is achieved by logging in. (I have not tried sending a post request directly to log into the downloads page, if that is possible)
I have easily managed to do all of that except the download part. I think the reason I am getting errors is that the download URL responds with a Content-Disposition: attachment header.
The link looks like this: https://store.domain.com/download/bcad1d9e-1504-11e7-aee3-d4bed9b6dbe0/6480bc7c625cbb05701e8d12bce2b034
The headers sent look like this:
Accept-Ranges:bytes
Cache-Control:public
Content-Disposition:attachment; filename="File-1.0.zip"
Content-Transfer-Encoding:binary
Content-Type:application/zip
Date:Tue, 09 May 2017 16:20:29 GMT
Last-Modified:Thu, 30 Mar 2017 04:47:04 GMT
Server:Apache
Set-Cookie:keeper=<some long hash here>; expires=Tue, 09-May-2017 18:20:29 GMT; path=/; domain=.domain.com; httponly; Secure
Strict-Transport-Security:max-age=31536000
Transfer-Encoding:chunked
X-Application-Server:customer-web2
X-Frame-Options:SAMEORIGIN
Using the horseman.download() function does not work on a Content-Disposition attachment, but works on other links. This is the error I get:
Unhandled rejection [object Object] at Horseman.<anonymous> (/Users/artis/dev/projects/myproject/repo/nodejs/node_modules/node-horseman/lib/actions.js:839:38)
at Horseman.tryCatcher (/Users/artis/dev/projects/myproject/repo/nodejs/node_modules/bluebird/js/release/util.js:16:23)
at Promise._settlePromiseFromHandler (/Users/artis/dev/projects/myproject/repo/nodejs/node_modules/bluebird/js/release/promise.js:512:31)
at Promise._settlePromise (/Users/artis/dev/projects/myproject/repo/nodejs/node_modules/bluebird/js/release/promise.js:569:18)
at Promise._settlePromiseCtx (/Users/artis/dev/projects/myproject/repo/nodejs/node_modules/bluebird/js/release/promise.js:606:10)
at Async._drainQueue (/Users/artis/dev/projects/myproject/repo/nodejs/node_modules/bluebird/js/release/async.js:138:12)
at Async._drainQueues (/Users/artis/dev/projects/myproject/repo/nodejs/node_modules/bluebird/js/release/async.js:143:10)
at Immediate.Async.drainQueues (/Users/artis/dev/projects/myproject/repo/nodejs/node_modules/bluebird/js/release/async.js:17:14)
at runCallback (timers.js:672:20)
at tryOnImmediate (timers.js:645:5)
at processImmediate [as _immediateCallback] (timers.js:617:5)
I have tried two other options I could come up with: - Clicking the link with .click(), which does not seem to initiate any download (I wonder what happens to responses with attached files then) - Using jQuery AJAX $.get() to store the contents of the URL in a variable, return that to NodeJS and write it to a file. (The variable appears to be undefined, and I get some sort of recursive archive.)
And here is my code:
var Horseman = require('node-horseman')
var horseman = new Horseman()
var fs = require('fs')
// Log in, pick the product in the dropdown, read the download link from the
// page, download it to disk, and only then shut PhantomJS down.
horseman
  .open('https://store.domain.com/download')
  .type('input[name="email"]', 'myemail@domain.com')
  .type('input[name="password"]', 'mypass')
  .click('button[type="submit"]')
  .waitForNextPage({timeout: 11000})
  .waitForSelector('#dropdown')
  .select('#dropdown', 'Example_Choice')
  .evaluate(function () {
    // Runs inside the PhantomJS page, so keep this ES5 and rely on the
    // page's own jQuery. Declared with `var` so no global leaks into the page.
    var targetLink = $('a:contains("File-1.0.zip")').attr('href');
    // Return null (not undefined) when the link is absent so the caller
    // can detect it explicitly.
    return targetLink || null;
    // Note: fetching the file here with $.get(url, function (response) {
    // return response }) cannot work -- the `return` only leaves the
    // asynchronous callback, so evaluate() itself yields undefined.
  })
  .then(function (returned_link) {
    if (!returned_link) {
      throw new Error('Download link for "File-1.0.zip" was not found on the page');
    }
    // Return the download promise so the chain waits for the file to be
    // written before moving on; previously close() ran immediately and
    // could tear the browser down mid-download.
    return horseman.download(returned_link, 'downloaded_file.zip', true);
  })
  .catch(function (err) {
    // Surface the failure instead of leaving an unhandled rejection that
    // prints as "[object Object]".
    console.error('Download failed:', err && err.message ? err.message : err);
  })
  .finally(function () {
    // Always release the PhantomJS process, on success or failure.
    return horseman.close();
  });
via Artis
No comments:
Post a Comment