Tuesday, 4 April 2017

Node download archives from s3 - Error: incorrect header check in gunzip

I'm writing a simple method to download and unzip gzip compressed archives from S3:

var downloadArchive = function(params,callback) {
        var self=this;

        var gunzip = zlib.createGunzip();
        var file = fs.createWriteStream( options.filePath );

        s3.getObject({ Bucket: this.options.Bucket, Key: this.options.Key })
        .on('error', function (error) {
            if(self.logger) self.logger.error("%@",error);
            return callback(error);
        })
        .on('httpData', function (chunk) {
            file.write(chunk);
        })
        .on('httpDone', function () {

            file.end();

            if(self.logger) self.logger.info("downloadArchive downloaded %s", options.filePath);

            fs.createReadStream( options.filePath )
            .on('error', (error) => {
                return callback(error);
            })
            .on('end', () => {
                if(self.logger) self.logger.info("downloadArchive unarchived %s", options.fileDest);
                return callback(null, options.fileDest);
            })
            .pipe(gunzip)
            .pipe(fs.createWriteStream(options.fileDest))
        })
        .send();

    }//downloadArchive

I have compressed the a sample archive with gzip like (see here for details).

gzip < test.txt > test.gz

and uploaded to S3 in the ordinary shell script:

s3Key=$1
s3Secret=$2
objectName=$3
FileName=$4
bucket=my-bucket
path=my-folder
CONTENT_TYPE=$5

[ -z "$s3Key" ] && { echo "Usage: $0 S3_KEY S3_SECRET S3_OBJECT_NAME FILENAME CONTENT_TYPE"; exit 1; }

resource="/${bucket}/${path}/${objectName}"

dateValue=`date -R`
contentType="$CONTENT_TYPE"
stringToSign="PUT\n\n${contentType}\n${dateValue}\n${resource}"
signature=$(echo -en "${stringToSign}" | openssl sha1 -hmac "${s3Secret}" -binary | base64)
curl -X PUT -T "$FileName" \
    -H "Host: $bucket.s3.amazonaws.com" \
    -H "Date: $dateValue" \
    -H "Content-Type: $contentType" \
    -H "Authorization: AWS ${s3Key}:${signature}" \
    https://${bucket}.s3.amazonaws.com/${path}/${objectName}

so for this kind of archive (mime type: application/x-gzip)

$ ./upload.sh AWS_KEY AWS_SECRET test.gz ./test.gz application/x-gzip

When I call downloadArchive I successfully download the file, but I get the incorrect header check error:

 [Tue Apr 04 2017 16:15:38 GMT+0200 (CEST)] downloadArchive downloaded /var/folders/_b/szqwdfn979n4fdg7f2j875_r0000gn/T/test.gz
events.js:160
      throw er; // Unhandled 'error' event
      ^

Error: incorrect header check
    at Zlib._handle.onerror (zlib.js:370:17)
[loretoparisi@:mbploreto dataset]$ 

I have checked the downloaded archive file test.gz and it seems that it is corrupted in some way:

$ node
> fs=require('fs'),gunzip = zlib.createGunzip();
> fs.createReadStream( '/var/folders/_b/szqwdfn979n4fdg7f2j875_r0000gn/T/test.gz' ).pipe(gunzip).pipe(fs.createWriteStream('./out.txt')).on('close', () => { console.log('done'); }).on('error', (error) => { console.error(error) });
> Error: incorrect header check
    at Zlib._handle.onerror (zlib.js:370:17)



via loretoparisi

No comments:

Post a Comment