Thursday 20 April 2017

Lambda exits before completing, but works locally

I'm writing a simple Lambda function for a data processing pipeline, which gets called by Kinesis Firehose. This function downloads images from a URL, uploads them to S3, checks whether they have a face in them using OpenCV, and if they do, loads the image into Rekognition.

When I run it locally, I have no issues: everything downloads, uploads and is processed with no errors. However, when I run it on Lambda, it fails with an error saying the process exited before completing the request.

'use strict';

const AWS = require('aws-sdk');
const request = require('request');
const cv = require('opencv');

// AWS service clients, created once at module scope and shared by the
// functions below: `s3` stores the downloaded images, `rek` indexes faces.
const s3 = new AWS.S3();
const rek = new AWS.Rekognition();

/**
 * Download the image referenced by a record and store it in S3.
 *
 * @param {Object} data - Record payload. Reads `data.images.standard` (the
 *   image URL) and `data._id` (used to build the S3 key `<_id>.jpg`).
 * @returns {Promise<{record: Object, buffer: Buffer}>} Resolves with the
 *   original record and the downloaded image bytes once the upload succeeds.
 *   Rejects with `{ type: 'fail', code }` when the download answers with a
 *   non-200 status, and with `{ type: 'err', err }` for any other failure.
 */
const uploadImage = data => {
  return new Promise((resolve, reject) => {
    request({
      url: data.images.standard,
      encoding: null // creates a buffer
    }, function(err, res, body) {
      if (err) return reject({ type: 'err', err: err });
      if (res.statusCode !== 200) return reject({ type: 'fail', code: res.statusCode });

      console.log(`Fetched ${data._id}`)

      // This callback runs outside the promise executor, so anything thrown
      // here would be an uncaught exception rather than a rejection.
      try {
        // Upload to s3
        s3.putObject({
          Bucket: 'mybucket',
          Key: `${data._id}.jpg`,
          ContentType: res.headers['content-type'],
          // Use the real byte count: the content-length header is a string
          // and is absent on chunked responses.
          ContentLength: body.length,
          Body: body // buffer
        }, (err) => {
          if (err) return reject({ type: 'err', err: err });
          resolve({ record: data, buffer: body });
        });
      } catch (e) {
        reject({ type: 'err', err: e });
      }
    })
  })
}

/**
 * Run OpenCV face detection on a downloaded image to decide whether the
 * record should be sent on for remote indexing.
 *
 * @param {{record: Object, buffer: Buffer}} data - Output of `uploadImage`;
 *   `data.buffer` is decoded by OpenCV and `data.record._id` is used for logging.
 * @returns {Promise<Object>} Resolves with the same `data` object, with
 *   `data._jackal` set to `true` when at least one face was detected and
 *   `false` otherwise. Rejects with `{ type: 'err', err }` on any failure.
 */
const indexFacesLocal = data => {
  return new Promise((resolve, reject) => {
    const fail = err => reject({ type: 'err', err: err });

    cv.readImage(data.buffer, (err, image) => {
      if (err) return fail(err);

      // The callbacks run outside the promise executor, so an exception
      // thrown in them would be uncaught and terminate the process instead
      // of rejecting. Catch it and reject like any other error.
      try {
        image.detectObject(cv.FACE_CASCADE, {}, (err, faces) => {
          if (err) return fail(err);

          try {
            // Treat an undefined result as "no faces"
            const hasFaces = !!(faces && faces.length);

            console.log(`Completed OpenCV ${data.record._id}, should process = ${hasFaces}`)

            data._jackal = hasFaces;
            resolve(data);
          } catch (e) {
            fail(e);
          }
        })
      } catch (e) {
        fail(e);
      }
    })
  })
}

/**
 * Index the record's uploaded image with Rekognition when the local check
 * flagged it (`data._jackal`); otherwise pass the record straight through.
 *
 * @param {{record: Object, _jackal: boolean}} data - Output of `indexFacesLocal`.
 * @returns {Promise<Object>} Resolves with `data.record` in every successful
 *   case; rejects with `{ type: 'err', err }` when `indexFaces` reports an error.
 */
const indexFacesJackal = data => {
  return new Promise((resolve, reject) => {
    // Only the record is needed from here on; the image buffer is dropped.
    const { _jackal: shouldIndex, record } = data;

    if (!shouldIndex) {
      resolve(record);
      return;
    }

    const s3Image = {
      S3Object: {
        Bucket: 'mybucket',
        Name: `${record._id}.jpg`,
      }
    };

    const indexRequest = {
      CollectionId: process.env.REK_COLLECTION,
      Image: s3Image,
      DetectionAttributes: ['ALL'],
      ExternalImageId: record._id
    };

    rek.indexFaces(indexRequest, (err, faces) => {
      if (err) {
        reject({ type: 'err', err: err });
        return;
      }

      console.log(`Indexed on Rek ${record._id}`)

      // Only map the response when face records are present
      if (faces.FaceRecords.length) {
        // Do some data manipulation stuff here, nothing big
        // just used to reformat AWS response

        console.log(`Mapped ${record._id}`)
      }

      resolve(record);
    })
  })
}

exports.handler = function(event, ctx, callback) {
  /* Process the list of records and transform them */
  Promise.all(event.records.map(record => {
    return uploadImage(record.data)
      .then(indexFacesLocal)
      .then(indexFacesJackal)
      .then(data => {
        return {
          recordId: record.recordId,
          result: 'Ok',
          data: data,
        }
      }).catch(res => {

        if (res.type == 'fail') {
          // Unable to fetch media from Instagram
          console.log(`[${res.code}] - ${record.recordId}`);
          return {
            recordId: record.recordId,
            result: 'Dropped'
          }
        }

        console.log(`Processing failed for ${record.recordId}`);
        console.log(res.err.stack);

        return {
          recordId: record.recordId,
          result: 'ProcessingFailed'
        }
      })
  })).then(output => {
    console.log(`Processing completed, handled ${output.length} items`)
    callback(null, { records: output })
  })
};

When run locally, my output is:

Fetched 1392753031552166622
Fetched 1379923796962022364
Fetched 1392750801239391628
Fetched 1392748163315653017
Completed OpenCV 1379923796962022364, should process = true
Completed OpenCV 1392748163315653017, should process = false
Completed OpenCV 1392750801239391628, should process = true
Completed OpenCV 1392753031552166622, should process = true
Indexed on Rek 1379923796962022364
Mapped 1379923796962022364
Indexed on Rek 1392750801239391628
Mapped 1392750801239391628
Indexed on Rek 1392753031552166622
Mapped 1392753031552166622
Processing completed, handled 4 items
{ records: 
   [ { recordId: '1379923796962022364', result: 'Ok', data: [Object] },
     { recordId: '1392748163315653017', result: 'Ok', data: [Object] },
     { recordId: '1392750801239391628', result: 'Ok', data: [Object] },
     { recordId: '1392753031552166622', result: 'Ok', data: [Object] } ] }

When run on AWS, I get:

START Version: $LATEST

Fetched 1392753031552166622
Fetched 1392748163315653017
Fetched 1392750801239391628
Fetched 1379923796962022364
Completed OpenCV 1379923796962022364, should process = true
Completed OpenCV 1392748163315653017, should process = false
Completed OpenCV 1392750801239391628, should process = true

END
Process exited before completing request

I've checked memory allocation: it's only using ~130 MB of the 512 MB available. There are no other errors thrown. It successfully imports the OpenCV build for Amazon Linux, so it's not a problem with the binary either. I've also checked the timeout, which is set to 5 minutes (the function runs for ~8 seconds each time).



via Benedict Lewis

No comments:

Post a Comment