I need to create a zip file containing a selection of files (videos and images) stored in my S3 bucket.
The problem with my current code (below) is that I quickly hit Lambda's memory limit.
async.eachLimit(files, 10, function(file, next) {
  var params = {
    Bucket: bucket, // bucket name
    Key: file.key
  };
  s3.getObject(params, function(err, data) {
    if (err) {
      console.log('file', file.key);
      console.log('get image files err', err, err.stack); // an error occurred
    } else {
      console.log('file', file.key);
      zip.file(file.key, data.Body);
      next();
    }
  });
}, function(err) {
  if (err) {
    console.log('err', err);
  } else {
    console.log('zip', zip);
    content = zip.generateNodeStream({ type: 'nodebuffer', streamFiles: true });
    var params = {
      Bucket: bucket, // name of destination bucket
      Key: 'zipped/images.zip',
      Body: content
    };
    s3.upload(params, function(err, data) {
      if (err) {
        console.log('upload zip to s3 err', err, err.stack); // an error occurred
      } else {
        console.log(data); // successful response
      }
    });
  }
});
Is this possible using Lambda, or should I look at a different approach?
Is it possible to write to a compressed zip file on the fly, thereby mitigating the memory issue somewhat, or do I need to collect all the files before compressing them?
Any help would be much appreciated.
Zips S3 files: takes an Amazon S3 bucket folder and zips it to a stream, a local file, or local file fragments (the zip split into multiple files, broken up by a maximum number of files or size).
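If you go the roll-your-own route, the "bucket folder" part just means listing every key under a prefix. Here is a minimal sketch, assuming the AWS SDK for JavaScript v2 and hypothetical bucket/prefix names, that collects those keys so they can be fed to the zipping code further down:

const AWS = require('aws-sdk');
const s3 = new AWS.S3();

// Hypothetical bucket and "folder" prefix for illustration.
async function listKeys(bucket, prefix) {
  const keys = [];
  let token; // continuation token for paging past 1000 objects
  do {
    const page = await s3.listObjectsV2({
      Bucket: bucket,
      Prefix: prefix,
      ContinuationToken: token
    }).promise();
    page.Contents.forEach(obj => keys.push(obj.Key));
    token = page.IsTruncated ? page.NextContinuationToken : undefined;
  } while (token);
  return keys;
}

// e.g. const keys = await listKeys('my-bucket', 'videos/2019/');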
Okay, I got to do this today and it works. It goes straight from Buffer to Stream, with no disk involved, so memory and disk limitations shouldn't be an issue here:
'use strict';

const AWS = require("aws-sdk");
AWS.config.update({ region: "eu-west-1" });
const s3 = new AWS.S3({ apiVersion: '2006-03-01' });
const _archiver = require('archiver');

// This returns us a stream... consider it as a real pipe sending fluid to the S3 bucket. Don't forget it.
const streamTo = (_bucket, _key) => {
  var stream = require('stream');
  var _pass = new stream.PassThrough();
  s3.upload({ Bucket: _bucket, Key: _key, Body: _pass }, (_err, _data) => { /* ...Handle Errors Here */ });
  return _pass;
};

exports.handler = async (_req, _ctx, _cb) => {
  var _keys = ['list of your file keys in s3'];

  var _list = await Promise.all(_keys.map(_key => new Promise((_resolve, _reject) => {
    s3.getObject({ Bucket: 'bucket-name', Key: _key }).promise()
      .then(_data => _resolve({ data: _data.Body, name: `${_key.split('/').pop()}` }));
  }))).catch(_err => { throw new Error(_err); });

  await new Promise((_resolve, _reject) => {
    var _myStream = streamTo('bucket-name', 'fileName.zip'); // Now we instantiate that pipe...
    var _archive = _archiver('zip');
    _archive.on('error', err => { throw new Error(err); });

    // Your promise gets resolved when the fluid stops running... so that's when you get to close and resolve.
    _myStream.on('close', _resolve);
    _myStream.on('end', _resolve);
    _myStream.on('error', _reject);

    _archive.pipe(_myStream); // Pass that pipe to _archive so it can push the fluid straight down to the S3 bucket
    _list.forEach(_itm => _archive.append(_itm.data, { name: _itm.name })); // And then we start adding files to it
    _archive.finalize(); // Tell it that's all we want to add. When it finishes, the promise will resolve in one of those events up there
  }).catch(_err => { throw new Error(_err); });

  _cb(null, {}); // Handle response back to server
};
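One caveat with the code above: the Promise.all step downloads every object body into memory before anything is appended, so a very large selection can still push against the Lambda memory limit. A variant sketch under the same assumptions (AWS SDK v2, archiver, placeholder bucket and key names) appends S3 read streams instead of buffered bodies, so each file is pulled through the zip only as it is needed:

const AWS = require('aws-sdk');
const archiver = require('archiver');
const { PassThrough } = require('stream');

const s3 = new AWS.S3({ apiVersion: '2006-03-01' });

// Hypothetical bucket / key names for illustration.
const SRC_BUCKET = 'bucket-name';
const DEST_KEY = 'zipped/images.zip';

exports.handler = async () => {
  const keys = ['list of your file keys in s3'];

  // PassThrough stream that s3.upload consumes while archiver writes into it.
  const pass = new PassThrough();
  const upload = s3.upload({ Bucket: SRC_BUCKET, Key: DEST_KEY, Body: pass }).promise();

  const archive = archiver('zip');
  archive.on('error', err => { throw err; });
  archive.pipe(pass);

  // Append read streams rather than Buffers: archiver drains one entry at a time,
  // so the object bodies are never all held in memory at once.
  for (const key of keys) {
    const body = s3.getObject({ Bucket: SRC_BUCKET, Key: key }).createReadStream();
    archive.append(body, { name: key.split('/').pop() });
  }

  archive.finalize(); // No more entries; flush the zip's central directory.
  return upload;      // Resolves when the upload of the zip completes.
};

Because the zip bytes flow straight from archiver into s3.upload, peak memory stays around archiver's internal buffering rather than the combined size of the files.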