
Accessing the raw file stream from a node-formidable file upload

I am creating an application that takes some file uploads and sends them straight up to S3. I would prefer not to even have the temporary file on my server, so I am using the Knox module, and I would like to take the raw stream from Formidable and send it over Knox to S3. I have done something similar with Knox to download a file, using this code:

knox.downloads.get(widget.download).on('response',function(sres){
    res.writeHead(200, {
        'Content-Type':'application/zip',
        'Content-Length': sres.headers['content-length'],
        'Content-Disposition':'attachment; filename=' + widget.download
    });
    util.pump(sres, res);
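    // note: util.pump was later deprecated; sres.pipe(res) is the modern equivalent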
}).end();

Now I would like to do something similar in the opposite direction (a file upload from the browser to S3).

So far I have written an event handler to capture each piece of data from the file as it's being uploaded:

var form = new formidable.IncomingForm();
form.onPart = function(part){
    if (!part.filename) {
        form.handlePart(part);
    } else {
        if (part.name == 'download') {
            // Upload to the download bucket
            controller.putDownload(part);
        } else {
            // Upload to the image bucket
            controller.putImage(part);
        }
        //res.send(sys.inspect(part));
    }
}
form.parse(req, function(err, fields, files){
    if (err) {
        res.json(err);
    } else {
        res.send(sys.inspect({fields: fields, files: files}), {'content-type': 'text/plain'});
        //controller.createWidget(res, fields, files);
    }
});


controller.putDownload = function(part){
    // First attempt: push each data chunk to S3 as it arrives
    part.addListener('data', function(buffer){
        knox.downloads.putStream(buffer, part.filename, function(err, s3res){
            if (err) throw err;
            console.log(s3res);
        });
    });
    // Second attempt: hand the part itself to putStream
    knox.downloads.putStream(part, part.filename, function(err, s3res){
        if (err) throw err;
        console.log(s3res);
    });
}

But the data event only gives me the buffer. So is it possible to capture the stream itself and push it to S3?

Dave Long, asked Oct 22 '11


3 Answers

What you want to do is override the Form.onPart method:

IncomingForm.prototype.onPart = function(part) {
  // this method can be overwritten by the user
  this.handlePart(part);
};

Formidable's default behavior is to write the part to a file. You don't want that; you want to handle the part's events and write the data to the Knox upload yourself. Start with this:

form.onPart = function(part) {
    if (!part.filename) {
        // let formidable handle all non-file parts
        form.handlePart(part);
        return;
    }

Then open the knox request and handle the raw part events yourself:

part.on('data', function(data) {
    req.write(data);
});
part.on('end', function() {
    req.end();
});
part.on('error', function(err) {
    // handle this too
});
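Here req is the Knox upload request, which the snippet above assumes has already been opened. A minimal sketch of creating it, assuming client is a configured Knox client and that the content length is known up front (a plain S3 PUT requires a Content-Length header; contentLength is a hypothetical value you would need to obtain, e.g. from a form field sent by the browser):

// sketch only: `client` is a configured knox client and `contentLength`
// is assumed to be known ahead of time, since S3 requires a
// Content-Length header on a plain PUT
var req = client.put('/' + part.filename, {
    'Content-Length': contentLength,
    'Content-Type': part.mime
});
req.on('response', function(s3res) {
    console.log('S3 responded with status %d', s3res.statusCode);
});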

As a bonus, if req.write(data) returns false, that means the send buffer is full, so you should pause the Formidable parser; when you get a drain event from the Knox stream, resume Formidable.
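A minimal sketch of that back-pressure handling, assuming req is the Knox request opened above:

part.on('data', function(data) {
    // write() returns false when the send buffer is full
    if (req.write(data) === false) {
        form.pause();                    // stop formidable from parsing
        req.once('drain', function() {
            form.resume();               // resume once the buffer empties
        });
    }
});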

kgilpin, answered Oct 18 '22


Use multiparty instead; it supports exactly this kind of streaming. It even has an example of streaming directly to S3: https://github.com/superjoe30/node-multiparty/blob/master/examples/s3.js
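For reference, a condensed sketch along the lines of that example, with placeholder credentials and bucket name:

var multiparty = require('multiparty');
var knox = require('knox');

var client = knox.createClient({
    key: 'YOUR_KEY',        // placeholder
    secret: 'YOUR_SECRET',  // placeholder
    bucket: 'your-bucket'   // placeholder
});

var form = new multiparty.Form();
form.on('part', function(part) {
    if (!part.filename) {
        // not a file: drain and ignore this part
        part.resume();
        return;
    }
    // multiparty exposes the part's size, so knox can set Content-Length
    var headers = {
        'Content-Length': part.byteCount,
        'Content-Type': part.headers['content-type']
    };
    client.putStream(part, '/' + part.filename, headers, function(err, s3res) {
        if (err) return console.error(err);
        console.log('uploaded with status', s3res.statusCode);
    });
});
form.parse(req);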

andrewrk, answered Oct 18 '22


In an Express middleware, I use formidable together with PassThrough to stream-upload a file to S3 (in my case to Minio, which is S3-compatible, through the Minio SDK; I believe it works for AWS S3 too with the same SDK).

Here is the sample code:

const formidable = require('formidable')
const { PassThrough } = require('stream')

const form = new formidable.IncomingForm()
const pass = new PassThrough()

const fileMeta = {}
form.onPart = part => {
  if (!part.filename) {
    // let formidable handle non-file parts (ordinary form fields)
    form.handlePart(part)
    return
  }
  fileMeta.name = part.filename
  fileMeta.type = part.mime
  // forward the raw upload data through the PassThrough stream
  part.on('data', function (buffer) {
    pass.write(buffer)
  })
  part.on('end', function () {
    pass.end()
  })
}
form.parse(req, err => {
  if (err) {
    req.minio = { error: err }
    next()
  } else {
    handlePostStream(req, next, fileMeta, pass)
  }
})

And handlePostStream looks like this, for reference:

const uuidv1 = require('uuid/v1')

const handlePostStream = async (req, next, fileMeta, fileStream) => {
  // generate a unique object name for the bucket
  let filename = uuidv1()

  try {
    const metaData = {
      'content-type': fileMeta.type,
      // base64-encode the original name so it is safe to carry in a header
      'file-name': Buffer.from(fileMeta.name).toString('base64')
    }

    const minioClient = /* Get Minio Client */
    await minioClient.putObject(MINIO_BUCKET, filename, fileStream, metaData)

    req.minio = { post: { filename: `${filename}` } }
  } catch (error) {
    req.minio = { error }
  }
  next()
}
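For completeness, here is a hypothetical wiring of the route, assuming the first snippet is wrapped in an Express middleware function (the name minioUpload and the route path are my own, not from the answer):

const express = require('express')
const app = express()

// hypothetical wiring: assumes the formidable/PassThrough snippet above is
// wrapped in an Express middleware function named minioUpload(req, res, next)
app.post('/upload', minioUpload, (req, res) => {
  if (req.minio.error) {
    return res.status(500).json({ error: 'upload failed' })
  }
  res.json(req.minio.post)
})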

You can find the source code on GitHub, and its unit tests too.

Yuci, answered Oct 18 '22