Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Node.js File Upload (Express 4, MongoDB, GridFS, GridFS-Stream)

I am trying to set up a file API in my Node.js application. My goal is to be able to write the file stream directly to GridFS, without needing to store the file on disk first. My create code seems to be working: I am able to save a file upload to GridFS. The problem is reading the file. When I try to download a saved file via a web browser window, I see that the file contents are wrapped with something like the following:

------WebKitFormBoundarye38W9pfG1wiA100l
Content-Disposition: form-data; name="file"; filename="myfile.txt"
Content-Type: text/javascript

***File contents here***

------WebKitFormBoundarye38W9pfG1wiA100l--

So my question is: what do I need to do to strip the boundary information from the file stream before saving it to GridFS? Here's the code I'm working with:

'use strict';

var mongoose = require('mongoose');
var _ = require('lodash');

var Grid = require('gridfs-stream');
Grid.mongo = mongoose.mongo;
var gfs = new Grid(mongoose.connection.db);

// I think this works. I see the file record in fs.files
exports.create = function(req, res) {
    var fileId = new mongoose.Types.ObjectId();

    var writeStream = gfs.createWriteStream({
        _id: fileId,
        filename: req.query.name,
        mode: 'w',
        content_type: req.query.type,
        metadata: {
            uploadedBy: req.user._id,
        }
    });

    writeStream.on('finish', function() {
        return res.status(200).send({
            message: fileId.toString()
        });
    });

    req.pipe(writeStream);
};

// File data is returned, but it's wrapped with
// WebKitFormBoundary and has headers.
/**
 * Download handler: looks up the GridFS file with id req.params.id and pipes
 * its stored bytes to the response.
 *
 * Responds with 400 and the error object if the lookup reports an error.
 */
exports.read = function(req, res) {
    gfs.findOne({ _id: req.params.id }, function (err, file) {
        if (err) return res.status(400).send(err);

        // NOTE(review): `file` is only referenced by the commented-out line
        // below, so a lookup that matches nothing is not detected here
        // (presumably findOne yields a null `file` in that case; confirm).

        // With this commented out, my browser will prompt
        // me to download the raw file where I can see the
        // webkit boundary and request headers
        //res.writeHead(200, { 'Content-Type': file.contentType });

        var readstream = gfs.createReadStream({
            _id: req.params.id
            // I also tried this way:
            //_id: file._id
        });

        // Stream the stored bytes straight to the HTTP response.
        readstream.pipe(res);
    });
};

By the way, I'm not currently using any middleware for these routes, but I am open to doing so. I just didn't want the file to hit the disk before being sent to GridFS.

Edit:

Per @fardjad, I added the node-multiparty module for multipart/form-data parsing and it kind of worked. But when I download an uploaded file and compare with an original (as text), there are lots of differences in the encoding, and the downloaded file won't open. Here's my latest attempt.

'use strict';

var mongoose = require('mongoose');
var _ = require('lodash');
var multiparty = require('multiparty');
var Grid = require('gridfs-stream');
Grid.mongo = mongoose.mongo;
var gfs = new Grid(mongoose.connection.db);

exports.create = function(req, res) {
    var form = new multiparty.Form();
    var fileId = new mongoose.Types.ObjectId();

    form.on('error', function(err) {
      console.log('Error parsing form: ' + err.stack);
    });

    form.on('part', function(part) {
        if (part.filename) {
            var writeStream = gfs.createWriteStream({
                _id: fileId,
                filename: part.filename,
                mode: 'w',
                content_type: part.headers['content-type'],
                metadata: {
                    uploadedBy: req.user._id,
                }
            })

            part.pipe(writeStream);
        }
    });

    // Close emitted after form parsed
    form.on('close', function() {
        return res.status(200).send({
            message: fileId.toString()
        });
    });

    // Parse req
    form.parse(req);
};

exports.read = function(req, res) {
    gfs.findOne({ _id: req.params.id }, function (err, file) {
        if (err) return res.status(400).send(err);

        res.writeHead(200, { 'Content-Type': file.contentType });

        var readstream = gfs.createReadStream({
            _id: req.params.id
        });

        readstream.pipe(res);
    });
};

Final Edit:

Here's a simple implementation that I copied from another developer and modified. This is working for me: (I'm still trying to figure out why it won't work in my original express app. Something seems to be interfering)

https://gist.github.com/pos1tron/094ac862c9d116096572

var Busboy = require('busboy'); // 0.2.9
var express = require('express'); // 4.12.3
var mongo = require('mongodb'); // 2.0.31
var Grid = require('gridfs-stream'); // 1.1.1
var app = express();
// NOTE(review): the server starts listening here, presumably before the
// db.open() callback below has run, so `gfs` may still be undefined for a
// request that arrives in between. Confirm whether that window matters.
var server = app.listen(9002);

// Handle for the 'test' database on the MongoDB server at 127.0.0.1:27017.
var db = new mongo.Db('test', new mongo.Server('127.0.0.1', 27017));
// gridfs-stream handle used by the routes; assigned in the db.open callback.
var gfs;
db.open(function(err, db) {
  // A failed connection is rethrown rather than handled.
  if (err) throw err;
  gfs = Grid(db, mongo);
});

/**
 * POST /file: accepts a multipart/form-data upload and streams each file
 * into GridFS under a freshly generated id.
 *
 * The download link is sent only after busboy has finished parsing AND every
 * GridFS write stream has closed, so the link never points at a file that is
 * still being written. Responds with 400 for a request busboy cannot parse
 * and 500 if parsing or storing fails midway.
 */
app.post('/file', function(req, res) {
  var busboy;
  try {
    // The constructor throws on a missing or unsupported Content-Type.
    busboy = new Busboy({ headers : req.headers });
  } catch (err) {
    return res.status(400).send('Invalid upload request');
  }

  var fileId = new mongo.ObjectId();
  var pendingWrites = 0;    // GridFS write streams that have not closed yet
  var parsingDone = false;  // set once busboy has consumed the whole request
  var responded = false;    // guards against answering the request twice

  // Report a failure unless a response has already been sent.
  function fail(err) {
    console.log('Upload failed: ' + (err && err.message ? err.message : err));
    if (responded) return;
    responded = true;
    res.status(500).send('Upload failed');
  }

  // Show the download link once parsing and all writes are complete.
  function succeedWhenDone() {
    if (responded || !parsingDone || pendingWrites > 0) return;
    responded = true;
    // show a link to the uploaded file
    res.writeHead(200, {'content-type': 'text/html'});
    res.end('<a href="/file/' + fileId.toString() + '">download file</a>');
  }

  busboy.on('file', function(fieldname, file, filename, encoding, mimetype) {
    console.log('got file', filename, mimetype, encoding);
    var writeStream = gfs.createWriteStream({
      _id: fileId,
      filename: filename,
      mode: 'w',
      content_type: mimetype,
    });

    pendingWrites += 1;

    // gridfs-stream emits 'close' once the file has been stored.
    writeStream.on('close', function() {
      pendingWrites -= 1;
      succeedWhenDone();
    });
    writeStream.on('error', fail);

    file.pipe(writeStream);
  });

  busboy.on('error', fail);

  busboy.on('finish', function() {
    parsingDone = true;
    succeedWhenDone();
  });

  req.pipe(busboy);
});

// GET /: serves a minimal HTML form that posts a single file to /file.
app.get('/', function(req, res) {
  var uploadForm = [
    '<form action="/file" enctype="multipart/form-data" method="post">',
    '<input type="file" name="file"><br>',
    '<input type="submit" value="Upload">',
    '</form>'
  ].join('');

  res.writeHead(200, {'content-type': 'text/html'});
  res.end(uploadForm);
});

/**
 * GET /file/:id: sends the GridFS file with the given id as a download.
 *
 * Responds with 400 if the lookup fails and 404 if no such file exists.
 */
app.get('/file/:id', function(req, res) {
  gfs.findOne({ _id: req.params.id }, function (err, file) {
    if (err) return res.status(400).send(err);
    if (!file) return res.status(404).send('');

    // The stored filename came from the uploader, so let Express build the
    // Content-Disposition header: it quotes/encodes the name instead of
    // concatenating it raw (quotes, control or non-ASCII characters would
    // otherwise yield a malformed or invalid header).
    res.attachment(file.filename);

    // res.attachment() guesses a Content-Type from the file extension;
    // prefer the type recorded at upload time when there is one.
    if (file.contentType) {
      res.set('Content-Type', file.contentType);
    }

    var readstream = gfs.createReadStream({
      _id: file._id
    });

    // Headers may already be sent at this point, so just end the response.
    readstream.on("error", function(err) {
      console.log("Got error while processing stream " + err.message);
      res.end();
    });

    readstream.pipe(res);
  });
});
like image 537
Devon Sams Avatar asked May 07 '15 18:05

Devon Sams


2 Answers

See my comment on the issue you created on GitHub. I had the same problem, but I managed to debug it. I narrowed it down to the point where I was confident that a piece of Express middleware was modifying the request. I disabled my middleware one by one until I found the unlikely culprit: connect-livereload

I commented out app.use(require('connect-livereload')()); and the problem went away. I believe it was injecting the livereload script into the response (a binary image file).

like image 144
Nick Dawson Avatar answered Sep 29 '22 16:09

Nick Dawson


It looks like the file has been uploaded through an HTML form. In that case, you need to decode the multipart/form-data encoded data, re-assemble the parts if needed, and save the file to GridFS. For parsing, you can use something like node-multiparty.

like image 33
fardjad Avatar answered Sep 29 '22 15:09

fardjad