AWS SDK file upload to S3 via Node/Express using stream PassThrough - file is always corrupt

It's pretty straightforward. With this code, any image file that is uploaded ends up corrupt and cannot be opened. PDFs seem fine, but I noticed extra values are being injected into text-based files. The file arrives in S3 at the correct size, not zero bytes as if something had failed. I'm not sure whether the problem is Express, the SDK, a combination of both, or even Postman. I built something similar in a work project in March of this year and it worked flawlessly, but I no longer have access to that code to compare.

No errors, no indication of any problems.

const aws = require("aws-sdk");
const stream = require("stream");
const express = require("express");
const router = express.Router();

const AWS_ACCESS_KEY_ID = "XXXXXXXXXXXXXXXXXXXX";
const AWS_SECRET_ACCESS_KEY = "superSecretAccessKey";
const BUCKET_NAME = "my-bucket";
const BUCKET_REGION = "us-east-1";

const s3 = new aws.S3({
    region: BUCKET_REGION,
    accessKeyId: AWS_ACCESS_KEY_ID,
    secretAccessKey: AWS_SECRET_ACCESS_KEY
});

const uploadStream = key => {
    let streamPass = new stream.PassThrough();
    let params = {
        Bucket: BUCKET_NAME,
        Key: key,
        Body: streamPass
    };
    let streamPromise = s3.upload(params, (err, data) => {
        if (err) {
            console.error("ERROR: uploadStream:", err);
        } else {
            console.log("INFO: uploadStream:", data);
        }
    }).promise();
    return {
        streamPass: streamPass,
        streamPromise: streamPromise
    };
};

router.post("/upload", async (req, res) => {
    try {
        let key = req.query.file_name;
        let { streamPass, streamPromise } = uploadStream(key);
        req.pipe(streamPass);
        await streamPromise;
        res.status(200).send({ result: "Success!" });
    } catch (e) {
        res.status(500).send({ result: "Fail!" });
    }
});

module.exports = router;

Here's my package.json:

{
  "name": "expresss3streampass",
  "version": "0.0.0",
  "private": true,
  "scripts": {
    "start": "node ./bin/www"
  },
  "dependencies": {
    "aws-sdk": "^2.812.0",
    "cookie-parser": "~1.4.4",
    "debug": "~2.6.9",
    "express": "~4.16.1",
    "morgan": "~1.9.1"
  }
}

UPDATE:

After further testing, I noticed plain-text files are being changed by Postman. For example, this source file:

{
    "question_id": null,
    "position_type_id": 1,
    "question_category_id": 1,
    "position_level_id": 1,
    "question": "Do you test your code before calling it \"done\"?",
    "answer": "Candidate should respond that they at least happy path test every feature and bug fix they write.",
    "active": 1
}

...looks like this after it lands in the bucket:

----------------------------472518836063077482836177
Content-Disposition: form-data; name="file"; filename="question.json"
Content-Type: application/json

{
    "question_id": null,
    "position_type_id": 1,
    "question_category_id": 1,
    "position_level_id": 1,
    "question": "Do you test your code before calling it \"done\"?",
    "answer": "Candidate should respond that they at least happy path test every feature and bug fix they write.",
    "active": 1
}
----------------------------472518836063077482836177--

I have to think this is the problem. Postman is the only thing that has changed in this equation since this code first worked for me. My request headers look like this:

[screenshot: Postman request headers]

I was the one who originally added the "application/x-www-form-urlencoded" header. If I use that now, I end up with a 0-byte file in the bucket.
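Presumably, sending the file as the raw request body instead of multipart/form-data (Postman's "binary" body type, for example) would sidestep the framing entirely, since there'd be no envelope for the server to strip. For illustration, a hypothetical client along those lines using only Node core modules (port 3000 is an assumption, the express-generator default):

const fs = require("fs");
const http = require("http");

// Hypothetical client: streams the raw file bytes with no multipart
// envelope, so req.pipe(streamPass) stores the file unmodified.
const fileName = "question.json";
const request = http.request({
    host: "localhost",
    port: 3000,
    path: `/upload?file_name=${encodeURIComponent(fileName)}`,
    method: "POST",
    headers: { "Content-Type": "application/octet-stream" }
}, response => {
    console.log("status:", response.statusCode);
    response.resume();
});
fs.createReadStream(`./${fileName}`).pipe(request);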

Asked Dec 18 '20 by Tsar Bomba

2 Answers

Multer is the way to go.

It provides a few different modes, but as far as I can tell you have to write a custom storage engine to get access to the underlying stream; otherwise it buffers all of the data in memory and only invokes its callback once the file is complete.

Normally Multer exposes the file contents as a Buffer under req.file.buffer, but with the custom storage below that field is absent, since I don't pass anything along in the callback. That makes me reasonably confident the data is streaming to S3 as expected.

Below is a working solution.

Note: parse.single('image') is passed into the route handler; 'image' is the multipart field name I used.

const aws = require('aws-sdk');
const stream = require('stream');
const express = require('express');
const router = express.Router();
const multer = require('multer')

const AWS_ACCESS_KEY_ID = "XXXXXXXXXXXXXXXXXXXX";
const AWS_SECRET_ACCESS_KEY = "superSecretAccessKey";
const BUCKET_NAME = "my-bucket";
const BUCKET_REGION = "us-east-1";

const s3 = new aws.S3({
    region: BUCKET_REGION,
    accessKeyId: AWS_ACCESS_KEY_ID,
    secretAccessKey: AWS_SECRET_ACCESS_KEY
});

const uploadStream = key => {
    let streamPass = new stream.PassThrough();
    let params = {
        Bucket: BUCKET_NAME,
        Key: key,
        Body: streamPass
    };
    let streamPromise = s3.upload(params, (err, data) => {
        if (err) {
            console.error('ERROR: uploadStream:', err);
        } else {
            console.log('INFO: uploadStream:', data);
        }
    }).promise();
    return {
        streamPass: streamPass,
        streamPromise: streamPromise
    };
};

class CustomStorage {
    _handleFile(req, file, cb) {
        let key = req.query.file_name;
        let { streamPass, streamPromise } = uploadStream(key);
        file.stream.pipe(streamPass);
        streamPromise
            .then(() => cb(null, {}))
            .catch(err => cb(err)); // surface upload errors to Multer
    }
    // Multer storage engines are also expected to implement _removeFile
    _removeFile(req, file, cb) {
        cb(null); // nothing is written locally, so nothing to clean up
    }
}

const storage = new CustomStorage();
const parse = multer({storage});

router.post('/upload', parse.single('image'), async (req, res) => {
    try {
        res.status(200).send({ result: 'Success!' });
    } catch (e) {
        console.log(e)
        res.status(500).send({ result: 'Fail!' });
    }
});

module.exports = router;

Update: A Better Solution

The Multer-based solution I provided above is a bit hacky, so I took a look under the hood to see how it worked. This solution just uses Busboy to parse and stream the file; Multer is really just a wrapper around Busboy with some disk I/O convenience functions.

const aws = require('aws-sdk');
const express = require('express');
const Busboy = require('busboy');
const router = express.Router();

const AWS_ACCESS_KEY_ID = "XXXXXXXXXXXXXXXXXXXX";
const AWS_SECRET_ACCESS_KEY = "superSecretAccessKey";
const BUCKET_NAME = "my-bucket";
const BUCKET_REGION = "us-east-1";

const s3 = new aws.S3({
    region: BUCKET_REGION,
    accessKeyId: AWS_ACCESS_KEY_ID,
    secretAccessKey: AWS_SECRET_ACCESS_KEY
});

function multipart(request) {
    return new Promise((resolve, reject) => { // no async executor needed
        const headers = request.headers;
        const busboy = new Busboy({ headers });
        // you may need to add cleanup logic using 'busboy.on' events
        busboy.on('error', err => reject(err));
        busboy.on('file', function (fieldName, fileStream, fileName, encoding, mimeType) {
            const params = {
                Bucket: BUCKET_NAME,
                Key: fileName,
                Body: fileStream
            };
            s3.upload(params).promise()
                .then(() => resolve())
                .catch(err => reject(err)); // reject if the upload fails
        });
        request.pipe(busboy);
    });
}

router.post('/upload', async (req, res) => {
    try {
        await multipart(req)
        res.status(200).send({ result: 'Success!' });
    } catch (e) {
        console.log(e)
        res.status(500).send({ result: 'Fail!' });
    }
});

module.exports = router;
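If you want to exercise this endpoint outside of Postman, here's a hypothetical test client (a separate script), assuming Node 18+ (where fetch, FormData, and Blob are built in) and that the app listens on port 3000:

const fs = require('fs');

// Hypothetical client: posts a file as multipart/form-data so that
// Busboy's 'file' event fires on the server side.
async function uploadMultipart(path, fileName) {
    const form = new FormData();
    form.append('file', new Blob([fs.readFileSync(path)]), fileName);
    const res = await fetch('http://localhost:3000/upload', {
        method: 'POST',
        body: form
    });
    console.log(res.status, await res.json());
}

uploadMultipart('./question.json', 'question.json');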
Answered Oct 13 '22 by Richard Dunn


As far as I can tell, Postman is behaving as it should: the "text injection" is actually a web standard, the multipart/form-data encoding used to identify and demarcate files on upload. The MDN Web Docs on sending form data explain why.

Postman is actually injecting that envelope regardless of the file type, which you can see by buffering the stream:

let streamPass = new stream.PassThrough();

// adding this to inspect what actually flows through the stream
const chunks = [];
streamPass.on('data', chunk => chunks.push(chunk));
streamPass.on('end', () => {
    const body = Buffer.concat(chunks).toString();
    console.log(chunks, chunks.length);
    console.log('finished', body); // <-- see it here
});

I tried several methods to control or change this, with no luck on a simple fix. From the Postman end, I don't think this is a setting that can be changed, and from the Node.js end it's possible, but the solution would most likely be clunky and complicated, which I suspect you don't want. (I could be wrong, though.)

Given the above, I'll join @relief.melone in recommending multer as a simple solution.

If you'd like to use multer with streams, try this (I've indicated where I made changes to your code):

const multer = require("multer"); // <- multer needs to be required for this

// const uploadStream = (key) => {
const uploadStream = (key, mime_type) => {      // <- adding the mimetype

    let streamPass = new stream.PassThrough();
    
    let params = {
        Bucket: BUCKET_NAME,
        Key: key,
        Body: streamPass,
        ACL: 'public-read', // <- you can remove this
        ContentType: mime_type  // <- adding the mimetype
    };
    let streamPromise = s3.upload(params, (err, data) => {
        if (err) {
            console.error("ERROR: uploadStream:", err);
        } else {
            console.log("INFO: uploadStream:", data);
        }
    }).promise();
    
    return {
        streamPass: streamPass,
        streamPromise: streamPromise
    };
};

// router.post("/upload", async (req, res) => {
router.post("/upload", multer().single('file'), async (req, res) => {      // <- we're adding multer
    try {
        
        let key = req.query.file_name;
        // === change starts here 

            // console.log(req.file); // <- uncomment this line to inspect the file object

            let { streamPass, streamPromise } = uploadStream(key, req.file.mimetype);   // adding the mimetype

            var bufferStream = new stream.PassThrough();

            bufferStream.end(req.file.buffer);

            bufferStream.pipe(streamPass); // no longer req.pipe(streamPass);

        // === change ends here 
        await streamPromise;
        
        res.status(200).send({ result: "Success!" });
    } catch (e) {
        console.log(e)
        res.status(500).send({ result: "Fail!" });
    }
});
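One trade-off to note: multer() with no storage configured keeps the whole file in memory, so the entire upload sits in req.file.buffer before being re-streamed to S3. That's fine for small files and keeps the code simple, but for large uploads the streaming approaches above avoid that buffering.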
Answered Oct 13 '22 by Deolu A