How to download and unzip a zip file in memory in NodeJs?

Tags:

I want to download a zip file from the internet and unzip it in memory without saving to a temporary file. How can I do this?

Here is what I tried:

// NOTE(review): this is the asker's failing attempt, kept exactly as posted.
// Likely causes of the empty output (confirm against the zlib and request docs):
//   1. zlib.unzip presumably expects a raw gzip/deflate stream, not a .zip archive container.
//   2. request.get is called without `encoding: null`, so `file` is presumably a decoded
//      string rather than a Buffer of the original bytes.
var url = 'http://bdn-ak.bloomberg.com/precanned/Comdty_Calendar_Spread_Option_20120428.txt.zip';  var request = require('request'), fs = require('fs'), zlib = require('zlib');    request.get(url, function(err, res, file) {      if(err) throw err;      zlib.unzip(file, function(err, txt) {         if(err) throw err;         console.log(txt.toString()); //outputs nothing      });   });

[EDIT] As suggested, I tried using the adm-zip library, and I still cannot make this work:

// NOTE(review): this is the asker's second failing attempt, kept exactly as posted.
// It relies on `request` and `url` from the previous snippet.
// zipFile.toString('utf-8') turns the downloaded body into text before it is wrapped in a
// Buffer again, which presumably corrupts the binary archive bytes. The whole response body
// is also handed to a single ZipEntry via setCompressedData - confirm against the adm-zip
// docs whether that method expects one entry's compressed data instead.
var ZipEntry = require('adm-zip/zipEntry'); request.get(url, function(err, res, zipFile) {         if(err) throw err;         var zip = new ZipEntry();         zip.setCompressedData(new Buffer(zipFile.toString('utf-8')));         var text = zip.getData();         console.log(text.toString()); // fails     });

993

asked Apr 28 '12 00:04

pathikrit

2 Answers

You need a library that can handle buffers. The latest version of adm-zip will do:

npm install adm-zip

My solution uses the http.get method, since it returns Buffer chunks.

Code:

var file_url = 'http://notepad-plus-plus.org/repository/7.x/7.6/npp.7.6.bin.x64.zip';

var AdmZip = require('adm-zip');
var http = require('http');

// Download the archive over plain HTTP, collect the Buffer chunks of the
// response, and open the resulting Buffer with AdmZip without touching disk.
http.get(file_url, function(res) {
  // http.get does not follow redirects; anything other than 200 means the
  // body is not the zip archive, so do not hand it to AdmZip.
  if (res.statusCode !== 200) {
    res.resume(); // discard the body so the socket is released
    console.error('Download failed with HTTP status %d', res.statusCode);
    return;
  }

  var chunks = [];

  res.on('data', function(chunk) {
    chunks.push(chunk);
  }).on('end', function() {
    // Buffers cannot be resized, so the chunks are joined into one new
    // Buffer once the whole response has arrived.
    var buf = Buffer.concat(chunks);

    var zip = new AdmZip(buf);
    var zipEntries = zip.getEntries();
    console.log(zipEntries.length);

    // Print the text of every entry whose name contains "readme".
    zipEntries.forEach(function(entry) {
      if (entry.entryName.match(/readme/)) {
        console.log(zip.readAsText(entry));
      }
    });
  });
}).on('error', function(err) {
  // Connection-level failures (DNS, refused connection, reset) arrive here;
  // without this handler they would surface as an uncaught 'error' event.
  console.error('Download failed: %s', err.message);
});

The idea is to create an array of buffers and concatenate them into a new one at the end. This is due to the fact that buffers cannot be resized.

Update

This is a simpler solution that uses the request module to obtain the response in a buffer, by setting encoding: null in the options. It also follows redirects and resolves http/https automatically.

var file_url = 'https://github.com/mihaifm/linq/releases/download/3.1.1/linq.js-3.1.1.zip';

var AdmZip = require('adm-zip');
var request = require('request');

// `encoding: null` makes request deliver the body as a Buffer, which can be
// passed to AdmZip as-is.
request.get({url: file_url, encoding: null}, (err, res, body) => {
  // On a transport error `body` is undefined; report the real cause instead
  // of letting AdmZip fail on a missing buffer.
  if (err) {
    console.error('Download failed: %s', err.message);
    return;
  }

  // A non-200 body is an error page, not a zip archive.
  if (res.statusCode !== 200) {
    console.error('Download failed with HTTP status %d', res.statusCode);
    return;
  }

  const zip = new AdmZip(body);
  const zipEntries = zip.getEntries();
  console.log(zipEntries.length);

  // Print the text of every entry whose name contains "readme" (any case).
  zipEntries.forEach((entry) => {
    if (entry.entryName.match(/readme/i)) {
      console.log(zip.readAsText(entry));
    }
  });
});

The body of the response is a buffer that can be passed directly to AdmZip, simplifying the whole process.

145

answered Oct 11 '22 13:10

mihai

Sadly, you can't pipe the response stream into the unzip job the way Node's zlib library allows you to; you have to buffer the response and wait for it to end. For big files, I suggest piping the response to an fs stream instead, otherwise you will fill up your memory in a blink!

I don't completely understand what you are trying to do, but imho this is the best approach. You should keep your data in memory only the time you really need it, and then stream to the csv parser.

If you want to keep all your data in memory, you can replace the csv parser method fromPath with from, which takes a buffer instead, and have getData return the unzipped data directly.

You can use AdmZip (as @mihai said) instead of node-zip; just pay attention, because AdmZip is not yet published on npm, so you need to install it from GitHub:

$ npm install git://github.com/cthackers/adm-zip.git

N.B. Assumption: the zip file contains only one file

var request = require('request'),
    fs = require('fs'),
    csv = require('csv'),
    NodeZip = require('node-zip');

/**
 * Downloads a zip archive into a temporary file, unzips it with node-zip and
 * writes every entry to its own temporary file.
 *
 * The callback is invoked once per archive entry, so it is called exactly
 * once only when the archive contains a single file.
 *
 * @param {string} tmpFolder - Path prefix for temporary files; it is
 *   concatenated directly with the generated name, so it should end with a
 *   path separator (e.g. '/tmp/').
 * @param {string} url - Address of the zip archive to download.
 * @param {function(?Error, string=)} callback - Receives an error, or the
 *   path of the temporary file holding the unzipped entry.
 */
function getData(tmpFolder, url, callback) {
  var tempZipFilePath = tmpFolder + new Date().getTime() + Math.random();
  var tempZipFileStream = fs.createWriteStream(tempZipFilePath);

  tempZipFileStream.on('error', callback);

  // Wait for 'finish' on the write stream rather than 'end' on the request:
  // 'end' only means the response was fully read, while 'finish' means all
  // of it has been flushed to the temporary file.
  tempZipFileStream.on('finish', function () {
    fs.readFile(tempZipFilePath, 'base64', function (err, zipContent) {
      if (err) {
        return callback(err);
      }

      var zip;
      try {
        zip = new NodeZip(zipContent, { base64: true });
      } catch (zipErr) {
        // Report an unreadable archive through the callback instead of
        // throwing out of this asynchronous handler.
        return callback(zipErr);
      }

      Object.keys(zip.files).forEach(function (filename) {
        var tempFilePath = tmpFolder + new Date().getTime() + Math.random();
        var unzipped = zip.files[filename].data;
        fs.writeFile(tempFilePath, unzipped, function (err) {
          callback(err, tempFilePath);
        });
      });
    });
  });

  request.get({
    url: url,
    encoding: null
  }).on('error', callback).pipe(tempZipFileStream);
}

getData('/tmp/', 'http://bdn-ak.bloomberg.com/precanned/Comdty_Calendar_Spread_Option_20120428.txt.zip', function (err, path) {
  if (err) {
    // Pass the message as a format argument; concatenating it onto the
    // format string would print a literal "%s".
    return console.error('error: %s', err.message);
  }
  var metadata = [];
  csv().fromPath(path, {
    delimiter: '|',
    columns: true
  }).transform(function (data) {
    // do things with your data
    // Rows whose NAME starts with '#' are collected as metadata and dropped
    // from the output; every other row is passed through.
    if (data.NAME[0] === '#') {
      metadata.push(data.NAME);
    } else {
      return data;
    }
  }).on('data', function (data, index) {
    console.log('#%d %s', index, JSON.stringify(data, null, '  '));
  }).on('end', function (count) {
    console.log('Metadata: %s', JSON.stringify(metadata, null, '  '));
    console.log('Number of lines: %d', count);
  }).on('error', function (error) {
    console.error('csv parsing error: %s', error.message);
  });
});

answered Oct 11 '22 14:10

kilianc

Related questions
                            
                                How to clear leaflet map of all markers and layers before adding new ones?
                            
                                Difference between import X and import * as X in node.js (ES6 / Babel)?
                            
                                slow function call in V8 when using the same key for the functions in different objects
                            
                                What is the difference between JavaScript and jQuery? [closed]
                            
                                How target DOM with react useRef in map
                            
                                jQuery min/max property from array of elements
                            
                                Why can't I pass "window.location.reload" as an argument to setTimeout?
                            
                                Jest URL.createObjectURL is not a function
                            
                                How to execute 'npm run' command programmatically?
                            
                                How can I simulate a click to an anchor tag?
                            
                                jQuery key code for command key
                            
                                How can I use backslashes (\) in a string?
                            
                                match Vs exec in JavaScript [duplicate]
                            
                                How do you find out the caller function in JavaScript when use strict is enabled?
                            
                                Mocking up static methods in jest
                            
                                Assignment with double ampersand "&&" [duplicate]
                            
                                How to get the innerHTML of selectable jquery element?
                            
                                Why does an onclick property set with setAttribute fail to work in IE?
                            
                                Using jQuery to find an element at a particular position?
                            
                                Applying a function to each object in a JavaScript array

Donate For Us

If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!

Donate Us With

How to download and unzip a zip file in memory in NodeJs?

Tags:

javascript

node.js

zip

zlib

unzip

pathikrit

People also ask

2 Answers

mihai

kilianc

Recent Activity

Donate For Us