
Node.js fs.readdir recursive directory search

Any ideas on an async directory search using fs.readdir? I realize that we could introduce recursion and call the read directory function with the next directory to read, but I'm a little worried about it not being async...

Any ideas? I've looked at node-walk, which is great, but it doesn't give me just the files in an array like readdir does.

Looking for output like...

['file1.txt', 'file2.txt', 'dir/file3.txt'] 
asked Apr 29 '11 by crawf

2 Answers

There are basically two ways of accomplishing this. In an async environment you'll notice that there are two kinds of loops: serial and parallel. A serial loop waits for one iteration to complete before it moves on to the next, which guarantees that the iterations complete in order. In a parallel loop, all the iterations are started at the same time and may complete in any order, which makes it much faster than a serial loop. So in this case it's probably better to use a parallel loop, because it doesn't matter what order the walk completes in, just as long as it completes and returns the results (unless you want them in order).

A parallel loop would look like this:

var fs = require('fs');
var path = require('path');

var walk = function(dir, done) {
  var results = [];
  fs.readdir(dir, function(err, list) {
    if (err) return done(err);
    var pending = list.length;
    if (!pending) return done(null, results);
    list.forEach(function(file) {
      file = path.resolve(dir, file);
      fs.stat(file, function(err, stat) {
        if (stat && stat.isDirectory()) {
          walk(file, function(err, res) {
            results = results.concat(res);
            if (!--pending) done(null, results);
          });
        } else {
          results.push(file);
          if (!--pending) done(null, results);
        }
      });
    });
  });
};

A serial loop would look like this:

var fs = require('fs');
var path = require('path');

var walk = function(dir, done) {
  var results = [];
  fs.readdir(dir, function(err, list) {
    if (err) return done(err);
    var i = 0;
    (function next() {
      var file = list[i++];
      if (!file) return done(null, results);
      file = path.resolve(dir, file);
      fs.stat(file, function(err, stat) {
        if (stat && stat.isDirectory()) {
          walk(file, function(err, res) {
            results = results.concat(res);
            next();
          });
        } else {
          results.push(file);
          next();
        }
      });
    })();
  });
};

And to test it out on your home directory (WARNING: the results list will be huge if you have a lot of stuff in your home directory):

walk(process.env.HOME, function(err, results) {
  if (err) throw err;
  console.log(results);
});
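The results come back as absolute paths. If you want them relative to the starting directory, as in the output format from the question, one option (a sketch, not part of the original answer) is to map them through path.relative:

walk(process.env.HOME, function(err, results) {
  if (err) throw err;
  // Turn absolute paths into paths relative to the starting directory,
  // e.g. '/home/you/dir/file3.txt' -> 'dir/file3.txt'
  var relative = results.map(function(file) {
    return path.relative(process.env.HOME, file);
  });
  console.log(relative);
});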

EDIT: Improved examples.

answered Sep 21 '22 by chjj

This one uses the maximum amount of new, buzzwordy features available in node 8, including Promises, util/promisify, destructuring, async-await, map+reduce and more, making your co-workers scratch their heads as they try to figure out what is going on.

Node 8+

No external dependencies.

const { promisify } = require('util');
const { resolve } = require('path');
const fs = require('fs');
const readdir = promisify(fs.readdir);
const stat = promisify(fs.stat);

async function getFiles(dir) {
  const subdirs = await readdir(dir);
  const files = await Promise.all(subdirs.map(async (subdir) => {
    const res = resolve(dir, subdir);
    return (await stat(res)).isDirectory() ? getFiles(res) : res;
  }));
  return files.reduce((a, f) => a.concat(f), []);
}

Usage

getFiles(__dirname)
  .then(files => console.log(files))
  .catch(e => console.error(e));
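Since getFiles returns a promise, it can also be consumed with await inside an async function; a minimal equivalent sketch:

(async () => {
  try {
    const files = await getFiles(__dirname);
    console.log(files);
  } catch (e) {
    console.error(e);
  }
})();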

Node 10.10+

Updated for node 10+ with even more whizbang:

const { resolve } = require('path');
const { readdir } = require('fs').promises;

async function getFiles(dir) {
  const dirents = await readdir(dir, { withFileTypes: true });
  const files = await Promise.all(dirents.map((dirent) => {
    const res = resolve(dir, dirent.name);
    return dirent.isDirectory() ? getFiles(res) : res;
  }));
  return Array.prototype.concat(...files);
}

Note that starting with node 11.15.0 you can use files.flat() instead of Array.prototype.concat(...files) to flatten the files array.
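As a sketch, here is the same function with only the flattening step changed (assuming node 11.15.0+ as noted above):

const { resolve } = require('path');
const { readdir } = require('fs').promises;

// Identical to the node 10.10+ version above, except the results are
// flattened with Array.prototype.flat() instead of spreading into concat.
async function getFiles(dir) {
  const dirents = await readdir(dir, { withFileTypes: true });
  const files = await Promise.all(dirents.map((dirent) => {
    const res = resolve(dir, dirent.name);
    return dirent.isDirectory() ? getFiles(res) : res;
  }));
  return files.flat();
}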

Node 11+

If you want to blow everybody's head up completely, you can use the following version using async iterators. In addition to being really cool, it also allows consumers to pull results one-at-a-time, making it better suited for really large directories.

const { resolve } = require('path');
const { readdir } = require('fs').promises;

async function* getFiles(dir) {
  const dirents = await readdir(dir, { withFileTypes: true });
  for (const dirent of dirents) {
    const res = resolve(dir, dirent.name);
    if (dirent.isDirectory()) {
      yield* getFiles(res);
    } else {
      yield res;
    }
  }
}

Usage has changed because the return type is now an async iterator instead of a promise:

;(async () => {
  for await (const f of getFiles('.')) {
    console.log(f);
  }
})()
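If you still want everything in a flat array, as in the question, you can collect the iterator's results yourself; a small sketch built on the generator above:

;(async () => {
  const files = [];
  // Pull each path out of the async generator and accumulate it.
  for await (const f of getFiles('.')) {
    files.push(f);
  }
  console.log(files);
})()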

In case somebody is interested, I've written more about async iterators here: https://qwtel.com/posts/software/async-generators-in-the-wild/

answered Sep 20 '22 by qwtel