listTree performance #45

dhbaird · 2013-08-12T23:05:02Z

I noticed that listTree() was going slower than I expected for large numbers of files. Therefore, I decided to do some benchmarks to narrow down the slow down. As a result, it looks like an opportunity exists to close a performance gap in either Q or Q-IO. Performance becomes very noticeably affected in the benchmark code with Node-style callbacks versus Q promises, and therefore, that may be a good place to investigate optimization. HTH! More info and/or pull requests to follow, I hope.

Time information in seconds: lower Real time is better.

Test	Paths Returned	Real	User	Sys
require('q-io/fs').listTree('stuff')	96737	40.392	40.373	2.918
require('glob')('stuff/**', { dot:true })	96737	12.873	12.955	2.431
syncListTree('stuff')	96737	3.302	2.203	1.103
asyncFindTree('stuff')	96737	3.175	3.290	2.563
asyncFindTreeQstat('stuff')	96737	18.138	18.064	2.769
asyncFindTreeQstatQreaddir('stuff')	96737	21.262	21.482	2.688
asyncFindTreeQ('stuff')	96737	27.629	27.736	3.027
find \| stat	96737	1.776	0.619	2.962

Edit: I updated a wrong measurement for require('q-io/fs').listTree('stuff'). The correct measurement is ~40 seconds (not 35 seconds as I had written).

The vanilla sync and async times are very good. The shell find | stat result is also provided for a baseline reference outside of Node.js. I've also included the glob() tool, which looks like it could benefit from optimization as well. Our goal is to get Q and/or Q-IO almost as good as vanilla async.

Benchmark codes are:

require('q-io/fs').listTree('stuff')
.then(function(paths) {
  console.log('count =', paths.length);
});


require('glob')('stuff/**', { dot:true }, function(err, paths) {
  console.log('count =', paths.length);
});


var FS = require('fs');
var path = require('path');
function syncListTree(basePath) {
  var stack = [basePath];
  var result = [];
  while (stack.length) {
    var basePath = stack.pop();
    var stat = FS.statSync(basePath);
    if (stat.isDirectory()) {
      var list = FS.readdirSync(basePath);
      stack = stack.concat(list.map(function(x) { return path.join(basePath, x); }));
    }
    result.push(basePath);
  }
  return result;
}
console.log('count =', syncListTree('stuff').length);


var path = require('path');
var FS = require('fs');
function asyncFindTree(basePath, cb) {
  FS.stat(basePath, function(error, stat) {
    if (stat.isDirectory()) {
      FS.readdir(basePath, function(error, children) {
        children = children.map(function(child) { return path.join(basePath, child); });
        var count = children.length;
        var paths_ = [[basePath]];
        function checkCompletion() {
          if (count == 0) {
            var result = Array.prototype.concat.apply([], paths_);
            cb(null, result);
          }
        }
        checkCompletion(); // <-- for the case of children.length == 0
        children.forEach(function(child) {
          asyncFindTree(child, function(err, paths) {
            --count;
            paths_.push(paths);
            checkCompletion();
          });
        });
      });
    } else {
      cb(null, [basePath]);
    }
  });
}
asyncFindTree('stuff', function(error, paths) {
  console.log('count =', paths.length);
});


var Q = require('q');
var path = require('path');
var FS = require('fs');
function asyncFindTreeQstat(basePath, cb) {
  return Q.nfcall(FS.stat, basePath)
  .then(function(stat) {
    if (stat.isDirectory()) {
      FS.readdir(basePath, function(error, children) {
        children = children.map(function(child) { return path.join(basePath, child); });
        var count = children.length;
        var paths_ = [[basePath]];
        function checkCompletion() {
          if (count == 0) {
            var result = Array.prototype.concat.apply([], paths_);
            cb(null, result);
          }
        }
        checkCompletion(); // <-- for the case of children.length == 0
        children.forEach(function(child) {
          asyncFindTreeQstat(child, function(err, paths) {
            --count;
            paths_.push(paths);
            checkCompletion();
          });
        });
      });
    } else {
      cb(null, [basePath]);
    }
  });
}
asyncFindTreeQstat('stuff', function(error, paths) {
  console.log('count =', paths.length);
});


var Q = require('q');
var path = require('path');
var FS = require('fs');
function asyncFindTreeQstatQreaddir(basePath, cb) {
  return Q.nfcall(FS.stat, basePath)
  .then(function(stat) {
    if (stat.isDirectory()) {
      return Q.nfcall(FS.readdir, basePath)
      .then(function(children) {
        children = children.map(function(child) { return path.join(basePath, child); });
        var count = children.length;
        var paths_ = [[basePath]];
        function checkCompletion() {
          if (count == 0) {
            var result = Array.prototype.concat.apply([], paths_);
            cb(null, result);
          }
        }
        checkCompletion(); // <-- for the case of children.length == 0
        children.forEach(function(child) {
          asyncFindTreeQstatQreaddir(child, function(err, paths) {
            --count;
            paths_.push(paths);
            checkCompletion();
          });
        });
      });
    } else {
      cb(null, [basePath]);
    }
  });
}
asyncFindTreeQstatQreaddir('stuff', function(error, paths) {
  console.log('count =', paths.length);
});


var Q = require('q');
var path = require('path');
var FS = require('fs');
function asyncFindTreeQ(basePath) {
  return Q.nfcall(FS.stat, basePath)
  .then(function(stat) {
    if (stat.isDirectory()) {
      return Q.nfcall(FS.readdir, basePath)
      .then(function(children) {
        children = children.map(function(child) { return path.join(basePath, child); });
        return Q.all(children.map(function(child) {
          return asyncFindTreeQ(child)
        }))
        .then(function(paths_) {
          return Array.prototype.concat.apply([basePath], paths_);
        });
      });
    } else {
      return [basePath];
    }
  });
}
asyncFindTreeQ('stuff')
.then(function(paths) {
  console.log('count =', paths.length);
});

time find -L stuff | xargs stat --format '%Y :%y %n' | wc --lines

The text was updated successfully, but these errors were encountered:

kriskowal · 2013-08-12T23:11:00Z

listTree has a conceptual problem since it produces a promise for an array. My hope is to replace that with a promise for a Q stream in a future release—perhaps under a different name to retain backward compatibility—or a promise for completion and just use the guard emitter for streaming purposes.

dhbaird · 2013-08-12T23:33:09Z

That sounds pretty cool @ Q streams. In a particular case that I am working on, I need stat results on all files to check their mtimes, and so it is no problem if I get the entire array all at once; (The only problem is that after I listTree(), then I have to run stat() on all the paths. It sounds like Q streams or the guard emitter might help with that... but that's a separate issue from this.)

Edit: I should say: streaming isn't a problem yet here for me, and probably won't be in this application. My directory trees and associated data aren't big enough to eat up all the RAM - they can happily be all resident in memory for now. But I can see benefit to streaming though otherwise.

kriskowal · 2013-08-12T23:36:55Z

The guard function can already do what you need, I think.

FS.listTree(path, function (name, stat) {
    // return true to include
    // return false to exclude
    // return null to exclude and skip subtree
}))
.then(function () {
})

dhbaird · 2013-08-12T23:45:11Z

That's a cool trick with guard. Looks like this would do the trick of snarfing the stats:

var stats = [];
FS.listTree(path, function(name, stat) {
  stats.push([name, stat])
  return true; // edit: don't forget, this is a guard function afterall
})
.then(function() {
  return stats;
})
.then(function(stats) {
  // awesome, got the stats I need!
});

But this doesn't address the speed gap of using Q.

The issue here is how long it takes listTree() to finish. The fact that it collects a list isn't really a problem. Using Q on stat and readdir inside of listTree is the primary cause of the slow down. I can rewrite listTree() to internally not use promises, and then it works like a little speed demon. Obviously, then it loses the clarity that promises provide which is undesirable. So, I'm wondering if there is potential to identify a specific bottleneck that for some reason causes Q-code to be slower than Node.js callback-style code (and by a large factor: 10x).

kriskowal · 2013-08-12T23:47:06Z

@dhbaird I presume that the problem is recursive array aggregation. It might be faster to pass a single array forward and push results onto it instead of concatenating results from all. Of course, listTree may lose some of the clarity in the process.

dhbaird · 2013-08-12T23:57:41Z

I thought recursive array aggregation might have been a problem too, but take a look at asyncFindTree(). It uses recursive array aggregation (see its nested checkCompletion() function). And it's fast.

When you get a moment, could you take a look at the following: asyncFindTree() and asyncFindTreeQstat(). The difference in code is very small: a single Q.nfcall() is added, and that results in a substantial performance deviation. The run time goes from 3 seconds to 18 seconds.

Edit: Here's the diff (to make it easier to compare the two):

--- a.js        2013-08-12 17:58:04.111106000 -0600
+++ b.js        2013-08-12 17:58:07.262917000 -0600
@@ -1,7 +1,9 @@
+var Q = require('q');
 var path = require('path');
 var FS = require('fs');
-function asyncFindTree(basePath, cb) {
-  FS.stat(basePath, function(error, stat) {
+function asyncFindTreeQstat(basePath, cb) {
+  return Q.nfcall(FS.stat, basePath)
+  .then(function(stat) {
     if (stat.isDirectory()) {
       FS.readdir(basePath, function(error, children) {
         children = children.map(function(child) { return path.join(basePath, child); });

dhbaird · 2013-08-13T00:00:54Z

(Note - I edited previous comment to include diff)

kriskowal · 2013-08-13T20:06:13Z

@dhbaird That is very curious. nfcall does do some extra work in this case, coercing FS.stat to a promise for FS.stat, manipulating the arguments, sending the apply message to that promise, but I would not expect it to dominate the performance like that. You might try using Q.nfbind instead of Q.nfcall to cache the node-ified function in the parent scope to see whether that makes a difference.

dhbaird · 2013-08-13T21:02:57Z

@kriskowal something like this?

var stat2 = Q.nfbind(FS.stat); // aka denodeify
function asyncFindTreeQstat(basePath, cb) {
  return stat2(basePath)
  .then(function(stat) {
...

Good idea, but performance remains about the same.

kriskowal · 2013-08-13T21:17:56Z

Weirder and weirder.

I am going to have to let this issue idle for a while, but if you send a PR that helps the issue without impacting the API, I can get a patch out quickly. If it requires an API change, I can entertain a proposal as well.

dhbaird · 2013-08-13T21:22:59Z

Sounds good to me. I just wanted to get the word out, and I'm happy if you want to close this and re-open it later. I'll keep looking into it.

kriskowal · 2014-05-29T16:14:34Z

I will probably introduce listStream() and listTreeStream() in the v2 branch.

dhbaird assigned kriskowal May 29, 2014

hthetiot assigned hthetiot and unassigned kriskowal Jul 20, 2018

hthetiot modified the milestones: 1.13, Future Jul 20, 2018

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

listTree performance #45

listTree performance #45

dhbaird commented Aug 12, 2013

kriskowal commented Aug 12, 2013

dhbaird commented Aug 12, 2013

kriskowal commented Aug 12, 2013

dhbaird commented Aug 12, 2013

kriskowal commented Aug 12, 2013

dhbaird commented Aug 12, 2013

dhbaird commented Aug 13, 2013

kriskowal commented Aug 13, 2013

dhbaird commented Aug 13, 2013

kriskowal commented Aug 13, 2013

dhbaird commented Aug 13, 2013

kriskowal commented May 29, 2014

listTree performance #45

listTree performance #45

Comments

dhbaird commented Aug 12, 2013

kriskowal commented Aug 12, 2013

dhbaird commented Aug 12, 2013

kriskowal commented Aug 12, 2013

dhbaird commented Aug 12, 2013

kriskowal commented Aug 12, 2013

dhbaird commented Aug 12, 2013

dhbaird commented Aug 13, 2013

kriskowal commented Aug 13, 2013

dhbaird commented Aug 13, 2013

kriskowal commented Aug 13, 2013

dhbaird commented Aug 13, 2013

kriskowal commented May 29, 2014