fs-walk.js/README.md

307 lines
7.2 KiB
Markdown
Raw Normal View History

2020-12-09 12:01:29 +00:00
# 2021 Update
2010-11-21 05:02:53 +00:00
2020-12-09 12:01:29 +00:00
Consider using [`@root/walk`](https://npmjs.org/package/@root/walk) instead.
I created `walk` quite literally a decade ago, in the Node v0.x days.
Back then using an EventEmitter seemed like the thing to do. Nowadays,
it seems a bit overkill for the simple task of walking over directories.
There's nothing wrong with `walk` - it's about the same as it was 10 years ago -
however, at only 50 lines of code, `@root/walk` is much simpler and much faster.
# node-walk
| a [Root](https://rootprojects.org) project
2018-03-26 02:15:58 +00:00
2011-02-04 07:35:56 +00:00
nodejs walk implementation.
This is somewhat of a port of Python's `os.walk`, but using Node.js conventions.
2020-12-09 12:01:29 +00:00
- EventEmitter
- Asynchronous
- Chronological (optionally)
- Built-in flow-control
- includes Synchronous version (same API as Asynchronous)
As few file descriptors are opened at a time as possible.
This is particularly well suited for single hard disks which are not flash or solid state.
2020-12-09 12:01:29 +00:00
## Installation
2010-11-21 05:02:53 +00:00
2015-01-06 01:36:02 +00:00
```bash
npm install --save walk
```
2010-11-21 05:02:53 +00:00
2020-12-09 12:01:29 +00:00
# Getting Started
2015-01-06 01:36:02 +00:00
```javascript
2020-12-09 12:01:29 +00:00
'use strict';
2015-01-06 01:36:02 +00:00
2020-12-09 12:01:29 +00:00
var walk = require('walk');
var fs = require('fs');
var walker;
var options = {};
2015-01-06 01:36:02 +00:00
2020-12-09 12:01:29 +00:00
walker = walk.walk('/tmp', options);
2015-01-06 01:36:02 +00:00
2020-12-09 12:01:29 +00:00
walker.on('file', function (root, fileStats, next) {
fs.readFile(fileStats.name, function () {
// doStuff
2015-01-06 01:36:02 +00:00
next();
});
2020-12-09 12:01:29 +00:00
});
2015-01-06 01:36:02 +00:00
2020-12-09 12:01:29 +00:00
walker.on('errors', function (root, nodeStatsArray, next) {
next();
});
walker.on('end', function () {
console.log('all done');
});
2015-01-06 01:36:02 +00:00
```
2020-12-09 12:01:29 +00:00
## Common Events
2015-01-06 01:36:02 +00:00
All single event callbacks are in the form of `function (root, stat, next) {}`.
All multiple event callbacks are in the form of `function (root, stats, next) {}`, except **names** which is an array of strings.
All **error** event callbacks are in the form `function (root, stat/stats, next) {}`.
**`stat.error`** contains the error.
2020-12-09 12:01:29 +00:00
- `names`
- `directory`
- `directories`
- `file`
- `files`
- `end`
- `nodeError` (`stat` failed)
- `directoryError` (`stat` succeeded, but `readdir` failed)
- `errors` (a collection of any errors encountered)
2015-01-06 01:36:02 +00:00
A typical `stat` event looks like this:
```javascript
{ dev: 16777223,
mode: 33188,
nlink: 1,
uid: 501,
gid: 20,
rdev: 0,
blksize: 4096,
ino: 49868100,
size: 5617,
blocks: 16,
atime: Mon Jan 05 2015 18:18:10 GMT-0700 (MST),
mtime: Thu Sep 25 2014 21:21:28 GMT-0600 (MDT),
ctime: Thu Sep 25 2014 21:21:28 GMT-0600 (MDT),
birthtime: Thu Sep 25 2014 21:21:28 GMT-0600 (MDT),
name: 'README.md',
type: 'file' }
```
2020-12-09 12:01:29 +00:00
# Advanced Example
2010-11-21 05:02:53 +00:00
2011-05-03 03:11:03 +00:00
Both Asynchronous and Synchronous versions are provided.
2013-06-23 06:09:08 +00:00
```javascript
2020-12-09 12:01:29 +00:00
'use strict';
2013-06-23 06:09:08 +00:00
2020-12-09 12:01:29 +00:00
var walk = require('walk');
var fs = require('fs');
var options;
var walker;
2013-06-23 06:09:08 +00:00
2020-12-09 12:01:29 +00:00
options = {
followLinks: false,
// directories with these keys will be skipped
filters: ['Temp', '_Temp'],
};
2013-06-23 06:09:08 +00:00
2020-12-09 12:01:29 +00:00
walker = walk.walk('/tmp', options);
2013-06-23 06:09:08 +00:00
2020-12-09 12:01:29 +00:00
// OR
// walker = walk.walkSync("/tmp", options);
2013-06-23 06:09:08 +00:00
2020-12-09 12:01:29 +00:00
walker.on('names', function (root, nodeNamesArray) {
nodeNamesArray.sort(function (a, b) {
if (a > b) return 1;
if (a < b) return -1;
return 0;
});
2020-12-09 12:01:29 +00:00
});
2013-06-23 06:09:08 +00:00
2020-12-09 12:01:29 +00:00
walker.on('directories', function (root, dirStatsArray, next) {
// dirStatsArray is an array of `stat` objects with the additional attributes
// * type
// * error
// * name
2018-03-26 02:15:58 +00:00
2020-12-09 12:01:29 +00:00
next();
});
2020-12-09 12:01:29 +00:00
walker.on('file', function (root, fileStats, next) {
fs.readFile(fileStats.name, function () {
// doStuff
2013-06-23 06:09:08 +00:00
next();
});
2020-12-09 12:01:29 +00:00
});
2013-06-23 06:09:08 +00:00
2020-12-09 12:01:29 +00:00
walker.on('errors', function (root, nodeStatsArray, next) {
next();
});
walker.on('end', function () {
console.log('all done');
});
```
2013-06-23 06:09:08 +00:00
### Sync
2013-09-02 22:38:53 +00:00
Note: You **can't use EventEmitter** if you want a truly synchronous walker
2013-09-02 23:25:49 +00:00
(although it's synchronous under the hood, it appears not to be due to the use of `process.nextTick()`).
2013-09-02 22:38:53 +00:00
Instead **you must use `options.listeners`** for a truly synchronous walker.
2013-09-02 23:25:49 +00:00
Although the sync version uses all of the `fs.readSync`, `fs.readdirSync`, and other sync methods,
I don't think I can prevent the `process.nextTick()` that `EventEmitter` calls.
```javascript
2013-06-23 06:09:08 +00:00
(function () {
2020-12-09 12:01:29 +00:00
'use strict';
2018-03-26 02:15:58 +00:00
var walk = require('walk');
var fs = require('fs');
var options;
var walker;
2013-06-23 06:09:08 +00:00
2013-09-02 22:38:53 +00:00
// To be truly synchronous in the emitter and maintain a compatible api,
// the listeners must be listed before the object is created
2013-06-23 06:09:08 +00:00
options = {
listeners: {
2013-06-23 06:09:08 +00:00
names: function (root, nodeNamesArray) {
2014-07-17 07:17:57 +00:00
nodeNamesArray.sort(function (a, b) {
2013-06-23 06:09:08 +00:00
if (a > b) return 1;
if (a < b) return -1;
return 0;
});
2020-12-09 12:01:29 +00:00
},
directories: function (root, dirStatsArray, next) {
2013-06-23 06:09:08 +00:00
// dirStatsArray is an array of `stat` objects with the additional attributes
// * type
// * error
// * name
2018-03-26 02:15:58 +00:00
next();
2020-12-09 12:01:29 +00:00
},
file: function (root, fileStats, next) {
2013-06-23 06:09:08 +00:00
fs.readFile(fileStats.name, function () {
// doStuff
next();
2013-06-23 06:09:08 +00:00
});
2020-12-09 12:01:29 +00:00
},
errors: function (root, nodeStatsArray, next) {
2013-06-23 06:09:08 +00:00
next();
2020-12-09 12:01:29 +00:00
},
},
2013-06-23 06:09:08 +00:00
};
2020-12-09 12:01:29 +00:00
walker = walk.walkSync('/tmp', options);
2020-12-09 12:01:29 +00:00
console.log('all done');
})();
```
2020-12-09 12:01:29 +00:00
# API
Emitted Values
2020-12-09 12:01:29 +00:00
- `on('XYZ', function(root, stats, next) {})`
2011-02-04 07:14:19 +00:00
2020-12-09 12:01:29 +00:00
- `root` - the directory containing the files to be inspected
- _stats[Array]_ - a single `stats` object or an array with some added attributes
- type - 'file', 'directory', etc
- error
- name - the name of the file, dir, etc
- next - no more files will be read until this is called
Single Events - fired immediately
2020-12-09 12:01:29 +00:00
- `end` - No files, dirs, etc left to inspect
2020-12-09 12:01:29 +00:00
- `directoryError` - Error when `fstat` succeeded, but reading path failed (Probably due to permissions).
- `nodeError` - Error when `fstat` did not succeed.
- `node` - a `stats` object for a node of any type
- `file` - includes links when `followLinks` is `true`
- `directory` - **NOTE** you could get a recursive loop if `followLinks` and a directory links to its parent
- `symbolicLink` - always empty when `followLinks` is `true`
- `blockDevice`
- `characterDevice`
- `FIFO`
- `socket`
Events with Array Arguments - fired after all files in the dir have been `stat`ed
2020-12-09 12:01:29 +00:00
- `names` - before any `stat` takes place. Useful for sorting and filtering.
- Note: the array is an array of `string`s, not `stat` objects
- Note: the `next` argument is a `noop`
2011-02-04 07:14:19 +00:00
2020-12-09 12:01:29 +00:00
- `errors` - errors encountered by `fs.stat` when reading nodes in a directory
- `nodes` - an array of `stats` of any type
- `files`
- `directories` - modification of this array - sorting, removing, etc - affects traversal
- `symbolicLinks`
- `blockDevices`
- `characterDevices`
- `FIFOs`
- `sockets`
2010-11-21 05:02:53 +00:00
2011-02-04 07:14:19 +00:00
**Warning** beware of infinite loops when `followLinks` is true (using `walk-recurse` variant).
2020-12-09 12:01:29 +00:00
# Comparisons
2010-11-21 05:02:53 +00:00
Tested on my `/System` containing 59,490 (+ self) directories (and lots of files).
The size of the text output was 6mb.
2010-11-21 05:02:53 +00:00
`find`:
2020-12-09 12:01:29 +00:00
time bash -c "find /System -type d | wc"
59491 97935 6262916
2010-11-21 05:02:53 +00:00
real 2m27.114s
user 0m1.193s
sys 0m14.859s
2010-11-21 05:02:53 +00:00
`find.js`:
Note that `find.js` omits the start directory
time bash -c "node examples/find.js /System -type d | wc"
59490 97934 6262908
2018-03-26 02:15:58 +00:00
# Test 1
real 2m52.273s
user 0m20.374s
sys 0m27.800s
2018-03-26 02:15:58 +00:00
# Test 2
real 2m23.725s
user 0m18.019s
sys 0m23.202s
# Test 3
real 2m50.077s
user 0m17.661s
sys 0m24.008s
2011-05-03 03:11:03 +00:00
In conclusion, Node.js asynchronous walk is much slower than regular "find".
2012-05-08 15:57:33 +00:00
2020-12-09 12:01:29 +00:00
# LICENSE
2012-05-08 15:57:33 +00:00
`node-walk` is available under the following licenses:
2020-12-09 12:01:29 +00:00
- MIT
- Apache 2
2012-05-08 15:57:33 +00:00
Copyright 2011 - Present AJ ONeal