Browse Source

fs: optimize realpath using uv_fs_realpath()

Remove realpath() and realpathSync() cache.
Use the native uv_fs_realpath(), which is faster
than the JS implementation by a few orders of magnitude.

PR-URL: https://github.com/nodejs/node/pull/3594
Reviewed-By: Trevor Norris <trev.norris@gmail.com>
Reviewed-By: Brian White <mscdex@mscdex.net>
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Johan Bergström <bugs@bergstroem.nu>
process-exit-stdio-flushing
Yuval Brik 9 years ago
committed by Brian White
parent
commit
b488b19eaf
No known key found for this signature in database GPG Key ID: 606D7358F94DA209
  1. 36
      doc/api/fs.markdown
  2. 249
      lib/fs.js
  3. 13
      lib/module.js
  4. 52
      src/node_file.cc
  5. 99
      test/parallel/test-fs-realpath.js
  6. 14
      test/sequential/test-regress-GH-3542.js

36
doc/api/fs.markdown

@ -916,26 +916,20 @@ object with an `encoding` property specifying the character encoding to use for
the link path passed to the callback. If the `encoding` is set to `'buffer'`,
the link path returned will be passed as a `Buffer` object.
## fs.realpath(path[, cache], callback)
## fs.realpath(path[, options], callback)
* `path` {String | Buffer}
* `cache` {Object}
* `options` {String | Object}
* `encoding` {String} default = `'utf8'`
* `callback` {Function}
Asynchronous realpath(2). The `callback` gets two arguments `(err,
resolvedPath)`. May use `process.cwd` to resolve relative paths. `cache` is an
object literal of mapped paths that can be used to force a specific path
resolution or avoid additional `fs.stat` calls for known real paths.
Example:
resolvedPath)`. May use `process.cwd` to resolve relative paths.
```js
var cache = {'/etc':'/private/etc'};
fs.realpath('/etc/passwd', cache, (err, resolvedPath) => {
if (err) throw err;
console.log(resolvedPath);
});
```
The optional `options` argument can be a string specifying an encoding, or an
object with an `encoding` property specifying the character encoding to use for
the path passed to the callback. If the `encoding` is set to `'buffer'`,
the path returned will be passed as a `Buffer` object.
## fs.readSync(fd, buffer, offset, length, position)
@ -947,14 +941,18 @@ fs.realpath('/etc/passwd', cache, (err, resolvedPath) => {
Synchronous version of [`fs.read()`][]. Returns the number of `bytesRead`.
## fs.realpathSync(path[, cache])
## fs.realpathSync(path[, options])
* `path` {String | Buffer}
* `cache` {Object}
* `options` {String | Object}
* `encoding` {String} default = `'utf8'`
Synchronous realpath(2). Returns the resolved path. `cache` is an
object literal of mapped paths that can be used to force a specific path
resolution or avoid additional `fs.stat` calls for known real paths.
Synchronous realpath(2). Returns the resolved path.
The optional `options` argument can be a string specifying an encoding, or an
object with an `encoding` property specifying the character encoding to use for
the returned value. If the `encoding` is set to `'buffer'`,
the path returned will be a `Buffer` object.
## fs.rename(oldPath, newPath, callback)

249
lib/fs.js

@ -1557,234 +1557,37 @@ fs.unwatchFile = function(filename, listener) {
}
};
// Regexp that finds the next portion of a (partial) path
// result is [base_with_slash, base], e.g. ['somedir/', 'somedir']
const nextPartRe = isWindows ?
/(.*?)(?:[\/\\]+|$)/g :
/(.*?)(?:[\/]+|$)/g;
// Regex to find the device root, including trailing slash. E.g. 'c:\\'.
const splitRootRe = isWindows ?
/^(?:[a-zA-Z]:|[\\\/]{2}[^\\\/]+[\\\/][^\\\/]+)?[\\\/]*/ :
/^[\/]*/;
fs.realpathSync = function realpathSync(p, cache) {
// make p is absolute
p = pathModule.resolve(p);
if (cache && Object.prototype.hasOwnProperty.call(cache, p)) {
return cache[p];
}
const original = p;
const seenLinks = {};
const knownHard = {};
// current character position in p
var pos;
// the partial path so far, including a trailing slash if any
var current;
// the partial path without a trailing slash (except when pointing at a root)
var base;
// the partial path scanned in the previous round, with slash
var previous;
start();
function start() {
// Skip over roots
var m = splitRootRe.exec(p);
pos = m[0].length;
current = m[0];
base = m[0];
previous = '';
// On windows, check that the root exists. On unix there is no need.
if (isWindows && !knownHard[base]) {
fs.lstatSync(base);
knownHard[base] = true;
}
}
// walk down the path, swapping out linked pathparts for their real
// values
// NB: p.length changes.
while (pos < p.length) {
// find the next part
nextPartRe.lastIndex = pos;
var result = nextPartRe.exec(p);
previous = current;
current += result[0];
base = previous + result[1];
pos = nextPartRe.lastIndex;
// continue if not a symlink
if (knownHard[base] || (cache && cache[base] === base)) {
continue;
}
var resolvedLink;
if (cache && Object.prototype.hasOwnProperty.call(cache, base)) {
// some known symbolic link. no need to stat again.
resolvedLink = cache[base];
} else {
var stat = fs.lstatSync(base);
if (!stat.isSymbolicLink()) {
knownHard[base] = true;
if (cache) cache[base] = base;
continue;
}
// read the link if it wasn't read before
// dev/ino always return 0 on windows, so skip the check.
var linkTarget = null;
if (!isWindows) {
var id = stat.dev.toString(32) + ':' + stat.ino.toString(32);
if (seenLinks.hasOwnProperty(id)) {
linkTarget = seenLinks[id];
}
}
if (linkTarget === null) {
fs.statSync(base);
linkTarget = fs.readlinkSync(base);
}
resolvedLink = pathModule.resolve(previous, linkTarget);
// track this, if given a cache.
if (cache) cache[base] = resolvedLink;
if (!isWindows) seenLinks[id] = linkTarget;
}
// resolve the link, then start over
p = pathModule.resolve(resolvedLink, p.slice(pos));
start();
}
if (cache) cache[original] = p;
return p;
// Synchronous realpath(2) backed by the native binding.
//   path    - path to resolve; checked with nullCheck() before use
//   options - falsy for defaults, an encoding string, or an object whose
//             `encoding` property is forwarded to the binding
// Returns whatever binding.realpath() returns for the given path/encoding.
// Throws a TypeError when `options` is truthy but is neither a string nor
// an object.
fs.realpathSync = function realpathSync(path, options) {
  var settings;
  if (!options) {
    settings = {};
  } else {
    switch (typeof options) {
      case 'string':
        // A bare string is shorthand for {encoding: <string>}.
        settings = {encoding: options};
        break;
      case 'object':
        settings = options;
        break;
      default:
        throw new TypeError('"options" must be a string or an object');
    }
  }
  nullCheck(path);
  return binding.realpath(pathModule._makeLong(path), settings.encoding);
};
fs.realpath = function realpath(p, cache, cb) {
if (typeof cb !== 'function') {
cb = maybeCallback(cache);
cache = null;
}
// make p is absolute
p = pathModule.resolve(p);
if (cache && Object.prototype.hasOwnProperty.call(cache, p)) {
return process.nextTick(cb.bind(null, null, cache[p]));
}
const original = p;
const seenLinks = {};
const knownHard = {};
// current character position in p
var pos;
// the partial path so far, including a trailing slash if any
var current;
// the partial path without a trailing slash (except when pointing at a root)
var base;
// the partial path scanned in the previous round, with slash
var previous;
start();
function start() {
// Skip over roots
var m = splitRootRe.exec(p);
pos = m[0].length;
current = m[0];
base = m[0];
previous = '';
// On windows, check that the root exists. On unix there is no need.
if (isWindows && !knownHard[base]) {
fs.lstat(base, function(err) {
if (err) return cb(err);
knownHard[base] = true;
LOOP();
});
} else {
process.nextTick(LOOP);
}
}
// walk down the path, swapping out linked pathparts for their real
// values
function LOOP() {
// stop if scanned past end of path
if (pos >= p.length) {
if (cache) cache[original] = p;
return cb(null, p);
}
// find the next part
nextPartRe.lastIndex = pos;
var result = nextPartRe.exec(p);
previous = current;
current += result[0];
base = previous + result[1];
pos = nextPartRe.lastIndex;
// continue if not a symlink
if (knownHard[base] || (cache && cache[base] === base)) {
return process.nextTick(LOOP);
}
if (cache && Object.prototype.hasOwnProperty.call(cache, base)) {
// known symbolic link. no need to stat again.
return gotResolvedLink(cache[base]);
}
return fs.lstat(base, gotStat);
}
function gotStat(err, stat) {
if (err) return cb(err);
// if not a symlink, skip to the next path part
if (!stat.isSymbolicLink()) {
knownHard[base] = true;
if (cache) cache[base] = base;
return process.nextTick(LOOP);
}
// stat & read the link if not read before
// call gotTarget as soon as the link target is known
// dev/ino always return 0 on windows, so skip the check.
if (!isWindows) {
var id = stat.dev.toString(32) + ':' + stat.ino.toString(32);
if (seenLinks.hasOwnProperty(id)) {
return gotTarget(null, seenLinks[id], base);
}
}
fs.stat(base, function(err) {
if (err) return cb(err);
fs.readlink(base, function(err, target) {
if (!isWindows) seenLinks[id] = target;
gotTarget(err, target);
});
});
}
function gotTarget(err, target, base) {
if (err) return cb(err);
var resolvedLink = pathModule.resolve(previous, target);
if (cache) cache[base] = resolvedLink;
gotResolvedLink(resolvedLink);
}
function gotResolvedLink(resolvedLink) {
// resolve the link, then start over
p = pathModule.resolve(resolvedLink, p.slice(pos));
start();
// Asynchronous realpath(2) backed by the native binding.
//   path     - path to resolve; checked with nullCheck(path, callback)
//   options  - optional: falsy for defaults, an encoding string, an object
//              whose `encoding` property is forwarded to the binding, or the
//              callback itself when no options are given
//   callback - completion callback, wrapped by makeCallback()
// Throws a TypeError when `options` is truthy but is not a function, string
// or object. Always returns undefined.
fs.realpath = function realpath(path, options, callback) {
  var settings = {};
  if (options) {
    var kind = typeof options;
    if (kind === 'function') {
      // Called as realpath(path, callback): shift the arguments.
      callback = options;
    } else if (kind === 'string') {
      settings = {encoding: options};
    } else if (kind === 'object') {
      settings = options;
    } else {
      throw new TypeError('"options" must be a string or an object');
    }
  }
  callback = makeCallback(callback);
  // Only issue the request when nullCheck() reports a truthy result.
  if (nullCheck(path, callback)) {
    var req = new FSReqWrap();
    req.oncomplete = callback;
    binding.realpath(pathModule._makeLong(path), settings.encoding, req);
  }
};

13
lib/module.js

@ -108,19 +108,10 @@ function tryPackage(requestPath, exts) {
tryExtensions(path.resolve(filename, 'index'), exts);
}
// In order to minimize unnecessary lstat() calls,
// this cache is a list of known-real paths.
// Set to an empty object to reset.
Module._realpathCache = {};
// check if the file exists and is not a directory
function tryFile(requestPath) {
const rc = stat(requestPath);
return rc === 0 && toRealPath(requestPath);
}
function toRealPath(requestPath) {
return fs.realpathSync(requestPath, Module._realpathCache);
return rc === 0 && fs.realpathSync(requestPath);
}
// given a path, check if the file exists with any of the set extensions
@ -163,7 +154,7 @@ Module._findPath = function(request, paths) {
if (!trailingSlash) {
const rc = stat(basePath);
if (rc === 0) { // File.
filename = toRealPath(basePath);
filename = fs.realpathSync(basePath);
} else if (rc === 1) { // Directory.
if (exts === undefined)
exts = Object.keys(Module._extensions);

52
src/node_file.cc

@ -241,6 +241,22 @@ static void After(uv_fs_t *req) {
}
break;
case UV_FS_REALPATH:
link = StringBytes::Encode(env->isolate(),
static_cast<const char*>(req->ptr),
req_wrap->encoding_);
if (link.IsEmpty()) {
argv[0] = UVException(env->isolate(),
UV_EINVAL,
req_wrap->syscall(),
"Invalid character encoding for link",
req->path,
req_wrap->data());
} else {
argv[1] = link;
}
break;
case UV_FS_READ:
// Buffer interface
argv[1] = Integer::New(env->isolate(), req->result);
@ -863,6 +879,41 @@ static void MKDir(const FunctionCallbackInfo<Value>& args) {
}
}
// Native implementation behind the fs binding's `realpath` method.
//
// Arguments (from JS):
//   args[0] - path to resolve; a "path required" type error is raised when
//             no arguments are passed
//   args[1] - encoding for the result, parsed with ParseEncoding() and
//             defaulting to UTF8
//   args[2] - optional request object; only read when exactly three
//             arguments were passed
//
// When args[2] is an object the call is dispatched through ASYNC_CALL;
// otherwise it runs through SYNC_CALL and the encoded result is set as the
// return value.
static void RealPath(const FunctionCallbackInfo<Value>& args) {
Environment* env = Environment::GetCurrent(args);
const int argc = args.Length();
if (argc < 1)
return TYPE_ERROR("path required");
BufferValue path(env->isolate(), args[0]);
// NOTE(review): ASSERT_PATH is defined outside this view; presumably it
// rejects an unusable path value -- confirm against the macro definition.
ASSERT_PATH(path)
const enum encoding encoding = ParseEncoding(env->isolate(), args[1], UTF8);
// Start from null so the synchronous branch is taken unless a third
// argument was supplied.
Local<Value> callback = Null(env->isolate());
if (argc == 3)
callback = args[2];
if (callback->IsObject()) {
// Asynchronous branch: the encoding is handed to ASYNC_CALL along with the
// request object and the path.
ASYNC_CALL(realpath, callback, encoding, *path);
} else {
// Synchronous branch: the resolved path is read back from SYNC_REQ.ptr.
SYNC_CALL(realpath, *path, *path);
const char* link_path = static_cast<const char*>(SYNC_REQ.ptr);
Local<Value> rc = StringBytes::Encode(env->isolate(),
link_path,
encoding);
// An empty handle from Encode() is reported to JS as a UV_EINVAL exception.
if (rc.IsEmpty()) {
return env->ThrowUVException(UV_EINVAL,
"realpath",
"Invalid character encoding for path",
*path);
}
args.GetReturnValue().Set(rc);
}
}
static void ReadDir(const FunctionCallbackInfo<Value>& args) {
Environment* env = Environment::GetCurrent(args);
@ -1432,6 +1483,7 @@ void InitFs(Local<Object> target,
env->SetMethod(target, "writeBuffer", WriteBuffer);
env->SetMethod(target, "writeBuffers", WriteBuffers);
env->SetMethod(target, "writeString", WriteString);
env->SetMethod(target, "realpath", RealPath);
env->SetMethod(target, "chmod", Chmod);
env->SetMethod(target, "fchmod", FChmod);

99
test/parallel/test-fs-realpath.js

@ -10,9 +10,13 @@ var skipSymlinks = false;
common.refreshTmpDir();
var root = '/';
var assertEqualPath = assert.equal;
if (common.isWindows) {
// something like "C:\\"
root = process.cwd().substr(0, 3);
assertEqualPath = function(path_left, path_right, message) {
assert.equal(path_left.toLowerCase(), path_right.toLowerCase(), message);
};
// On Windows, creating symlinks requires admin privileges.
// We'll only try to run symlink test if we have enough privileges.
@ -96,9 +100,9 @@ function test_simple_relative_symlink(callback) {
unlink.push(t[0]);
});
var result = fs.realpathSync(entry);
assert.equal(result, path.resolve(expected));
assertEqualPath(result, path.resolve(expected));
asynctest(fs.realpath, [entry], callback, function(err, result) {
assert.equal(result, path.resolve(expected));
assertEqualPath(result, path.resolve(expected));
});
}
@ -122,9 +126,9 @@ function test_simple_absolute_symlink(callback) {
unlink.push(t[0]);
});
var result = fs.realpathSync(entry);
assert.equal(result, path.resolve(expected));
assertEqualPath(result, path.resolve(expected));
asynctest(fs.realpath, [entry], callback, function(err, result) {
assert.equal(result, path.resolve(expected));
assertEqualPath(result, path.resolve(expected));
});
}
@ -151,9 +155,9 @@ function test_deep_relative_file_symlink(callback) {
unlink.push(linkPath1);
unlink.push(entry);
assert.equal(fs.realpathSync(entry), path.resolve(expected));
assertEqualPath(fs.realpathSync(entry), path.resolve(expected));
asynctest(fs.realpath, [entry], callback, function(err, result) {
assert.equal(result, path.resolve(expected));
assertEqualPath(result, path.resolve(expected));
});
}
@ -178,10 +182,10 @@ function test_deep_relative_dir_symlink(callback) {
unlink.push(linkPath1b);
unlink.push(entry);
assert.equal(fs.realpathSync(entry), path.resolve(expected));
assertEqualPath(fs.realpathSync(entry), path.resolve(expected));
asynctest(fs.realpath, [entry], callback, function(err, result) {
assert.equal(result, path.resolve(expected));
assertEqualPath(result, path.resolve(expected));
});
}
@ -223,9 +227,9 @@ function test_cyclic_link_overprotection(callback) {
try {fs.unlinkSync(link);} catch (ex) {}
fs.symlinkSync(cycles, link, 'dir');
unlink.push(link);
assert.equal(fs.realpathSync(testPath), path.resolve(expected));
assertEqualPath(fs.realpathSync(testPath), path.resolve(expected));
asynctest(fs.realpath, [testPath], callback, function(er, res) {
assert.equal(res, path.resolve(expected));
assertEqualPath(res, path.resolve(expected));
});
}
@ -258,10 +262,10 @@ function test_relative_input_cwd(callback) {
var origcwd = process.cwd();
process.chdir(entrydir);
assert.equal(fs.realpathSync(entry), path.resolve(expected));
assertEqualPath(fs.realpathSync(entry), path.resolve(expected));
asynctest(fs.realpath, [entry], callback, function(err, result) {
process.chdir(origcwd);
assert.equal(result, path.resolve(expected));
assertEqualPath(result, path.resolve(expected));
return true;
});
}
@ -311,9 +315,9 @@ function test_deep_symlink_mix(callback) {
unlink.push(tmp('node-test-realpath-d2'));
}
var expected = tmpAbsDir + '/cycles/root.js';
assert.equal(fs.realpathSync(entry), path.resolve(expected));
assertEqualPath(fs.realpathSync(entry), path.resolve(expected));
asynctest(fs.realpath, [entry], callback, function(err, result) {
assert.equal(result, path.resolve(expected));
assertEqualPath(result, path.resolve(expected));
return true;
});
}
@ -325,10 +329,10 @@ function test_non_symlinks(callback) {
var expected = tmpAbsDir + '/cycles/root.js';
var origcwd = process.cwd();
process.chdir(entrydir);
assert.equal(fs.realpathSync(entry), path.resolve(expected));
assertEqualPath(fs.realpathSync(entry), path.resolve(expected));
asynctest(fs.realpath, [entry], callback, function(err, result) {
process.chdir(origcwd);
assert.equal(result, path.resolve(expected));
assertEqualPath(result, path.resolve(expected));
return true;
});
}
@ -337,13 +341,13 @@ var upone = path.join(process.cwd(), '..');
function test_escape_cwd(cb) {
console.log('test_escape_cwd');
asynctest(fs.realpath, ['..'], cb, function(er, uponeActual) {
assert.equal(upone, uponeActual,
assertEqualPath(upone, uponeActual,
'realpath("..") expected: ' + path.resolve(upone) +
' actual:' + uponeActual);
});
}
var uponeActual = fs.realpathSync('..');
assert.equal(upone, uponeActual,
assertEqualPath(upone, uponeActual,
'realpathSync("..") expected: ' + path.resolve(upone) +
' actual:' + uponeActual);
@ -385,14 +389,14 @@ function test_up_multiple(cb) {
var abedabeda = tmp('abedabeda'.split('').join('/'));
var abedabeda_real = tmp('a');
assert.equal(fs.realpathSync(abedabeda), abedabeda_real);
assert.equal(fs.realpathSync(abedabed), abedabed_real);
assertEqualPath(fs.realpathSync(abedabeda), abedabeda_real);
assertEqualPath(fs.realpathSync(abedabed), abedabed_real);
fs.realpath(abedabeda, function(er, real) {
if (er) throw er;
assert.equal(abedabeda_real, real);
assertEqualPath(abedabeda_real, real);
fs.realpath(abedabed, function(er, real) {
if (er) throw er;
assert.equal(abedabed_real, real);
assertEqualPath(abedabed_real, real);
cb();
cleanup();
});
@ -450,56 +454,14 @@ function test_abs_with_kids(cb) {
var expectPath = root + '/a/b/c/x.txt';
var actual = fs.realpathSync(linkPath);
// console.log({link:linkPath,expect:expectPath,actual:actual},'sync');
assert.equal(actual, path.resolve(expectPath));
assertEqualPath(actual, path.resolve(expectPath));
asynctest(fs.realpath, [linkPath], cb, function(er, actual) {
// console.log({link:linkPath,expect:expectPath,actual:actual},'async');
assert.equal(actual, path.resolve(expectPath));
assertEqualPath(actual, path.resolve(expectPath));
cleanup();
});
}
// Exercises the `cache` argument of fs.realpathSync()/fs.realpath() with a
// hand-built cache whose entries are expected to be returned as-is.
// `cb` is invoked once both asynchronous fs.realpath() callbacks have run.
function test_lying_cache_liar(cb) {
// number of asynchronous callbacks still outstanding before cb() may fire
var n = 2;
// this should not require *any* stat calls, since everything
// checked by realpath will be found in the cache.
console.log('test_lying_cache_liar');
var cache = { '/foo/bar/baz/bluff' : '/foo/bar/bluff',
'/1/2/3/4/5/6/7' : '/1',
'/a' : '/a',
'/a/b' : '/a/b',
'/a/b/c' : '/a/b',
'/a/b/d' : '/a/b/d' };
// On Windows, rebuild the cache with keys and values passed through
// path.resolve().
if (common.isWindows) {
var wc = {};
Object.keys(cache).forEach(function(k) {
wc[ path.resolve(k) ] = path.resolve(cache[k]);
});
cache = wc;
}
// Sync lookup of a path that is itself a cache key must return the cached
// value.
var bluff = path.resolve('/foo/bar/baz/bluff');
var rps = fs.realpathSync(bluff, cache);
assert.equal(cache[bluff], rps);
var nums = path.resolve('/1/2/3/4/5/6/7');
var called = false; // no sync cb calling!
fs.realpath(nums, cache, function(er, rp) {
called = true;
assert.equal(cache[nums], rp);
if (--n === 0) cb();
});
// The callback above must not have run synchronously, even on a cache hit.
assert(called === false);
// '/a/b/c/d' is not a cache key; the expected result is '/a/b/d', using the
// cached '/a/b/c' -> '/a/b' entry.
const test = path.resolve('/a/b/c/d');
const expect = path.resolve('/a/b/d');
var actual = fs.realpathSync(test, cache);
assert.equal(expect, actual);
fs.realpath(test, cache, function(er, actual) {
assert.equal(expect, actual);
if (--n === 0) cb();
});
}
// ----------------------------------------------------------------------------
var tests = [
@ -515,7 +477,6 @@ var tests = [
test_non_symlinks,
test_escape_cwd,
test_abs_with_kids,
test_lying_cache_liar,
test_up_multiple
];
var numtests = tests.length;
@ -532,10 +493,10 @@ function runNextTest(err) {
}
assert.equal(root, fs.realpathSync('/'));
assertEqualPath(root, fs.realpathSync('/'));
fs.realpath('/', function(err, result) {
assert.equal(null, err);
assert.equal(root, result);
assertEqualPath(root, result);
});

14
test/sequential/test-regress-GH-3542.js

@ -13,21 +13,21 @@ if (!common.isWindows) {
function test(p) {
var result = fs.realpathSync(p);
assert.strictEqual(result, path.resolve(p));
assert.strictEqual(result.toLowerCase(), path.resolve(p).toLowerCase());
fs.realpath(p, function(err, result) {
assert.ok(!err);
assert.strictEqual(result, path.resolve(p));
assert.strictEqual(result.toLowerCase(), path.resolve(p).toLowerCase());
succeeded++;
});
}
test('//localhost/c$/windows/system32');
test('//localhost/c$/windows');
test('//localhost/c$/Windows/System32');
test('//localhost/c$/Windows');
test('//localhost/c$/');
test('\\\\localhost\\c$');
test('c:\\');
test('c:');
test('\\\\localhost\\c$\\');
test('C:\\');
test('C:');
test(process.env.windir);
process.on('exit', function() {

Loading…
Cancel
Save