mirror of https://github.com/lukechilds/node.git
Browse Source
Buffer.byteLength is important for speed because it is called whenever a new Buffer is created from a string. This commit optimizes Buffer.byteLength execution by: - moving the base64 length calculation into JS-land, where it is now much faster - removing redundant code and streamlining the UTF8 length calculation. It also adds a benchmark and better tests. PR-URL: https://github.com/nodejs/io.js/pull/1713 Reviewed-By: Trevor Norris <trev.norris@gmail.com> Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl> v2.3.1-release
Brendan Ashworth
10 years ago
5 changed files with 155 additions and 41 deletions
@ -0,0 +1,55 @@ |
|||
var common = require('../common'); |
|||
|
|||
// Benchmark matrix: each encoding is run against each length multiplier,
// with a single fixed iteration count.
var bench = common.createBenchmark(main, {
  encoding: [
    'utf8',
    'base64'
  ],
  // Repetition counts applied to the 16-character samples below.
  len: [
    1, 2, 4, 16, 64, 256
  ],
  n: [
    5e6
  ]
});

// Sample strings of 16 characters each, one per UTF-8 width
// (bytes needed per character).
var chars = [
  // 1 byte per character
  'hello brendan!!!',
  // 2 bytes per character
  'ΰαβγδεζηθικλμνξο',
  // 3 bytes per character
  '挰挱挲挳挴挵挶挷挸挹挺挻挼挽挾挿',
  // 4 bytes per character
  '𠜎𠜱𠝹𠱓𠱸𠲖𠳏𠳕𠴕𠵼𠵿𠸎𠸏𠹷𠺝𠺢'
];
|||
|
|||
/**
 * Benchmark body: times `Buffer.byteLength` over a set of prepared strings.
 *
 * @param {Object} conf - One combination from the benchmark matrix.
 * @param {number} conf.n - Number of timed iterations (truncated to int32).
 * @param {number} conf.len - How many times each sample string is repeated.
 * @param {string} conf.encoding - 'utf8' or 'base64'; any other value leaves
 *   the input list empty.
 * @throws {Error} If a timed call returns a length different from the one
 *   computed before the timer started.
 */
function main(conf) {
  var n = conf.n | 0;
  var len = conf.len | 0;
  var encoding = conf.encoding;

  var strings = [];
  for (var string of chars) {
    // Strings must be built differently, depending on encoding
    var data = buildString(string, len);
    if (encoding === 'utf8') {
      strings.push(data);
    } else if (encoding === 'base64') {
      // Base64 strings will be much longer than their UTF8 counterparts
      // NOTE(review): `new Buffer(string, encoding)` is deprecated on current
      // Node.js in favour of `Buffer.from`; left as-is on the assumption that
      // this benchmark must still run on the runtime it was written for.
      strings.push(new Buffer(data, 'utf8').toString('base64'));
    }
  }

  // Expected lengths, computed outside the timed region, so the hot loop can
  // verify each result and the call cannot be optimized into a no-op.
  var results = strings.map(function(val) {
    return Buffer.byteLength(val, encoding);
  });

  bench.start();
  for (var i = 0; i < n; i++) {
    // Rotate through the samples using the loop counter. Using `n` here
    // would pin the index to a single string for the entire run.
    var index = i % strings.length;
    // Go!
    var r = Buffer.byteLength(strings[index], encoding);

    if (r !== results[index])
      throw new Error('incorrect return value');
  }
  bench.end(n);
}
|||
|
|||
/**
 * Returns `str` concatenated with itself `times` times.
 *
 * @param {string} str - The string to repeat.
 * @param {number} times - Repetition count. Values below 1 (and NaN) yield
 *   the empty string; fractional values are truncated by `repeat`.
 * @returns {string} The repeated string.
 */
function buildString(str, times) {
  // A recursive countdown that only stops at exactly 1 never terminates for
  // 0, negative or fractional counts, and overflows the stack for large ones.
  if (!(times >= 1)) return '';

  return str.repeat(times);
}
@ -0,0 +1,46 @@ |
|||
'use strict'; |
|||
|
|||
var common = require('../common'); |
|||
var assert = require('assert'); |
|||
var Buffer = require('buffer').Buffer; |
|||
|
|||
// Non-string input is coerced to a string before measuring:
// 32 -> '32', NaN -> 'NaN', {} -> '[object Object]', undefined -> 'undefined'.
// NOTE(review): these expectations describe the runtime this test was written
// for; confirm before running against a release that rejects non-string input.
assert.strictEqual(Buffer.byteLength(32, 'raw'), 2);
assert.strictEqual(Buffer.byteLength(NaN, 'utf8'), 3);
assert.strictEqual(Buffer.byteLength({}, 'raws'), 15);
assert.strictEqual(Buffer.byteLength(), 9);
|||
|
|||
// special case: a zero length string is 0 bytes in any encoding, and the
// encoding name is matched case-insensitively
assert.strictEqual(Buffer.byteLength('', 'ascii'), 0);
assert.strictEqual(Buffer.byteLength('', 'HeX'), 0);
|||
|
|||
// utf8: mixed 1-, 2-, 3- and 4-byte characters, with several spellings of
// the encoding name
assert.strictEqual(Buffer.byteLength('∑éllö wørl∂!', 'utf-8'), 19);
assert.strictEqual(Buffer.byteLength('κλμνξο', 'utf8'), 12);
assert.strictEqual(Buffer.byteLength('挵挶挷挸挹', 'utf-8'), 15);
assert.strictEqual(Buffer.byteLength('𠝹𠱓𠱸', 'UTF8'), 12);
// without an encoding, utf8 should be assumed
assert.strictEqual(Buffer.byteLength('hey there'), 9);
assert.strictEqual(Buffer.byteLength('𠱸挶νξ#xx :)'), 17);
assert.strictEqual(Buffer.byteLength('hello world', ''), 11);
// it should also be assumed with unrecognized encoding
assert.strictEqual(Buffer.byteLength('hello world', 'abc'), 11);
assert.strictEqual(Buffer.byteLength('ßœ∑≈', 'unkn0wn enc0ding'), 10);
|||
|
|||
// base64: the result is the decoded byte count, not the string length
assert.strictEqual(Buffer.byteLength('aGVsbG8gd29ybGQ=', 'base64'), 11);
assert.strictEqual(Buffer.byteLength('bm9kZS5qcyByb2NrcyE=', 'base64'), 14);
assert.strictEqual(Buffer.byteLength('aGkk', 'base64'), 3);
assert.strictEqual(
  Buffer.byteLength('bHNrZGZsa3NqZmtsc2xrZmFqc2RsZmtqcw==', 'base64'),
  25
);
// special padding
assert.strictEqual(Buffer.byteLength('aaa=', 'base64'), 2);
assert.strictEqual(Buffer.byteLength('aaaa==', 'base64'), 3);
|||
|
|||
// same 12-character string across encodings: 'é' takes two bytes in utf8,
// one in the single-byte encodings, and every character takes two in UCS-2
assert.strictEqual(Buffer.byteLength('Il était tué'), 14);
assert.strictEqual(Buffer.byteLength('Il était tué', 'utf8'), 14);
assert.strictEqual(Buffer.byteLength('Il était tué', 'ascii'), 12);
assert.strictEqual(Buffer.byteLength('Il était tué', 'binary'), 12);
['ucs2', 'ucs-2', 'utf16le', 'utf-16le'].forEach(function(encoding) {
  // actual value first, expected second, matching the assertions above
  assert.strictEqual(Buffer.byteLength('Il était tué', encoding), 24);
});
Loading…
Reference in new issue