Browse Source

querystring: improve parse() performance

PR-URL: https://github.com/nodejs/node/pull/10874
Reviewed-By: James M Snell <jasnell@gmail.com>
v6
Brian White 8 years ago
parent
commit
2298bc4b1f
No known key found for this signature in database GPG Key ID: 606D7358F94DA209
  1. 33
      benchmark/querystring/querystring-parse.js
  2. 104
      lib/querystring.js

33
benchmark/querystring/querystring-parse.js

@ -3,35 +3,26 @@ var common = require('../common.js');
var querystring = require('querystring');
var v8 = require('v8');
var types = [
'noencode',
'multicharsep',
'encodemany',
'encodelast',
'multivalue',
'multivaluemany',
'manypairs'
];
var inputs = {
noencode: 'foo=bar&baz=quux&xyzzy=thud',
multicharsep: 'foo=bar&&&&&&&&&&baz=quux&&&&&&&&&&xyzzy=thud',
encodefake: 'foo=%©ar&baz=%A©uux&xyzzy=%©ud',
encodemany: '%66%6F%6F=bar&%62%61%7A=quux&xyzzy=%74h%75d',
encodelast: 'foo=bar&baz=quux&xyzzy=thu%64',
multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz',
multivaluemany: 'foo=bar&foo=baz&foo=quux&quuy=quuz&foo=abc&foo=def&' +
'foo=ghi&foo=jkl&foo=mno&foo=pqr&foo=stu&foo=vwxyz',
manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z'
};
var bench = common.createBenchmark(main, {
type: types,
type: Object.keys(inputs),
n: [1e6],
});
function main(conf) {
var type = conf.type;
var n = conf.n | 0;
var inputs = {
noencode: 'foo=bar&baz=quux&xyzzy=thud',
multicharsep: 'foo=bar&&&&&&&&&&baz=quux&&&&&&&&&&xyzzy=thud',
encodemany: '%66%6F%6F=bar&%62%61%7A=quux&xyzzy=%74h%75d',
encodelast: 'foo=bar&baz=quux&xyzzy=thu%64',
multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz',
multivaluemany: 'foo=bar&foo=baz&foo=quux&quuy=quuz&foo=abc&foo=def&' +
'foo=ghi&foo=jkl&foo=mno&foo=pqr&foo=stu&foo=vwxyz',
manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z'
};
var input = inputs[type];
// Force-optimize querystring.parse() so that the benchmark doesn't get

104
lib/querystring.js

@ -1,8 +1,19 @@
// Query String Utilities
'use strict';
const QueryString = exports;
const QueryString = module.exports = {
unescapeBuffer,
// `unescape()` is a JS global, so we need to use a different local name
unescape: qsUnescape,
// `escape()` is a JS global, so we need to use a different local name
escape: qsEscape,
stringify,
encode: stringify,
parse,
decode: parse
};
const Buffer = require('buffer').Buffer;
// This constructor is used to store parsed query string values. Instantiating
@ -13,7 +24,7 @@ ParsedQueryString.prototype = Object.create(null);
// a safe fast alternative to decodeURIComponent
QueryString.unescapeBuffer = function(s, decodeSpaces) {
function unescapeBuffer(s, decodeSpaces) {
var out = Buffer.allocUnsafe(s.length);
var state = 0;
var n, m, hexchar;
@ -77,7 +88,7 @@ QueryString.unescapeBuffer = function(s, decodeSpaces) {
// TODO support returning arbitrary buffers.
return out.slice(0, outIndex - 1);
};
}
function qsUnescape(s, decodeSpaces) {
@ -87,13 +98,12 @@ function qsUnescape(s, decodeSpaces) {
return QueryString.unescapeBuffer(s, decodeSpaces).toString();
}
}
QueryString.unescape = qsUnescape;
var hexTable = new Array(256);
for (var i = 0; i < 256; ++i)
hexTable[i] = '%' + ((i < 16 ? '0' : '') + i.toString(16)).toUpperCase();
QueryString.escape = function(str) {
function qsEscape(str) {
// replaces encodeURIComponent
// http://www.ecma-international.org/ecma-262/5.1/#sec-15.1.3.4
if (typeof str !== 'string') {
@ -164,9 +174,9 @@ QueryString.escape = function(str) {
if (lastPos < str.length)
return out + str.slice(lastPos);
return out;
};
}
var stringifyPrimitive = function(v) {
function stringifyPrimitive(v) {
if (typeof v === 'string')
return v;
if (typeof v === 'number' && isFinite(v))
@ -174,10 +184,10 @@ var stringifyPrimitive = function(v) {
if (typeof v === 'boolean')
return v ? 'true' : 'false';
return '';
};
}
QueryString.stringify = QueryString.encode = function(obj, sep, eq, options) {
function stringify(obj, sep, eq, options) {
sep = sep || '&';
eq = eq || '=';
@ -215,34 +225,43 @@ QueryString.stringify = QueryString.encode = function(obj, sep, eq, options) {
return fields;
}
return '';
};
}
// Parse a key/val string.
QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
sep = sep || '&';
eq = eq || '=';
function charCodes(str) {
if (str.length === 0) return [];
if (str.length === 1) return [str.charCodeAt(0)];
const ret = [];
for (var i = 0; i < str.length; ++i)
ret[ret.length] = str.charCodeAt(i);
return ret;
}
const defSepCodes = [38]; // &
const defEqCodes = [61]; // =
// Parse a key/val string.
function parse(qs, sep, eq, options) {
const obj = new ParsedQueryString();
if (typeof qs !== 'string' || qs.length === 0) {
return obj;
}
if (typeof sep !== 'string')
sep += '';
const eqLen = eq.length;
const sepLen = sep.length;
var sepCodes = (!sep ? defSepCodes : charCodes(sep + ''));
var eqCodes = (!eq ? defEqCodes : charCodes(eq + ''));
const sepLen = sepCodes.length;
const eqLen = eqCodes.length;
var maxKeys = 1000;
var pairs = 1000;
if (options && typeof options.maxKeys === 'number') {
maxKeys = options.maxKeys;
// -1 is used in place of a value like Infinity for meaning
// "unlimited pairs" because of additional checks V8 (at least as of v5.4)
// has to do when using variables that contain values like Infinity. Since
// `pairs` is always decremented and checked explicitly for 0, -1 works
// effectively the same as Infinity, while providing a significant
// performance boost.
pairs = (options.maxKeys > 0 ? options.maxKeys : -1);
}
var pairs = Infinity;
if (maxKeys > 0)
pairs = maxKeys;
var decode = QueryString.unescape;
if (options && typeof options.decodeURIComponent === 'function') {
decode = options.decodeURIComponent;
@ -262,7 +281,7 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
const code = qs.charCodeAt(i);
// Try matching key/value pair separator (e.g. '&')
if (code === sep.charCodeAt(sepIdx)) {
if (code === sepCodes[sepIdx]) {
if (++sepIdx === sepLen) {
// Key/value pair separator match!
const end = i - sepIdx + 1;
@ -284,10 +303,10 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
keys[keys.length] = key;
} else {
const curValue = obj[key];
// `instanceof Array` is used instead of Array.isArray() because it
// is ~15-20% faster with v8 4.7 and is safe to use because we are
// using it with values being created within this function
if (curValue instanceof Array)
// A simple Array-specific property check is enough here to
// distinguish from a string value and is faster and still safe since
// we are generating all of the values being assigned.
if (curValue.pop)
curValue[curValue.length] = value;
else
obj[key] = [curValue, value];
@ -322,7 +341,7 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
// Try matching key/value separator (e.g. '=') if we haven't already
if (eqIdx < eqLen) {
if (code === eq.charCodeAt(eqIdx)) {
if (code === eqCodes[eqIdx]) {
if (++eqIdx === eqLen) {
// Key/value separator match!
const end = i - eqIdx + 1;
@ -354,12 +373,12 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
if (code === 43/*+*/) {
if (eqIdx < eqLen) {
if (i - lastPos > 0)
if (lastPos < i)
key += qs.slice(lastPos, i);
key += '%20';
keyEncoded = true;
} else {
if (i - lastPos > 0)
if (lastPos < i)
value += qs.slice(lastPos, i);
value += '%20';
valEncoded = true;
@ -369,7 +388,7 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
}
// Check if we have leftover key or value data
if (pairs > 0 && (lastPos < qs.length || eqIdx > 0)) {
if (pairs !== 0 && (lastPos < qs.length || eqIdx > 0)) {
if (lastPos < qs.length) {
if (eqIdx < eqLen)
key += qs.slice(lastPos);
@ -387,10 +406,10 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
keys[keys.length] = key;
} else {
const curValue = obj[key];
// `instanceof Array` is used instead of Array.isArray() because it
// is ~15-20% faster with v8 4.7 and is safe to use because we are
// using it with values being created within this function
if (curValue instanceof Array)
// A simple Array-specific property check is enough here to
// distinguish from a string value and is faster and still safe since
// we are generating all of the values being assigned.
if (curValue.pop)
curValue[curValue.length] = value;
else
obj[key] = [curValue, value];
@ -398,11 +417,12 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
}
return obj;
};
}
// v8 does not optimize functions with try-catch blocks, so we isolate them here
// to minimize the damage
// to minimize the damage (Note: no longer true as of V8 5.4 -- but still will
// not be inlined).
function decodeStr(s, decoder) {
try {
return decoder(s);

Loading…
Cancel
Save