Browse Source

querystring: improve parse() performance

PR-URL: https://github.com/nodejs/node/pull/10874
Reviewed-By: James M Snell <jasnell@gmail.com>
v6.x
Brian White 8 years ago
committed by Myles Borins
parent
commit
23f3f20963
No known key found for this signature in database GPG Key ID: 933B01F40B5CA946
  1. 29
      benchmark/querystring/querystring-parse.js
  2. 104
      lib/querystring.js

29
benchmark/querystring/querystring-parse.js

@ -3,28 +3,10 @@ var common = require('../common.js');
var querystring = require('querystring'); var querystring = require('querystring');
var v8 = require('v8'); var v8 = require('v8');
var types = [
'noencode',
'multicharsep',
'encodemany',
'encodelast',
'multivalue',
'multivaluemany',
'manypairs'
];
var bench = common.createBenchmark(main, {
type: types,
n: [1e6],
});
function main(conf) {
var type = conf.type;
var n = conf.n | 0;
var inputs = { var inputs = {
noencode: 'foo=bar&baz=quux&xyzzy=thud', noencode: 'foo=bar&baz=quux&xyzzy=thud',
multicharsep: 'foo=bar&&&&&&&&&&baz=quux&&&&&&&&&&xyzzy=thud', multicharsep: 'foo=bar&&&&&&&&&&baz=quux&&&&&&&&&&xyzzy=thud',
encodefake: 'foo=%©ar&baz=%A©uux&xyzzy=%©ud',
encodemany: '%66%6F%6F=bar&%62%61%7A=quux&xyzzy=%74h%75d', encodemany: '%66%6F%6F=bar&%62%61%7A=quux&xyzzy=%74h%75d',
encodelast: 'foo=bar&baz=quux&xyzzy=thu%64', encodelast: 'foo=bar&baz=quux&xyzzy=thu%64',
multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz', multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz',
@ -32,6 +14,15 @@ function main(conf) {
'foo=ghi&foo=jkl&foo=mno&foo=pqr&foo=stu&foo=vwxyz', 'foo=ghi&foo=jkl&foo=mno&foo=pqr&foo=stu&foo=vwxyz',
manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z' manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z'
}; };
var bench = common.createBenchmark(main, {
type: Object.keys(inputs),
n: [1e6],
});
function main(conf) {
var type = conf.type;
var n = conf.n | 0;
var input = inputs[type]; var input = inputs[type];
// Force-optimize querystring.parse() so that the benchmark doesn't get // Force-optimize querystring.parse() so that the benchmark doesn't get

104
lib/querystring.js

@ -1,8 +1,19 @@
// Query String Utilities
'use strict'; 'use strict';
const QueryString = exports; const QueryString = module.exports = {
unescapeBuffer,
// `unescape()` is a JS global, so we need to use a different local name
unescape: qsUnescape,
// `escape()` is a JS global, so we need to use a different local name
escape: qsEscape,
stringify,
encode: stringify,
parse,
decode: parse
};
const Buffer = require('buffer').Buffer; const Buffer = require('buffer').Buffer;
// This constructor is used to store parsed query string values. Instantiating // This constructor is used to store parsed query string values. Instantiating
@ -13,7 +24,7 @@ ParsedQueryString.prototype = Object.create(null);
// a safe fast alternative to decodeURIComponent // a safe fast alternative to decodeURIComponent
QueryString.unescapeBuffer = function(s, decodeSpaces) { function unescapeBuffer(s, decodeSpaces) {
var out = Buffer.allocUnsafe(s.length); var out = Buffer.allocUnsafe(s.length);
var state = 0; var state = 0;
var n, m, hexchar; var n, m, hexchar;
@ -77,7 +88,7 @@ QueryString.unescapeBuffer = function(s, decodeSpaces) {
// TODO support returning arbitrary buffers. // TODO support returning arbitrary buffers.
return out.slice(0, outIndex - 1); return out.slice(0, outIndex - 1);
}; }
function qsUnescape(s, decodeSpaces) { function qsUnescape(s, decodeSpaces) {
@ -87,13 +98,12 @@ function qsUnescape(s, decodeSpaces) {
return QueryString.unescapeBuffer(s, decodeSpaces).toString(); return QueryString.unescapeBuffer(s, decodeSpaces).toString();
} }
} }
QueryString.unescape = qsUnescape;
var hexTable = new Array(256); var hexTable = new Array(256);
for (var i = 0; i < 256; ++i) for (var i = 0; i < 256; ++i)
hexTable[i] = '%' + ((i < 16 ? '0' : '') + i.toString(16)).toUpperCase(); hexTable[i] = '%' + ((i < 16 ? '0' : '') + i.toString(16)).toUpperCase();
QueryString.escape = function(str) { function qsEscape(str) {
// replaces encodeURIComponent // replaces encodeURIComponent
// http://www.ecma-international.org/ecma-262/5.1/#sec-15.1.3.4 // http://www.ecma-international.org/ecma-262/5.1/#sec-15.1.3.4
if (typeof str !== 'string') { if (typeof str !== 'string') {
@ -164,9 +174,9 @@ QueryString.escape = function(str) {
if (lastPos < str.length) if (lastPos < str.length)
return out + str.slice(lastPos); return out + str.slice(lastPos);
return out; return out;
}; }
var stringifyPrimitive = function(v) { function stringifyPrimitive(v) {
if (typeof v === 'string') if (typeof v === 'string')
return v; return v;
if (typeof v === 'number' && isFinite(v)) if (typeof v === 'number' && isFinite(v))
@ -174,10 +184,10 @@ var stringifyPrimitive = function(v) {
if (typeof v === 'boolean') if (typeof v === 'boolean')
return v ? 'true' : 'false'; return v ? 'true' : 'false';
return ''; return '';
}; }
QueryString.stringify = QueryString.encode = function(obj, sep, eq, options) { function stringify(obj, sep, eq, options) {
sep = sep || '&'; sep = sep || '&';
eq = eq || '='; eq = eq || '=';
@ -215,34 +225,43 @@ QueryString.stringify = QueryString.encode = function(obj, sep, eq, options) {
return fields; return fields;
} }
return ''; return '';
}; }
// Parse a key/val string. function charCodes(str) {
QueryString.parse = QueryString.decode = function(qs, sep, eq, options) { if (str.length === 0) return [];
sep = sep || '&'; if (str.length === 1) return [str.charCodeAt(0)];
eq = eq || '='; const ret = [];
for (var i = 0; i < str.length; ++i)
ret[ret.length] = str.charCodeAt(i);
return ret;
}
const defSepCodes = [38]; // &
const defEqCodes = [61]; // =
// Parse a key/val string.
function parse(qs, sep, eq, options) {
const obj = new ParsedQueryString(); const obj = new ParsedQueryString();
if (typeof qs !== 'string' || qs.length === 0) { if (typeof qs !== 'string' || qs.length === 0) {
return obj; return obj;
} }
if (typeof sep !== 'string') var sepCodes = (!sep ? defSepCodes : charCodes(sep + ''));
sep += ''; var eqCodes = (!eq ? defEqCodes : charCodes(eq + ''));
const sepLen = sepCodes.length;
const eqLen = eqCodes.length;
const eqLen = eq.length; var pairs = 1000;
const sepLen = sep.length;
var maxKeys = 1000;
if (options && typeof options.maxKeys === 'number') { if (options && typeof options.maxKeys === 'number') {
maxKeys = options.maxKeys; // -1 is used in place of a value like Infinity for meaning
// "unlimited pairs" because of additional checks V8 (at least as of v5.4)
// has to do when using variables that contain values like Infinity. Since
// `pairs` is always decremented and checked explicitly for 0, -1 works
// effectively the same as Infinity, while providing a significant
// performance boost.
pairs = (options.maxKeys > 0 ? options.maxKeys : -1);
} }
var pairs = Infinity;
if (maxKeys > 0)
pairs = maxKeys;
var decode = QueryString.unescape; var decode = QueryString.unescape;
if (options && typeof options.decodeURIComponent === 'function') { if (options && typeof options.decodeURIComponent === 'function') {
decode = options.decodeURIComponent; decode = options.decodeURIComponent;
@ -262,7 +281,7 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
const code = qs.charCodeAt(i); const code = qs.charCodeAt(i);
// Try matching key/value pair separator (e.g. '&') // Try matching key/value pair separator (e.g. '&')
if (code === sep.charCodeAt(sepIdx)) { if (code === sepCodes[sepIdx]) {
if (++sepIdx === sepLen) { if (++sepIdx === sepLen) {
// Key/value pair separator match! // Key/value pair separator match!
const end = i - sepIdx + 1; const end = i - sepIdx + 1;
@ -284,10 +303,10 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
keys[keys.length] = key; keys[keys.length] = key;
} else { } else {
const curValue = obj[key]; const curValue = obj[key];
// `instanceof Array` is used instead of Array.isArray() because it // A simple Array-specific property check is enough here to
// is ~15-20% faster with v8 4.7 and is safe to use because we are // distinguish from a string value and is faster and still safe since
// using it with values being created within this function // we are generating all of the values being assigned.
if (curValue instanceof Array) if (curValue.pop)
curValue[curValue.length] = value; curValue[curValue.length] = value;
else else
obj[key] = [curValue, value]; obj[key] = [curValue, value];
@ -322,7 +341,7 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
// Try matching key/value separator (e.g. '=') if we haven't already // Try matching key/value separator (e.g. '=') if we haven't already
if (eqIdx < eqLen) { if (eqIdx < eqLen) {
if (code === eq.charCodeAt(eqIdx)) { if (code === eqCodes[eqIdx]) {
if (++eqIdx === eqLen) { if (++eqIdx === eqLen) {
// Key/value separator match! // Key/value separator match!
const end = i - eqIdx + 1; const end = i - eqIdx + 1;
@ -354,12 +373,12 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
if (code === 43/*+*/) { if (code === 43/*+*/) {
if (eqIdx < eqLen) { if (eqIdx < eqLen) {
if (i - lastPos > 0) if (lastPos < i)
key += qs.slice(lastPos, i); key += qs.slice(lastPos, i);
key += '%20'; key += '%20';
keyEncoded = true; keyEncoded = true;
} else { } else {
if (i - lastPos > 0) if (lastPos < i)
value += qs.slice(lastPos, i); value += qs.slice(lastPos, i);
value += '%20'; value += '%20';
valEncoded = true; valEncoded = true;
@ -369,7 +388,7 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
} }
// Check if we have leftover key or value data // Check if we have leftover key or value data
if (pairs > 0 && (lastPos < qs.length || eqIdx > 0)) { if (pairs !== 0 && (lastPos < qs.length || eqIdx > 0)) {
if (lastPos < qs.length) { if (lastPos < qs.length) {
if (eqIdx < eqLen) if (eqIdx < eqLen)
key += qs.slice(lastPos); key += qs.slice(lastPos);
@ -387,10 +406,10 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
keys[keys.length] = key; keys[keys.length] = key;
} else { } else {
const curValue = obj[key]; const curValue = obj[key];
// `instanceof Array` is used instead of Array.isArray() because it // A simple Array-specific property check is enough here to
// is ~15-20% faster with v8 4.7 and is safe to use because we are // distinguish from a string value and is faster and still safe since
// using it with values being created within this function // we are generating all of the values being assigned.
if (curValue instanceof Array) if (curValue.pop)
curValue[curValue.length] = value; curValue[curValue.length] = value;
else else
obj[key] = [curValue, value]; obj[key] = [curValue, value];
@ -398,11 +417,12 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
} }
return obj; return obj;
}; }
// v8 does not optimize functions with try-catch blocks, so we isolate them here // v8 does not optimize functions with try-catch blocks, so we isolate them here
// to minimize the damage // to minimize the damage (Note: no longer true as of V8 5.4 -- but still will
// not be inlined).
function decodeStr(s, decoder) { function decodeStr(s, decoder) {
try { try {
return decoder(s); return decoder(s);

Loading…
Cancel
Save