Browse Source

url: fix surrogate handling in encodeAuth()

Also factor out common parts in querystring and url.

PR-URL: https://github.com/nodejs/node/pull/11161
Reviewed-By: Joyee Cheung <joyeec9h3@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Brian White <mscdex@mscdex.net>
v6
Timothy Gu 8 years ago
parent
commit
c6b12d0984
  1. 15
      lib/internal/querystring.js
  2. 72
      lib/internal/url.js
  3. 15
      lib/querystring.js
  4. 84
      lib/url.js
  5. 1
      node.gyp
  6. 9
      test/parallel/test-url-format.js

15
lib/internal/querystring.js

@ -0,0 +1,15 @@
'use strict';
const hexTable = new Array(256);
for (var i = 0; i < 256; ++i)
hexTable[i] = '%' + ((i < 16 ? '0' : '') + i.toString(16)).toUpperCase();
// Instantiating this is faster than explicitly calling `Object.create(null)`
// to get a "clean" empty object (tested with v8 v4.9).
function StorageObject() {}
StorageObject.prototype = Object.create(null);
module.exports = {
hexTable,
StorageObject
};

72
lib/internal/url.js

@ -1,6 +1,7 @@
'use strict';
const util = require('util');
const { StorageObject } = require('internal/querystring');
const binding = process.binding('url');
const context = Symbol('context');
const cannotBeBase = Symbol('cannot-be-base');
@ -22,9 +23,6 @@ const IteratorPrototype = Object.getPrototypeOf(
Object.getPrototypeOf([][Symbol.iterator]())
);
function StorageObject() {}
StorageObject.prototype = Object.create(null);
class OpaqueOrigin {
toString() {
return 'null';
@ -528,73 +526,6 @@ Object.defineProperties(URL.prototype, {
}
});
const hexTable = new Array(256);
for (var i = 0; i < 256; ++i)
hexTable[i] = '%' + ((i < 16 ? '0' : '') + i.toString(16)).toUpperCase();
function encodeAuth(str) {
// faster encodeURIComponent alternative for encoding auth uri components
var out = '';
var lastPos = 0;
for (var i = 0; i < str.length; ++i) {
var c = str.charCodeAt(i);
// These characters do not need escaping:
// ! - . _ ~
// ' ( ) * :
// digits
// alpha (uppercase)
// alpha (lowercase)
if (c === 0x21 || c === 0x2D || c === 0x2E || c === 0x5F || c === 0x7E ||
(c >= 0x27 && c <= 0x2A) ||
(c >= 0x30 && c <= 0x3A) ||
(c >= 0x41 && c <= 0x5A) ||
(c >= 0x61 && c <= 0x7A)) {
continue;
}
if (i - lastPos > 0)
out += str.slice(lastPos, i);
lastPos = i + 1;
// Other ASCII characters
if (c < 0x80) {
out += hexTable[c];
continue;
}
// Multi-byte characters ...
if (c < 0x800) {
out += hexTable[0xC0 | (c >> 6)] + hexTable[0x80 | (c & 0x3F)];
continue;
}
if (c < 0xD800 || c >= 0xE000) {
out += hexTable[0xE0 | (c >> 12)] +
hexTable[0x80 | ((c >> 6) & 0x3F)] +
hexTable[0x80 | (c & 0x3F)];
continue;
}
// Surrogate pair
++i;
var c2;
if (i < str.length)
c2 = str.charCodeAt(i) & 0x3FF;
else
c2 = 0;
c = 0x10000 + (((c & 0x3FF) << 10) | c2);
out += hexTable[0xF0 | (c >> 18)] +
hexTable[0x80 | ((c >> 12) & 0x3F)] +
hexTable[0x80 | ((c >> 6) & 0x3F)] +
hexTable[0x80 | (c & 0x3F)];
}
if (lastPos === 0)
return str;
if (lastPos < str.length)
return out + str.slice(lastPos);
return out;
}
function update(url, params) {
if (!url)
return;
@ -1254,7 +1185,6 @@ exports.URL = URL;
exports.URLSearchParams = URLSearchParams;
exports.domainToASCII = domainToASCII;
exports.domainToUnicode = domainToUnicode;
exports.encodeAuth = encodeAuth;
exports.urlToOptions = urlToOptions;
exports.formatSymbol = kFormat;
exports.searchParamsSymbol = searchParams;

15
lib/querystring.js

@ -1,5 +1,7 @@
'use strict';
const { Buffer } = require('buffer');
const { StorageObject, hexTable } = require('internal/querystring');
const QueryString = module.exports = {
unescapeBuffer,
// `unescape()` is a JS global, so we need to use a different local name
@ -14,13 +16,6 @@ const QueryString = module.exports = {
parse,
decode: parse
};
const Buffer = require('buffer').Buffer;
// This constructor is used to store parsed query string values. Instantiating
// this is faster than explicitly calling `Object.create(null)` to get a
// "clean" empty object (tested with v8 v4.9).
function ParsedQueryString() {}
ParsedQueryString.prototype = Object.create(null);
const unhexTable = [
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0 - 15
@ -116,10 +111,6 @@ function qsUnescape(s, decodeSpaces) {
}
const hexTable = [];
for (var i = 0; i < 256; ++i)
hexTable[i] = '%' + ((i < 16 ? '0' : '') + i.toString(16)).toUpperCase();
// These characters do not need escaping when generating query strings:
// ! - . _ ~
// ' ( ) *
@ -282,7 +273,7 @@ const defEqCodes = [61]; // =
// Parse a key/val string.
function parse(qs, sep, eq, options) {
const obj = new ParsedQueryString();
const obj = new StorageObject();
if (typeof qs !== 'string' || qs.length === 0) {
return obj;

84
lib/url.js

@ -10,8 +10,8 @@ function importPunycode() {
const { toASCII } = importPunycode();
const { StorageObject, hexTable } = require('internal/querystring');
const internalUrl = require('internal/url');
const encodeAuth = internalUrl.encodeAuth;
exports.parse = urlParse;
exports.resolve = urlResolve;
exports.resolveObject = urlResolveObject;
@ -76,12 +76,6 @@ const slashedProtocol = {
};
const querystring = require('querystring');
// This constructor is used to store parsed query string values. Instantiating
// this is faster than explicitly calling `Object.create(null)` to get a
// "clean" empty object (tested with v8 v4.9).
function ParsedQueryString() {}
ParsedQueryString.prototype = Object.create(null);
function urlParse(url, parseQueryString, slashesDenoteHost) {
if (url instanceof Url) return url;
@ -190,7 +184,7 @@ Url.prototype.parse = function(url, parseQueryString, slashesDenoteHost) {
}
} else if (parseQueryString) {
this.search = '';
this.query = new ParsedQueryString();
this.query = new StorageObject();
}
return this;
}
@ -380,7 +374,7 @@ Url.prototype.parse = function(url, parseQueryString, slashesDenoteHost) {
} else if (parseQueryString) {
// no query string, but parseQueryString still requested
this.search = '';
this.query = new ParsedQueryString();
this.query = new StorageObject();
}
var firstIdx = (questionIdx !== -1 &&
@ -948,3 +942,75 @@ function spliceOne(list, index) {
list[i] = list[k];
list.pop();
}
// These characters do not need escaping:
// ! - . _ ~
// ' ( ) * :
// digits
// alpha (uppercase)
// alpha (lowercase)
const noEscapeAuth = [
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00 - 0x0F
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10 - 0x1F
0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, // 0x20 - 0x2F
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, // 0x30 - 0x3F
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 - 0x4F
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, // 0x50 - 0x5F
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 - 0x6F
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 // 0x70 - 0x7F
];
function encodeAuth(str) {
// faster encodeURIComponent alternative for encoding auth uri components
var out = '';
var lastPos = 0;
for (var i = 0; i < str.length; ++i) {
var c = str.charCodeAt(i);
// ASCII
if (c < 0x80) {
if (noEscapeAuth[c] === 1)
continue;
if (lastPos < i)
out += str.slice(lastPos, i);
lastPos = i + 1;
out += hexTable[c];
continue;
}
if (lastPos < i)
out += str.slice(lastPos, i);
// Multi-byte characters ...
if (c < 0x800) {
lastPos = i + 1;
out += hexTable[0xC0 | (c >> 6)] + hexTable[0x80 | (c & 0x3F)];
continue;
}
if (c < 0xD800 || c >= 0xE000) {
lastPos = i + 1;
out += hexTable[0xE0 | (c >> 12)] +
hexTable[0x80 | ((c >> 6) & 0x3F)] +
hexTable[0x80 | (c & 0x3F)];
continue;
}
// Surrogate pair
++i;
var c2;
if (i < str.length)
c2 = str.charCodeAt(i) & 0x3FF;
else
c2 = 0;
lastPos = i + 1;
c = 0x10000 + (((c & 0x3FF) << 10) | c2);
out += hexTable[0xF0 | (c >> 18)] +
hexTable[0x80 | ((c >> 12) & 0x3F)] +
hexTable[0x80 | ((c >> 6) & 0x3F)] +
hexTable[0x80 | (c & 0x3F)];
}
if (lastPos === 0)
return str;
if (lastPos < str.length)
return out + str.slice(lastPos);
return out;
}

1
node.gyp

@ -93,6 +93,7 @@
'lib/internal/process/stdio.js',
'lib/internal/process/warning.js',
'lib/internal/process.js',
'lib/internal/querystring.js',
'lib/internal/readline.js',
'lib/internal/repl.js',
'lib/internal/socket_list.js',

9
test/parallel/test-url-format.js

@ -235,6 +235,15 @@ const formatTests = {
protocol: 'file',
pathname: '/home/user',
path: '/home/user'
},
// surrogate in auth
'http://%F0%9F%98%80@www.example.com/': {
href: 'http://%F0%9F%98%80@www.example.com/',
protocol: 'http:',
auth: '\uD83D\uDE00',
hostname: 'www.example.com',
pathname: '/'
}
};
for (const u in formatTests) {

Loading…
Cancel
Save