Browse Source

url: adding WHATWG URL support

Implements WHATWG URL support. Example:

```
var u = new url.URL('http://example.org');
```

This currently passes all of the WHATWG URL parsing tests and all but two
of the setter tests. Those two setter tests are intentionally skipped for
now and will be revisited.

PR-URL: https://github.com/nodejs/node/pull/7448
Reviewed-By: Ilkka Myller <ilkka.myller@nodefield.com>
v7.x
James M Snell 9 years ago
parent
commit
a8ece149e2
  1. 57
      benchmark/url/new-url-parse.js
  2. 629
      lib/internal/url.js
  3. 70
      lib/url.js
  4. 2
      node.gyp
  5. 12
      src/node_i18n.cc
  6. 7
      src/node_i18n.h
  7. 1406
      src/node_url.cc
  8. 538
      src/node_url.h
  9. 1134
      test/fixtures/url-setter-tests.json
  10. 122
      test/parallel/test-whatwg-url-parsing.js
  11. 36
      test/parallel/test-whatwg-url-searchparams.js
  12. 24
      test/parallel/test-whatwg-url-setters.js

57
benchmark/url/new-url-parse.js

@ -0,0 +1,57 @@
'use strict';
const common = require('../common.js');
const url = require('url');
const v8 = require('v8');
// Benchmark matrix handed to the shared benchmark harness:
//  - `type` picks one of the sample inputs defined in main(),
//  - `method` picks the legacy url.parse() path ('old') or the new
//    url.URL constructor ('new'),
//  - `n` is the number of iterations timed per run.
const bench = common.createBenchmark(main, {
type: 'one two three four five'.split(' '),
method: ['old', 'new'],
n: [25e4]
});
// Times `n` back-to-back calls of the legacy url.parse() on `input`.
function useOld(n, input) {
  // Warm url.parse() up once and ask V8 to optimize it on its next call, so
  // the optimizer does not kick in halfway through the timed section.
  url.parse(input);
  v8.setFlagsFromString('--allow_natives_syntax');
  eval('%OptimizeFunctionOnNextCall(url.parse)');

  let completed = 0;
  bench.start();
  while (completed < n) {
    url.parse(input);
    completed += 1;
  }
  bench.end(n);
}
// Times `n` back-to-back constructions of a WHATWG url.URL from `input`.
function useNew(n, input) {
  let completed = 0;
  bench.start();
  while (completed < n) {
    new url.URL(input);
    completed += 1;
  }
  bench.end(n);
}
// Benchmark entry point: resolves the configured sample input and runs the
// requested implementation against it.
//  conf.type   - key into the sample table below (unknown keys yield '')
//  conf.n      - iteration count, truncated to an integer
//  conf.method - 'old' (url.parse) or 'new' (url.URL); anything else throws
function main(conf) {
  const type = conf.type;
  const n = conf.n | 0;
  const method = conf.method;

  const inputs = {
    one: 'http://nodejs.org/docs/latest/api/url.html#url_url_format_urlobj',
    two: 'http://blog.nodejs.org/',
    three: 'https://encrypted.google.com/search?q=url&q=site:npmjs.org&hl=en',
    four: 'javascript:alert("node is awesome");',
    // A scheme-less sample ('some.ran/dom/url.thing?oh=yes#whoo') used to
    // live here; 'five' is currently an absolute URL with credentials.
    five: 'https://user:pass@example.com/',
  };
  const input = inputs[type] || '';

  if (method === 'old') {
    useOld(n, input);
    return;
  }
  if (method === 'new') {
    useNew(n, input);
    return;
  }
  throw new Error('Unknown method');
}

629
lib/internal/url.js

@ -0,0 +1,629 @@
'use strict';
function getPunycode() {
try {
return process.binding('icu');
} catch (err) {
return require('punycode');
}
}
// IDNA helper, resolved once at module load (see getPunycode()).
const punycode = getPunycode();
// Native URL parser binding; provides parse(), the URL_FLAGS_* bit masks and
// the k* parser-state constants used by the setters below.
const binding = process.binding('url');
// Symbol keys used as private slots on URL / URLSearchParams instances.
// `context` holds the parsed components (flags, scheme, host, ...).
const context = Symbol('context');
const cannotBeBase = Symbol('cannot-be-base');
const special = Symbol('special');
// Note: the symbol description is 'query' although the binding is named
// `searchParams`.
const searchParams = Symbol('query');
const querystring = require('querystring');
// Private slots of TupleOrigin.
const kScheme = Symbol('scheme');
const kHost = Symbol('host');
const kPort = Symbol('port');
const kDomain = Symbol('domain');
// Constructor for the per-URL component store. Its prototype is a
// null-prototype object, so instances do not inherit from Object.prototype.
function StorageObject() {}
StorageObject.prototype = Object.create(null);
/**
 * Origin that carries no scheme/host/port information.
 * It serializes to the string 'null' and acts as its own effective domain.
 */
class OpaqueOrigin {
  /** @returns {OpaqueOrigin} this very instance. */
  get effectiveDomain() {
    return this;
  }

  /** @returns {string} always the literal string 'null'. */
  toString() {
    return 'null';
  }
}
/**
 * Origin described by a (scheme, host, port, domain) tuple. The four values
 * are stored under private symbol keys and exposed through read-only
 * accessors.
 */
class TupleOrigin {
  constructor(scheme, host, port, domain) {
    this[kScheme] = scheme;
    this[kHost] = host;
    this[kPort] = port;
    this[kDomain] = domain;
  }

  get scheme() {
    return this[kScheme];
  }

  get host() {
    return this[kHost];
  }

  get port() {
    return this[kPort];
  }

  get domain() {
    return this[kDomain];
  }

  /** The domain when one is set (truthy), otherwise the host. */
  get effectiveDomain() {
    return this[kDomain] || this[kHost];
  }

  /**
   * Serializes the origin as `scheme://host[:port]`.
   * @param {boolean} [unicode=false] when true the host is passed through
   *   URL.domainToUnicode() first.
   * @returns {string}
   */
  toString(unicode = false) {
    const pieces = [this.scheme, '://'];
    pieces.push(unicode ? URL.domainToUnicode(this.host) : this.host);
    // The port is omitted only when it is undefined or null (0 is kept).
    if (this.port !== undefined && this.port !== null)
      pieces.push(`:${this.port}`);
    return pieces.join('');
  }
}
class URL {
constructor(input, base) {
if (base !== undefined && !(base instanceof URL))
base = new URL(String(base));
input = String(input);
const base_context = base ? base[context] : undefined;
this[context] = new StorageObject();
binding.parse(input.trim(), -1, base_context, undefined,
(flags, protocol, username, password,
host, port, path, query, fragment) => {
if (flags & binding.URL_FLAGS_FAILED)
throw new TypeError('Invalid URL');
this[context].flags = flags;
this[context].scheme = protocol;
this[context].username = username;
this[context].password = password;
this[context].port = port;
this[context].path = path;
this[context].query = query;
this[context].fragment = fragment;
this[context].host = host;
this[searchParams] = new URLSearchParams(this);
});
}
get origin() {
return URL.originFor(this).toString(true);
}
get [special]() {
return (this[context].flags & binding.URL_FLAGS_SPECIAL) != 0;
}
get [cannotBeBase]() {
return (this[context].flags & binding.URL_FLAGS_CANNOT_BE_BASE) != 0;
}
get protocol() {
return this[context].scheme;
}
get searchParams() {
return this[searchParams];
}
set protocol(scheme) {
scheme = String(scheme);
if (scheme.length === 0)
return;
binding.parse(scheme,
binding.kSchemeStart,
null,
this[context],
(flags, protocol, username, password,
host, port, path, query, fragment) => {
if (flags & binding.URL_FLAGS_FAILED)
return;
const newIsSpecial = (flags & binding.URL_FLAGS_SPECIAL) != 0;
if ((this[special] && !newIsSpecial) ||
(!this[special] && newIsSpecial) ||
(newIsSpecial && !this[special] &&
this[context].host === undefined)) {
return;
}
if (newIsSpecial) {
this[context].flags |= binding.URL_FLAGS_SPECIAL;
} else {
this[context].flags &= ~binding.URL_FLAGS_SPECIAL;
}
if (protocol) {
this[context].scheme = protocol;
this[context].flags |= binding.URL_FLAGS_HAS_SCHEME;
} else {
this[context].flags &= ~binding.URL_FLAGS_HAS_SCHEME;
}
});
}
get username() {
return this[context].username || '';
}
set username(username) {
username = String(username);
if (!this.hostname)
return;
if (!username) {
this[context].username = null;
this[context].flags &= ~binding.URL_FLAGS_HAS_USERNAME;
return;
}
this[context].username = binding.encodeAuth(username);
this[context].flags |= binding.URL_FLAGS_HAS_USERNAME;
}
get password() {
return this[context].password || '';
}
set password(password) {
password = String(password);
if (!this.hostname)
return;
if (!password) {
this[context].password = null;
this[context].flags &= ~binding.URL_FLAGS_HAS_PASSWORD;
return;
}
this[context].password = binding.encodeAuth(password);
this[context].flags |= binding.URL_FLAGS_HAS_PASSWORD;
}
get host() {
var ret = this[context].host || '';
if (this[context].port !== undefined)
ret += `:${this[context].port}`;
return ret;
}
set host(host) {
host = String(host);
if (this[cannotBeBase] ||
(this[special] && host.length === 0)) {
// Cannot set the host if cannot-be-base is set or
// scheme is special and host length is zero
return;
}
if (!host) {
this[context].host = null;
this[context].flags &= ~binding.URL_FLAGS_HAS_HOST;
return;
}
binding.parse(host, binding.kHost, null, this[context],
(flags, protocol, username, password,
host, port, path, query, fragment) => {
if (flags & binding.URL_FLAGS_FAILED)
return;
if (host) {
this[context].host = host;
this[context].flags |= binding.URL_FLAGS_HAS_HOST;
} else {
this[context].flags &= ~binding.URL_FLAGS_HAS_HOST;
}
if (port !== undefined)
this[context].port = port;
});
}
get hostname() {
return this[context].host || '';
}
set hostname(host) {
host = String(host);
if (this[cannotBeBase] ||
(this[special] && host.length === 0)) {
// Cannot set the host if cannot-be-base is set or
// scheme is special and host length is zero
return;
}
if (!host) {
this[context].host = null;
this[context].flags &= ~binding.URL_FLAGS_HAS_HOST;
return;
}
binding.parse(host,
binding.kHostname,
null,
this[context],
(flags, protocol, username, password,
host, port, path, query, fragment) => {
if (flags & binding.URL_FLAGS_FAILED)
return;
if (host) {
this[context].host = host;
this[context].flags |= binding.URL_FLAGS_HAS_HOST;
} else {
this[context].flags &= ~binding.URL_FLAGS_HAS_HOST;
}
});
}
get port() {
const port = this[context].port;
return port === undefined ? '' : String(port);
}
set port(port) {
if (!this[context].host || this[cannotBeBase] || this.protocol === 'file:')
return;
port = String(port);
if (port === '') {
// Currently, if port number is empty, left unchanged.
// TODO(jasnell): This might be changing in the spec
return;
}
binding.parse(port, binding.kPort, null, this[context],
(flags, protocol, username, password,
host, port, path, query, fragment) => {
if (flags & binding.URL_FLAGS_FAILED)
return;
this[context].port = port;
});
}
get pathname() {
if (this[cannotBeBase])
return this[context].path[0];
return this[context].path !== undefined ?
`/${this[context].path.join('/')}` : '';
}
set pathname(path) {
if (this[cannotBeBase])
return;
path = String(path);
binding.parse(path,
binding.kPathStart,
null,
this[context],
(flags, protocol, username, password,
host, port, path, query, fragment) => {
if (flags & binding.URL_FLAGS_FAILED)
return;
if (path) {
this[context].path = path;
this[context].flags |= binding.URL_FLAGS_HAS_PATH;
} else {
this[context].flags &= ~binding.URL_FLAGS_HAS_PATH;
}
});
}
get search() {
return !this[context].query ? '' : `?${this[context].query}`;
}
set search(search) {
update(this, search);
this[searchParams][searchParams] = querystring.parse(this.search);
}
get hash() {
return !this[context].fragment ? '' : `#${this[context].fragment}`;
}
set hash(hash) {
hash = String(hash);
if (this.protocol === 'javascript:')
return;
if (!hash) {
this[context].fragment = null;
this[context].flags &= ~binding.URL_FLAGS_HAS_FRAGMENT;
return;
}
if (hash[0] === '#') hash = hash.slice(1);
this[context].fragment = '';
binding.parse(hash,
binding.kFragment,
null,
this[context],
(flags, protocol, username, password,
host, port, path, query, fragment) => {
if (flags & binding.URL_FLAGS_FAILED)
return;
if (fragment) {
this[context].fragment = fragment;
this[context].flags |= binding.URL_FLAGS_HAS_FRAGMENT;
} else {
this[context].flags &= ~binding.URL_FLAGS_HAS_FRAGMENT;
}
});
}
get href() {
return this.toString();
}
toString(options) {
options = options || {};
const fragment =
options.fragment !== undefined ?
!!options.fragment : true;
const unicode = !!options.unicode;
var ret;
if (this.protocol)
ret = this.protocol;
if (this[context].host !== undefined) {
ret += '//';
const has_username = typeof this[context].username === 'string';
const has_password = typeof this[context].password === 'string';
if (has_username || has_password) {
if (has_username)
ret += this[context].username;
if (has_password)
ret += `:${this[context].password}`;
ret += '@';
}
if (unicode) {
ret += punycode.toUnicode(this.hostname);
if (this.port !== undefined)
ret += `:${this.port}`;
} else {
ret += this.host;
}
} else if (this[context].scheme === 'file:') {
ret += '//';
}
if (this.pathname)
ret += this.pathname;
if (typeof this[context].query === 'string')
ret += `?${this[context].query}`;
if (fragment & typeof this[context].fragment === 'string')
ret += `#${this[context].fragment}`;
return ret;
}
inspect(depth, opts) {
var ret = 'URL {\n';
ret += ` href: ${this.href}\n`;
if (this[context].scheme !== undefined)
ret += ` protocol: ${this.protocol}\n`;
if (this[context].username !== undefined)
ret += ` username: ${this.username}\n`;
if (this[context].password !== undefined) {
const pwd = opts.showHidden ? this[context].password : '--------';
ret += ` password: ${pwd}\n`;
}
if (this[context].host !== undefined)
ret += ` hostname: ${this.hostname}\n`;
if (this[context].port !== undefined)
ret += ` port: ${this.port}\n`;
if (this[context].path !== undefined)
ret += ` pathname: ${this.pathname}\n`;
if (this[context].query !== undefined)
ret += ` search: ${this.search}\n`;
if (this[context].fragment !== undefined)
ret += ` hash: ${this.hash}\n`;
if (opts.showHidden) {
ret += ` cannot-be-base: ${this[cannotBeBase]}\n`;
ret += ` special: ${this[special]}\n;`;
}
ret += '}';
return ret;
}
}
// Lookup table mapping every byte value 0..255 to its upper-case '%XX' form.
const hexTable = new Array(256);
for (let byte = 0; byte < 256; ++byte) {
  hexTable[byte] =
    '%' + ((byte < 16 ? '0' : '') + byte.toString(16)).toUpperCase();
}

/**
 * Percent-encodes the auth (userinfo) part of a URL.
 *
 * A faster alternative to encodeURIComponent(): characters that need no
 * escaping are copied through in runs, everything else is emitted as the
 * percent-encoded bytes of its UTF-8 form.
 *
 * @param {string} str text to encode.
 * @returns {string} the encoded text; `str` itself when nothing was escaped.
 */
function encodeAuth(str) {
  var out = '';
  // Start of the pending run of characters that need no escaping.
  var lastPos = 0;
  for (var i = 0; i < str.length; ++i) {
    var c = str.charCodeAt(i);

    // These characters do not need escaping:
    // ! - . _ ~
    // ' ( ) * :
    // digits
    // alpha (uppercase)
    // alpha (lowercase)
    if (c === 0x21 || c === 0x2D || c === 0x2E || c === 0x5F || c === 0x7E ||
        (c >= 0x27 && c <= 0x2A) ||
        (c >= 0x30 && c <= 0x3A) ||
        (c >= 0x41 && c <= 0x5A) ||
        (c >= 0x61 && c <= 0x7A)) {
      continue;
    }

    // Flush the unescaped run that precedes this character.
    if (i - lastPos > 0)
      out += str.slice(lastPos, i);

    lastPos = i + 1;

    // Other ASCII characters
    if (c < 0x80) {
      out += hexTable[c];
      continue;
    }

    // Multi-byte characters ...
    if (c < 0x800) {
      out += hexTable[0xC0 | (c >> 6)] + hexTable[0x80 | (c & 0x3F)];
      continue;
    }
    if (c < 0xD800 || c >= 0xE000) {
      out += hexTable[0xE0 | (c >> 12)] +
             hexTable[0x80 | ((c >> 6) & 0x3F)] +
             hexTable[0x80 | (c & 0x3F)];
      continue;
    }

    // Surrogate pair: consume the following code unit as the low half. A
    // missing low half (end of input) is treated as zero.
    ++i;
    var c2;
    if (i < str.length)
      c2 = str.charCodeAt(i) & 0x3FF;
    else
      c2 = 0;
    // The low half has been consumed as well, so the next unescaped run must
    // start after it; otherwise the raw low surrogate leaks into the output.
    lastPos = i + 1;
    c = 0x10000 + (((c & 0x3FF) << 10) | c2);
    out += hexTable[0xF0 | (c >> 18)] +
           hexTable[0x80 | ((c >> 12) & 0x3F)] +
           hexTable[0x80 | ((c >> 6) & 0x3F)] +
           hexTable[0x80 | (c & 0x3F)];
  }
  if (lastPos === 0)
    return str;
  if (lastPos < str.length)
    return out + str.slice(lastPos);
  return out;
}
/**
 * Replaces the query component stored on `url`.
 *
 * An empty `search` clears the query (null) and the HAS_QUERY flag.
 * Otherwise one leading '?' is stripped, the stored query is reset to ''
 * and the remainder is run through the native parser in query state; its
 * result is committed unless the parser reports failure.
 *
 * @param {URL} url object whose `context` slot is updated in place.
 * @param {*} search new query text; coerced with String().
 */
function update(url, search) {
  search = String(search);
  const ctx = url[context];

  if (search.length === 0) {
    ctx.query = null;
    ctx.flags &= ~binding.URL_FLAGS_HAS_QUERY;
    return;
  }

  const input = search[0] === '?' ? search.slice(1) : search;
  ctx.query = '';

  const onParsed = (flags, protocol, username, password,
                    host, port, path, query, fragment) => {
    if (flags & binding.URL_FLAGS_FAILED)
      return;
    if (!query) {
      ctx.flags &= ~binding.URL_FLAGS_HAS_QUERY;
      return;
    }
    ctx.query = query;
    ctx.flags |= binding.URL_FLAGS_HAS_QUERY;
  };
  binding.parse(input, binding.kQuery, null, ctx, onParsed);
}
/**
 * Name/value view over the query of a URL object.
 *
 * The parameters are kept as the object produced by querystring.parse()
 * (a name maps to a string, or to an array of strings when it repeats).
 * Every mutation is written back to the owning URL through update().
 */
class URLSearchParams {
  /**
   * @param {URL} url owning URL; its stored query seeds the parameters.
   */
  constructor(url) {
    this[context] = url;
    // The parsed query lives under `query` in the URL's context; there is
    // no `search` entry there, so reading it always produced an empty set.
    this[searchParams] = querystring.parse(url[context].query || '');
  }

  /** Adds another value for `name`, keeping existing ones. */
  append(name, value) {
    const obj = this[searchParams];
    name = String(name);
    value = String(value);
    var existing = obj[name];
    // Compare against undefined: an existing empty-string value is falsy
    // but must still be kept when a second value is appended.
    if (existing === undefined) {
      obj[name] = value;
    } else if (Array.isArray(existing)) {
      existing.push(value);
    } else {
      obj[name] = [existing, value];
    }
    update(this[context], querystring.stringify(obj));
  }

  /** Removes every value stored for `name`. */
  delete(name) {
    const obj = this[searchParams];
    name = String(name);
    delete obj[name];
    update(this[context], querystring.stringify(obj));
  }

  /** Replaces all values of `name` with the single `value`. */
  set(name, value) {
    const obj = this[searchParams];
    name = String(name);
    value = String(value);
    obj[name] = value;
    update(this[context], querystring.stringify(obj));
  }

  /** First value stored for `name`, or undefined when there is none. */
  get(name) {
    const obj = this[searchParams];
    name = String(name);
    var value = obj[name];
    return Array.isArray(value) ? value[0] : value;
  }

  /** All values stored for `name` (an empty array when there are none). */
  getAll(name) {
    const obj = this[searchParams];
    name = String(name);
    var value = obj[name];
    return value === undefined ? [] : Array.isArray(value) ? value : [value];
  }

  /** Whether at least one value is stored for `name`. */
  has(name) {
    const obj = this[searchParams];
    name = String(name);
    // Own-property check so inherited names can never count as parameters.
    return Object.prototype.hasOwnProperty.call(obj, name);
  }

  /** Yields [name, value] pairs; repeated names yield one pair per value. */
  *[Symbol.iterator]() {
    const obj = this[searchParams];
    for (const name in obj) {
      const value = obj[name];
      if (Array.isArray(value)) {
        for (const item of value)
          yield [name, item];
      } else {
        yield [name, value];
      }
    }
  }

  /** The parameters serialized with querystring.stringify(). */
  toString() {
    return querystring.stringify(this[searchParams]);
  }
}
/**
 * Computes the origin object of a URL.
 *
 * @param {URL|*} url a URL instance, or anything accepted by `new URL()`.
 * @returns {TupleOrigin|OpaqueOrigin} a tuple origin for the network
 *   schemes listed below and for 'file:', the origin of the embedded URL
 *   for 'blob:' URLs whose first path entry parses as a URL, and an opaque
 *   origin for everything else.
 */
URL.originFor = function(url) {
  if (!(url instanceof URL))
    url = new URL(url);
  const protocol = url.protocol;
  switch (protocol) {
    case 'blob:': {
      const path = url[context].path;
      if (path && path.length > 0) {
        try {
          // Return the embedded URL's origin *object*, like every other
          // branch, rather than its already-serialized string.
          return URL.originFor(new URL(path[0]));
        } catch (err) {
          // The embedded text is not a parsable URL: opaque origin below.
        }
      }
      return new OpaqueOrigin();
    }
    case 'ftp:':
    case 'gopher:':
    case 'http:':
    case 'https:':
    case 'ws:':
    case 'wss:':
    // `protocol` always carries the trailing colon, so the label must be
    // 'file:'; a bare 'file' could never match.
    case 'file:':
      return new TupleOrigin(protocol.slice(0, -1),
                             url[context].host,
                             url[context].port,
                             null);
    default:
      return new OpaqueOrigin();
  }
};
// Converts `domain` (coerced with String()) using the native binding's
// domainToASCII().
URL.domainToASCII = function(domain) {
return binding.domainToASCII(String(domain));
};
// Converts `domain` (coerced with String()) using the native binding's
// domainToUnicode().
URL.domainToUnicode = function(domain) {
return binding.domainToUnicode(String(domain));
};
// Module exports: the URL class and the auth percent-encoder.
exports.URL = URL;
exports.encodeAuth = encodeAuth;

70
lib/url.js

@ -10,10 +10,14 @@ function importPunycode() {
const { toASCII } = importPunycode();
const internalUrl = require('internal/url');
const encodeAuth = internalUrl.encodeAuth;
exports.parse = urlParse;
exports.resolve = urlResolve;
exports.resolveObject = urlResolveObject;
exports.format = urlFormat;
exports.URL = internalUrl.URL;
exports.Url = Url;
@ -942,69 +946,3 @@ function spliceOne(list, index) {
list[i] = list[k];
list.pop();
}
// Lookup table mapping every byte value 0..255 to its upper-case '%XX' form.
const hexTable = new Array(256);
for (let byte = 0; byte < 256; ++byte) {
  hexTable[byte] =
    '%' + ((byte < 16 ? '0' : '') + byte.toString(16)).toUpperCase();
}

/**
 * Percent-encodes the auth (userinfo) part of a URL.
 *
 * A faster alternative to encodeURIComponent(): characters that need no
 * escaping are copied through in runs, everything else is emitted as the
 * percent-encoded bytes of its UTF-8 form.
 *
 * @param {string} str text to encode.
 * @returns {string} the encoded text; `str` itself when nothing was escaped.
 */
function encodeAuth(str) {
  var out = '';
  // Start of the pending run of characters that need no escaping.
  var lastPos = 0;
  for (var i = 0; i < str.length; ++i) {
    var c = str.charCodeAt(i);

    // These characters do not need escaping:
    // ! - . _ ~
    // ' ( ) * :
    // digits
    // alpha (uppercase)
    // alpha (lowercase)
    if (c === 0x21 || c === 0x2D || c === 0x2E || c === 0x5F || c === 0x7E ||
        (c >= 0x27 && c <= 0x2A) ||
        (c >= 0x30 && c <= 0x3A) ||
        (c >= 0x41 && c <= 0x5A) ||
        (c >= 0x61 && c <= 0x7A)) {
      continue;
    }

    // Flush the unescaped run that precedes this character.
    if (i - lastPos > 0)
      out += str.slice(lastPos, i);

    lastPos = i + 1;

    // Other ASCII characters
    if (c < 0x80) {
      out += hexTable[c];
      continue;
    }

    // Multi-byte characters ...
    if (c < 0x800) {
      out += hexTable[0xC0 | (c >> 6)] + hexTable[0x80 | (c & 0x3F)];
      continue;
    }
    if (c < 0xD800 || c >= 0xE000) {
      out += hexTable[0xE0 | (c >> 12)] +
             hexTable[0x80 | ((c >> 6) & 0x3F)] +
             hexTable[0x80 | (c & 0x3F)];
      continue;
    }

    // Surrogate pair: consume the following code unit as the low half. A
    // missing low half (end of input) is treated as zero.
    ++i;
    var c2;
    if (i < str.length)
      c2 = str.charCodeAt(i) & 0x3FF;
    else
      c2 = 0;
    // The low half has been consumed as well, so the next unescaped run must
    // start after it; otherwise the raw low surrogate leaks into the output.
    lastPos = i + 1;
    c = 0x10000 + (((c & 0x3FF) << 10) | c2);
    out += hexTable[0xF0 | (c >> 18)] +
           hexTable[0x80 | ((c >> 12) & 0x3F)] +
           hexTable[0x80 | ((c >> 6) & 0x3F)] +
           hexTable[0x80 | (c & 0x3F)];
  }
  if (lastPos === 0)
    return str;
  if (lastPos < str.length)
    return out + str.slice(lastPos);
  return out;
}

2
node.gyp

@ -89,6 +89,7 @@
'lib/internal/readline.js',
'lib/internal/repl.js',
'lib/internal/socket_list.js',
'lib/internal/url.js',
'lib/internal/util.js',
'lib/internal/v8_prof_polyfill.js',
'lib/internal/v8_prof_processor.js',
@ -158,6 +159,7 @@
'src/node_main.cc',
'src/node_os.cc',
'src/node_revert.cc',
'src/node_url.cc',
'src/node_util.cc',
'src/node_v8.cc',
'src/node_stat_watcher.cc',

12
src/node_i18n.cc

@ -79,9 +79,9 @@ bool InitializeICUDirectory(const char* icu_data_path) {
}
}
static int32_t ToUnicode(MaybeStackBuffer<char>* buf,
const char* input,
size_t length) {
int32_t ToUnicode(MaybeStackBuffer<char>* buf,
const char* input,
size_t length) {
UErrorCode status = U_ZERO_ERROR;
uint32_t options = UIDNA_DEFAULT;
options |= UIDNA_NONTRANSITIONAL_TO_UNICODE;
@ -113,9 +113,9 @@ static int32_t ToUnicode(MaybeStackBuffer<char>* buf,
return len;
}
static int32_t ToASCII(MaybeStackBuffer<char>* buf,
const char* input,
size_t length) {
int32_t ToASCII(MaybeStackBuffer<char>* buf,
const char* input,
size_t length) {
UErrorCode status = U_ZERO_ERROR;
uint32_t options = UIDNA_DEFAULT;
options |= UIDNA_NONTRANSITIONAL_TO_ASCII;

7
src/node_i18n.h

@ -15,6 +15,13 @@ namespace i18n {
bool InitializeICUDirectory(const char* icu_data_path);
// Domain-name conversion helpers, declared here so that code outside
// node_i18n.cc can call them. Each takes an output buffer plus an input
// character range given as pointer and length.
// NOTE(review): the return value is presumably the length of the converted
// text (the implementation hunk ends in `return len;`) — confirm how
// failures are reported.
int32_t ToASCII(MaybeStackBuffer<char>* buf,
const char* input,
size_t length);
int32_t ToUnicode(MaybeStackBuffer<char>* buf,
const char* input,
size_t length);
} // namespace i18n
} // namespace node

1406
src/node_url.cc

File diff suppressed because it is too large

538
src/node_url.h

@ -0,0 +1,538 @@
#ifndef SRC_NODE_URL_H_
#define SRC_NODE_URL_H_
#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
#include "node.h"
#include <string>
namespace node {
namespace url {
// Character-classification helpers used by the URL parser.
//
// Every macro argument is wrapped in parentheses so that expression
// arguments (for example a conditional expression) expand with the intended
// precedence. Arguments may still be evaluated more than once, so do not
// pass expressions with side effects.

// Tests bit `i` of the byte-packed bitmap `a`: bit (i & 7) of byte (i >> 3).
#define BIT_AT(a, i)                                                          \
  (!!((unsigned int) (a)[(unsigned int) (i) >> 3] &                           \
      (1 << ((unsigned int) (i) & 7))))
// Tab (0x09), line feed (0x0a) or carriage return (0x0d).
#define TAB_AND_NEWLINE(ch)                                                   \
  ((ch) == 0x09 || (ch) == 0x0a || (ch) == 0x0d)
// '0'..'9'
#define ASCII_DIGIT(ch)                                                       \
  ((ch) >= 0x30 && (ch) <= 0x39)
// '0'..'9', 'A'..'F' or 'a'..'f'
#define ASCII_HEX_DIGIT(ch)                                                   \
  (ASCII_DIGIT(ch) ||                                                         \
   ((ch) >= 0x41 && (ch) <= 0x46) ||                                          \
   ((ch) >= 0x61 && (ch) <= 0x66))
// 'A'..'Z' or 'a'..'z'
#define ASCII_ALPHA(ch)                                                       \
  (((ch) >= 0x41 && (ch) <= 0x5a) || ((ch) >= 0x61 && (ch) <= 0x7a))
#define ASCII_ALPHANUMERIC(ch)                                                \
  (ASCII_DIGIT(ch) || ASCII_ALPHA(ch))
// Lower-cases ASCII letters by setting bit 0x20; other values pass through.
#define TO_LOWER(ch)                                                          \
  (ASCII_ALPHA(ch) ? ((ch) | 0x20) : (ch))
// Characters accepted in a scheme: alphanumerics, '+', '-' and '.'.
#define SCHEME_CHAR(ch)                                                       \
  (ASCII_ALPHANUMERIC(ch) || (ch) == '+' || (ch) == '-' || (ch) == '.')
// A letter followed by ':' or '|'.
#define WINDOWS_DRIVE_LETTER(ch, next)                                        \
  (ASCII_ALPHA(ch) && ((next) == ':' || (next) == '|'))
// Exactly two characters: a letter followed by ':'.
#define NORMALIZED_WINDOWS_DRIVE_LETTER(str)                                  \
  ((str).length() == 2 &&                                                     \
   ASCII_ALPHA((str)[0]) &&                                                   \
   (str)[1] == ':')
// Percent-encoded form of every byte value: hex[b] is "%XX" with upper-case
// hexadecimal digits. AppendOrEscape() indexes this table with the byte to
// escape.
static const char* hex[256] = {
"%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07",
"%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
"%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17",
"%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
"%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27",
"%28", "%29", "%2A", "%2B", "%2C", "%2D", "%2E", "%2F",
"%30", "%31", "%32", "%33", "%34", "%35", "%36", "%37",
"%38", "%39", "%3A", "%3B", "%3C", "%3D", "%3E", "%3F",
"%40", "%41", "%42", "%43", "%44", "%45", "%46", "%47",
"%48", "%49", "%4A", "%4B", "%4C", "%4D", "%4E", "%4F",
"%50", "%51", "%52", "%53", "%54", "%55", "%56", "%57",
"%58", "%59", "%5A", "%5B", "%5C", "%5D", "%5E", "%5F",
"%60", "%61", "%62", "%63", "%64", "%65", "%66", "%67",
"%68", "%69", "%6A", "%6B", "%6C", "%6D", "%6E", "%6F",
"%70", "%71", "%72", "%73", "%74", "%75", "%76", "%77",
"%78", "%79", "%7A", "%7B", "%7C", "%7D", "%7E", "%7F",
"%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87",
"%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
"%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97",
"%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
"%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7",
"%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
"%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7",
"%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
"%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
"%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
"%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7",
"%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
"%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7",
"%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
"%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7",
"%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
};
// 256-bit bitmap, one bit per byte value, read through BIT_AT(): the bit for
// byte `ch` is bit (ch & 7) of element (ch >> 3). Each row below covers the
// eight byte values named in the comment above it; a set bit marks a byte
// that SimpleEncodeSet() reports as "must be percent-encoded".
// Flagged here: 0x00-0x1F and 0x7F-0xFF.
static const uint8_t SIMPLE_ENCODE_SET[32] = {
// 00 01 02 03 04 05 06 07
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// 08 09 0A 0B 0C 0D 0E 0F
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// 10 11 12 13 14 15 16 17
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// 18 19 1A 1B 1C 1D 1E 1F
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// 20 21 22 23 24 25 26 27
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 28 29 2A 2B 2C 2D 2E 2F
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 30 31 32 33 34 35 36 37
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 38 39 3A 3B 3C 3D 3E 3F
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 40 41 42 43 44 45 46 47
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 48 49 4A 4B 4C 4D 4E 4F
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 50 51 52 53 54 55 56 57
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 58 59 5A 5B 5C 5D 5E 5F
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 60 61 62 63 64 65 66 67
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 68 69 6A 6B 6C 6D 6E 6F
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 70 71 72 73 74 75 76 77
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 78 79 7A 7B 7C 7D 7E 7F
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80,
// 80 81 82 83 84 85 86 87
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// 88 89 8A 8B 8C 8D 8E 8F
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// 90 91 92 93 94 95 96 97
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// 98 99 9A 9B 9C 9D 9E 9F
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// A0 A1 A2 A3 A4 A5 A6 A7
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// A8 A9 AA AB AC AD AE AF
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// B0 B1 B2 B3 B4 B5 B6 B7
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// B8 B9 BA BB BC BD BE BF
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// C0 C1 C2 C3 C4 C5 C6 C7
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// C8 C9 CA CB CC CD CE CF
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// D0 D1 D2 D3 D4 D5 D6 D7
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// D8 D9 DA DB DC DD DE DF
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// E0 E1 E2 E3 E4 E5 E6 E7
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// E8 E9 EA EB EC ED EE EF
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// F0 F1 F2 F3 F4 F5 F6 F7
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// F8 F9 FA FB FC FD FE FF
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80
};
// 256-bit bitmap in the same layout as SIMPLE_ENCODE_SET (bit (ch & 7) of
// element (ch >> 3), read through BIT_AT()); a set bit marks a byte that
// DefaultEncodeSet() reports as "must be percent-encoded".
// Flagged here: 0x00-0x1F, 0x7F-0xFF, and the printable characters
// space (0x20), '"' (0x22), '#' (0x23), '<' (0x3C), '>' (0x3E), '?' (0x3F),
// '`' (0x60), '{' (0x7B) and '}' (0x7D).
static const uint8_t DEFAULT_ENCODE_SET[32] = {
// 00 01 02 03 04 05 06 07
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// 08 09 0A 0B 0C 0D 0E 0F
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// 10 11 12 13 14 15 16 17
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// 18 19 1A 1B 1C 1D 1E 1F
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// 20 21 22 23 24 25 26 27
0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00,
// 28 29 2A 2B 2C 2D 2E 2F
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 30 31 32 33 34 35 36 37
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 38 39 3A 3B 3C 3D 3E 3F
0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x80,
// 40 41 42 43 44 45 46 47
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 48 49 4A 4B 4C 4D 4E 4F
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 50 51 52 53 54 55 56 57
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 58 59 5A 5B 5C 5D 5E 5F
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 60 61 62 63 64 65 66 67
0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 68 69 6A 6B 6C 6D 6E 6F
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 70 71 72 73 74 75 76 77
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 78 79 7A 7B 7C 7D 7E 7F
0x00 | 0x00 | 0x00 | 0x08 | 0x00 | 0x20 | 0x00 | 0x80,
// 80 81 82 83 84 85 86 87
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// 88 89 8A 8B 8C 8D 8E 8F
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// 90 91 92 93 94 95 96 97
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// 98 99 9A 9B 9C 9D 9E 9F
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// A0 A1 A2 A3 A4 A5 A6 A7
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// A8 A9 AA AB AC AD AE AF
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// B0 B1 B2 B3 B4 B5 B6 B7
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// B8 B9 BA BB BC BD BE BF
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// C0 C1 C2 C3 C4 C5 C6 C7
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// C8 C9 CA CB CC CD CE CF
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// D0 D1 D2 D3 D4 D5 D6 D7
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// D8 D9 DA DB DC DD DE DF
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// E0 E1 E2 E3 E4 E5 E6 E7
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// E8 E9 EA EB EC ED EE EF
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// F0 F1 F2 F3 F4 F5 F6 F7
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// F8 F9 FA FB FC FD FE FF
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80
};
// 256-bit bitmap in the same layout as SIMPLE_ENCODE_SET (bit (ch & 7) of
// element (ch >> 3), read through BIT_AT()); a set bit marks a byte that
// UserinfoEncodeSet() reports as "must be percent-encoded".
// Flagged here: everything DEFAULT_ENCODE_SET flags, plus '/' (0x2F),
// ':' (0x3A), ';' (0x3B), '=' (0x3D), '@' (0x40), '[' (0x5B),
// backslash (0x5C), ']' (0x5D), '^' (0x5E) and '|' (0x7C).
static const uint8_t USERINFO_ENCODE_SET[32] = {
// 00 01 02 03 04 05 06 07
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// 08 09 0A 0B 0C 0D 0E 0F
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// 10 11 12 13 14 15 16 17
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// 18 19 1A 1B 1C 1D 1E 1F
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// 20 21 22 23 24 25 26 27
0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00,
// 28 29 2A 2B 2C 2D 2E 2F
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80,
// 30 31 32 33 34 35 36 37
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 38 39 3A 3B 3C 3D 3E 3F
0x00 | 0x00 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// 40 41 42 43 44 45 46 47
0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 48 49 4A 4B 4C 4D 4E 4F
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 50 51 52 53 54 55 56 57
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 58 59 5A 5B 5C 5D 5E 5F
0x00 | 0x00 | 0x00 | 0x08 | 0x10 | 0x20 | 0x40 | 0x00,
// 60 61 62 63 64 65 66 67
0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 68 69 6A 6B 6C 6D 6E 6F
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 70 71 72 73 74 75 76 77
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 78 79 7A 7B 7C 7D 7E 7F
0x00 | 0x00 | 0x00 | 0x08 | 0x10 | 0x20 | 0x00 | 0x80,
// 80 81 82 83 84 85 86 87
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// 88 89 8A 8B 8C 8D 8E 8F
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// 90 91 92 93 94 95 96 97
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// 98 99 9A 9B 9C 9D 9E 9F
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// A0 A1 A2 A3 A4 A5 A6 A7
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// A8 A9 AA AB AC AD AE AF
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// B0 B1 B2 B3 B4 B5 B6 B7
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// B8 B9 BA BB BC BD BE BF
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// C0 C1 C2 C3 C4 C5 C6 C7
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// C8 C9 CA CB CC CD CE CF
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// D0 D1 D2 D3 D4 D5 D6 D7
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// D8 D9 DA DB DC DD DE DF
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// E0 E1 E2 E3 E4 E5 E6 E7
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// E8 E9 EA EB EC ED EE EF
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// F0 F1 F2 F3 F4 F5 F6 F7
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// F8 F9 FA FB FC FD FE FF
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80
};
// 256-bit bitmap in the same layout as SIMPLE_ENCODE_SET (bit (ch & 7) of
// element (ch >> 3), read through BIT_AT()); a set bit marks a byte that
// QueryEncodeSet() reports as "must be percent-encoded".
// Flagged here: 0x00-0x20 (controls and space), '"' (0x22), '#' (0x23),
// '<' (0x3C), '>' (0x3E) and 0x7F-0xFF.
static const uint8_t QUERY_ENCODE_SET[32] = {
// 00 01 02 03 04 05 06 07
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// 08 09 0A 0B 0C 0D 0E 0F
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// 10 11 12 13 14 15 16 17
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// 18 19 1A 1B 1C 1D 1E 1F
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// 20 21 22 23 24 25 26 27
0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00,
// 28 29 2A 2B 2C 2D 2E 2F
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 30 31 32 33 34 35 36 37
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 38 39 3A 3B 3C 3D 3E 3F
0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x00,
// 40 41 42 43 44 45 46 47
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 48 49 4A 4B 4C 4D 4E 4F
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 50 51 52 53 54 55 56 57
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 58 59 5A 5B 5C 5D 5E 5F
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 60 61 62 63 64 65 66 67
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 68 69 6A 6B 6C 6D 6E 6F
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 70 71 72 73 74 75 76 77
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
// 78 79 7A 7B 7C 7D 7E 7F
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80,
// 80 81 82 83 84 85 86 87
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// 88 89 8A 8B 8C 8D 8E 8F
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// 90 91 92 93 94 95 96 97
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// 98 99 9A 9B 9C 9D 9E 9F
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// A0 A1 A2 A3 A4 A5 A6 A7
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// A8 A9 AA AB AC AD AE AF
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// B0 B1 B2 B3 B4 B5 B6 B7
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// B8 B9 BA BB BC BD BE BF
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// C0 C1 C2 C3 C4 C5 C6 C7
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// C8 C9 CA CB CC CD CE CF
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// D0 D1 D2 D3 D4 D5 D6 D7
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// D8 D9 DA DB DC DD DE DF
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// E0 E1 E2 E3 E4 E5 E6 E7
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// E8 E9 EA EB EC ED EE EF
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// F0 F1 F2 F3 F4 F5 F6 F7
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
// F8 F9 FA FB FC FD FE FF
0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80
};
// Predicate type: returns true when ch has to be percent-encoded.
typedef bool (*must_escape_cb)(const unsigned char ch);

// Appends ch to *str.
//   str:  destination string, modified in place.
//   ch:   the byte to append.
//   test: predicate deciding whether ch must be percent-encoded.
// When test(ch) is false the raw byte is appended; otherwise the hex[] entry
// for ch (its percent-encoded form) is appended instead.
static inline void AppendOrEscape(std::string* str,
                                  const unsigned char ch,
                                  must_escape_cb test) {
  if (!test(ch)) {
    // No escaping needed: copy the byte through unchanged.
    *str += ch;
    return;
  }
  *str += hex[ch];
}
// Returns the BIT_AT() lookup of ch in SIMPLE_ENCODE_SET. Has the
// must_escape_cb signature, so true means "ch is to be percent-encoded".
static inline bool SimpleEncodeSet(const unsigned char ch) {
return BIT_AT(SIMPLE_ENCODE_SET, ch);
}
// Returns the BIT_AT() lookup of ch in DEFAULT_ENCODE_SET. Has the
// must_escape_cb signature, so true means "ch is to be percent-encoded".
static inline bool DefaultEncodeSet(const unsigned char ch) {
return BIT_AT(DEFAULT_ENCODE_SET, ch);
}
// Returns the BIT_AT() lookup of ch in USERINFO_ENCODE_SET. Has the
// must_escape_cb signature, so true means "ch is to be percent-encoded".
static inline bool UserinfoEncodeSet(const unsigned char ch) {
return BIT_AT(USERINFO_ENCODE_SET, ch);
}
// Returns the BIT_AT() lookup of ch in QUERY_ENCODE_SET. Has the
// must_escape_cb signature, so true means "ch is to be percent-encoded".
static inline bool QueryEncodeSet(const unsigned char ch) {
return BIT_AT(QUERY_ENCODE_SET, ch);
}
// Converts one ASCII hexadecimal digit to its numeric value.
//   ch: the character to convert ('0'-'9', 'A'-'F' or 'a'-'f').
// Returns 0-15 for a valid digit. Any other character yields
// static_cast<unsigned>(-1), i.e. the largest unsigned value.
static inline unsigned hex2bin(const char ch) {
  const bool is_decimal = ('0' <= ch) && (ch <= '9');
  const bool is_upper_hex = ('A' <= ch) && (ch <= 'F');
  const bool is_lower_hex = ('a' <= ch) && (ch <= 'f');

  if (is_decimal)
    return ch - '0';
  if (is_upper_hex)
    return (ch - 'A') + 10;
  if (is_lower_hex)
    return (ch - 'a') + 10;
  // Not a hexadecimal digit.
  return static_cast<unsigned>(-1);
}
// Percent-decodes `len` bytes starting at `input`, appending the result to
// *dest.
//   input: bytes to decode; only input[0 .. len-1] is read.
//   len:   number of bytes in input.
//   dest:  output string; decoded bytes are appended to it.
// A '%' followed by two ASCII hex digits is replaced by the byte they encode.
// Every other byte -- including a '%' that is not followed by two hex digits,
// or that sits too close to the end of the input -- is copied through as is.
// Always returns 0.
static inline int PercentDecode(const char* input,
                                size_t len,
                                std::string* dest) {
  if (len == 0)
    return 0;
  dest->reserve(len);
  const char* pointer = input;
  const char* end = input + len;
  while (pointer < end) {
    const char ch = pointer[0];
    // Bytes available after ch. This must be at least 2 before pointer[1]
    // and pointer[2] may be inspected, otherwise a trailing '%' would cause
    // a read past the end of the input.
    const size_t remaining = static_cast<size_t>(end - pointer) - 1;
    if (ch != '%' || remaining < 2 ||
        !ASCII_HEX_DIGIT(pointer[1]) ||
        !ASCII_HEX_DIGIT(pointer[2])) {
      *dest += ch;
      pointer++;
      continue;
    }
    const unsigned a = hex2bin(pointer[1]);
    const unsigned b = hex2bin(pointer[2]);
    *dest += static_cast<char>(a * 16 + b);
    pointer += 3;
  }
  return 0;
}
// X-macro table of "special" schemes. Each XX(scheme, port) entry pairs a
// scheme string (including its trailing ':') with a port number; "file:" is
// listed with -1. Expanded by IsSpecial(), which matches on the scheme, and
// by NormalizePort(), which maps the listed port for a scheme to -1.
#define SPECIALS(XX) \
XX("ftp:", 21) \
XX("file:", -1) \
XX("gopher:", 70) \
XX("http:", 80) \
XX("https:", 443) \
XX("ws:", 80) \
XX("wss:", 443)
// X-macro list of parser state names. Expanded into the enumerators of
// enum url_parse_state, in this order, after kUnknownState (-1), so
// kSchemeStart is 0 and the rest count upwards.
// NOTE(review): the names appear to mirror the states of the WHATWG URL
// parsing algorithm -- confirm against the parser implementation.
#define PARSESTATES(XX) \
XX(kSchemeStart) \
XX(kScheme) \
XX(kNoScheme) \
XX(kSpecialRelativeOrAuthority) \
XX(kPathOrAuthority) \
XX(kRelative) \
XX(kRelativeSlash) \
XX(kSpecialAuthoritySlashes) \
XX(kSpecialAuthorityIgnoreSlashes) \
XX(kAuthority) \
XX(kHost) \
XX(kHostname) \
XX(kPort) \
XX(kFile) \
XX(kFileSlash) \
XX(kFileHost) \
XX(kPathStart) \
XX(kPath) \
XX(kCannotBeBase) \
XX(kQuery) \
XX(kFragment)
// X-macro table of URL flag names and values. Expanded into the enumerators
// of enum url_flags. Apart from URL_FLAGS_NONE (0) every value is a distinct
// single bit, so flags can be combined in one integer (url_data::flags is
// initialised to URL_FLAGS_NONE).
#define FLAGS(XX) \
XX(URL_FLAGS_NONE, 0) \
XX(URL_FLAGS_FAILED, 0x01) \
XX(URL_FLAGS_CANNOT_BE_BASE, 0x02) \
XX(URL_FLAGS_INVALID_PARSE_STATE, 0x04) \
XX(URL_FLAGS_TERMINATED, 0x08) \
XX(URL_FLAGS_SPECIAL, 0x10) \
XX(URL_FLAGS_HAS_SCHEME, 0x20) \
XX(URL_FLAGS_HAS_USERNAME, 0x40) \
XX(URL_FLAGS_HAS_PASSWORD, 0x80) \
XX(URL_FLAGS_HAS_HOST, 0x100) \
XX(URL_FLAGS_HAS_PATH, 0x200) \
XX(URL_FLAGS_HAS_QUERY, 0x400) \
XX(URL_FLAGS_HAS_FRAGMENT, 0x800)
// X-macro list expanded into the enumerators of enum url_cb_args, giving
// consecutive values starting at 0 (ARG_FLAGS == 0 ... ARG_FRAGMENT == 8).
// NOTE(review): presumably these are the positions of the arguments handed
// to a parse callback -- confirm at the call site, which is not in this
// section.
#define ARGS(XX) \
XX(ARG_FLAGS) \
XX(ARG_PROTOCOL) \
XX(ARG_USERNAME) \
XX(ARG_PASSWORD) \
XX(ARG_HOST) \
XX(ARG_PORT) \
XX(ARG_PATH) \
XX(ARG_QUERY) \
XX(ARG_FRAGMENT)
// Sentinel character with the value -1.
// NOTE(review): presumably the parser's end-of-input marker; its uses are
// outside this section. Where plain char is unsigned this holds 255.
static const char kEOL = -1;
// Parser states. kUnknownState is -1; the remaining enumerators come from
// PARSESTATES and count upwards from 0 in the order listed there.
// Declared as a type only: no variable is defined alongside it, so the
// header can be included from several translation units, and the name can
// be used with or without the `enum` keyword.
enum url_parse_state {
  kUnknownState = -1,
#define XX(name) name,
  PARSESTATES(XX)
#undef XX
};
// URL flag values, generated from the FLAGS table (name = value).
// Declared as a type only: no variable is defined alongside it, so the
// header can be included from several translation units, and the name can
// be used with or without the `enum` keyword.
enum url_flags {
#define XX(name, val) name = val,
  FLAGS(XX)
#undef XX
};
// Enumerators generated from the ARGS list, numbered consecutively from 0.
// Declared as a type only: no variable is defined alongside it, so the
// header can be included from several translation units, and the name can
// be used with or without the `enum` keyword.
enum url_cb_args {
#define XX(name) name,
  ARGS(XX)
#undef XX
};
// Returns true when `scheme` equals one of the scheme strings listed in
// SPECIALS (compared including the trailing ':'), false otherwise.
//   scheme: the scheme to look up; taken by const reference so that a call
//           does not copy the string.
static inline bool IsSpecial(const std::string& scheme) {
  // Expands to one `if (scheme == "...") return true;` per SPECIALS entry.
#define XX(name, _) if (scheme == name) return true;
  SPECIALS(XX)
#undef XX
  return false;
}
// Maps the port listed for a scheme in SPECIALS to -1.
//   scheme: the scheme (including the trailing ':'); taken by const
//           reference so that a call does not copy the string.
//   p:      the port number to normalize.
// Returns -1 when (scheme, p) matches an entry of SPECIALS, otherwise p
// unchanged.
static inline int NormalizePort(const std::string& scheme, int p) {
  // Expands to one `if (scheme == "..." && p == N) return -1;` per entry.
#define XX(name, port) if (scheme == name && p == port) return -1;
  SPECIALS(XX)
#undef XX
  return p;
}
// Plain aggregate holding the components of a URL.
struct url_data {
// Starts as URL_FLAGS_NONE. NOTE(review): presumably a bitwise OR of
// url_flags values -- confirm where it is written.
int32_t flags = URL_FLAGS_NONE;
// Defaults to -1, the same value NormalizePort() returns for a scheme's
// listed port. NOTE(review): presumably -1 means "no explicit port".
int port = -1;
std::string scheme;
std::string username;
std::string password;
std::string host;
std::string query;
std::string fragment;
// The path is kept as a list of strings rather than as one string.
// NOTE(review): presumably one element per path segment -- confirm.
std::vector<std::string> path;
};
// Storage for a host value. The three members share the same memory; only
// one of them holds a value at any time.
// NOTE(review): presumably url_host::type records which member is in use.
// NOTE(review): `domain` is a std::string and the user-provided destructor
// below is empty, so destroying the union never runs domain's destructor.
// Code that constructs `domain` presumably has to destroy it explicitly --
// confirm in the callers.
union url_host_value {
std::string domain;
uint32_t ipv4;
// Eight 16-bit values.
uint16_t ipv6[8];
~url_host_value() {}
};
// Kind of host stored in a url_host (see url_host::type). HOST_TYPE_FAILED
// is the only negative value; the other names line up with the members of
// url_host_value (domain, ipv4, ipv6).
enum url_host_type {
HOST_TYPE_FAILED = -1,
HOST_TYPE_DOMAIN = 0,
HOST_TYPE_IPV4 = 1,
HOST_TYPE_IPV6 = 2
};
// A host value together with its kind.
// NOTE(review): presumably `type` selects which member of `value` is valid
// (domain / ipv4 / ipv6) -- confirm in the host parsing code.
struct url_host {
url_host_value value;
enum url_host_type type;
};
} // namespace url
} // namespace node
#endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
#endif // SRC_NODE_URL_H_

1134
test/fixtures/url-setter-tests.json

File diff suppressed because it is too large

122
test/parallel/test-whatwg-url-parsing.js

@ -0,0 +1,122 @@
'use strict';
const common = require('../common');
const URL = require('url').URL;
const path = require('path');
const assert = require('assert');
const tests = require(path.join(common.fixturesDir, 'url-tests.json'));
// Run every case from the url-tests.json fixture through the URL
// constructor.
for (const test of tests) {
  // String entries are skipped (presumably comments inside the fixture).
  if (typeof test === 'string')
    continue;

  // Cases flagged as failures must make the constructor throw.
  if (test.failure) {
    assert.throws(() => new URL(test.input, test.base), /Invalid URL/);
    continue;
  }

  // Call the constructor directly: an unexpected exception still fails the
  // test, and it surfaces with its original message and stack trace.
  const url = new URL(test.input, test.base);
  assert.strictEqual(url.href, test.href);
}
// Extra parsing cases for schemes outside the special set (tftp, telnet,
// redis, urn, ...). Each entry gives the input `url` plus the expected value
// of every URL property that should be verified for it.
const additional_tests = [
  {
    'url': 'tftp://foobar.com/someconfig;mode=netascii',
    'protocol': 'tftp:',
    'hostname': 'foobar.com',
    'pathname': '/someconfig;mode=netascii'
  },
  {
    'url': 'telnet://user:pass@foobar.com:23/',
    'protocol': 'telnet:',
    'username': 'user',
    'password': 'pass',
    'hostname': 'foobar.com',
    'port': '23',
    'pathname': '/'
  },
  {
    'url': 'ut2004://10.10.10.10:7777/Index.ut2',
    'protocol': 'ut2004:',
    'hostname': '10.10.10.10',
    'port': '7777',
    'pathname': '/Index.ut2'
  },
  {
    'url': 'redis://foo:bar@somehost:6379/0?baz=bam&qux=baz',
    'protocol': 'redis:',
    'username': 'foo',
    'password': 'bar',
    'hostname': 'somehost',
    'port': '6379',
    'pathname': '/0',
    'search': '?baz=bam&qux=baz'
  },
  {
    'url': 'rsync://foo@host:911/sup',
    'protocol': 'rsync:',
    'username': 'foo',
    'hostname': 'host',
    'port': '911',
    'pathname': '/sup'
  },
  {
    'url': 'git://github.com/foo/bar.git',
    'protocol': 'git:',
    'hostname': 'github.com',
    'pathname': '/foo/bar.git'
  },
  {
    'url': 'irc://myserver.com:6999/channel?passwd',
    'protocol': 'irc:',
    'hostname': 'myserver.com',
    'port': '6999',
    'pathname': '/channel',
    'search': '?passwd'
  },
  {
    'url': 'dns://fw.example.org:9999/foo.bar.org?type=TXT',
    'protocol': 'dns:',
    'hostname': 'fw.example.org',
    'port': '9999',
    'pathname': '/foo.bar.org',
    'search': '?type=TXT'
  },
  {
    'url': 'ldap://localhost:389/ou=People,o=JNDITutorial',
    'protocol': 'ldap:',
    'hostname': 'localhost',
    'port': '389',
    'pathname': '/ou=People,o=JNDITutorial'
  },
  {
    'url': 'git+https://github.com/foo/bar',
    'protocol': 'git+https:',
    'hostname': 'github.com',
    'pathname': '/foo/bar'
  },
  {
    'url': 'urn:ietf:rfc:2648',
    'protocol': 'urn:',
    'pathname': 'ietf:rfc:2648'
  },
  {
    'url': 'tag:joe@example.org,2001:foo/bar',
    'protocol': 'tag:',
    'pathname': 'joe@example.org,2001:foo/bar'
  }
];

// URL properties that an entry may specify an expectation for.
const checked_props = [
  'protocol',
  'username',
  'password',
  'hostname',
  'host',
  'port',
  'pathname',
  'search',
  'hash'
];

// Parse each URL and compare only the properties the entry defines.
// assert.strictEqual takes (actual, expected), so the parsed value goes
// first to keep failure messages the right way round.
additional_tests.forEach((test) => {
  const u = new URL(test.url);
  for (const prop of checked_props) {
    if (test[prop] !== undefined)
      assert.strictEqual(u[prop], test[prop]);
  }
});

36
test/parallel/test-whatwg-url-searchparams.js

@ -0,0 +1,36 @@
'use strict';
require('../common');
const assert = require('assert');
const URL = require('url').URL;
// Exercises URL#searchParams: has/get/getAll/set/append/delete, string
// serialization, the link to url.search, and iteration. The statements
// depend on each other's side effects, so their order matters.

// Expected serialization once every entry of `values` has been appended
// under the key 'a'.
// NOTE(review): the space in '[object Object]' is expected as '%20' here;
// the WHATWG urlencoded serializer emits '+' for a space -- confirm which
// form is intended.
const serialized = 'a=a&a=1&a=true&a=undefined&a=null&a=%5Bobject%20Object%5D';
// A mix of types; the assertions below expect each to come back as its
// String() form.
const values = ['a', 1, true, undefined, null, {}];
const m = new URL('http://example.org');
const sp = m.searchParams;
// A URL without a query still exposes an (empty) searchParams object.
assert(sp);
assert.strictEqual(sp.toString(), '');
assert.strictEqual(m.search, '');
assert(!sp.has('a'));
// set() is called once per value; afterwards only the last value ({}) is
// expected to remain.
values.forEach((i) => sp.set('a', i));
assert(sp.has('a'));
assert.strictEqual(sp.get('a'), '[object Object]');
sp.delete('a');
assert(!sp.has('a'));
// append() keeps every value: six entries, and get() returns the first.
values.forEach((i) => sp.append('a', i));
assert(sp.has('a'));
assert.strictEqual(sp.getAll('a').length, 6);
assert.strictEqual(sp.get('a'), 'a');
assert.strictEqual(sp.toString(), serialized);
// Changes made through searchParams must show up in url.search.
assert.strictEqual(m.search, `?${serialized}`);
// Iterating yields [key, value] pairs in the order they were appended.
var key, val, n = 0;
for ([key, val] of sp) {
assert.strictEqual(key, 'a');
assert.strictEqual(val, String(values[n++]));
}

24
test/parallel/test-whatwg-url-setters.js

@ -0,0 +1,24 @@
'use strict';
const common = require('../common');
const path = require('path');
const URL = require('url').URL;
const assert = require('assert');
const attrs = require(path.join(common.fixturesDir, 'url-setter-tests.json'));
// Runs all non-skipped setter cases for one URL attribute.
//   attr:  name of the URL property being assigned.
//   cases: fixture entries with href, new_value, expected and, optionally,
//          skip and comment.
// For each case a URL is built from `href`, `new_value` is assigned to
// `attr`, and every property listed in `expected` is compared (loosely, via
// assert.equal) with the resulting URL.
function checkSetterCases(attr, cases) {
  // Counts the cases actually run; used only in the failure message.
  let counter = 0;
  for (const entry of cases) {
    if (entry.skip)
      continue;
    counter += 1;
    const target = new URL(entry.href);
    target[attr] = entry.new_value;
    for (const expectedAttr in entry.expected) {
      const label =
          `${counter} ${attr} ${expectedAttr} ${entry.href} ${entry.comment}`;
      assert.equal(entry.expected[expectedAttr], target[expectedAttr], label);
    }
  }
}

// The fixture maps attribute names to lists of cases; its top-level
// 'comment' key is not a list of cases and is ignored.
for (const attr in attrs) {
  if (attr !== 'comment')
    checkSetterCases(attr, attrs[attr]);
}
Loading…
Cancel
Save