From ac529233083007af9eaf81386576d9df77484570 Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Wed, 19 Apr 2017 11:34:35 -0700 Subject: [PATCH] url: update WHATWG URL API to latest spec - Update to spec - Add opaque hosts - File state did not correctly deal with lack of base URL - Cleanup API for file and non-special URLs - Allow % and IPv6 addresses in non-special URL hosts - Use specific names for percent-encode sets - Add empty host concept for file and non-special URLs - Clarify IPv6 serializer - Fix existing mistakes - Add missing ':' to forbidden host code point list. - Correct IPv4 parser empty label behavior - Maintain type equivalence in URLContext with spec - scheme, username, and password should always be strings - host, port, query, and fragment may be strings or null - Align scheme state more closely with the spec - Make sure the `special` variable is always synced with URL_FLAG_SPECIAL. PR-URL: https://github.com/nodejs/node/pull/12523 Fixes: https://github.com/nodejs/node/issues/10608 Fixes: https://github.com/nodejs/node/issues/10634 Refs: https://github.com/whatwg/url/pull/185 Refs: https://github.com/whatwg/url/pull/225 Refs: https://github.com/whatwg/url/pull/224 Refs: https://github.com/whatwg/url/pull/218 Refs: https://github.com/whatwg/url/pull/243 Refs: https://github.com/whatwg/url/pull/260 Refs: https://github.com/whatwg/url/pull/268 Reviewed-By: James M Snell Reviewed-By: Daijiro Wachi Reviewed-By: Joyee Cheung --- doc/api/url.md | 28 +- lib/internal/url.js | 179 +++++---- src/node_url.cc | 301 ++++++++++----- test/fixtures/url-setter-tests.js | 466 ++++++++++++------------ test/fixtures/url-tests.js | 584 +++++++++++++++--------------- 5 files changed, 830 insertions(+), 728 deletions(-) diff --git a/doc/api/url.md b/doc/api/url.md index 088ea95d23..857611a0f2 100644 --- a/doc/api/url.md +++ b/doc/api/url.md @@ -1049,23 +1049,25 @@ located within the structure of the URL. 
The WHATWG URL Standard uses a more selective and fine grained approach to selecting encoded characters than that used by the older [`url.parse()`][] and [`url.format()`][] methods. -The WHATWG algorithm defines three "encoding sets" that describe ranges of -characters that must be percent-encoded: +The WHATWG algorithm defines three "percent-encode sets" that describe ranges +of characters that must be percent-encoded: -* The *simple encode set* includes code points in range U+0000 to U+001F - (inclusive) and all code points greater than U+007E. +* The *C0 control percent-encode set* includes code points in range U+0000 to + U+001F (inclusive) and all code points greater than U+007E. -* The *default encode set* includes the *simple encode set* and code points - U+0020, U+0022, U+0023, U+003C, U+003E, U+003F, U+0060, U+007B, and U+007D. +* The *path percent-encode set* includes the *C0 control percent-encode set* + and code points U+0020, U+0022, U+0023, U+003C, U+003E, U+003F, U+0060, + U+007B, and U+007D. -* The *userinfo encode set* includes the *default encode set* and code points - U+002F, U+003A, U+003B, U+003D, U+0040, U+005B, U+005C, U+005D, U+005E, and - U+007C. +* The *userinfo percent-encode set* includes the *path percent-encode set* and code + points U+002F, U+003A, U+003B, U+003D, U+0040, U+005B, U+005C, U+005D, + U+005E, and U+007C. -The *simple encode set* is used primary for URL fragments and certain specific -conditions for the path. The *userinfo encode set* is used specifically for -username and passwords encoded within the URL. The *default encode set* is used -for all other cases. +The *userinfo percent-encode set* is used exclusively for usernames and +passwords encoded within the URL. The *path percent-encode set* is used for the +path of most URLs. The *C0 control percent-encode set* is used for all +other cases, including URL fragments in particular, but also host and path +under certain specific conditions. 
When non-ASCII characters appear within a hostname, the hostname is encoded using the [Punycode][] algorithm. Note, however, that a hostname *may* contain diff --git a/lib/internal/url.js b/lib/internal/url.js index 771a916d70..9929c006d5 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -8,6 +8,8 @@ const { const binding = process.binding('url'); const context = Symbol('context'); const cannotBeBase = Symbol('cannot-be-base'); +const cannotHaveUsernamePasswordPort = + Symbol('cannot-have-username-password-port'); const special = Symbol('special'); const searchParams = Symbol('query'); const querystring = require('querystring'); @@ -42,7 +44,7 @@ const kOpaqueOrigin = 'null'; // - https://html.spec.whatwg.org/multipage/browsers.html#ascii-serialisation-of-an-origin function serializeTupleOrigin(scheme, host, port, unicode = true) { const unicodeHost = unicode ? domainToUnicode(host) : host; - return `${scheme}//${unicodeHost}${port == null ? '' : `:${port}`}`; + return `${scheme}//${unicodeHost}${port === null ? '' : `:${port}`}`; } // This class provides the internal state of a URL object. An instance of this @@ -54,14 +56,14 @@ function serializeTupleOrigin(scheme, host, port, unicode = true) { class URLContext { constructor() { this.flags = 0; - this.scheme = undefined; - this.username = undefined; - this.password = undefined; - this.host = undefined; - this.port = undefined; + this.scheme = ':'; + this.username = ''; + this.password = ''; + this.host = null; + this.port = null; this.path = []; - this.query = undefined; - this.fragment = undefined; + this.query = null; + this.fragment = null; } } @@ -70,10 +72,10 @@ function onParseComplete(flags, protocol, username, password, var ctx = this[context]; ctx.flags = flags; ctx.scheme = protocol; - ctx.username = username; - ctx.password = password; + ctx.username = (flags & binding.URL_FLAGS_HAS_USERNAME) !== 0 ? username : ''; + ctx.password = (flags & binding.URL_FLAGS_HAS_PASSWORD) !== 0 ? 
password : ''; ctx.port = port; - ctx.path = path; + ctx.path = (flags & binding.URL_FLAGS_HAS_PATH) !== 0 ? path : []; ctx.query = query; ctx.fragment = fragment; ctx.host = host; @@ -101,52 +103,37 @@ function parse(url, input, base) { function onParseProtocolComplete(flags, protocol, username, password, host, port, path, query, fragment) { - const newIsSpecial = (flags & binding.URL_FLAGS_SPECIAL) !== 0; - const s = this[special]; const ctx = this[context]; - if ((s && !newIsSpecial) || (!s && newIsSpecial)) { - return; - } - if (protocol === 'file:' && - (ctx.username || ctx.password || ctx.port !== undefined)) { - return; - } - if (ctx.scheme === 'file:' && !ctx.host) { - return; - } - if (newIsSpecial) { + if ((flags & binding.URL_FLAGS_SPECIAL) !== 0) { ctx.flags |= binding.URL_FLAGS_SPECIAL; } else { ctx.flags &= ~binding.URL_FLAGS_SPECIAL; } - if (protocol) { - ctx.scheme = protocol; - ctx.flags |= binding.URL_FLAGS_HAS_SCHEME; - } else { - ctx.flags &= ~binding.URL_FLAGS_HAS_SCHEME; - } + ctx.scheme = protocol; } function onParseHostComplete(flags, protocol, username, password, host, port, path, query, fragment) { const ctx = this[context]; - if (host) { + if ((flags & binding.URL_FLAGS_HAS_HOST) !== 0) { ctx.host = host; ctx.flags |= binding.URL_FLAGS_HAS_HOST; } else { + ctx.host = null; ctx.flags &= ~binding.URL_FLAGS_HAS_HOST; } - if (port !== undefined) + if (port !== null) ctx.port = port; } function onParseHostnameComplete(flags, protocol, username, password, host, port, path, query, fragment) { const ctx = this[context]; - if (host) { + if ((flags & binding.URL_FLAGS_HAS_HOST) !== 0) { ctx.host = host; ctx.flags |= binding.URL_FLAGS_HAS_HOST; } else { + ctx.host = null; ctx.flags &= ~binding.URL_FLAGS_HAS_HOST; } } @@ -159,29 +146,29 @@ function onParsePortComplete(flags, protocol, username, password, function onParsePathComplete(flags, protocol, username, password, host, port, path, query, fragment) { const ctx = this[context]; - if (path) { + if 
((flags & binding.URL_FLAGS_HAS_PATH) !== 0) { ctx.path = path; ctx.flags |= binding.URL_FLAGS_HAS_PATH; } else { + ctx.path = []; ctx.flags &= ~binding.URL_FLAGS_HAS_PATH; } + + // The C++ binding may set host to empty string. + if ((flags & binding.URL_FLAGS_HAS_HOST) !== 0) { + ctx.host = host; + ctx.flags |= binding.URL_FLAGS_HAS_HOST; + } } function onParseSearchComplete(flags, protocol, username, password, host, port, path, query, fragment) { - const ctx = this[context]; - ctx.query = query; + this[context].query = query; } function onParseHashComplete(flags, protocol, username, password, host, port, path, query, fragment) { - const ctx = this[context]; - if (fragment) { - ctx.fragment = fragment; - ctx.flags |= binding.URL_FLAGS_HAS_FRAGMENT; - } else { - ctx.flags &= ~binding.URL_FLAGS_HAS_FRAGMENT; - } + this[context].fragment = fragment; } function getEligibleConstructor(obj) { @@ -214,6 +201,14 @@ class URL { return (this[context].flags & binding.URL_FLAGS_CANNOT_BE_BASE) !== 0; } + // https://url.spec.whatwg.org/#cannot-have-a-username-password-port + get [cannotHaveUsernamePasswordPort]() { + const { host, scheme } = this[context]; + return ((host == null || host === '') || + this[cannotBeBase] || + scheme === 'file:'); + } + [util.inspect.custom](depth, opts) { if (this == null || Object.getPrototypeOf(this[context]) !== URLContext.prototype) { @@ -235,7 +230,7 @@ class URL { obj.origin = this.origin; obj.protocol = this.protocol; obj.username = this.username; - obj.password = (opts.showHidden || ctx.password == null) ? + obj.password = (opts.showHidden || ctx.password === '') ? 
this.password : '--------'; obj.host = this.host; obj.hostname = this.hostname; @@ -270,14 +265,11 @@ Object.defineProperties(URL.prototype, { auth: true }, options); const ctx = this[context]; - var ret; - if (this.protocol) - ret = this.protocol; - if (ctx.host !== undefined) { + var ret = ctx.scheme; + if (ctx.host !== null) { ret += '//'; - const has_username = typeof ctx.username === 'string'; - const has_password = typeof ctx.password === 'string' && - ctx.password !== ''; + const has_username = ctx.username !== ''; + const has_password = ctx.password !== ''; if (options.auth && (has_username || has_password)) { if (has_username) ret += ctx.username; @@ -292,9 +284,9 @@ Object.defineProperties(URL.prototype, { } if (this.pathname) ret += this.pathname; - if (options.search && typeof ctx.query === 'string') + if (options.search && ctx.query !== null) ret += `?${ctx.query}`; - if (options.fragment && typeof ctx.fragment === 'string') + if (options.fragment && ctx.fragment !== null) ret += `#${ctx.fragment}`; return ret; } @@ -363,7 +355,12 @@ Object.defineProperties(URL.prototype, { scheme = `${scheme}`; if (scheme.length === 0) return; - binding.parse(scheme, binding.kSchemeStart, null, this[context], + const ctx = this[context]; + if (ctx.scheme === 'file:' && + (ctx.host === '' || ctx.host === null)) { + return; + } + binding.parse(scheme, binding.kSchemeStart, null, ctx, onParseProtocolComplete.bind(this)); } }, @@ -371,16 +368,16 @@ Object.defineProperties(URL.prototype, { enumerable: true, configurable: true, get() { - return this[context].username || ''; + return this[context].username; }, set(username) { // toUSVString is not needed. 
username = `${username}`; - if (!this.hostname) + if (this[cannotHaveUsernamePasswordPort]) return; const ctx = this[context]; - if (!username) { - ctx.username = null; + if (username === '') { + ctx.username = ''; ctx.flags &= ~binding.URL_FLAGS_HAS_USERNAME; return; } @@ -392,16 +389,16 @@ Object.defineProperties(URL.prototype, { enumerable: true, configurable: true, get() { - return this[context].password || ''; + return this[context].password; }, set(password) { // toUSVString is not needed. password = `${password}`; - if (!this.hostname) + if (this[cannotHaveUsernamePasswordPort]) return; const ctx = this[context]; - if (!password) { - ctx.password = null; + if (password === '') { + ctx.password = ''; ctx.flags &= ~binding.URL_FLAGS_HAS_PASSWORD; return; } @@ -415,7 +412,7 @@ Object.defineProperties(URL.prototype, { get() { const ctx = this[context]; var ret = ctx.host || ''; - if (ctx.port !== undefined) + if (ctx.port !== null) ret += `:${ctx.port}`; return ret; }, @@ -423,15 +420,8 @@ Object.defineProperties(URL.prototype, { const ctx = this[context]; // toUSVString is not needed. host = `${host}`; - if (this[cannotBeBase] || - (this[special] && host.length === 0)) { - // Cannot set the host if cannot-be-base is set or - // scheme is special and host length is zero - return; - } - if (!host) { - ctx.host = null; - ctx.flags &= ~binding.URL_FLAGS_HAS_HOST; + if (this[cannotBeBase]) { + // Cannot set the host if cannot-be-base is set return; } binding.parse(host, binding.kHost, null, ctx, @@ -448,15 +438,8 @@ Object.defineProperties(URL.prototype, { const ctx = this[context]; // toUSVString is not needed. 
host = `${host}`; - if (this[cannotBeBase] || - (this[special] && host.length === 0)) { - // Cannot set the host if cannot-be-base is set or - // scheme is special and host length is zero - return; - } - if (!host) { - ctx.host = null; - ctx.flags &= ~binding.URL_FLAGS_HAS_HOST; + if (this[cannotBeBase]) { + // Cannot set the host if cannot-be-base is set return; } binding.parse(host, binding.kHostname, null, ctx, @@ -468,17 +451,16 @@ Object.defineProperties(URL.prototype, { configurable: true, get() { const port = this[context].port; - return port === undefined ? '' : String(port); + return port === null ? '' : String(port); }, set(port) { // toUSVString is not needed. port = `${port}`; - const ctx = this[context]; - if (!ctx.host || this[cannotBeBase] || - this.protocol === 'file:') + if (this[cannotHaveUsernamePasswordPort]) return; + const ctx = this[context]; if (port === '') { - ctx.port = undefined; + ctx.port = null; return; } binding.parse(port, binding.kPort, null, ctx, @@ -492,7 +474,9 @@ Object.defineProperties(URL.prototype, { const ctx = this[context]; if (this[cannotBeBase]) return ctx.path[0]; - return ctx.path !== undefined ? `/${ctx.path.join('/')}` : ''; + if (ctx.path.length === 0) + return ''; + return `/${ctx.path.join('/')}`; }, set(path) { // toUSVString is not needed. @@ -507,13 +491,15 @@ Object.defineProperties(URL.prototype, { enumerable: true, configurable: true, get() { - const ctx = this[context]; - return !ctx.query ? '' : `?${ctx.query}`; + const { query } = this[context]; + if (query === null || query === '') + return ''; + return `?${query}`; }, set(search) { const ctx = this[context]; search = toUSVString(search); - if (!search) { + if (search === '') { ctx.query = null; ctx.flags &= ~binding.URL_FLAGS_HAS_QUERY; } else { @@ -539,8 +525,10 @@ Object.defineProperties(URL.prototype, { enumerable: true, configurable: true, get() { - const ctx = this[context]; - return !ctx.fragment ? 
'' : `#${ctx.fragment}`; + const { fragment } = this[context]; + if (fragment === null || fragment === '') + return ''; + return `#${fragment}`; }, set(hash) { const ctx = this[context]; @@ -553,6 +541,7 @@ Object.defineProperties(URL.prototype, { } if (hash[0] === '#') hash = hash.slice(1); ctx.fragment = ''; + ctx.flags |= binding.URL_FLAGS_HAS_FRAGMENT; binding.parse(hash, binding.kFragment, null, ctx, onParseHashComplete.bind(this)); } @@ -1384,10 +1373,10 @@ function constructUrl(flags, protocol, username, password, var ctx = new URLContext(); ctx.flags = flags; ctx.scheme = protocol; - ctx.username = username; - ctx.password = password; + ctx.username = (flags & binding.URL_FLAGS_HAS_USERNAME) !== 0 ? username : ''; + ctx.password = (flags & binding.URL_FLAGS_HAS_PASSWORD) !== 0 ? password : ''; ctx.port = port; - ctx.path = path; + ctx.path = (flags & binding.URL_FLAGS_HAS_PATH) !== 0 ? path : []; ctx.query = query; ctx.fragment = fragment; ctx.host = host; diff --git a/src/node_url.cc b/src/node_url.cc index 7df9461fdd..50a9380d37 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -59,10 +59,12 @@ static const char kEOL = -1; // Used in ToUSVString(). static const char16_t kUnicodeReplacementCharacter = 0xFFFD; +// https://url.spec.whatwg.org/#concept-host union url_host_value { std::string domain; uint32_t ipv4; uint16_t ipv6[8]; + std::string opaque; ~url_host_value() {} }; @@ -70,7 +72,8 @@ enum url_host_type { HOST_TYPE_FAILED = -1, HOST_TYPE_DOMAIN = 0, HOST_TYPE_IPV4 = 1, - HOST_TYPE_IPV6 = 2 + HOST_TYPE_IPV6 = 2, + HOST_TYPE_OPAQUE = 3, }; struct url_host { @@ -151,6 +154,13 @@ static inline T ASCIILowercase(T ch) { return IsASCIIAlpha(ch) ? (ch | 0x20) : ch; } +// https://url.spec.whatwg.org/#forbidden-host-code-point +CHAR_TEST(8, IsForbiddenHostCodePoint, + ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' || + ch == ' ' || ch == '#' || ch == '%' || ch == '/' || + ch == ':' || ch == '?' 
|| ch == '@' || ch == '[' || + ch == '\\' || ch == ']') + // https://url.spec.whatwg.org/#windows-drive-letter TWO_CHAR_STRING_TEST(8, IsWindowsDriveLetter, (IsASCIIAlpha(ch1) && (ch2 == ':' || ch2 == '|'))) @@ -206,7 +216,7 @@ static const char* hex[256] = { "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF" }; -static const uint8_t SIMPLE_ENCODE_SET[32] = { +static const uint8_t C0_CONTROL_ENCODE_SET[32] = { // 00 01 02 03 04 05 06 07 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, // 08 09 0A 0B 0C 0D 0E 0F @@ -273,7 +283,7 @@ static const uint8_t SIMPLE_ENCODE_SET[32] = { 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 }; -static const uint8_t DEFAULT_ENCODE_SET[32] = { +static const uint8_t PATH_ENCODE_SET[32] = { // 00 01 02 03 04 05 06 07 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, // 08 09 0A 0B 0C 0D 0E 0F @@ -756,8 +766,8 @@ static url_host_type ParseIPv4Host(url_host* host, if (ch == '.' || ch == kEOL) { if (++parts > 4) goto end; - if (pointer - mark == 0) - break; + if (pointer == mark) + goto end; int64_t n = ParseNumber(mark, pointer); if (n < 0) goto end; @@ -797,9 +807,32 @@ static url_host_type ParseIPv4Host(url_host* host, return type; } +static url_host_type ParseOpaqueHost(url_host* host, + const char* input, + size_t length) { + url_host_type type = HOST_TYPE_OPAQUE; + std::string output; + output.reserve(length * 3); + for (size_t i = 0; i < length; i++) { + const char ch = input[i]; + if (ch != '%' && IsForbiddenHostCodePoint(ch)) { + type = HOST_TYPE_FAILED; + goto end; + } else { + AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET); + } + } + + host->value.opaque = output; + end: + host->type = type; + return type; +} + static url_host_type ParseHost(url_host* host, const char* input, size_t length, + bool is_special, bool unicode = false) { url_host_type type = HOST_TYPE_FAILED; const char* pointer = input; @@ -814,6 +847,9 @@ static url_host_type ParseHost(url_host* host, return ParseIPv6Host(host, ++pointer, 
length - 2); } + if (!is_special) + return ParseOpaqueHost(host, input, length); + // First, we have to percent decode PercentDecode(input, length, &decoded); @@ -824,10 +860,7 @@ static url_host_type ParseHost(url_host* host, // If any of the following characters are still present, we have to fail for (size_t n = 0; n < decoded.size(); n++) { const char ch = decoded[n]; - if (ch == 0x00 || ch == 0x09 || ch == 0x0a || ch == 0x0d || - ch == 0x20 || ch == '#' || ch == '%' || ch == '/' || - ch == '?' || ch == '@' || ch == '[' || ch == '\\' || - ch == ']') { + if (IsForbiddenHostCodePoint(ch)) { goto end; } } @@ -907,14 +940,17 @@ static url_host_type WriteHost(url_host* host, std::string* dest) { uint16_t* start = &host->value.ipv6[0]; uint16_t* compress_pointer = FindLongestZeroSequence(start, 8); + bool ignore0 = false; for (int n = 0; n <= 7; n++) { uint16_t* piece = &host->value.ipv6[n]; + if (ignore0 && *piece == 0) + continue; + else if (ignore0) + ignore0 = false; if (compress_pointer == piece) { *dest += n == 0 ? 
"::" : ":"; - while (*piece == 0 && ++n < 8) - piece = &host->value.ipv6[n]; - if (n == 8) - break; + ignore0 = true; + continue; } char buf[5]; char* buffer = buf; @@ -926,6 +962,9 @@ static url_host_type WriteHost(url_host* host, std::string* dest) { *dest += ']'; break; } + case HOST_TYPE_OPAQUE: + *dest = host->value.opaque; + break; case HOST_TYPE_FAILED: break; } @@ -934,11 +973,14 @@ static url_host_type WriteHost(url_host* host, std::string* dest) { static bool ParseHost(std::string* input, std::string* output, + bool is_special, bool unicode = false) { - if (input->length() == 0) + if (input->length() == 0) { + output->clear(); return true; + } url_host host{{""}, HOST_TYPE_DOMAIN}; - ParseHost(&host, input->c_str(), input->length(), unicode); + ParseHost(&host, input->c_str(), input->length(), is_special, unicode); if (host.type == HOST_TYPE_FAILED) return false; WriteHost(&host, output); @@ -1006,6 +1048,12 @@ static inline void HarvestContext(Environment* env, context->flags |= URL_FLAGS_SPECIAL; if (_flags & URL_FLAGS_CANNOT_BE_BASE) context->flags |= URL_FLAGS_CANNOT_BE_BASE; + if (_flags & URL_FLAGS_HAS_USERNAME) + context->flags |= URL_FLAGS_HAS_USERNAME; + if (_flags & URL_FLAGS_HAS_PASSWORD) + context->flags |= URL_FLAGS_HAS_PASSWORD; + if (_flags & URL_FLAGS_HAS_HOST) + context->flags |= URL_FLAGS_HAS_HOST; } Local scheme = GET(env, context_obj, "scheme"); if (scheme->IsString()) { @@ -1015,6 +1063,23 @@ static inline void HarvestContext(Environment* env, Local port = GET(env, context_obj, "port"); if (port->IsInt32()) context->port = port->Int32Value(env->context()).FromJust(); + if (context->flags & URL_FLAGS_HAS_USERNAME) { + Local username = GET(env, context_obj, "username"); + CHECK(username->IsString()); + Utf8Value value(env->isolate(), username); + context->username.assign(*value, value.length()); + } + if (context->flags & URL_FLAGS_HAS_PASSWORD) { + Local password = GET(env, context_obj, "password"); + CHECK(password->IsString()); + 
Utf8Value value(env->isolate(), password); + context->password.assign(*value, value.length()); + } + Local host = GET(env, context_obj, "host"); + if (host->IsString()) { + Utf8Value value(env->isolate(), host); + context->host.assign(*value, value.length()); + } } // Single dot segment can be ".", "%2e", or "%2E" @@ -1077,7 +1142,6 @@ void URL::Parse(const char* input, bool atflag = false; bool sbflag = false; bool uflag = false; - bool base_is_file = false; int wskip = 0; std::string buffer; @@ -1137,25 +1201,40 @@ void URL::Parse(const char* input, case kScheme: if (IsASCIIAlphanumeric(ch) || ch == '+' || ch == '-' || ch == '.') { buffer += ASCIILowercase(ch); - p++; - continue; } else if (ch == ':' || (has_state_override && ch == kEOL)) { - if (buffer.size() > 0) { - buffer += ':'; - url->scheme = buffer; - } else if (has_state_override) { + if (has_state_override && buffer.size() == 0) { url->flags |= URL_FLAGS_TERMINATED; return; } - if (IsSpecial(url->scheme)) { + buffer += ':'; + + bool new_is_special = IsSpecial(buffer); + + if (has_state_override) { + if ((special != new_is_special) || + ((buffer == "file:") && + ((url->flags & URL_FLAGS_HAS_USERNAME) || + (url->flags & URL_FLAGS_HAS_PASSWORD) || + (url->port != -1)))) { + url->flags |= URL_FLAGS_TERMINATED; + return; + } + + // File scheme && (host == empty or null) check left to JS-land + // as it can be done before even entering C++ binding. 
+ } + + url->scheme = buffer; + if (new_is_special) { url->flags |= URL_FLAGS_SPECIAL; special = true; } else { url->flags &= ~URL_FLAGS_SPECIAL; + special = false; } + buffer.clear(); if (has_state_override) return; - buffer.clear(); if (url->scheme == "file:") { state = kFile; } else if (special && @@ -1195,6 +1274,7 @@ void URL::Parse(const char* input, special = true; } else { url->flags &= ~URL_FLAGS_SPECIAL; + special = false; } if (base->flags & URL_FLAGS_HAS_PATH) { url->flags |= URL_FLAGS_HAS_PATH; @@ -1246,6 +1326,7 @@ void URL::Parse(const char* input, special = true; } else { url->flags &= ~URL_FLAGS_SPECIAL; + special = false; } switch (ch) { case kEOL: @@ -1414,6 +1495,10 @@ void URL::Parse(const char* input, ch == '?' || ch == '#' || special_back_slash) { + if (atflag && buffer.size() == 0) { + url->flags |= URL_FLAGS_FAILED; + return; + } p -= buffer.size() + 1 + wskip; buffer.clear(); state = kHost; @@ -1423,13 +1508,16 @@ void URL::Parse(const char* input, break; case kHost: case kHostname: - if (ch == ':' && !sbflag) { - if (special && buffer.size() == 0) { + if (has_state_override && url->scheme == "file:") { + state = kFileHost; + continue; + } else if (ch == ':' && !sbflag) { + if (buffer.size() == 0) { url->flags |= URL_FLAGS_FAILED; return; } url->flags |= URL_FLAGS_HAS_HOST; - if (!ParseHost(&buffer, &url->host)) { + if (!ParseHost(&buffer, &url->host, special)) { url->flags |= URL_FLAGS_FAILED; return; } @@ -1448,8 +1536,15 @@ void URL::Parse(const char* input, url->flags |= URL_FLAGS_FAILED; return; } + if (has_state_override && + buffer.size() == 0 && + ((url->username.size() > 0 || url->password.size() > 0) || + url->port != -1)) { + url->flags |= URL_FLAGS_TERMINATED; + return; + } url->flags |= URL_FLAGS_HAS_HOST; - if (!ParseHost(&buffer, &url->host)) { + if (!ParseHost(&buffer, &url->host, special)) { url->flags |= URL_FLAGS_FAILED; return; } @@ -1463,7 +1558,7 @@ void URL::Parse(const char* input, sbflag = true; if (ch == ']') 
sbflag = false; - buffer += ASCIILowercase(ch); + buffer += ch; } break; case kPort: @@ -1508,12 +1603,12 @@ void URL::Parse(const char* input, } break; case kFile: - base_is_file = ( - has_base && - base->scheme == "file:"); - switch (ch) { - case kEOL: - if (base_is_file) { + url->scheme = "file:"; + if (ch == '/' || ch == '\\') { + state = kFileSlash; + } else if (has_base && base->scheme == "file:") { + switch (ch) { + case kEOL: if (base->flags & URL_FLAGS_HAS_HOST) { url->flags |= URL_FLAGS_HAS_HOST; url->host = base->host; @@ -1527,15 +1622,7 @@ void URL::Parse(const char* input, url->query = base->query; } break; - } - state = kPath; - continue; - case '\\': - case '/': - state = kFileSlash; - break; - case '?': - if (base_is_file) { + case '?': if (base->flags & URL_FLAGS_HAS_HOST) { url->flags |= URL_FLAGS_HAS_HOST; url->host = base->host; @@ -1545,11 +1632,10 @@ void URL::Parse(const char* input, url->path = base->path; } url->flags |= URL_FLAGS_HAS_QUERY; + url->query.clear(); state = kQuery; break; - } - case '#': - if (base_is_file) { + case '#': if (base->flags & URL_FLAGS_HAS_HOST) { url->flags |= URL_FLAGS_HAS_HOST; url->host = base->host; @@ -1562,29 +1648,33 @@ void URL::Parse(const char* input, url->flags |= URL_FLAGS_HAS_QUERY; url->query = base->query; } + url->flags |= URL_FLAGS_HAS_FRAGMENT; + url->fragment.clear(); state = kFragment; break; - } - default: - if (base_is_file && - (!IsWindowsDriveLetter(ch, p[1]) || - end - p == 1 || - (p[2] != '/' && - p[2] != '\\' && - p[2] != '?' && - p[2] != '#'))) { - if (base->flags & URL_FLAGS_HAS_HOST) { - url->flags |= URL_FLAGS_HAS_HOST; - url->host = base->host; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; + default: + if ((!IsWindowsDriveLetter(ch, p[1]) || + end - p == 1 || + (p[2] != '/' && + p[2] != '\\' && + p[2] != '?' 
&& + p[2] != '#'))) { + if (base->flags & URL_FLAGS_HAS_HOST) { + url->flags |= URL_FLAGS_HAS_HOST; + url->host = base->host; + } + if (base->flags & URL_FLAGS_HAS_PATH) { + url->flags |= URL_FLAGS_HAS_PATH; + url->path = base->path; + } + ShortenUrlPath(url); } - ShortenUrlPath(url); - } - state = kPath; - continue; + state = kPath; + continue; + } + } else { + state = kPath; + continue; } break; case kFileSlash: @@ -1597,8 +1687,13 @@ void URL::Parse(const char* input, url->flags |= URL_FLAGS_HAS_PATH; url->path.push_back(base->path[0]); } else { - url->flags |= URL_FLAGS_HAS_HOST; - url->host = base->host; + if (base->flags & URL_FLAGS_HAS_HOST) { + url->flags |= URL_FLAGS_HAS_HOST; + url->host = base->host; + } else { + url->flags &= ~URL_FLAGS_HAS_HOST; + url->host.clear(); + } } } state = kPath; @@ -1611,19 +1706,28 @@ void URL::Parse(const char* input, ch == '\\' || ch == '?' || ch == '#') { - if (buffer.size() == 2 && + if (!has_state_override && + buffer.size() == 2 && IsWindowsDriveLetter(buffer)) { state = kPath; } else if (buffer.size() == 0) { + url->flags |= URL_FLAGS_HAS_HOST; + url->host.clear(); + if (has_state_override) + return; state = kPathStart; } else { - if (buffer != "localhost") { - url->flags |= URL_FLAGS_HAS_HOST; - if (!ParseHost(&buffer, &url->host)) { - url->flags |= URL_FLAGS_FAILED; - return; - } + std::string host; + if (!ParseHost(&buffer, &host, special)) { + url->flags |= URL_FLAGS_FAILED; + return; } + if (host == "localhost") + host.clear(); + url->flags |= URL_FLAGS_HAS_HOST; + url->host = host; + if (has_state_override) + return; buffer.clear(); state = kPathStart; } @@ -1664,17 +1768,20 @@ void URL::Parse(const char* input, url->flags |= URL_FLAGS_HAS_PATH; url->path.push_back(""); } - } else if (IsSingleDotSegment(buffer)) { - if (ch != '/' && !special_back_slash) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path.push_back(""); - } + } else if (IsSingleDotSegment(buffer) && + ch != '/' && !special_back_slash) { + 
url->flags |= URL_FLAGS_HAS_PATH; + url->path.push_back(""); } else if (!IsSingleDotSegment(buffer)) { if (url->scheme == "file:" && url->path.empty() && buffer.size() == 2 && IsWindowsDriveLetter(buffer)) { - url->flags &= ~URL_FLAGS_HAS_HOST; + if ((url->flags & URL_FLAGS_HAS_HOST) && + !url->host.empty()) { + url->host.clear(); + url->flags |= URL_FLAGS_HAS_HOST; + } buffer[1] = ':'; } url->flags |= URL_FLAGS_HAS_PATH; @@ -1697,7 +1804,7 @@ void URL::Parse(const char* input, state = kFragment; } } else { - AppendOrEscape(&buffer, ch, DEFAULT_ENCODE_SET); + AppendOrEscape(&buffer, ch, PATH_ENCODE_SET); } break; case kCannotBeBase: @@ -1712,7 +1819,7 @@ void URL::Parse(const char* input, if (url->path.size() == 0) url->path.push_back(""); if (url->path.size() > 0 && ch != kEOL) - AppendOrEscape(&url->path[0], ch, SIMPLE_ENCODE_SET); + AppendOrEscape(&url->path[0], ch, C0_CONTROL_ENCODE_SET); } break; case kQuery: @@ -1735,7 +1842,7 @@ void URL::Parse(const char* input, case 0: break; default: - AppendOrEscape(&buffer, ch, SIMPLE_ENCODE_SET); + AppendOrEscape(&buffer, ch, C0_CONTROL_ENCODE_SET); } break; default: @@ -1800,17 +1907,18 @@ static void Parse(Environment* env, // Define the return value placeholders const Local undef = Undefined(isolate); + const Local null = Null(isolate); if (!(url.flags & URL_FLAGS_FAILED)) { Local argv[9] = { undef, undef, undef, undef, + null, // host defaults to null + null, // port defaults to null undef, - undef, - undef, - undef, - undef, + null, // query defaults to null + null, // fragment defaults to null }; SetArgs(env, argv, &url); (void)cb->Call(context, recv, arraysize(argv), argv); @@ -1914,7 +2022,8 @@ static void DomainToASCII(const FunctionCallbackInfo& args) { Utf8Value value(env->isolate(), args[0]); url_host host{{""}, HOST_TYPE_DOMAIN}; - ParseHost(&host, *value, value.length()); + // Assuming the host is used for a special scheme. 
+ ParseHost(&host, *value, value.length(), true); if (host.type == HOST_TYPE_FAILED) { args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), "")); return; @@ -1934,7 +2043,8 @@ static void DomainToUnicode(const FunctionCallbackInfo& args) { Utf8Value value(env->isolate(), args[0]); url_host host{{""}, HOST_TYPE_DOMAIN}; - ParseHost(&host, *value, value.length(), true); + // Assuming the host is used for a special scheme. + ParseHost(&host, *value, value.length(), true, true); if (host.type == HOST_TYPE_FAILED) { args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), "")); return; @@ -1957,6 +2067,7 @@ const Local URL::ToObject(Environment* env) const { Context::Scope context_scope(context); const Local undef = Undefined(isolate); + const Local null = Null(isolate); if (context_.flags & URL_FLAGS_FAILED) return Local(); @@ -1966,11 +2077,11 @@ const Local URL::ToObject(Environment* env) const { undef, undef, undef, + null, // host defaults to null + null, // port defaults to null undef, - undef, - undef, - undef, - undef, + null, // query defaults to null + null, // fragment defaults to null }; SetArgs(env, argv, &context_); diff --git a/test/fixtures/url-setter-tests.js b/test/fixtures/url-setter-tests.js index f537075674..42580312c2 100644 --- a/test/fixtures/url-setter-tests.js +++ b/test/fixtures/url-setter-tests.js @@ -370,14 +370,14 @@ module.exports = "username": "wario" } }, - // { - // "href": "file://test/", - // "new_value": "test", - // "expected": { - // "href": "file://test/", - // "username": "" - // } - // } + { + "href": "file://test/", + "new_value": "test", + "expected": { + "href": "file://test/", + "username": "" + } + } ], "password": [ { @@ -473,14 +473,14 @@ module.exports = "password": "bowser" } }, - // { - // "href": "file://test/", - // "new_value": "test", - // "expected": { - // "href": "file://test/", - // "password": "" - // } - // } + { + "href": "file://test/", + "new_value": "test", + "expected": { + "href": 
"file://test/", + "password": "" + } + } ], "host": [ { @@ -493,33 +493,33 @@ module.exports = "hostname": "x" } }, - // { - // "href": "sc://x/", - // "new_value": "\u0009", - // "expected": { - // "href": "sc:///", - // "host": "", - // "hostname": "" - // } - // }, - // { - // "href": "sc://x/", - // "new_value": "\u000A", - // "expected": { - // "href": "sc:///", - // "host": "", - // "hostname": "" - // } - // }, - // { - // "href": "sc://x/", - // "new_value": "\u000D", - // "expected": { - // "href": "sc:///", - // "host": "", - // "hostname": "" - // } - // }, + { + "href": "sc://x/", + "new_value": "\u0009", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, + { + "href": "sc://x/", + "new_value": "\u000A", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, + { + "href": "sc://x/", + "new_value": "\u000D", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, { "href": "sc://x/", "new_value": " ", @@ -529,33 +529,33 @@ module.exports = "hostname": "x" } }, - // { - // "href": "sc://x/", - // "new_value": "#", - // "expected": { - // "href": "sc:///", - // "host": "", - // "hostname": "" - // } - // }, - // { - // "href": "sc://x/", - // "new_value": "/", - // "expected": { - // "href": "sc:///", - // "host": "", - // "hostname": "" - // } - // }, - // { - // "href": "sc://x/", - // "new_value": "?", - // "expected": { - // "href": "sc:///", - // "host": "", - // "hostname": "" - // } - // }, + { + "href": "sc://x/", + "new_value": "#", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, + { + "href": "sc://x/", + "new_value": "/", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, + { + "href": "sc://x/", + "new_value": "?", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, { "href": "sc://x/", "new_value": "@", @@ -565,15 +565,15 @@ module.exports = "hostname": "x" } }, - // { - // "href": "sc://x/", - // 
"new_value": "ß", - // "expected": { - // "href": "sc://%C3%9F/", - // "host": "%C3%9F", - // "hostname": "%C3%9F" - // } - // }, + { + "href": "sc://x/", + "new_value": "ß", + "expected": { + "href": "sc://%C3%9F/", + "host": "%C3%9F", + "hostname": "%C3%9F" + } + }, { "comment": "IDNA Nontransitional_Processing", "href": "https://x/", @@ -916,56 +916,56 @@ module.exports = "hostname": "example.net" } }, - // { - // "href": "file://y/", - // "new_value": "x:123", - // "expected": { - // "href": "file://y/", - // "host": "y", - // "hostname": "y", - // "port": "" - // } - // }, - // { - // "href": "file://y/", - // "new_value": "loc%41lhost", - // "expected": { - // "href": "file:///", - // "host": "", - // "hostname": "", - // "port": "" - // } - // }, - // { - // "href": "file://hi/x", - // "new_value": "", - // "expected": { - // "href": "file:///x", - // "host": "", - // "hostname": "", - // "port": "" - // } - // }, - // { - // "href": "sc://test@test/", - // "new_value": "", - // "expected": { - // "href": "sc://test@test/", - // "host": "test", - // "hostname": "test", - // "username": "test" - // } - // }, - // { - // "href": "sc://test:12/", - // "new_value": "", - // "expected": { - // "href": "sc://test:12/", - // "host": "test:12", - // "hostname": "test", - // "port": "12" - // } - // } + { + "href": "file://y/", + "new_value": "x:123", + "expected": { + "href": "file://y/", + "host": "y", + "hostname": "y", + "port": "" + } + }, + { + "href": "file://y/", + "new_value": "loc%41lhost", + "expected": { + "href": "file:///", + "host": "", + "hostname": "", + "port": "" + } + }, + { + "href": "file://hi/x", + "new_value": "", + "expected": { + "href": "file:///x", + "host": "", + "hostname": "", + "port": "" + } + }, + { + "href": "sc://test@test/", + "new_value": "", + "expected": { + "href": "sc://test@test/", + "host": "test", + "hostname": "test", + "username": "test" + } + }, + { + "href": "sc://test:12/", + "new_value": "", + "expected": { + "href": 
"sc://test:12/", + "host": "test:12", + "hostname": "test", + "port": "12" + } + } ], "hostname": [ { @@ -978,33 +978,33 @@ module.exports = "hostname": "x" } }, - // { - // "href": "sc://x/", - // "new_value": "\u0009", - // "expected": { - // "href": "sc:///", - // "host": "", - // "hostname": "" - // } - // }, - // { - // "href": "sc://x/", - // "new_value": "\u000A", - // "expected": { - // "href": "sc:///", - // "host": "", - // "hostname": "" - // } - // }, - // { - // "href": "sc://x/", - // "new_value": "\u000D", - // "expected": { - // "href": "sc:///", - // "host": "", - // "hostname": "" - // } - // }, + { + "href": "sc://x/", + "new_value": "\u0009", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, + { + "href": "sc://x/", + "new_value": "\u000A", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, + { + "href": "sc://x/", + "new_value": "\u000D", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, { "href": "sc://x/", "new_value": " ", @@ -1014,33 +1014,33 @@ module.exports = "hostname": "x" } }, - // { - // "href": "sc://x/", - // "new_value": "#", - // "expected": { - // "href": "sc:///", - // "host": "", - // "hostname": "" - // } - // }, - // { - // "href": "sc://x/", - // "new_value": "/", - // "expected": { - // "href": "sc:///", - // "host": "", - // "hostname": "" - // } - // }, - // { - // "href": "sc://x/", - // "new_value": "?", - // "expected": { - // "href": "sc:///", - // "host": "", - // "hostname": "" - // } - // }, + { + "href": "sc://x/", + "new_value": "#", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, + { + "href": "sc://x/", + "new_value": "/", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, + { + "href": "sc://x/", + "new_value": "?", + "expected": { + "href": "sc:///", + "host": "", + "hostname": "" + } + }, { "href": "sc://x/", "new_value": "@", @@ -1250,56 +1250,56 @@ module.exports = "hostname": 
"example.net" } }, - // { - // "href": "file://y/", - // "new_value": "x:123", - // "expected": { - // "href": "file://y/", - // "host": "y", - // "hostname": "y", - // "port": "" - // } - // }, - // { - // "href": "file://y/", - // "new_value": "loc%41lhost", - // "expected": { - // "href": "file:///", - // "host": "", - // "hostname": "", - // "port": "" - // } - // }, - // { - // "href": "file://hi/x", - // "new_value": "", - // "expected": { - // "href": "file:///x", - // "host": "", - // "hostname": "", - // "port": "" - // } - // }, - // { - // "href": "sc://test@test/", - // "new_value": "", - // "expected": { - // "href": "sc://test@test/", - // "host": "test", - // "hostname": "test", - // "username": "test" - // } - // }, - // { - // "href": "sc://test:12/", - // "new_value": "", - // "expected": { - // "href": "sc://test:12/", - // "host": "test:12", - // "hostname": "test", - // "port": "12" - // } - // } + { + "href": "file://y/", + "new_value": "x:123", + "expected": { + "href": "file://y/", + "host": "y", + "hostname": "y", + "port": "" + } + }, + { + "href": "file://y/", + "new_value": "loc%41lhost", + "expected": { + "href": "file:///", + "host": "", + "hostname": "", + "port": "" + } + }, + { + "href": "file://hi/x", + "new_value": "", + "expected": { + "href": "file:///x", + "host": "", + "hostname": "", + "port": "" + } + }, + { + "href": "sc://test@test/", + "new_value": "", + "expected": { + "href": "sc://test@test/", + "host": "test", + "hostname": "test", + "username": "test" + } + }, + { + "href": "sc://test:12/", + "new_value": "", + "expected": { + "href": "sc://test:12/", + "host": "test:12", + "hostname": "test", + "port": "12" + } + } ], "port": [ { diff --git a/test/fixtures/url-tests.js b/test/fixtures/url-tests.js index d44a36bcfe..69d289bd06 100644 --- a/test/fixtures/url-tests.js +++ b/test/fixtures/url-tests.js @@ -1045,16 +1045,16 @@ module.exports = "search": "", "hash": "" }, - // { - // "input": "file://example:1/", - // 
"base": "about:blank", - // "failure": true - // }, - // { - // "input": "file://example:test/", - // "base": "about:blank", - // "failure": true - // }, + { + "input": "file://example:1/", + "base": "about:blank", + "failure": true + }, + { + "input": "file://example:test/", + "base": "about:blank", + "failure": true + }, { "input": "file://example%/", "base": "about:blank", @@ -3674,35 +3674,35 @@ module.exports = "search": "", "hash": "" }, - // { - // "input": "https://faß.ExAmPlE/", - // "base": "about:blank", - // "href": "https://xn--fa-hia.example/", - // "origin": "https://faß.example", - // "protocol": "https:", - // "username": "", - // "password": "", - // "host": "xn--fa-hia.example", - // "hostname": "xn--fa-hia.example", - // "port": "", - // "pathname": "/", - // "search": "", - // "hash": "" - // }, - // { - // "input": "sc://faß.ExAmPlE/", - // "base": "about:blank", - // "href": "sc://fa%C3%9F.ExAmPlE/", - // "protocol": "sc:", - // "username": "", - // "password": "", - // "host": "fa%C3%9F.ExAmPlE", - // "hostname": "fa%C3%9F.ExAmPlE", - // "port": "", - // "pathname": "/", - // "search": "", - // "hash": "" - // }, + { + "input": "https://faß.ExAmPlE/", + "base": "about:blank", + "href": "https://xn--fa-hia.example/", + "origin": "https://faß.example", + "protocol": "https:", + "username": "", + "password": "", + "host": "xn--fa-hia.example", + "hostname": "xn--fa-hia.example", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "sc://faß.ExAmPlE/", + "base": "about:blank", + "href": "sc://fa%C3%9F.ExAmPlE/", + "protocol": "sc:", + "username": "", + "password": "", + "host": "fa%C3%9F.ExAmPlE", + "hostname": "fa%C3%9F.ExAmPlE", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, "Invalid escaped characters should fail and the percents should be escaped. 
https://www.w3.org/Bugs/Public/show_bug.cgi?id=24191", { "input": "http://%zz%66%a.com", @@ -4362,37 +4362,37 @@ module.exports = "search": "", "hash": "" }, - // "# unknown schemes and their hosts", - // { - // "input": "sc://ñ.test/", - // "base": "about:blank", - // "href": "sc://%C3%B1.test/", - // "origin": "null", - // "protocol": "sc:", - // "username": "", - // "password": "", - // "host": "%C3%B1.test", - // "hostname": "%C3%B1.test", - // "port": "", - // "pathname": "/", - // "search": "", - // "hash": "" - // }, - // { - // "input": "sc://\u001F!\"$&'()*+,-.;<=>^_`{|}~/", - // "base": "about:blank", - // "href": "sc://%1F!\"$&'()*+,-.;<=>^_`{|}~/", - // "origin": "null", - // "protocol": "sc:", - // "username": "", - // "password": "", - // "host": "%1F!\"$&'()*+,-.;<=>^_`{|}~", - // "hostname": "%1F!\"$&'()*+,-.;<=>^_`{|}~", - // "port": "", - // "pathname": "/", - // "search": "", - // "hash": "" - // }, + "# unknown schemes and their hosts", + { + "input": "sc://ñ.test/", + "base": "about:blank", + "href": "sc://%C3%B1.test/", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1.test", + "hostname": "%C3%B1.test", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "sc://\u001F!\"$&'()*+,-.;<=>^_`{|}~/", + "base": "about:blank", + "href": "sc://%1F!\"$&'()*+,-.;<=>^_`{|}~/", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%1F!\"$&'()*+,-.;<=>^_`{|}~", + "hostname": "%1F!\"$&'()*+,-.;<=>^_`{|}~", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, { "input": "sc://\u0000/", "base": "about:blank", @@ -4403,40 +4403,40 @@ module.exports = "base": "about:blank", "failure": true }, - // { - // "input": "sc://%/", - // "base": "about:blank", - // "href": "sc://%/", - // "protocol": "sc:", - // "username": "", - // "password": "", - // "host": "%", - // "hostname": "%", - // "port": "", - // "pathname": "/", - // "search": "", - // 
"hash": "" - // }, - // { - // "input": "sc://@/", - // "base": "about:blank", - // "failure": true - // }, - // { - // "input": "sc://te@s:t@/", - // "base": "about:blank", - // "failure": true - // }, - // { - // "input": "sc://:/", - // "base": "about:blank", - // "failure": true - // }, - // { - // "input": "sc://:12/", - // "base": "about:blank", - // "failure": true - // }, + { + "input": "sc://%/", + "base": "about:blank", + "href": "sc://%/", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%", + "hostname": "%", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "sc://@/", + "base": "about:blank", + "failure": true + }, + { + "input": "sc://te@s:t@/", + "base": "about:blank", + "failure": true + }, + { + "input": "sc://:/", + "base": "about:blank", + "failure": true + }, + { + "input": "sc://:12/", + "base": "about:blank", + "failure": true + }, { "input": "sc://[/", "base": "about:blank", @@ -4452,21 +4452,21 @@ module.exports = "base": "about:blank", "failure": true }, - // { - // "input": "x", - // "base": "sc://ñ", - // "href": "sc://%C3%B1/x", - // "origin": "null", - // "protocol": "sc:", - // "username": "", - // "password": "", - // "host": "%C3%B1", - // "hostname": "%C3%B1", - // "port": "", - // "pathname": "/x", - // "search": "", - // "hash": "" - // }, + { + "input": "x", + "base": "sc://ñ", + "href": "sc://%C3%B1/x", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", + "hostname": "%C3%B1", + "port": "", + "pathname": "/x", + "search": "", + "hash": "" + }, "# unknown schemes and backslashes", { "input": "sc:\\../", @@ -5327,20 +5327,20 @@ module.exports = "search": "", "hash": "" }, - // { - // "input": "/..//localhost//pig", - // "base": "file://lion/", - // "href": "file://lion/localhost//pig", - // "protocol": "file:", - // "username": "", - // "password": "", - // "host": "lion", - // "hostname": "lion", - // "port": "", - // "pathname": 
"/localhost//pig", - // "search": "", - // "hash": "" - // }, + { + "input": "/..//localhost//pig", + "base": "file://lion/", + "href": "file://lion/localhost//pig", + "protocol": "file:", + "username": "", + "password": "", + "host": "lion", + "hostname": "lion", + "port": "", + "pathname": "/localhost//pig", + "search": "", + "hash": "" + }, { "input": "file://", "base": "file://ape/", @@ -5356,34 +5356,34 @@ module.exports = "hash": "" }, "# File URLs with non-empty hosts", - // { - // "input": "/rooibos", - // "base": "file://tea/", - // "href": "file://tea/rooibos", - // "protocol": "file:", - // "username": "", - // "password": "", - // "host": "tea", - // "hostname": "tea", - // "port": "", - // "pathname": "/rooibos", - // "search": "", - // "hash": "" - // }, - // { - // "input": "/?chai", - // "base": "file://tea/", - // "href": "file://tea/?chai", - // "protocol": "file:", - // "username": "", - // "password": "", - // "host": "tea", - // "hostname": "tea", - // "port": "", - // "pathname": "/", - // "search": "?chai", - // "hash": "" - // }, + { + "input": "/rooibos", + "base": "file://tea/", + "href": "file://tea/rooibos", + "protocol": "file:", + "username": "", + "password": "", + "host": "tea", + "hostname": "tea", + "port": "", + "pathname": "/rooibos", + "search": "", + "hash": "" + }, + { + "input": "/?chai", + "base": "file://tea/", + "href": "file://tea/?chai", + "protocol": "file:", + "username": "", + "password": "", + "host": "tea", + "hostname": "tea", + "port": "", + "pathname": "/", + "search": "?chai", + "hash": "" + }, "# Windows drive letter quirk with not empty host", { "input": "file://example.net/C:/", @@ -5567,109 +5567,109 @@ module.exports = "failure": true }, "# Non-special-URL path tests", - // { - // "input": "sc://ñ", - // "base": "about:blank", - // "href": "sc://%C3%B1", - // "origin": "null", - // "protocol": "sc:", - // "username": "", - // "password": "", - // "host": "%C3%B1", - // "hostname": "%C3%B1", - // "port": "", 
- // "pathname": "", - // "search": "", - // "hash": "" - // }, - // { - // "input": "sc://ñ?x", - // "base": "about:blank", - // "href": "sc://%C3%B1?x", - // "origin": "null", - // "protocol": "sc:", - // "username": "", - // "password": "", - // "host": "%C3%B1", - // "hostname": "%C3%B1", - // "port": "", - // "pathname": "", - // "search": "?x", - // "hash": "" - // }, - // { - // "input": "sc://ñ#x", - // "base": "about:blank", - // "href": "sc://%C3%B1#x", - // "origin": "null", - // "protocol": "sc:", - // "username": "", - // "password": "", - // "host": "%C3%B1", - // "hostname": "%C3%B1", - // "port": "", - // "pathname": "", - // "search": "", - // "hash": "#x" - // }, - // { - // "input": "#x", - // "base": "sc://ñ", - // "href": "sc://%C3%B1#x", - // "origin": "null", - // "protocol": "sc:", - // "username": "", - // "password": "", - // "host": "%C3%B1", - // "hostname": "%C3%B1", - // "port": "", - // "pathname": "", - // "search": "", - // "hash": "#x" - // }, - // { - // "input": "?x", - // "base": "sc://ñ", - // "href": "sc://%C3%B1?x", - // "origin": "null", - // "protocol": "sc:", - // "username": "", - // "password": "", - // "host": "%C3%B1", - // "hostname": "%C3%B1", - // "port": "", - // "pathname": "", - // "search": "?x", - // "hash": "" - // }, - // { - // "input": "sc://?", - // "base": "about:blank", - // "href": "sc://?", - // "protocol": "sc:", - // "username": "", - // "password": "", - // "host": "", - // "hostname": "", - // "port": "", - // "pathname": "", - // "search": "", - // "hash": "" - // }, - // { - // "input": "sc://#", - // "base": "about:blank", - // "href": "sc://#", - // "protocol": "sc:", - // "username": "", - // "password": "", - // "host": "", - // "hostname": "", - // "port": "", - // "pathname": "", - // "search": "", - // "hash": "" - // }, + { + "input": "sc://ñ", + "base": "about:blank", + "href": "sc://%C3%B1", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", 
+ "hostname": "%C3%B1", + "port": "", + "pathname": "", + "search": "", + "hash": "" + }, + { + "input": "sc://ñ?x", + "base": "about:blank", + "href": "sc://%C3%B1?x", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", + "hostname": "%C3%B1", + "port": "", + "pathname": "", + "search": "?x", + "hash": "" + }, + { + "input": "sc://ñ#x", + "base": "about:blank", + "href": "sc://%C3%B1#x", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", + "hostname": "%C3%B1", + "port": "", + "pathname": "", + "search": "", + "hash": "#x" + }, + { + "input": "#x", + "base": "sc://ñ", + "href": "sc://%C3%B1#x", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", + "hostname": "%C3%B1", + "port": "", + "pathname": "", + "search": "", + "hash": "#x" + }, + { + "input": "?x", + "base": "sc://ñ", + "href": "sc://%C3%B1?x", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", + "hostname": "%C3%B1", + "port": "", + "pathname": "", + "search": "?x", + "hash": "" + }, + { + "input": "sc://?", + "base": "about:blank", + "href": "sc://?", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "", + "search": "", + "hash": "" + }, + { + "input": "sc://#", + "base": "about:blank", + "href": "sc://#", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "", + "search": "", + "hash": "" + }, { "input": "///", "base": "sc://x/", @@ -5893,34 +5893,34 @@ module.exports = "hash": "" }, "# percent encoded hosts in non-special-URLs", - // { - // "input": "non-special://%E2%80%A0/", - // "base": "about:blank", - // "href": "non-special://%E2%80%A0/", - // "protocol": "non-special:", - // "username": "", - // "password": "", - // "host": "%E2%80%A0", - // "hostname": "%E2%80%A0", - // "port": "", - // 
"pathname": "/", - // "search": "", - // "hash": "" - // }, - // { - // "input": "non-special://H%4fSt/path", - // "base": "about:blank", - // "href": "non-special://H%4fSt/path", - // "protocol": "non-special:", - // "username": "", - // "password": "", - // "host": "H%4fSt", - // "hostname": "H%4fSt", - // "port": "", - // "pathname": "/path", - // "search": "", - // "hash": "" - // }, + { + "input": "non-special://%E2%80%A0/", + "base": "about:blank", + "href": "non-special://%E2%80%A0/", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "%E2%80%A0", + "hostname": "%E2%80%A0", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "non-special://H%4fSt/path", + "base": "about:blank", + "href": "non-special://H%4fSt/path", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "H%4fSt", + "hostname": "H%4fSt", + "port": "", + "pathname": "/path", + "search": "", + "hash": "" + }, "# IPv6 in non-special-URLs", { "input": "non-special://[1:2:0:0:5:0:0:0]/",