Browse Source

src: WHATWG URL C++ parser cleanup

- Clarify port state
- Remove scheme flag
- Clarify URL_FLAG_TERMINATED

PR-URL: https://github.com/nodejs/node/pull/12507
Reviewed-By: James M Snell <jasnell@gmail.com>
v7.x
Timothy Gu 8 years ago
committed by Evan Lucas
parent
commit
078316c630
  1. 54
      src/node_url.cc
  2. 13
      src/node_url.h

54
src/node_url.cc

@ -494,7 +494,9 @@ namespace url {
if (flags->IsInt32()) if (flags->IsInt32())
base->flags = flags->Int32Value(context).FromJust(); base->flags = flags->Int32Value(context).FromJust();
GET_AND_SET(env, base_obj, scheme, base, URL_FLAGS_HAS_SCHEME); Local<Value> scheme = GET(env, base_obj, "scheme");
base->scheme = Utf8Value(env->isolate(), scheme).out();
GET_AND_SET(env, base_obj, username, base, URL_FLAGS_HAS_USERNAME); GET_AND_SET(env, base_obj, username, base, URL_FLAGS_HAS_USERNAME);
GET_AND_SET(env, base_obj, password, base, URL_FLAGS_HAS_PASSWORD); GET_AND_SET(env, base_obj, password, base, URL_FLAGS_HAS_PASSWORD);
GET_AND_SET(env, base_obj, host, base, URL_FLAGS_HAS_HOST); GET_AND_SET(env, base_obj, host, base, URL_FLAGS_HAS_HOST);
@ -644,7 +646,7 @@ namespace url {
state = kNoScheme; state = kNoScheme;
continue; continue;
} else { } else {
url->flags |= URL_FLAGS_TERMINATED; url->flags |= URL_FLAGS_FAILED;
return; return;
} }
break; break;
@ -654,10 +656,12 @@ namespace url {
p++; p++;
continue; continue;
} else if (ch == ':' || (has_state_override && ch == kEOL)) { } else if (ch == ':' || (has_state_override && ch == kEOL)) {
buffer += ':';
if (buffer.size() > 0) { if (buffer.size() > 0) {
url->flags |= URL_FLAGS_HAS_SCHEME; buffer += ':';
url->scheme = buffer; url->scheme = buffer;
} else if (has_state_override) {
url->flags |= URL_FLAGS_TERMINATED;
return;
} }
if (IsSpecial(url->scheme)) { if (IsSpecial(url->scheme)) {
url->flags |= URL_FLAGS_SPECIAL; url->flags |= URL_FLAGS_SPECIAL;
@ -672,7 +676,6 @@ namespace url {
state = kFile; state = kFile;
} else if (special && } else if (special &&
has_base && has_base &&
base->flags & URL_FLAGS_HAS_SCHEME &&
url->scheme == base->scheme) { url->scheme == base->scheme) {
state = kSpecialRelativeOrAuthority; state = kSpecialRelativeOrAuthority;
} else if (special) { } else if (special) {
@ -692,7 +695,7 @@ namespace url {
p = input; p = input;
continue; continue;
} else { } else {
url->flags |= URL_FLAGS_TERMINATED; url->flags |= URL_FLAGS_FAILED;
return; return;
} }
break; break;
@ -702,7 +705,6 @@ namespace url {
url->flags |= URL_FLAGS_FAILED; url->flags |= URL_FLAGS_FAILED;
return; return;
} else if (cannot_be_base && ch == '#') { } else if (cannot_be_base && ch == '#') {
url->flags |= URL_FLAGS_HAS_SCHEME;
url->scheme = base->scheme; url->scheme = base->scheme;
if (IsSpecial(url->scheme)) { if (IsSpecial(url->scheme)) {
url->flags |= URL_FLAGS_SPECIAL; url->flags |= URL_FLAGS_SPECIAL;
@ -725,12 +727,10 @@ namespace url {
url->flags |= URL_FLAGS_CANNOT_BE_BASE; url->flags |= URL_FLAGS_CANNOT_BE_BASE;
state = kFragment; state = kFragment;
} else if (has_base && } else if (has_base &&
base->flags & URL_FLAGS_HAS_SCHEME &&
base->scheme != "file:") { base->scheme != "file:") {
state = kRelative; state = kRelative;
continue; continue;
} else { } else {
url->flags |= URL_FLAGS_HAS_SCHEME;
url->scheme = "file:"; url->scheme = "file:";
url->flags |= URL_FLAGS_SPECIAL; url->flags |= URL_FLAGS_SPECIAL;
special = true; special = true;
@ -756,7 +756,6 @@ namespace url {
} }
break; break;
case kRelative: case kRelative:
url->flags |= URL_FLAGS_HAS_SCHEME;
url->scheme = base->scheme; url->scheme = base->scheme;
if (IsSpecial(url->scheme)) { if (IsSpecial(url->scheme)) {
url->flags |= URL_FLAGS_SPECIAL; url->flags |= URL_FLAGS_SPECIAL;
@ -951,7 +950,6 @@ namespace url {
buffer.clear(); buffer.clear();
state = kPort; state = kPort;
if (state_override == kHostname) { if (state_override == kHostname) {
url->flags |= URL_FLAGS_TERMINATED;
return; return;
} }
} else if (ch == kEOL || } else if (ch == kEOL ||
@ -972,7 +970,6 @@ namespace url {
buffer.clear(); buffer.clear();
state = kPathStart; state = kPathStart;
if (has_state_override) { if (has_state_override) {
url->flags |= URL_FLAGS_TERMINATED;
return; return;
} }
} else { } else {
@ -996,13 +993,26 @@ namespace url {
int port = 0; int port = 0;
for (size_t i = 0; i < buffer.size(); i++) for (size_t i = 0; i < buffer.size(); i++)
port = port * 10 + buffer[i] - '0'; port = port * 10 + buffer[i] - '0';
if (port >= 0 && port <= 0xffff) { if (port < 0 || port > 0xffff) {
url->port = NormalizePort(url->scheme, port); // TODO(TimothyGu): This hack is currently needed for the host
} else if (!has_state_override) { // setter since it needs access to hostname if it is valid, and
url->flags |= URL_FLAGS_FAILED; // if the FAILED flag is set the entire response to JS layer
// will be empty.
if (state_override == kHost)
url->port = -1;
else
url->flags |= URL_FLAGS_FAILED;
return; return;
} }
url->port = NormalizePort(url->scheme, port);
buffer.clear(); buffer.clear();
} else if (has_state_override) {
// TODO(TimothyGu): Similar case as above.
if (state_override == kHost)
url->port = -1;
else
url->flags |= URL_FLAGS_TERMINATED;
return;
} }
state = kPathStart; state = kPathStart;
continue; continue;
@ -1014,7 +1024,6 @@ namespace url {
case kFile: case kFile:
base_is_file = ( base_is_file = (
has_base && has_base &&
base->flags & URL_FLAGS_HAS_SCHEME &&
base->scheme == "file:"); base->scheme == "file:");
switch (ch) { switch (ch) {
case kEOL: case kEOL:
@ -1097,7 +1106,6 @@ namespace url {
state = kFileHost; state = kFileHost;
} else { } else {
if (has_base && if (has_base &&
base->flags & URL_FLAGS_HAS_SCHEME &&
base->scheme == "file:" && base->scheme == "file:" &&
base->flags & URL_FLAGS_HAS_PATH && base->flags & URL_FLAGS_HAS_PATH &&
base->path.size() > 0 && base->path.size() > 0 &&
@ -1158,8 +1166,7 @@ namespace url {
url->path.push_back(""); url->path.push_back("");
} }
} else { } else {
if (url->flags & URL_FLAGS_HAS_SCHEME && if (url->scheme == "file:" &&
url->scheme == "file:" &&
url->path.empty() && url->path.empty() &&
buffer.size() == 2 && buffer.size() == 2 &&
WINDOWS_DRIVE_LETTER(buffer[0], buffer[1])) { WINDOWS_DRIVE_LETTER(buffer[0], buffer[1])) {
@ -1233,8 +1240,7 @@ namespace url {
const struct url_data* url) { const struct url_data* url) {
Isolate* isolate = env->isolate(); Isolate* isolate = env->isolate();
argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url->flags); argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url->flags);
if (url->flags & URL_FLAGS_HAS_SCHEME) argv[ARG_PROTOCOL] = OneByteString(isolate, url->scheme.c_str());
argv[ARG_PROTOCOL] = OneByteString(isolate, url->scheme.c_str());
if (url->flags & URL_FLAGS_HAS_USERNAME) if (url->flags & URL_FLAGS_HAS_USERNAME)
argv[ARG_USERNAME] = UTF8STRING(isolate, url->username); argv[ARG_USERNAME] = UTF8STRING(isolate, url->username);
if (url->flags & URL_FLAGS_HAS_PASSWORD) if (url->flags & URL_FLAGS_HAS_PASSWORD)
@ -1275,7 +1281,9 @@ namespace url {
HarvestBase(env, &base, base_obj.As<Object>()); HarvestBase(env, &base, base_obj.As<Object>());
URL::Parse(input, len, state_override, &url, &base, has_base); URL::Parse(input, len, state_override, &url, &base, has_base);
if (url.flags & URL_FLAGS_INVALID_PARSE_STATE) if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) ||
((state_override != kUnknownState) &&
(url.flags & URL_FLAGS_TERMINATED)))
return; return;
// Define the return value placeholders // Define the return value placeholders

13
src/node_url.h

@ -451,13 +451,12 @@ static inline void PercentDecode(const char* input,
XX(URL_FLAGS_INVALID_PARSE_STATE, 0x04) \ XX(URL_FLAGS_INVALID_PARSE_STATE, 0x04) \
XX(URL_FLAGS_TERMINATED, 0x08) \ XX(URL_FLAGS_TERMINATED, 0x08) \
XX(URL_FLAGS_SPECIAL, 0x10) \ XX(URL_FLAGS_SPECIAL, 0x10) \
XX(URL_FLAGS_HAS_SCHEME, 0x20) \ XX(URL_FLAGS_HAS_USERNAME, 0x20) \
XX(URL_FLAGS_HAS_USERNAME, 0x40) \ XX(URL_FLAGS_HAS_PASSWORD, 0x40) \
XX(URL_FLAGS_HAS_PASSWORD, 0x80) \ XX(URL_FLAGS_HAS_HOST, 0x80) \
XX(URL_FLAGS_HAS_HOST, 0x100) \ XX(URL_FLAGS_HAS_PATH, 0x100) \
XX(URL_FLAGS_HAS_PATH, 0x200) \ XX(URL_FLAGS_HAS_QUERY, 0x200) \
XX(URL_FLAGS_HAS_QUERY, 0x400) \ XX(URL_FLAGS_HAS_FRAGMENT, 0x400)
XX(URL_FLAGS_HAS_FRAGMENT, 0x800)
#define ARGS(XX) \ #define ARGS(XX) \
XX(ARG_FLAGS) \ XX(ARG_FLAGS) \

Loading…
Cancel
Save