Browse Source

src: add native URL class

Adds a native URL class for use within the Node.js C/C++
code. This is primarily intended to be used by the eventual
ES6 modules implementation but can be used generally wherever
URL parsing within the C/C++ code may be necessary.

```cpp
URL url1("http://example.org");
URL url2("foo", "http://example.org/bar");
URL url3("baz", &url2);
```

While we're at it, reduce reliance on macros to simplify impl.

PR-URL: https://github.com/nodejs/node/pull/11801
Reviewed-By: Anna Henningsen <anna@addaleax.net>
v6
James M Snell 8 years ago
parent
commit
e26b6c6baa
  1. 554
      src/node_url.cc
  2. 91
      src/node_url.h

554
src/node_url.cc

@ -47,45 +47,6 @@ using v8::Value;
} \ } \
} }
#define CANNOT_BE_BASE() url.flags |= URL_FLAGS_CANNOT_BE_BASE;
#define INVALID_PARSE_STATE() url.flags |= URL_FLAGS_INVALID_PARSE_STATE;
#define SPECIAL() \
{ \
url.flags |= URL_FLAGS_SPECIAL; \
special = true; \
}
#define TERMINATE() \
{ \
url.flags |= URL_FLAGS_TERMINATED; \
goto done; \
}
#define URL_FAILED() \
{ \
url.flags |= URL_FLAGS_FAILED; \
goto done; \
}
#define CHECK_FLAG(flags, name) (flags & URL_FLAGS_##name) /* NOLINT */
#define IS_CANNOT_BE_BASE(flags) CHECK_FLAG(flags, CANNOT_BE_BASE)
#define IS_FAILED(flags) CHECK_FLAG(flags, FAILED)
#define DOES_HAVE_SCHEME(url) CHECK_FLAG(url.flags, HAS_SCHEME)
#define DOES_HAVE_USERNAME(url) CHECK_FLAG(url.flags, HAS_USERNAME)
#define DOES_HAVE_PASSWORD(url) CHECK_FLAG(url.flags, HAS_PASSWORD)
#define DOES_HAVE_HOST(url) CHECK_FLAG(url.flags, HAS_HOST)
#define DOES_HAVE_PATH(url) CHECK_FLAG(url.flags, HAS_PATH)
#define DOES_HAVE_QUERY(url) CHECK_FLAG(url.flags, HAS_QUERY)
#define DOES_HAVE_FRAGMENT(url) CHECK_FLAG(url.flags, HAS_FRAGMENT)
#define SET_HAVE_SCHEME() url.flags |= URL_FLAGS_HAS_SCHEME;
#define SET_HAVE_USERNAME() url.flags |= URL_FLAGS_HAS_USERNAME;
#define SET_HAVE_PASSWORD() url.flags |= URL_FLAGS_HAS_PASSWORD;
#define SET_HAVE_HOST() url.flags |= URL_FLAGS_HAS_HOST;
#define SET_HAVE_PATH() url.flags |= URL_FLAGS_HAS_PATH;
#define SET_HAVE_QUERY() url.flags |= URL_FLAGS_HAS_QUERY;
#define SET_HAVE_FRAGMENT() url.flags |= URL_FLAGS_HAS_FRAGMENT;
#define UTF8STRING(isolate, str) \ #define UTF8STRING(isolate, str) \
String::NewFromUtf8(isolate, str.c_str(), v8::NewStringType::kNormal) \ String::NewFromUtf8(isolate, str.c_str(), v8::NewStringType::kNormal) \
.ToLocalChecked() .ToLocalChecked()
@ -93,7 +54,7 @@ using v8::Value;
namespace url { namespace url {
#if defined(NODE_HAVE_I18N_SUPPORT) #if defined(NODE_HAVE_I18N_SUPPORT)
static bool ToUnicode(std::string* input, std::string* output) { static inline bool ToUnicode(std::string* input, std::string* output) {
MaybeStackBuffer<char> buf; MaybeStackBuffer<char> buf;
if (i18n::ToUnicode(&buf, input->c_str(), input->length()) < 0) if (i18n::ToUnicode(&buf, input->c_str(), input->length()) < 0)
return false; return false;
@ -101,7 +62,7 @@ namespace url {
return true; return true;
} }
static bool ToASCII(std::string* input, std::string* output) { static inline bool ToASCII(std::string* input, std::string* output) {
MaybeStackBuffer<char> buf; MaybeStackBuffer<char> buf;
if (i18n::ToASCII(&buf, input->c_str(), input->length()) < 0) if (i18n::ToASCII(&buf, input->c_str(), input->length()) < 0)
return false; return false;
@ -110,12 +71,12 @@ namespace url {
} }
#else #else
// Intentional non-ops if ICU is not present. // Intentional non-ops if ICU is not present.
static bool ToUnicode(std::string* input, std::string* output) { static inline bool ToUnicode(std::string* input, std::string* output) {
*output = *input; *output = *input;
return true; return true;
} }
static bool ToASCII(std::string* input, std::string* output) { static inline bool ToASCII(std::string* input, std::string* output) {
*output = *input; *output = *input;
return true; return true;
} }
@ -619,41 +580,26 @@ namespace url {
url->path.pop_back(); url->path.pop_back();
} }
static void Parse(Environment* env, void URL::Parse(const char* input,
Local<Value> recv,
const char* input,
const size_t len, const size_t len,
enum url_parse_state state_override, enum url_parse_state state_override,
Local<Value> base_obj, struct url_data* url,
Local<Value> context_obj, const struct url_data* base,
Local<Function> cb) { bool has_base) {
Isolate* isolate = env->isolate();
Local<Context> context = env->context();
HandleScope handle_scope(isolate);
Context::Scope context_scope(context);
const bool has_base = base_obj->IsObject();
bool atflag = false; bool atflag = false;
bool sbflag = false; bool sbflag = false;
bool uflag = false; bool uflag = false;
bool base_is_file = false; bool base_is_file = false;
int wskip = 0; int wskip = 0;
struct url_data base;
struct url_data url;
if (context_obj->IsObject())
HarvestContext(env, &url, context_obj.As<Object>());
if (has_base)
HarvestBase(env, &base, base_obj.As<Object>());
std::string buffer; std::string buffer;
url.scheme.reserve(len); url->scheme.reserve(len);
url.username.reserve(len); url->username.reserve(len);
url.password.reserve(len); url->password.reserve(len);
url.host.reserve(len); url->host.reserve(len);
url.path.reserve(len); url->path.reserve(len);
url.query.reserve(len); url->query.reserve(len);
url.fragment.reserve(len); url->fragment.reserve(len);
buffer.reserve(len); buffer.reserve(len);
// Set the initial parse state. // Set the initial parse state.
@ -665,8 +611,8 @@ namespace url {
const char* end = input + len; const char* end = input + len;
if (state < kSchemeStart || state > kFragment) { if (state < kSchemeStart || state > kFragment) {
INVALID_PARSE_STATE(); url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
goto done; return;
} }
while (p <= end) { while (p <= end) {
@ -684,7 +630,8 @@ namespace url {
continue; continue;
} }
bool special = url.flags & URL_FLAGS_SPECIAL; bool special = (url->flags & URL_FLAGS_SPECIAL);
bool cannot_be_base;
const bool special_back_slash = (special && ch == '\\'); const bool special_back_slash = (special && ch == '\\');
switch (state) { switch (state) {
case kSchemeStart: case kSchemeStart:
@ -695,7 +642,8 @@ namespace url {
state = kNoScheme; state = kNoScheme;
continue; continue;
} else { } else {
TERMINATE() url->flags |= URL_FLAGS_TERMINATED;
return;
} }
break; break;
case kScheme: case kScheme:
@ -706,23 +654,24 @@ namespace url {
} else if (ch == ':' || (has_state_override && ch == kEOL)) { } else if (ch == ':' || (has_state_override && ch == kEOL)) {
buffer += ':'; buffer += ':';
if (buffer.size() > 0) { if (buffer.size() > 0) {
SET_HAVE_SCHEME() url->flags |= URL_FLAGS_HAS_SCHEME;
url.scheme = buffer; url->scheme = buffer;
} }
if (IsSpecial(url.scheme)) { if (IsSpecial(url->scheme)) {
SPECIAL() url->flags |= URL_FLAGS_SPECIAL;
special = true;
} else { } else {
url.flags &= ~URL_FLAGS_SPECIAL; url->flags &= ~URL_FLAGS_SPECIAL;
} }
if (has_state_override) if (has_state_override)
goto done; return;
buffer.clear(); buffer.clear();
if (url.scheme == "file:") { if (url->scheme == "file:") {
state = kFile; state = kFile;
} else if (special && } else if (special &&
has_base && has_base &&
DOES_HAVE_SCHEME(base) && base->flags & URL_FLAGS_HAS_SCHEME &&
url.scheme == base.scheme) { url->scheme == base->scheme) {
state = kSpecialRelativeOrAuthority; state = kSpecialRelativeOrAuthority;
} else if (special) { } else if (special) {
state = kSpecialAuthoritySlashes; state = kSpecialAuthoritySlashes;
@ -730,9 +679,9 @@ namespace url {
state = kPathOrAuthority; state = kPathOrAuthority;
p++; p++;
} else { } else {
CANNOT_BE_BASE() url->flags |= URL_FLAGS_CANNOT_BE_BASE;
SET_HAVE_PATH() url->flags |= URL_FLAGS_HAS_PATH;
url.path.push_back(""); url->path.push_back("");
state = kCannotBeBase; state = kCannotBeBase;
} }
} else if (!has_state_override) { } else if (!has_state_override) {
@ -741,43 +690,48 @@ namespace url {
p = input; p = input;
continue; continue;
} else { } else {
TERMINATE() url->flags |= URL_FLAGS_TERMINATED;
return;
} }
break; break;
case kNoScheme: case kNoScheme:
if (!has_base || (IS_CANNOT_BE_BASE(base.flags) && ch != '#')) { cannot_be_base = base->flags & URL_FLAGS_CANNOT_BE_BASE;
URL_FAILED() if (!has_base || (cannot_be_base && ch != '#')) {
} else if (IS_CANNOT_BE_BASE(base.flags) && ch == '#') { url->flags |= URL_FLAGS_FAILED;
SET_HAVE_SCHEME() return;
url.scheme = base.scheme; } else if (cannot_be_base && ch == '#') {
if (IsSpecial(url.scheme)) { url->flags |= URL_FLAGS_HAS_SCHEME;
SPECIAL() url->scheme = base->scheme;
if (IsSpecial(url->scheme)) {
url->flags |= URL_FLAGS_SPECIAL;
special = true;
} else { } else {
url.flags &= ~URL_FLAGS_SPECIAL; url->flags &= ~URL_FLAGS_SPECIAL;
} }
if (DOES_HAVE_PATH(base)) { if (base->flags & URL_FLAGS_HAS_PATH) {
SET_HAVE_PATH() url->flags |= URL_FLAGS_HAS_PATH;
url.path = base.path; url->path = base->path;
} }
if (DOES_HAVE_QUERY(base)) { if (base->flags & URL_FLAGS_HAS_QUERY) {
SET_HAVE_QUERY() url->flags |= URL_FLAGS_HAS_QUERY;
url.query = base.query; url->query = base->query;
} }
if (DOES_HAVE_FRAGMENT(base)) { if (base->flags & URL_FLAGS_HAS_FRAGMENT) {
SET_HAVE_FRAGMENT() url->flags |= URL_FLAGS_HAS_FRAGMENT;
url.fragment = base.fragment; url->fragment = base->fragment;
} }
CANNOT_BE_BASE() url->flags |= URL_FLAGS_CANNOT_BE_BASE;
state = kFragment; state = kFragment;
} else if (has_base && } else if (has_base &&
DOES_HAVE_SCHEME(base) && base->flags & URL_FLAGS_HAS_SCHEME &&
base.scheme != "file:") { base->scheme != "file:") {
state = kRelative; state = kRelative;
continue; continue;
} else { } else {
SET_HAVE_SCHEME() url->flags |= URL_FLAGS_HAS_SCHEME;
url.scheme = "file:"; url->scheme = "file:";
SPECIAL() url->flags |= URL_FLAGS_SPECIAL;
special = true;
state = kFile; state = kFile;
continue; continue;
} }
@ -800,106 +754,107 @@ namespace url {
} }
break; break;
case kRelative: case kRelative:
SET_HAVE_SCHEME() url->flags |= URL_FLAGS_HAS_SCHEME;
url.scheme = base.scheme; url->scheme = base->scheme;
if (IsSpecial(url.scheme)) { if (IsSpecial(url->scheme)) {
SPECIAL() url->flags |= URL_FLAGS_SPECIAL;
special = true;
} else { } else {
url.flags &= ~URL_FLAGS_SPECIAL; url->flags &= ~URL_FLAGS_SPECIAL;
} }
switch (ch) { switch (ch) {
case kEOL: case kEOL:
if (DOES_HAVE_USERNAME(base)) { if (base->flags & URL_FLAGS_HAS_USERNAME) {
SET_HAVE_USERNAME() url->flags |= URL_FLAGS_HAS_USERNAME;
url.username = base.username; url->username = base->username;
} }
if (DOES_HAVE_PASSWORD(base)) { if (base->flags & URL_FLAGS_HAS_PASSWORD) {
SET_HAVE_PASSWORD() url->flags |= URL_FLAGS_HAS_PASSWORD;
url.password = base.password; url->password = base->password;
} }
if (DOES_HAVE_HOST(base)) { if (base->flags & URL_FLAGS_HAS_HOST) {
SET_HAVE_HOST() url->flags |= URL_FLAGS_HAS_HOST;
url.host = base.host; url->host = base->host;
} }
if (DOES_HAVE_QUERY(base)) { if (base->flags & URL_FLAGS_HAS_QUERY) {
SET_HAVE_QUERY() url->flags |= URL_FLAGS_HAS_QUERY;
url.query = base.query; url->query = base->query;
} }
if (DOES_HAVE_PATH(base)) { if (base->flags & URL_FLAGS_HAS_PATH) {
SET_HAVE_PATH() url->flags |= URL_FLAGS_HAS_PATH;
url.path = base.path; url->path = base->path;
} }
url.port = base.port; url->port = base->port;
break; break;
case '/': case '/':
state = kRelativeSlash; state = kRelativeSlash;
break; break;
case '?': case '?':
if (DOES_HAVE_USERNAME(base)) { if (base->flags & URL_FLAGS_HAS_USERNAME) {
SET_HAVE_USERNAME() url->flags |= URL_FLAGS_HAS_USERNAME;
url.username = base.username; url->username = base->username;
} }
if (DOES_HAVE_PASSWORD(base)) { if (base->flags & URL_FLAGS_HAS_PASSWORD) {
SET_HAVE_PASSWORD() url->flags |= URL_FLAGS_HAS_PASSWORD;
url.password = base.password; url->password = base->password;
} }
if (DOES_HAVE_HOST(base)) { if (base->flags & URL_FLAGS_HAS_HOST) {
SET_HAVE_HOST() url->flags |= URL_FLAGS_HAS_HOST;
url.host = base.host; url->host = base->host;
} }
if (DOES_HAVE_PATH(base)) { if (base->flags & URL_FLAGS_HAS_PATH) {
SET_HAVE_PATH() url->flags |= URL_FLAGS_HAS_PATH;
url.path = base.path; url->path = base->path;
} }
url.port = base.port; url->port = base->port;
state = kQuery; state = kQuery;
break; break;
case '#': case '#':
if (DOES_HAVE_USERNAME(base)) { if (base->flags & URL_FLAGS_HAS_USERNAME) {
SET_HAVE_USERNAME() url->flags |= URL_FLAGS_HAS_USERNAME;
url.username = base.username; url->username = base->username;
} }
if (DOES_HAVE_PASSWORD(base)) { if (base->flags & URL_FLAGS_HAS_PASSWORD) {
SET_HAVE_PASSWORD() url->flags |= URL_FLAGS_HAS_PASSWORD;
url.password = base.password; url->password = base->password;
} }
if (DOES_HAVE_HOST(base)) { if (base->flags & URL_FLAGS_HAS_HOST) {
SET_HAVE_HOST() url->flags |= URL_FLAGS_HAS_HOST;
url.host = base.host; url->host = base->host;
} }
if (DOES_HAVE_QUERY(base)) { if (base->flags & URL_FLAGS_HAS_QUERY) {
SET_HAVE_QUERY() url->flags |= URL_FLAGS_HAS_QUERY;
url.query = base.query; url->query = base->query;
} }
if (DOES_HAVE_PATH(base)) { if (base->flags & URL_FLAGS_HAS_PATH) {
SET_HAVE_PATH() url->flags |= URL_FLAGS_HAS_PATH;
url.path = base.path; url->path = base->path;
} }
url.port = base.port; url->port = base->port;
state = kFragment; state = kFragment;
break; break;
default: default:
if (special_back_slash) { if (special_back_slash) {
state = kRelativeSlash; state = kRelativeSlash;
} else { } else {
if (DOES_HAVE_USERNAME(base)) { if (base->flags & URL_FLAGS_HAS_USERNAME) {
SET_HAVE_USERNAME() url->flags |= URL_FLAGS_HAS_USERNAME;
url.username = base.username; url->username = base->username;
} }
if (DOES_HAVE_PASSWORD(base)) { if (base->flags & URL_FLAGS_HAS_PASSWORD) {
SET_HAVE_PASSWORD() url->flags |= URL_FLAGS_HAS_PASSWORD;
url.password = base.password; url->password = base->password;
} }
if (DOES_HAVE_HOST(base)) { if (base->flags & URL_FLAGS_HAS_HOST) {
SET_HAVE_HOST() url->flags |= URL_FLAGS_HAS_HOST;
url.host = base.host; url->host = base->host;
} }
if (DOES_HAVE_PATH(base)) { if (base->flags & URL_FLAGS_HAS_PATH) {
SET_HAVE_PATH() url->flags |= URL_FLAGS_HAS_PATH;
url.path = base.path; url->path = base->path;
ShortenUrlPath(&url); ShortenUrlPath(url);
} }
url.port = base.port; url->port = base->port;
state = kPath; state = kPath;
continue; continue;
} }
@ -909,19 +864,19 @@ namespace url {
if (ch == '/' || special_back_slash) { if (ch == '/' || special_back_slash) {
state = kSpecialAuthorityIgnoreSlashes; state = kSpecialAuthorityIgnoreSlashes;
} else { } else {
if (DOES_HAVE_USERNAME(base)) { if (base->flags & URL_FLAGS_HAS_USERNAME) {
SET_HAVE_USERNAME() url->flags |= URL_FLAGS_HAS_USERNAME;
url.username = base.username; url->username = base->username;
} }
if (DOES_HAVE_PASSWORD(base)) { if (base->flags & URL_FLAGS_HAS_PASSWORD) {
SET_HAVE_PASSWORD() url->flags |= URL_FLAGS_HAS_PASSWORD;
url.password = base.password; url->password = base->password;
} }
if (DOES_HAVE_HOST(base)) { if (base->flags & URL_FLAGS_HAS_HOST) {
SET_HAVE_HOST() url->flags |= URL_FLAGS_HAS_HOST;
url.host = base.host; url->host = base->host;
} }
url.port = base.port; url->port = base->port;
state = kPath; state = kPath;
continue; continue;
} }
@ -949,21 +904,21 @@ namespace url {
atflag = true; atflag = true;
const size_t blen = buffer.size(); const size_t blen = buffer.size();
if (blen > 0 && buffer[0] != ':') { if (blen > 0 && buffer[0] != ':') {
SET_HAVE_USERNAME() url->flags |= URL_FLAGS_HAS_USERNAME;
} }
for (size_t n = 0; n < blen; n++) { for (size_t n = 0; n < blen; n++) {
const char bch = buffer[n]; const char bch = buffer[n];
if (bch == ':') { if (bch == ':') {
SET_HAVE_PASSWORD() url->flags |= URL_FLAGS_HAS_PASSWORD;
if (!uflag) { if (!uflag) {
uflag = true; uflag = true;
continue; continue;
} }
} }
if (uflag) { if (uflag) {
AppendOrEscape(&url.password, bch, UserinfoEncodeSet); AppendOrEscape(&url->password, bch, UserinfoEncodeSet);
} else { } else {
AppendOrEscape(&url.username, bch, UserinfoEncodeSet); AppendOrEscape(&url->username, bch, UserinfoEncodeSet);
} }
} }
buffer.clear(); buffer.clear();
@ -982,30 +937,42 @@ namespace url {
case kHost: case kHost:
case kHostname: case kHostname:
if (ch == ':' && !sbflag) { if (ch == ':' && !sbflag) {
if (special && buffer.size() == 0) if (special && buffer.size() == 0) {
URL_FAILED() url->flags |= URL_FLAGS_FAILED;
SET_HAVE_HOST() return;
if (!ParseHost(&buffer, &url.host)) }
URL_FAILED() url->flags |= URL_FLAGS_HAS_HOST;
if (!ParseHost(&buffer, &url->host)) {
url->flags |= URL_FLAGS_FAILED;
return;
}
buffer.clear(); buffer.clear();
state = kPort; state = kPort;
if (state_override == kHostname) if (state_override == kHostname) {
TERMINATE() url->flags |= URL_FLAGS_TERMINATED;
return;
}
} else if (ch == kEOL || } else if (ch == kEOL ||
ch == '/' || ch == '/' ||
ch == '?' || ch == '?' ||
ch == '#' || ch == '#' ||
special_back_slash) { special_back_slash) {
p--; p--;
if (special && buffer.size() == 0) if (special && buffer.size() == 0) {
URL_FAILED() url->flags |= URL_FLAGS_FAILED;
SET_HAVE_HOST() return;
if (!ParseHost(&buffer, &url.host)) }
URL_FAILED() url->flags |= URL_FLAGS_HAS_HOST;
if (!ParseHost(&buffer, &url->host)) {
url->flags |= URL_FLAGS_FAILED;
return;
}
buffer.clear(); buffer.clear();
state = kPathStart; state = kPathStart;
if (has_state_override) if (has_state_override) {
TERMINATE() url->flags |= URL_FLAGS_TERMINATED;
return;
}
} else { } else {
if (ch == '[') if (ch == '[')
sbflag = true; sbflag = true;
@ -1028,37 +995,39 @@ namespace url {
for (size_t i = 0; i < buffer.size(); i++) for (size_t i = 0; i < buffer.size(); i++)
port = port * 10 + buffer[i] - '0'; port = port * 10 + buffer[i] - '0';
if (port >= 0 && port <= 0xffff) { if (port >= 0 && port <= 0xffff) {
url.port = NormalizePort(url.scheme, port); url->port = NormalizePort(url->scheme, port);
} else if (!has_state_override) { } else if (!has_state_override) {
URL_FAILED() url->flags |= URL_FLAGS_FAILED;
return;
} }
buffer.clear(); buffer.clear();
} }
state = kPathStart; state = kPathStart;
continue; continue;
} else { } else {
URL_FAILED(); url->flags |= URL_FLAGS_FAILED;
return;
} }
break; break;
case kFile: case kFile:
base_is_file = ( base_is_file = (
has_base && has_base &&
DOES_HAVE_SCHEME(base) && base->flags & URL_FLAGS_HAS_SCHEME &&
base.scheme == "file:"); base->scheme == "file:");
switch (ch) { switch (ch) {
case kEOL: case kEOL:
if (base_is_file) { if (base_is_file) {
if (DOES_HAVE_HOST(base)) { if (base->flags & URL_FLAGS_HAS_HOST) {
SET_HAVE_HOST() url->flags |= URL_FLAGS_HAS_HOST;
url.host = base.host; url->host = base->host;
} }
if (DOES_HAVE_PATH(base)) { if (base->flags & URL_FLAGS_HAS_PATH) {
SET_HAVE_PATH() url->flags |= URL_FLAGS_HAS_PATH;
url.path = base.path; url->path = base->path;
} }
if (DOES_HAVE_QUERY(base)) { if (base->flags & URL_FLAGS_HAS_QUERY) {
SET_HAVE_QUERY() url->flags |= URL_FLAGS_HAS_QUERY;
url.query = base.query; url->query = base->query;
} }
break; break;
} }
@ -1070,31 +1039,31 @@ namespace url {
break; break;
case '?': case '?':
if (base_is_file) { if (base_is_file) {
if (DOES_HAVE_HOST(base)) { if (base->flags & URL_FLAGS_HAS_HOST) {
SET_HAVE_HOST() url->flags |= URL_FLAGS_HAS_HOST;
url.host = base.host; url->host = base->host;
} }
if (DOES_HAVE_PATH(base)) { if (base->flags & URL_FLAGS_HAS_PATH) {
SET_HAVE_PATH() url->flags |= URL_FLAGS_HAS_PATH;
url.path = base.path; url->path = base->path;
} }
SET_HAVE_QUERY() url->flags |= URL_FLAGS_HAS_QUERY;
state = kQuery; state = kQuery;
break; break;
} }
case '#': case '#':
if (base_is_file) { if (base_is_file) {
if (DOES_HAVE_HOST(base)) { if (base->flags & URL_FLAGS_HAS_HOST) {
SET_HAVE_HOST() url->flags |= URL_FLAGS_HAS_HOST;
url.host = base.host; url->host = base->host;
} }
if (DOES_HAVE_PATH(base)) { if (base->flags & URL_FLAGS_HAS_PATH) {
SET_HAVE_PATH() url->flags |= URL_FLAGS_HAS_PATH;
url.path = base.path; url->path = base->path;
} }
if (DOES_HAVE_QUERY(base)) { if (base->flags & URL_FLAGS_HAS_QUERY) {
SET_HAVE_QUERY() url->flags |= URL_FLAGS_HAS_QUERY;
url.query = base.query; url->query = base->query;
} }
state = kFragment; state = kFragment;
break; break;
@ -1107,15 +1076,15 @@ namespace url {
p[2] != '\\' && p[2] != '\\' &&
p[2] != '?' && p[2] != '?' &&
p[2] != '#'))) { p[2] != '#'))) {
if (DOES_HAVE_HOST(base)) { if (base->flags & URL_FLAGS_HAS_HOST) {
SET_HAVE_HOST() url->flags |= URL_FLAGS_HAS_HOST;
url.host = base.host; url->host = base->host;
} }
if (DOES_HAVE_PATH(base)) { if (base->flags & URL_FLAGS_HAS_PATH) {
SET_HAVE_PATH() url->flags |= URL_FLAGS_HAS_PATH;
url.path = base.path; url->path = base->path;
} }
ShortenUrlPath(&url); ShortenUrlPath(url);
} }
state = kPath; state = kPath;
continue; continue;
@ -1126,13 +1095,13 @@ namespace url {
state = kFileHost; state = kFileHost;
} else { } else {
if (has_base && if (has_base &&
DOES_HAVE_SCHEME(base) && base->flags & URL_FLAGS_HAS_SCHEME &&
base.scheme == "file:" && base->scheme == "file:" &&
DOES_HAVE_PATH(base) && base->flags & URL_FLAGS_HAS_PATH &&
base.path.size() > 0 && base->path.size() > 0 &&
NORMALIZED_WINDOWS_DRIVE_LETTER(base.path[0])) { NORMALIZED_WINDOWS_DRIVE_LETTER(base->path[0])) {
SET_HAVE_PATH() url->flags |= URL_FLAGS_HAS_PATH;
url.path.push_back(base.path[0]); url->path.push_back(base->path[0]);
} }
state = kPath; state = kPath;
continue; continue;
@ -1151,9 +1120,11 @@ namespace url {
state = kPathStart; state = kPathStart;
} else { } else {
if (buffer != "localhost") { if (buffer != "localhost") {
SET_HAVE_HOST() url->flags |= URL_FLAGS_HAS_HOST;
if (!ParseHost(&buffer, &url.host)) if (!ParseHost(&buffer, &url->host)) {
URL_FAILED() url->flags |= URL_FLAGS_FAILED;
return;
}
} }
buffer.clear(); buffer.clear();
state = kPathStart; state = kPathStart;
@ -1174,32 +1145,32 @@ namespace url {
special_back_slash || special_back_slash ||
(!has_state_override && (ch == '?' || ch == '#'))) { (!has_state_override && (ch == '?' || ch == '#'))) {
if (IsDoubleDotSegment(buffer)) { if (IsDoubleDotSegment(buffer)) {
ShortenUrlPath(&url); ShortenUrlPath(url);
if (ch != '/' && !special_back_slash) { if (ch != '/' && !special_back_slash) {
SET_HAVE_PATH() url->flags |= URL_FLAGS_HAS_PATH;
url.path.push_back(""); url->path.push_back("");
} }
} else if (IsSingleDotSegment(buffer)) { } else if (IsSingleDotSegment(buffer)) {
if (ch != '/' && !special_back_slash) { if (ch != '/' && !special_back_slash) {
SET_HAVE_PATH(); url->flags |= URL_FLAGS_HAS_PATH;
url.path.push_back(""); url->path.push_back("");
} }
} else { } else {
if (DOES_HAVE_SCHEME(url) && if (url->flags & URL_FLAGS_HAS_SCHEME &&
url.scheme == "file:" && url->scheme == "file:" &&
url.path.empty() && url->path.empty() &&
buffer.size() == 2 && buffer.size() == 2 &&
WINDOWS_DRIVE_LETTER(buffer[0], buffer[1])) { WINDOWS_DRIVE_LETTER(buffer[0], buffer[1])) {
url.flags &= ~URL_FLAGS_HAS_HOST; url->flags &= ~URL_FLAGS_HAS_HOST;
buffer[1] = ':'; buffer[1] = ':';
} }
SET_HAVE_PATH() url->flags |= URL_FLAGS_HAS_PATH;
std::string segment(buffer.c_str(), buffer.size()); std::string segment(buffer.c_str(), buffer.size());
url.path.push_back(segment); url->path.push_back(segment);
} }
buffer.clear(); buffer.clear();
if (ch == '?') { if (ch == '?') {
SET_HAVE_QUERY() url->flags |= URL_FLAGS_HAS_QUERY;
state = kQuery; state = kQuery;
} else if (ch == '#') { } else if (ch == '#') {
state = kFragment; state = kFragment;
@ -1217,16 +1188,16 @@ namespace url {
state = kFragment; state = kFragment;
break; break;
default: default:
if (url.path.size() == 0) if (url->path.size() == 0)
url.path.push_back(""); url->path.push_back("");
if (url.path.size() > 0 && ch != kEOL) if (url->path.size() > 0 && ch != kEOL)
AppendOrEscape(&url.path[0], ch, SimpleEncodeSet); AppendOrEscape(&url->path[0], ch, SimpleEncodeSet);
} }
break; break;
case kQuery: case kQuery:
if (ch == kEOL || (!has_state_override && ch == '#')) { if (ch == kEOL || (!has_state_override && ch == '#')) {
SET_HAVE_QUERY() url->flags |= URL_FLAGS_HAS_QUERY;
url.query = buffer; url->query = buffer;
buffer.clear(); buffer.clear();
if (ch == '#') if (ch == '#')
state = kFragment; state = kFragment;
@ -1237,8 +1208,8 @@ namespace url {
case kFragment: case kFragment:
switch (ch) { switch (ch) {
case kEOL: case kEOL:
SET_HAVE_FRAGMENT() url->flags |= URL_FLAGS_HAS_FRAGMENT;
url.fragment = buffer; url->fragment = buffer;
break; break;
case 0: case 0:
break; break;
@ -1247,14 +1218,39 @@ namespace url {
} }
break; break;
default: default:
INVALID_PARSE_STATE() url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
goto done; return;
} }
p++; p++;
} }
}
static void Parse(Environment* env,
Local<Value> recv,
const char* input,
const size_t len,
enum url_parse_state state_override,
Local<Value> base_obj,
Local<Value> context_obj,
Local<Function> cb) {
Isolate* isolate = env->isolate();
Local<Context> context = env->context();
HandleScope handle_scope(isolate);
Context::Scope context_scope(context);
done: const bool has_base = base_obj->IsObject();
struct url_data base;
struct url_data url;
if (context_obj->IsObject())
HarvestContext(env, &url, context_obj.As<Object>());
if (has_base)
HarvestBase(env, &base, base_obj.As<Object>());
URL::Parse(input, len, state_override, &url, &base, has_base);
if (url.flags & URL_FLAGS_INVALID_PARSE_STATE)
return;
// Define the return value placeholders // Define the return value placeholders
const Local<Value> undef = Undefined(isolate); const Local<Value> undef = Undefined(isolate);
@ -1271,22 +1267,22 @@ namespace url {
}; };
argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags); argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
if (!IS_FAILED(url.flags)) { if (!(url.flags & URL_FLAGS_FAILED)) {
if (DOES_HAVE_SCHEME(url)) if (url.flags & URL_FLAGS_HAS_SCHEME)
argv[ARG_PROTOCOL] = OneByteString(isolate, url.scheme.c_str()); argv[ARG_PROTOCOL] = OneByteString(isolate, url.scheme.c_str());
if (DOES_HAVE_USERNAME(url)) if (url.flags & URL_FLAGS_HAS_USERNAME)
argv[ARG_USERNAME] = UTF8STRING(isolate, url.username); argv[ARG_USERNAME] = UTF8STRING(isolate, url.username);
if (DOES_HAVE_PASSWORD(url)) if (url.flags & URL_FLAGS_HAS_PASSWORD)
argv[ARG_PASSWORD] = UTF8STRING(isolate, url.password); argv[ARG_PASSWORD] = UTF8STRING(isolate, url.password);
if (DOES_HAVE_HOST(url)) if (url.flags & URL_FLAGS_HAS_HOST)
argv[ARG_HOST] = UTF8STRING(isolate, url.host); argv[ARG_HOST] = UTF8STRING(isolate, url.host);
if (DOES_HAVE_QUERY(url)) if (url.flags & URL_FLAGS_HAS_QUERY)
argv[ARG_QUERY] = UTF8STRING(isolate, url.query); argv[ARG_QUERY] = UTF8STRING(isolate, url.query);
if (DOES_HAVE_FRAGMENT(url)) if (url.flags & URL_FLAGS_HAS_FRAGMENT)
argv[ARG_FRAGMENT] = UTF8STRING(isolate, url.fragment); argv[ARG_FRAGMENT] = UTF8STRING(isolate, url.fragment);
if (url.port > -1) if (url.port > -1)
argv[ARG_PORT] = Integer::New(isolate, url.port); argv[ARG_PORT] = Integer::New(isolate, url.port);
if (DOES_HAVE_PATH(url)) if (url.flags & URL_FLAGS_HAS_PATH)
argv[ARG_PATH] = Copy(env, url.path); argv[ARG_PATH] = Copy(env, url.path);
} }

91
src/node_url.h

@ -470,19 +470,19 @@ enum url_parse_state {
#define XX(name) name, #define XX(name) name,
PARSESTATES(XX) PARSESTATES(XX)
#undef XX #undef XX
} url_parse_state; };
enum url_flags { enum url_flags {
#define XX(name, val) name = val, #define XX(name, val) name = val,
FLAGS(XX) FLAGS(XX)
#undef XX #undef XX
} url_flags; };
enum url_cb_args { enum url_cb_args {
#define XX(name) name, #define XX(name) name,
ARGS(XX) ARGS(XX)
#undef XX #undef XX
} url_cb_args; };
static inline bool IsSpecial(std::string scheme) { static inline bool IsSpecial(std::string scheme) {
#define XX(name, _) if (scheme == name) return true; #define XX(name, _) if (scheme == name) return true;
@ -528,6 +528,91 @@ struct url_host {
url_host_value value; url_host_value value;
enum url_host_type type; enum url_host_type type;
}; };
class URL {
public:
static void Parse(const char* input,
const size_t len,
enum url_parse_state state_override,
struct url_data* url,
const struct url_data* base,
bool has_base);
URL(const char* input, const size_t len) {
Parse(input, len, kUnknownState, &context_, nullptr, false);
}
URL(const char* input, const size_t len, const URL* base) {
if (base != nullptr)
Parse(input, len, kUnknownState, &context_, &(base->context_), true);
else
Parse(input, len, kUnknownState, &context_, nullptr, false);
}
URL(const char* input, const size_t len,
const char* base, const size_t baselen) {
if (base != nullptr && baselen > 0) {
URL _base(base, baselen);
Parse(input, len, kUnknownState, &context_, &(_base.context_), true);
} else {
Parse(input, len, kUnknownState, &context_, nullptr, false);
}
}
explicit URL(std::string input) :
URL(input.c_str(), input.length()) {}
URL(std::string input, const URL* base) :
URL(input.c_str(), input.length(), base) {}
URL(std::string input, std::string base) :
URL(input.c_str(), input.length(), base.c_str(), base.length()) {}
int32_t flags() {
return context_.flags;
}
int port() {
return context_.port;
}
const std::string& protocol() const {
return context_.scheme;
}
const std::string& username() const {
return context_.username;
}
const std::string& password() const {
return context_.password;
}
const std::string& host() const {
return context_.host;
}
const std::string& query() const {
return context_.query;
}
const std::string& fragment() const {
return context_.fragment;
}
std::string path() {
std::string ret;
for (auto i = context_.path.begin(); i != context_.path.end(); i++) {
ret += '/';
ret += *i;
}
return ret;
}
private:
struct url_data context_;
};
} // namespace url } // namespace url
} // namespace node } // namespace node

Loading…
Cancel
Save