diff --git a/doc/api/url.markdown b/doc/api/url.markdown index 5f9b31f451..ac5b1444a3 100644 --- a/doc/api/url.markdown +++ b/doc/api/url.markdown @@ -9,19 +9,19 @@ string will not be in the parsed object. Examples are shown for the URL `'http://user:pass@host.com:8080/p/a/t/h?query=string#hash'` -* `href`: The full URL that was originally parsed. +* `href`: The full URL that was originally parsed. Both the protocol and hostname are lowercased. Example: `'http://user:pass@host.com:8080/p/a/t/h?query=string#hash'` -* `protocol`: The request protocol. +* `protocol`: The request protocol, lowercased. Example: `'http:'` -* `host`: The full host portion of the URL, including port and authentication information. +* `host`: The full host portion of the URL, including port and authentication information. The hostname is lowercased; the authentication information keeps its original case. Example: `'user:pass@host.com:8080'` * `auth`: The authentication information portion of a URL. Example: `'user:pass'` -* `hostname`: Just the hostname portion of the host. +* `hostname`: Just the lowercased hostname portion of the host. Example: `'host.com'` * `port`: The port number portion of the host. diff --git a/lib/url.js b/lib/url.js index cbea2f0fc3..68f9ae5365 100644 --- a/lib/url.js +++ b/lib/url.js @@ -26,7 +26,7 @@ exports.format = urlFormat; // define these here so at least they only have to be // compiled once on the first module load. 
-var protocolPattern = /^([a-z0-9]+:)/, +var protocolPattern = /^([a-z0-9]+:)/i, portPattern = /:[0-9]+$/, delims = ['<', '>', '"', '\'', '`', /\s/], unwise = ['{', '}', '|', '\\', '^', '~', '[', ']', '`'].concat(delims), @@ -71,14 +71,17 @@ var protocolPattern = /^([a-z0-9]+:)/, function urlParse(url, parseQueryString, slashesDenoteHost) { if (url && typeof(url) === 'object' && url.href) return url; - var out = {}, + var out = { href: '' }, rest = url; - var proto = protocolPattern.exec(rest); + var proto = protocolPattern.exec(rest), + lowerProto = proto; if (proto) { proto = proto[0]; - out.protocol = proto; + lowerProto = proto.toLowerCase(); + out.protocol = lowerProto; rest = rest.substr(proto.length); + out.href += lowerProto; } // figure out if it's got a host @@ -87,14 +90,15 @@ function urlParse(url, parseQueryString, slashesDenoteHost) { // how the browser resolves relative URLs. if (slashesDenoteHost || proto || rest.match(/^\/\/[^@\/]+@[^@\/]+/)) { var slashes = rest.substr(0, 2) === '//'; - if (slashes && !(proto && hostlessProtocol[proto])) { + if (slashes && !(lowerProto && hostlessProtocol[lowerProto])) { rest = rest.substr(2); out.slashes = true; + out.href += '//'; } } - if (!hostlessProtocol[proto] && - (slashes || (proto && !slashedProtocol[proto]))) { + if (!hostlessProtocol[lowerProto] && + (slashes || (lowerProto && !slashedProtocol[lowerProto]))) { // there's a hostname. // the first instance of /, ?, ;, or # ends the host. // don't enforce full RFC correctness, just be unstupid about it. @@ -121,7 +125,7 @@ function urlParse(url, parseQueryString, slashesDenoteHost) { } // we've indicated that there is a hostname, // so even if it's empty, it has to be present. - out.hostname = out.hostname || ''; + out.hostname = (out.hostname) ? out.hostname.toLowerCase() : ''; // validate a little. 
if (out.hostname.length > hostnameMaxLen) { @@ -136,11 +140,13 @@ function urlParse(url, parseQueryString, slashesDenoteHost) { } } } + out.host = ((out.auth)?out.auth +'@':'') + (out.hostname||'') + ((out.port)?':'+out.port:''); + out.href += out.host; } // now rest is set to the post-host stuff. // chop off any delim chars. - if (!unsafeProtocol[proto]) { + if (!unsafeProtocol[lowerProto]) { var chop = rest.length; for (var i = 0, l = delims.length; i < l; i++) { var c = rest.indexOf(delims[i]); @@ -149,6 +155,7 @@ function urlParse(url, parseQueryString, slashesDenoteHost) { } } rest = rest.substr(0, chop); + out.href += rest; } @@ -173,7 +180,7 @@ function urlParse(url, parseQueryString, slashesDenoteHost) { out.query = {}; } if (rest) out.pathname = rest; - if (slashedProtocol[proto] && + if (slashedProtocol[lowerProto] && out.hostname && !out.pathname) { out.pathname = '/'; } diff --git a/test/simple/test-url.js b/test/simple/test-url.js index 4f3d139ca9..0424543706 100644 --- a/test/simple/test-url.js +++ b/test/simple/test-url.js @@ -32,6 +32,39 @@ var parseTests = { 'href': '//some_path', 'pathname': '//some_path' }, + 'HTTP://www.example.com/' : { + 'href': 'http://www.example.com/', + 'protocol': 'http:', + 'host': 'www.example.com', + 'hostname': 'www.example.com', + 'pathname': '/' + }, + 'http://www.ExAmPlE.com/' : { + 'href': 'http://www.example.com/', + 'protocol': 'http:', + 'host': 'www.example.com', + 'hostname': 'www.example.com', + 'pathname': '/' + + }, + 'http://user:pw@www.ExAmPlE.com/' : { + 'href': 'http://user:pw@www.example.com/', + 'protocol': 'http:', + 'auth': 'user:pw', + 'host': 'user:pw@www.example.com', + 'hostname': 'www.example.com', + 'pathname': '/' + + }, + 'http://USER:PW@www.ExAmPlE.com/' : { + 'href': 'http://USER:PW@www.example.com/', + 'protocol': 'http:', + 'auth': 'USER:PW', + 'host': 'USER:PW@www.example.com', + 'hostname': 'www.example.com', + 'pathname': '/' + + }, 'http://www.narwhaljs.org/blog/categories?id=news' 
: { 'href': 'http://www.narwhaljs.org/blog/categories?id=news', 'protocol': 'http:', @@ -154,11 +187,11 @@ for (var u in parseTests) { 'parse(' + u + ').' + i + ' == ' + e + '\nactual: ' + a); } - var expected = u, + var expected = parseTests[u].href, actual = url.format(parseTests[u]); assert.equal(expected, actual, - 'format(' + u + ') == ' + u + '\nactual:' + actual); + 'format(' + u + ') == ' + expected + '\nactual:' + actual); } var parseTestsWithQueryString = { @@ -171,7 +204,7 @@ var parseTestsWithQueryString = { }, 'pathname': '/foo/bar' }, - 'http://example.com' : { + 'http://example.com/' : { 'href': 'http://example.com/', 'protocol': 'http:', 'slashes': true,