Browse Source

Lowercase protocol and hostname since casing isn't significant.

Signed-off-by: Nick Campbell <nicholas.j.campbell@gmail.com>
v0.7.4-release
Nick Campbell 14 years ago
committed by Ryan Dahl
parent
commit
425b57bedc
  1. 8
      doc/api/url.markdown
  2. 27
      lib/url.js
  3. 39
      test/simple/test-url.js

8
doc/api/url.markdown

@ -9,19 +9,19 @@ string will not be in the parsed object. Examples are shown for the URL
`'http://user:pass@host.com:8080/p/a/t/h?query=string#hash'` `'http://user:pass@host.com:8080/p/a/t/h?query=string#hash'`
* `href`: The full URL that was originally parsed. * `href`: The full URL that was originally parsed. Both the protocol and hostname are lowercased; the authentication information keeps its original case.
Example: `'http://user:pass@host.com:8080/p/a/t/h?query=string#hash'` Example: `'http://user:pass@host.com:8080/p/a/t/h?query=string#hash'`
* `protocol`: The request protocol. * `protocol`: The request protocol, lowercased.
Example: `'http:'` Example: `'http:'`
* `host`: The full host portion of the URL, including port and authentication information. * `host`: The full host portion of the URL, including port and authentication information. The hostname part is lowercased; the authentication information keeps its original case.
Example: `'user:pass@host.com:8080'` Example: `'user:pass@host.com:8080'`
* `auth`: The authentication information portion of a URL. * `auth`: The authentication information portion of a URL.
Example: `'user:pass'` Example: `'user:pass'`
* `hostname`: Just the hostname portion of the host. * `hostname`: Just the lowercased hostname portion of the host.
Example: `'host.com'` Example: `'host.com'`
* `port`: The port number portion of the host. * `port`: The port number portion of the host.

27
lib/url.js

@ -26,7 +26,7 @@ exports.format = urlFormat;
// define these here so at least they only have to be // define these here so at least they only have to be
// compiled once on the first module load. // compiled once on the first module load.
var protocolPattern = /^([a-z0-9]+:)/, var protocolPattern = /^([a-z0-9]+:)/i,
portPattern = /:[0-9]+$/, portPattern = /:[0-9]+$/,
delims = ['<', '>', '"', '\'', '`', /\s/], delims = ['<', '>', '"', '\'', '`', /\s/],
unwise = ['{', '}', '|', '\\', '^', '~', '[', ']', '`'].concat(delims), unwise = ['{', '}', '|', '\\', '^', '~', '[', ']', '`'].concat(delims),
@ -71,14 +71,17 @@ var protocolPattern = /^([a-z0-9]+:)/,
function urlParse(url, parseQueryString, slashesDenoteHost) { function urlParse(url, parseQueryString, slashesDenoteHost) {
if (url && typeof(url) === 'object' && url.href) return url; if (url && typeof(url) === 'object' && url.href) return url;
var out = {}, var out = { href: '' },
rest = url; rest = url;
var proto = protocolPattern.exec(rest); var proto = protocolPattern.exec(rest),
lowerProto = proto;
if (proto) { if (proto) {
proto = proto[0]; proto = proto[0];
out.protocol = proto; lowerProto = proto.toLowerCase();
out.protocol = lowerProto;
rest = rest.substr(proto.length); rest = rest.substr(proto.length);
out.href += lowerProto;
} }
// figure out if it's got a host // figure out if it's got a host
@ -87,14 +90,15 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
// how the browser resolves relative URLs. // how the browser resolves relative URLs.
if (slashesDenoteHost || proto || rest.match(/^\/\/[^@\/]+@[^@\/]+/)) { if (slashesDenoteHost || proto || rest.match(/^\/\/[^@\/]+@[^@\/]+/)) {
var slashes = rest.substr(0, 2) === '//'; var slashes = rest.substr(0, 2) === '//';
if (slashes && !(proto && hostlessProtocol[proto])) { if (slashes && !(lowerProto && hostlessProtocol[lowerProto])) {
rest = rest.substr(2); rest = rest.substr(2);
out.slashes = true; out.slashes = true;
out.href += '//';
} }
} }
if (!hostlessProtocol[proto] && if (!hostlessProtocol[lowerProto] &&
(slashes || (proto && !slashedProtocol[proto]))) { (slashes || (lowerProto && !slashedProtocol[lowerProto]))) {
// there's a hostname. // there's a hostname.
// the first instance of /, ?, ;, or # ends the host. // the first instance of /, ?, ;, or # ends the host.
// don't enforce full RFC correctness, just be unstupid about it. // don't enforce full RFC correctness, just be unstupid about it.
@ -121,7 +125,7 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
} }
// we've indicated that there is a hostname, // we've indicated that there is a hostname,
// so even if it's empty, it has to be present. // so even if it's empty, it has to be present.
out.hostname = out.hostname || ''; out.hostname = (out.hostname) ? out.hostname.toLowerCase() : '';
// validate a little. // validate a little.
if (out.hostname.length > hostnameMaxLen) { if (out.hostname.length > hostnameMaxLen) {
@ -136,11 +140,13 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
} }
} }
} }
out.host = ((out.auth)?out.auth +'@':'') + (out.hostname||'') + ((out.port)?':'+out.port:'');
out.href += out.host;
} }
// now rest is set to the post-host stuff. // now rest is set to the post-host stuff.
// chop off any delim chars. // chop off any delim chars.
if (!unsafeProtocol[proto]) { if (!unsafeProtocol[lowerProto]) {
var chop = rest.length; var chop = rest.length;
for (var i = 0, l = delims.length; i < l; i++) { for (var i = 0, l = delims.length; i < l; i++) {
var c = rest.indexOf(delims[i]); var c = rest.indexOf(delims[i]);
@ -149,6 +155,7 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
} }
} }
rest = rest.substr(0, chop); rest = rest.substr(0, chop);
out.href += rest;
} }
@ -173,7 +180,7 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
out.query = {}; out.query = {};
} }
if (rest) out.pathname = rest; if (rest) out.pathname = rest;
if (slashedProtocol[proto] && if (slashedProtocol[lowerProto] &&
out.hostname && !out.pathname) { out.hostname && !out.pathname) {
out.pathname = '/'; out.pathname = '/';
} }

39
test/simple/test-url.js

@ -32,6 +32,39 @@ var parseTests = {
'href': '//some_path', 'href': '//some_path',
'pathname': '//some_path' 'pathname': '//some_path'
}, },
'HTTP://www.example.com/' : {
'href': 'http://www.example.com/',
'protocol': 'http:',
'host': 'www.example.com',
'hostname': 'www.example.com',
'pathname': '/'
},
'http://www.ExAmPlE.com/' : {
'href': 'http://www.example.com/',
'protocol': 'http:',
'host': 'www.example.com',
'hostname': 'www.example.com',
'pathname': '/'
},
'http://user:pw@www.ExAmPlE.com/' : {
'href': 'http://user:pw@www.example.com/',
'protocol': 'http:',
'auth': 'user:pw',
'host': 'user:pw@www.example.com',
'hostname': 'www.example.com',
'pathname': '/'
},
'http://USER:PW@www.ExAmPlE.com/' : {
'href': 'http://USER:PW@www.example.com/',
'protocol': 'http:',
'auth': 'USER:PW',
'host': 'USER:PW@www.example.com',
'hostname': 'www.example.com',
'pathname': '/'
},
'http://www.narwhaljs.org/blog/categories?id=news' : { 'http://www.narwhaljs.org/blog/categories?id=news' : {
'href': 'http://www.narwhaljs.org/blog/categories?id=news', 'href': 'http://www.narwhaljs.org/blog/categories?id=news',
'protocol': 'http:', 'protocol': 'http:',
@ -154,11 +187,11 @@ for (var u in parseTests) {
'parse(' + u + ').' + i + ' == ' + e + '\nactual: ' + a); 'parse(' + u + ').' + i + ' == ' + e + '\nactual: ' + a);
} }
var expected = u, var expected = parseTests[u].href,
actual = url.format(parseTests[u]); actual = url.format(parseTests[u]);
assert.equal(expected, actual, assert.equal(expected, actual,
'format(' + u + ') == ' + u + '\nactual:' + actual); 'format(' + u + ') == ' + expected + '\nactual:' + actual);
} }
var parseTestsWithQueryString = { var parseTestsWithQueryString = {
@ -171,7 +204,7 @@ var parseTestsWithQueryString = {
}, },
'pathname': '/foo/bar' 'pathname': '/foo/bar'
}, },
'http://example.com' : { 'http://example.com/' : {
'href': 'http://example.com/', 'href': 'http://example.com/',
'protocol': 'http:', 'protocol': 'http:',
'slashes': true, 'slashes': true,

Loading…
Cancel
Save