Browse Source

Lowercase protocol and hostname since casing isn't significant.

Signed-off-by: Nick Campbell <nicholas.j.campbell@gmail.com>
v0.7.4-release
Nick Campbell 14 years ago
committed by Ryan Dahl
parent
commit
425b57bedc
  1. 8
      doc/api/url.markdown
  2. 27
      lib/url.js
  3. 39
      test/simple/test-url.js

8
doc/api/url.markdown

@ -9,19 +9,19 @@ string will not be in the parsed object. Examples are shown for the URL
`'http://user:pass@host.com:8080/p/a/t/h?query=string#hash'`
* `href`: The full URL that was originally parsed.
* `href`: The full URL that was originally parsed, with the protocol and hostname lowercased. Authentication information keeps its original case.
Example: `'http://user:pass@host.com:8080/p/a/t/h?query=string#hash'`
* `protocol`: The request protocol.
* `protocol`: The request protocol, lowercased.
Example: `'http:'`
* `host`: The full host portion of the URL, including port and authentication information.
* `host`: The full host portion of the URL, including port and authentication information. The hostname is lowercased; the authentication information keeps its original case.
Example: `'user:pass@host.com:8080'`
* `auth`: The authentication information portion of a URL.
Example: `'user:pass'`
* `hostname`: Just the hostname portion of the host.
* `hostname`: Just the lowercased hostname portion of the host.
Example: `'host.com'`
* `port`: The port number portion of the host.

27
lib/url.js

@ -26,7 +26,7 @@ exports.format = urlFormat;
// define these here so at least they only have to be
// compiled once on the first module load.
var protocolPattern = /^([a-z0-9]+:)/,
var protocolPattern = /^([a-z0-9]+:)/i,
portPattern = /:[0-9]+$/,
delims = ['<', '>', '"', '\'', '`', /\s/],
unwise = ['{', '}', '|', '\\', '^', '~', '[', ']', '`'].concat(delims),
@ -71,14 +71,17 @@ var protocolPattern = /^([a-z0-9]+:)/,
function urlParse(url, parseQueryString, slashesDenoteHost) {
if (url && typeof(url) === 'object' && url.href) return url;
var out = {},
var out = { href: '' },
rest = url;
var proto = protocolPattern.exec(rest);
var proto = protocolPattern.exec(rest),
lowerProto = proto;
if (proto) {
proto = proto[0];
out.protocol = proto;
lowerProto = proto.toLowerCase();
out.protocol = lowerProto;
rest = rest.substr(proto.length);
out.href += lowerProto;
}
// figure out if it's got a host
@ -87,14 +90,15 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
// how the browser resolves relative URLs.
if (slashesDenoteHost || proto || rest.match(/^\/\/[^@\/]+@[^@\/]+/)) {
var slashes = rest.substr(0, 2) === '//';
if (slashes && !(proto && hostlessProtocol[proto])) {
if (slashes && !(lowerProto && hostlessProtocol[lowerProto])) {
rest = rest.substr(2);
out.slashes = true;
out.href += '//';
}
}
if (!hostlessProtocol[proto] &&
(slashes || (proto && !slashedProtocol[proto]))) {
if (!hostlessProtocol[lowerProto] &&
(slashes || (lowerProto && !slashedProtocol[lowerProto]))) {
// there's a hostname.
// the first instance of /, ?, ;, or # ends the host.
// don't enforce full RFC correctness, just be unstupid about it.
@ -121,7 +125,7 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
}
// we've indicated that there is a hostname,
// so even if it's empty, it has to be present.
out.hostname = out.hostname || '';
out.hostname = (out.hostname) ? out.hostname.toLowerCase() : '';
// validate a little.
if (out.hostname.length > hostnameMaxLen) {
@ -136,11 +140,13 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
}
}
}
out.host = ((out.auth)?out.auth +'@':'') + (out.hostname||'') + ((out.port)?':'+out.port:'');
out.href += out.host;
}
// now rest is set to the post-host stuff.
// chop off any delim chars.
if (!unsafeProtocol[proto]) {
if (!unsafeProtocol[lowerProto]) {
var chop = rest.length;
for (var i = 0, l = delims.length; i < l; i++) {
var c = rest.indexOf(delims[i]);
@ -149,6 +155,7 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
}
}
rest = rest.substr(0, chop);
out.href += rest;
}
@ -173,7 +180,7 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
out.query = {};
}
if (rest) out.pathname = rest;
if (slashedProtocol[proto] &&
if (slashedProtocol[lowerProto] &&
out.hostname && !out.pathname) {
out.pathname = '/';
}

39
test/simple/test-url.js

@ -32,6 +32,39 @@ var parseTests = {
'href': '//some_path',
'pathname': '//some_path'
},
'HTTP://www.example.com/' : {
'href': 'http://www.example.com/',
'protocol': 'http:',
'host': 'www.example.com',
'hostname': 'www.example.com',
'pathname': '/'
},
'http://www.ExAmPlE.com/' : {
'href': 'http://www.example.com/',
'protocol': 'http:',
'host': 'www.example.com',
'hostname': 'www.example.com',
'pathname': '/'
},
'http://user:pw@www.ExAmPlE.com/' : {
'href': 'http://user:pw@www.example.com/',
'protocol': 'http:',
'auth': 'user:pw',
'host': 'user:pw@www.example.com',
'hostname': 'www.example.com',
'pathname': '/'
},
'http://USER:PW@www.ExAmPlE.com/' : {
'href': 'http://USER:PW@www.example.com/',
'protocol': 'http:',
'auth': 'USER:PW',
'host': 'USER:PW@www.example.com',
'hostname': 'www.example.com',
'pathname': '/'
},
'http://www.narwhaljs.org/blog/categories?id=news' : {
'href': 'http://www.narwhaljs.org/blog/categories?id=news',
'protocol': 'http:',
@ -154,11 +187,11 @@ for (var u in parseTests) {
'parse(' + u + ').' + i + ' == ' + e + '\nactual: ' + a);
}
var expected = u,
var expected = parseTests[u].href,
actual = url.format(parseTests[u]);
assert.equal(expected, actual,
'format(' + u + ') == ' + u + '\nactual:' + actual);
'format(' + u + ') == ' + expected + '\nactual:' + actual);
}
var parseTestsWithQueryString = {
@ -171,7 +204,7 @@ var parseTestsWithQueryString = {
},
'pathname': '/foo/bar'
},
'http://example.com' : {
'http://example.com/' : {
'href': 'http://example.com/',
'protocol': 'http:',
'slashes': true,

Loading…
Cancel
Save