@ -24,25 +24,40 @@ exports.resolve = urlResolve;
exports . resolveObject = urlResolveObject ;
exports . format = urlFormat ;
// Reference: RFC 3986, RFC 1808, RFC 2396
// define these here so at least they only have to be
// compiled once on the first module load.
var protocolPattern = /^([a-z0-9]+:)/ ,
var protocolPattern = /^([a-z0-9]+:)/i ,
portPattern = /:[0-9]+$/ ,
delims = [ '<' , '>' , '"' , '\'' , '`' , /\s/ ] ,
// RFC 2396: characters reserved for delimiting URLs.
delims = [ '<' , '>' , '"' , '`' , ' ' , '\r' , '\n' , '\t' ] ,
// RFC 2396: characters not allowed for various reasons.
unwise = [ '{' , '}' , '|' , '\\' , '^' , '~' , '[' , ']' , '`' ] . concat ( delims ) ,
nonHostChars = [ '/' , '?' , ';' , '#' ] . concat ( unwise ) ,
// Allowed by RFCs, but a cause of XSS attacks. Always escape these.
autoEscape = [ '\'' ] ,
// Characters that are never ever allowed in a hostname.
// Note that any invalid chars are also handled, but these
// are the ones that are *expected* to be seen, so we fast-path
// them.
nonHostChars = [ '%' , '/' , '?' , ';' , '#' ]
. concat ( unwise ) . concat ( autoEscape ) ,
hostnameMaxLen = 255 ,
hostnamePartPattern = /^[a-z0-9][a-z0-9A-Z-]{0,62}$/ ,
hostnamePartPattern = /^[a-zA-Z0-9][a-z0-9A-Z-]{0,62}$/ ,
hostnamePartStart = /^([a-zA-Z0-9][a-z0-9A-Z-]{0,62})(.*)$/ ,
// protocols that can allow "unsafe" and "unwise" chars.
unsafeProtocol = {
'javascript' : true ,
'javascript:' : true
} ,
// protocols that never have a hostname.
hostlessProtocol = {
'javascript' : true ,
'javascript:' : true ,
'file' : true ,
'file:' : true
} ,
// protocols that always have a path component.
pathedProtocol = {
'http' : true ,
'https' : true ,
@ -54,6 +69,7 @@ var protocolPattern = /^([a-z0-9]+:)/,
'gopher:' : true ,
'file:' : true
} ,
// protocols that always contain a // bit.
slashedProtocol = {
'http' : true ,
'https' : true ,
@ -74,10 +90,19 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
var out = { } ,
rest = url ;
// cut off any delimiters.
// This is to support parsing stuff like "<http://foo.com>"
for ( var i = 0 , l = rest . length ; i < l ; i ++ ) {
if ( delims . indexOf ( rest . charAt ( i ) ) === - 1 ) break ;
}
if ( i !== 0 ) rest = rest . substr ( i ) ;
var proto = protocolPattern . exec ( rest ) ;
if ( proto ) {
proto = proto [ 0 ] ;
out . protocol = proto ;
var lowerProto = proto . toLowerCase ( ) ;
out . protocol = lowerProto ;
rest = rest . substr ( proto . length ) ;
}
@ -119,6 +144,7 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
var key = keys [ i ] ;
out [ key ] = p [ key ] ;
}
// we've indicated that there is a hostname,
// so even if it's empty, it has to be present.
out . hostname = out . hostname || '' ;
@ -130,17 +156,49 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
var hostparts = out . hostname . split ( /\./ ) ;
for ( var i = 0 , l = hostparts . length ; i < l ; i ++ ) {
var part = hostparts [ i ] ;
if ( ! part ) continue ;
if ( ! part . match ( hostnamePartPattern ) ) {
out . hostname = '' ;
var validParts = hostparts . slice ( 0 , i ) ;
var notHost = hostparts . slice ( i + 1 ) ;
var bit = part . match ( hostnamePartStart ) ;
if ( bit ) {
validParts . push ( bit [ 1 ] ) ;
notHost . unshift ( bit [ 2 ] ) ;
}
if ( notHost . length ) {
rest = '/' + notHost . join ( '.' ) + rest
}
out . hostname = validParts . join ( '.' ) ;
break ;
}
}
}
// hostnames are always lower case.
out . hostname = out . hostname . toLowerCase ( ) ;
out . host = ( ( out . auth ) ? out . auth + '@' : '' ) +
( out . hostname || '' ) +
( ( out . port ) ? ':' + out . port : '' ) ;
out . href += out . host ;
}
// now rest is set to the post-host stuff.
// chop off any delim chars.
if ( ! unsafeProtocol [ proto ] ) {
if ( ! unsafeProtocol [ lowerProto ] ) {
// First, make 100% sure that any "autoEscape" chars get
// escaped, even if encodeURIComponent doesn't think they
// need to be.
for ( var i = 0 , l = autoEscape . length ; i < l ; i ++ ) {
var ae = autoEscape [ i ] ;
var esc = encodeURIComponent ( ae ) ;
if ( esc === ae ) {
esc = escape ( ae ) ;
}
rest = rest . split ( ae ) . join ( esc ) ;
}
// Now make sure that delims never appear in a url.
var chop = rest . length ;
for ( var i = 0 , l = delims . length ; i < l ; i ++ ) {
var c = rest . indexOf ( delims [ i ] ) ;