From 9fc7283a403bb0dec096b76991226cba8e7b73c2 Mon Sep 17 00:00:00 2001 From: isaacs Date: Tue, 15 May 2012 19:06:15 -0700 Subject: [PATCH] Fix #3270 Escape url.parse delims Rather than omitting them. --- lib/url.js | 26 ++++------- test/simple/test-url.js | 96 ++++++++++++++++++++++++++++++++--------- 2 files changed, 84 insertions(+), 38 deletions(-) diff --git a/lib/url.js b/lib/url.js index 31eb536e19..50eb8b20f6 100644 --- a/lib/url.js +++ b/lib/url.js @@ -32,12 +32,16 @@ exports.format = urlFormat; // compiled once on the first module load. var protocolPattern = /^([a-z0-9.+-]+:)/i, portPattern = /:[0-9]*$/, + // RFC 2396: characters reserved for delimiting URLs. + // We actually just auto-escape these. delims = ['<', '>', '"', '`', ' ', '\r', '\n', '\t'], + // RFC 2396: characters not allowed for various reasons. unwise = ['{', '}', '|', '\\', '^', '~', '`'].concat(delims), + // Allowed by RFCs, but cause of XSS attacks. Always escape these. - autoEscape = ['\''], + autoEscape = ['\''].concat(delims), // Characters that are never ever allowed in a hostname. // Note that any invalid chars are also handled, but these // are the ones that are *expected* to be seen, so we fast-path @@ -95,13 +99,9 @@ function urlParse(url, parseQueryString, slashesDenoteHost) { var out = {}, rest = url; - // cut off any delimiters. - // This is to support parse stuff like "" - for (var i = 0, l = rest.length; i < l; i++) { - if (delims.indexOf(rest.charAt(i)) === -1) break; - } - if (i !== 0) rest = rest.substr(i); - + // trim before proceeding. + // This is to support parse stuff like " http://foo.com \n" + rest = rest.trim(); var proto = protocolPattern.exec(rest); if (proto) { @@ -271,16 +271,6 @@ function urlParse(url, parseQueryString, slashesDenoteHost) { } rest = rest.split(ae).join(esc); } - - // Now make sure that delims never appear in a url. - var chop = rest.length; - for (var i = 0, l = delims.length; i < l; i++) { - var c = rest.indexOf(delims[i]); - if (c !== -1) { - chop = Math.min(c, chop); - } - } - rest = rest.substr(0, chop); } diff --git a/test/simple/test-url.js b/test/simple/test-url.js index 7287d0f572..ff4a8449eb 100644 --- a/test/simple/test-url.js +++ b/test/simple/test-url.js @@ -33,6 +33,7 @@ var parseTests = { 'pathname': '//some_path', 'path': '//some_path' }, + 'HTTP://www.example.com/' : { 'href': 'http://www.example.com/', 'protocol': 'http:', @@ -42,6 +43,7 @@ var parseTests = { 'pathname': '/', 'path': '/' }, + 'http://www.ExAmPlE.com/' : { 'href': 'http://www.example.com/', 'protocol': 'http:', @@ -51,6 +53,7 @@ var parseTests = { 'pathname': '/', 'path': '/' }, + 'http://user:pw@www.ExAmPlE.com/' : { 'href': 'http://user:pw@www.example.com/', 'protocol': 'http:', @@ -61,6 +64,7 @@ var parseTests = { 'pathname': '/', 'path': '/' }, + 'http://USER:PW@www.ExAmPlE.com/' : { 'href': 'http://USER:PW@www.example.com/', 'protocol': 'http:', @@ -71,6 +75,7 @@ var parseTests = { 'pathname': '/', 'path': '/' }, + 'http://user@www.example.com/' : { 'href': 'http://user@www.example.com/', 'protocol': 'http:', @@ -81,6 +86,7 @@ var parseTests = { 'pathname': '/', 'path': '/' }, + 'http://user%3Apw@www.example.com/' : { 'href': 'http://user:pw@www.example.com/', 'protocol': 'http:', @@ -91,8 +97,9 @@ var parseTests = { 'pathname': '/', 'path': '/' }, + 'http://x.com/path?that\'s#all, folks' : { - 'href': 'http://x.com/path?that%27s#all,', + 'href': 'http://x.com/path?that%27s#all,%20folks', 'protocol': 'http:', 'slashes': true, 'host': 'x.com', @@ -100,9 +107,10 @@ var parseTests = { 'search': '?that%27s', 'query': 'that%27s', 'pathname': '/path', - 'hash': '#all,', + 'hash': '#all,%20folks', 'path': '/path?that%27s' }, + 'HTTP://X.COM/Y' : { 'href': 'http://x.com/Y', 'protocol': 'http:', @@ -112,9 +120,10 @@ var parseTests = { 'pathname': '/Y', 'path': '/Y' }, + // an unexpected invalid char in the hostname. 'HtTp://x.y.cOm*a/b/c?d=e#f gi' : { - 'href': 'http://x.y.com/*a/b/c?d=e#f', + 'href': 'http://x.y.com/*a/b/c?d=e#f%20g%3Ch%3Ei', 'protocol': 'http:', 'slashes': true, 'host': 'x.y.com', @@ -122,12 +131,13 @@ var parseTests = { 'pathname': '/*a/b/c', 'search': '?d=e', 'query': 'd=e', - 'hash': '#f', + 'hash': '#f%20g%3Ch%3Ei', 'path': '/*a/b/c?d=e' }, + // make sure that we don't accidentally lcast the path parts. 'HtTp://x.y.cOm*A/b/c?d=e#f gi' : { - 'href': 'http://x.y.com/*A/b/c?d=e#f', + 'href': 'http://x.y.com/*A/b/c?d=e#f%20g%3Ch%3Ei', 'protocol': 'http:', 'slashes': true, 'host': 'x.y.com', @@ -135,9 +145,10 @@ var parseTests = { 'pathname': '/*A/b/c', 'search': '?d=e', 'query': 'd=e', - 'hash': '#f', + 'hash': '#f%20g%3Ch%3Ei', 'path': '/*A/b/c?d=e' }, + 'http://x...y...#p': { 'href': 'http://x...y.../#p', 'protocol': 'http:', @@ -148,24 +159,23 @@ var parseTests = { 'pathname': '/', 'path': '/' }, + 'http://x/p/"quoted"': { - 'href': 'http://x/p/', + 'href': 'http://x/p/%22quoted%22', 'protocol': 'http:', 'slashes': true, 'host': 'x', 'hostname': 'x', - 'pathname': '/p/', - 'path': '/p/' + 'pathname': '/p/%22quoted%22', + 'path': '/p/%22quoted%22' }, + ' Is a URL!': { - 'href': 'http://goo.corn/bread', - 'protocol': 'http:', - 'slashes': true, - 'host': 'goo.corn', - 'hostname': 'goo.corn', - 'pathname': '/bread', - 'path': '/bread' + 'href': '%3Chttp://goo.corn/bread%3E%20Is%20a%20URL!', + 'pathname': '%3Chttp://goo.corn/bread%3E%20Is%20a%20URL!', + 'path': '%3Chttp://goo.corn/bread%3E%20Is%20a%20URL!' }, + 'http://www.narwhaljs.org/blog/categories?id=news' : { 'href': 'http://www.narwhaljs.org/blog/categories?id=news', 'protocol': 'http:', @@ -177,6 +187,7 @@ var parseTests = { 'pathname': '/blog/categories', 'path': '/blog/categories?id=news' }, + 'http://mt0.google.com/vt/lyrs=m@114&hl=en&src=api&x=2&y=2&z=3&s=' : { 'href': 'http://mt0.google.com/vt/lyrs=m@114&hl=en&src=api&x=2&y=2&z=3&s=', 'protocol': 'http:', @@ -186,6 +197,7 @@ var parseTests = { 'pathname': '/vt/lyrs=m@114&hl=en&src=api&x=2&y=2&z=3&s=', 'path': '/vt/lyrs=m@114&hl=en&src=api&x=2&y=2&z=3&s=' }, + 'http://mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=' : { 'href': 'http://mt0.google.com/vt/lyrs=m@114???&hl=en&src=api' + '&x=2&y=2&z=3&s=', @@ -198,6 +210,7 @@ var parseTests = { 'pathname': '/vt/lyrs=m@114', 'path': '/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=' }, + 'http://user:pass@mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=': { 'href': 'http://user:pass@mt0.google.com/vt/lyrs=m@114???' + @@ -212,6 +225,7 @@ var parseTests = { 'pathname': '/vt/lyrs=m@114', 'path': '/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=' }, + 'file:///etc/passwd' : { 'href': 'file:///etc/passwd', 'slashes': true, @@ -221,6 +235,7 @@ var parseTests = { 'host': '', 'path': '/etc/passwd' }, + 'file://localhost/etc/passwd' : { 'href': 'file://localhost/etc/passwd', 'protocol': 'file:', @@ -230,6 +245,7 @@ var parseTests = { 'host': 'localhost', 'path': '/etc/passwd' }, + 'file://foo/etc/passwd' : { 'href': 'file://foo/etc/passwd', 'protocol': 'file:', @@ -239,6 +255,7 @@ var parseTests = { 'host': 'foo', 'path': '/etc/passwd' }, + 'file:///etc/node/' : { 'href': 'file:///etc/node/', 'slashes': true, @@ -248,6 +265,7 @@ var parseTests = { 'host': '', 'path': '/etc/node/' }, + 'file://localhost/etc/node/' : { 'href': 'file://localhost/etc/node/', 'protocol': 'file:', @@ -257,6 +275,7 @@ var parseTests = { 'host': 'localhost', 'path': '/etc/node/' }, + 'file://foo/etc/node/' : { 'href': 'file://foo/etc/node/', 'protocol': 'file:', @@ -266,12 +285,14 @@ var parseTests = { 'host': 'foo', 'path': '/etc/node/' }, + 'http:/baz/../foo/bar' : { 'href': 'http:/baz/../foo/bar', 'protocol': 'http:', 'pathname': '/baz/../foo/bar', 'path': '/baz/../foo/bar' }, + 'http://user:pass@example.com:8000/foo/bar?baz=quux#frag' : { 'href': 'http://user:pass@example.com:8000/foo/bar?baz=quux#frag', 'protocol': 'http:', @@ -286,6 +307,7 @@ var parseTests = { 'pathname': '/foo/bar', 'path': '/foo/bar?baz=quux' }, + '//user:pass@example.com:8000/foo/bar?baz=quux#frag' : { 'href': '//user:pass@example.com:8000/foo/bar?baz=quux#frag', 'slashes': true, @@ -299,6 +321,7 @@ var parseTests = { 'pathname': '/foo/bar', 'path': '/foo/bar?baz=quux' }, + '/foo/bar?baz=quux#frag' : { 'href': '/foo/bar?baz=quux#frag', 'hash': '#frag', @@ -307,6 +330,7 @@ var parseTests = { 'pathname': '/foo/bar', 'path': '/foo/bar?baz=quux' }, + 'http:/foo/bar?baz=quux#frag' : { 'href': 'http:/foo/bar?baz=quux#frag', 'protocol': 'http:', @@ -316,6 +340,7 @@ var parseTests = { 'pathname': '/foo/bar', 'path': '/foo/bar?baz=quux' }, + 'mailto:foo@bar.com?subject=hello' : { 'href': 'mailto:foo@bar.com?subject=hello', 'protocol': 'mailto:', @@ -326,12 +351,14 @@ var parseTests = { 'query': 'subject=hello', 'path': '?subject=hello' }, + 'javascript:alert(\'hello\');' : { 'href': 'javascript:alert(\'hello\');', 'protocol': 'javascript:', 'pathname': 'alert(\'hello\');', 'path': 'alert(\'hello\');' }, + 'xmpp:isaacschlueter@jabber.org' : { 'href': 'xmpp:isaacschlueter@jabber.org', 'protocol': 'xmpp:', @@ -339,6 +366,7 @@ var parseTests = { 'auth': 'isaacschlueter', 'hostname': 'jabber.org' }, + 'http://atpass:foo%40bar@127.0.0.1:8080/path?search=foo#bar' : { 'href' : 'http://atpass:foo%40bar@127.0.0.1:8080/path?search=foo#bar', 'protocol' : 'http:', @@ -353,6 +381,7 @@ var parseTests = { 'hash' : '#bar', 'path': '/path?search=foo' }, + 'svn+ssh://foo/bar': { 'href': 'svn+ssh://foo/bar', 'host': 'foo', @@ -362,6 +391,7 @@ var parseTests = { 'path': '/bar', 'slashes': true }, + 'dash-test://foo/bar': { 'href': 'dash-test://foo/bar', 'host': 'foo', @@ -371,6 +401,7 @@ var parseTests = { 'path': '/bar', 'slashes': true }, + 'dash-test:foo/bar': { 'href': 'dash-test:foo/bar', 'host': 'foo', @@ -379,6 +410,7 @@ var parseTests = { 'pathname': '/bar', 'path': '/bar' }, + 'dot.test://foo/bar': { 'href': 'dot.test://foo/bar', 'host': 'foo', @@ -388,6 +420,7 @@ var parseTests = { 'path': '/bar', 'slashes': true }, + 'dot.test:foo/bar': { 'href': 'dot.test:foo/bar', 'host': 'foo', @@ -396,6 +429,7 @@ var parseTests = { 'pathname': '/bar', 'path': '/bar' }, + // IDNA tests 'http://www.日本語.com/' : { 'href': 'http://www.xn--wgv71a119e.com/', @@ -406,6 +440,7 @@ var parseTests = { 'pathname': '/', 'path': '/' }, + 'http://example.Bücher.com/' : { 'href': 'http://example.xn--bcher-kva.com/', 'protocol': 'http:', @@ -415,6 +450,7 @@ var parseTests = { 'pathname': '/', 'path': '/' }, + 'http://www.Äffchen.com/' : { 'href': 'http://www.xn--ffchen-9ta.com/', 'protocol': 'http:', @@ -424,8 +460,9 @@ var parseTests = { 'pathname': '/', 'path': '/' }, + 'http://www.Äffchen.cOm*A/b/c?d=e#f gi' : { - 'href': 'http://www.xn--ffchen-9ta.com/*A/b/c?d=e#f', + 'href': 'http://www.xn--ffchen-9ta.com/*A/b/c?d=e#f%20g%3Ch%3Ei', 'protocol': 'http:', 'slashes': true, 'host': 'www.xn--ffchen-9ta.com', @@ -433,9 +470,10 @@ var parseTests = { 'pathname': '/*A/b/c', 'search': '?d=e', 'query': 'd=e', - 'hash': '#f', + 'hash': '#f%20g%3Ch%3Ei', 'path': '/*A/b/c?d=e' }, + 'http://SÉLIER.COM/' : { 'href': 'http://xn--slier-bsa.com/', 'protocol': 'http:', @@ -445,6 +483,7 @@ var parseTests = { 'pathname': '/', 'path': '/' }, + 'http://ليهمابتكلموشعربي؟.ي؟/' : { 'href': 'http://xn--egbpdaj6bu4bxfgehfvwxn.xn--egb9f/', 'protocol': 'http:', @@ -454,6 +493,7 @@ var parseTests = { 'pathname': '/', 'path': '/' }, + 'http://➡.ws/➡' : { 'href': 'http://xn--hgi.ws/➡', 'protocol': 'http:', @@ -463,6 +503,7 @@ var parseTests = { 'pathname': '/➡', 'path': '/➡' }, + 'http://bucket_name.s3.amazonaws.com/image.jpg': { protocol: 'http:', 'slashes': true, @@ -473,6 +514,7 @@ var parseTests = { href: 'http://bucket_name.s3.amazonaws.com/image.jpg', 'path': '/image.jpg' }, + 'git+http://github.com/joyent/node.git': { protocol: 'git+http:', slashes: true, @@ -482,6 +524,7 @@ var parseTests = { path: '/joyent/node.git', href: 'git+http://github.com/joyent/node.git' }, + //if local1@domain1 is uses as a relative URL it may //be parse into auth@hostname, but here there is no //way to make it work in url.parse, I add the test to be explicit @@ -490,6 +533,7 @@ var parseTests = { 'path': 'local1@domain1', 'href': 'local1@domain1' }, + //While this may seem counter-intuitive, a browser will parse // as a path. 'www.example.com' : { @@ -497,12 +541,14 @@ var parseTests = { 'pathname': 'www.example.com', 'path': 'www.example.com' }, + // ipv6 support '[fe80::1]': { 'href': '[fe80::1]', 'pathname': '[fe80::1]', 'path': '[fe80::1]' }, + 'coap://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]': { 'protocol': 'coap:', 'slashes': true, @@ -512,6 +558,7 @@ var parseTests = { 'pathname': '/', 'path': '/' }, + 'coap://[1080:0:0:0:8:800:200C:417A]:61616/': { 'protocol': 'coap:', 'slashes': true, @@ -522,6 +569,7 @@ var parseTests = { 'pathname': '/', 'path': '/' }, + 'http://user:password@[3ffe:2a00:100:7031::1]:8080': { 'protocol': 'http:', 'slashes': true, @@ -533,6 +581,7 @@ var parseTests = { 'pathname': '/', 'path': '/' }, + 'coap://u:p@[::192.9.5.5]:61616/.well-known/r?n=Temperature': { 'protocol': 'coap:', 'slashes': true, @@ -546,6 +595,7 @@ var parseTests = { 'pathname': '/.well-known/r', 'path': '/.well-known/r?n=Temperature' }, + // empty port 'http://example.com:': { 'protocol': 'http:', @@ -556,6 +606,7 @@ var parseTests = { 'pathname': '/', 'path': '/' }, + 'http://example.com:/a/b.html': { 'protocol': 'http:', 'slashes': true, @@ -565,6 +616,7 @@ var parseTests = { 'pathname': '/a/b.html', 'path': '/a/b.html' }, + 'http://example.com:?a=b': { 'protocol': 'http:', 'slashes': true, @@ -576,6 +628,7 @@ var parseTests = { 'pathname': '/', 'path': '/?a=b' }, + 'http://example.com:#abc': { 'protocol': 'http:', 'slashes': true, @@ -586,6 +639,7 @@ var parseTests = { 'pathname': '/', 'path': '/' }, + 'http://[fe80::1]:/a/b?a=b#abc': { 'protocol': 'http:', 'slashes': true, @@ -602,9 +656,11 @@ var parseTests = { for (var u in parseTests) { var actual = url.parse(u), + spaced = url.parse(' \t ' + u + '\n\t'); expected = parseTests[u]; assert.deepEqual(actual, expected); + assert.deepEqual(spaced, expected); var expected = parseTests[u].href, actual = url.format(parseTests[u]); @@ -705,10 +761,10 @@ var formatTests = { 'pathname': '/' }, 'http://google.com" onload="alert(42)/' : { - 'href': 'http://google.com/', + 'href': 'http://google.com/%22%20onload=%22alert(42)/', 'protocol': 'http:', 'host': 'google.com', - 'pathname': '/' + 'pathname': '/%22%20onload=%22alert(42)/' }, 'http://a.com/a/b/c?s#h' : { 'href': 'http://a.com/a/b/c?s#h',