From 0f58d3cbefbe744a8e5f9383c08eefc2991b8d1c Mon Sep 17 00:00:00 2001 From: Daijiro Wachi Date: Mon, 1 May 2017 17:26:21 +0200 Subject: [PATCH] src: support domains with empty labels Follow the spec of domainToASCII/domainToUnicode in the WHATWG URL Standard, and synchronise WPT url test data. Refs: https://github.com/w3c/web-platform-tests/pull/5397 PR-URL: https://github.com/nodejs/node/pull/12707 Reviewed-By: James M Snell Reviewed-By: Timothy Gu --- src/node_i18n.cc | 17 ++++++++++++++ test/fixtures/url-idna.js | 19 ++++++--------- test/fixtures/url-tests.js | 48 +++++++++++++++++++++++++++++++++++++- 3 files changed, 71 insertions(+), 13 deletions(-) diff --git a/src/node_i18n.cc b/src/node_i18n.cc index 13fb77b15d..6d966bb117 100644 --- a/src/node_i18n.cc +++ b/src/node_i18n.cc @@ -461,6 +461,13 @@ int32_t ToUnicode(MaybeStackBuffer* buf, &status); } + // UTS #46's ToUnicode operation applies no validation of domain name length + // (nor a flag requesting it to do so, like VerifyDnsLength for ToASCII). For + // that reason, unlike ToASCII below, ICU4C correctly accepts long domain + // names. However, ICU4C still sets the EMPTY_LABEL error, contrary to UTS + // #46. Therefore, explicitly filter out that error here. + info.errors &= ~UIDNA_ERROR_EMPTY_LABEL; + if (U_FAILURE(status) || (!lenient && info.errors != 0)) { len = -1; buf->SetLength(0); @@ -500,6 +507,16 @@ int32_t ToASCII(MaybeStackBuffer* buf, &status); } + // The WHATWG URL "domain to ASCII" algorithm explicitly sets the + // VerifyDnsLength flag to false, which disables the domain name length + // verification step in ToASCII (as specified by UTS #46). Unfortunately, + // ICU4C's IDNA module does not support disabling this flag through `options`, + // so just filter out the errors that may be caused by the verification step + // afterwards. 
+ info.errors &= ~UIDNA_ERROR_EMPTY_LABEL; + info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG; + info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG; + if (U_FAILURE(status) || (!lenient && info.errors != 0)) { len = -1; buf->SetLength(0); diff --git a/test/fixtures/url-idna.js b/test/fixtures/url-idna.js index af169bb04a..f105adeed1 100644 --- a/test/fixtures/url-idna.js +++ b/test/fixtures/url-idna.js @@ -182,23 +182,18 @@ module.exports = { ascii: 'xn--vitnam-jk8b.icom.museum', unicode: 'việtnam.icom.museum' }, - // long URL - { - ascii: `${`${'a'.repeat(63)}.`.repeat(3)}com`, - unicode: `${`${'a'.repeat(63)}.`.repeat(3)}com` - } - ], - invalid: [ // long label { - url: `${'a'.repeat(64)}.com`, - mode: 'ascii' + ascii: `${'a'.repeat(64)}.com`, + unicode: `${'a'.repeat(64)}.com`, }, // long URL { - url: `${`${'a'.repeat(63)}.`.repeat(4)}com`, - mode: 'ascii' - }, + ascii: `${`${'a'.repeat(64)}.`.repeat(4)}com`, + unicode: `${`${'a'.repeat(64)}.`.repeat(4)}com` + } + ], + invalid: [ // invalid character { url: '\ufffd.com', diff --git a/test/fixtures/url-tests.js b/test/fixtures/url-tests.js index 69d289bd06..355388eaaf 100644 --- a/test/fixtures/url-tests.js +++ b/test/fixtures/url-tests.js @@ -1,7 +1,7 @@ 'use strict'; /* WPT Refs: - https://github.com/w3c/web-platform-tests/blob/3eff1bd/url/urltestdata.json + https://github.com/w3c/web-platform-tests/blob/3afae94/url/urltestdata.json License: http://www.w3.org/Consortium/Legal/2008/04-testsuite-copyright.html */ module.exports = @@ -3789,6 +3789,52 @@ module.exports = "search": "", "hash": "" }, + "Domains with empty labels", + { + "input": "http://./", + "base": "about:blank", + "href": "http://./", + "origin": "http://.", + "protocol": "http:", + "username": "", + "password": "", + "host": ".", + "hostname": ".", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://../", + "base": "about:blank", + "href": "http://../", + "origin": "http://..", + "protocol": "http:", + "username": 
"", + "password": "", + "host": "..", + "hostname": "..", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://0..0x300/", + "base": "about:blank", + "href": "http://0..0x300/", + "origin": "http://0..0x300", + "protocol": "http:", + "username": "", + "password": "", + "host": "0..0x300", + "hostname": "0..0x300", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, "Broken IPv6", { "input": "http://[www.google.com]/",