From 535fec83ea890775c31cbe041fc19db9c4b7ff1f Mon Sep 17 00:00:00 2001 From: Ben Noordhuis Date: Tue, 9 Dec 2014 15:41:35 +0100 Subject: [PATCH] src: fix unaligned access in ucs2 string encoder Seen with g++ 4.9.2 on x86_64 Linux: a SIGSEGV is generated when the input to v8::String::NewFromTwoByte() is not suitably aligned. g++ 4.9.2 emits SSE instructions for copy loops. That requires aligned input but that was something StringBytes::Encode() did not enforce until now. Make a properly aligned copy before handing off the input to V8. We could, as an optimization, check that the pointer is aligned on a two-byte boundary but that is technically still UB; pointers-to-char are allowed to alias other pointers but the reverse is not true: a pointer-to-uint16_t that aliases a pointer-to-char is in violation of the pointer aliasing rules. See https://code.google.com/p/v8/issues/detail?id=3694 Fixes segfaulting test simple/test-stream2-writable. PR-URL: https://github.com/iojs/io.js/pull/127 Reviewed-by: Trevor Norris --- src/string_bytes.cc | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/string_bytes.cc b/src/string_bytes.cc index 3e79ce3990..f9892405f3 100644 --- a/src/string_bytes.cc +++ b/src/string_bytes.cc @@ -748,28 +748,28 @@ Local StringBytes::Encode(Isolate* isolate, } case UCS2: { - const uint16_t* out = reinterpret_cast(buf); - uint16_t* dst = nullptr; - if (IsBigEndian()) { - // Node's "ucs2" encoding expects LE character data inside a - // Buffer, so we need to reorder on BE platforms. See - // http://nodejs.org/api/buffer.html regarding Node's "ucs2" - // encoding specification - dst = new uint16_t[buflen / 2]; - for (size_t i = 0; i < buflen / 2; i++) { - dst[i] = (out[i] << 8) | (out[i] >> 8); - } - out = dst; + // Node's "ucs2" encoding expects LE character data inside a + // Buffer, so we need to reorder on BE platforms. See + // http://nodejs.org/api/buffer.html regarding Node's "ucs2" + // encoding specification. Note that we have to make a copy + // to avoid pointer aliasing and unaligned access, something + // that we can't guarantee by just reinterpret_casting |buf|. + size_t srclen = buflen / 2; + uint16_t* src = new uint16_t[srclen]; + for (size_t i = 0, k = 0; i < srclen; i += 1, k += 2) { + const uint8_t lo = static_cast(buf[k + 0]); + const uint8_t hi = static_cast(buf[k + 1]); + src[i] = lo | hi << 8; } - if (buflen < EXTERN_APEX) + if (buflen < EXTERN_APEX) { val = String::NewFromTwoByte(isolate, - out, + src, String::kNormalString, - buflen / 2); - else - val = ExternTwoByteString::NewFromCopy(isolate, out, buflen / 2); - if (dst) - delete[] dst; + srclen); + } else { + val = ExternTwoByteString::NewFromCopy(isolate, src, srclen); + } + delete[] src; break; }