From 56fde66c46653e5c0fbc6e8960d8a013af35f42b Mon Sep 17 00:00:00 2001 From: Ben Noordhuis Date: Wed, 10 Dec 2014 17:33:56 +0100 Subject: [PATCH] src: redo unaligned access workaround Introduce two-byte overloads of node::Encode() and StringBytes::Encode() that ensure that the input is suitably aligned. Revisits commit 535fec8 from yesterday. --- src/node.cc | 12 +++++---- src/node.h | 18 +++++++++++-- src/node_buffer.cc | 34 +++++++++++++++++++++++++ src/node_crypto.cc | 4 +-- src/string_bytes.cc | 62 ++++++++++++++++++++++++++------------------- src/string_bytes.h | 5 ++++ 6 files changed, 100 insertions(+), 35 deletions(-) diff --git a/src/node.cc b/src/node.cc index a48e2288bf..e2a506680b 100644 --- a/src/node.cc +++ b/src/node.cc @@ -1219,13 +1219,15 @@ enum encoding ParseEncoding(Isolate* isolate, } Local Encode(Isolate* isolate, - const void* buf, + const char* buf, size_t len, enum encoding encoding) { - return StringBytes::Encode(isolate, - static_cast(buf), - len, - encoding); + CHECK_NE(encoding, UCS2); + return StringBytes::Encode(isolate, buf, len, encoding); +} + +Local Encode(Isolate* isolate, const uint16_t* buf, size_t len) { + return StringBytes::Encode(isolate, buf, len); } // Returns -1 if the handle was not valid for decoding diff --git a/src/node.h b/src/node.h index afca6bf0d3..80bb20d40a 100644 --- a/src/node.h +++ b/src/node.h @@ -142,6 +142,7 @@ NODE_EXTERN v8::Handle MakeCallback( #endif #include +#include #ifndef NODE_STRINGIFY #define NODE_STRINGIFY(n) NODE_STRINGIFY_HELPER(n) @@ -267,16 +268,29 @@ NODE_DEPRECATED("Use FatalException(isolate, ...)", return FatalException(v8::Isolate::GetCurrent(), try_catch); }) +// Don't call with encoding=UCS2. NODE_EXTERN v8::Local Encode(v8::Isolate* isolate, - const void* buf, + const char* buf, size_t len, enum encoding encoding = BINARY); + +NODE_EXTERN v8::Local Encode(v8::Isolate* isolate, + const uint16_t* buf, + size_t len); + NODE_DEPRECATED("Use Encode(isolate, ...)", inline v8::Local Encode( const void* buf, size_t len, enum encoding encoding = BINARY) { - return Encode(v8::Isolate::GetCurrent(), buf, len, encoding); + v8::Isolate* isolate = v8::Isolate::GetCurrent(); + if (encoding == UCS2) { + assert(reinterpret_cast(buf) % sizeof(uint16_t) == 0 && + "UCS2 buffer must be aligned on two-byte boundary."); + const uint16_t* that = static_cast(buf); + return Encode(isolate, that, len / sizeof(*that)); + } + return Encode(isolate, static_cast(buf), len, encoding); }) // Returns -1 if the handle was not valid for decoding diff --git a/src/node_buffer.cc b/src/node_buffer.cc index 4c1caeb6ec..17477c01fa 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -264,6 +264,40 @@ void StringSlice(const FunctionCallbackInfo& args) { } +template <> +void StringSlice(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + + ARGS_THIS(args.This()) + SLICE_START_END(args[0], args[1], obj_length) + length /= 2; + + const char* data = obj_data + start; + const uint16_t* buf; + bool release = false; + + if (reinterpret_cast(data) % sizeof(*buf) == 0) { + buf = reinterpret_cast(data); + } else { + // Make a copy to avoid unaligned accesses in v8::String::NewFromTwoByte(). + uint16_t* copy = new uint16_t[length]; + for (size_t i = 0, k = 0; i < length; i += 1, k += 2) { + // Assumes that the input is little endian. + const uint8_t lo = static_cast(data[k + 0]); + const uint8_t hi = static_cast(data[k + 1]); + copy[i] = lo | hi << 8; + } + buf = copy; + release = true; + } + + args.GetReturnValue().Set(StringBytes::Encode(env->isolate(), buf, length)); + + if (release) + delete[] buf; +} + + void BinarySlice(const FunctionCallbackInfo& args) { StringSlice(args); } diff --git a/src/node_crypto.cc b/src/node_crypto.cc index 918c2b9089..84625ac8c0 100644 --- a/src/node_crypto.cc +++ b/src/node_crypto.cc @@ -1447,8 +1447,8 @@ void SSLWrap::GetSession(const FunctionCallbackInfo& args) { int slen = i2d_SSL_SESSION(sess, nullptr); CHECK_GT(slen, 0); - unsigned char* sbuf = new unsigned char[slen]; - unsigned char* p = sbuf; + char* sbuf = new char[slen]; + unsigned char* p = reinterpret_cast(sbuf); i2d_SSL_SESSION(sess, &p); args.GetReturnValue().Set(Encode(env->isolate(), sbuf, slen, BUFFER)); delete[] sbuf; diff --git a/src/string_bytes.cc b/src/string_bytes.cc index f9892405f3..e646087b9d 100644 --- a/src/string_bytes.cc +++ b/src/string_bytes.cc @@ -690,6 +690,7 @@ Local StringBytes::Encode(Isolate* isolate, enum encoding encoding) { EscapableHandleScope scope(isolate); + CHECK_NE(encoding, UCS2); CHECK_LE(buflen, Buffer::kMaxLength); if (!buflen && encoding != BUFFER) return scope.Escape(String::Empty(isolate)); @@ -747,32 +748,6 @@ Local StringBytes::Encode(Isolate* isolate, break; } - case UCS2: { - // Node's "ucs2" encoding expects LE character data inside a - // Buffer, so we need to reorder on BE platforms. See - // http://nodejs.org/api/buffer.html regarding Node's "ucs2" - // encoding specification. Note that we have to make a copy - // to avoid pointer aliasing and unaligned access, something - // that we can't guarantee by just reinterpret_casting |buf|. - size_t srclen = buflen / 2; - uint16_t* src = new uint16_t[srclen]; - for (size_t i = 0, k = 0; i < srclen; i += 1, k += 2) { - const uint8_t lo = static_cast(buf[k + 0]); - const uint8_t hi = static_cast(buf[k + 1]); - src[i] = lo | hi << 8; - } - if (buflen < EXTERN_APEX) { - val = String::NewFromTwoByte(isolate, - src, - String::kNormalString, - srclen); - } else { - val = ExternTwoByteString::NewFromCopy(isolate, src, srclen); - } - delete[] src; - break; - } - case HEX: { size_t dlen = buflen * 2; char* dst = new char[dlen]; @@ -796,4 +771,39 @@ Local StringBytes::Encode(Isolate* isolate, return scope.Escape(val); } + +Local StringBytes::Encode(Isolate* isolate, + const uint16_t* buf, + size_t buflen) { + const uint16_t* src = buf; + + // Node's "ucs2" encoding expects LE character data inside a + // Buffer, so we need to reorder on BE platforms. See + // http://nodejs.org/api/buffer.html regarding Node's "ucs2" + // encoding specification. + if (IsBigEndian()) { + // Inefficient, see StringSlice() in src/node_buffer.cc; + // this is potentially the second copy of the actual input. + uint16_t* copy = new uint16_t[buflen]; + for (size_t i = 0; i < buflen; i += 1) + copy[i] = buf[i] << 8 | buf[i] >> 8; + src = copy; + } + + Local val; + if (buflen < EXTERN_APEX) { + val = String::NewFromTwoByte(isolate, + src, + String::kNormalString, + buflen); + } else { + val = ExternTwoByteString::NewFromCopy(isolate, src, buflen); + } + + if (src != buf) + delete[] src; + + return val; +} + } // namespace node diff --git a/src/string_bytes.h b/src/string_bytes.h index 10fee30913..a67bd45e49 100644 --- a/src/string_bytes.h +++ b/src/string_bytes.h @@ -71,11 +71,16 @@ class StringBytes { int* chars_written = nullptr); // Take the bytes in the src, and turn it into a Buffer or String. + // Don't call with encoding=UCS2. static v8::Local Encode(v8::Isolate* isolate, const char* buf, size_t buflen, enum encoding encoding); + static v8::Local Encode(v8::Isolate* isolate, + const uint16_t* buf, + size_t buflen); + // Deprecated legacy interface NODE_DEPRECATED("Use IsValidString(isolate, ...)",