diff --git a/src/node_buffer.cc b/src/node_buffer.cc index 467a6e8847..d7c27295e9 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -12,7 +12,6 @@ #include #include -#include #define BUFFER_ID 0xB0E4 @@ -49,38 +48,6 @@ THROW_AND_RETURN_IF_OOB(end <= end_max); \ size_t length = end - start; -#if defined(__GNUC__) || defined(__clang__) -#define BSWAP_INTRINSIC_2(x) __builtin_bswap16(x) -#define BSWAP_INTRINSIC_4(x) __builtin_bswap32(x) -#define BSWAP_INTRINSIC_8(x) __builtin_bswap64(x) -#elif defined(__linux__) -#include -#define BSWAP_INTRINSIC_2(x) bswap_16(x) -#define BSWAP_INTRINSIC_4(x) bswap_32(x) -#define BSWAP_INTRINSIC_8(x) bswap_64(x) -#elif defined(_MSC_VER) -#include -#define BSWAP_INTRINSIC_2(x) _byteswap_ushort(x); -#define BSWAP_INTRINSIC_4(x) _byteswap_ulong(x); -#define BSWAP_INTRINSIC_8(x) _byteswap_uint64(x); -#else -#define BSWAP_INTRINSIC_2(x) ((x) << 8) | ((x) >> 8) -#define BSWAP_INTRINSIC_4(x) \ - (((x) & 0xFF) << 24) | \ - (((x) & 0xFF00) << 8) | \ - (((x) >> 8) & 0xFF00) | \ - (((x) >> 24) & 0xFF) -#define BSWAP_INTRINSIC_8(x) \ - (((x) & 0xFF00000000000000ull) >> 56) | \ - (((x) & 0x00FF000000000000ull) >> 40) | \ - (((x) & 0x0000FF0000000000ull) >> 24) | \ - (((x) & 0x000000FF00000000ull) >> 8) | \ - (((x) & 0x00000000FF000000ull) << 8) | \ - (((x) & 0x0000000000FF0000ull) << 24) | \ - (((x) & 0x000000000000FF00ull) << 40) | \ - (((x) & 0x00000000000000FFull) << 56) -#endif - namespace node { // if true, all Buffer and SlowBuffer instances will automatically zero-fill @@ -1204,23 +1171,7 @@ void Swap16(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]); SPREAD_ARG(args[0], ts_obj); - - CHECK_EQ(ts_obj_length % 2, 0); - - int align = reinterpret_cast(ts_obj_data) % sizeof(uint16_t); - - if (align == 0) { - uint16_t* data16 = reinterpret_cast(ts_obj_data); - size_t len16 = ts_obj_length / 2; - for (size_t i = 0; i < len16; i++) { - data16[i] = BSWAP_INTRINSIC_2(data16[i]); - } - } else { - for (size_t i = 0; i < ts_obj_length; i += 2) { - std::swap(ts_obj_data[i], ts_obj_data[i + 1]); - } - } - + SwapBytes16(ts_obj_data, ts_obj_length); args.GetReturnValue().Set(args[0]); } @@ -1229,24 +1180,7 @@ void Swap32(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]); SPREAD_ARG(args[0], ts_obj); - - CHECK_EQ(ts_obj_length % 4, 0); - - int align = reinterpret_cast(ts_obj_data) % sizeof(uint32_t); - - if (align == 0) { - uint32_t* data32 = reinterpret_cast(ts_obj_data); - size_t len32 = ts_obj_length / 4; - for (size_t i = 0; i < len32; i++) { - data32[i] = BSWAP_INTRINSIC_4(data32[i]); - } - } else { - for (size_t i = 0; i < ts_obj_length; i += 4) { - std::swap(ts_obj_data[i], ts_obj_data[i + 3]); - std::swap(ts_obj_data[i + 1], ts_obj_data[i + 2]); - } - } - + SwapBytes32(ts_obj_data, ts_obj_length); args.GetReturnValue().Set(args[0]); } @@ -1255,26 +1189,7 @@ void Swap64(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]); SPREAD_ARG(args[0], ts_obj); - - CHECK_EQ(ts_obj_length % 8, 0); - - int align = reinterpret_cast(ts_obj_data) % sizeof(uint64_t); - - if (align == 0) { - uint64_t* data64 = reinterpret_cast(ts_obj_data); - size_t len32 = ts_obj_length / 8; - for (size_t i = 0; i < len32; i++) { - data64[i] = BSWAP_INTRINSIC_8(data64[i]); - } - } else { - for (size_t i = 0; i < ts_obj_length; i += 8) { - std::swap(ts_obj_data[i], ts_obj_data[i + 7]); - std::swap(ts_obj_data[i + 1], ts_obj_data[i + 6]); - std::swap(ts_obj_data[i + 2], ts_obj_data[i + 5]); - std::swap(ts_obj_data[i + 3], ts_obj_data[i + 4]); - } - } - + SwapBytes64(ts_obj_data, ts_obj_length); args.GetReturnValue().Set(args[0]); } diff --git a/src/string_bytes.cc b/src/string_bytes.cc index d9e8b97114..882ca6e3e8 100644 --- a/src/string_bytes.cc +++ b/src/string_bytes.cc @@ -309,27 +309,13 @@ size_t StringBytes::Write(Isolate* isolate, if (chars_written != nullptr) *chars_written = nchars; - if (!IsBigEndian()) - break; - // Node's "ucs2" encoding wants LE character data stored in // the Buffer, so we need to reorder on BE platforms. See // http://nodejs.org/api/buffer.html regarding Node's "ucs2" // encoding specification + if (IsBigEndian()) + SwapBytes16(buf, nbytes); - const bool is_aligned = - reinterpret_cast(buf) % sizeof(uint16_t); - if (is_aligned) { - uint16_t* const dst = reinterpret_cast(buf); - SwapBytes(dst, dst, nchars); - } - - ASSERT_EQ(sizeof(uint16_t), 2); - for (size_t i = 0; i < nchars; i++) { - char tmp = buf[i * 2]; - buf[i * 2] = buf[i * 2 + 1]; - buf[i * 2 + 1] = tmp; - } break; } @@ -705,17 +691,19 @@ Local StringBytes::Encode(Isolate* isolate, Local StringBytes::Encode(Isolate* isolate, const uint16_t* buf, size_t buflen) { - Local val; + // Node's "ucs2" encoding expects LE character data inside a + // Buffer, so we need to reorder on BE platforms. See + // http://nodejs.org/api/buffer.html regarding Node's "ucs2" + // encoding specification std::vector dst; if (IsBigEndian()) { - // Node's "ucs2" encoding expects LE character data inside a - // Buffer, so we need to reorder on BE platforms. See - // http://nodejs.org/api/buffer.html regarding Node's "ucs2" - // encoding specification - dst.resize(buflen); - SwapBytes(&dst[0], buf, buflen); + dst.assign(buf, buf + buflen); + size_t nbytes = buflen * sizeof(dst[0]); + SwapBytes16(reinterpret_cast(&dst[0]), nbytes); buf = &dst[0]; } + + Local val; if (buflen < EXTERN_APEX) { val = String::NewFromTwoByte(isolate, buf, diff --git a/src/util-inl.h b/src/util-inl.h index 51adb81692..5ffe5b857f 100644 --- a/src/util-inl.h +++ b/src/util-inl.h @@ -4,6 +4,30 @@ #if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS #include "util.h" +#include + +#if defined(_MSC_VER) +#include +#define BSWAP_2(x) _byteswap_ushort(x) +#define BSWAP_4(x) _byteswap_ulong(x) +#define BSWAP_8(x) _byteswap_uint64(x) +#else +#define BSWAP_2(x) ((x) << 8) | ((x) >> 8) +#define BSWAP_4(x) \ + (((x) & 0xFF) << 24) | \ + (((x) & 0xFF00) << 8) | \ + (((x) >> 8) & 0xFF00) | \ + (((x) >> 24) & 0xFF) +#define BSWAP_8(x) \ + (((x) & 0xFF00000000000000ull) >> 56) | \ + (((x) & 0x00FF000000000000ull) >> 40) | \ + (((x) & 0x0000FF0000000000ull) >> 24) | \ + (((x) & 0x000000FF00000000ull) >> 8) | \ + (((x) & 0x00000000FF000000ull) << 8) | \ + (((x) & 0x0000000000FF0000ull) << 24) | \ + (((x) & 0x000000000000FF00ull) << 40) | \ + (((x) & 0x00000000000000FFull) << 56) +#endif namespace node { @@ -200,9 +224,76 @@ TypeName* Unwrap(v8::Local object) { return static_cast(pointer); } -void SwapBytes(uint16_t* dst, const uint16_t* src, size_t buflen) { - for (size_t i = 0; i < buflen; i += 1) - dst[i] = (src[i] << 8) | (src[i] >> 8); +void SwapBytes16(char* data, size_t nbytes) { + CHECK_EQ(nbytes % 2, 0); + +#if defined(_MSC_VER) + int align = reinterpret_cast(data) % sizeof(uint16_t); + if (align == 0) { + // MSVC has no strict aliasing, and is able to highly optimize this case. + uint16_t* data16 = reinterpret_cast(data); + size_t len16 = nbytes / sizeof(*data16); + for (size_t i = 0; i < len16; i++) { + data16[i] = BSWAP_2(data16[i]); + } + return; + } +#endif + + uint16_t temp; + for (size_t i = 0; i < nbytes; i += sizeof(temp)) { + memcpy(&temp, &data[i], sizeof(temp)); + temp = BSWAP_2(temp); + memcpy(&data[i], &temp, sizeof(temp)); + } +} + +void SwapBytes32(char* data, size_t nbytes) { + CHECK_EQ(nbytes % 4, 0); + +#if defined(_MSC_VER) + int align = reinterpret_cast(data) % sizeof(uint32_t); + // MSVC has no strict aliasing, and is able to highly optimize this case. + if (align == 0) { + uint32_t* data32 = reinterpret_cast(data); + size_t len32 = nbytes / sizeof(*data32); + for (size_t i = 0; i < len32; i++) { + data32[i] = BSWAP_4(data32[i]); + } + return; + } +#endif + + uint32_t temp; + for (size_t i = 0; i < nbytes; i += sizeof(temp)) { + memcpy(&temp, &data[i], sizeof(temp)); + temp = BSWAP_4(temp); + memcpy(&data[i], &temp, sizeof(temp)); + } +} + +void SwapBytes64(char* data, size_t nbytes) { + CHECK_EQ(nbytes % 8, 0); + +#if defined(_MSC_VER) + int align = reinterpret_cast(data) % sizeof(uint64_t); + if (align == 0) { + // MSVC has no strict aliasing, and is able to highly optimize this case. + uint64_t* data64 = reinterpret_cast(data); + size_t len64 = nbytes / sizeof(*data64); + for (size_t i = 0; i < len64; i++) { + data64[i] = BSWAP_8(data64[i]); + } + return; + } +#endif + + uint64_t temp; + for (size_t i = 0; i < nbytes; i += sizeof(temp)) { + memcpy(&temp, &data[i], sizeof(temp)); + temp = BSWAP_8(temp); + memcpy(&data[i], &temp, sizeof(temp)); + } } char ToLower(char c) { diff --git a/src/util.h b/src/util.h index 25f2eb0178..e2f9df02bc 100644 --- a/src/util.h +++ b/src/util.h @@ -254,7 +254,11 @@ inline void ClearWrap(v8::Local object); template inline TypeName* Unwrap(v8::Local object); -inline void SwapBytes(uint16_t* dst, const uint16_t* src, size_t buflen); +// Swaps bytes in place. nbytes is the number of bytes to swap and must be a +// multiple of the word size (checked by function). +inline void SwapBytes16(char* data, size_t nbytes); +inline void SwapBytes32(char* data, size_t nbytes); +inline void SwapBytes64(char* data, size_t nbytes); // tolower() is locale-sensitive. Use ToLower() instead. inline char ToLower(char c); diff --git a/test/parallel/test-buffer-alloc.js b/test/parallel/test-buffer-alloc.js index 38e3f4763c..760ad2e37c 100644 --- a/test/parallel/test-buffer-alloc.js +++ b/test/parallel/test-buffer-alloc.js @@ -585,6 +585,12 @@ assert.strictEqual('', x.inspect()); assert.strictEqual(b.toString(encoding), 'あいうえお'); }); +['ucs2', 'ucs-2', 'utf16le', 'utf-16le'].forEach((encoding) => { + const b = Buffer.allocUnsafe(11); + b.write('あいうえお', 1, encoding); + assert.strictEqual(b.toString(encoding, 1), 'あいうえお'); +}); + { // latin1 encoding should write only one byte per character. const b = Buffer.from([0xde, 0xad, 0xbe, 0xef]);