Browse Source

src: add BE support to StringBytes::Encode()

Versions of Node.js after v0.12 have relocated byte-swapping away from
the StringBytes::Encode function, thereby causing a nan test (which
accesses this function directly) to fail on big-endian machines.

This change re-introduces byte swapping in StringBytes::Encode,
done via a call to a new SwapBytes() helper in src/util-inl.h. Another
change in NodeBuffer::StringSlice was necessary to avoid swapping the
bytes twice when StringSlice is called on big-endian platforms.

PR-URL: https://github.com/nodejs/node/pull/3410
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
Reviewed-By: Trevor Norris <trev.norris@gmail.com>
v4.x
Bryon Leung 9 years ago
committed by Myles Borins
parent
commit
aa8a10c8c3
  1. 9
      src/node_buffer.cc
  2. 16
      src/string_bytes.cc
  3. 14
      src/util-inl.h
  4. 2
      src/util.h

9
src/node_buffer.cc

@ -472,10 +472,11 @@ void StringSlice<UCS2>(const FunctionCallbackInfo<Value>& args) {
// need to reorder on BE platforms. See http://nodejs.org/api/buffer.html // need to reorder on BE platforms. See http://nodejs.org/api/buffer.html
// regarding Node's "ucs2" encoding specification. // regarding Node's "ucs2" encoding specification.
const bool aligned = (reinterpret_cast<uintptr_t>(data) % sizeof(*buf) == 0); const bool aligned = (reinterpret_cast<uintptr_t>(data) % sizeof(*buf) == 0);
if (IsLittleEndian() && aligned) { if (IsLittleEndian() && !aligned) {
buf = reinterpret_cast<const uint16_t*>(data);
} else {
// Make a copy to avoid unaligned accesses in v8::String::NewFromTwoByte(). // Make a copy to avoid unaligned accesses in v8::String::NewFromTwoByte().
// This applies ONLY to little endian platforms, as misalignment will be
// handled by a byte-swapping operation in StringBytes::Encode on
// big endian platforms.
uint16_t* copy = new uint16_t[length]; uint16_t* copy = new uint16_t[length];
for (size_t i = 0, k = 0; i < length; i += 1, k += 2) { for (size_t i = 0, k = 0; i < length; i += 1, k += 2) {
// Assumes that the input is little endian. // Assumes that the input is little endian.
@ -485,6 +486,8 @@ void StringSlice<UCS2>(const FunctionCallbackInfo<Value>& args) {
} }
buf = copy; buf = copy;
release = true; release = true;
} else {
buf = reinterpret_cast<const uint16_t*>(data);
} }
args.GetReturnValue().Set(StringBytes::Encode(env->isolate(), buf, length)); args.GetReturnValue().Set(StringBytes::Encode(env->isolate(), buf, length));

16
src/string_bytes.cc

@ -6,6 +6,7 @@
#include <limits.h> #include <limits.h>
#include <string.h> // memcpy #include <string.h> // memcpy
#include <vector>
// When creating strings >= this length v8's gc spins up and consumes // When creating strings >= this length v8's gc spins up and consumes
// most of the execution time. For these cases it's more performant to // most of the execution time. For these cases it's more performant to
@ -406,9 +407,7 @@ size_t StringBytes::Write(Isolate* isolate,
reinterpret_cast<uintptr_t>(buf) % sizeof(uint16_t); reinterpret_cast<uintptr_t>(buf) % sizeof(uint16_t);
if (is_aligned) { if (is_aligned) {
uint16_t* const dst = reinterpret_cast<uint16_t*>(buf); uint16_t* const dst = reinterpret_cast<uint16_t*>(buf);
for (size_t i = 0; i < nchars; i++) SwapBytes(dst, dst, nchars);
dst[i] = dst[i] << 8 | dst[i] >> 8;
break;
} }
ASSERT_EQ(sizeof(uint16_t), 2); ASSERT_EQ(sizeof(uint16_t), 2);
@ -857,7 +856,16 @@ Local<Value> StringBytes::Encode(Isolate* isolate,
const uint16_t* buf, const uint16_t* buf,
size_t buflen) { size_t buflen) {
Local<String> val; Local<String> val;
std::vector<uint16_t> dst;
if (IsBigEndian()) {
// Node's "ucs2" encoding expects LE character data inside a
// Buffer, so we need to reorder on BE platforms. See
// http://nodejs.org/api/buffer.html regarding Node's "ucs2"
// encoding specification
dst.resize(buflen);
SwapBytes(&dst[0], buf, buflen);
buf = &dst[0];
}
if (buflen < EXTERN_APEX) { if (buflen < EXTERN_APEX) {
val = String::NewFromTwoByte(isolate, val = String::NewFromTwoByte(isolate,
buf, buf,

14
src/util-inl.h

@ -198,6 +198,20 @@ TypeName* Unwrap(v8::Local<v8::Object> object) {
return static_cast<TypeName*>(pointer); return static_cast<TypeName*>(pointer);
} }
// Writes `buflen` 16-bit units to `dst`, each one being the corresponding
// unit of `src` with its high and low bytes exchanged. Every unit is read
// in full before its swapped value is stored, and units are processed from
// the first to the last.
void SwapBytes(uint16_t* dst, const uint16_t* src, size_t buflen) {
  const uint16_t* const end = src + buflen;
  while (src != end) {
    const uint16_t unit = *src++;
#ifdef __GNUC__
    // __builtin_bswap16 generates more efficient code with
    // g++ 4.8 on PowerPC and other big-endian archs
    *dst++ = __builtin_bswap16(unit);
#else
    // Portable fallback: the shifts operate on the promoted int value and
    // the result is narrowed back to 16 bits, dropping the overflowed bits.
    *dst++ = static_cast<uint16_t>((unit << 8) | (unit >> 8));
#endif
  }
}
} // namespace node } // namespace node
#endif // SRC_UTIL_INL_H_ #endif // SRC_UTIL_INL_H_

2
src/util.h

@ -176,6 +176,8 @@ inline void ClearWrap(v8::Local<v8::Object> object);
template <typename TypeName> template <typename TypeName>
inline TypeName* Unwrap(v8::Local<v8::Object> object); inline TypeName* Unwrap(v8::Local<v8::Object> object);
inline void SwapBytes(uint16_t* dst, const uint16_t* src, size_t buflen);
class Utf8Value { class Utf8Value {
public: public:
explicit Utf8Value(v8::Isolate* isolate, v8::Local<v8::Value> value); explicit Utf8Value(v8::Isolate* isolate, v8::Local<v8::Value> value);

Loading…
Cancel
Save