buffer: add encoding parameter to fill()

Can now call fill() using the following parameters if value is a String: fill(string[, start[, end]][, encoding]) And with the following if value is a Buffer: fill(buffer[, start[, end]]) The encoding is ignored if value is not a String. All other non-Buffer values are coerced to a uint32. Multibyte strings will simply be copied into the Buffer until the number of bytes runs out, meaning partial characters can be left behind: Buffer(3).fill('\u0222'); // returns: <Buffer c8 a2 c8> In some encoding cases, such as 'hex', fill() will throw if the input string is not valid. PR-URL: https://github.com/nodejs/node/pull/4935 Reviewed-By: James M Snell <jasnell@gmail.com>
9 years ago · b55e58042c
6 changed files with 411 additions and 32 deletions
--- a/doc/api/buffer.markdown
+++ b/doc/api/buffer.markdown
@ -471,16 +471,19 @@ console.log(buf1.equals(buf3));
  // Prints: false
 ```

-### buf.fill(value[, offset[, end]])
+### buf.fill(value[, offset[, end]][, encoding])

-* `value` {String|Number}
+* `value` {String|Buffer|Number}
 * `offset` {Number} Default: 0
-* `end` {Number} Default: `buffer.length`
+* `end` {Number} Default: `buf.length`
+* `encoding` {String} Default: `'utf8'`
 * Return: {Buffer}

-Fills the Buffer with the specified value. If the `offset` and `end` are not
-given it will fill the entire Buffer. The method returns a reference to the
-Buffer so calls can be chained.
+Fills the Buffer with the specified value. If the `offset` (defaults to `0`)
+and `end` (defaults to `buf.length`) are not given the entire buffer will be
+filled. The method returns a reference to the Buffer, so calls can be chained.
+This is meant as a small simplification to creating a Buffer, allowing the
+creation and fill of the Buffer to be done on a single line:

 ```js
 const b = new Buffer(50).fill('h');
@ -488,6 +491,18 @@ console.log(b.toString());
  // Prints: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
 ```

+`encoding` is only relevant if `value` is a string. Otherwise it is ignored.
+`value` is coerced to a `uint32` value if it is not a String, Buffer, or
+Number.
+
+The `fill()` operation writes bytes into the Buffer dumbly. If the final write
+falls in between a multi-byte character then whatever bytes fit into the buffer
+are written.
+
+```js
+Buffer(3).fill('\u0222');
+  // Prints: <Buffer c8 a2 c8>
+```
+
 ### buf.indexOf(value[, byteOffset][, encoding])

 * `value` {String|Buffer|Number}
--- a/lib/buffer.js
+++ b/lib/buffer.js
@ -498,24 +498,48 @@ Buffer.prototype.includes = function includes(val, byteOffset, encoding) {
 };


-Buffer.prototype.fill = function fill(val, start, end) {
-  start = start >> 0;
-  end = (end === undefined) ? this.length : end >> 0;
+// Usage:
+//    buffer.fill(number[, offset[, end]])
+//    buffer.fill(buffer[, offset[, end]])
+//    buffer.fill(string[, offset[, end]][, encoding])
+Buffer.prototype.fill = function fill(val, start, end, encoding) {
+  // Handle string cases:
+  if (typeof val === 'string') {
+    if (typeof start === 'string') {
+      encoding = start;
+      start = 0;
+      end = this.length;
+    } else if (typeof end === 'string') {
+      encoding = end;
+      end = this.length;
+    }
+    if (val.length === 1) {
+      var code = val.charCodeAt(0);
+      if (code < 256)
+        val = code;
+    }
+    if (encoding !== undefined && typeof encoding !== 'string') {
+      throw new TypeError('encoding must be a string');
+    }
+    if (typeof encoding === 'string' && !Buffer.isEncoding(encoding)) {
+      throw new TypeError('Unknown encoding: ' + encoding);
+    }

+  } else if (typeof val === 'number') {
+    val = val & 255;
+  }
+
+  // Invalid ranges are not set to a default, so can range check early.
  if (start < 0 || end > this.length)
    throw new RangeError('Out of range index');
+
  if (end <= start)
    return this;

-  if (typeof val !== 'string') {
-    val = val >>> 0;
-  } else if (val.length === 1) {
-    var code = val.charCodeAt(0);
-    if (code < 256)
-      val = code;
-  }
+  start = start >>> 0;
+  end = end === undefined ? this.length : end >>> 0;

-  binding.fill(this, val, start, end);
+  binding.fill(this, val, start, end, encoding);

  return this;
 };
--- a/src/node_buffer.cc
+++ b/src/node_buffer.cc
@ -570,42 +570,91 @@ void Copy(const FunctionCallbackInfo<Value> &args) {


 void Fill(const FunctionCallbackInfo<Value>& args) {
-  THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]);
+  Environment* env = Environment::GetCurrent(args);
+
+  THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
  SPREAD_ARG(args[0], ts_obj);

  size_t start = args[2]->Uint32Value();
  size_t end = args[3]->Uint32Value();
-  size_t length = end - start;
-  CHECK(length + start <= ts_obj_length);
+  size_t fill_length = end - start;
+  Local<String> str_obj;
+  size_t str_length;
+  enum encoding enc;
+  CHECK(fill_length + start <= ts_obj_length);
+
+  // First check if Buffer has been passed.
+  if (Buffer::HasInstance(args[1])) {
+    SPREAD_ARG(args[1], fill_obj);
+    str_length = fill_obj_length;
+    memcpy(ts_obj_data + start, fill_obj_data, MIN(str_length, fill_length));
+    goto start_fill;
+  }

-  if (args[1]->IsNumber()) {
+  // Then coerce everything that's not a string.
+  if (!args[1]->IsString()) {
    int value = args[1]->Uint32Value() & 255;
-    memset(ts_obj_data + start, value, length);
+    memset(ts_obj_data + start, value, fill_length);
    return;
  }

-  node::Utf8Value str(args.GetIsolate(), args[1]);
-  size_t str_length = str.length();
-  size_t in_there = str_length;
-  char* ptr = ts_obj_data + start + str_length;
+  str_obj = args[1]->ToString(env->isolate());
+  enc = ParseEncoding(env->isolate(), args[4], UTF8);
+  str_length =
+      enc == UTF8 ? str_obj->Utf8Length() :
+      enc == UCS2 ? str_obj->Length() * sizeof(uint16_t) : str_obj->Length();
+
+  if (enc == HEX && str_length  % 2 != 0)
+    return env->ThrowTypeError("Invalid hex string");

  if (str_length == 0)
    return;

-  memcpy(ts_obj_data + start, *str, MIN(str_length, length));
+  // Can't use StringBytes::Write() in all cases. For example if attempting
+  // to write a two byte character into a one byte Buffer.
+  if (enc == UTF8) {
+    node::Utf8Value str(env->isolate(), args[1]);
+    memcpy(ts_obj_data + start, *str, MIN(str_length, fill_length));

-  if (str_length >= length)
+  } else if (enc == UCS2) {
+    node::TwoByteValue str(env->isolate(), args[1]);
+    memcpy(ts_obj_data + start, *str, MIN(str_length, fill_length));
+
+  } else {
+    // Write initial String to Buffer, then use that memory to copy remainder
+    // of string. Correct the string length for cases like HEX where less than
+    // the total string length is written.
+    str_length = StringBytes::Write(env->isolate(),
+                                    ts_obj_data + start,
+                                    fill_length,
+                                    str_obj,
+                                    enc,
+                                    nullptr);
+    // This check is also needed in case Write() returns that no bytes could
+    // be written.
+    // TODO(trevnorris): Should this throw? Because of the string length was
+    // greater than 0 but couldn't be written then the string was invalid.
+    if (str_length == 0)
+      return;
+  }
+
+ start_fill:
+
+  if (str_length >= fill_length)
    return;

-  while (in_there < length - in_there) {
+
+  size_t in_there = str_length;
+  char* ptr = ts_obj_data + start + str_length;
+
+  while (in_there < fill_length - in_there) {
    memcpy(ptr, ts_obj_data + start, in_there);
    ptr += in_there;
    in_there *= 2;
  }

-  if (in_there < length) {
-    memcpy(ptr, ts_obj_data + start, length - in_there);
-    in_there = length;
+  if (in_there < fill_length) {
+    memcpy(ptr, ts_obj_data + start, fill_length - in_there);
  }
 }

--- a/src/util.cc
+++ b/src/util.cc
@ -25,4 +25,27 @@ Utf8Value::Utf8Value(v8::Isolate* isolate, v8::Local<v8::Value> value)
  str_[length_] = '\0';
 }

+
+// Converts `value` to a string and copies its two-byte (UTF-16) code units
+// into str_, using the inline str_st_ array when it is large enough and a
+// malloc'd buffer otherwise. Leaves the object empty (length_ == 0) when
+// `value` is empty or cannot be converted to a string.
+TwoByteValue::TwoByteValue(v8::Isolate* isolate, v8::Local<v8::Value> value)
+    : length_(0), str_(str_st_) {
+  if (value.IsEmpty())
+    return;
+
+  v8::Local<v8::String> string = value->ToString(isolate);
+  if (string.IsEmpty())
+    return;
+
+  // Allocate enough space to include the null terminator. The size is kept
+  // in uint16_t code units, not bytes: str_ is indexed per code unit, and
+  // String::Write() takes its length in code units as well. Sizing in bytes
+  // (2 * n + 1) left the terminator write one byte short of a full element.
+  const size_t len = string->Length() + 1;
+  if (len > sizeof(str_st_) / sizeof(str_st_[0])) {
+    str_ = static_cast<uint16_t*>(malloc(len * sizeof(*str_)));
+    CHECK_NE(str_, nullptr);
+  }
+
+  const int flags =
+      v8::String::NO_NULL_TERMINATION | v8::String::REPLACE_INVALID_UTF8;
+  length_ = string->Write(str_, 0, len, flags);
+  str_[length_] = '\0';
+}
+
 }  // namespace node
--- a/src/util.h
+++ b/src/util.h
@ -205,6 +205,33 @@ class Utf8Value {
    char str_st_[1024];
 };

+// Holds the two-byte (uint16_t) contents of a JS value's string form.
+// str_ points either at the inline str_st_ array or at malloc'd storage,
+// which the destructor releases. length() is the value stored by the
+// constructor from String::Write().
+class TwoByteValue {
+  public:
+    explicit TwoByteValue(v8::Isolate* isolate, v8::Local<v8::Value> value);
+
+    ~TwoByteValue() {
+      if (str_ != str_st_)
+        free(str_);
+    }
+
+    // Not copyable: a member-wise copy would duplicate str_, so the copy
+    // would either free() the source's inline array (its str_ differs from
+    // its own str_st_) or free the same heap buffer a second time.
+    TwoByteValue(const TwoByteValue&) = delete;
+    TwoByteValue& operator=(const TwoByteValue&) = delete;
+
+    uint16_t* operator*() {
+      return str_;
+    }
+
+    const uint16_t* operator*() const {
+      return str_;
+    }
+
+    size_t length() const {
+      return length_;
+    }
+
+  private:
+    size_t length_;
+    uint16_t* str_;
+    uint16_t str_st_[1024];
+};
+
 }  // namespace node

 #endif  // SRC_UTIL_H_
--- a/test/parallel/test-buffer-fill.js
+++ b/test/parallel/test-buffer-fill.js
@ -0,0 +1,241 @@
+'use strict';
+
+require('../common');
+const assert = require('assert');
+const os = require('os');
+const SIZE = 28;
+
+const buf1 = Buffer(SIZE);
+const buf2 = Buffer(SIZE);
+
+
+// Default encoding
+testBufs('abc');
+testBufs('\u0222aa');
+testBufs('a\u0234b\u0235c\u0236');
+testBufs('abc', 4);
+testBufs('abc', 5);
+testBufs('abc', SIZE);
+testBufs('\u0222aa', 2);
+testBufs('\u0222aa', 8);
+testBufs('a\u0234b\u0235c\u0236', 4);
+testBufs('a\u0234b\u0235c\u0236', 12);
+testBufs('abc', 4, -1);
+testBufs('abc', 4, 1);
+testBufs('abc', 5, 1);
+testBufs('\u0222aa', 2, -1);
+testBufs('\u0222aa', 8, 1);
+testBufs('a\u0234b\u0235c\u0236', 4, -1);
+testBufs('a\u0234b\u0235c\u0236', 4, 1);
+testBufs('a\u0234b\u0235c\u0236', 12, 1);
+
+
+// UTF8
+testBufs('abc', 'utf8');
+testBufs('\u0222aa', 'utf8');
+testBufs('a\u0234b\u0235c\u0236', 'utf8');
+testBufs('abc', 4, 'utf8');
+testBufs('abc', 5, 'utf8');
+testBufs('abc', SIZE, 'utf8');
+testBufs('\u0222aa', 2, 'utf8');
+testBufs('\u0222aa', 8, 'utf8');
+testBufs('a\u0234b\u0235c\u0236', 4, 'utf8');
+testBufs('a\u0234b\u0235c\u0236', 12, 'utf8');
+testBufs('abc', 4, -1, 'utf8');
+testBufs('abc', 4, 1, 'utf8');
+testBufs('abc', 5, 1, 'utf8');
+testBufs('\u0222aa', 2, -1, 'utf8');
+testBufs('\u0222aa', 8, 1, 'utf8');
+testBufs('a\u0234b\u0235c\u0236', 4, -1, 'utf8');
+testBufs('a\u0234b\u0235c\u0236', 4, 1, 'utf8');
+testBufs('a\u0234b\u0235c\u0236', 12, 1, 'utf8');
+assert.equal(Buffer(1).fill(0).fill('\u0222')[0], 0xc8);
+
+
+// BINARY
+testBufs('abc', 'binary');
+testBufs('\u0222aa', 'binary');
+testBufs('a\u0234b\u0235c\u0236', 'binary');
+testBufs('abc', 4, 'binary');
+testBufs('abc', 5, 'binary');
+testBufs('abc', SIZE, 'binary');
+testBufs('\u0222aa', 2, 'binary');
+testBufs('\u0222aa', 8, 'binary');
+testBufs('a\u0234b\u0235c\u0236', 4, 'binary');
+testBufs('a\u0234b\u0235c\u0236', 12, 'binary');
+testBufs('abc', 4, -1, 'binary');
+testBufs('abc', 4, 1, 'binary');
+testBufs('abc', 5, 1, 'binary');
+testBufs('\u0222aa', 2, -1, 'binary');
+testBufs('\u0222aa', 8, 1, 'binary');
+testBufs('a\u0234b\u0235c\u0236', 4, -1, 'binary');
+testBufs('a\u0234b\u0235c\u0236', 4, 1, 'binary');
+testBufs('a\u0234b\u0235c\u0236', 12, 1, 'binary');
+
+
+// UCS2
+testBufs('abc', 'ucs2');
+testBufs('\u0222aa', 'ucs2');
+testBufs('a\u0234b\u0235c\u0236', 'ucs2');
+testBufs('abc', 4, 'ucs2');
+testBufs('abc', SIZE, 'ucs2');
+testBufs('\u0222aa', 2, 'ucs2');
+testBufs('\u0222aa', 8, 'ucs2');
+testBufs('a\u0234b\u0235c\u0236', 4, 'ucs2');
+testBufs('a\u0234b\u0235c\u0236', 12, 'ucs2');
+testBufs('abc', 4, -1, 'ucs2');
+testBufs('abc', 4, 1, 'ucs2');
+testBufs('abc', 5, 1, 'ucs2');
+testBufs('\u0222aa', 2, -1, 'ucs2');
+testBufs('\u0222aa', 8, 1, 'ucs2');
+testBufs('a\u0234b\u0235c\u0236', 4, -1, 'ucs2');
+testBufs('a\u0234b\u0235c\u0236', 4, 1, 'ucs2');
+testBufs('a\u0234b\u0235c\u0236', 12, 1, 'ucs2');
+assert.equal(Buffer(1).fill('\u0222', 'ucs2')[0],
+             os.endianness() === 'LE' ? 0x22 : 0x02);
+
+
+// HEX
+testBufs('616263', 'hex');
+testBufs('c8a26161', 'hex');
+testBufs('61c8b462c8b563c8b6', 'hex');
+testBufs('616263', 4, 'hex');
+testBufs('616263', 5, 'hex');
+testBufs('616263', SIZE, 'hex');
+testBufs('c8a26161', 2, 'hex');
+testBufs('c8a26161', 8, 'hex');
+testBufs('61c8b462c8b563c8b6', 4, 'hex');
+testBufs('61c8b462c8b563c8b6', 12, 'hex');
+testBufs('616263', 4, -1, 'hex');
+testBufs('616263', 4, 1, 'hex');
+testBufs('616263', 5, 1, 'hex');
+testBufs('c8a26161', 2, -1, 'hex');
+testBufs('c8a26161', 8, 1, 'hex');
+testBufs('61c8b462c8b563c8b6', 4, -1, 'hex');
+testBufs('61c8b462c8b563c8b6', 4, 1, 'hex');
+testBufs('61c8b462c8b563c8b6', 12, 1, 'hex');
+// Make sure this operation doesn't go on forever
+buf1.fill('yKJh', 'hex');
+assert.throws(() => buf1.fill('\u0222', 'hex'));
+
+
+// BASE64
+testBufs('YWJj', 'base64');
+testBufs('yKJhYQ==', 'base64');
+testBufs('Yci0Ysi1Y8i2', 'base64');
+testBufs('YWJj', 4, 'base64');
+testBufs('YWJj', SIZE, 'base64');
+testBufs('yKJhYQ==', 2, 'base64');
+testBufs('yKJhYQ==', 8, 'base64');
+testBufs('Yci0Ysi1Y8i2', 4, 'base64');
+testBufs('Yci0Ysi1Y8i2', 12, 'base64');
+testBufs('YWJj', 4, -1, 'base64');
+testBufs('YWJj', 4, 1, 'base64');
+testBufs('YWJj', 5, 1, 'base64');
+testBufs('yKJhYQ==', 2, -1, 'base64');
+testBufs('yKJhYQ==', 8, 1, 'base64');
+testBufs('Yci0Ysi1Y8i2', 4, -1, 'base64');
+testBufs('Yci0Ysi1Y8i2', 4, 1, 'base64');
+testBufs('Yci0Ysi1Y8i2', 12, 1, 'base64');
+
+
+// Buffer
+const buf2Fill = Buffer(1).fill(2);
+assert.deepEqual(genBuffer(4, [buf2Fill]), [2, 2, 2, 2]);
+assert.deepEqual(genBuffer(4, [buf2Fill, 1]), [0, 2, 2, 2]);
+assert.deepEqual(genBuffer(4, [buf2Fill, 1, 3]), [0, 2, 2, 0]);
+assert.deepEqual(genBuffer(4, [buf2Fill, 1, 1]), [0, 0, 0, 0]);
+assert.deepEqual(genBuffer(4, [buf2Fill, 1, -1]), [0, 0, 0, 0]);
+const hexBufFill = Buffer(2).fill(0).fill('0102', 'hex');
+assert.deepEqual(genBuffer(4, [hexBufFill]), [1, 2, 1, 2]);
+assert.deepEqual(genBuffer(4, [hexBufFill, 1]), [0, 1, 2, 1]);
+assert.deepEqual(genBuffer(4, [hexBufFill, 1, 3]), [0, 1, 2, 0]);
+assert.deepEqual(genBuffer(4, [hexBufFill, 1, 1]), [0, 0, 0, 0]);
+assert.deepEqual(genBuffer(4, [hexBufFill, 1, -1]), [0, 0, 0, 0]);
+
+
+// Check exceptions
+assert.throws(() => buf1.fill(0, -1));
+assert.throws(() => buf1.fill(0, 0, buf1.length + 1));
+assert.throws(() => buf1.fill('', -1));
+assert.throws(() => buf1.fill('', 0, buf1.length + 1));
+assert.throws(() => buf1.fill('a', 0, buf1.length, 'node rocks!'));
+assert.throws(() => buf1.fill('a', 0, 0, NaN));
+assert.throws(() => buf1.fill('a', 0, 0, null));
+assert.throws(() => buf1.fill('a', 0, 0, 'foo'));
+
+
+// Creates a Buffer of `size` bytes, zero-fills it, then calls fill() on it
+// with the elements of the `args` array as arguments and returns the result.
+function genBuffer(size, args) {
+  const b = Buffer(size);
+  return b.fill(0).fill.apply(b, args);
+}
+
+
+// Zero-fills both shared scratch buffers (buf1 and buf2) before a comparison.
+function bufReset() {
+  buf1.fill(0);
+  buf2.fill(0);
+}
+
+
+// This is mostly accurate. Except write() won't write partial bytes to the
+// string while fill() blindly copies bytes into memory. To account for that an
+// error will be thrown if not all the data can be written, and the SIZE has
+// been massaged to work with the input characters.
+function writeToFill(string, offset, end, encoding) {
+  // Reference implementation: accepts the same argument shapes as fill()
+  // for a string value, but populates buf2 through repeated write() calls.
+  // Returns buf2.
+  if (typeof offset === 'string') {
+    encoding = offset;
+    offset = 0;
+    end = buf2.length;
+  } else if (typeof end === 'string') {
+    encoding = end;
+    end = buf2.length;
+  } else if (end === undefined) {
+    end = buf2.length;
+  }
+
+  if (offset < 0 || end > buf2.length)
+    throw new RangeError('Out of range index');
+
+  // An empty or inverted range leaves buf2 untouched.
+  if (end <= offset)
+    return buf2;
+
+  offset >>>= 0;
+  end >>>= 0;
+  assert(offset <= buf2.length);
+
+  // Convert "end" to "length" (which write understands).
+  const length = end - offset < 0 ? 0 : end - offset;
+
+  // NOTE(review): the loop below runs until offset reaches buf2.length, not
+  // `end`, and passes the same `length` on every iteration. The calls
+  // visible in this file only reach it with end === buf2.length; confirm
+  // before relying on it for a shorter range.
+  var wasZero = false;
+  do {
+    const written = buf2.write(string, offset, length, encoding);
+    offset += written;
+    // Safety check in case write falls into infinite loop: give up on the
+    // second write() that makes no progress.
+    if (written === 0) {
+      if (wasZero)
+        throw new Error('Could not write all data to Buffer');
+      else
+        wasZero = true;
+    }
+  } while (offset < buf2.length);
+
+  // Correction for UCS2 operations: on big-endian hosts swap each byte pair
+  // of buf2 before it is compared.
+  if (os.endianness() === 'BE' && encoding === 'ucs2') {
+    for (var i = 0; i < buf2.length; i += 2) {
+      var tmp = buf2[i];
+      buf2[i] = buf2[i + 1];
+      buf2[i + 1] = tmp;
+    }
+  }
+
+  return buf2;
+}
+
+
+function testBufs(string, offset, length, encoding) {
+  // Forwards whatever arguments it was called with (via `arguments`) to both
+  // buf1.fill() and writeToFill(), after zeroing both buffers.
+  bufReset();
+  buf1.fill.apply(buf1, arguments);
+  // Compare what fill() returns for buf1 against buf2 as built by
+  // writeToFill() from repeated write() calls.
+  assert.deepStrictEqual(buf1.fill.apply(buf1, arguments),
+                         writeToFill.apply(null, arguments));
+}