buffer: add encoding parameter to fill()

fill() can now be called with the following parameters if value is a String: fill(string[, start[, end]][, encoding]). And with the following if value is a Buffer: fill(buffer[, start[, end]]). The encoding is ignored if value is not a String. All other non-Buffer values are coerced to a uint32. Multibyte strings are simply copied into the Buffer until it runs out of space, meaning a partial character can be left behind: Buffer(3).fill('\u0222'); // returns: <Buffer c8 a2 c8> In some encoding cases, such as 'hex', fill() will throw if the input string is not valid. PR-URL: https://github.com/nodejs/node/pull/4935 Reviewed-By: James M Snell <jasnell@gmail.com>
9 years ago · b55e58042c
6 changed files with 411 additions and 32 deletions
--- a/doc/api/buffer.markdown
+++ b/doc/api/buffer.markdown
@ -471,16 +471,19 @@ console.log(buf1.equals(buf3));
  // Prints: false
 ```
-### buf.fill(value[, offset[, end]])
+### buf.fill(value[, offset[, end]][, encoding])
-* `value` {String|Number}
+* `value` {String|Buffer|Number}
 * `offset` {Number} Default: 0
-* `end` {Number} Default: `buffer.length`
+* `end` {Number} Default: `buf.length`
 * `encoding` {String} Default: `'utf8'`
 * Return: {Buffer}
-Fills the Buffer with the specified value. If the `offset` and `end` are not
+Fills the Buffer with the specified value. If the `offset` (defaults to `0`)
-given it will fill the entire Buffer. The method returns a reference to the
+and `end` (defaults to `buf.length`) are not given the entire buffer will be
-Buffer so calls can be chained.
+filled. The method returns a reference to the Buffer, so calls can be chained.
 This is meant as a small simplification to creating a Buffer. Allowing the
 creation and fill of the Buffer to be done on a single line:
 ```js
 const b = new Buffer(50).fill('h');
@ -488,6 +491,18 @@ console.log(b.toString());
  // Prints: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
 ```
 `encoding` is only relevant if `value` is a string. Otherwise it is ignored.
 `value` is coerced to a `uint32` value if it is not a String, Buffer or Number.
 The `fill()` operation writes bytes into the Buffer without regard for
 character boundaries. If the final write falls in the middle of a multi-byte
 character, then only the bytes that fit into the buffer are written.
 ```js
 Buffer(3).fill('\u0222');
  // Prints: <Buffer c8 a2 c8>
 ```
 ### buf.indexOf(value[, byteOffset][, encoding])
 * `value` {String|Buffer|Number}
--- a/lib/buffer.js
+++ b/lib/buffer.js
@ -498,24 +498,48 @@ Buffer.prototype.includes = function includes(val, byteOffset, encoding) {
 };
-Buffer.prototype.fill = function fill(val, start, end) {
+// Usage:
-  start = start >> 0;
+//    buffer.fill(number[, offset[, end]])
-  end = (end === undefined) ? this.length : end >> 0;
+//    buffer.fill(buffer[, offset[, end]])
 //    buffer.fill(string[, offset[, end]][, encoding])
 Buffer.prototype.fill = function fill(val, start, end, encoding) {
  // Handle string cases:
  if (typeof val === 'string') {
    if (typeof start === 'string') {
      encoding = start;
      start = 0;
      end = this.length;
    } else if (typeof end === 'string') {
      encoding = end;
      end = this.length;
    }
    if (val.length === 1) {
      var code = val.charCodeAt(0);
      if (code < 256)
        val = code;
    }
    if (encoding !== undefined && typeof encoding !== 'string') {
      throw new TypeError('encoding must be a string');
    }
    if (typeof encoding === 'string' && !Buffer.isEncoding(encoding)) {
      throw new TypeError('Unknown encoding: ' + encoding);
    }
  } else if (typeof val === 'number') {
    val = val & 255;
  }
  // Invalid ranges are not set to a default, so can range check early.
  if (start < 0 || end > this.length)
    throw new RangeError('Out of range index');
  if (end <= start)
    return this;
-  if (typeof val !== 'string') {
+  start = start >>> 0;
-    val = val >>> 0;
+  end = end === undefined ? this.length : end >>> 0;
-  } else if (val.length === 1) {
-    var code = val.charCodeAt(0);
-    if (code < 256)
-      val = code;
-  }
-  binding.fill(this, val, start, end);
+  binding.fill(this, val, start, end, encoding);
  return this;
 };
--- a/src/node_buffer.cc
+++ b/src/node_buffer.cc
@ -570,42 +570,91 @@ void Copy(const FunctionCallbackInfo<Value> &args) {
 void Fill(const FunctionCallbackInfo<Value>& args) {
-  THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]);
+  Environment* env = Environment::GetCurrent(args);
  THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
  SPREAD_ARG(args[0], ts_obj);
  size_t start = args[2]->Uint32Value();
  size_t end = args[3]->Uint32Value();
-  size_t length = end - start;
+  size_t fill_length = end - start;
-  CHECK(length + start <= ts_obj_length);
+  Local<String> str_obj;
  size_t str_length;
  enum encoding enc;
  CHECK(fill_length + start <= ts_obj_length);
  // First check if Buffer has been passed.
  if (Buffer::HasInstance(args[1])) {
    SPREAD_ARG(args[1], fill_obj);
    str_length = fill_obj_length;
    memcpy(ts_obj_data + start, fill_obj_data, MIN(str_length, fill_length));
    goto start_fill;
  }
-  if (args[1]->IsNumber()) {
+  // Then coerce everything that's not a string.
  if (!args[1]->IsString()) {
    int value = args[1]->Uint32Value() & 255;
-    memset(ts_obj_data + start, value, length);
+    memset(ts_obj_data + start, value, fill_length);
    return;
  }
-  node::Utf8Value str(args.GetIsolate(), args[1]);
+  str_obj = args[1]->ToString(env->isolate());
-  size_t str_length = str.length();
+  enc = ParseEncoding(env->isolate(), args[4], UTF8);
-  size_t in_there = str_length;
+  str_length =
-  char* ptr = ts_obj_data + start + str_length;
+      enc == UTF8 ? str_obj->Utf8Length() :
      enc == UCS2 ? str_obj->Length() * sizeof(uint16_t) : str_obj->Length();
  if (enc == HEX && str_length  % 2 != 0)
    return env->ThrowTypeError("Invalid hex string");
  if (str_length == 0)
    return;
  // Can't use StringBytes::Write() in all cases. For example if attempting
  // to write a two byte character into a one byte Buffer.
  if (enc == UTF8) {
    node::Utf8Value str(env->isolate(), args[1]);
    memcpy(ts_obj_data + start, *str, MIN(str_length, fill_length));
  } else if (enc == UCS2) {
    node::TwoByteValue str(env->isolate(), args[1]);
    memcpy(ts_obj_data + start, *str, MIN(str_length, fill_length));
  } else {
    // Write initial String to Buffer, then use that memory to copy remainder
    // of string. Correct the string length for cases like HEX where less than
    // the total string length is written.
    str_length = StringBytes::Write(env->isolate(),
                                    ts_obj_data + start,
                                    fill_length,
                                    str_obj,
                                    enc,
                                    nullptr);
    // This check is also needed in case Write() returns that no bytes could
    // be written.
    // TODO(trevnorris): Should this throw? Because of the string length was
    // greater than 0 but couldn't be written then the string was invalid.
    if (str_length == 0)
      return;
  }
-  memcpy(ts_obj_data + start, *str, MIN(str_length, length));
+ start_fill:
-  if (str_length >= length)
+  if (str_length >= fill_length)
    return;
-  while (in_there < length - in_there) {
+
  size_t in_there = str_length;
  char* ptr = ts_obj_data + start + str_length;
  while (in_there < fill_length - in_there) {
    memcpy(ptr, ts_obj_data + start, in_there);
    ptr += in_there;
    in_there *= 2;
  }
-  if (in_there < length) {
+  if (in_there < fill_length) {
-    memcpy(ptr, ts_obj_data + start, length - in_there);
+    memcpy(ptr, ts_obj_data + start, fill_length - in_there);
-    in_there = length;
  }
 }
--- a/src/util.cc
+++ b/src/util.cc
@ -25,4 +25,27 @@ Utf8Value::Utf8Value(v8::Isolate* isolate, v8::Local<v8::Value> value)
  str_[length_] = '\0';
 }
 // Converts `value` to a V8 string and copies its 16-bit code units into
 // str_. str_ points at the inline str_st_ array or, when the string plus its
 // terminator does not fit there, at a malloc()'d buffer. length_ is set to
 // the number of code units written and a 0 terminator is stored after them.
 // If `value` is an empty handle, or ToString() yields an empty handle, the
 // object is left with length_ == 0 and str_ pointing at str_st_.
 TwoByteValue::TwoByteValue(v8::Isolate* isolate, v8::Local<v8::Value> value)
    : length_(0), str_(str_st_) {
  if (value.IsEmpty())
    return;
  v8::Local<v8::String> string = value->ToString(isolate);
  if (string.IsEmpty())
    return;
  // All sizes below are counted in uint16_t code units, not bytes: str_ is
  // indexed per code unit, so the terminator needs a whole uint16_t slot.
  // Sizing the buffer as "byte size + 1" leaves the terminator store one byte
  // past the end of a heap allocation.
  const size_t len = static_cast<size_t>(string->Length()) + 1;
  if (len > sizeof(str_st_) / sizeof(str_st_[0])) {
    str_ = static_cast<uint16_t*>(malloc(len * sizeof(*str_)));
    CHECK_NE(str_, nullptr);
  }
  const int flags =
      v8::String::NO_NULL_TERMINATION | v8::String::REPLACE_INVALID_UTF8;
  length_ = string->Write(str_, 0, len, flags);
  // Write() was asked not to terminate, so do it here.
  str_[length_] = '\0';
 }
 }  // namespace node
--- a/src/util.h
+++ b/src/util.h
@ -205,6 +205,33 @@ class Utf8Value {
    char str_st_[1024];
 };
 // Owns a buffer of 16-bit code units produced from a V8 value. The data
 // lives either in the inline str_st_ array or in a heap allocation that the
 // destructor releases with free().
 class TwoByteValue {
  public:
    explicit TwoByteValue(v8::Isolate* isolate, v8::Local<v8::Value> value);
    // Releases str_ only when it does not point at the inline array.
    ~TwoByteValue() {
      if (str_ != str_st_)
        free(str_);
    }
    // Not copyable: a member-wise copy would either free the same heap
    // pointer twice or leave str_ pointing into another object's str_st_.
    TwoByteValue(const TwoByteValue&) = delete;
    TwoByteValue& operator=(const TwoByteValue&) = delete;
    // Pointer to the first stored code unit.
    uint16_t* operator*() {
      return str_;
    }
    const uint16_t* operator*() const {
      return str_;
    }
    // Value of length_ as set by the constructor.
    size_t length() const {
      return length_;
    }
  private:
    size_t length_;
    uint16_t* str_;
    uint16_t str_st_[1024];
 };
 }  // namespace node
 #endif  // SRC_UTIL_H_
--- a/test/parallel/test-buffer-fill.js
+++ b/test/parallel/test-buffer-fill.js
@ -0,0 +1,241 @@
 'use strict';
 require('../common');
 const assert = require('assert');
 const os = require('os');
 const SIZE = 28;
 const buf1 = Buffer(SIZE);
 const buf2 = Buffer(SIZE);
 // Default encoding
 testBufs('abc');
 testBufs('\u0222aa');
 testBufs('a\u0234b\u0235c\u0236');
 testBufs('abc', 4);
 testBufs('abc', 5);
 testBufs('abc', SIZE);
 testBufs('\u0222aa', 2);
 testBufs('\u0222aa', 8);
 testBufs('a\u0234b\u0235c\u0236', 4);
 testBufs('a\u0234b\u0235c\u0236', 12);
 testBufs('abc', 4, -1);
 testBufs('abc', 4, 1);
 testBufs('abc', 5, 1);
 testBufs('\u0222aa', 2, -1);
 testBufs('\u0222aa', 8, 1);
 testBufs('a\u0234b\u0235c\u0236', 4, -1);
 testBufs('a\u0234b\u0235c\u0236', 4, 1);
 testBufs('a\u0234b\u0235c\u0236', 12, 1);
 // UTF8
 testBufs('abc', 'utf8');
 testBufs('\u0222aa', 'utf8');
 testBufs('a\u0234b\u0235c\u0236', 'utf8');
 testBufs('abc', 4, 'utf8');
 testBufs('abc', 5, 'utf8');
 testBufs('abc', SIZE, 'utf8');
 testBufs('\u0222aa', 2, 'utf8');
 testBufs('\u0222aa', 8, 'utf8');
 testBufs('a\u0234b\u0235c\u0236', 4, 'utf8');
 testBufs('a\u0234b\u0235c\u0236', 12, 'utf8');
 testBufs('abc', 4, -1, 'utf8');
 testBufs('abc', 4, 1, 'utf8');
 testBufs('abc', 5, 1, 'utf8');
 testBufs('\u0222aa', 2, -1, 'utf8');
 testBufs('\u0222aa', 8, 1, 'utf8');
 testBufs('a\u0234b\u0235c\u0236', 4, -1, 'utf8');
 testBufs('a\u0234b\u0235c\u0236', 4, 1, 'utf8');
 testBufs('a\u0234b\u0235c\u0236', 12, 1, 'utf8');
 assert.equal(Buffer(1).fill(0).fill('\u0222')[0], 0xc8);
 // BINARY
 testBufs('abc', 'binary');
 testBufs('\u0222aa', 'binary');
 testBufs('a\u0234b\u0235c\u0236', 'binary');
 testBufs('abc', 4, 'binary');
 testBufs('abc', 5, 'binary');
 testBufs('abc', SIZE, 'binary');
 testBufs('\u0222aa', 2, 'binary');
 testBufs('\u0222aa', 8, 'binary');
 testBufs('a\u0234b\u0235c\u0236', 4, 'binary');
 testBufs('a\u0234b\u0235c\u0236', 12, 'binary');
 testBufs('abc', 4, -1, 'binary');
 testBufs('abc', 4, 1, 'binary');
 testBufs('abc', 5, 1, 'binary');
 testBufs('\u0222aa', 2, -1, 'binary');
 testBufs('\u0222aa', 8, 1, 'binary');
 testBufs('a\u0234b\u0235c\u0236', 4, -1, 'binary');
 testBufs('a\u0234b\u0235c\u0236', 4, 1, 'binary');
 testBufs('a\u0234b\u0235c\u0236', 12, 1, 'binary');
 // UCS2
 testBufs('abc', 'ucs2');
 testBufs('\u0222aa', 'ucs2');
 testBufs('a\u0234b\u0235c\u0236', 'ucs2');
 testBufs('abc', 4, 'ucs2');
 testBufs('abc', SIZE, 'ucs2');
 testBufs('\u0222aa', 2, 'ucs2');
 testBufs('\u0222aa', 8, 'ucs2');
 testBufs('a\u0234b\u0235c\u0236', 4, 'ucs2');
 testBufs('a\u0234b\u0235c\u0236', 12, 'ucs2');
 testBufs('abc', 4, -1, 'ucs2');
 testBufs('abc', 4, 1, 'ucs2');
 testBufs('abc', 5, 1, 'ucs2');
 testBufs('\u0222aa', 2, -1, 'ucs2');
 testBufs('\u0222aa', 8, 1, 'ucs2');
 testBufs('a\u0234b\u0235c\u0236', 4, -1, 'ucs2');
 testBufs('a\u0234b\u0235c\u0236', 4, 1, 'ucs2');
 testBufs('a\u0234b\u0235c\u0236', 12, 1, 'ucs2');
 assert.equal(Buffer(1).fill('\u0222', 'ucs2')[0],
             os.endianness() === 'LE' ? 0x22 : 0x02);
 // HEX
 testBufs('616263', 'hex');
 testBufs('c8a26161', 'hex');
 testBufs('61c8b462c8b563c8b6', 'hex');
 testBufs('616263', 4, 'hex');
 testBufs('616263', 5, 'hex');
 testBufs('616263', SIZE, 'hex');
 testBufs('c8a26161', 2, 'hex');
 testBufs('c8a26161', 8, 'hex');
 testBufs('61c8b462c8b563c8b6', 4, 'hex');
 testBufs('61c8b462c8b563c8b6', 12, 'hex');
 testBufs('616263', 4, -1, 'hex');
 testBufs('616263', 4, 1, 'hex');
 testBufs('616263', 5, 1, 'hex');
 testBufs('c8a26161', 2, -1, 'hex');
 testBufs('c8a26161', 8, 1, 'hex');
 testBufs('61c8b462c8b563c8b6', 4, -1, 'hex');
 testBufs('61c8b462c8b563c8b6', 4, 1, 'hex');
 testBufs('61c8b462c8b563c8b6', 12, 1, 'hex');
 // Make sure this operation doesn't go on forever
 buf1.fill('yKJh', 'hex');
 assert.throws(() => buf1.fill('\u0222', 'hex'));
 // BASE64
 // The inputs are base64 text, so they must be passed with the 'base64'
 // encoding; with 'ucs2' this section only repeats the UCS2 coverage.
 testBufs('YWJj', 'base64');
 testBufs('yKJhYQ==', 'base64');
 testBufs('Yci0Ysi1Y8i2', 'base64');
 testBufs('YWJj', 4, 'base64');
 testBufs('YWJj', SIZE, 'base64');
 testBufs('yKJhYQ==', 2, 'base64');
 testBufs('yKJhYQ==', 8, 'base64');
 testBufs('Yci0Ysi1Y8i2', 4, 'base64');
 testBufs('Yci0Ysi1Y8i2', 12, 'base64');
 testBufs('YWJj', 4, -1, 'base64');
 testBufs('YWJj', 4, 1, 'base64');
 testBufs('YWJj', 5, 1, 'base64');
 testBufs('yKJhYQ==', 2, -1, 'base64');
 testBufs('yKJhYQ==', 8, 1, 'base64');
 testBufs('Yci0Ysi1Y8i2', 4, -1, 'base64');
 testBufs('Yci0Ysi1Y8i2', 4, 1, 'base64');
 testBufs('Yci0Ysi1Y8i2', 12, 1, 'base64');
 // Buffer
 const buf2Fill = Buffer(1).fill(2);
 assert.deepEqual(genBuffer(4, [buf2Fill]), [2, 2, 2, 2]);
 assert.deepEqual(genBuffer(4, [buf2Fill, 1]), [0, 2, 2, 2]);
 assert.deepEqual(genBuffer(4, [buf2Fill, 1, 3]), [0, 2, 2, 0]);
 assert.deepEqual(genBuffer(4, [buf2Fill, 1, 1]), [0, 0, 0, 0]);
 assert.deepEqual(genBuffer(4, [buf2Fill, 1, -1]), [0, 0, 0, 0]);
 const hexBufFill = Buffer(2).fill(0).fill('0102', 'hex');
 assert.deepEqual(genBuffer(4, [hexBufFill]), [1, 2, 1, 2]);
 assert.deepEqual(genBuffer(4, [hexBufFill, 1]), [0, 1, 2, 1]);
 assert.deepEqual(genBuffer(4, [hexBufFill, 1, 3]), [0, 1, 2, 0]);
 assert.deepEqual(genBuffer(4, [hexBufFill, 1, 1]), [0, 0, 0, 0]);
 assert.deepEqual(genBuffer(4, [hexBufFill, 1, -1]), [0, 0, 0, 0]);
 // Check exceptions
 // Range errors: a negative start, or an end greater than buf1.length, must
 // throw whether the fill value is a number or a string.
 assert.throws(() => buf1.fill(0, -1));
 assert.throws(() => buf1.fill(0, 0, buf1.length + 1));
 assert.throws(() => buf1.fill('', -1));
 assert.throws(() => buf1.fill('', 0, buf1.length + 1));
 // Encoding errors (string values only): the encoding must be a string that
 // Buffer.isEncoding() accepts. NaN and null are neither undefined nor a
 // string, so they are rejected too. The calls with start === end === 0 still
 // have to throw rather than return early on the empty range.
 assert.throws(() => buf1.fill('a', 0, buf1.length, 'node rocks!'));
 assert.throws(() => buf1.fill('a', 0, 0, NaN));
 assert.throws(() => buf1.fill('a', 0, 0, null));
 assert.throws(() => buf1.fill('a', 0, 0, 'foo'));
 // Creates a zero-filled Buffer of `size` bytes, calls fill() on it with the
 // argument list `args`, and returns whatever that fill() call returns.
 function genBuffer(size, args) {
  const zeroed = Buffer(size).fill(0);
  return zeroed.fill.apply(zeroed, args);
 }
 // Zeroes both shared test buffers so each comparison starts from the same
 // state.
 function bufReset() {
  [buf1, buf2].forEach((buf) => buf.fill(0));
 }
 // This is mostly accurate. Except write() won't write partial bytes to the
 // string while fill() blindly copies bytes into memory. To account for that an
 // error will be thrown if not all the data can be written, and the SIZE has
 // been massaged to work with the input characters.
 // Reference implementation of fill() for string values, built on repeated
 // buf2.write() calls. Accepts the same optional-argument forms as fill():
 // (string), (string, encoding), (string, offset, encoding) and
 // (string, offset, end, encoding). Returns buf2.
 // Throws RangeError for a negative offset or an end past buf2.length, and
 // Error when write() makes no progress on two iterations.
 function writeToFill(string, offset, end, encoding) {
  if (typeof offset === 'string') {
    encoding = offset;
    offset = 0;
    end = buf2.length;
  } else if (typeof end === 'string') {
    encoding = end;
    end = buf2.length;
  } else if (end === undefined) {
    end = buf2.length;
  }
  if (offset < 0 || end > buf2.length)
    throw new RangeError('Out of range index');
  // An empty or inverted range leaves the buffer untouched.
  if (end <= offset)
    return buf2;
  offset >>>= 0;
  end >>>= 0;
  assert(offset <= buf2.length);
  var wasZero = false;
  do {
    // write() takes a byte count rather than an end index. Recompute it from
    // the current offset so that nothing is written at or beyond `end`.
    const written = buf2.write(string, offset, end - offset, encoding);
    offset += written;
    // Safety check in case write falls into infinite loop.
    if (written === 0) {
      if (wasZero)
        throw new Error('Could not write all data to Buffer');
      else
        wasZero = true;
    }
  } while (offset < end);
  // Correction for UCS2 operations: on big-endian hosts swap each byte pair
  // of buf2 before it is compared against the fill() result.
  if (os.endianness() === 'BE' && encoding === 'ucs2') {
    for (var i = 0; i < buf2.length; i += 2) {
      var tmp = buf2[i];
      buf2[i] = buf2[i + 1];
      buf2[i + 1] = tmp;
    }
  }
  return buf2;
 }
 // Applies the same argument list to buf1.fill() and to the write()-based
 // reference writeToFill() (which operates on buf2), starting from zeroed
 // buffers, and asserts that both end up with identical contents.
 function testBufs(string, offset, length, encoding) {
  bufReset();
  // `arguments` is forwarded untouched so that fill() and writeToFill() both
  // see exactly the optional-argument form the caller used. fill() returns
  // the buffer it was called on, so a single call is enough to both perform
  // the fill and obtain the value to compare.
  assert.deepStrictEqual(buf1.fill.apply(buf1, arguments),
                         writeToFill.apply(null, arguments));
 }