Browse Source

buffer: add encoding parameter to fill()

Can now call fill() using following parameters if value is a String:

    fill(string[, start[, end]][, encoding])

And with the following if value is a Buffer:

    fill(buffer[, start[, end]])

The encoding is ignored if value is not a String. All other non-Buffer
values are coerced to a uint32.

Multibyte strings are simply copied into the Buffer repeatedly until the
number of bytes runs out, meaning a partial character can be left behind:

    Buffer(3).fill('\u0222');
    // returns: <Buffer c8 a2 c8>

In some encoding cases, such as 'hex', fill() will throw if the input
string is not valid.

PR-URL: https://github.com/nodejs/node/pull/4935
Reviewed-By: James M Snell <jasnell@gmail.com>
process-exit-stdio-flushing
Trevor Norris 9 years ago
parent
commit
b55e58042c
  1. 27
      doc/api/buffer.markdown
  2. 46
      lib/buffer.js
  3. 79
      src/node_buffer.cc
  4. 23
      src/util.cc
  5. 27
      src/util.h
  6. 241
      test/parallel/test-buffer-fill.js

27
doc/api/buffer.markdown

@ -471,16 +471,19 @@ console.log(buf1.equals(buf3));
// Prints: false // Prints: false
``` ```
### buf.fill(value[, offset[, end]]) ### buf.fill(value[, offset[, end]][, encoding])
* `value` {String|Number} * `value` {String|Buffer|Number}
* `offset` {Number} Default: 0 * `offset` {Number} Default: 0
* `end` {Number} Default: `buffer.length` * `end` {Number} Default: `buf.length`
* `encoding` {String} Default: `'utf8'`
* Return: {Buffer} * Return: {Buffer}
Fills the Buffer with the specified value. If the `offset` and `end` are not Fills the Buffer with the specified value. If the `offset` (defaults to `0`)
given it will fill the entire Buffer. The method returns a reference to the and `end` (defaults to `buf.length`) are not given the entire buffer will be
Buffer so calls can be chained. filled. The method returns a reference to the Buffer, so calls can be chained.
This is meant as a small simplification to creating a Buffer. Allowing the
creation and fill of the Buffer to be done on a single line:
```js ```js
const b = new Buffer(50).fill('h'); const b = new Buffer(50).fill('h');
@ -488,6 +491,18 @@ console.log(b.toString());
// Prints: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh // Prints: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
``` ```
`encoding` is only relevant if `value` is a string. Otherwise it is ignored.
`value` is coerced to a `uint32` value if it is neither a String nor a Buffer.
The `fill()` operation writes bytes into the Buffer dumbly. If the final write
falls in the middle of a multi-byte character, then only the bytes of that
character that fit into the buffer are written.
```js
Buffer(3).fill('\u0222');
// Prints: <Buffer c8 a2 c8>
```
### buf.indexOf(value[, byteOffset][, encoding]) ### buf.indexOf(value[, byteOffset][, encoding])
* `value` {String|Buffer|Number} * `value` {String|Buffer|Number}

46
lib/buffer.js

@ -498,24 +498,48 @@ Buffer.prototype.includes = function includes(val, byteOffset, encoding) {
}; };
Buffer.prototype.fill = function fill(val, start, end) { // Usage:
start = start >> 0; // buffer.fill(number[, offset[, end]])
end = (end === undefined) ? this.length : end >> 0; // buffer.fill(buffer[, offset[, end]])
// buffer.fill(string[, offset[, end]][, encoding])
Buffer.prototype.fill = function fill(val, start, end, encoding) {
// Handle string cases:
if (typeof val === 'string') {
if (typeof start === 'string') {
encoding = start;
start = 0;
end = this.length;
} else if (typeof end === 'string') {
encoding = end;
end = this.length;
}
if (val.length === 1) {
var code = val.charCodeAt(0);
if (code < 256)
val = code;
}
if (encoding !== undefined && typeof encoding !== 'string') {
throw new TypeError('encoding must be a string');
}
if (typeof encoding === 'string' && !Buffer.isEncoding(encoding)) {
throw new TypeError('Unknown encoding: ' + encoding);
}
} else if (typeof val === 'number') {
val = val & 255;
}
// Invalid ranges are not set to a default, so can range check early.
if (start < 0 || end > this.length) if (start < 0 || end > this.length)
throw new RangeError('Out of range index'); throw new RangeError('Out of range index');
if (end <= start) if (end <= start)
return this; return this;
if (typeof val !== 'string') { start = start >>> 0;
val = val >>> 0; end = end === undefined ? this.length : end >>> 0;
} else if (val.length === 1) {
var code = val.charCodeAt(0);
if (code < 256)
val = code;
}
binding.fill(this, val, start, end); binding.fill(this, val, start, end, encoding);
return this; return this;
}; };

79
src/node_buffer.cc

@ -570,42 +570,91 @@ void Copy(const FunctionCallbackInfo<Value> &args) {
void Fill(const FunctionCallbackInfo<Value>& args) { void Fill(const FunctionCallbackInfo<Value>& args) {
THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]); Environment* env = Environment::GetCurrent(args);
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
SPREAD_ARG(args[0], ts_obj); SPREAD_ARG(args[0], ts_obj);
size_t start = args[2]->Uint32Value(); size_t start = args[2]->Uint32Value();
size_t end = args[3]->Uint32Value(); size_t end = args[3]->Uint32Value();
size_t length = end - start; size_t fill_length = end - start;
CHECK(length + start <= ts_obj_length); Local<String> str_obj;
size_t str_length;
enum encoding enc;
CHECK(fill_length + start <= ts_obj_length);
// First check if Buffer has been passed.
if (Buffer::HasInstance(args[1])) {
SPREAD_ARG(args[1], fill_obj);
str_length = fill_obj_length;
memcpy(ts_obj_data + start, fill_obj_data, MIN(str_length, fill_length));
goto start_fill;
}
if (args[1]->IsNumber()) { // Then coerce everything that's not a string.
if (!args[1]->IsString()) {
int value = args[1]->Uint32Value() & 255; int value = args[1]->Uint32Value() & 255;
memset(ts_obj_data + start, value, length); memset(ts_obj_data + start, value, fill_length);
return; return;
} }
node::Utf8Value str(args.GetIsolate(), args[1]); str_obj = args[1]->ToString(env->isolate());
size_t str_length = str.length(); enc = ParseEncoding(env->isolate(), args[4], UTF8);
size_t in_there = str_length; str_length =
char* ptr = ts_obj_data + start + str_length; enc == UTF8 ? str_obj->Utf8Length() :
enc == UCS2 ? str_obj->Length() * sizeof(uint16_t) : str_obj->Length();
if (enc == HEX && str_length % 2 != 0)
return env->ThrowTypeError("Invalid hex string");
if (str_length == 0)
return;
// Can't use StringBytes::Write() in all cases. For example if attempting
// to write a two byte character into a one byte Buffer.
if (enc == UTF8) {
node::Utf8Value str(env->isolate(), args[1]);
memcpy(ts_obj_data + start, *str, MIN(str_length, fill_length));
} else if (enc == UCS2) {
node::TwoByteValue str(env->isolate(), args[1]);
memcpy(ts_obj_data + start, *str, MIN(str_length, fill_length));
} else {
// Write initial String to Buffer, then use that memory to copy remainder
// of string. Correct the string length for cases like HEX where less than
// the total string length is written.
str_length = StringBytes::Write(env->isolate(),
ts_obj_data + start,
fill_length,
str_obj,
enc,
nullptr);
// This check is also needed in case Write() returns that no bytes could
// be written.
// TODO(trevnorris): Should this throw? Because if the string length was
// greater than 0 but couldn't be written then the string was invalid.
if (str_length == 0) if (str_length == 0)
return; return;
}
memcpy(ts_obj_data + start, *str, MIN(str_length, length)); start_fill:
if (str_length >= length) if (str_length >= fill_length)
return; return;
while (in_there < length - in_there) {
size_t in_there = str_length;
char* ptr = ts_obj_data + start + str_length;
while (in_there < fill_length - in_there) {
memcpy(ptr, ts_obj_data + start, in_there); memcpy(ptr, ts_obj_data + start, in_there);
ptr += in_there; ptr += in_there;
in_there *= 2; in_there *= 2;
} }
if (in_there < length) { if (in_there < fill_length) {
memcpy(ptr, ts_obj_data + start, length - in_there); memcpy(ptr, ts_obj_data + start, fill_length - in_there);
in_there = length;
} }
} }

23
src/util.cc

@ -25,4 +25,27 @@ Utf8Value::Utf8Value(v8::Isolate* isolate, v8::Local<v8::Value> value)
str_[length_] = '\0'; str_[length_] = '\0';
} }
// Copies the UTF-16 code units of |value| (converted with ToString()) into
// str_, using the inline str_st_ array when it is large enough and a
// malloc()'d buffer otherwise. When |value| is empty or cannot be converted
// to a string, length_ stays 0 and str_ keeps pointing at str_st_.
TwoByteValue::TwoByteValue(v8::Isolate* isolate, v8::Local<v8::Value> value)
    : length_(0), str_(str_st_) {
  if (value.IsEmpty())
    return;

  v8::Local<v8::String> string = value->ToString(isolate);
  if (string.IsEmpty())
    return;

  // Number of uint16_t elements needed, including the null terminator. The
  // size is counted in code units rather than bytes: the terminator store
  // below is two bytes wide, so sizing the buffer as "byte length + 1" would
  // put half of the terminator past the end of a heap allocation.
  const size_t len = static_cast<size_t>(string->Length()) + 1;
  if (len > sizeof(str_st_) / sizeof(str_st_[0])) {
    str_ = static_cast<uint16_t*>(malloc(len * sizeof(*str_)));
    CHECK_NE(str_, nullptr);
  }

  const int flags =
      v8::String::NO_NULL_TERMINATION | v8::String::REPLACE_INVALID_UTF8;
  // Write() takes its capacity in code units and returns the number written.
  length_ = string->Write(str_, 0, len, flags);
  str_[length_] = '\0';
}
} // namespace node } // namespace node

27
src/util.h

@ -205,6 +205,33 @@ class Utf8Value {
char str_st_[1024]; char str_st_[1024];
}; };
// Holds the two-byte (UTF-16) representation of a JS value. The constructor
// (defined in util.cc) points str_ either at the inline str_st_ array or at
// a malloc()'d buffer; the destructor releases only the latter.
class TwoByteValue {
 public:
  explicit TwoByteValue(v8::Isolate* isolate, v8::Local<v8::Value> value);

  ~TwoByteValue() {
    // str_ aliases str_st_ unless the constructor had to heap-allocate.
    if (str_ != str_st_)
      free(str_);
  }

  // Not copyable: a member-wise copy would leave str_ pointing either at the
  // source object's inline array (dangling once the source is destroyed) or
  // at a heap buffer that both objects would free().
  TwoByteValue(const TwoByteValue&) = delete;
  TwoByteValue& operator=(const TwoByteValue&) = delete;

  // Pointer to the first stored code unit.
  uint16_t* operator*() {
    return str_;
  }

  const uint16_t* operator*() const {
    return str_;
  }

  // Number of code units stored, as reported by the constructor's Write()
  // call; the trailing terminator is not counted.
  size_t length() const {
    return length_;
  }

 private:
  size_t length_;
  uint16_t* str_;
  uint16_t str_st_[1024];
};
} // namespace node } // namespace node
#endif // SRC_UTIL_H_ #endif // SRC_UTIL_H_

241
test/parallel/test-buffer-fill.js

@ -0,0 +1,241 @@
'use strict';
require('../common');
const assert = require('assert');
const os = require('os');
// Length in bytes of the two scratch buffers below.
const SIZE = 28;
// buf1 is the buffer fill() is called on; buf2 is filled by the write()-based
// reference implementation (writeToFill()). testBufs() compares the two.
const buf1 = Buffer(SIZE);
const buf2 = Buffer(SIZE);
// Default encoding: every case is the argument list handed to testBufs().
{
  const ascii = 'abc';
  const twoByteLead = '\u0222aa';
  const alternating = 'a\u0234b\u0235c\u0236';
  [
    [ascii],
    [twoByteLead],
    [alternating],
    [ascii, 4],
    [ascii, 5],
    [ascii, SIZE],
    [twoByteLead, 2],
    [twoByteLead, 8],
    [alternating, 4],
    [alternating, 12],
    [ascii, 4, -1],
    [ascii, 4, 1],
    [ascii, 5, 1],
    [twoByteLead, 2, -1],
    [twoByteLead, 8, 1],
    [alternating, 4, -1],
    [alternating, 4, 1],
    [alternating, 12, 1]
  ].forEach((fillArgs) => testBufs.apply(null, fillArgs));
}
// UTF8: same ranges as the default-encoding cases, with the encoding passed
// explicitly as the trailing argument.
{
  const ascii = 'abc';
  const twoByteLead = '\u0222aa';
  const alternating = 'a\u0234b\u0235c\u0236';
  [
    [ascii],
    [twoByteLead],
    [alternating],
    [ascii, 4],
    [ascii, 5],
    [ascii, SIZE],
    [twoByteLead, 2],
    [twoByteLead, 8],
    [alternating, 4],
    [alternating, 12],
    [ascii, 4, -1],
    [ascii, 4, 1],
    [ascii, 5, 1],
    [twoByteLead, 2, -1],
    [twoByteLead, 8, 1],
    [alternating, 4, -1],
    [alternating, 4, 1],
    [alternating, 12, 1]
  ].forEach((fillArgs) => testBufs.apply(null, fillArgs.concat('utf8')));
}
// A two-byte character filled into a one-byte Buffer leaves its first byte.
assert.equal(Buffer(1).fill(0).fill('\u0222')[0], 0xc8);
// BINARY: each case is an argument list; the encoding is appended last.
{
  const ascii = 'abc';
  const twoByteLead = '\u0222aa';
  const alternating = 'a\u0234b\u0235c\u0236';
  [
    [ascii],
    [twoByteLead],
    [alternating],
    [ascii, 4],
    [ascii, 5],
    [ascii, SIZE],
    [twoByteLead, 2],
    [twoByteLead, 8],
    [alternating, 4],
    [alternating, 12],
    [ascii, 4, -1],
    [ascii, 4, 1],
    [ascii, 5, 1],
    [twoByteLead, 2, -1],
    [twoByteLead, 8, 1],
    [alternating, 4, -1],
    [alternating, 4, 1],
    [alternating, 12, 1]
  ].forEach((fillArgs) => testBufs.apply(null, fillArgs.concat('binary')));
}
// UCS2: each case is an argument list; the encoding is appended last.
{
  const ascii = 'abc';
  const twoByteLead = '\u0222aa';
  const alternating = 'a\u0234b\u0235c\u0236';
  [
    [ascii],
    [twoByteLead],
    [alternating],
    [ascii, 4],
    [ascii, SIZE],
    [twoByteLead, 2],
    [twoByteLead, 8],
    [alternating, 4],
    [alternating, 12],
    [ascii, 4, -1],
    [ascii, 4, 1],
    [ascii, 5, 1],
    [twoByteLead, 2, -1],
    [twoByteLead, 8, 1],
    [alternating, 4, -1],
    [alternating, 4, 1],
    [alternating, 12, 1]
  ].forEach((fillArgs) => testBufs.apply(null, fillArgs.concat('ucs2')));
}
// Only the first byte of the code unit fits; which one depends on the host
// byte order.
const firstUcs2Byte = os.endianness() === 'LE' ? 0x22 : 0x02;
assert.equal(Buffer(1).fill('\u0222', 'ucs2')[0], firstUcs2Byte);
// HEX: each case is an argument list; the encoding is appended last.
{
  const short = '616263';
  const medium = 'c8a26161';
  const long = '61c8b462c8b563c8b6';
  [
    [short],
    [medium],
    [long],
    [short, 4],
    [short, 5],
    [short, SIZE],
    [medium, 2],
    [medium, 8],
    [long, 4],
    [long, 12],
    [short, 4, -1],
    [short, 4, 1],
    [short, 5, 1],
    [medium, 2, -1],
    [medium, 8, 1],
    [long, 4, -1],
    [long, 4, 1],
    [long, 12, 1]
  ].forEach((fillArgs) => testBufs.apply(null, fillArgs.concat('hex')));
}
// Make sure this operation doesn't go on forever
buf1.fill('yKJh', 'hex');
assert.throws(() => buf1.fill('\u0222', 'hex'));
// BASE64
// These inputs are base64 text, so they must be filled with the 'base64'
// encoding; passing 'ucs2' here would only repeat the UCS2 coverage above and
// leave base64 untested.
testBufs('YWJj', 'base64');
testBufs('yKJhYQ==', 'base64');
testBufs('Yci0Ysi1Y8i2', 'base64');
testBufs('YWJj', 4, 'base64');
testBufs('YWJj', SIZE, 'base64');
testBufs('yKJhYQ==', 2, 'base64');
testBufs('yKJhYQ==', 8, 'base64');
testBufs('Yci0Ysi1Y8i2', 4, 'base64');
testBufs('Yci0Ysi1Y8i2', 12, 'base64');
testBufs('YWJj', 4, -1, 'base64');
testBufs('YWJj', 4, 1, 'base64');
testBufs('YWJj', 5, 1, 'base64');
testBufs('yKJhYQ==', 2, -1, 'base64');
testBufs('yKJhYQ==', 8, 1, 'base64');
testBufs('Yci0Ysi1Y8i2', 4, -1, 'base64');
testBufs('Yci0Ysi1Y8i2', 4, 1, 'base64');
testBufs('Yci0Ysi1Y8i2', 12, 1, 'base64');
// Buffer: each case pairs the fill() argument list with the expected bytes of
// a zeroed 4-byte Buffer after the fill.
const buf2Fill = Buffer(1).fill(2);
[
  [[buf2Fill], [2, 2, 2, 2]],
  [[buf2Fill, 1], [0, 2, 2, 2]],
  [[buf2Fill, 1, 3], [0, 2, 2, 0]],
  [[buf2Fill, 1, 1], [0, 0, 0, 0]],
  [[buf2Fill, 1, -1], [0, 0, 0, 0]]
].forEach((testCase) => {
  assert.deepEqual(genBuffer(4, testCase[0]), testCase[1]);
});
const hexBufFill = Buffer(2).fill(0).fill('0102', 'hex');
[
  [[hexBufFill], [1, 2, 1, 2]],
  [[hexBufFill, 1], [0, 1, 2, 1]],
  [[hexBufFill, 1, 3], [0, 1, 2, 0]],
  [[hexBufFill, 1, 1], [0, 0, 0, 0]],
  [[hexBufFill, 1, -1], [0, 0, 0, 0]]
].forEach((testCase) => {
  assert.deepEqual(genBuffer(4, testCase[0]), testCase[1]);
});
// Check exceptions: every one of these calls is expected to throw.
[
  () => buf1.fill(0, -1),
  () => buf1.fill(0, 0, buf1.length + 1),
  () => buf1.fill('', -1),
  () => buf1.fill('', 0, buf1.length + 1),
  () => buf1.fill('a', 0, buf1.length, 'node rocks!'),
  () => buf1.fill('a', 0, 0, NaN),
  () => buf1.fill('a', 0, 0, null),
  () => buf1.fill('a', 0, 0, 'foo')
].forEach((badCall) => {
  assert.throws(badCall);
});
// Creates a zero-filled Buffer of `size` bytes, calls fill() on it with the
// argument list `args`, and returns whatever that fill() call returns.
function genBuffer(size, args) {
  const zeroed = Buffer(size).fill(0);
  return zeroed.fill.apply(zeroed, args);
}
// Zeroes both scratch buffers so each comparison starts from a known state.
function bufReset() {
  [buf1, buf2].forEach((scratch) => {
    scratch.fill(0);
  });
}
// Reference implementation of fill(string[, offset[, end]][, encoding]) built
// on write(): `string` is written into the module-level `buf2` over and over
// until the range [offset, end) is full.
//
// This is mostly accurate. Except write() won't write partial bytes to the
// string while fill() blindly copies bytes into memory. To account for that an
// error will be thrown if not all the data can be written, and the SIZE has
// been massaged to work with the input characters.
//
// Returns `buf2`. Throws RangeError('Out of range index') when offset < 0 or
// end > buf2.length, and returns `buf2` untouched when end <= offset.
function writeToFill(string, offset, end, encoding) {
  // The encoding may be passed in place of `offset` or `end`.
  if (typeof offset === 'string') {
    encoding = offset;
    offset = 0;
    end = buf2.length;
  } else if (typeof end === 'string') {
    encoding = end;
    end = buf2.length;
  } else if (end === undefined) {
    end = buf2.length;
  }

  if (offset < 0 || end > buf2.length)
    throw new RangeError('Out of range index');

  if (end <= offset)
    return buf2;

  offset >>>= 0;
  end >>>= 0;
  assert(offset <= buf2.length);

  let wasZero = false;
  do {
    // write() takes a length, not an end index. Recompute it every pass so
    // that nothing is ever written at or beyond `end`.
    const written = buf2.write(string, offset, end - offset, encoding);
    offset += written;
    // Safety check in case write falls into infinite loop.
    if (written === 0) {
      if (wasZero)
        throw new Error('Could not write all data to Buffer');
      else
        wasZero = true;
    }
  } while (offset < end);

  // Correction for UCS2 operations.
  if (os.endianness() === 'BE' && encoding === 'ucs2') {
    for (let i = 0; i < buf2.length; i += 2) {
      const tmp = buf2[i];
      buf2[i] = buf2[i + 1];
      buf2[i + 1] = tmp;
    }
  }

  return buf2;
}
// Resets both scratch buffers, fills buf1 with the given arguments, and
// asserts that the result deep-equals what writeToFill() produces for the
// same arguments. Arguments are forwarded exactly as they were passed.
function testBufs(string, offset, length, encoding) {
  const forwarded = arguments;
  bufReset();
  buf1.fill.apply(buf1, forwarded);
  const actual = buf1.fill.apply(buf1, forwarded);
  const expected = writeToFill.apply(null, forwarded);
  assert.deepStrictEqual(actual, expected);
}
Loading…
Cancel
Save