From 08a09bb50abcc355941f62deecece5cb6f3a2382 Mon Sep 17 00:00:00 2001 From: Ryan Dahl Date: Thu, 8 Apr 2010 16:31:02 -0700 Subject: [PATCH] Use some more Binary/F methods for Buffer --- doc/api.markdown | 31 +++++++------------------- lib/buffer.js | 45 ++++++++++++++++++++++++++++++-------- lib/http.js | 21 ++++-------------- lib/net.js | 34 ++++++---------------------- test/simple/test-buffer.js | 6 ++--- 5 files changed, 58 insertions(+), 79 deletions(-) diff --git a/doc/api.markdown b/doc/api.markdown index 3fc5028b6a..d935c07e85 100644 --- a/doc/api.markdown +++ b/doc/api.markdown @@ -31,10 +31,10 @@ All of the examples in the documentation can be run similarly. Pure Javascript is Unicode friendly but not nice to pure binary data. When dealing with TCP streams or the file system, it's necessary to handle octet -streams. Node has several stratagies for manipulating, creating, and +streams. Node has several strategies for manipulating, creating, and consuming octet streams. -Raw data is stored in instaces of the `Buffer` class. A `Buffer` is similar +Raw data is stored in instances of the `Buffer` class. A `Buffer` is similar to an array of integers but correspond to a raw memory allocation outside the V8 heap. A `Buffer` cannot be resized. Access the class at `require('buffer').Buffer`. @@ -59,29 +59,14 @@ Binary (`"binary"`). `"ascii"` and `"binary"` only look at the first 8 bits of the 16bit JavaScript string characters. The following `Buffer` methods allow decoding and encoding of strings: -- **`buffer.utf8Write(string, offset)`**: Writes `string` to the buffer at -`offset` using UTF-8 encoding. Returns the number of octets written. If +- **`buffer.write(string, encoding, offset)`**: Writes `string` to the buffer at +`offset` using the given encoding. Returns number of octets written. If `buffer` did not contain enough space to fit the entire string it will write -a partial amount of the string. However, this method will not write partial -characters. 
+a partial amount of the string. In the case of `encoding=='utf8'`, the +method will not write partial characters. -- **`buffer.binaryWrite(string, offset)`**: Writes `string` to the buffer at -`offset` using binary encoding - that is it will only use the first 8 bits -of each character. Write a partial string if not enough space remains. -Returns number of octets written. - -- **`buffer.asciiWrite(string, offset)`**: Writes `string` to the buffer at -`offset` using ASCII encoding. Faster than `utf8Write()`. Write a partial -string if not enough space remains. Returns number of octets written. - -- **`buffer.utf8Slice(start, end)`**: Decodes and returns a string assuming -UTF-8 encoding beginning at `start` and ending at `end`. - -- **`buffer.binarySlice(start, end)`**: Decodes and returns a string assuming -binary encoding beginning at `start` and ending at `end`. - -- **`buffer.asciiSlice(start, end)`**: Decodes and returns a string assuming -ASCII encoding beginning at `start` and ending at `end`. +- **`buffer.toString(encoding, start, end)`**: Decodes and returns a string assuming +the given encoding beginning at `start` and ending at `end`. 
diff --git a/lib/buffer.js b/lib/buffer.js index 4f7699e638..66401b4460 100644 --- a/lib/buffer.js +++ b/lib/buffer.js @@ -18,19 +18,46 @@ Buffer.prototype.inspect = function () { }; Buffer.prototype.toString = function (encoding, start, stop) { - encoding = encoding || 'utf8'; + encoding = (encoding || 'utf8').toLowerCase(); if (!start) start = 0; if (!stop) stop = this.length; - if (encoding == 'utf8') { - return this.utf8Slice(start, stop); - } else if (encoding == 'ascii') { - return this.asciiSlice(start, stop); - } else if (encoding == 'binary') { - return this.binarySlice(start, stop); - } else { - throw new Error('Unknown encoding'); + switch (encoding) { + case 'utf8': + case 'utf-8': + return this.utf8Slice(start, stop); + + case 'ascii': + return this.asciiSlice(start, stop); + + case 'binary': + return this.binarySlice(start, stop); + + default: + throw new Error('Unknown encoding'); } }; +Buffer.prototype.write = function (string, encoding, offset) { + encoding = (encoding || 'utf8').toLowerCase(); + switch (encoding) { + case 'utf8': + case 'utf-8': + return this.utf8Write(string, offset); + + case 'ascii': + return this.asciiWrite(string, offset); + + case 'binary': + return this.binaryWrite(string, offset); + + default: + throw new Error('Unknown encoding'); + } +}; + + + + + diff --git a/lib/http.js b/lib/http.js index d0ae3064c5..da6cb384d9 100644 --- a/lib/http.js +++ b/lib/http.js @@ -31,7 +31,7 @@ function newParser (type) { // Only servers will get URL events. 
parser.onURL = function (b, start, len) { - var slice = b.asciiSlice(start, start+len); + var slice = b.toString('ascii', start, start+len); if (parser.incoming.url) { parser.incoming.url += slice; } else { @@ -41,7 +41,7 @@ function newParser (type) { }; parser.onHeaderField = function (b, start, len) { - var slice = b.asciiSlice(start, start+len).toLowerCase(); + var slice = b.toString('ascii', start, start+len).toLowerCase(); if (parser.value) { parser.incoming._addHeaderLine(parser.field, parser.value); parser.field = null; @@ -55,7 +55,7 @@ function newParser (type) { }; parser.onHeaderValue = function (b, start, len) { - var slice = b.asciiSlice(start, start+len); + var slice = b.toString('ascii', start, start+len); if (parser.value) { parser.value += slice; } else { @@ -88,20 +88,7 @@ function newParser (type) { if (!enc) { parser.incoming.emit('data', b.slice(start, start+len)); } else { - var string; - switch (enc) { - case 'utf8': - string = b.utf8Slice(start, start+len); - break; - case 'ascii': - string = b.asciiSlice(start, start+len); - break; - case 'binary': - string = b.binarySlice(start, start+len); - break; - default: - throw new Error('Unsupported encoding ' + enc + '. Use Buffer'); - } + var string = b.toString(enc, start, start+len); parser.incoming.emit('data', string); } }; diff --git a/lib/net.js b/lib/net.js index ff616eb964..e99f1162bf 100644 --- a/lib/net.js +++ b/lib/net.js @@ -308,22 +308,7 @@ function initStream (self) { // Optimization: emit the original buffer with end points if (self.ondata) self.ondata(pool, start, end); } else { - // TODO remove me - we should only output Buffer - - var string; - switch (self._encoding) { - case 'utf8': - string = pool.utf8Slice(start, end); - break; - case 'ascii': - string = pool.asciiSlice(start, end); - break; - case 'binary': - string = pool.binarySlice(start, end); - break; - default: - throw new Error('Unsupported encoding ' + self._encoding + '. 
Use Buffer'); - } + var string = pool.toString(self._encoding, start, end); self.emit('data', string); } } @@ -442,21 +427,16 @@ Stream.prototype._writeOut = function (data, encoding) { allocNewPool(); } - if (encoding == 'binary') { - bytesWritten = pool.binaryWrite(data, pool.used); - charsWritten = bytesWritten; - } else if (encoding == 'ascii') { - bytesWritten = pool.asciiWrite(data, pool.used); - charsWritten = bytesWritten; - - } else { + if (encoding == 'utf8' || encoding == 'utf-8') { // default to utf8 - bytesWritten = pool.utf8Write(data, pool.used); + bytesWritten = pool.write(data, 'utf8', pool.used); // XXX Hacky way to find out the number of characters written. // Waiting for a more optimal way: http://codereview.chromium.org/1539013 - var _s = pool.utf8Slice(pool.used, pool.used + bytesWritten); + var _s = pool.toString('utf8', pool.used, pool.used + bytesWritten); charsWritten = _s.length; - + } else { + bytesWritten = pool.write(data, encoding, pool.used); + charsWritten = bytesWritten; } assert(bytesWritten > 0); diff --git a/test/simple/test-buffer.js b/test/simple/test-buffer.js index 88282c5b0a..084c268ad6 100644 --- a/test/simple/test-buffer.js +++ b/test/simple/test-buffer.js @@ -34,12 +34,12 @@ for (var j = 0; j < 500; j++) { for (var i = 0; i < asciiString.length; i++) { b[i] = asciiString.charCodeAt(i); } - var asciiSlice = b.asciiSlice(0, asciiString.length); + var asciiSlice = b.toString('ascii', 0, asciiString.length); assert.equal(asciiString, asciiSlice); var written = b.asciiWrite(asciiString, offset); assert.equal(asciiString.length, written); - var asciiSlice = b.asciiSlice(offset, offset+asciiString.length); + var asciiSlice = b.toString('ascii', offset, offset+asciiString.length); assert.equal(asciiString, asciiSlice); var sliceA = b.slice(offset, offset+asciiString.length); @@ -91,7 +91,7 @@ var testValue = '\u00F6\u65E5\u672C\u8A9E'; // ö日本語 var buffer = new Buffer(32); var size = buffer.utf8Write(testValue, 0); puts('bytes 
written to buffer: ' + size); -var slice = buffer.utf8Slice(0, size); +var slice = buffer.toString('utf8', 0, size); assert.equal(slice, testValue);