Browse Source

Refactor: Utf8Decoder -> StringDecoder

Instead of decoding only UTF-8, StringDecoder passes write() calls through to
buffer.toString() for all other encodings. This makes for a simpler interface.
v0.7.4-release
Ryan Dahl 15 years ago
parent
commit
6bed15e074
  1. 23
      lib/fs.js
  2. 28
      lib/http.js
  3. 34
      lib/net.js
  4. 45
      lib/string_decoder.js
  5. 2
      src/node.cc
  6. 25
      test/simple/test-http-upgrade.js
  7. 50
      test/simple/test-string-decoder.js

23
lib/fs.js

@ -637,17 +637,8 @@ sys.inherits(ReadStream, events.EventEmitter);
fs.FileReadStream = fs.ReadStream; // support the legacy name fs.FileReadStream = fs.ReadStream; // support the legacy name
ReadStream.prototype.setEncoding = function (encoding) { ReadStream.prototype.setEncoding = function (encoding) {
var Utf8Decoder = require("utf8decoder").Utf8Decoder; // lazy load var StringDecoder = require("string_decoder").StringDecoder; // lazy load
var self = this; this._decoder = new StringDecoder(encoding);
this._encoding = encoding.toLowerCase();
if (this._encoding == 'utf-8' || this._encoding == 'utf8') {
this._decoder = new Utf8Decoder();
this._decoder.onString = function(str) {
self.emit('data', str);
};
} else if (this._decoder) {
delete this._decoder;
}
}; };
@ -707,13 +698,11 @@ ReadStream.prototype._read = function () {
ReadStream.prototype._emitData = function (d) { ReadStream.prototype._emitData = function (d) {
if (!this._encoding) { if (this._decoder) {
this.emit('data', d); var string = this._decoder.write(d);
} else if (this._decoder) { if (string.length) this.emit('data', string);
this._decoder.write(d);
} else { } else {
var string = d.toString(this._encoding, 0, d.length); this.emit('data', d);
this.emit('data', string);
} }
}; };

28
lib/http.js

@ -9,7 +9,6 @@ if (debugLevel & 0x4) {
} }
var net = require('net'); var net = require('net');
var Utf8Decoder = require('utf8decoder').Utf8Decoder;
var events = require('events'); var events = require('events');
var Buffer = require('buffer').Buffer; var Buffer = require('buffer').Buffer;
@ -93,14 +92,12 @@ var parsers = new FreeList('parsers', 1000, function () {
parser.onBody = function (b, start, len) { parser.onBody = function (b, start, len) {
// TODO body encoding? // TODO body encoding?
var enc = parser.incoming._encoding; var slice = b.slice(start, start+len);
if (!enc) { if (parser.incoming._decoder) {
parser.incoming.emit('data', b.slice(start, start+len)); var string = parser.incoming._decoder.write(slice);
} else if (this._decoder) { if (string.length) parser.incoming.emit('data', string);
this._decoder.write(pool.slice(start, end));
} else { } else {
var string = b.toString(enc, start, start+len); parser.incoming.emit('data', slice);
parser.incoming.emit('data', string);
} }
}; };
@ -217,18 +214,9 @@ IncomingMessage.prototype.setBodyEncoding = function (enc) {
this.setEncoding(enc); this.setEncoding(enc);
}; };
IncomingMessage.prototype.setEncoding = function (enc) { IncomingMessage.prototype.setEncoding = function (encoding) {
// TODO check values, error out on bad, and deprecation message? var StringDecoder = require("string_decoder").StringDecoder; // lazy load
this._encoding = enc.toLowerCase(); this._decoder = new StringDecoder(encoding);
if (this._encoding == 'utf-8' || this._encoding == 'utf8') {
this._decoder = new Utf8Decoder();
this._decoder.onString = function(str) {
this.emit('data', str);
};
} else if (this._decoder) {
delete this._decoder;
}
}; };
IncomingMessage.prototype.pause = function () { IncomingMessage.prototype.pause = function () {

34
lib/net.js

@ -1,6 +1,5 @@
var sys = require("sys"); var sys = require("sys");
var fs = require("fs"); var fs = require("fs");
var Utf8Decoder = require("utf8decoder").Utf8Decoder;
var events = require("events"); var events = require("events");
var dns = require('dns'); var dns = require('dns');
@ -500,20 +499,20 @@ function initStream (self) {
var end = pool.used + bytesRead; var end = pool.used + bytesRead;
pool.used += bytesRead; pool.used += bytesRead;
if (!self._encoding) { if (self._decoder) {
// emit String
var string = self._decoder.write(pool.slice(start, end));
if (string.length) self.emit('data', string);
} else {
// emit buffer
if (self._events && self._events['data']) { if (self._events && self._events['data']) {
// emit a slice // emit a slice
self.emit('data', pool.slice(start, end)); self.emit('data', pool.slice(start, end));
} }
// Optimization: emit the original buffer with end points
if (self.ondata) self.ondata(pool, start, end);
} else if (this._decoder) {
this._decoder.write(pool.slice(start, end));
} else {
var string = pool.toString(self._encoding, start, end);
self.emit('data', string);
} }
// Optimization: emit the original buffer with end points
if (self.ondata) self.ondata(pool, start, end);
} }
}; };
self.readable = false; self.readable = false;
@ -828,18 +827,9 @@ Stream.prototype._writeQueueLast = function () {
}; };
Stream.prototype.setEncoding = function (enc) { Stream.prototype.setEncoding = function (encoding) {
var self = this; var StringDecoder = require("string_decoder").StringDecoder; // lazy load
// TODO check values, error out on bad, and deprecation message? this._decoder = new StringDecoder(encoding);
this._encoding = enc.toLowerCase();
if (this._encoding == 'utf-8' || this._encoding == 'utf8') {
this._decoder = new Utf8Decoder();
this._decoder.onString = function(str) {
self.emit('data', str);
};
} else if (this._decoder) {
delete this._decoder;
}
}; };

45
lib/utf8decoder.js → lib/string_decoder.js

@ -1,12 +1,21 @@
var Buffer = require('buffer').Buffer; var Buffer = require('buffer').Buffer;
var Utf8Decoder = exports.Utf8Decoder = function() { var StringDecoder = exports.StringDecoder = function (encoding) {
this.charBuffer = new Buffer(4); this.encoding = (encoding || 'utf8').toLowerCase().replace(/[-_]/,'');
this.charReceived = 0; if (this.encoding === 'utf8') {
this.charLength = 0; this.charBuffer = new Buffer(4);
this.charReceived = 0;
this.charLength = 0;
}
}; };
Utf8Decoder.prototype.write = function(buffer) {
StringDecoder.prototype.write = function (buffer) {
// If not utf8...
if (this.encoding !== 'utf8') {
return buffer.toString(this.encoding);
}
var charStr = ''; var charStr = '';
// if our last write ended with an incomplete multibyte character // if our last write ended with an incomplete multibyte character
if (this.charLength) { if (this.charLength) {
@ -21,28 +30,23 @@ Utf8Decoder.prototype.write = function(buffer) {
if (this.charReceived < this.charLength) { if (this.charReceived < this.charLength) {
// still not enough chars in this buffer? wait for more ... // still not enough chars in this buffer? wait for more ...
return; return '';
} }
// get the character that was split // get the character that was split
charStr = this.charBuffer.slice(0, this.charLength).toString(); charStr = this.charBuffer.slice(0, this.charLength).toString();
this.charReceived = this.charLength = 0; this.charReceived = this.charLength = 0;
if (i == buffer.length) { // if there are no more bytes in this buffer, just emit our char
// if there are no more bytes in this buffer, just emit our char if (i == buffer.length) return charStr;
this.onString(charStr)
return;
}
// otherwise cut of the characters end from the beginning of this buffer // otherwise cut off the characters end from the beginning of this buffer
buffer = buffer.slice(i, buffer.length); buffer = buffer.slice(i, buffer.length);
} }
// determine how many bytes we have to check at the end of this buffer // determine how many bytes we have to check at the end of this buffer
var i = (buffer.length >= 3) var i = (buffer.length >= 3) ? 3 : buffer.length;
? 3
: buffer.length;
// figure out if one of the last i bytes of our buffer announces an incomplete char // figure out if one of the last i bytes of our buffer announces an incomplete char
for (; i > 0; i--) { for (; i > 0; i--) {
@ -71,8 +75,7 @@ Utf8Decoder.prototype.write = function(buffer) {
if (!this.charLength) { if (!this.charLength) {
// no incomplete char at the end of this buffer, emit the whole thing // no incomplete char at the end of this buffer, emit the whole thing
this.onString(charStr+buffer.toString()); return charStr + buffer.toString();
return;
} }
// buffer the incomplete character bytes we got // buffer the incomplete character bytes we got
@ -81,9 +84,9 @@ Utf8Decoder.prototype.write = function(buffer) {
if (buffer.length - i > 0) { if (buffer.length - i > 0) {
// buffer had more bytes before the incomplete char, emit them // buffer had more bytes before the incomplete char, emit them
this.onString(charStr+buffer.slice(0, buffer.length - i).toString()); return charStr + buffer.toString('utf8', 0, buffer.length - i);
} else if (charStr) {
// or just emit the charStr if any
this.onString(charStr);
} }
// or just emit the charStr
return charStr;
}; };

2
src/node.cc

@ -1866,7 +1866,7 @@ static Handle<Value> Binding(const Arguments& args) {
exports->Set(String::New("utils"), String::New(native_utils)); exports->Set(String::New("utils"), String::New(native_utils));
exports->Set(String::New("path"), String::New(native_path)); exports->Set(String::New("path"), String::New(native_path));
exports->Set(String::New("module"), String::New(native_module)); exports->Set(String::New("module"), String::New(native_module));
exports->Set(String::New("utf8decoder"), String::New(native_utf8decoder)); exports->Set(String::New("string_decoder"), String::New(native_string_decoder));
binding_cache->Set(module, exports); binding_cache->Set(module, exports);
} }

25
test/simple/test-http-upgrade.js

@ -50,12 +50,6 @@ function testServer(){
sys.inherits(testServer, http.Server); sys.inherits(testServer, http.Server);
function testClient(){
var conn = net.createConnection(PORT);
conn.setEncoding("utf8");
return conn;
}
function writeReq(socket, data, encoding){ function writeReq(socket, data, encoding){
requests_sent++; requests_sent++;
socket.write(data); socket.write(data);
@ -66,7 +60,8 @@ function writeReq(socket, data, encoding){
connection: Upgrade with listener connection: Upgrade with listener
-----------------------------------------------*/ -----------------------------------------------*/
function test_upgrade_with_listener(_server){ function test_upgrade_with_listener(_server){
var conn = new testClient(); var conn = net.createConnection(PORT);
conn.setEncoding("utf8");
var state = 0; var state = 0;
conn.addListener("connect", function () { conn.addListener("connect", function () {
@ -79,10 +74,12 @@ function test_upgrade_with_listener(_server){
); );
}); });
conn.addListener("data", function(data){ conn.addListener("data", function (data) {
state++; state++;
if(state == 1){ assert.equal('string', typeof data);
if(state == 1) {
assert.equal("HTTP/1.1 101", data.substr(0, 12)); assert.equal("HTTP/1.1 101", data.substr(0, 12));
assert.equal("WjN}|M(6", request_upgradeHead.toString("utf8")); assert.equal("WjN}|M(6", request_upgradeHead.toString("utf8"));
conn.write("test", "utf8"); conn.write("test", "utf8");
@ -106,7 +103,8 @@ function test_upgrade_with_listener(_server){
var test_upgrade_no_listener_ended = false; var test_upgrade_no_listener_ended = false;
function test_upgrade_no_listener(){ function test_upgrade_no_listener(){
var conn = new testClient(); var conn = net.createConnection(PORT);
conn.setEncoding("utf8");
conn.addListener("connect", function () { conn.addListener("connect", function () {
writeReq(conn, "GET / HTTP/1.1\r\nUpgrade: WebSocket\r\nConnection: Upgrade\r\n\r\n"); writeReq(conn, "GET / HTTP/1.1\r\nUpgrade: WebSocket\r\nConnection: Upgrade\r\n\r\n");
@ -126,12 +124,15 @@ function test_upgrade_no_listener(){
connection: normal connection: normal
-----------------------------------------------*/ -----------------------------------------------*/
function test_standard_http(){ function test_standard_http(){
var conn = new testClient(); var conn = net.createConnection(PORT);
conn.setEncoding("utf8");
conn.addListener("connect", function () { conn.addListener("connect", function () {
writeReq(conn, "GET / HTTP/1.1\r\n\r\n"); writeReq(conn, "GET / HTTP/1.1\r\n\r\n");
}); });
conn.addListener("data", function(data){ conn.addListener("data", function(data){
assert.equal("string", typeof data);
assert.equal("HTTP/1.1 200", data.substr(0, 12)); assert.equal("HTTP/1.1 200", data.substr(0, 12));
conn.end(); conn.end();
}); });
@ -144,7 +145,7 @@ function test_standard_http(){
var server = createTestServer(); var server = createTestServer();
server.listen(PORT, function(){ server.listen(PORT, function () {
// All tests get chained after this: // All tests get chained after this:
test_upgrade_with_listener(server); test_upgrade_with_listener(server);
}); });

50
test/simple/test-utf8-decoder.js → test/simple/test-string-decoder.js

@ -1,36 +1,30 @@
require('../common'); require('../common');
var Utf8Decoder = require('utf8decoder').Utf8Decoder,
Buffer = require('buffer').Buffer,
decoder = new Utf8Decoder(),
buffer,
onStringCalled = 0;
decoder.onString = function(str) { Buffer = require('buffer').Buffer;
onStringCalled++; StringDecoder = require('string_decoder').StringDecoder;
assert.deepEqual(str, buffer.toString()); decoder = new StringDecoder('utf8');
};
buffer = new Buffer('$'); buffer = new Buffer('$');
decoder.write(buffer); assert.deepEqual('$', decoder.write(buffer));
assert.equal(onStringCalled, 1);
buffer = new Buffer('¢'); buffer = new Buffer('¢');
decoder.write(buffer.slice(0, 1)); assert.deepEqual('', decoder.write(buffer.slice(0, 1)));
decoder.write(buffer.slice(1, 2)); assert.deepEqual('¢', decoder.write(buffer.slice(1, 2)));
assert.equal(onStringCalled, 2);
buffer = new Buffer('€'); buffer = new Buffer('€');
decoder.write(buffer.slice(0, 1)); assert.deepEqual('', decoder.write(buffer.slice(0, 1)));
decoder.write(buffer.slice(1, 2)); assert.deepEqual('', decoder.write(buffer.slice(1, 2)));
decoder.write(buffer.slice(2, 3)); assert.deepEqual('€', decoder.write(buffer.slice(2, 3)));
assert.equal(onStringCalled, 3);
buffer = new Buffer([0xF0, 0xA4, 0xAD, 0xA2]); buffer = new Buffer([0xF0, 0xA4, 0xAD, 0xA2]);
decoder.write(buffer.slice(0, 1)); s = '';
decoder.write(buffer.slice(1, 2)); s += decoder.write(buffer.slice(0, 1));
decoder.write(buffer.slice(2, 3)); s += decoder.write(buffer.slice(1, 2));
decoder.write(buffer.slice(3, 4)); s += decoder.write(buffer.slice(2, 3));
assert.equal(onStringCalled, 4); s += decoder.write(buffer.slice(3, 4));
assert.ok(s.length > 0);
// A mixed ascii and non-ascii string // A mixed ascii and non-ascii string
// Test stolen from deps/v8/test/cctest/test-strings.cc // Test stolen from deps/v8/test/cctest/test-strings.cc
@ -51,18 +45,16 @@ charLengths = [0, 0, 1, 2, 2, 2, 3, 4, 4, 4, 5, 5];
print('scanning '); print('scanning ');
for (var j = 2; j < buffer.length; j++) { for (var j = 2; j < buffer.length; j++) {
for (var i = 1; i < j; i++) { for (var i = 1; i < j; i++) {
var decoder = new Utf8Decoder(); var decoder = new StringDecoder('utf8');
var sum = "";
decoder.onString = function (s) { sum += s; };
decoder.write(buffer.slice(0, i)); var sum = decoder.write(buffer.slice(0, i));
// just check that we've received the right amount // just check that we've received the right amount
// after the first write // after the first write
assert.equal(charLengths[i], sum.length); assert.equal(charLengths[i], sum.length);
decoder.write(buffer.slice(i, j)); sum += decoder.write(buffer.slice(i, j));
decoder.write(buffer.slice(j, buffer.length)); sum += decoder.write(buffer.slice(j, buffer.length));
assert.equal(expected, sum); assert.equal(expected, sum);
print("."); print(".");
} }
Loading…
Cancel
Save