Browse Source

Refactor: Utf8Decoder -> StringDecoder

Instead of only decoding UTF-8, StringDecoder also proxies requests for other
encodings to buffer.toString(). write() now returns the decoded string rather
than calling an onString callback, which makes for a simpler interface.
v0.7.4-release
Ryan Dahl 15 years ago
parent
commit
6bed15e074
  1. 23
      lib/fs.js
  2. 28
      lib/http.js
  3. 34
      lib/net.js
  4. 45
      lib/string_decoder.js
  5. 2
      src/node.cc
  6. 25
      test/simple/test-http-upgrade.js
  7. 50
      test/simple/test-string-decoder.js

23
lib/fs.js

@ -637,17 +637,8 @@ sys.inherits(ReadStream, events.EventEmitter);
fs.FileReadStream = fs.ReadStream; // support the legacy name
ReadStream.prototype.setEncoding = function (encoding) {
var Utf8Decoder = require("utf8decoder").Utf8Decoder; // lazy load
var self = this;
this._encoding = encoding.toLowerCase();
if (this._encoding == 'utf-8' || this._encoding == 'utf8') {
this._decoder = new Utf8Decoder();
this._decoder.onString = function(str) {
self.emit('data', str);
};
} else if (this._decoder) {
delete this._decoder;
}
var StringDecoder = require("string_decoder").StringDecoder; // lazy load
this._decoder = new StringDecoder(encoding);
};
@ -707,13 +698,11 @@ ReadStream.prototype._read = function () {
ReadStream.prototype._emitData = function (d) {
if (!this._encoding) {
this.emit('data', d);
} else if (this._decoder) {
this._decoder.write(d);
if (this._decoder) {
var string = this._decoder.write(d);
if (string.length) this.emit('data', string);
} else {
var string = d.toString(this._encoding, 0, d.length);
this.emit('data', string);
this.emit('data', d);
}
};

28
lib/http.js

@ -9,7 +9,6 @@ if (debugLevel & 0x4) {
}
var net = require('net');
var Utf8Decoder = require('utf8decoder').Utf8Decoder;
var events = require('events');
var Buffer = require('buffer').Buffer;
@ -93,14 +92,12 @@ var parsers = new FreeList('parsers', 1000, function () {
parser.onBody = function (b, start, len) {
// TODO body encoding?
var enc = parser.incoming._encoding;
if (!enc) {
parser.incoming.emit('data', b.slice(start, start+len));
} else if (this._decoder) {
this._decoder.write(pool.slice(start, end));
var slice = b.slice(start, start+len);
if (parser.incoming._decoder) {
var string = parser.incoming._decoder.write(slice);
if (string.length) parser.incoming.emit('data', string);
} else {
var string = b.toString(enc, start, start+len);
parser.incoming.emit('data', string);
parser.incoming.emit('data', slice);
}
};
@ -217,18 +214,9 @@ IncomingMessage.prototype.setBodyEncoding = function (enc) {
this.setEncoding(enc);
};
IncomingMessage.prototype.setEncoding = function (enc) {
// TODO check values, error out on bad, and deprecation message?
this._encoding = enc.toLowerCase();
if (this._encoding == 'utf-8' || this._encoding == 'utf8') {
this._decoder = new Utf8Decoder();
this._decoder.onString = function(str) {
this.emit('data', str);
};
} else if (this._decoder) {
delete this._decoder;
}
IncomingMessage.prototype.setEncoding = function (encoding) {
var StringDecoder = require("string_decoder").StringDecoder; // lazy load
this._decoder = new StringDecoder(encoding);
};
IncomingMessage.prototype.pause = function () {

34
lib/net.js

@ -1,6 +1,5 @@
var sys = require("sys");
var fs = require("fs");
var Utf8Decoder = require("utf8decoder").Utf8Decoder;
var events = require("events");
var dns = require('dns');
@ -500,20 +499,20 @@ function initStream (self) {
var end = pool.used + bytesRead;
pool.used += bytesRead;
if (!self._encoding) {
if (self._decoder) {
// emit String
var string = self._decoder.write(pool.slice(start, end));
if (string.length) self.emit('data', string);
} else {
// emit buffer
if (self._events && self._events['data']) {
// emit a slice
self.emit('data', pool.slice(start, end));
}
// Optimization: emit the original buffer with end points
if (self.ondata) self.ondata(pool, start, end);
} else if (this._decoder) {
this._decoder.write(pool.slice(start, end));
} else {
var string = pool.toString(self._encoding, start, end);
self.emit('data', string);
}
// Optimization: emit the original buffer with end points
if (self.ondata) self.ondata(pool, start, end);
}
};
self.readable = false;
@ -828,18 +827,9 @@ Stream.prototype._writeQueueLast = function () {
};
Stream.prototype.setEncoding = function (enc) {
var self = this;
// TODO check values, error out on bad, and deprecation message?
this._encoding = enc.toLowerCase();
if (this._encoding == 'utf-8' || this._encoding == 'utf8') {
this._decoder = new Utf8Decoder();
this._decoder.onString = function(str) {
self.emit('data', str);
};
} else if (this._decoder) {
delete this._decoder;
}
Stream.prototype.setEncoding = function (encoding) {
var StringDecoder = require("string_decoder").StringDecoder; // lazy load
this._decoder = new StringDecoder(encoding);
};

45
lib/utf8decoder.js → lib/string_decoder.js

@ -1,12 +1,21 @@
var Buffer = require('buffer').Buffer;
var Utf8Decoder = exports.Utf8Decoder = function() {
this.charBuffer = new Buffer(4);
this.charReceived = 0;
this.charLength = 0;
var StringDecoder = exports.StringDecoder = function (encoding) {
this.encoding = (encoding || 'utf8').toLowerCase().replace(/[-_]/,'');
if (this.encoding === 'utf8') {
this.charBuffer = new Buffer(4);
this.charReceived = 0;
this.charLength = 0;
}
};
Utf8Decoder.prototype.write = function(buffer) {
StringDecoder.prototype.write = function (buffer) {
// If not utf8...
if (this.encoding !== 'utf8') {
return buffer.toString(this.encoding);
}
var charStr = '';
// if our last write ended with an incomplete multibyte character
if (this.charLength) {
@ -21,28 +30,23 @@ Utf8Decoder.prototype.write = function(buffer) {
if (this.charReceived < this.charLength) {
// still not enough chars in this buffer? wait for more ...
return;
return '';
}
// get the character that was split
charStr = this.charBuffer.slice(0, this.charLength).toString();
this.charReceived = this.charLength = 0;
if (i == buffer.length) {
// if there are no more bytes in this buffer, just emit our char
this.onString(charStr)
return;
}
// if there are no more bytes in this buffer, just emit our char
if (i == buffer.length) return charStr;
// otherwise cut of the characters end from the beginning of this buffer
// otherwise cut off the characters end from the beginning of this buffer
buffer = buffer.slice(i, buffer.length);
}
// determine how many bytes we have to check at the end of this buffer
var i = (buffer.length >= 3)
? 3
: buffer.length;
var i = (buffer.length >= 3) ? 3 : buffer.length;
// figure out if one of the last i bytes of our buffer announces an incomplete char
for (; i > 0; i--) {
@ -71,8 +75,7 @@ Utf8Decoder.prototype.write = function(buffer) {
if (!this.charLength) {
// no incomplete char at the end of this buffer, emit the whole thing
this.onString(charStr+buffer.toString());
return;
return charStr + buffer.toString();
}
// buffer the incomplete character bytes we got
@ -81,9 +84,9 @@ Utf8Decoder.prototype.write = function(buffer) {
if (buffer.length - i > 0) {
// buffer had more bytes before the incomplete char, emit them
this.onString(charStr+buffer.slice(0, buffer.length - i).toString());
} else if (charStr) {
// or just emit the charStr if any
this.onString(charStr);
return charStr + buffer.toString('utf8', 0, buffer.length - i);
}
// or just emit the charStr
return charStr;
};

2
src/node.cc

@ -1866,7 +1866,7 @@ static Handle<Value> Binding(const Arguments& args) {
exports->Set(String::New("utils"), String::New(native_utils));
exports->Set(String::New("path"), String::New(native_path));
exports->Set(String::New("module"), String::New(native_module));
exports->Set(String::New("utf8decoder"), String::New(native_utf8decoder));
exports->Set(String::New("string_decoder"), String::New(native_string_decoder));
binding_cache->Set(module, exports);
}

25
test/simple/test-http-upgrade.js

@ -50,12 +50,6 @@ function testServer(){
sys.inherits(testServer, http.Server);
function testClient(){
var conn = net.createConnection(PORT);
conn.setEncoding("utf8");
return conn;
}
function writeReq(socket, data, encoding){
requests_sent++;
socket.write(data);
@ -66,7 +60,8 @@ function writeReq(socket, data, encoding){
connection: Upgrade with listener
-----------------------------------------------*/
function test_upgrade_with_listener(_server){
var conn = new testClient();
var conn = net.createConnection(PORT);
conn.setEncoding("utf8");
var state = 0;
conn.addListener("connect", function () {
@ -79,10 +74,12 @@ function test_upgrade_with_listener(_server){
);
});
conn.addListener("data", function(data){
conn.addListener("data", function (data) {
state++;
if(state == 1){
assert.equal('string', typeof data);
if(state == 1) {
assert.equal("HTTP/1.1 101", data.substr(0, 12));
assert.equal("WjN}|M(6", request_upgradeHead.toString("utf8"));
conn.write("test", "utf8");
@ -106,7 +103,8 @@ function test_upgrade_with_listener(_server){
var test_upgrade_no_listener_ended = false;
function test_upgrade_no_listener(){
var conn = new testClient();
var conn = net.createConnection(PORT);
conn.setEncoding("utf8");
conn.addListener("connect", function () {
writeReq(conn, "GET / HTTP/1.1\r\nUpgrade: WebSocket\r\nConnection: Upgrade\r\n\r\n");
@ -126,12 +124,15 @@ function test_upgrade_no_listener(){
connection: normal
-----------------------------------------------*/
function test_standard_http(){
var conn = new testClient();
var conn = net.createConnection(PORT);
conn.setEncoding("utf8");
conn.addListener("connect", function () {
writeReq(conn, "GET / HTTP/1.1\r\n\r\n");
});
conn.addListener("data", function(data){
assert.equal("string", typeof data);
assert.equal("HTTP/1.1 200", data.substr(0, 12));
conn.end();
});
@ -144,7 +145,7 @@ function test_standard_http(){
var server = createTestServer();
server.listen(PORT, function(){
server.listen(PORT, function () {
// All tests get chained after this:
test_upgrade_with_listener(server);
});

50
test/simple/test-utf8-decoder.js → test/simple/test-string-decoder.js

@ -1,36 +1,30 @@
require('../common');
var Utf8Decoder = require('utf8decoder').Utf8Decoder,
Buffer = require('buffer').Buffer,
decoder = new Utf8Decoder(),
buffer,
onStringCalled = 0;
decoder.onString = function(str) {
onStringCalled++;
assert.deepEqual(str, buffer.toString());
};
Buffer = require('buffer').Buffer;
StringDecoder = require('string_decoder').StringDecoder;
decoder = new StringDecoder('utf8');
buffer = new Buffer('$');
decoder.write(buffer);
assert.equal(onStringCalled, 1);
assert.deepEqual('$', decoder.write(buffer));
buffer = new Buffer('¢');
decoder.write(buffer.slice(0, 1));
decoder.write(buffer.slice(1, 2));
assert.equal(onStringCalled, 2);
assert.deepEqual('', decoder.write(buffer.slice(0, 1)));
assert.deepEqual('¢', decoder.write(buffer.slice(1, 2)));
buffer = new Buffer('€');
decoder.write(buffer.slice(0, 1));
decoder.write(buffer.slice(1, 2));
decoder.write(buffer.slice(2, 3));
assert.equal(onStringCalled, 3);
assert.deepEqual('', decoder.write(buffer.slice(0, 1)));
assert.deepEqual('', decoder.write(buffer.slice(1, 2)));
assert.deepEqual('€', decoder.write(buffer.slice(2, 3)));
buffer = new Buffer([0xF0, 0xA4, 0xAD, 0xA2]);
decoder.write(buffer.slice(0, 1));
decoder.write(buffer.slice(1, 2));
decoder.write(buffer.slice(2, 3));
decoder.write(buffer.slice(3, 4));
assert.equal(onStringCalled, 4);
s = '';
s += decoder.write(buffer.slice(0, 1));
s += decoder.write(buffer.slice(1, 2));
s += decoder.write(buffer.slice(2, 3));
s += decoder.write(buffer.slice(3, 4));
assert.ok(s.length > 0);
// A mixed ascii and non-ascii string
// Test stolen from deps/v8/test/cctest/test-strings.cc
@ -51,18 +45,16 @@ charLengths = [0, 0, 1, 2, 2, 2, 3, 4, 4, 4, 5, 5];
print('scanning ');
for (var j = 2; j < buffer.length; j++) {
for (var i = 1; i < j; i++) {
var decoder = new Utf8Decoder();
var sum = "";
decoder.onString = function (s) { sum += s; };
var decoder = new StringDecoder('utf8');
decoder.write(buffer.slice(0, i));
var sum = decoder.write(buffer.slice(0, i));
// just check that we've received the right amount
// after the first write
assert.equal(charLengths[i], sum.length);
decoder.write(buffer.slice(i, j));
decoder.write(buffer.slice(j, buffer.length));
sum += decoder.write(buffer.slice(i, j));
sum += decoder.write(buffer.slice(j, buffer.length));
assert.equal(expected, sum);
print(".");
}
Loading…
Cancel
Save