From 94284e7d2e558cc3f1a985460fb0cc6606133766 Mon Sep 17 00:00:00 2001 From: Nao Iizuka Date: Fri, 15 Mar 2013 16:18:30 -1000 Subject: [PATCH] readline: handle wide characters properly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Handle wide characters (such as あ, 谢, 고) as two column wide to make cursor move properly. Closes #555. Closes #4994. --- doc/api/readline.markdown | 2 +- lib/readline.js | 160 ++++++++++++++++++++++--- test/simple/test-readline-interface.js | 19 +++ 3 files changed, 164 insertions(+), 17 deletions(-) diff --git a/doc/api/readline.markdown b/doc/api/readline.markdown index d586b646c0..b94f0615c7 100644 --- a/doc/api/readline.markdown +++ b/doc/api/readline.markdown @@ -86,7 +86,7 @@ a `"resize"` event on the `output` if/when the columns ever change The class that represents a readline interface with an input and output stream. -### rl.setPrompt(prompt, length) +### rl.setPrompt(prompt) Sets the prompt, for example when you run `node` on the command line, you see `> `, which is node's prompt. 
// Returns the display position reached after writing the given string,
// as {cols, rows}: `cols` is the column on the final row and `rows` is the
// number of rows below the row the string started on.
//
// - Full-width code points take two columns and are never split across the
//   right edge of the terminal.
// - CSI escape sequences (e.g. colors in a prompt) take no columns.
// - A line feed starts a new row; a carriage return goes back to the first
//   column of the current row. This keeps multi-line prompts measurable now
//   that setPrompt() no longer accepts an explicit length.
Interface.prototype._getDisplayPos = function(str) {
  var offset = 0;   // columns consumed since the last line feed
  var row = 0;      // rows completed by explicit line feeds (plus their wraps)
  var col = this.columns;
  var code;

  // CSI sequence: ESC [ parameter-bytes intermediate-bytes final-byte.
  // These are interpreted by the terminal and never occupy a cell.
  str = str.replace(/\x1b\[[0-?]*[ -\/]*[@-~]/g, '');

  for (var i = 0, len = str.length; i < len; i++) {
    code = codePointAt(str, i);
    if (code >= 0x10000) { // surrogates
      i++;
    }
    if (code === 0x0a) { // line feed
      // Account for the rows the finished line wrapped over, then one more
      // for the line feed itself. With columns === Infinity this adds 1.
      row += 1 + Math.floor(offset / col);
      offset = 0;
      continue;
    }
    if (code === 0x0d) { // carriage return
      // Back to column 0 of the current (possibly wrapped) row.
      offset -= offset % col;
      continue;
    }
    if (isFullWidthCodePoint(code)) {
      // A two-column character cannot start in the last column: the
      // terminal leaves that cell empty and wraps to the next row.
      if ((offset + 1) % col === 0) {
        offset++;
      }
      offset += 2;
    } else {
      offset++;
    }
  }

  var cols = offset % col;
  var rows = row + (offset - cols) / col;
  return {cols: cols, rows: rows};
};
/**
 * Computes how many terminal columns the given string occupies.
 *
 * Every code point counts as one column, except code points reported as
 * full-width by isFullWidthCodePoint(), which count as two. A code point
 * of 0x10000 or above (as returned by codePointAt()) consumes two UTF-16
 * code units of the string.
 */

function getStringWidth(str) {
  var total = 0;
  var pos = 0;
  var end = str.length;
  while (pos < end) {
    var cp = codePointAt(str, pos);
    total += isFullWidthCodePoint(cp) ? 2 : 1;
    // Step over both halves when the code point came from a surrogate pair.
    pos += cp >= 0x10000 ? 2 : 1;
  }
  return total;
}
// Inclusive [first, last] code point ranges that are treated as full-width.
// Code points are derived from:
// http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt
var FULL_WIDTH_RANGES = [
  [0x1100, 0x115f],   // Hangul Jamo
  // CJK Radicals Supplement .. Enclosed CJK Letters and Months
  // (0x303f inside this range is excluded by the function below)
  [0x2e80, 0x3247],
  // Enclosed CJK Letters and Months .. CJK Unified Ideographs Extension A
  [0x3250, 0x4dbf],
  [0x4e00, 0xa4c6],   // CJK Unified Ideographs .. Yi Radicals
  [0xa960, 0xa97c],   // Hangul Jamo Extended-A
  [0xac00, 0xd7a3],   // Hangul Syllables
  [0xf900, 0xfaff],   // CJK Compatibility Ideographs
  [0xfe10, 0xfe19],   // Vertical Forms
  [0xfe30, 0xfe6b],   // CJK Compatibility Forms .. Small Form Variants
  [0xff01, 0xff60],   // Halfwidth and Fullwidth Forms
  [0xffe0, 0xffe6],   // Halfwidth and Fullwidth Forms
  [0x1b000, 0x1b001], // Kana Supplement
  [0x1f200, 0x1f251], // Enclosed Ideographic Supplement
  // CJK Unified Ideographs Extension B .. Tertiary Ideographic Plane
  [0x20000, 0x3fffd]
];

/**
 * Tells whether the character for the given Unicode code point is
 * full-width. Returns true for full-width code points and false for
 * everything else, including NaN.
 */

function isFullWidthCodePoint(code) {
  // Nothing below 0x1100 is full-width; NaN never is.
  if (isNaN(code) || !(code >= 0x1100)) {
    return false;
  }
  // The single hole inside the 0x2e80..0x3247 range.
  if (code === 0x303f) {
    return false;
  }
  // LEFT-POINTING / RIGHT-POINTING ANGLE BRACKET
  if (code === 0x2329 || code === 0x232a) {
    return true;
  }
  for (var k = 0; k < FULL_WIDTH_RANGES.length; k++) {
    var range = FULL_WIDTH_RANGES[k];
    if (range[0] <= code && code <= range[1]) {
      return true;
    }
  }
  return false;
}
/**
 * Returns the Unicode code point for the character at the given index in
 * the given string. Similar to String.prototype.charCodeAt(), but a high
 * surrogate that is immediately followed by a low surrogate is combined
 * into the single code point (>= 0x10000) the pair encodes.
 *
 * An unpaired high surrogate (at the end of the string, or followed by
 * anything other than a low surrogate) is returned unchanged, exactly as
 * charCodeAt() would return it.
 *
 * @param {string} str The string to read from.
 * @param {number} index Index of the UTF-16 code unit to start at.
 * @return {number} The code point, or NaN when index is out of range.
 */

function codePointAt(str, index) {
  var code = str.charCodeAt(index);
  if (0xd800 <= code && code <= 0xdbff) { // High surrogate
    var low = str.charCodeAt(index + 1);
    // Combine only with a real low surrogate. Blindly combining with any
    // following unit produces a bogus value (e.g. '\ud803A' -> 0x3041).
    // A NaN `low` (end of string) fails both comparisons as well.
    if (0xdc00 <= low && low <= 0xdfff) {
      code = 0x10000 + (code - 0xd800) * 0x400 + (low - 0xdc00);
    }
  }
  return code;
}