mirror of https://github.com/lukechilds/node.git
Bert Belder
13 years ago
committed by
isaacs
2 changed files with 342 additions and 3 deletions
@ -0,0 +1,327 @@ |
|||
|
|||
var assert = require('assert'), |
|||
Stream = require('stream'), |
|||
inherits = require('util').inherits; |
|||
|
|||
|
|||
/*
 * This filter consumes a stream of characters and emits one string per line.
 *
 * Recognized line terminators are "\r\n", "\n\r", "\n" and "\r". Each 'data'
 * event carries one line without its terminator. Text after the last
 * terminator is emitted by end() if it is non-empty.
 */
function LineSplitter() {
  var self = this,
      buffer = '';

  Stream.call(this);
  this.writable = true;

  // Accepts a chunk of text (anything that can be concatenated to a string),
  // emits every line that is known to be complete and always returns true.
  this.write = function(data) {
    // A lone "\r" or "\n" at the very end of the text may be the first half
    // of a two-character terminator whose second half arrives with the next
    // chunk. It is therefore only treated as a terminator when at least one
    // more character follows; otherwise it stays in the buffer.
    var lines = (buffer + data).split(/\r\n|\n\r|[\r\n](?=[\s\S])/);
    for (var i = 0; i < lines.length - 1; i++) {
      self.emit('data', lines[i]);
    }
    buffer = lines[lines.length - 1];
    return true;
  };

  // Accepts an optional final chunk, flushes what is left in the buffer and
  // emits 'end'.
  this.end = function(data) {
    self.write(data || '');

    // No more input will arrive, so a held-back trailing terminator can now
    // be resolved. The buffer holds at most one terminator, at its very end.
    var rest = buffer.split(/\r\n|\n\r|\n|\r/);
    buffer = '';
    for (var i = 0; i < rest.length; i++) {
      // The final piece is the unterminated tail; skip it when empty.
      if (i < rest.length - 1 || rest[i]) {
        self.emit('data', rest[i]);
      }
    }
    self.emit('end');
  };
}
inherits(LineSplitter, Stream);
|||
|
|||
|
|||
/*
 * This filter consumes lines and emits paragraph objects.
 *
 * Each emitted paragraph has these properties:
 *   li                - the list marker ("1.", "*" or "-") that preceded the
 *                       paragraph, or '' when there was none
 *   in_license_block  - whether the paragraph was found inside a license block
 *   lines             - the content lines, stripped of indentation, list
 *                       marker and (inside license blocks) comment markers
 *   level             - indentation level of the paragraph's first line
 *                       (two columns of leading whitespace per level)
 *
 * A line consisting of three or more '=' or '"' characters toggles between
 * license blocks and the text in between; the parser starts outside a
 * license block.
 */
function ParagraphParser() {
  var self = this,
      block_is_license_block = false,
      block_has_c_style_comment,
      paragraph_line_indent,
      paragraph;

  Stream.call(this);
  this.writable = true;

  resetBlock(false);

  // Accepts one line (coerced to a string) and always returns true.
  this.write = function(data) {
    parseLine(data + '');
    return true;
  };

  // Accepts an optional final line, emits the pending paragraph (if any) and
  // then emits 'end'.
  this.end = function(data) {
    if (data) {
      parseLine(data + '');
    }
    flushParagraph();
    self.emit('end');
  };

  // Starts a fresh, empty paragraph. An indent of -1 means "not known yet".
  function resetParagraph() {
    paragraph_line_indent = -1;

    paragraph = {
      li: '',
      in_license_block: block_is_license_block,
      lines: []
    };
  }

  // Starts a new block and, with it, a new paragraph.
  function resetBlock(is_license_block) {
    block_is_license_block = is_license_block;
    block_has_c_style_comment = false;
    resetParagraph();
  }

  // Emits the current paragraph unless it is completely empty, then starts a
  // new one.
  function flushParagraph() {
    if (paragraph.lines.length || paragraph.li) {
      self.emit('data', paragraph);
    }
    resetParagraph();
  }

  // Processes a single input line and updates the parser state.
  function parseLine(line) {
    // Strip trailing whitespace
    line = line.replace(/\s*$/, '');

    // Detect block separator
    if (/^\s*(=|"){3,}\s*$/.test(line)) {
      flushParagraph();
      resetBlock(!block_is_license_block);
      return;
    }

    // Strip comments around block
    if (block_is_license_block) {
      if (!block_has_c_style_comment)
        block_has_c_style_comment = /^\s*(\/\*)/.test(line);
      if (block_has_c_style_comment) {
        var prev = line;
        // Remove a leading comment opener, closer or continuation asterisk.
        line = line.replace(/^(\s*?)(?:\s?\*\/|\/\*\s|\s\*\s?)/, '$1');
        // Nothing was removed: drop two columns so that the text lines up
        // with lines that did carry a comment marker.
        if (prev === line)
          line = line.replace(/^\s{2}/, '');
        // The comment closer on this line ends the C style comment.
        if (/\*\//.test(prev))
          block_has_c_style_comment = false;
      } else {
        // Strip C++ and perl style comments.
        line = line.replace(/^(\s*)(?:\/\/\s?|#\s?)/, '$1');
      }
    }

    // Detect blank line (paragraph separator)
    if (!/\S/.test(line)) {
      flushParagraph();
      return;
    }

    // Detect separator "lines" within a block. These mark a paragraph break
    // and are stripped from the output.
    if (/^\s*[=*\-]{5,}\s*$/.test(line)) {
      flushParagraph();
      return;
    }

    // Find out the indentation level and whether the line starts a bulleted
    // or numbered list item.
    var result = /^(\s*)(\d+\.|\*|-)?\s*/.exec(line);
    assert.ok(result);
    // The number of characters that will be stripped from the beginning of
    // the line.
    var line_strip_length = result[0].length;
    // The indentation size that will be used to detect indentation jumps.
    // Fudge by 1 space.
    var line_indent = Math.floor(result[0].length / 2) * 2;
    // The indentation level that will be exported
    var level = Math.floor(result[1].length / 2);
    // The list indicator that precedes the actual content, if any.
    var line_li = result[2];

    // Flush the paragraph when there is a li or an indentation jump
    if (line_li || (line_indent !== paragraph_line_indent &&
                    paragraph_line_indent !== -1)) {
      flushParagraph();
      paragraph.li = line_li || '';
    }

    // Set the paragraph indent that we use to detect indentation jumps. When
    // we just detected a list indicator, wait for the next line to arrive
    // before setting this. The indent is recorded only while it is still
    // unknown (-1); otherwise it would never be set and indentation jumps
    // could not be detected.
    if (!line_li && paragraph_line_indent === -1) {
      paragraph_line_indent = line_indent;
    }

    // Set the output indent level if it has not been set yet.
    if (paragraph.level === undefined)
      paragraph.level = level;

    // Strip leading whitespace and li.
    line = line.slice(line_strip_length);

    if (line)
      paragraph.lines.push(line);
  }
}
inherits(ParagraphParser, Stream);
|||
|
|||
|
|||
/*
 * This filter consumes paragraph objects and emits modified paragraph objects.
 * The lines within the paragraph are unwrapped where appropriate: a line is
 * joined to its predecessor with a single space unless the predecessor looks
 * like it was deliberately kept short.
 *
 * The paragraph object and its `lines` array are modified in place and the
 * same object is emitted again.
 */
function Unwrapper() {
  var self = this;

  Stream.call(this);
  this.writable = true;

  this.write = function(paragraph) {
    var lines = paragraph.lines,
        break_after = [],
        unwrapped = [],
        i;

    // Decide, for every line but the last, whether the line break that
    // follows it must be kept.
    for (i = 0; i < lines.length - 1; i++) {
      var line = lines[i];

      // When a line is really short, the line was probably kept separate for a
      // reason.
      if (line.length < 50) {
        // If the first word on the next line really didn't fit after the line,
        // it probably was just ordinary wrapping after all.
        var next_first_word_length = lines[i + 1].replace(/\s.*$/, '').length;
        if (line.length + next_first_word_length < 60) {
          break_after[i] = true;
        }
      }
    }

    // Rebuild the lines in a single pass. Each decision is looked up by the
    // index of the original line it was computed for, so a merge never makes
    // a later break decision unreachable.
    for (i = 0; i < lines.length; i++) {
      if (i > 0 && !break_after[i - 1]) {
        unwrapped[unwrapped.length - 1] += ' ' + lines[i];
      } else {
        unwrapped.push(lines[i]);
      }
    }

    // Write the result back into the original array so that the paragraph
    // keeps referring to the same `lines` array.
    lines.length = 0;
    Array.prototype.push.apply(lines, unwrapped);

    self.emit('data', paragraph);
  };

  // Accepts an optional final paragraph and then emits 'end'.
  this.end = function(data) {
    if (data)
      self.write(data);
    self.emit('end');
  };
}
inherits(Unwrapper, Stream);
|||
|
|||
|
|||
/*
 * This filter generates an rtf document from a stream of paragraph objects.
 *
 * Every paragraph written is emitted as one chunk of rtf text. The document
 * header is emitted right before the first paragraph, and the closing brace
 * is emitted by end(); when no paragraph was ever written, neither header nor
 * footer is produced.
 *
 * The following paragraph properties are read: `li`, `level`,
 * `in_license_block` and `lines`.
 */
function RtfGenerator() {
  var self = this,
      did_write_anything = false;

  Stream.call(this);
  this.writable = true;

  this.write = function(paragraph) {
    if (!did_write_anything) {
      emitHeader();
      did_write_anything = true;
    }

    // List items are indented one level deeper than their nominal level.
    var li = paragraph.li,
        level = paragraph.level + (li ? 1 : 0),
        lic = paragraph.in_license_block;

    var rtf = '\\pard';
    rtf += '\\sa150\\sl300\\slmult1';
    if (level > 0)
      rtf += '\\li' + (level * 240);
    if (li) {
      // Hanging indent: the list marker sits 240 twips left of the text.
      rtf += '\\tx' + (level) * 240;
      rtf += '\\fi-240';
    }
    if (lic)
      rtf += '\\ri240';
    // Text outside of license blocks is set in bold.
    if (!lic)
      rtf += '\\b';
    if (li)
      rtf += ' ' + li + '\\tab';
    rtf += ' ';
    rtf += paragraph.lines.map(rtfEscape).join('\\line ');
    if (!lic)
      rtf += '\\b0';
    rtf += '\\par\n';

    self.emit('data', rtf);
  };

  // Accepts an optional final paragraph, closes the document if anything was
  // written and then emits 'end'.
  this.end = function(data) {
    if (data)
      self.write(data);
    if (did_write_anything)
      emitFooter();
    self.emit('end');
  };

  // Formats the integer part of `number` as hexadecimal, left-padded with
  // zeros to at least `length` digits.
  function toHex(number, length) {
    var hex = (~~number).toString(16);
    while (hex.length < length)
      hex = '0' + hex;
    return hex;
  }

  // Escapes a string for inclusion in rtf text.
  function rtfEscape(string) {
    return string
      .replace(/[\\\{\}]/g, function(m) {
        return '\\' + m;
      })
      .replace(/\t/g, function() {
        return '\\tab ';
      })
      .replace(/[\x00-\x1f\x7f-\xff]/g, function(m) {
        return '\\\'' + toHex(m.charCodeAt(0), 2);
      })
      .replace(/\ufeff/g, '')
      .replace(/[\u0100-\uffff]/g, function(m) {
        // The argument of \u is a signed 16-bit *decimal* number, so code
        // units above 0x7fff have to be written as negative values. The '?'
        // is the single fallback character announced by \uc1 in the header.
        var code = m.charCodeAt(0);
        if (code > 0x7fff)
          code -= 0x10000;
        return '\\u' + code + '?';
      });
  }

  function emitHeader() {
    self.emit('data', '{\\rtf1\\ansi\\ansicpg1252\\uc1\\deff0\\deflang1033' +
                      '{\\fonttbl{\\f0\\fswiss\\fcharset0 Tahoma;}}\\fs20\n' +
                      '{\\*\\generator txt2rtf 0.0.1;}\n');
  }

  function emitFooter() {
    self.emit('data', '}');
  }
}
inherits(RtfGenerator, Stream);
|||
|
|||
|
|||
/*
 * Assemble the conversion pipeline. Text read from stdin passes through the
 * filters in the order listed below and the resulting rtf goes to stdout.
 */
var stdin = process.stdin;
var stdout = process.stdout;
var line_splitter = new LineSplitter();
var paragraph_parser = new ParagraphParser();
var unwrapper = new Unwrapper();
var rtf_generator = new RtfGenerator();

stdin.setEncoding('utf-8');
stdin.resume();

// Connect every stage to the one that follows it.
var pipeline = [
  stdin,
  line_splitter,
  paragraph_parser,
  unwrapper,
  rtf_generator,
  stdout
];
for (var stage = 0; stage < pipeline.length - 1; stage++) {
  pipeline[stage].pipe(pipeline[stage + 1]);
}
Loading…
Reference in new issue