node/tools/license2rtf.js



								var assert = require('assert'),

								    Stream = require('stream'),

								    inherits = require('util').inherits;


								/*

								 * This filter consumes a stream of characters and emits one string per line.

								 */

								function LineSplitter() {

								  var self = this,

								      buffer = "";


								  Stream.call(this);

								  this.writable = true;


								  this.write = function(data) {

								    var lines = (buffer + data).split(/\r\n|\n\r|\n|\r/);

								    for (var i = 0; i < lines.length - 1; i++) {

								      self.emit('data', lines[i]);

								    }

								    buffer = lines[lines.length - 1];

								    return true;

								  };


								  this.end = function(data) {

								    this.write(data || '');

								    if (buffer) {

								      self.emit('data', buffer);

								    }

								    self.emit('end');

								  };

								}

								inherits(LineSplitter, Stream);


								/*

								 * This filter consumes lines and emits paragraph objects.

								 */

								function ParagraphParser() {

								  var self = this,

								      block_is_license_block = false,

								      block_has_c_style_comment,

								      is_first_line_in_paragraph,

								      paragraph_line_indent,

								      paragraph;


								   Stream.call(this);

								   this.writable = true;


								   resetBlock(false);


								   this.write = function(data) {

								     parseLine(data + '');

								     return true;

								   };


								   this.end = function(data) {

								     if (data) {

								       parseLine(data + '');

								     }

								     flushParagraph();

								     self.emit('end');

								   };


								  function resetParagraph() {

								    is_first_line_in_paragraph = true;

								    paragraph_line_indent = -1;


								    paragraph = {

								      li: '',

								      in_license_block: block_is_license_block,

								      lines: []

								    };

								  }


								  function resetBlock(is_license_block) {

								    block_is_license_block = is_license_block;

								    block_has_c_style_comment = false;

								    resetParagraph();

								  }


								  function flushParagraph() {

								    if (paragraph.lines.length || paragraph.li) {

								      self.emit('data', paragraph);

								    }

								    resetParagraph();

								  }


								  function parseLine(line) {

								    // Strip trailing whitespace

								    line = line.replace(/\s*$/, '');


								    // Detect block separator

								    if (/^\s*(=|"){3,}\s*$/.test(line)) {

								      flushParagraph();

								      resetBlock(!block_is_license_block);

								      return;

								    }


								    // Strip comments around block

								    if (block_is_license_block) {

								      if (!block_has_c_style_comment)

								        block_has_c_style_comment = /^\s*(\/\*)/.test(line);

								      if (block_has_c_style_comment) {

								        var prev = line;

								        line = line.replace(/^(\s*?)(?:\s?\*\/|\/\*\s|\s\*\s?)/, '$1');

								        if (prev == line)

								          line = line.replace(/^\s{2}/, '');

								        if (/\*\//.test(prev))

								          block_has_c_style_comment = false;

								      } else {

								        // Strip C++ and perl style comments.

								        line = line.replace(/^(\s*)(?:\/\/\s?|#\s?)/, '$1');

								      }

								    }


								    // Detect blank line (paragraph separator)

								    if (!/\S/.test(line)) {

								      flushParagraph();

								      return;

								    }


								    // Detect separator "lines" within a block. These mark a paragraph break

								    // and are stripped from the output.

								    if (/^\s*[=*\-]{5,}\s*$/.test(line)) {

								      flushParagraph();

								      return;

								    }


								    // Find out indentation level and the start of a lied or numbered list;

								    var result = /^(\s*)(\d+\.|\*|-)?\s*/.exec(line);

								    assert.ok(result);

								    // The number of characters that will be stripped from the beginning of

								    // the line.

								    var line_strip_length = result[0].length;

								    // The indentation size that will be used to detect indentation jumps.

								    // Fudge by 1 space.

								    var line_indent = Math.floor(result[0].length / 2) * 2;

								    // The indentation level that will be exported

								    var level = Math.floor(result[1].length / 2);

								    // The list indicator that precedes the actual content, if any.

								    var line_li = result[2];


								    // Flush the paragraph when there is a li or an indentation jump

								    if (line_li || (line_indent != paragraph_line_indent &&

								                    paragraph_line_indent != -1)) {

								      flushParagraph();

								      paragraph.li = line_li;

								    }


								    // Set the paragraph indent that we use to detect indentation jumps. When

								    // we just detected a list indicator, wait

								    // for the next line to arrive before setting this.

								    if (!line_li && paragraph_line_indent != -1) {

								      paragraph_line_indent = line_indent;

								    }


								    // Set the output indent level if it has not been set yet.

								    if (paragraph.level === undefined)

								      paragraph.level = level;


								    // Strip leading whitespace and li.

								    line = line.slice(line_strip_length);


								    if (line)

								      paragraph.lines.push(line);


								    is_first_line_in_paragraph = false;

								  }

								}

								inherits(ParagraphParser, Stream);


								/*

								 * This filter consumes paragraph objects and emits modified paragraph objects.

								 * The lines within the paragraph are unwrapped where appropriate. It also

								 * replaces multiple consecutive whitespace characters by a single one.

								 */

								function Unwrapper() {

								  var self = this;


								  Stream.call(this);

								  this.writable = true;


								  this.write = function(paragraph) {

								    var lines = paragraph.lines,

								        break_after = [],

								        i;


								    for (i = 0; i < lines.length - 1; i++) {

								      var line = lines[i];


								      // When a line is really short, the line was probably kept separate for a

								      // reason.

								      if (line.length < 50)  {

								        // If the first word on the next line really didn't fit after the line,

								        // it probably was just ordinary wrapping after all.

								        var next_first_word_length = lines[i + 1].replace(/\s.*$/, '').length;

								        if (line.length + next_first_word_length < 60) {

								          break_after[i] = true;

								        }

								      }

								    }


								    for (i = 0; i < lines.length - 1; ) {

								      if (!break_after[i]) {

								        lines[i] += ' ' + lines.splice(i + 1, 1)[0];

								      } else {

								        i++;

								      }

								    }


								    for (i = 0; i < lines.length; i++) {

								      // Replace multiple whitespace characters by a single one, and strip

								      // trailing whitespace.

								      lines[i] = lines[i].replace(/\s+/g, ' ').replace(/\s+$/, '');

								    }


								    self.emit('data', paragraph);

								  };


								  this.end = function(data) {

								    if (data)

								      self.write(data);

								    self.emit('end');

								  };

								}

								inherits(Unwrapper, Stream);


								/*

								 * This filter generates an rtf document from a stream of paragraph objects.

								 */

								function RtfGenerator() {

								  var self = this,

								      did_write_anything = false;


								  Stream.call(this);

								  this.writable = true;


								  this.write = function(paragraph) {

								    if (!did_write_anything) {

								      emitHeader();

								      did_write_anything = true;

								    }


								    var li = paragraph.li,

								        level = paragraph.level + (li ? 1 : 0),

								        lic = paragraph.in_license_block;


								    var rtf = "\\pard";

								    rtf += '\\sa150\\sl300\\slmult1';

								    if (level > 0)

								      rtf += '\\li' + (level * 240);

								    if (li) {

								      rtf += '\\tx' + (level) * 240;

								      rtf += '\\fi-240';

								    }

								    if (lic)

								      rtf += '\\ri240';

								    if (!lic)

								      rtf += '\\b';

								    if (li)

								      rtf += ' ' + li + '\\tab';

								    rtf += ' ';

								    rtf += paragraph.lines.map(rtfEscape).join('\\line ');

								    if (!lic)

								      rtf += '\\b0';

								    rtf += '\\par\n';


								    self.emit('data', rtf);

								  };


								  this.end = function(data) {

								    if (data)

								      self.write(data);

								    if (did_write_anything)

								      emitFooter();

								    self.emit('end');

								  };


								  function toHex(number, length) {

								    var hex = (~~number).toString(16);

								    while (hex.length < length)

								      hex = '0' + hex;

								    return hex;

								  }


								  function rtfEscape(string) {

								    return string

								      .replace(/[\\\{\}]/g, function(m) {

								       return '\\' + m;

								      })

								      .replace(/\t/g, function() {

								        return '\\tab ';

								      })

								      .replace(/[\x00-\x1f\x7f-\xff]/g, function(m) {

								        return '\\\'' + toHex(m.charCodeAt(0), 2);

								      })

								      .replace(/\ufeff/g, '')

								      .replace(/[\u0100-\uffff]/g, function(m) {

								        return '\\u' + toHex(m.charCodeAt(0), 4) + '?';

								     });

								  }


								  function emitHeader() {

								    self.emit('data', '{\\rtf1\\ansi\\ansicpg1252\\uc1\\deff0\\deflang1033' +

								                      '{\\fonttbl{\\f0\\fswiss\\fcharset0 Tahoma;}}\\fs20\n' +

								                      '{\\*\\generator txt2rtf 0.0.1;}\n');

								  }


								  function emitFooter() {

								    self.emit('data', '}');

								  }

								}

								inherits(RtfGenerator, Stream);


								var stdin = process.stdin,

								    stdout = process.stdout,

								    line_splitter = new LineSplitter(),

								    paragraph_parser = new ParagraphParser(),

								    unwrapper = new Unwrapper(),

								    rtf_generator = new RtfGenerator();


								stdin.setEncoding('utf-8');

								stdin.resume();


								stdin.pipe(line_splitter);

								line_splitter.pipe(paragraph_parser);

								paragraph_parser.pipe(unwrapper);

								unwrapper.pipe(rtf_generator);

								rtf_generator.pipe(stdout);