You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

451 lines
10 KiB

/**
* @author Titus Wormer
* @copyright 2015 Titus Wormer
* @license MIT
* @module remark:parse:tokenizer
* @fileoverview Markdown tokenizer.
*/
'use strict';

/* Merge handlers, keyed by the type of node they know
 * how to merge.  Each handler is invoked with the
 * previous sibling and the new node, and returns the
 * node which should end up in the tree. */
var MERGEABLE_NODES = {};

MERGEABLE_NODES.text = mergeText;
MERGEABLE_NODES.blockquote = mergeBlockquote;

/* Expose the tokenizer factory. */
module.exports = factory;
/* Check whether `node` may be merged with an adjacent
 * node.  Anything which is not a positioned text node
 * is always mergeable. */
function mergeable(node) {
  var from;
  var to;

  if (node.type === 'text' && node.position) {
    from = node.position.start;
    to = node.position.end;

    /* A text node on a single line is only mergeable
     * when it spans exactly as many columns as its
     * `value` has characters. */
    if (from.line === to.line) {
      return to.column - from.column === node.value.length;
    }
  }

  return true;
}
/* Merge two text nodes by appending the value of `node`
 * to the value of `prev`.  Returns `prev`. */
function mergeText(prev, node) {
  prev.value = prev.value + node.value;

  return prev;
}
/* Merge two blockquotes by moving the children of `node`
 * onto `prev`.  When `this.options.commonmark` is set,
 * nothing is merged and `node` is returned untouched. */
function mergeBlockquote(prev, node) {
  var result = node;

  if (!this.options.commonmark) {
    /* `concat` assigns a fresh array: the previous
     * `children` array is not modified. */
    prev.children = prev.children.concat(node.children);
    result = prev;
  }

  return result;
}
/* Construct a tokenizer. This creates both
* `tokenizeInline` and `tokenizeBlock`.
*
* `type` is used as a prefix: the returned function reads
* its method order from `this[type + 'Methods']` and its
* tokenizer functions from `this[type + 'Tokenizers']`. */
function factory(type) {
return tokenize;
/* Tokenizer for a bound `type`.
*
* Must be invoked with the parser as `this`. `value` is
* the string to tokenize and `location` supplies the
* `line` and `column` at which `value` starts. Returns
* the list of top-level nodes which were added, and
* stores the final position on `self.eof`. */
function tokenize(value, location) {
var self = this;
/* Per-line column offsets, indexed by line number. */
var offset = self.offset;
var tokens = [];
var methods = self[type + 'Methods'];
var tokenizers = self[type + 'Tokenizers'];
/* Current position. Mutated by `updatePosition`, and
* restored by `reset` and `test` on the function
* returned from `eat`. */
var line = location.line;
var column = location.column;
var index;
var length;
var method;
var name;
var matched;
var valueLength;
/* Nothing to tokenize: return the empty token list. */
if (!value) {
return tokens;
}
/* Expose on `eat`. */
eat.now = now;
eat.file = self.file;
/* Sync initial offset: an empty subvalue moves nothing,
* but applies `offset` for the starting line. */
updatePosition('');
/* Iterate over `value`, and iterate over all
* tokenizers. When one eats something, re-iterate
* with the remaining value. If no tokenizer eats,
* something failed (should not happen) and the
* failure is reported through `self.file.fail`. */
while (value) {
index = -1;
length = methods.length;
matched = false;
while (++index < length) {
name = methods[index];
method = tokenizers[name];
/* Skip names without a tokenizer, and tokenizers
* whose flags rule out the current parser state. */
if (
method &&
(!method.onlyAtStart || self.atStart) &&
(!method.notInList || !self.inList) &&
(!method.notInBlock || !self.inBlock) &&
(!method.notInLink || !self.inLink)
) {
valueLength = value.length;
method.apply(self, [eat, value]);
/* The return value is ignored: a tokenizer matched
* when `value` got shorter, which happens through
* `eat`. */
matched = valueLength !== value.length;
if (matched) {
break;
}
}
}
/* istanbul ignore if */
if (!matched) {
self.file.fail(new Error('Infinite loop'), eat.now());
}
}
self.eof = now();
return tokens;
/**
 * Advance `line` and `column` past `subvalue`.
 *
 * Every line feed in `subvalue` moves to the next line.
 * If the resulting line has an entry in `offset`, that
 * entry is added to the column (after a line feed), or
 * used as a lower bound for it (on the same line).
 *
 * @example
 *   updatePosition('foo');
 *
 * @param {string} subvalue - Subvalue to eat.
 */
function updatePosition(subvalue) {
  var lastBreak = subvalue.lastIndexOf('\n');
  var crossedLine = lastBreak !== -1;
  var cursor;
  var shift;

  /* Count the line feeds. */
  for (
    cursor = subvalue.indexOf('\n');
    cursor !== -1;
    cursor = subvalue.indexOf('\n', cursor + 1)
  ) {
    line++;
  }

  if (crossedLine) {
    /* Columns are counted from the last line feed. */
    column = subvalue.length - lastBreak;
  } else {
    column += subvalue.length;
  }

  if (line in offset) {
    shift = offset[line];

    if (crossedLine) {
      column += shift;
    } else if (column <= shift) {
      column = shift + 1;
    }
  }
}
/**
 * Start tracking the offsets of a range.  To be called
 * before the first character of the range is eaten.
 *
 * @return {Function} - `done`, to be called when the
 *   last character is eaten.
 */
function getOffset() {
  var collected = [];
  var next = line + 1;

  return done;

  /**
   * Collect one entry for each line entered since
   * `getOffset` was called: the `offset` of that line
   * (or `0`), plus one.
   *
   * @return {Array.<number>} - Offset.
   */
  function done() {
    var limit = line + 1;

    for (; next < limit; next++) {
      collected.push((offset[next] || 0) + 1);
    }

    return collected;
  }
}
/**
 * Get the current position.
 *
 * The `offset` field is computed by `self.toOffset` from
 * the current line and column.
 *
 * @example
 *   position = now(); // {line: 1, column: 1, offset: 0}
 *
 * @return {Object} - Current position, a new object on
 *   every call.
 */
function now() {
  var point = {};

  point.line = line;
  point.column = column;
  point.offset = self.toOffset(point);

  return point;
}
/**
 * Positional information of a node: runs from the given
 * `start` to wherever the tokenizer is when the instance
 * is created.
 *
 * @example
 *   start = now();
 *   updatePosition('foo');
 *   location = new Position(start);
 *
 * @constructor
 * @param {Object} start - Starting position.
 */
function Position(start) {
  var end = now();

  this.start = start;
  this.end = end;
}
/**
 * Check that `subvalue` is what the remaining `value`
 * starts with, and report a failure through
 * `self.file.fail` when it is not.
 * This shouldn’t happen, but will on new, incorrect
 * rules.
 *
 * @example
 *   // When the current value is set to `foo bar`.
 *   validateEat('foo'); // fine
 *   validateEat('bar'); // fails: `value` starts with `foo`.
 *
 * @param {string} subvalue - Value to be eaten.
 */
function validateEat(subvalue) {
  var head = value.substring(0, subvalue.length);
  var error;

  /* istanbul ignore if */
  if (head !== subvalue) {
    /* Capture stack-trace. */
    error = new Error(
      'Incorrectly eaten value: please report this ' +
      'warning on http://git.io/vg5Ft'
    );

    self.file.fail(error, now());
  }
}
/**
 * Mark the current position, and get a function which
 * patches `position` on a node: from the marked position
 * to the position at the time of patching.
 *
 * @example
 *   var update = position();
 *   updatePosition('foo');
 *   update({});
 *
 * @returns {Function} - Updater.
 */
function position() {
  var before = now();

  return update;

  /**
   * Add the position to a node.
   *
   * @example
   *   update({type: 'text', value: 'foo'});
   *
   * @param {Node} node - Node to attach position on.
   * @param {Array} [indent] - Indentation for `node`.
   * @return {Node} - `node`.
   */
  function update(node, indent) {
    var previous = node.position;
    var row = previous && previous.end.line;
    var markedLine = before.line;
    var carried;

    node.position = new Position(previous ? previous.start : before);

    /* A node which already had a `position` was merged:
     * its start is kept, but the indent of the lines
     * between the previous end and the marked position
     * was never tracked.  That space is (should be?)
     * empty, so it can be derived from `offset` now. */
    if (previous && indent && previous.indent) {
      /* The same array as on the previous position: it
       * is extended in place. */
      carried = previous.indent;

      if (row < markedLine) {
        for (row++; row < markedLine; row++) {
          carried.push((offset[row] || 0) + 1);
        }

        carried.push(before.column);
      }

      indent = carried.concat(indent);
    }

    node.position.indent = indent || [];

    return node;
  }
}
/**
 * Add `node` to `parent`s children or to `tokens`.
 * Performs merges where possible: when the last child
 * has the same type as `node`, that type has a handler
 * in `MERGEABLE_NODES`, and both nodes are `mergeable`,
 * the handler decides which node ends up in the tree.
 *
 * Leaves the “start” state, through `self.exitStart`,
 * once `tokens` is no longer empty.
 *
 * @example
 *   add({});
 *
 *   add({}, {children: []});
 *
 * @param {Object} node - Node to add.
 * @param {Object} [parent] - Parent to insert into.
 * @return {Object} - Added or merged into node.
 */
function add(node, parent) {
  var children = parent ? parent.children : tokens;
  var prev = children[children.length - 1];

  /* Only own keys of `MERGEABLE_NODES` are handlers.  An
   * `in` check would also accept inherited keys such as
   * `toString` or `constructor`, and invoke those as if
   * they were merge handlers. */
  if (
    prev &&
    node.type === prev.type &&
    Object.prototype.hasOwnProperty.call(MERGEABLE_NODES, node.type) &&
    mergeable(prev) &&
    mergeable(node)
  ) {
    node = MERGEABLE_NODES[node.type].call(self, prev, node);
  }

  /* A handler returning `prev` merged `node` into it:
   * nothing new to insert. */
  if (node !== prev) {
    children.push(node);
  }

  if (self.atStart && tokens.length !== 0) {
    self.exitStart();
  }

  return node;
}
/**
 * Remove `subvalue` from the start of `value`, and move
 * the current position past it.
 *
 * @example
 *   eat('foo')({type: 'text', value: 'foo'});
 *
 * @param {string} subvalue - Removed from `value`, and
 *   passed to `updatePosition`.  Must be at the start of
 *   `value`.
 * @return {Function} - Wrapper around `add`, which also
 *   adds `position` to node.  Has `reset` and `test`
 *   functions attached (and `reset` has `test` as well).
 */
function eat(subvalue) {
  /* Capture everything which depends on the position
   * before `subvalue` is eaten. */
  var offsets = getOffset();
  var patch = position();
  var origin = now();

  validateEat(subvalue);

  apply.reset = reset;
  apply.test = test;
  reset.test = test;

  value = value.substring(subvalue.length);

  updatePosition(subvalue);

  /* Resolve the indentation of the eaten range. */
  offsets = offsets();

  return apply;

  /**
   * Add the given arguments, add `position` to the
   * returned node, and return the node.
   *
   * @param {Object} node - Node to add.
   * @param {Object} [parent] - Node to insert into.
   * @return {Node} - Added node.
   */
  function apply(node, parent) {
    /* Patched twice: once so `add` sees a position, and
     * once more on what `add` returned (possibly a
     * merged node), now with indentation. */
    var positioned = patch(node);
    var added = add(positioned, parent);

    return patch(added, offsets);
  }

  /**
   * Functions just like `apply`, but afterwards undoes
   * the eating: the line and column are restored, and
   * the eaten value is re-added.
   *
   * This is useful for nodes with a single type of
   * content, such as lists and tables.
   *
   * See `apply` above for what parameters are expected.
   *
   * @return {Node} - Added node.
   */
  function reset() {
    var node = apply.apply(null, arguments);

    rewind();

    return node;
  }

  /**
   * Get the position `subvalue` occupies, and undo the
   * eating, without adding a node.
   *
   * @return {Position} - Position after eating `subvalue`.
   */
  function test() {
    var probe = patch({});

    rewind();

    return probe.position;
  }

  /* Go back to the state before `subvalue` was eaten. */
  function rewind() {
    line = origin.line;
    column = origin.column;
    value = subvalue + value;
  }
}
}
}