mirror of https://github.com/lukechilds/node.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
451 lines
10 KiB
451 lines
10 KiB
/**
 * @author Titus Wormer
 * @copyright 2015 Titus Wormer
 * @license MIT
 * @module remark:parse:tokenizer
 * @fileoverview Markdown tokenizer.
 */
|
|
|
|
'use strict';

/* Expose the tokenizer factory. */
module.exports = factory;
|
|
|
|
/* Map of node types which can be merged with an adjacent
 * node of the same type, to the function performing that
 * merge.  Each function is invoked as `fn(prev, node)` and
 * returns the node which should end up in the tree. */
var MERGEABLE_NODES = {
  text: mergeText,
  blockquote: mergeBlockquote
};
|
|
|
|
/**
 * Check whether a node is mergeable with adjacent nodes.
 *
 * Nodes which are not `text`, and nodes without positional
 * information, are always reported as mergeable.  A
 * positioned text node is mergeable when it spans more than
 * one line, or when the number of columns it occupies is
 * exactly the length of its `value`.
 *
 * @example
 *   mergeable({type: 'emphasis'}); // true
 *
 * @param {Object} node - Node to check.
 * @return {boolean} - Whether `node` may be merged.
 */
function mergeable(node) {
  var start;
  var end;

  if (node.type !== 'text' || !node.position) {
    return true;
  }

  start = node.position.start;
  end = node.position.end;

  /* Only merge nodes which occupy the same size as their
   * `value`. */
  return start.line !== end.line ||
    end.column - start.column === node.value.length;
}
|
|
|
|
/**
 * Merge two text nodes: `node` into `prev`.
 *
 * The value of `node` is appended to the value of `prev`;
 * `prev` is modified in place.
 *
 * @param {Object} prev - Preceding text node.
 * @param {Object} node - Following text node.
 * @return {Object} - `prev`.
 */
function mergeText(prev, node) {
  prev.value += node.value;

  return prev;
}
|
|
|
|
/**
 * Merge two blockquotes: `node` into `prev`, unless in
 * CommonMark mode.
 *
 * Must be invoked with a context (`this`) exposing
 * `options.commonmark`.  When that option is truthy nothing
 * is merged and `node` is returned as is; otherwise the
 * children of `node` are appended to those of `prev`.
 *
 * @this {Object} - Context exposing `options`.
 * @param {Object} prev - Preceding blockquote.
 * @param {Object} node - Following blockquote.
 * @return {Object} - `node` in CommonMark mode, `prev`
 *   otherwise.
 */
function mergeBlockquote(prev, node) {
  if (this.options.commonmark) {
    return node;
  }

  prev.children = prev.children.concat(node.children);

  return prev;
}
|
|
|
|
/**
 * Construct a tokenizer. This creates both
 * `tokenizeInline` and `tokenizeBlock`.
 *
 * @param {string} type - Prefix used to look up the list of
 *   method names (`type + 'Methods'`) and the map of
 *   tokenizer functions (`type + 'Tokenizers'`) on the
 *   context the returned function is invoked on.
 * @return {Function} - `tokenize`, bound to `type`.
 */
function factory(type) {
  return tokenize;

  /**
   * Tokenizer for a bound `type`.
   *
   * Must be invoked with a context (`this`) exposing
   * `offset`, `file`, `toOffset`, the `type`-prefixed
   * `Methods` and `Tokenizers` fields and, as read below,
   * the `atStart`, `inList`, `inBlock`, and `inLink` flags
   * and `exitStart`.  Sets `eof` on that context when done.
   *
   * @param {string} value - Content to tokenize.
   * @param {Object} location - Position (`line`, `column`)
   *   at which `value` starts.
   * @return {Array.<Object>} - Nodes produced by the
   *   tokenizers.
   */
  function tokenize(value, location) {
    var self = this;
    var offset = self.offset;
    var tokens = [];
    var methods = self[type + 'Methods'];
    var tokenizers = self[type + 'Tokenizers'];
    var line = location.line;
    var column = location.column;
    var index;
    var length;
    var method;
    var name;
    var matched;
    var valueLength;

    /* Nothing to tokenize: bail out before touching any
     * state on `self`. */
    if (!value) {
      return tokens;
    }

    /* Expose on `eat`. */
    eat.now = now;
    eat.file = self.file;

    /* Sync initial offset. */
    updatePosition('');

    /* Iterate over `value`, and iterate over all
     * tokenizers.  When one eats something, re-iterate
     * with the remaining value.  If no tokenizer eats,
     * something failed (should not happen) and an
     * exception is thrown. */
    while (value) {
      index = -1;
      length = methods.length;
      matched = false;

      while (++index < length) {
        name = methods[index];
        method = tokenizers[name];

        if (
          method &&
          (!method.onlyAtStart || self.atStart) &&
          (!method.notInList || !self.inList) &&
          (!method.notInBlock || !self.inBlock) &&
          (!method.notInLink || !self.inLink)
        ) {
          valueLength = value.length;

          method.apply(self, [eat, value]);

          /* A tokenizer matched when it shortened `value`
           * (which `eat` does as a side effect). */
          matched = valueLength !== value.length;

          if (matched) {
            break;
          }
        }
      }

      /* istanbul ignore if */
      if (!matched) {
        self.file.fail(new Error('Infinite loop'), eat.now());
      }
    }

    self.eof = now();

    return tokens;

    /**
     * Update line, column, and offset based on
     * `value`.
     *
     * Every line feed in `subvalue` advances `line`.
     * Without a line feed `column` advances by the length
     * of `subvalue`; otherwise it is recomputed from the
     * characters after the last line feed.  When `offset`
     * has an entry for the resulting line, that entry is
     * taken into account as well.
     *
     * @example
     *   updatePosition('foo');
     *
     * @param {string} subvalue - Subvalue to eat.
     */
    function updatePosition(subvalue) {
      var lastIndex = -1;
      var index = subvalue.indexOf('\n');

      while (index !== -1) {
        line++;
        lastIndex = index;
        index = subvalue.indexOf('\n', index + 1);
      }

      if (lastIndex === -1) {
        column += subvalue.length;
      } else {
        column = subvalue.length - lastIndex;
      }

      if (line in offset) {
        if (lastIndex !== -1) {
          /* Moved onto a new line: shift by its offset. */
          column += offset[line];
        } else if (column <= offset[line]) {
          /* Same line: never point inside the offset. */
          column = offset[line] + 1;
        }
      }
    }

    /**
     * Get offset.  Called before the first character is
     * eaten to retrieve the range's offsets.
     *
     * @return {Function} - `done`, to be called when
     *   the last character is eaten.
     */
    function getOffset() {
      var indentation = [];
      var pos = line + 1;

      /**
       * Done.  Called when the last character is
       * eaten to retrieve the range’s offsets.
       *
       * Collects one entry (the line’s offset plus one)
       * for every line entered since `getOffset` was
       * called.
       *
       * @return {Array.<number>} - Offset.
       */
      return function () {
        var last = line + 1;

        while (pos < last) {
          indentation.push((offset[pos] || 0) + 1);

          pos++;
        }

        return indentation;
      };
    }

    /**
     * Get the current position.
     *
     * The `offset` field is computed by `self.toOffset`.
     *
     * @example
     *   position = now(); // {line: 1, column: 1, offset: 0}
     *
     * @return {Object} - Current Position.
     */
    function now() {
      var pos = {line: line, column: column};

      pos.offset = self.toOffset(pos);

      return pos;
    }

    /**
     * Store position information for a node.
     *
     * The end is the position at the moment of
     * construction.
     *
     * @example
     *   start = now();
     *   updatePosition('foo');
     *   location = new Position(start);
     *   // {
     *   //   start: {line: 1, column: 1, offset: 0},
     *   //   end: {line: 1, column: 4, offset: 3}
     *   // }
     *
     * @constructor
     * @param {Object} start - Starting position.
     */
    function Position(start) {
      this.start = start;
      this.end = now();
    }

    /**
     * Throw when a value is incorrectly eaten.
     * This shouldn’t happen but will throw on new,
     * incorrect rules.
     *
     * @example
     *   // When the current value is set to `foo bar`.
     *   validateEat('foo');
     *   eat('foo');
     *
     *   validateEat('bar');
     *   // throws, because the space is not eaten.
     *
     * @param {string} subvalue - Value to be eaten.
     * @throws {Error} - When `subvalue` cannot be eaten
     *   (raised through `self.file.fail`).
     */
    function validateEat(subvalue) {
      /* istanbul ignore if */
      if (value.substring(0, subvalue.length) !== subvalue) {
        /* Capture stack-trace. */
        self.file.fail(
          new Error(
            'Incorrectly eaten value: please report this ' +
            'warning on http://git.io/vg5Ft'
          ),
          now()
        );
      }
    }

    /**
     * Mark position and patch `node.position`.
     *
     * @example
     *   var update = position();
     *   updatePosition('foo');
     *   update({});
     *   // {
     *   //   position: {
     *   //     start: {line: 1, column: 1, offset: 0},
     *   //     end: {line: 1, column: 4, offset: 3},
     *   //     indent: []
     *   //   }
     *   // }
     *
     * @returns {Function} - Updater.
     */
    function position() {
      var before = now();

      return update;

      /**
       * Add the position to a node.
       *
       * When `node` already has a position, its start is
       * kept and only the end (and indent) are refreshed.
       *
       * @example
       *   update({type: 'text', value: 'foo'});
       *
       * @param {Node} node - Node to attach position
       *   on.
       * @param {Array} [indent] - Indentation for
       *   `node`.
       * @return {Node} - `node`.
       */
      function update(node, indent) {
        var prev = node.position;
        var start = prev ? prev.start : before;
        var combined = [];
        var n = prev && prev.end.line;
        var l = before.line;

        node.position = new Position(start);

        /* If there was already a `position`, this
         * node was merged.  Fixing `start` wasn’t
         * hard, but the indent is different.
         * Especially because some information, the
         * indent between `n` and `l` wasn’t
         * tracked.  Luckily, that space is
         * (should be?) empty, so we can safely
         * check for it now. */
        if (prev && indent && prev.indent) {
          combined = prev.indent;

          if (n < l) {
            while (++n < l) {
              combined.push((offset[n] || 0) + 1);
            }

            combined.push(before.column);
          }

          indent = combined.concat(indent);
        }

        node.position.indent = indent || [];

        return node;
      }
    }

    /**
     * Add `node` to `parent`s children or to `tokens`.
     * Performs merges where possible.
     *
     * A merge is attempted when the last child has the
     * same type as `node`, that type is listed in
     * `MERGEABLE_NODES`, and both nodes pass `mergeable`.
     * Afterwards, if the parser is still `atStart` and at
     * least one top-level token exists, `exitStart` is
     * invoked.
     *
     * @example
     *   add({});
     *
     *   add({}, {children: []});
     *
     * @param {Object} node - Node to add.
     * @param {Object} [parent] - Parent to insert into.
     * @return {Object} - Added or merged into node.
     */
    function add(node, parent) {
      var children = parent ? parent.children : tokens;
      var prev = children[children.length - 1];

      if (
        prev &&
        node.type === prev.type &&
        node.type in MERGEABLE_NODES &&
        mergeable(prev) &&
        mergeable(node)
      ) {
        node = MERGEABLE_NODES[node.type].call(self, prev, node);
      }

      /* A merge function may hand back `prev`, which is
       * already in `children`. */
      if (node !== prev) {
        children.push(node);
      }

      if (self.atStart && tokens.length !== 0) {
        self.exitStart();
      }

      return node;
    }

    /**
     * Remove `subvalue` from `value`.
     * `subvalue` must be at the start of `value`.
     *
     * The returned function also exposes `reset` and
     * `test` (the latter on `reset` as well).
     *
     * @example
     *   eat('foo')({type: 'text', value: 'foo'});
     *
     * @param {string} subvalue - Removed from `value`,
     *   and passed to `updatePosition`.
     * @return {Function} - Wrapper around `add`, which
     *   also adds `position` to node.
     */
    function eat(subvalue) {
      var indent = getOffset();
      var pos = position();
      var current = now();

      validateEat(subvalue);

      apply.reset = reset;
      reset.test = test;
      apply.test = test;

      value = value.substring(subvalue.length);

      updatePosition(subvalue);

      indent = indent();

      return apply;

      /**
       * Add the given arguments, add `position` to
       * the returned node, and return the node.
       *
       * The position is patched twice: once before
       * `add`, and once on whichever node `add` returns
       * (which may be an earlier, merged-into node).
       *
       * @param {Object} node - Node to add.
       * @param {Object} [parent] - Node to insert into.
       * @return {Node} - Added node.
       */
      function apply(node, parent) {
        return pos(add(pos(node), parent), indent);
      }

      /**
       * Functions just like apply, but resets the
       * content: the line and column are reversed,
       * and the eaten value is re-added.
       *
       * This is useful for nodes with a single
       * type of content, such as lists and tables.
       *
       * See `apply` above for what parameters are
       * expected.
       *
       * @return {Node} - Added node.
       */
      function reset() {
        var node = apply.apply(null, arguments);

        line = current.line;
        column = current.column;
        value = subvalue + value;

        return node;
      }

      /**
       * Test the position, after eating, and reverse
       * to a not-eaten state.
       *
       * @return {Position} - Position after eating `subvalue`.
       */
      function test() {
        var result = pos({});

        line = current.line;
        column = current.column;
        value = subvalue + value;

        return result.position;
      }
    }
  }
}
|
|
|