#!/usr/bin/env python
#
# Copyright 2010 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Metadata pass for annotating tokens in EcmaScript files."""

__author__ = 'robbyw@google.com (Robert Walker)'

from closure_linter import javascripttokens
from closure_linter import tokenutil

# Shorthand for the JavaScript token type constants referenced throughout this
# module (START_PAREN, KEYWORD, NON_CODE_TYPES, ...).
TokenType = javascripttokens.JavaScriptTokenType


class ParseError(Exception):
  """Error signalling that parsing failed at a particular token.

  Attributes:
    token: The token where the parse error occurred.
  """

  def __init__(self, token, message=None):
    """Builds a parse error bound to the offending token.

    Args:
      token: The token where the parse error occurred.
      message: Optional text describing the parse error.
    """
    super(ParseError, self).__init__(message)
    self.token = token


class EcmaContext(object):
  """One node in the chain of nested contexts of an EcmaScript file.

  Every context links to its parent, so the chain from any context up to the
  root describes where in the code a token lives.

  Attributes:
    type: The context type; one of the string constants defined on this class.
    start_token: The token where this context starts.
    end_token: The token where this context ends, or None if it has not been
        set yet.
    parent: The parent context, or None if there is no enclosing context.
  """

  # Outermost context of a file.
  ROOT = 'root'

  # Code enclosed in a block.
  BLOCK = 'block'

  # Pseudo-block holding the code of a single case or default section.
  CASE_BLOCK = 'case_block'

  # The statements inside the parentheses of a for loop.
  FOR_GROUP_BLOCK = 'for_block'

  # Block implied by a one-line if, while, or for statement.
  IMPLIED_BLOCK = 'implied_block'

  # Index into an array or object.
  INDEX = 'index'

  # Array literal written with [].
  ARRAY_LITERAL = 'array_literal'

  # Object literal written with {}.
  OBJECT_LITERAL = 'object_literal'

  # Single element of an array or object literal.
  LITERAL_ELEMENT = 'literal_element'

  # Part of a ternary statement between ? and :
  TERNARY_TRUE = 'ternary_true'

  # Part of a ternary statement after :
  TERNARY_FALSE = 'ternary_false'

  # A whole switch statement.  It contains a GROUP with the variable and a
  # BLOCK with the code.
  #
  # That BLOCK is not a normal block: the only statements it can contain are
  # case and default.
  SWITCH = 'switch'

  # Ordinary comment.
  COMMENT = 'comment'

  # JsDoc comment.
  DOC = 'doc'

  # Single statement.
  STATEMENT = 'statement'

  # Code inside parentheses.
  GROUP = 'group'

  # Parameter names of a function declaration.
  PARAMETERS = 'parameters'

  # The variable declarations that follow the 'var' keyword.
  VAR = 'var'

  # The context types that count as blocks.
  BLOCK_TYPES = frozenset(
      (ROOT, BLOCK, CASE_BLOCK, FOR_GROUP_BLOCK, IMPLIED_BLOCK))

  def __init__(self, type, start_token, parent):
    """Sets up a context that has not been closed yet.

    Args:
      type: The context type.
      start_token: The token where this context starts.
      parent: The parent context.
    """
    self.parent = parent
    self.type = type
    self.start_token = start_token
    # Left unset here; filled in by whoever closes the context.
    self.end_token = None

  def __repr__(self):
    """Returns the context types from this context outward, ' > ' separated."""
    type_names = []
    node = self
    while node:
      type_names.append(node.type)
      node = node.parent
    chain = ' > '.join(type_names)
    return 'Context(%s)' % chain


class EcmaMetaData(object):
  """Per-token metadata for EcmaScript languages.

  Attributes:
    last_code: The last code token to appear before this one.
    context: The context this token appears in.
    operator_type: The operator type, will be one of the *_OPERATOR constants
        defined below, or None when no operator type has been assigned.
    is_implied_semicolon: Whether a semicolon is implied after this token.
    is_implied_block: Whether this token opens an implied block.
    is_implied_block_close: Whether this token closes an implied block.
  """

  UNARY_OPERATOR = 'unary'

  UNARY_POST_OPERATOR = 'unary_post'

  BINARY_OPERATOR = 'binary'

  TERNARY_OPERATOR = 'ternary'

  def __init__(self):
    """Creates metadata with nothing known about the token yet."""
    self.last_code = None
    self.context = None
    self.operator_type = None
    self.is_implied_semicolon = False
    self.is_implied_block = False
    self.is_implied_block_close = False

  def __repr__(self):
    """Returns a string representation of the metadata object."""
    fields = ['%r' % self.context]
    if self.operator_type:
      fields.append('optype: %r' % self.operator_type)
    if self.is_implied_semicolon:
      fields.append('implied;')
    summary = ', '.join(fields)
    return 'MetaData(%s)' % summary

  def IsUnaryOperator(self):
    """Returns whether the operator type is unary (prefix or postfix)."""
    op_type = self.operator_type
    return (op_type == EcmaMetaData.UNARY_OPERATOR or
            op_type == EcmaMetaData.UNARY_POST_OPERATOR)

  def IsUnaryPostOperator(self):
    """Returns whether the operator type is a postfix unary operator."""
    return EcmaMetaData.UNARY_POST_OPERATOR == self.operator_type


class EcmaMetaDataPass(object):
  """A pass that iterates over all tokens and builds metadata about them.

  While walking the token stream the pass maintains a stack of EcmaContext
  objects (linked through their parent attribute) and attaches an EcmaMetaData
  object to every token it visits.
  """

  def __init__(self):
    """Initialize the meta data pass object."""
    self.Reset()

  def Reset(self):
    """Resets the metadata pass to prepare for the next file."""
    self._token = None
    self._context = None
    # The root context is created while _token and _context are still None, so
    # it has neither a start token nor a parent.
    self._AddContext(EcmaContext.ROOT)
    self._last_code = None

  def _CreateContext(self, type):
    """Overridable by subclasses to create the appropriate context type.

    Args:
      type: The type of context to create.

    Returns:
      A new context starting at the current token whose parent is the current
      context.
    """
    return EcmaContext(type, self._token, self._context)

  def _CreateMetaData(self):
    """Overridable by subclasses to create the appropriate metadata type.

    Returns:
      A new, empty metadata object.
    """
    return EcmaMetaData()

  def _AddContext(self, type):
    """Adds a context of the given type to the context stack.

    Args:
      type: The type of context to create
    """
    self._context = self._CreateContext(type)

  def _PopContext(self):
    """Moves up one level in the context stack.

    The popped context's end_token is set to the current token.

    Returns:
      The former context.

    Raises:
      ParseError: If the root context is popped.
    """
    top_context = self._context
    top_context.end_token = self._token
    self._context = top_context.parent
    if not self._context:
      # The root context has no parent, so popping it leaves nothing behind.
      raise ParseError(self._token)
    return top_context

  def _PopContextType(self, *stop_types):
    """Pops the context stack until a context of the given type is popped.

    Args:
      stop_types: The types of context to pop to - stops at the first match.

    Returns:
      The context object of the given type that was popped.

    Raises:
      ParseError: If the root context is popped before a match is found.
    """
    last = None
    while not last or last.type not in stop_types:
      last = self._PopContext()
    return last

  def _EndStatement(self):
    """Process the end of a statement.

    Pops the enclosing statement context.  If that statement was the body of
    an implied block, the block is closed as well and the current token is
    flagged as the one closing it.
    """
    self._PopContextType(EcmaContext.STATEMENT)
    if self._context.type == EcmaContext.IMPLIED_BLOCK:
      self._token.metadata.is_implied_block_close = True
      self._PopContext()

  def _ProcessContext(self):
    """Process the context at the current token.

    Returns:
      The context that should be assigned to the current token, or None if
      the current context after this method should be used.

    Raises:
      ParseError: When the token appears in an invalid context.
    """
    token = self._token
    token_type = token.type

    if self._context.type in EcmaContext.BLOCK_TYPES:
      # Whenever we're in a block, we add a statement context.  We make an
      # exception for switch statements since they can only contain case: and
      # default: and therefore don't directly contain statements.
      # The block we add here may be immediately removed in some cases, but
      # that causes no harm.
      parent = self._context.parent
      if not parent or parent.type != EcmaContext.SWITCH:
        self._AddContext(EcmaContext.STATEMENT)

    elif self._context.type == EcmaContext.ARRAY_LITERAL:
      self._AddContext(EcmaContext.LITERAL_ELEMENT)

    if token_type == TokenType.START_PAREN:
      if self._last_code and self._last_code.IsKeyword('for'):
        # for loops contain multiple statements in the group unlike while,
        # switch, if, etc.
        self._AddContext(EcmaContext.FOR_GROUP_BLOCK)
      else:
        self._AddContext(EcmaContext.GROUP)

    elif token_type == TokenType.END_PAREN:
      result = self._PopContextType(EcmaContext.GROUP,
                                    EcmaContext.FOR_GROUP_BLOCK)
      keyword_token = result.start_token.metadata.last_code
      # keyword_token will not exist if the open paren is the first line of the
      # file, for example if all code is wrapped in an immediately executed
      # anonymous function.
      if keyword_token and keyword_token.string in ('if', 'for', 'while'):
        next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES)
        if next_code.type != TokenType.START_BLOCK:
          # Check for do-while.
          is_do_while = False
          pre_keyword_token = keyword_token.metadata.last_code
          if (pre_keyword_token and
              pre_keyword_token.type == TokenType.END_BLOCK):
            start_block_token = pre_keyword_token.metadata.context.start_token
            # The token that opened the preceding construct may be the first
            # code token of the file (e.g. a leading switch statement), in
            # which case nothing precedes it and it cannot follow a 'do'.
            before_block = start_block_token.metadata.last_code
            is_do_while = (before_block is not None and
                           before_block.string == 'do')

          # If it's not do-while, it's an implied block.
          if not is_do_while:
            self._AddContext(EcmaContext.IMPLIED_BLOCK)
            token.metadata.is_implied_block = True

      return result

    # else (not else if) with no open brace after it should be considered the
    # start of an implied block, similar to the case with if, for, and while
    # above.
    elif (token_type == TokenType.KEYWORD and
          token.string == 'else'):
      next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES)
      if (next_code.type != TokenType.START_BLOCK and
          (next_code.type != TokenType.KEYWORD or next_code.string != 'if')):
        self._AddContext(EcmaContext.IMPLIED_BLOCK)
        token.metadata.is_implied_block = True

    elif token_type == TokenType.START_PARAMETERS:
      self._AddContext(EcmaContext.PARAMETERS)

    elif token_type == TokenType.END_PARAMETERS:
      return self._PopContextType(EcmaContext.PARAMETERS)

    elif token_type == TokenType.START_BRACKET:
      # A bracket directly after something that ends an expression indexes
      # into it; otherwise it starts an array literal.
      if (self._last_code and
          self._last_code.type in TokenType.EXPRESSION_ENDER_TYPES):
        self._AddContext(EcmaContext.INDEX)
      else:
        self._AddContext(EcmaContext.ARRAY_LITERAL)

    elif token_type == TokenType.END_BRACKET:
      return self._PopContextType(EcmaContext.INDEX, EcmaContext.ARRAY_LITERAL)

    elif token_type == TokenType.START_BLOCK:
      # NOTE(review): self._last_code is None when '{' is the first code token
      # of the file, which makes this check fail with AttributeError - confirm
      # how such input should be classified before guarding it.
      if (self._last_code.type in (TokenType.END_PAREN,
                                   TokenType.END_PARAMETERS) or
          self._last_code.IsKeyword('else') or
          self._last_code.IsKeyword('do') or
          self._last_code.IsKeyword('try') or
          self._last_code.IsKeyword('finally') or
          (self._last_code.IsOperator(':') and
           self._last_code.metadata.context.type == EcmaContext.CASE_BLOCK)):
        # else, do, try, and finally all might have no () before {.
        # Also, handle the bizarre syntax case 10: {...}.
        self._AddContext(EcmaContext.BLOCK)
      else:
        self._AddContext(EcmaContext.OBJECT_LITERAL)

    elif token_type == TokenType.END_BLOCK:
      context = self._PopContextType(EcmaContext.BLOCK,
                                     EcmaContext.OBJECT_LITERAL)
      if self._context.type == EcmaContext.SWITCH:
        # The end of the block also means the end of the switch statement it
        # applies to.
        return self._PopContext()
      return context

    elif token.IsKeyword('switch'):
      self._AddContext(EcmaContext.SWITCH)

    elif (token_type == TokenType.KEYWORD and
          token.string in ('case', 'default')):
      # Pop up to but not including the switch block.
      # NOTE(review): this assumes an enclosing switch context exists; without
      # one the loop reaches the root context, whose parent is None.
      while self._context.parent.type != EcmaContext.SWITCH:
        self._PopContext()

    elif token.IsOperator('?'):
      self._AddContext(EcmaContext.TERNARY_TRUE)

    elif token.IsOperator(':'):
      if self._context.type == EcmaContext.OBJECT_LITERAL:
        self._AddContext(EcmaContext.LITERAL_ELEMENT)

      elif self._context.type == EcmaContext.TERNARY_TRUE:
        self._PopContext()
        self._AddContext(EcmaContext.TERNARY_FALSE)

      # Handle nested ternary statements like:
      # foo = bar ? baz ? 1 : 2 : 3
      # When we encounter the second ":" the context is
      # ternary_false > ternary_true > statement > root
      elif (self._context.type == EcmaContext.TERNARY_FALSE and
            self._context.parent.type == EcmaContext.TERNARY_TRUE):
        self._PopContext()  # Leave current ternary false context.
        self._PopContext()  # Leave current parent ternary true
        self._AddContext(EcmaContext.TERNARY_FALSE)

      elif self._context.parent.type == EcmaContext.SWITCH:
        self._AddContext(EcmaContext.CASE_BLOCK)

    elif token.IsKeyword('var'):
      self._AddContext(EcmaContext.VAR)

    elif token.IsOperator(','):
      # A comma ends whatever nested contexts were opened since the nearest
      # context that can hold a comma separated list.
      while self._context.type not in (EcmaContext.VAR,
                                       EcmaContext.ARRAY_LITERAL,
                                       EcmaContext.OBJECT_LITERAL,
                                       EcmaContext.STATEMENT,
                                       EcmaContext.PARAMETERS,
                                       EcmaContext.GROUP):
        self._PopContext()

    elif token_type == TokenType.SEMICOLON:
      self._EndStatement()

  def Process(self, first_token):
    """Processes the token stream starting with the given token.

    Args:
      first_token: The first token of the stream; following tokens are reached
          through each token's next attribute.
    """
    self._token = first_token
    while self._token:
      self._ProcessToken()

      if self._token.IsCode():
        self._last_code = self._token

      self._token = self._token.next

    # Close every context that is still open at the end of the stream.
    try:
      self._PopContextType(EcmaContext.ROOT)
    except ParseError:
      # Ignore the "popped to root" error.
      pass

  def _ProcessToken(self):
    """Process the given token.

    Attaches a fresh metadata object to the current token, records its
    context, preceding code token and operator type, and decides whether a
    semicolon is implied after it.
    """
    token = self._token
    token.metadata = self._CreateMetaData()
    context = (self._ProcessContext() or self._context)
    token.metadata.context = context
    token.metadata.last_code = self._last_code

    # Determine the operator type of the token, if applicable.
    if token.type == TokenType.OPERATOR:
      token.metadata.operator_type = self._GetOperatorType(token)

    # Determine if there is an implied semicolon after the token.
    if token.type != TokenType.SEMICOLON:
      next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES)
      # A statement like if (x) does not need a semicolon after it
      is_implied_block = self._context.type == EcmaContext.IMPLIED_BLOCK
      is_last_code_in_line = token.IsCode() and (
          not next_code or next_code.line_number != token.line_number)
      is_continued_identifier = (token.type == TokenType.IDENTIFIER and
                                 token.string.endswith('.'))
      is_continued_operator = (token.type == TokenType.OPERATOR and
                               not token.metadata.IsUnaryPostOperator())
      is_continued_dot = token.string == '.'
      next_code_is_operator = next_code and next_code.type == TokenType.OPERATOR
      next_code_is_dot = next_code and next_code.string == '.'
      is_end_of_block = (token.type == TokenType.END_BLOCK and
                         token.metadata.context.type != EcmaContext.OBJECT_LITERAL)
      is_multiline_string = token.type == TokenType.STRING_TEXT
      next_code_is_block = next_code and next_code.type == TokenType.START_BLOCK
      if (is_last_code_in_line and
          self._StatementCouldEndInContext() and
          not is_multiline_string and
          not is_end_of_block and
          not is_continued_identifier and
          not is_continued_operator and
          not is_continued_dot and
          not next_code_is_dot and
          not next_code_is_operator and
          not is_implied_block and
          not next_code_is_block):
        token.metadata.is_implied_semicolon = True
        self._EndStatement()

  def _StatementCouldEndInContext(self):
    """Returns whether the current statement (if any) may end in this context."""
    # In the basic statement or variable declaration context, statement can
    # always end in this context.
    if self._context.type in (EcmaContext.STATEMENT, EcmaContext.VAR):
      return True

    # End of a ternary false branch inside a statement can also be the
    # end of the statement, for example:
    # var x = foo ? foo.bar() : null
    # In this case the statement ends after the null, when the context stack
    # looks like ternary_false > var > statement > root.
    if (self._context.type == EcmaContext.TERNARY_FALSE and
        self._context.parent.type in (EcmaContext.STATEMENT, EcmaContext.VAR)):
      return True

    # In all other contexts like object and array literals, ternary true, etc.
    # the statement can't yet end.
    return False

  def _GetOperatorType(self, token):
    """Returns the operator type of the given operator token.

    Args:
      token: The token to get arity for.

    Returns:
      The type of the operator.  One of the *_OPERATOR constants defined in
      EcmaMetaData.
    """
    if token.string == '?':
      return EcmaMetaData.TERNARY_OPERATOR

    if token.string in TokenType.UNARY_OPERATORS:
      return EcmaMetaData.UNARY_OPERATOR

    # With no code before it, or directly after the end of a block, the
    # operator has no left operand.
    last_code = token.metadata.last_code
    if not last_code or last_code.type == TokenType.END_BLOCK:
      return EcmaMetaData.UNARY_OPERATOR

    if (token.string in TokenType.UNARY_POST_OPERATORS and
        last_code.type in TokenType.EXPRESSION_ENDER_TYPES):
      return EcmaMetaData.UNARY_POST_OPERATOR

    if (token.string in TokenType.UNARY_OK_OPERATORS and
        last_code.type not in TokenType.EXPRESSION_ENDER_TYPES and
        last_code.string not in TokenType.UNARY_POST_OPERATORS):
      return EcmaMetaData.UNARY_OPERATOR

    return EcmaMetaData.BINARY_OPERATOR