node/tools/closure_linter/closure_linter/ecmametadatapass.py


								#!/usr/bin/env python

								#

								# Copyright 2010 The Closure Linter Authors. All Rights Reserved.

								#

								# Licensed under the Apache License, Version 2.0 (the "License");

								# you may not use this file except in compliance with the License.

								# You may obtain a copy of the License at

								#

								#      http://www.apache.org/licenses/LICENSE-2.0

								#

								# Unless required by applicable law or agreed to in writing, software

								# distributed under the License is distributed on an "AS-IS" BASIS,

								# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

								# See the License for the specific language governing permissions and

								# limitations under the License.


								"""Metadata pass for annotating tokens in EcmaScript files."""


								__author__ = ('robbyw@google.com (Robert Walker)')


								from closure_linter import javascripttokens

								from closure_linter import tokenutil


								# Short alias for javascripttokens.JavaScriptTokenType; the token type
								# constants referenced throughout this module are read from it.
								TokenType = javascripttokens.JavaScriptTokenType


								class ParseError(Exception):
								  """Raised when the metadata pass cannot make sense of the token stream.

								  Attributes:
								    token: The token at which parsing failed.
								  """

								  def __init__(self, token, message=None):
								    """Creates a parse error tied to the offending token.

								    Args:
								      token: The token at which parsing failed.
								      message: Optional text describing the failure; it is passed through to
								          the base Exception unchanged.
								    """
								    super(ParseError, self).__init__(message)
								    self.token = token


								class EcmaContext(object):
								  """One node in the stack of nested contexts for EcmaScript languages.

								  Contexts are linked to their enclosing context through `parent`, so a
								  single context object identifies the whole chain up to the root.

								  Attributes:
								    type: The context type; one of the string constants defined below.
								    start_token: The token where this context starts.
								    end_token: The token where this context ends; None until it is assigned.
								    parent: The parent context, or None when there is no enclosing context.
								  """

								  # The outermost context.
								  ROOT = 'root'

								  # A block of code.
								  BLOCK = 'block'

								  # A pseudo-block of code for a given case or default section.
								  CASE_BLOCK = 'case_block'

								  # The block of statements inside a for loop's parentheses.
								  FOR_GROUP_BLOCK = 'for_block'

								  # An implied block of code for one-line if, while, and for statements.
								  IMPLIED_BLOCK = 'implied_block'

								  # An index into an array or object.
								  INDEX = 'index'

								  # An array literal in [].
								  ARRAY_LITERAL = 'array_literal'

								  # An object literal in {}.
								  OBJECT_LITERAL = 'object_literal'

								  # An individual element in an array or object literal.
								  LITERAL_ELEMENT = 'literal_element'

								  # The portion of a ternary statement between ? and :
								  TERNARY_TRUE = 'ternary_true'

								  # The portion of a ternary statement after :
								  TERNARY_FALSE = 'ternary_false'

								  # The entire switch statement.  This will contain a GROUP with the variable
								  # and a BLOCK with the code.
								  #
								  # Since that BLOCK is not a normal block, it can not contain statements
								  # except for case and default.
								  SWITCH = 'switch'

								  # A normal comment.
								  COMMENT = 'comment'

								  # A JsDoc comment.
								  DOC = 'doc'

								  # An individual statement.
								  STATEMENT = 'statement'

								  # Code within parentheses.
								  GROUP = 'group'

								  # Parameter names in a function declaration.
								  PARAMETERS = 'parameters'

								  # A set of variable declarations appearing after the 'var' keyword.
								  VAR = 'var'

								  # The context types that count as blocks.
								  BLOCK_TYPES = frozenset((
								      ROOT,
								      BLOCK,
								      CASE_BLOCK,
								      FOR_GROUP_BLOCK,
								      IMPLIED_BLOCK,
								  ))

								  def __init__(self, type, start_token, parent):
								    """Initializes the context object.

								    Args:
								      type: The context type.
								      start_token: The token where this context starts.
								      parent: The parent context.
								    """
								    self.parent = parent
								    self.type = type
								    self.start_token = start_token
								    # The end is not known when the context is opened.
								    self.end_token = None

								  def __repr__(self):
								    """Returns the context types from this context outward, joined by ' > '."""
								    type_chain = []
								    node = self
								    while node:
								      type_chain.append(node.type)
								      node = node.parent
								    return 'Context(%s)' % ' > '.join(type_chain)


								class EcmaMetaData(object):
								  """Token metadata for EcmaScript languages.

								  Attributes:
								    last_code: The last code token to appear before this one.
								    context: The context this token appears in.
								    operator_type: The operator type, will be one of the *_OPERATOR constants
								        defined below.
								    is_implied_semicolon: True when a semicolon is implied after this token.
								    is_implied_block: True when this token opens an implied block.
								    is_implied_block_close: True when this token closes an implied block.
								  """

								  UNARY_OPERATOR = 'unary'

								  UNARY_POST_OPERATOR = 'unary_post'

								  BINARY_OPERATOR = 'binary'

								  TERNARY_OPERATOR = 'ternary'

								  def __init__(self):
								    """Initializes a token metadata object with every field unset."""
								    self.last_code = None
								    self.context = None
								    self.operator_type = None
								    self.is_implied_semicolon = False
								    self.is_implied_block = False
								    self.is_implied_block_close = False

								  def __repr__(self):
								    """Returns a string representation of the metadata object.

								    The context is always shown; the operator type and the implied
								    semicolon marker are appended only when they are set.
								    """
								    pieces = ['%r' % self.context]
								    if self.operator_type:
								      pieces.append('optype: %r' % self.operator_type)
								    if self.is_implied_semicolon:
								      pieces.append('implied;')
								    return 'MetaData(%s)' % ', '.join(pieces)

								  def IsUnaryOperator(self):
								    """Returns whether the operator type is unary (prefix or postfix)."""
								    unary_kinds = (EcmaMetaData.UNARY_OPERATOR,
								                   EcmaMetaData.UNARY_POST_OPERATOR)
								    return self.operator_type in unary_kinds

								  def IsUnaryPostOperator(self):
								    """Returns whether the operator type is the postfix unary type."""
								    return self.operator_type == EcmaMetaData.UNARY_POST_OPERATOR


								class EcmaMetaDataPass(object):

								  """A pass that iterates over all tokens and builds metadata about them."""


								  def __init__(self):
								    """Creates the pass in the same state that Reset() establishes."""
								    self.Reset()


								  def Reset(self):
								    """Discards all per-file state so the pass can process another file."""
								    self._token = self._context = None
								    # The context stack always starts with a single root context.
								    self._AddContext(EcmaContext.ROOT)
								    self._last_code = None


								  def _CreateContext(self, type):
								    """Builds a context of the given type; subclasses may override this.

								    The new context starts at the current token and has the current context
								    as its parent.

								    Args:
								      type: The type of context to create.

								    Returns:
								      The newly created context object.
								    """
								    start_token, parent = self._token, self._context
								    return EcmaContext(type, start_token, parent)


								  def _CreateMetaData(self):
								    """Builds an empty metadata object; subclasses may override this.

								    Returns:
								      A new metadata object to attach to a token.
								    """
								    metadata = EcmaMetaData()
								    return metadata


								  def _AddContext(self, type):

								    """Adds a context of the given type to the context stack.


								    Args:

								      type: The type of context to create

								    """

								    self._context  = self._CreateContext(type)


								  def _PopContext(self):

								    """Moves up one level in the context stack.


								    Returns:

								      The former context.


								    Raises:

								      ParseError: If the root context is popped.

								    """

								    top_context = self._context

								    top_context.end_token = self._token

								    self._context = top_context.parent

								    if self._context:

								      return top_context

								    else:

								      raise ParseError(self._token)


								  def _PopContextType(self, *stop_types):

								    """Pops the context stack until a context of the given type is popped.


								    Args:

								      stop_types: The types of context to pop to - stops at the first match.


								    Returns:

								      The context object of the given type that was popped.

								    """

								    last = None

								    while not last or last.type not in stop_types:

								      last = self._PopContext()

								    return last


								  def _EndStatement(self):
								    """Closes the current statement and any implied block it terminates."""
								    self._PopContextType(EcmaContext.STATEMENT)
								    if self._context.type != EcmaContext.IMPLIED_BLOCK:
								      return

								    # The statement was the body of an implied block, so the current token
								    # closes that block as well.
								    self._token.metadata.is_implied_block_close = True
								    self._PopContext()


								  def _ProcessContext(self):
								    """Process the context at the current token.

								    Pushes and pops contexts on the context stack according to the type of
								    the current token and the contexts that are currently open.

								    Returns:
								      The context that should be assigned to the current token, or None if
								      the current context after this method should be used.

								    Raises:
								      ParseError: When the token appears in an invalid context.
								    """
								    token = self._token
								    token_type = token.type

								    if self._context.type in EcmaContext.BLOCK_TYPES:
								      # Whenever we're in a block, we add a statement context.  We make an
								      # exception for switch statements since they can only contain case: and
								      # default: and therefore don't directly contain statements.
								      # The block we add here may be immediately removed in some cases, but
								      # that causes no harm.
								      parent = self._context.parent
								      if not parent or parent.type != EcmaContext.SWITCH:
								        self._AddContext(EcmaContext.STATEMENT)

								    elif self._context.type == EcmaContext.ARRAY_LITERAL:
								      self._AddContext(EcmaContext.LITERAL_ELEMENT)

								    if token_type == TokenType.START_PAREN:
								      if self._last_code and self._last_code.IsKeyword('for'):
								        # for loops contain multiple statements in the group unlike while,
								        # switch, if, etc.
								        self._AddContext(EcmaContext.FOR_GROUP_BLOCK)
								      else:
								        self._AddContext(EcmaContext.GROUP)

								    elif token_type == TokenType.END_PAREN:
								      result = self._PopContextType(EcmaContext.GROUP,
								                                    EcmaContext.FOR_GROUP_BLOCK)
								      keyword_token = result.start_token.metadata.last_code
								      # keyword_token will not exist if the open paren is the first line of the
								      # file, for example if all code is wrapped in an immediately executed
								      # anonymous function.
								      if keyword_token and keyword_token.string in ('if', 'for', 'while'):
								        next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES)
								        # next_code is missing when the paren is the last code token in the
								        # file; with nothing following there is no implied block to open.
								        if next_code and next_code.type != TokenType.START_BLOCK:
								          # Check for do-while.
								          is_do_while = False
								          pre_keyword_token = keyword_token.metadata.last_code
								          if (pre_keyword_token and
								              pre_keyword_token.type == TokenType.END_BLOCK):
								            start_block_token = pre_keyword_token.metadata.context.start_token
								            # The block may be the first code in the file, in which case no
								            # token precedes it and it cannot belong to a 'do'.
								            block_opener = start_block_token.metadata.last_code
								            is_do_while = bool(block_opener and block_opener.string == 'do')

								          # If it's not do-while, it's an implied block.
								          if not is_do_while:
								            self._AddContext(EcmaContext.IMPLIED_BLOCK)
								            token.metadata.is_implied_block = True

								      return result

								    # else (not else if) with no open brace after it should be considered the
								    # start of an implied block, similar to the case with if, for, and while
								    # above.
								    elif (token_type == TokenType.KEYWORD and
								          token.string == 'else'):
								      next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES)
								      # As above, an 'else' that ends the file has nothing to put in a block.
								      if (next_code and
								          next_code.type != TokenType.START_BLOCK and
								          (next_code.type != TokenType.KEYWORD or next_code.string != 'if')):
								        self._AddContext(EcmaContext.IMPLIED_BLOCK)
								        token.metadata.is_implied_block = True

								    elif token_type == TokenType.START_PARAMETERS:
								      self._AddContext(EcmaContext.PARAMETERS)

								    elif token_type == TokenType.END_PARAMETERS:
								      return self._PopContextType(EcmaContext.PARAMETERS)

								    elif token_type == TokenType.START_BRACKET:
								      if (self._last_code and
								          self._last_code.type in TokenType.EXPRESSION_ENDER_TYPES):
								        self._AddContext(EcmaContext.INDEX)
								      else:
								        self._AddContext(EcmaContext.ARRAY_LITERAL)

								    elif token_type == TokenType.END_BRACKET:
								      return self._PopContextType(EcmaContext.INDEX, EcmaContext.ARRAY_LITERAL)

								    elif token_type == TokenType.START_BLOCK:
								      # self._last_code is None when the brace is the first code token in the
								      # file; guard it the same way the paren and bracket branches do.
								      last_code = self._last_code
								      if last_code and (
								          last_code.type in (TokenType.END_PAREN,
								                             TokenType.END_PARAMETERS) or
								          last_code.IsKeyword('else') or
								          last_code.IsKeyword('do') or
								          last_code.IsKeyword('try') or
								          last_code.IsKeyword('finally') or
								          (last_code.IsOperator(':') and
								           last_code.metadata.context.type == EcmaContext.CASE_BLOCK)):
								        # else, do, try, and finally all might have no () before {.
								        # Also, handle the bizarre syntax case 10: {...}.
								        self._AddContext(EcmaContext.BLOCK)
								      else:
								        self._AddContext(EcmaContext.OBJECT_LITERAL)

								    elif token_type == TokenType.END_BLOCK:
								      context = self._PopContextType(EcmaContext.BLOCK,
								                                     EcmaContext.OBJECT_LITERAL)
								      if self._context.type == EcmaContext.SWITCH:
								        # The end of the block also means the end of the switch statement it
								        # applies to.
								        return self._PopContext()
								      return context

								    elif token.IsKeyword('switch'):
								      self._AddContext(EcmaContext.SWITCH)

								    elif (token_type == TokenType.KEYWORD and
								          token.string in ('case', 'default')):
								      # Pop up to but not including the switch block.  If no switch encloses
								      # this keyword the loop reaches the root, and popping the root raises
								      # ParseError.
								      while not (self._context.parent and
								                 self._context.parent.type == EcmaContext.SWITCH):
								        self._PopContext()

								    elif token.IsOperator('?'):
								      self._AddContext(EcmaContext.TERNARY_TRUE)

								    elif token.IsOperator(':'):
								      if self._context.type == EcmaContext.OBJECT_LITERAL:
								        self._AddContext(EcmaContext.LITERAL_ELEMENT)

								      elif self._context.type == EcmaContext.TERNARY_TRUE:
								        self._PopContext()
								        self._AddContext(EcmaContext.TERNARY_FALSE)

								      # Handle nested ternary statements like:
								      # foo = bar ? baz ? 1 : 2 : 3
								      # When we encounter the second ":" the context is
								      # ternary_false > ternary_true > statement > root
								      elif (self._context.type == EcmaContext.TERNARY_FALSE and
								            self._context.parent.type == EcmaContext.TERNARY_TRUE):
								        self._PopContext()  # Leave current ternary false context.
								        self._PopContext()  # Leave current parent ternary true
								        self._AddContext(EcmaContext.TERNARY_FALSE)

								      elif self._context.parent.type == EcmaContext.SWITCH:
								        self._AddContext(EcmaContext.CASE_BLOCK)

								    elif token.IsKeyword('var'):
								      self._AddContext(EcmaContext.VAR)

								    elif token.IsOperator(','):
								      # A comma ends whatever is nested inside the nearest list-like context.
								      while self._context.type not in (EcmaContext.VAR,
								                                       EcmaContext.ARRAY_LITERAL,
								                                       EcmaContext.OBJECT_LITERAL,
								                                       EcmaContext.STATEMENT,
								                                       EcmaContext.PARAMETERS,
								                                       EcmaContext.GROUP):
								        self._PopContext()

								    elif token_type == TokenType.SEMICOLON:
								      self._EndStatement()


								  def Process(self, first_token):
								    """Processes the token stream starting with the given token.

								    Each token is handed to _ProcessToken in turn, following the tokens'
								    `next` links until a false value is reached.  Afterwards every context
								    that is still open is closed.

								    Args:
								      first_token: The first token of the stream to process.
								    """
								    self._token = first_token
								    while self._token:
								      self._ProcessToken()

								      # Remember the most recent code token for the tokens that follow.
								      if self._token.IsCode():
								        self._last_code = self._token

								      self._token = self._token.next

								    try:
								      # Close every context that is still open.  Only context types belong
								      # in the argument list; `self` is already bound by the method call.
								      self._PopContextType(EcmaContext.ROOT)
								    except ParseError:
								      # Ignore the "popped to root" error.
								      pass


								  def _ProcessToken(self):
								    """Attaches metadata to the current token.

								    Creates the token's metadata object, records its context, the preceding
								    code token and (for operators) the operator type, and then decides
								    whether a semicolon is implied after the token.  When one is implied the
								    current statement is ended.
								    """
								    token = self._token
								    token.metadata = self._CreateMetaData()
								    # _ProcessContext returns a context only when the token belongs to a
								    # context other than the one left current afterwards.
								    context = (self._ProcessContext() or self._context)
								    token.metadata.context = context
								    token.metadata.last_code = self._last_code

								    # Determine the operator type of the token, if applicable.
								    if token.type == TokenType.OPERATOR:
								      token.metadata.operator_type = self._GetOperatorType(token)

								    # Determine if there is an implied semicolon after the token.
								    if token.type != TokenType.SEMICOLON:
								      next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES)
								      # A statement like if (x) does not need a semicolon after it.  Compare
								      # the context's type: a context object never equals the type string.
								      is_implied_block = self._context.type == EcmaContext.IMPLIED_BLOCK
								      is_last_code_in_line = token.IsCode() and (
								          not next_code or next_code.line_number != token.line_number)
								      is_continued_identifier = (token.type == TokenType.IDENTIFIER and
								                                 token.string.endswith('.'))
								      is_continued_operator = (token.type == TokenType.OPERATOR and
								                               not token.metadata.IsUnaryPostOperator())
								      is_continued_dot = token.string == '.'
								      next_code_is_operator = next_code and next_code.type == TokenType.OPERATOR
								      next_code_is_dot = next_code and next_code.string == '.'
								      is_end_of_block = (token.type == TokenType.END_BLOCK and
								          token.metadata.context.type != EcmaContext.OBJECT_LITERAL)
								      is_multiline_string = token.type == TokenType.STRING_TEXT
								      next_code_is_block = next_code and next_code.type == TokenType.START_BLOCK
								      if (is_last_code_in_line and
								          self._StatementCouldEndInContext() and
								          not is_multiline_string and
								          not is_end_of_block and
								          not is_continued_identifier and
								          not is_continued_operator and
								          not is_continued_dot and
								          not next_code_is_dot and
								          not next_code_is_operator and
								          not is_implied_block and
								          not next_code_is_block):
								        token.metadata.is_implied_semicolon = True
								        self._EndStatement()


								  def _StatementCouldEndInContext(self):
								    """Returns whether the current statement (if any) may end in this context."""
								    context = self._context
								    statement_types = (EcmaContext.STATEMENT, EcmaContext.VAR)

								    # A plain statement or a variable declaration can always end here.
								    if context.type in statement_types:
								      return True

								    # The false branch of a ternary sitting directly inside a statement can
								    # end the statement too, for example:
								    #   var x = foo ? foo.bar() : null
								    # After the null the context stack looks like
								    #   ternary_false > var > statement > root.
								    #
								    # Everywhere else (object and array literals, ternary true, etc.) the
								    # statement can't yet end.
								    return (context.type == EcmaContext.TERNARY_FALSE and
								            context.parent.type in statement_types)


								  def _GetOperatorType(self, token):
								    """Classifies an operator token as unary, postfix unary, binary or ternary.

								    Args:
								      token: The operator token to classify.  Its metadata must already carry
								          last_code.

								    Returns:
								      The type of the operator.  One of the *_OPERATOR constants defined in
								      EcmaMetaData.
								    """
								    operator = token.string

								    if operator == '?':
								      return EcmaMetaData.TERNARY_OPERATOR

								    if operator in TokenType.UNARY_OPERATORS:
								      return EcmaMetaData.UNARY_OPERATOR

								    # With no code before it, or directly after the end of a block, the
								    # operator is classified as unary.
								    previous = token.metadata.last_code
								    if not previous or previous.type == TokenType.END_BLOCK:
								      return EcmaMetaData.UNARY_OPERATOR

								    if operator in TokenType.UNARY_POST_OPERATORS:
								      if previous.type in TokenType.EXPRESSION_ENDER_TYPES:
								        return EcmaMetaData.UNARY_POST_OPERATOR

								    if operator in TokenType.UNARY_OK_OPERATORS:
								      if previous.type not in TokenType.EXPRESSION_ENDER_TYPES:
								        if previous.string not in TokenType.UNARY_POST_OPERATORS:
								          return EcmaMetaData.UNARY_OPERATOR

								    return EcmaMetaData.BINARY_OPERATOR