node/tools/closure_linter/closure_linter/indentation.py


								#!/usr/bin/env python

								#

								# Copyright 2010 The Closure Linter Authors. All Rights Reserved.

								#

								# Licensed under the Apache License, Version 2.0 (the "License");

								# you may not use this file except in compliance with the License.

								# You may obtain a copy of the License at

								#

								#      http://www.apache.org/licenses/LICENSE-2.0

								#

								# Unless required by applicable law or agreed to in writing, software

								# distributed under the License is distributed on an "AS-IS" BASIS,

								# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

								# See the License for the specific language governing permissions and

								# limitations under the License.


								"""Methods for checking EcmaScript files for indentation issues."""


								__author__ = ('robbyw@google.com (Robert Walker)')


								from closure_linter import ecmametadatapass

								from closure_linter import errors

								from closure_linter import javascripttokens

								from closure_linter import tokenutil

								from closure_linter.common import error

								from closure_linter.common import position


								import gflags as flags


								flags.DEFINE_boolean('debug_indentation', False,

								                     'Whether to print debugging information for indentation.')


								# Shorthand

								Context = ecmametadatapass.EcmaContext

								Error = error.Error

								Position = position.Position

								Type = javascripttokens.JavaScriptTokenType


								# The general approach:

								#

								# 1. Build a stack of tokens that can affect indentation.

								#    For each token, we determine if it is a block or continuation token.

								#    Some tokens need to be temporarily overwritten in case they are removed

								#    before the end of the line.

								#    Much of the work here is determining which tokens to keep on the stack

								#    at each point.  Operators, for example, should be removed once their

								#    expression or line is gone, while parentheses must stay until the matching

								#    end parenthesis is found.

								#

								# 2. Given that stack, determine the allowable indentations.

								#    Due to flexible indentation rules in JavaScript, there may be many

								#    allowable indentations for each stack.  We follow the general

								#    "no false positives" approach of GJsLint and build the most permissive

								#    set possible.


								class TokenInfo(object):
								  """Record kept on the indentation stack for a single token.

								  Attributes:
								    token: The token this record describes.
								    is_block: Whether the token represents a block indentation.
								    is_transient: Whether the record may be removed automatically, without
								      a matching end token being found.  True only for non-block tokens that
								      are neither an opening paren nor an opening parameter list.
								    overridden_by: TokenInfo of a token that overrides the indentation this
								      token would otherwise require, or None when nothing overrides it.
								    is_permanent_override: Whether the override should stay in effect even
								      after the overriding token has left the stack.  For example:
								      x([
								        1],
								      2);
								      needs this to be set so the last line is not required to be a
								      continuation indent.
								    line_number: The effective line number of this token.  Will either be the
								      actual line number or the one before it in the case of a mis-wrapped
								      operator.
								  """

								  def __init__(self, token, is_block=False):
								    """Builds the record for the given token.

								    Args:
								      token: The token to describe.
								      is_block: Whether the token represents a block indentation.
								    """
								    self.token = token
								    self.is_block = is_block
								    self.line_number = token.line_number

								    # Nothing overrides a freshly created record.
								    self.overridden_by = None
								    self.is_permanent_override = False

								    # Blocks, parens and parameter lists must wait for their closing token;
								    # everything else can simply be discarded.
								    if is_block:
								      self.is_transient = False
								    else:
								      self.is_transient = token.type not in (
								          Type.START_PAREN, Type.START_PARAMETERS)

								  def __repr__(self):
								    """Returns a debugging description of the token and its flags."""
								    text = '\n  %s' % self.token
								    overrider = self.overridden_by
								    if overrider:
								      text = '%s OVERRIDDEN [by "%s"]' % (text, overrider.token.string)
								    summary = ' {is_block: %s, is_transient: %s}' % (
								        self.is_block, self.is_transient)
								    return text + summary


								class IndentationRules(object):

								  """EcmaScript indentation rules.


								  Can be used to find common indentation errors in JavaScript, ActionScript and

								  other Ecma like scripting languages.

								  """


								  def __init__(self):

								    """Initializes the IndentationRules checker."""

								    self._stack = []


								    # Map from line number to number of characters it is off in indentation.

								    self._start_index_offset = {}


								  def Finalize(self):

								    if self._stack:

								      old_stack = self._stack

								      self._stack = []

								      raise Exception("INTERNAL ERROR: indentation stack is not empty: %r" %

								                      old_stack)


								  def CheckToken(self, token, state):
								    """Checks a token for indentation errors.

								    The token is processed in three phases: tokens that can decrease
								    indentation are removed from the stack, the line's indentation is checked
								    if the token starts the line, and finally tokens that can increase
								    indentation are pushed onto the stack.

								    Args:
								      token: The current token under consideration
								      state: Additional information about the current tree state (not used by
								        this method).

								    Returns:
								      A list of error arrays, each of the form
								      [error code, error string, error token, position].  The list is empty
								      if the indentation is correct.
								    """

								    token_type = token.type
								    indentation_errors = []
								    stack = self._stack
								    is_first = self._IsFirstNonWhitespaceTokenInLine(token)

								    # Add tokens that could decrease indentation before checking.
								    if token_type == Type.END_PAREN:
								      self._PopTo(Type.START_PAREN)

								    elif token_type == Type.END_PARAMETERS:
								      self._PopTo(Type.START_PARAMETERS)

								    elif token_type == Type.END_BRACKET:
								      self._PopTo(Type.START_BRACKET)

								    elif token_type == Type.END_BLOCK:
								      self._PopTo(Type.START_BLOCK)

								    elif token_type == Type.KEYWORD and token.string in ('case', 'default'):
								      # Pop back to the enclosing block start and push it again, discarding
								      # everything that was stacked on top of it.
								      self._Add(self._PopTo(Type.START_BLOCK))

								    elif is_first and token.string == '.':
								      # This token should have been on the previous line, so treat it as if it
								      # was there.
								      info = TokenInfo(token)
								      info.line_number = token.line_number - 1
								      self._Add(info)

								    elif token_type == Type.SEMICOLON:
								      self._PopTransient()

								    not_binary_operator = (token_type != Type.OPERATOR or
								                           token.metadata.IsUnaryOperator())
								    not_dot = token.string != '.'
								    if is_first and not_binary_operator and not_dot and token.type not in (
								        Type.COMMENT, Type.DOC_PREFIX, Type.STRING_TEXT):
								      if flags.FLAGS.debug_indentation:
								        # Single-argument call form: prints identically under Python 2 and
								        # is valid syntax under Python 3.
								        print('Line #%d: stack %r' % (token.line_number, stack))

								      # Ignore lines that start in JsDoc since we don't check them properly yet.
								      # TODO(robbyw): Support checking JsDoc indentation.
								      # Ignore lines that start as multi-line strings since indentation is N/A.
								      # Ignore lines that start with operators since we report that already.
								      # Ignore lines with tabs since we report that already.
								      expected = self._GetAllowableIndentations()
								      actual = self._GetActualIndentation(token)

								      # Special case comments describing else, case, and default.  Allow them
								      # to outdent to the parent block.
								      if token_type in Type.COMMENT_TYPES:
								        next_code = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES)
								        if next_code and next_code.type == Type.END_BLOCK:
								          next_code = tokenutil.SearchExcept(next_code, Type.NON_CODE_TYPES)
								        if next_code and next_code.string in ('else', 'case', 'default'):
								          # TODO(robbyw): This almost certainly introduces false negatives.
								          expected |= self._AddToEach(expected, -2)

								      # A negative actual indentation means the line contains tabs and is
								      # skipped here.
								      if actual >= 0 and actual not in expected:
								        expected = sorted(expected)
								        indentation_errors.append([
								            errors.WRONG_INDENTATION,
								            'Wrong indentation: expected any of {%s} but got %d' % (
								                ', '.join(
								                    ['%d' % x for x in expected]), actual),
								            token,
								            Position(actual, expected[0])])
								        # Remember how far off this line is so hard stops computed from
								        # tokens on it can be corrected later.
								        self._start_index_offset[token.line_number] = expected[0] - actual

								    # Add tokens that could increase indentation.
								    if token_type == Type.START_BRACKET:
								      self._Add(TokenInfo(token=token,
								          is_block=token.metadata.context.type == Context.ARRAY_LITERAL))

								    elif token_type == Type.START_BLOCK or token.metadata.is_implied_block:
								      self._Add(TokenInfo(token=token, is_block=True))

								    elif token_type in (Type.START_PAREN,  Type.START_PARAMETERS):
								      self._Add(TokenInfo(token=token, is_block=False))

								    elif token_type == Type.KEYWORD and token.string == 'return':
								      self._Add(TokenInfo(token))

								    elif not token.IsLastInLine() and (
								        token.IsAssignment() or token.IsOperator('?')):
								      self._Add(TokenInfo(token=token))

								    # Handle implied block closes.
								    if token.metadata.is_implied_block_close:
								      self._PopToImpliedBlock()

								    # Add some tokens only if they appear at the end of the line.
								    is_last = self._IsLastCodeInLine(token)
								    if is_last:
								      if token_type == Type.OPERATOR:
								        if token.string == ':':
								          if (stack and stack[-1].token.string == '?'):
								            # When a ternary : is on a different line than its '?', it doesn't
								            # add indentation.
								            if (token.line_number == stack[-1].token.line_number):
								              self._Add(TokenInfo(token))
								          elif token.metadata.context.type == Context.CASE_BLOCK:
								            # Pop transient tokens from say, line continuations, e.g.,
								            # case x.
								            #     y:
								            # Want to pop the transient 4 space continuation indent.
								            self._PopTransient()
								            # Starting the body of the case statement, which is a type of
								            # block.
								            self._Add(TokenInfo(token=token, is_block=True))
								          elif token.metadata.context.type == Context.LITERAL_ELEMENT:
								            # When in an object literal, acts as operator indicating line
								            # continuations.
								            self._Add(TokenInfo(token))
								          else:
								            # ':' might also be a statement label, no effect on indentation in
								            # this case.
								            pass

								        elif token.string != ',':
								          self._Add(TokenInfo(token))
								        else:
								          # The token is a comma.
								          if token.metadata.context.type == Context.VAR:
								            self._Add(TokenInfo(token))
								          elif token.metadata.context.type != Context.PARAMETERS:
								            self._PopTransient()

								      elif (token.string.endswith('.')
								            and token_type in (Type.IDENTIFIER, Type.NORMAL)):
								        self._Add(TokenInfo(token))
								      elif token_type == Type.PARAMETERS and token.string.endswith(','):
								        # Parameter lists.
								        self._Add(TokenInfo(token))
								      elif token.metadata.is_implied_semicolon:
								        self._PopTransient()
								    elif token.IsAssignment():
								      self._Add(TokenInfo(token))

								    return indentation_errors


								  def _AddToEach(self, original, amount):

								    """Returns a new set with the given amount added to each element.


								    Args:

								      original: The original set of numbers

								      amount: The amount to add to each element


								    Returns:

								      A new set containing each element of the original set added to the amount.

								    """

								    return set([x + amount for x in original])


								  # Token types that _IsHardStop treats as hard stops: opening parens,
								  # opening parameter lists and opening brackets.
								  _HARD_STOP_TYPES = (Type.START_PAREN, Type.START_PARAMETERS,
								                      Type.START_BRACKET)

								  # Token strings that _IsHardStop treats as hard stops.
								  _HARD_STOP_STRINGS = ('return', '?')


								  def _IsHardStop(self, token):

								    """Determines if the given token can have a hard stop after it.


								    Hard stops are indentations defined by the position of another token as in

								    indentation lined up with return, (, [, and ?.

								    """

								    return (token.type in self._HARD_STOP_TYPES or

								            token.string in self._HARD_STOP_STRINGS or

								            token.IsAssignment())


								  def _GetAllowableIndentations(self):

								    """Computes the set of allowable indentations.


								    Returns:

								      The set of allowable indentations, given the current stack.

								    """

								    expected = set([0])

								    hard_stops = set([])


								    # Whether the tokens are still in the same continuation, meaning additional

								    # indentation is optional.  As an example:

								    # x = 5 +

								    #     6 +

								    #     7;

								    # The second '+' does not add any required indentation.

								    in_same_continuation = False


								    for token_info in self._stack:

								      token = token_info.token


								      # Handle normal additive indentation tokens.

								      if not token_info.overridden_by and token.string != 'return':

								        if token_info.is_block:

								          expected = self._AddToEach(expected, 2)

								          hard_stops = self._AddToEach(hard_stops, 2)

								          in_same_continuation = False

								        elif in_same_continuation:

								          expected |= self._AddToEach(expected, 4)

								          hard_stops |= self._AddToEach(hard_stops, 4)

								        else:

								          expected = self._AddToEach(expected, 4)

								          hard_stops |= self._AddToEach(hard_stops, 4)

								          in_same_continuation = True


								      # Handle hard stops after (, [, return, =, and ?

								      if self._IsHardStop(token):

								        override_is_hard_stop = (token_info.overridden_by and

								            self._IsHardStop(token_info.overridden_by.token))

								        if not override_is_hard_stop:

								          start_index = token.start_index

								          if token.line_number in self._start_index_offset:

								            start_index += self._start_index_offset[token.line_number]

								          if (token.type in (Type.START_PAREN, Type.START_PARAMETERS) and

								              not token_info.overridden_by):

								            hard_stops.add(start_index + 1)


								          elif token.string == 'return' and not token_info.overridden_by:

								            hard_stops.add(start_index + 7)


								          elif (token.type == Type.START_BRACKET):

								            hard_stops.add(start_index + 1)


								          elif token.IsAssignment():

								            hard_stops.add(start_index + len(token.string) + 1)


								          elif token.IsOperator('?') and not token_info.overridden_by:

								            hard_stops.add(start_index + 2)


								    return (expected | hard_stops) or set([0])


								  def _GetActualIndentation(self, token):
								    """Measures the indentation of the line containing the given token.

								    Args:
								      token: Any token on the line.

								    Returns:
								      The actual indentation of the line containing the given token.  Returns
								      -1 if this line should be ignored due to the presence of tabs.
								    """
								    # The indentation, if any, is carried by the first token of the line.
								    first = tokenutil.GetFirstTokenInSameLine(token)

								    if first.type == Type.WHITESPACE:
								      # Leading whitespace is the indentation itself, unless it has tabs.
								      text = first.string
								      if '\t' in text:
								        return -1
								      return len(text)

								    if first.type == Type.PARAMETERS:
								      # Count the leading whitespace embedded in the parameters token.
								      text = first.string
								      return len(text) - len(text.lstrip())

								    return 0


								  def _IsFirstNonWhitespaceTokenInLine(self, token):
								    """Determines if the given token is the first non-space token on its line.

								    Whitespace and blank-line tokens never qualify.

								    Args:
								      token: The token.

								    Returns:
								      A true value if the token is the first non-whitespace token on its
								      line, a false value otherwise.
								    """
								    if token.type in (Type.WHITESPACE, Type.BLANK_LINE):
								      return False

								    if token.IsFirstInLine():
								      return True

								    # Otherwise the token only counts when the sole thing before it on the
								    # line is a leading whitespace token.
								    before = token.previous
								    return (before and before.IsFirstInLine() and
								            before.type == Type.WHITESPACE)


								  def _IsLastCodeInLine(self, token):
								    """Determines if the given token is the last code token on its line.

								    Args:
								      token: The token.

								    Returns:
								      True if the token is the last code token on its line.  Non-code tokens
								      always yield False.
								    """
								    if token.type in Type.NON_CODE_TYPES:
								      return False

								    # Scan the rest of the line looking for another code token.
								    line = token.line_number
								    following = token.next
								    while following and following.line_number == line:
								      if following.type not in Type.NON_CODE_TYPES:
								        return False
								      following = following.next

								    # Reached the end of the line (or of the token stream) without finding one.
								    return True


								  def _Add(self, token_info):

								    """Adds the given token info to the stack.


								    Args:

								      token_info: The token information to add.

								    """

								    if self._stack and self._stack[-1].token == token_info.token:

								      # Don't add the same token twice.

								      return


								    if token_info.is_block or token_info.token.type == Type.START_PAREN:

								      index = 1

								      while index <= len(self._stack):

								        stack_info = self._stack[-index]

								        stack_token = stack_info.token


								        if stack_info.line_number == token_info.line_number:

								          # In general, tokens only override each other when they are on

								          # the same line.

								          stack_info.overridden_by = token_info

								          if (token_info.token.type == Type.START_BLOCK and

								              (stack_token.IsAssignment() or

								               stack_token.type in (Type.IDENTIFIER, Type.START_PAREN))):

								            # Multi-line blocks have lasting overrides, as in:

								            # callFn({

								            #   a: 10

								            # },

								            # 30);

								            close_block = token_info.token.metadata.context.end_token

								            stack_info.is_permanent_override = \

								                close_block.line_number != token_info.token.line_number

								        elif (token_info.token.type == Type.START_BLOCK and

								              token_info.token.metadata.context.type == Context.BLOCK and

								              (stack_token.IsAssignment() or

								               stack_token.type == Type.IDENTIFIER)):

								          # When starting a function block, the override can transcend lines.

								          # For example

								          # long.long.name = function(

								          #     a) {

								          # In this case the { and the = are on different lines.  But the

								          # override should still apply.

								          stack_info.overridden_by = token_info

								          stack_info.is_permanent_override = True

								        else:

								          break

								        index += 1


								    self._stack.append(token_info)


								  def _Pop(self):
								    """Removes the top token from the stack and lifts its overrides.

								    Returns:
								      The popped token info.
								    """
								    popped = self._stack.pop()
								    opener = popped.token
								    open_type = opener.type

								    if open_type == Type.START_BRACKET:
								      close_type = Type.END_BRACKET
								    elif open_type == Type.START_BLOCK:
								      close_type = Type.END_BLOCK
								    else:
								      # Remove any temporary overrides.
								      self._RemoveOverrides(popped)
								      return popped

								    # For braces and brackets, which can be object and array literals, remove
								    # overrides only when the literal is closed on the same line.  Scan the
								    # rest of the opening line, tracking nesting depth, for the matching close.
								    line = opener.line_number
								    depth = 0
								    cursor = opener
								    while cursor and cursor.line_number == line:
								      if cursor.type == close_type:
								        depth -= 1
								        if not depth:
								          self._RemoveOverrides(popped)
								          break
								      if cursor.type == open_type:
								        depth += 1
								      cursor = cursor.next

								    return popped


								  def _PopToImpliedBlock(self):
								    """Pops the stack until an implied block token has been popped."""
								    while True:
								      if self._Pop().token.metadata.is_implied_block:
								        return


								  def _PopTo(self, stop_type):
								    """Pops the stack until a token of the given type has been popped.

								    Args:
								      stop_type: The type of token to pop to.

								    Returns:
								      The token info of the given type that was popped.
								    """
								    while True:
								      popped = self._Pop()
								      if popped.token.type == stop_type:
								        return popped


								  def _RemoveOverrides(self, token_info):

								    """Marks any token that was overridden by this token as active again.


								    Args:

								      token_info: The token that is being removed from the stack.

								    """

								    for stack_token in self._stack:

								      if (stack_token.overridden_by == token_info and

								          not stack_token.is_permanent_override):

								        stack_token.overridden_by = None


								  def _PopTransient(self):

								    """Pops all transient tokens - i.e. not blocks, literals, or parens."""

								    while self._stack and self._stack[-1].is_transient:

								      self._Pop()