Import Google's closure_linter

Run with 'make lint'
v0.7.4-release
Ryan Dahl, 14 years ago
commit a128451004
46 changed files:

     3  LICENSE
     8  Makefile
    10  tools/closure_linter/PKG-INFO
     9  tools/closure_linter/README
    10  tools/closure_linter/closure_linter.egg-info/PKG-INFO
    41  tools/closure_linter/closure_linter.egg-info/SOURCES.txt
     1  tools/closure_linter/closure_linter.egg-info/dependency_links.txt
     4  tools/closure_linter/closure_linter.egg-info/entry_points.txt
     1  tools/closure_linter/closure_linter.egg-info/requires.txt
     1  tools/closure_linter/closure_linter.egg-info/top_level.txt
     1  tools/closure_linter/closure_linter/__init__.py
    82  tools/closure_linter/closure_linter/checker.py
   237  tools/closure_linter/closure_linter/checkerbase.py
     1  tools/closure_linter/closure_linter/common/__init__.py
    65  tools/closure_linter/closure_linter/common/error.py
    46  tools/closure_linter/closure_linter/common/erroraccumulator.py
    61  tools/closure_linter/closure_linter/common/errorhandler.py
   203  tools/closure_linter/closure_linter/common/errorprinter.py
   105  tools/closure_linter/closure_linter/common/filetestcase.py
   170  tools/closure_linter/closure_linter/common/htmlutil.py
    39  tools/closure_linter/closure_linter/common/lintrunner.py
    60  tools/closure_linter/closure_linter/common/matcher.py
   126  tools/closure_linter/closure_linter/common/position.py
   190  tools/closure_linter/closure_linter/common/simplefileflags.py
   184  tools/closure_linter/closure_linter/common/tokenizer.py
   125  tools/closure_linter/closure_linter/common/tokens.py
   752  tools/closure_linter/closure_linter/ecmalintrules.py
   521  tools/closure_linter/closure_linter/ecmametadatapass.py
   336  tools/closure_linter/closure_linter/error_fixer.py
    42  tools/closure_linter/closure_linter/errorrules.py
   131  tools/closure_linter/closure_linter/errors.py
    47  tools/closure_linter/closure_linter/fixjsstyle.py
    61  tools/closure_linter/closure_linter/fixjsstyle_test.py
    99  tools/closure_linter/closure_linter/full_test.py
   142  tools/closure_linter/closure_linter/gjslint.py
   543  tools/closure_linter/closure_linter/indentation.py
   395  tools/closure_linter/closure_linter/javascriptlintrules.py
   238  tools/closure_linter/closure_linter/javascriptstatetracker.py
    53  tools/closure_linter/closure_linter/javascriptstatetracker_test.py
   365  tools/closure_linter/closure_linter/javascripttokenizer.py
   147  tools/closure_linter/closure_linter/javascripttokens.py
   964  tools/closure_linter/closure_linter/statetracker.py
   285  tools/closure_linter/closure_linter/tokenutil.py
  2489  tools/closure_linter/gflags.py
     5  tools/closure_linter/setup.cfg
    38  tools/closure_linter/setup.py

LICENSE | 3

@@ -34,6 +34,9 @@ are:
   - src/platform_darwin_proctitle.cc, has code taken from the Chromium
     project copyright Google Inc. and released with the BSD license.
 
+  - tools/closure_linter is copyrighted by The Closure Linter Authors and
+    Google Inc and is released under the Apache license.
+
 Node's license follows:

Makefile | 8

@@ -130,5 +130,11 @@ bench-idle:
 	sleep 1
 	./node benchmark/idle_clients.js &
 
+lint:
+	@for i in lib/*.js; do \
+	  PYTHONPATH=tools/closure_linter/ python tools/closure_linter/closure_linter/gjslint.py \
+	  --unix_mode --strict --nojsdoc $$i || exit 1; \
+	done
+
-.PHONY: bench clean docopen docclean doc dist distclean check uninstall install all program staticlib dynamiclib test test-all website-upload
+.PHONY: lint bench clean docopen docclean doc dist distclean check uninstall install all program staticlib dynamiclib test test-all website-upload

tools/closure_linter/PKG-INFO | 10

@@ -0,0 +1,10 @@
Metadata-Version: 1.0
Name: closure_linter
Version: 2.2.6
Summary: Closure Linter
Home-page: http://code.google.com/p/closure-linter
Author: The Closure Linter Authors
Author-email: opensource@google.com
License: Apache
Description: UNKNOWN
Platform: UNKNOWN

tools/closure_linter/README | 9

@@ -0,0 +1,9 @@
This repository contains the Closure Linter - a style checker for JavaScript.

To install the application, run
  python ./setup.py install

After installing, you get two helper applications installed into /usr/local/bin:

  gjslint.py - runs the linter and checks for errors
  fixjsstyle.py - tries to fix errors automatically

tools/closure_linter/closure_linter.egg-info/PKG-INFO | 10

@@ -0,0 +1,10 @@
Metadata-Version: 1.0
Name: closure-linter
Version: 2.2.6
Summary: Closure Linter
Home-page: http://code.google.com/p/closure-linter
Author: The Closure Linter Authors
Author-email: opensource@google.com
License: Apache
Description: UNKNOWN
Platform: UNKNOWN

tools/closure_linter/closure_linter.egg-info/SOURCES.txt | 41

@@ -0,0 +1,41 @@
README
setup.py
closure_linter/__init__.py
closure_linter/checker.py
closure_linter/checkerbase.py
closure_linter/ecmalintrules.py
closure_linter/ecmametadatapass.py
closure_linter/error_fixer.py
closure_linter/errorrules.py
closure_linter/errors.py
closure_linter/fixjsstyle.py
closure_linter/fixjsstyle_test.py
closure_linter/full_test.py
closure_linter/gjslint.py
closure_linter/indentation.py
closure_linter/javascriptlintrules.py
closure_linter/javascriptstatetracker.py
closure_linter/javascriptstatetracker_test.py
closure_linter/javascripttokenizer.py
closure_linter/javascripttokens.py
closure_linter/statetracker.py
closure_linter/tokenutil.py
closure_linter.egg-info/PKG-INFO
closure_linter.egg-info/SOURCES.txt
closure_linter.egg-info/dependency_links.txt
closure_linter.egg-info/entry_points.txt
closure_linter.egg-info/requires.txt
closure_linter.egg-info/top_level.txt
closure_linter/common/__init__.py
closure_linter/common/error.py
closure_linter/common/erroraccumulator.py
closure_linter/common/errorhandler.py
closure_linter/common/errorprinter.py
closure_linter/common/filetestcase.py
closure_linter/common/htmlutil.py
closure_linter/common/lintrunner.py
closure_linter/common/matcher.py
closure_linter/common/position.py
closure_linter/common/simplefileflags.py
closure_linter/common/tokenizer.py
closure_linter/common/tokens.py

tools/closure_linter/closure_linter.egg-info/dependency_links.txt | 1

@@ -0,0 +1 @@

tools/closure_linter/closure_linter.egg-info/entry_points.txt | 4

@@ -0,0 +1,4 @@
[console_scripts]
fixjsstyle = closure_linter.fixjsstyle:main
gjslint = closure_linter.gjslint:main

tools/closure_linter/closure_linter.egg-info/requires.txt | 1

@@ -0,0 +1 @@
python-gflags

tools/closure_linter/closure_linter.egg-info/top_level.txt | 1

@@ -0,0 +1 @@
closure_linter

tools/closure_linter/closure_linter/__init__.py | 1

@@ -0,0 +1 @@
#!/usr/bin/env python

tools/closure_linter/closure_linter/checker.py | 82

@@ -0,0 +1,82 @@
#!/usr/bin/env python
#
# Copyright 2007 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Core methods for checking JS files for common style guide violations."""
__author__ = ('robbyw@google.com (Robert Walker)',
'ajp@google.com (Andy Perelson)')
import gflags as flags
from closure_linter import checkerbase
from closure_linter import ecmametadatapass
from closure_linter import errors
from closure_linter import javascriptlintrules
from closure_linter import javascriptstatetracker
from closure_linter.common import errorprinter
from closure_linter.common import lintrunner
flags.DEFINE_list('limited_doc_files', ['dummy.js', 'externs.js'],
'List of files with relaxed documentation checks. Will not '
'report errors for missing documentation, some missing '
'descriptions, or methods whose @return tags don\'t have a '
'matching return statement.')
class JavaScriptStyleChecker(checkerbase.CheckerBase):
"""Checker that applies JavaScriptLintRules."""
def __init__(self, error_handler):
"""Initialize an JavaScriptStyleChecker object.
Args:
error_handler: Error handler to pass all errors to
"""
checkerbase.CheckerBase.__init__(
self,
error_handler=error_handler,
lint_rules=javascriptlintrules.JavaScriptLintRules(),
state_tracker=javascriptstatetracker.JavaScriptStateTracker(
closurized_namespaces=flags.FLAGS.closurized_namespaces),
metadata_pass=ecmametadatapass.EcmaMetaDataPass(),
limited_doc_files=flags.FLAGS.limited_doc_files)
class GJsLintRunner(lintrunner.LintRunner):
"""Wrapper class to run GJsLint."""
def Run(self, filenames, error_handler=None):
"""Run GJsLint on the given filenames.
Args:
filenames: The filenames to check
error_handler: An optional ErrorHandler object, an ErrorPrinter is used if
none is specified.
Returns:
error_count, file_count: The number of errors and the number of files that
contain errors.
"""
if not error_handler:
error_handler = errorprinter.ErrorPrinter(errors.NEW_ERRORS)
checker = JavaScriptStyleChecker(error_handler)
# Check the list of files.
for filename in filenames:
checker.Check(filename)
return error_handler
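
For orientation, here is a minimal sketch (not part of the commit) of driving the linter through this API instead of the gjslint.py entry point. The target path is hypothetical, and it assumes tools/closure_linter is on sys.path with gflags defaults parsed (Python 2, like the imported code):

    import sys
    sys.path.insert(0, 'tools/closure_linter')

    import gflags as flags
    from closure_linter import checker
    from closure_linter.common import errorprinter

    flags.FLAGS(['demo'])  # parse flag defaults before FLAGS attributes are read

    handler = errorprinter.ErrorPrinter()
    handler.SetFormat(errorprinter.UNIX_FORMAT)
    checker.GJsLintRunner().Run(['lib/fs.js'], handler)  # hypothetical target
    handler.PrintSummary()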

tools/closure_linter/closure_linter/checkerbase.py | 237

@@ -0,0 +1,237 @@
#!/usr/bin/env python
#
# Copyright 2008 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base classes for writing checkers that operate on tokens."""
__author__ = ('robbyw@google.com (Robert Walker)',
'ajp@google.com (Andy Perelson)',
'jacobr@google.com (Jacob Richman)')
import traceback
import gflags as flags
from closure_linter import ecmametadatapass
from closure_linter import errorrules
from closure_linter import errors
from closure_linter import javascripttokenizer
from closure_linter.common import error
from closure_linter.common import htmlutil
FLAGS = flags.FLAGS
flags.DEFINE_boolean('debug_tokens', False,
'Whether to print all tokens for debugging.')
flags.DEFINE_boolean('error_trace', False,
'Whether to show error exceptions.')
class LintRulesBase(object):
"""Base class for all classes defining the lint rules for a language."""
def __init__(self):
self.__checker = None
def Initialize(self, checker, limited_doc_checks, is_html):
"""Initializes to prepare to check a file.
Args:
checker: Class to report errors to.
limited_doc_checks: Whether doc checking is relaxed for this file.
is_html: Whether the file is an HTML file with extracted contents.
"""
self.__checker = checker
self._limited_doc_checks = limited_doc_checks
self._is_html = is_html
def _HandleError(self, code, message, token, position=None,
fix_data=None):
"""Call the HandleError function for the checker we are associated with."""
if errorrules.ShouldReportError(code):
self.__checker.HandleError(code, message, token, position, fix_data)
def CheckToken(self, token, parser_state):
"""Checks a token, given the current parser_state, for warnings and errors.
Args:
token: The current token under consideration.
parser_state: Object that indicates the parser state in the page.
Raises:
TypeError: If not overridden.
"""
raise TypeError('Abstract method CheckToken not implemented')
def Finalize(self, parser_state, tokenizer_mode):
"""Perform all checks that need to occur after all lines are processed.
Args:
parser_state: State of the parser after parsing all tokens
tokenizer_mode: Mode of the tokenizer after parsing the entire page
Raises:
TypeError: If not overridden.
"""
raise TypeError('Abstract method Finalize not implemented')
class CheckerBase(object):
"""This class handles checking a LintRules object against a file."""
def __init__(self, error_handler, lint_rules, state_tracker,
limited_doc_files=None, metadata_pass=None):
"""Initialize a checker object.
Args:
error_handler: Object that handles errors.
lint_rules: LintRules object defining lint errors given a token
and state_tracker object.
state_tracker: Object that tracks the current state in the token stream.
limited_doc_files: List of filenames that are not required to have
documentation comments.
metadata_pass: Object that builds metadata about the token stream.
"""
self.__error_handler = error_handler
self.__lint_rules = lint_rules
self.__state_tracker = state_tracker
self.__metadata_pass = metadata_pass
self.__limited_doc_files = limited_doc_files
self.__tokenizer = javascripttokenizer.JavaScriptTokenizer()
self.__has_errors = False
def HandleError(self, code, message, token, position=None,
fix_data=None):
"""Prints out the given error message including a line number.
Args:
code: The error code.
message: The error to print.
token: The token where the error occurred, or None if it was a file-wide
issue.
position: The position of the error, defaults to None.
fix_data: Metadata used for fixing the error.
"""
self.__has_errors = True
self.__error_handler.HandleError(
error.Error(code, message, token, position, fix_data))
def HasErrors(self):
"""Returns true if the style checker has found any errors.
Returns:
True if the style checker has found any errors.
"""
return self.__has_errors
def Check(self, filename):
"""Checks the file, printing warnings and errors as they are found.
Args:
filename: The name of the file to check.
"""
try:
f = open(filename)
except IOError:
self.__error_handler.HandleFile(filename, None)
self.HandleError(errors.FILE_NOT_FOUND, 'File not found', None)
self.__error_handler.FinishFile()
return
try:
if filename.endswith('.html') or filename.endswith('.htm'):
self.CheckLines(filename, htmlutil.GetScriptLines(f), True)
else:
self.CheckLines(filename, f, False)
finally:
f.close()
def CheckLines(self, filename, lines_iter, is_html):
"""Checks a file, given as an iterable of lines, for warnings and errors.
Args:
filename: The name of the file to check.
lines_iter: An iterator that yields one line of the file at a time.
is_html: Whether the file being checked is an HTML file with extracted
contents.
Returns:
A boolean indicating whether the full file could be checked or if checking
failed prematurely.
"""
limited_doc_checks = False
if self.__limited_doc_files:
for limited_doc_filename in self.__limited_doc_files:
if filename.endswith(limited_doc_filename):
limited_doc_checks = True
break
state_tracker = self.__state_tracker
lint_rules = self.__lint_rules
state_tracker.Reset()
lint_rules.Initialize(self, limited_doc_checks, is_html)
token = self.__tokenizer.TokenizeFile(lines_iter)
parse_error = None
if self.__metadata_pass:
try:
self.__metadata_pass.Reset()
self.__metadata_pass.Process(token)
except ecmametadatapass.ParseError, caught_parse_error:
if FLAGS.error_trace:
traceback.print_exc()
parse_error = caught_parse_error
except Exception:
print 'Internal error in %s' % filename
traceback.print_exc()
return False
self.__error_handler.HandleFile(filename, token)
while token:
if FLAGS.debug_tokens:
print token
if parse_error and parse_error.token == token:
# Report any parse errors from above once we find the token.
message = ('Error parsing file at token "%s". Unable to '
'check the rest of file.' % token.string)
self.HandleError(errors.FILE_DOES_NOT_PARSE, message, token)
self.__error_handler.FinishFile()
return False
if FLAGS.error_trace:
state_tracker.HandleToken(token, state_tracker.GetLastNonSpaceToken())
else:
try:
state_tracker.HandleToken(token, state_tracker.GetLastNonSpaceToken())
except:
self.HandleError(errors.FILE_DOES_NOT_PARSE,
('Error parsing file at token "%s". Unable to '
'check the rest of file.' % token.string),
token)
self.__error_handler.FinishFile()
return False
# Check the token for style guide violations.
lint_rules.CheckToken(token, state_tracker)
state_tracker.HandleAfterToken(token)
# Move to the next token.
token = token.next
lint_rules.Finalize(state_tracker, self.__tokenizer.mode)
self.__error_handler.FinishFile()
return True

tools/closure_linter/closure_linter/common/__init__.py | 1

@@ -0,0 +1 @@
#!/usr/bin/env python

tools/closure_linter/closure_linter/common/error.py | 65

@@ -0,0 +1,65 @@
#!/usr/bin/env python
#
# Copyright 2007 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Error object commonly used in linters."""
__author__ = ('robbyw@google.com (Robert Walker)',
'ajp@google.com (Andy Perelson)')
class Error(object):
"""Object representing a style error."""
def __init__(self, code, message, token, position, fix_data):
"""Initialize the error object.
Args:
code: The numeric error code.
message: The error message string.
token: The tokens.Token where the error occurred.
position: The position of the error within the token.
fix_data: Data to be used in autofixing. Codes with fix_data are:
GOOG_REQUIRES_NOT_ALPHABETIZED - List of string value tokens that are
class names in goog.requires calls.
"""
self.code = code
self.message = message
self.token = token
self.position = position
if token:
self.start_index = token.start_index
else:
self.start_index = 0
self.fix_data = fix_data
if self.position:
self.start_index += self.position.start
def Compare(a, b):
"""Compare two error objects, by source code order.
Args:
a: First error object.
b: Second error object.
Returns:
A Negative/0/Positive number when a is before/the same as/after b.
"""
line_diff = a.token.line_number - b.token.line_number
if line_diff:
return line_diff
return a.start_index - b.start_index
Compare = staticmethod(Compare)
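
As a small standalone sketch of how Compare orders errors (the tokens and messages are invented for illustration): errors sort first by line number, then by character offset within the line.

    from closure_linter.common import error, tokens

    line = 'a = 1; b = 2;'
    t1 = tokens.Token('a', tokens.TokenType.NORMAL, line, 1)
    t1.start_index = 0
    t2 = tokens.Token('b', tokens.TokenType.NORMAL, line, 1)
    t2.start_index = 7

    errs = [error.Error(2, 'second', t2, None, None),
            error.Error(1, 'first', t1, None, None)]
    errs.sort(error.Error.Compare)   # Python 2 cmp-style sort
    print [e.message for e in errs]  # ['first', 'second']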

tools/closure_linter/closure_linter/common/erroraccumulator.py | 46

@@ -0,0 +1,46 @@
#!/usr/bin/env python
#
# Copyright 2008 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Linter error handler class that accumulates an array of errors."""
__author__ = ('robbyw@google.com (Robert Walker)',
'ajp@google.com (Andy Perelson)')
from closure_linter.common import errorhandler
class ErrorAccumulator(errorhandler.ErrorHandler):
"""Error handler object that accumulates errors in a list."""
def __init__(self):
self._errors = []
def HandleError(self, error):
"""Append the error to the list.
Args:
error: The error object
"""
self._errors.append((error.token.line_number, error.code))
def GetErrors(self):
"""Returns the accumulated errors.
Returns:
A sequence of errors.
"""
return self._errors
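
filetestcase.py (further down in this commit) pairs this accumulator with a LintRunner; roughly, assuming the setup from the earlier sketch and an existing JS file (the path here is hypothetical, and note that this handler assumes every error carries a token):

    import gflags as flags
    from closure_linter import checker
    from closure_linter.common import erroraccumulator

    flags.FLAGS(['demo'])
    accumulator = erroraccumulator.ErrorAccumulator()
    checker.GJsLintRunner().Run(['lib/url.js'], accumulator)
    print accumulator.GetErrors()  # list of (line_number, error_code) tuples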

tools/closure_linter/closure_linter/common/errorhandler.py | 61

@@ -0,0 +1,61 @@
#!/usr/bin/env python
#
# Copyright 2008 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Interface for a linter error handler.
Error handlers aggregate a set of errors from multiple files and can optionally
perform some action based on the reported errors, for example, logging the error
or automatically fixing it.
"""
__author__ = ('robbyw@google.com (Robert Walker)',
'ajp@google.com (Andy Perelson)')
class ErrorHandler(object):
"""Error handler interface."""
def __init__(self):
if self.__class__ == ErrorHandler:
raise NotImplementedError('class ErrorHandler is abstract')
def HandleFile(self, filename, first_token):
"""Notifies this ErrorHandler that subsequent errors are in filename.
Args:
filename: The file being linted.
first_token: The first token of the file.
"""
def HandleError(self, error):
"""Append the error to the list.
Args:
error: The error object
"""
def FinishFile(self):
"""Finishes handling the current file.
Should be called after all errors in a file have been handled.
"""
def GetErrors(self):
"""Returns the accumulated errors.
Returns:
A sequence of errors.
"""

tools/closure_linter/closure_linter/common/errorprinter.py | 203

@@ -0,0 +1,203 @@
#!/usr/bin/env python
#
# Copyright 2008 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Linter error handler class that prints errors to stdout."""
__author__ = ('robbyw@google.com (Robert Walker)',
'ajp@google.com (Andy Perelson)')
from closure_linter.common import error
from closure_linter.common import errorhandler
Error = error.Error
# The error message is of the format:
# Line <number>, E:<code>: message
DEFAULT_FORMAT = 1
# The error message is of the format:
# filename:[line number]:message
UNIX_FORMAT = 2
class ErrorPrinter(errorhandler.ErrorHandler):
"""ErrorHandler that prints errors to stdout."""
def __init__(self, new_errors=None):
"""Initializes this error printer.
Args:
new_errors: A sequence of error codes representing recently introduced
errors, defaults to None.
"""
# Number of errors
self._error_count = 0
# Number of new errors
self._new_error_count = 0
# Number of files checked
self._total_file_count = 0
# Number of files with errors
self._error_file_count = 0
# Dict of file name to number of errors
self._file_table = {}
# List of errors for each file
self._file_errors = None
# Current file
self._filename = None
self._format = DEFAULT_FORMAT
if new_errors:
self._new_errors = frozenset(new_errors)
else:
self._new_errors = frozenset(set())
def SetFormat(self, format):
"""Sets the print format of errors.
Args:
format: One of {DEFAULT_FORMAT, UNIX_FORMAT}.
"""
self._format = format
def HandleFile(self, filename, first_token):
"""Notifies this ErrorPrinter that subsequent errors are in filename.
Sets the current file name, and sets a flag stating the header for this file
has not been printed yet.
Should be called by a linter before a file is style checked.
Args:
filename: The name of the file about to be checked.
first_token: The first token in the file, or None if there was an error
opening the file
"""
if self._filename and self._file_table[self._filename]:
print
self._filename = filename
self._file_table[filename] = 0
self._total_file_count += 1
self._file_errors = []
def HandleError(self, error):
"""Prints a formatted error message about the specified error.
The error message is of the format:
Error #<code>, line #<number>: message
Args:
error: The error object
"""
self._file_errors.append(error)
self._file_table[self._filename] += 1
self._error_count += 1
if self._new_errors and error.code in self._new_errors:
self._new_error_count += 1
def _PrintError(self, error):
"""Prints a formatted error message about the specified error.
Args:
error: The error object
"""
new_error = self._new_errors and error.code in self._new_errors
if self._format == DEFAULT_FORMAT:
line = ''
if error.token:
line = 'Line %d, ' % error.token.line_number
code = 'E:%04d' % error.code
if new_error:
print '%s%s: (New error) %s' % (line, code, error.message)
else:
print '%s%s: %s' % (line, code, error.message)
else:
# UNIX format
filename = self._filename
line = ''
if error.token:
line = '%d' % error.token.line_number
error_code = '%04d' % error.code
if new_error:
error_code = 'New Error ' + error_code
print '%s:%s:(%s) %s' % (filename, line, error_code, error.message)
def FinishFile(self):
"""Finishes handling the current file."""
if self._file_errors:
self._error_file_count += 1
if self._format != UNIX_FORMAT:
print '----- FILE : %s -----' % (self._filename)
self._file_errors.sort(Error.Compare)
for error in self._file_errors:
self._PrintError(error)
def HasErrors(self):
"""Whether this error printer encountered any errors.
Returns:
True if the error printer encountered any errors.
"""
return self._error_count
def HasNewErrors(self):
"""Whether this error printer encountered any new errors.
Returns:
True if the error printer encountered any new errors.
"""
return self._new_error_count
def HasOldErrors(self):
"""Whether this error printer encountered any old errors.
Returns:
True if the error printer encountered any old errors.
"""
return self._error_count - self._new_error_count
def PrintSummary(self):
"""Print a summary of the number of errors and files."""
if self.HasErrors() or self.HasNewErrors():
print ('Found %d errors, including %d new errors, in %d files '
'(%d files OK).' % (
self._error_count,
self._new_error_count,
self._error_file_count,
self._total_file_count - self._error_file_count))
else:
print '%d files checked, no errors found.' % self._total_file_count
def PrintFileSummary(self):
"""Print a detailed summary of the number of errors in each file."""
keys = self._file_table.keys()
keys.sort()
for filename in keys:
print '%s: %d' % (filename, self._file_table[filename])
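
A hedged end-to-end sketch of the printer on its own; the token, file name, and error code below are invented for illustration:

    from closure_linter.common import error, errorprinter, tokens

    tok = tokens.Token('foo', tokens.TokenType.NORMAL, 'foo;', 12)
    tok.start_index = 0

    printer = errorprinter.ErrorPrinter()
    printer.HandleFile('example.js', tok)
    printer.HandleError(error.Error(220, 'Made-up message', tok, None, None))
    printer.FinishFile()    # prints the file header and 'Line 12, E:0220: ...'
    printer.PrintSummary()  # 'Found 1 errors, including 0 new errors, ...'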

tools/closure_linter/closure_linter/common/filetestcase.py | 105

@@ -0,0 +1,105 @@
#!/usr/bin/env python
#
# Copyright 2007 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Test case that runs a checker on a file, matching errors against annotations.
Runs the given checker on the given file, accumulating all errors. The list
of errors is then matched against those annotated in the file. Based heavily
on devtools/javascript/gpylint/full_test.py.
"""
__author__ = ('robbyw@google.com (Robert Walker)',
'ajp@google.com (Andy Perelson)')
import re
import unittest as googletest
from closure_linter.common import erroraccumulator
class AnnotatedFileTestCase(googletest.TestCase):
"""Test case to run a linter against a single file."""
# Matches an all caps letters + underscores error identifer
_MESSAGE = {'msg': '[A-Z][A-Z_]+'}
# Matches a //, followed by an optional line number with a +/-, followed by a
# list of message IDs. Used to extract expected messages from testdata files.
# TODO(robbyw): Generalize to use different commenting patterns.
_EXPECTED_RE = re.compile(r'\s*//\s*(?:(?P<line>[+-]?[0-9]+):)?'
r'\s*(?P<msgs>%(msg)s(?:,\s*%(msg)s)*)' % _MESSAGE)
def __init__(self, filename, runner, converter):
"""Create a single file lint test case.
Args:
filename: Filename to test.
runner: Object implementing the LintRunner interface that lints a file.
converter: Function taking an error string and returning an error code.
"""
googletest.TestCase.__init__(self, 'runTest')
self._filename = filename
self._messages = []
self._runner = runner
self._converter = converter
def shortDescription(self):
"""Provides a description for the test."""
return 'Run linter on %s' % self._filename
def runTest(self):
"""Runs the test."""
try:
filename = self._filename
stream = open(filename)
except IOError, ex:
raise IOError('Could not find testdata resource for %s: %s' %
(self._filename, ex))
expected = self._GetExpectedMessages(stream)
got = self._ProcessFileAndGetMessages(filename)
self.assertEqual(expected, got)
def _GetExpectedMessages(self, stream):
"""Parse a file and get a sorted list of expected messages."""
messages = []
for i, line in enumerate(stream):
match = self._EXPECTED_RE.search(line)
if match:
line = match.group('line')
msg_ids = match.group('msgs')
if line is None:
line = i + 1
elif line.startswith('+') or line.startswith('-'):
line = i + 1 + int(line)
else:
line = int(line)
for msg_id in msg_ids.split(','):
# Ignore a spurious message from the license preamble.
if msg_id != 'WITHOUT':
messages.append((line, self._converter(msg_id.strip())))
stream.seek(0)
messages.sort()
return messages
def _ProcessFileAndGetMessages(self, filename):
"""Trap gpylint's output parse it to get messages added."""
errors = erroraccumulator.ErrorAccumulator()
self._runner.Run([filename], errors)
errors = errors.GetErrors()
errors.sort()
return errors
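
To make the annotation format concrete, here is the regex applied to a hypothetical testdata line (the error names are illustrative):

    import re

    _MESSAGE = {'msg': '[A-Z][A-Z_]+'}
    _EXPECTED_RE = re.compile(r'\s*//\s*(?:(?P<line>[+-]?[0-9]+):)?'
                              r'\s*(?P<msgs>%(msg)s(?:,\s*%(msg)s)*)' % _MESSAGE)

    match = _EXPECTED_RE.search('x = 1  // +1: LINE_TOO_LONG, MISSING_SPACE')
    print match.group('line')  # '+1' (error expected one line below this one)
    print match.group('msgs')  # 'LINE_TOO_LONG, MISSING_SPACE'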

tools/closure_linter/closure_linter/common/htmlutil.py | 170

@@ -0,0 +1,170 @@
#!/usr/bin/env python
#
# Copyright 2007 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for dealing with HTML."""
__author__ = ('robbyw@google.com (Robert Walker)')
import cStringIO
import formatter
import htmllib
import HTMLParser
import re
class ScriptExtractor(htmllib.HTMLParser):
"""Subclass of HTMLParser that extracts script contents from an HTML file.
Also inserts appropriate blank lines so that line numbers in the extracted
code match the line numbers in the original HTML.
"""
def __init__(self):
"""Initialize a ScriptExtractor."""
htmllib.HTMLParser.__init__(self, formatter.NullFormatter())
self._in_script = False
self._text = ''
def start_script(self, attrs):
"""Internal handler for the start of a script tag.
Args:
attrs: The attributes of the script tag, as a list of tuples.
"""
for attribute in attrs:
if attribute[0].lower() == 'src':
# Skip script tags with a src specified.
return
self._in_script = True
def end_script(self):
"""Internal handler for the end of a script tag."""
self._in_script = False
def handle_data(self, data):
"""Internal handler for character data.
Args:
data: The character data from the HTML file.
"""
if self._in_script:
# If the last line contains whitespace only, i.e. is just there to
# properly align a </script> tag, strip the whitespace.
if data.rstrip(' \t') != data.rstrip(' \t\n\r\f'):
data = data.rstrip(' \t')
self._text += data
else:
self._AppendNewlines(data)
def handle_comment(self, data):
"""Internal handler for HTML comments.
Args:
data: The text of the comment.
"""
self._AppendNewlines(data)
def _AppendNewlines(self, data):
"""Count the number of newlines in the given string and append them.
This ensures line numbers are correct for reported errors.
Args:
data: The data to count newlines in.
"""
# We append 'x' to both sides of the string to ensure that splitlines
# gives us an accurate count.
for i in xrange(len(('x' + data + 'x').splitlines()) - 1):
self._text += '\n'
def GetScriptLines(self):
"""Return the extracted script lines.
Returns:
The extracted script lines as a list of strings.
"""
return self._text.splitlines()
def GetScriptLines(f):
"""Extract script tag contents from the given HTML file.
Args:
f: The HTML file.
Returns:
Lines in the HTML file that are from script tags.
"""
extractor = ScriptExtractor()
# The HTML parser chokes on text like Array.<!string>, so we patch
# that bug by replacing the < with &lt; - escaping all text inside script
# tags would be better but it's a bit of a catch 22.
contents = f.read()
contents = re.sub(r'<([^\s\w/])',
lambda x: '&lt;%s' % x.group(1),
contents)
extractor.feed(contents)
extractor.close()
return extractor.GetScriptLines()
def StripTags(str):
"""Returns the string with HTML tags stripped.
Args:
str: An html string.
Returns:
The html string with all tags stripped. If there was a parse error, returns
the text successfully parsed so far.
"""
# Brute force approach to stripping as much HTML as possible. If there is a
# parsing error, don't strip text before parse error position, and continue
# trying from there.
final_text = ''
finished = False
while not finished:
try:
strip = _HtmlStripper()
strip.feed(str)
strip.close()
str = strip.get_output()
final_text += str
finished = True
except HTMLParser.HTMLParseError, e:
final_text += str[:e.offset]
str = str[e.offset + 1:]
return final_text
class _HtmlStripper(HTMLParser.HTMLParser):
"""Simple class to strip tags from HTML.
Does so by doing nothing when encountering tags, and appending character data
to a buffer when that is encountered.
"""
def __init__(self):
self.reset()
self.__output = cStringIO.StringIO()
def handle_data(self, d):
self.__output.write(d)
def get_output(self):
return self.__output.getvalue()
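
Both helpers can be exercised directly; a small sketch, with the expected output in comments (the HTML snippets are invented):

    import cStringIO
    from closure_linter.common import htmlutil

    print htmlutil.StripTags('<p>Hello <b>world</b></p>')  # 'Hello world'

    # GetScriptLines pads non-script content with blank lines so the
    # extracted script keeps its original line numbers.
    html = cStringIO.StringIO('<html>\n<script>\nvar x = 1;\n</script>\n</html>')
    print htmlutil.GetScriptLines(html)  # ['', '', 'var x = 1;', '']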

tools/closure_linter/closure_linter/common/lintrunner.py | 39

@@ -0,0 +1,39 @@
#!/usr/bin/env python
#
# Copyright 2008 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Interface for a lint running wrapper."""
__author__ = ('robbyw@google.com (Robert Walker)',
'ajp@google.com (Andy Perelson)')
class LintRunner(object):
"""Interface for a lint running wrapper."""
def __init__(self):
if self.__class__ == LintRunner:
raise NotImplementedError('class LintRunner is abstract')
def Run(self, filenames, error_handler):
"""Run a linter on the given filenames.
Args:
filenames: The filenames to check
error_handler: An ErrorHandler object
Returns:
The error handler, which may have been used to collect error info.
"""

tools/closure_linter/closure_linter/common/matcher.py | 60

@@ -0,0 +1,60 @@
#!/usr/bin/env python
#
# Copyright 2007 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Regular expression based JavaScript matcher classes."""
__author__ = ('robbyw@google.com (Robert Walker)',
'ajp@google.com (Andy Perelson)')
from closure_linter.common import position
from closure_linter.common import tokens
# Shorthand
Token = tokens.Token
Position = position.Position
class Matcher(object):
"""A token matcher.
Specifies a pattern to match, the type of token it represents, what mode the
token changes to, and what mode the token applies to.
Modes allow more advanced grammars to be incorporated, and are also necessary
to tokenize line by line. We can have different patterns apply to different
modes - i.e. looking for documentation while in comment mode.
Attributes:
regex: The regular expression representing this matcher.
type: The type of token indicated by a successful match.
result_mode: The mode to move to after a successful match.
"""
def __init__(self, regex, token_type, result_mode=None, line_start=False):
"""Create a new matcher template.
Args:
regex: The regular expression to match.
token_type: The type of token a successful match indicates.
result_mode: What mode to change to after a successful match. Defaults to
None, which means to not change the current mode.
line_start: Whether this matcher should only match string at the start
of a line.
"""
self.regex = regex
self.type = token_type
self.result_mode = result_mode
self.line_start = line_start

tools/closure_linter/closure_linter/common/position.py | 126

@@ -0,0 +1,126 @@
#!/usr/bin/env python
#
# Copyright 2008 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Classes to represent positions within strings."""
__author__ = ('robbyw@google.com (Robert Walker)',
'ajp@google.com (Andy Perelson)')
class Position(object):
"""Object representing a segment of a string.
Attributes:
start: The index in to the string where the segment starts.
length: The length of the string segment.
"""
def __init__(self, start, length):
"""Initialize the position object.
Args:
start: The start index.
length: The number of characters to include.
"""
self.start = start
self.length = length
def Get(self, string):
"""Returns this range of the given string.
Args:
string: The string to slice.
Returns:
The string within the range specified by this object.
"""
return string[self.start:self.start + self.length]
def Set(self, target, source):
"""Sets this range within the target string to the source string.
Args:
target: The target string.
source: The source string.
Returns:
The resulting string
"""
return target[:self.start] + source + target[self.start + self.length:]
def AtEnd(string):
"""Create a Position representing the end of the given string.
Args:
string: The string to represent the end of.
Returns:
The created Position object.
"""
return Position(len(string), 0)
AtEnd = staticmethod(AtEnd)
def IsAtEnd(self, string):
"""Returns whether this position is at the end of the given string.
Args:
string: The string to test for the end of.
Returns:
Whether this position is at the end of the given string.
"""
return self.start == len(string) and self.length == 0
def AtBeginning():
"""Create a Position representing the beginning of any string.
Returns:
The created Position object.
"""
return Position(0, 0)
AtBeginning = staticmethod(AtBeginning)
def IsAtBeginning(self):
"""Returns whether this position is at the beginning of any string.
Returns:
Whether this position is at the beginning of any string.
"""
return self.start == 0 and self.length == 0
def All(string):
"""Create a Position representing the entire string.
Args:
string: The string to represent the entirety of.
Returns:
The created Position object.
"""
return Position(0, len(string))
All = staticmethod(All)
def Index(index):
"""Returns a Position object for the specified index.
Args:
index: The index to select, inclusively.
Returns:
The created Position object.
"""
return Position(index, 1)
Index = staticmethod(Index)
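
A quick sketch of the Position API on a plain string (the sample line is invented):

    from closure_linter.common import position

    line = 'var foo = 1;'
    pos = position.Position(4, 3)                       # characters 4-6
    print pos.Get(line)                                 # 'foo'
    print pos.Set(line, 'bar')                          # 'var bar = 1;'
    print position.Position.All(line).Get(line)         # the whole line
    print position.Position.AtEnd(line).IsAtEnd(line)   # True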

tools/closure_linter/closure_linter/common/simplefileflags.py | 190

@@ -0,0 +1,190 @@
#!/usr/bin/env python
#
# Copyright 2008 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Determines the list of files to be checked from command line arguments."""
__author__ = ('robbyw@google.com (Robert Walker)',
'ajp@google.com (Andy Perelson)')
import glob
import os
import re
import gflags as flags
FLAGS = flags.FLAGS
flags.DEFINE_multistring(
'recurse',
None,
'Recurse in to the subdirectories of the given path',
short_name='r')
flags.DEFINE_list(
'exclude_directories',
('_demos'),
'Exclude the specified directories (only applicable along with -r or '
'--presubmit)',
short_name='e')
flags.DEFINE_list(
'exclude_files',
('deps.js'),
'Exclude the specified files',
short_name='x')
def MatchesSuffixes(filename, suffixes):
"""Returns whether the given filename matches one of the given suffixes.
Args:
filename: Filename to check.
suffixes: Sequence of suffixes to check.
Returns:
Whether the given filename matches one of the given suffixes.
"""
suffix = filename[filename.rfind('.'):]
return suffix in suffixes
def _GetUserSpecifiedFiles(argv, suffixes):
"""Returns files to be linted, specified directly on the command line.
Can handle the '*' wildcard in filenames, but no other wildcards.
Args:
argv: Sequence of command line arguments. The second and following arguments
are assumed to be files that should be linted.
suffixes: Expected suffixes for the file type being checked.
Returns:
A sequence of files to be linted.
"""
files = argv[1:] or []
all_files = []
lint_files = []
# Perform any necessary globs.
for f in files:
if f.find('*') != -1:
for result in glob.glob(f):
all_files.append(result)
else:
all_files.append(f)
for f in all_files:
if MatchesSuffixes(f, suffixes):
lint_files.append(f)
return lint_files
def _GetRecursiveFiles(suffixes):
"""Returns files to be checked specified by the --recurse flag.
Args:
suffixes: Expected suffixes for the file type being checked.
Returns:
A list of files to be checked.
"""
lint_files = []
# Perform any request recursion
if FLAGS.recurse:
for start in FLAGS.recurse:
for root, subdirs, files in os.walk(start):
for f in files:
if MatchesSuffixes(f, suffixes):
lint_files.append(os.path.join(root, f))
return lint_files
def GetAllSpecifiedFiles(argv, suffixes):
"""Returns all files specified by the user on the commandline.
Args:
argv: Sequence of command line arguments. The second and following arguments
are assumed to be files that should be linted.
suffixes: Expected suffixes for the file type
Returns:
A list of all files specified directly or indirectly (via flags) on the
command line by the user.
"""
files = _GetUserSpecifiedFiles(argv, suffixes)
if FLAGS.recurse:
files += _GetRecursiveFiles(suffixes)
return FilterFiles(files)
def FilterFiles(files):
"""Filters the list of files to be linted be removing any excluded files.
Filters out files excluded using --exclude_files and --exclude_directories.
Args:
files: Sequence of files that needs filtering.
Returns:
Filtered list of files to be linted.
"""
num_files = len(files)
ignore_dirs_regexs = []
for ignore in FLAGS.exclude_directories:
ignore_dirs_regexs.append(re.compile(r'(^|[\\/])%s[\\/]' % ignore))
result_files = []
for f in files:
add_file = True
for exclude in FLAGS.exclude_files:
if f.endswith('/' + exclude) or f == exclude:
add_file = False
break
for ignore in ignore_dirs_regexs:
if ignore.search(f):
# Break out of ignore loop so we don't add to
# filtered files.
add_file = False
break
if add_file:
# Convert everything to absolute paths so we can easily remove duplicates
# using a set.
result_files.append(os.path.abspath(f))
skipped = num_files - len(result_files)
if skipped:
print 'Skipping %d file(s).' % skipped
return set(result_files)
def GetFileList(argv, file_type, suffixes):
"""Parse the flags and return the list of files to check.
Args:
argv: Sequence of command line arguments.
suffixes: Sequence of acceptable suffixes for the file type.
Returns:
The list of files to check.
"""
return sorted(GetAllSpecifiedFiles(argv, suffixes))
def IsEmptyArgumentList(argv):
return not (len(argv[1:]) or FLAGS.recurse)
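
A sketch of how an entry point like gjslint.py might use this module; the paths are hypothetical, and flags must be parsed before GetFileList reads them:

    import gflags as flags
    from closure_linter.common import simplefileflags

    # FLAGS(argv) parses flags and returns the remaining positional arguments.
    argv = flags.FLAGS(['demo', 'lib/a.js', 'README'])
    print simplefileflags.GetFileList(argv, 'JavaScript', ['.js'])
    # -> sorted absolute paths of the .js arguments; README is filtered out
    #    by suffix, and --exclude_files/--exclude_directories also apply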

tools/closure_linter/closure_linter/common/tokenizer.py | 184

@@ -0,0 +1,184 @@
#!/usr/bin/env python
#
# Copyright 2007 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Regular expression based lexer."""
__author__ = ('robbyw@google.com (Robert Walker)',
'ajp@google.com (Andy Perelson)')
from closure_linter.common import tokens
# Shorthand
Type = tokens.TokenType
class Tokenizer(object):
"""General purpose tokenizer.
Attributes:
mode: The latest mode of the tokenizer. This allows patterns to distinguish
if they are mid-comment, mid-parameter list, etc.
matchers: Dictionary of modes to sequences of matchers that define the
patterns to check at any given time.
default_types: Dictionary of modes to types, defining what type to give
non-matched text when in the given mode. Defaults to Type.NORMAL.
"""
def __init__(self, starting_mode, matchers, default_types):
"""Initialize the tokenizer.
Args:
starting_mode: Mode to start in.
matchers: Dictionary of modes to sequences of matchers that defines the
patterns to check at any given time.
default_types: Dictionary of modes to types, defining what type to give
non-matched text when in the given mode. Defaults to Type.NORMAL.
"""
self.__starting_mode = starting_mode
self.matchers = matchers
self.default_types = default_types
def TokenizeFile(self, file):
"""Tokenizes the given file.
Args:
file: An iterable that yields one line of the file at a time.
Returns:
The first token in the file
"""
# The current mode.
self.mode = self.__starting_mode
# The first token in the stream.
self.__first_token = None
# The last token added to the token stream.
self.__last_token = None
# The current line number.
self.__line_number = 0
for line in file:
self.__line_number += 1
self.__TokenizeLine(line)
return self.__first_token
def _CreateToken(self, string, token_type, line, line_number, values=None):
"""Creates a new Token object (or subclass).
Args:
string: The string of input the token represents.
token_type: The type of token.
line: The text of the line this token is in.
line_number: The line number of the token.
values: A dict of named values within the token. For instance, a
function declaration may have a value called 'name' which captures the
name of the function.
Returns:
The newly created Token object.
"""
return tokens.Token(string, token_type, line, line_number, values)
def __TokenizeLine(self, line):
"""Tokenizes the given line.
Args:
line: The contents of the line.
"""
string = line.rstrip('\n\r\f')
line_number = self.__line_number
self.__start_index = 0
if not string:
self.__AddToken(self._CreateToken('', Type.BLANK_LINE, line, line_number))
return
normal_token = ''
index = 0
while index < len(string):
for matcher in self.matchers[self.mode]:
if matcher.line_start and index > 0:
continue
match = matcher.regex.match(string, index)
if match:
if normal_token:
self.__AddToken(
self.__CreateNormalToken(self.mode, normal_token, line,
line_number))
normal_token = ''
# Add the match.
self.__AddToken(self._CreateToken(match.group(), matcher.type, line,
line_number, match.groupdict()))
# Change the mode to the correct one for after this match.
self.mode = matcher.result_mode or self.mode
# Shorten the string to be matched.
index = match.end()
break
else:
# If the for loop finishes naturally (i.e. no matches) we just add the
# first character to the string of consecutive non match characters.
# These will constitute a NORMAL token.
if string:
normal_token += string[index:index + 1]
index += 1
if normal_token:
self.__AddToken(
self.__CreateNormalToken(self.mode, normal_token, line, line_number))
def __CreateNormalToken(self, mode, string, line, line_number):
"""Creates a normal token.
Args:
mode: The current mode.
string: The string to tokenize.
line: The line of text.
line_number: The line number within the file.
Returns:
A Token object, of the default type for the current mode.
"""
type = Type.NORMAL
if mode in self.default_types:
type = self.default_types[mode]
return self._CreateToken(string, type, line, line_number)
def __AddToken(self, token):
"""Add the given token to the token stream.
Args:
token: The token to add.
"""
# Store the first token, or point the previous token to this one.
if not self.__first_token:
self.__first_token = token
else:
self.__last_token.next = token
# Establish the doubly linked list
token.previous = self.__last_token
self.__last_token = token
# Compute the character indices
token.start_index = self.__start_index
self.__start_index += token.length
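
To see the matcher/tokenizer machinery in isolation, a minimal single-mode sketch (the 'number' token type and the mode name are invented for illustration):

    import re
    from closure_linter.common import matcher
    from closure_linter.common import tokenizer

    MODE = 'default'
    number = matcher.Matcher(re.compile(r'[0-9]+'), 'number')

    t = tokenizer.Tokenizer(MODE, {MODE: [number]}, {})
    token = t.TokenizeFile(['width 100\n', 'height 42\n'])
    while token:
      print token  # alternating NORMAL and 'number' tokens, linked via .next
      token = token.next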

tools/closure_linter/closure_linter/common/tokens.py | 125

@@ -0,0 +1,125 @@
#!/usr/bin/env python
#
# Copyright 2008 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Classes to represent tokens and positions within them."""
__author__ = ('robbyw@google.com (Robert Walker)',
'ajp@google.com (Andy Perelson)')
class TokenType(object):
"""Token types common to all languages."""
NORMAL = 'normal'
WHITESPACE = 'whitespace'
BLANK_LINE = 'blank line'
class Token(object):
"""Token class for intelligent text splitting.
The token class represents a string of characters and an identifying type.
Attributes:
type: The type of token.
string: The characters the token comprises.
length: The length of the token.
line: The text of the line the token is found in.
line_number: The number of the line the token is found in.
values: Dictionary of values returned from the tokens regex match.
previous: The token before this one.
next: The token after this one.
start_index: The character index in the line where this token starts.
attached_object: Object containing more information about this token.
metadata: Object containing metadata about this token. Must be added by
a separate metadata pass.
"""
def __init__(self, string, token_type, line, line_number, values=None):
"""Creates a new Token object.
Args:
string: The string of input the token contains.
token_type: The type of token.
line: The text of the line this token is in.
line_number: The line number of the token.
values: A dict of named values within the token. For instance, a
function declaration may have a value called 'name' which captures the
name of the function.
"""
self.type = token_type
self.string = string
self.length = len(string)
self.line = line
self.line_number = line_number
self.values = values
# These parts can only be computed when the file is fully tokenized
self.previous = None
self.next = None
self.start_index = None
# This part is set in statetracker.py
# TODO(robbyw): Wrap this in to metadata
self.attached_object = None
# This part is set in *metadatapass.py
self.metadata = None
def IsFirstInLine(self):
"""Tests if this token is the first token in its line.
Returns:
Whether the token is the first token in its line.
"""
return not self.previous or self.previous.line_number != self.line_number
def IsLastInLine(self):
"""Tests if this token is the last token in its line.
Returns:
Whether the token is the last token in its line.
"""
return not self.next or self.next.line_number != self.line_number
def IsType(self, token_type):
"""Tests if this token is of the given type.
Args:
token_type: The type to test for.
Returns:
True if the type of this token matches the type passed in.
"""
return self.type == token_type
def IsAnyType(self, *token_types):
"""Tests if this token is any of the given types.
Args:
token_types: The types to check. Also accepts a single array.
Returns:
True if the type of this token is any of the types passed in.
"""
if not isinstance(token_types[0], basestring):
return self.type in token_types[0]
else:
return self.type in token_types
def __repr__(self):
return '<Token: %s, "%s", %r, %d, %r>' % (self.type, self.string,
self.values, self.line_number,
self.metadata)
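
And the Token predicates on a standalone token (a trivial sketch with an invented token):

    from closure_linter.common import tokens

    tok = tokens.Token('foo', tokens.TokenType.NORMAL, 'foo bar', 1)
    print tok.IsType(tokens.TokenType.NORMAL)      # True
    print tok.IsAnyType(tokens.TokenType.WHITESPACE,
                        tokens.TokenType.NORMAL)   # True
    print tok.IsFirstInLine(), tok.IsLastInLine()  # True True (no neighbors)
    print tok  # <Token: normal, "foo", None, 1, None>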

tools/closure_linter/closure_linter/ecmalintrules.py | 752

@@ -0,0 +1,752 @@
#!/usr/bin/env python
#
# Copyright 2008 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Core methods for checking EcmaScript files for common style guide violations.
"""
__author__ = ('robbyw@google.com (Robert Walker)',
'ajp@google.com (Andy Perelson)',
'jacobr@google.com (Jacob Richman)')
import re
from closure_linter import checkerbase
from closure_linter import ecmametadatapass
from closure_linter import errors
from closure_linter import indentation
from closure_linter import javascripttokens
from closure_linter import javascripttokenizer
from closure_linter import statetracker
from closure_linter import tokenutil
from closure_linter.common import error
from closure_linter.common import htmlutil
from closure_linter.common import lintrunner
from closure_linter.common import position
from closure_linter.common import tokens
import gflags as flags
FLAGS = flags.FLAGS
flags.DEFINE_boolean('strict', False,
'Whether to validate against the stricter Closure style.')
flags.DEFINE_list('custom_jsdoc_tags', '', 'Extra jsdoc tags to allow')
# TODO(robbyw): Check for extra parens on return statements
# TODO(robbyw): Check for 0px in strings
# TODO(robbyw): Ensure inline jsDoc is in {}
# TODO(robbyw): Check for valid JS types in parameter docs
# Shorthand
Context = ecmametadatapass.EcmaContext
Error = error.Error
Modes = javascripttokenizer.JavaScriptModes
Position = position.Position
Type = javascripttokens.JavaScriptTokenType
class EcmaScriptLintRules(checkerbase.LintRulesBase):
"""EmcaScript lint style checking rules.
Can be used to find common style errors in JavaScript, ActionScript and other
Ecma like scripting languages. Style checkers for Ecma scripting languages
should inherit from this style checker.
Please do not add any state to EcmaScriptLintRules or to any subclasses.
All state should be added to the StateTracker subclass used for a particular
language.
"""
# Static constants.
MAX_LINE_LENGTH = 80
MISSING_PARAMETER_SPACE = re.compile(r',\S')
EXTRA_SPACE = re.compile(r'(\(\s|\s\))')
ENDS_WITH_SPACE = re.compile(r'\s$')
ILLEGAL_TAB = re.compile(r'\t')
# Regex used to split up complex types to check for invalid use of ? and |.
TYPE_SPLIT = re.compile(r'[,<>()]')
# Regex for form of author lines after the @author tag.
AUTHOR_SPEC = re.compile(r'(\s*)[^\s]+@[^(\s]+(\s*)\(.+\)')
# Acceptable tokens to remove for line too long testing.
LONG_LINE_IGNORE = frozenset(['*', '//', '@see'] +
['@%s' % tag for tag in statetracker.DocFlag.HAS_TYPE])
def __init__(self):
"""Initialize this lint rule object."""
checkerbase.LintRulesBase.__init__(self)
def Initialize(self, checker, limited_doc_checks, is_html):
"""Initialize this lint rule object before parsing a new file."""
checkerbase.LintRulesBase.Initialize(self, checker, limited_doc_checks,
is_html)
self._indentation = indentation.IndentationRules()
def HandleMissingParameterDoc(self, token, param_name):
"""Handle errors associated with a parameter missing a @param tag."""
raise TypeError('Abstract method HandleMissingParameterDoc not implemented')
def _CheckLineLength(self, last_token, state):
"""Checks whether the line is too long.
Args:
last_token: The last token in the line.
state: parser_state object that indicates the current state in the page.
"""
# Start from the last token so that we have the flag object attached to
# any DOC_FLAG tokens.
line_number = last_token.line_number
token = last_token
# Build a representation of the string where spaces indicate potential
# line-break locations.
line = []
while token and token.line_number == line_number:
if state.IsTypeToken(token):
line.insert(0, 'x' * len(token.string))
elif token.type in (Type.IDENTIFIER, Type.NORMAL):
# Dots are acceptable places to wrap.
line.insert(0, token.string.replace('.', ' '))
else:
line.insert(0, token.string)
token = token.previous
line = ''.join(line)
line = line.rstrip('\n\r\f')
try:
length = len(unicode(line, 'utf-8'))
except (LookupError, UnicodeDecodeError):
# Unknown encoding. The line length may be wrong, as was originally the
# case for utf-8 (see bug 1735846). For now just accept the default
# length, but as we find problems we can either add test for other
# possible encodings or return without an error to protect against
# false positives at the cost of more false negatives.
length = len(line)
if length > self.MAX_LINE_LENGTH:
# If the line matches one of the exceptions, then it's ok.
for long_line_regexp in self.GetLongLineExceptions():
if long_line_regexp.match(last_token.line):
return
# If the line consists of only one "word", or multiple words but all
# except one are ignorable, then it's ok.
parts = set(line.split())
# We allow two "words" (type and name) when the line contains @param.
max_parts = 1
if '@param' in parts:
max_parts = 2
# Custom tags like @requires may have url like descriptions, so ignore
# the tag, similar to how we handle @see.
custom_tags = set(['@%s' % f for f in FLAGS.custom_jsdoc_tags])
if len(parts.difference(self.LONG_LINE_IGNORE | custom_tags)) > max_parts:
self._HandleError(errors.LINE_TOO_LONG,
'Line too long (%d characters).' % len(line), last_token)
def _CheckJsDocType(self, token):
"""Checks the given type for style errors.
Args:
token: The DOC_FLAG token for the flag whose type to check.
"""
flag = token.attached_object
type = flag.type
if type and not type.isspace():
pieces = self.TYPE_SPLIT.split(type)
if len(pieces) == 1 and type.count('|') == 1 and (
type.endswith('|null') or type.startswith('null|')):
self._HandleError(errors.JSDOC_PREFER_QUESTION_TO_PIPE_NULL,
'Prefer "?Type" to "Type|null": "%s"' % type, token)
for p in pieces:
if p.count('|') and p.count('?'):
# TODO(robbyw): We should do actual parsing of JsDoc types. As is,
# this won't report an error for {number|Array.<string>?}, etc.
self._HandleError(errors.JSDOC_ILLEGAL_QUESTION_WITH_PIPE,
'JsDoc types cannot contain both "?" and "|": "%s"' % p, token)
if FLAGS.strict and (flag.type_start_token.type != Type.DOC_START_BRACE or
flag.type_end_token.type != Type.DOC_END_BRACE):
self._HandleError(errors.MISSING_BRACES_AROUND_TYPE,
'Type must always be surrounded by curly braces.', token)
def _CheckForMissingSpaceBeforeToken(self, token):
"""Checks for a missing space at the beginning of a token.
Reports a MISSING_SPACE error if the token does not begin with a space and
the previous token does not end with a space, when the previous token is on
the same line as the token.
Args:
token: The token being checked
"""
# TODO(user): Check if too many spaces?
if (len(token.string) == len(token.string.lstrip()) and
token.previous and token.line_number == token.previous.line_number and
len(token.previous.string) - len(token.previous.string.rstrip()) == 0):
self._HandleError(
errors.MISSING_SPACE,
'Missing space before "%s"' % token.string,
token,
Position.AtBeginning())
def _ExpectSpaceBeforeOperator(self, token):
"""Returns whether a space should appear before the given operator token.
Args:
token: The operator token.
Returns:
Whether there should be a space before the token.
"""
if token.string == ',' or token.metadata.IsUnaryPostOperator():
return False
# Colons appear in labels, object literals, the case blocks of a switch
# statement, and ternary operators. Only the ternary operator should be
# preceded by a space.
if (token.string == ':' and
token.metadata.context.type in (Context.LITERAL_ELEMENT,
Context.CASE_BLOCK,
Context.STATEMENT)):
return False
if token.metadata.IsUnaryOperator() and token.IsFirstInLine():
return False
return True
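# Illustrative examples of these rules (not exhaustive):
#   x = y      - a space is expected before '='.
#   i++        - '++' is a unary post-operator, so no space before it.
#   {a: 1}     - ':' in an object literal element takes no space before it.
#   c ? a : b  - the ternary ':' is the one colon that keeps a space before it.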
def CheckToken(self, token, state):
"""Checks a token, given the current parser_state, for warnings and errors.
Args:
token: The current token under consideration
state: parser_state object that indicates the current state in the page
"""
# Store some convenience variables
first_in_line = token.IsFirstInLine()
last_in_line = token.IsLastInLine()
last_non_space_token = state.GetLastNonSpaceToken()
type = token.type
# Process the line change.
if not self._is_html and FLAGS.strict:
# TODO(robbyw): Support checking indentation in HTML files.
indentation_errors = self._indentation.CheckToken(token, state)
for indentation_error in indentation_errors:
self._HandleError(*indentation_error)
if last_in_line:
self._CheckLineLength(token, state)
if type == Type.PARAMETERS:
# Find missing spaces in parameter lists.
if self.MISSING_PARAMETER_SPACE.search(token.string):
self._HandleError(errors.MISSING_SPACE, 'Missing space after ","',
token)
# Find extra spaces at the beginning of parameter lists. Make sure
# we aren't at the beginning of a continuing multi-line list.
if not first_in_line:
space_count = len(token.string) - len(token.string.lstrip())
if space_count:
self._HandleError(errors.EXTRA_SPACE, 'Extra space after "("',
token, Position(0, space_count))
elif (type == Type.START_BLOCK and
token.metadata.context.type == Context.BLOCK):
self._CheckForMissingSpaceBeforeToken(token)
elif type == Type.END_BLOCK:
# This check is for object literal end block tokens, but there is no need
# to test that condition since a comma at the end of any other kind of
# block is undoubtedly a parse error.
last_code = token.metadata.last_code
if last_code.IsOperator(','):
self._HandleError(errors.COMMA_AT_END_OF_LITERAL,
'Illegal comma at end of object literal', last_code,
Position.All(last_code.string))
if state.InFunction() and state.IsFunctionClose():
is_immediately_called = (token.next and
token.next.type == Type.START_PAREN)
if state.InTopLevelFunction():
# When the function was top-level and not immediately called, check
# that it's terminated by a semi-colon.
if state.InAssignedFunction():
if not is_immediately_called and (last_in_line or
token.next.type != Type.SEMICOLON):
self._HandleError(errors.MISSING_SEMICOLON_AFTER_FUNCTION,
'Missing semicolon after function assigned to a variable',
token, Position.AtEnd(token.string))
else:
if not last_in_line and token.next.type == Type.SEMICOLON:
self._HandleError(errors.ILLEGAL_SEMICOLON_AFTER_FUNCTION,
'Illegal semicolon after function declaration',
token.next, Position.All(token.next.string))
if (state.InInterfaceMethod() and last_code.type != Type.START_BLOCK):
self._HandleError(errors.INTERFACE_METHOD_CANNOT_HAVE_CODE,
'Interface methods cannot contain code', last_code)
elif (state.IsBlockClose() and
token.next and token.next.type == Type.SEMICOLON):
self._HandleError(errors.REDUNDANT_SEMICOLON,
'No semicolon is required to end a code block',
token.next, Position.All(token.next.string))
elif type == Type.SEMICOLON:
if token.previous and token.previous.type == Type.WHITESPACE:
self._HandleError(errors.EXTRA_SPACE, 'Extra space before ";"',
token.previous, Position.All(token.previous.string))
if token.next and token.next.line_number == token.line_number:
if token.metadata.context.type != Context.FOR_GROUP_BLOCK:
# TODO(robbyw): Error about no multi-statement lines.
pass
elif token.next.type not in (
Type.WHITESPACE, Type.SEMICOLON, Type.END_PAREN):
self._HandleError(errors.MISSING_SPACE,
'Missing space after ";" in for statement',
token.next,
Position.AtBeginning())
last_code = token.metadata.last_code
if last_code and last_code.type == Type.SEMICOLON:
# Allow a double semicolon in for loops, for cases like: for (;;) { }.
# for (;;) { }.
# NOTE(user): This is not a perfect check, and will not throw an error
# for cases like: for (var i = 0;; i < n; i++) {}, but then your code
# probably won't work either.
for_token = tokenutil.CustomSearch(last_code,
lambda token: token.type == Type.KEYWORD and token.string == 'for',
end_func=lambda token: token.type == Type.SEMICOLON,
distance=None,
reverse=True)
if not for_token:
self._HandleError(errors.REDUNDANT_SEMICOLON, 'Redundant semicolon',
token, Position.All(token.string))
elif type == Type.START_PAREN:
if token.previous and token.previous.type == Type.KEYWORD:
self._HandleError(errors.MISSING_SPACE, 'Missing space before "("',
token, Position.AtBeginning())
elif token.previous and token.previous.type == Type.WHITESPACE:
before_space = token.previous.previous
if (before_space and before_space.line_number == token.line_number and
before_space.type == Type.IDENTIFIER):
self._HandleError(errors.EXTRA_SPACE, 'Extra space before "("',
token.previous, Position.All(token.previous.string))
elif type == Type.START_BRACKET:
if (not first_in_line and token.previous.type == Type.WHITESPACE and
last_non_space_token and
last_non_space_token.type in Type.EXPRESSION_ENDER_TYPES):
self._HandleError(errors.EXTRA_SPACE, 'Extra space before "["',
token.previous, Position.All(token.previous.string))
# If the [ token is the first token in a line we shouldn't complain
# about a missing space before [. This is because some Ecma script
# languages allow syntax like:
# [Annotation]
# class MyClass {...}
# So we don't want to blindly warn about missing spaces before [.
# In the future, when rules for computing exactly how many spaces
# lines should be indented are added, then we can return errors for
# [ tokens that are improperly indented.
# For example:
# var someVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongVariableName =
# [a,b,c];
# should trigger a proper indentation warning message as [ is not indented
# by four spaces.
elif (not first_in_line and token.previous and
token.previous.type not in (
[Type.WHITESPACE, Type.START_PAREN, Type.START_BRACKET] +
Type.EXPRESSION_ENDER_TYPES)):
self._HandleError(errors.MISSING_SPACE, 'Missing space before "["',
token, Position.AtBeginning())
elif type in (Type.END_PAREN, Type.END_BRACKET):
# Ensure there is no space before closing parentheses, except when
# it's in a for statement with an omitted section, or when it's at the
# beginning of a line.
if (token.previous and token.previous.type == Type.WHITESPACE and
not token.previous.IsFirstInLine() and
not (last_non_space_token and last_non_space_token.line_number ==
token.line_number and
last_non_space_token.type == Type.SEMICOLON)):
self._HandleError(errors.EXTRA_SPACE, 'Extra space before "%s"' %
token.string, token.previous, Position.All(token.previous.string))
if token.type == Type.END_BRACKET:
last_code = token.metadata.last_code
if last_code.IsOperator(','):
self._HandleError(errors.COMMA_AT_END_OF_LITERAL,
'Illegal comma at end of array literal', last_code,
Position.All(last_code.string))
elif type == Type.WHITESPACE:
if self.ILLEGAL_TAB.search(token.string):
if token.IsFirstInLine():
self._HandleError(errors.ILLEGAL_TAB,
'Illegal tab in whitespace before "%s"' % token.next.string,
token, Position.All(token.string))
else:
self._HandleError(errors.ILLEGAL_TAB,
'Illegal tab in whitespace after "%s"' % token.previous.string,
token, Position.All(token.string))
# Check whitespace length if it's not the first token of the line and
# if it's not immediately before a comment.
if last_in_line:
# Check for extra whitespace at the end of a line.
self._HandleError(errors.EXTRA_SPACE, 'Extra space at end of line',
token, Position.All(token.string))
elif not first_in_line and not token.next.IsComment():
if token.length > 1:
self._HandleError(errors.EXTRA_SPACE, 'Extra space after "%s"' %
token.previous.string, token,
Position(1, len(token.string) - 1))
elif type == Type.OPERATOR:
last_code = token.metadata.last_code
if not self._ExpectSpaceBeforeOperator(token):
if (token.previous and token.previous.type == Type.WHITESPACE and
last_code and last_code.type in (Type.NORMAL, Type.IDENTIFIER)):
self._HandleError(errors.EXTRA_SPACE,
'Extra space before "%s"' % token.string, token.previous,
Position.All(token.previous.string))
elif (token.previous and
not token.previous.IsComment() and
token.previous.type in Type.EXPRESSION_ENDER_TYPES):
self._HandleError(errors.MISSING_SPACE,
'Missing space before "%s"' % token.string, token,
Position.AtBeginning())
# Check that binary operators are not used to start lines.
if ((not last_code or last_code.line_number != token.line_number) and
not token.metadata.IsUnaryOperator()):
self._HandleError(errors.LINE_STARTS_WITH_OPERATOR,
'Binary operator should go on previous line "%s"' % token.string,
token)
elif type == Type.DOC_FLAG:
flag = token.attached_object
if flag.flag_type == 'bug':
# TODO(robbyw): Check for exactly 1 space on the left.
string = token.next.string.lstrip()
string = string.split(' ', 1)[0]
if not string.isdigit():
self._HandleError(errors.NO_BUG_NUMBER_AFTER_BUG_TAG,
'@bug should be followed by a bug number', token)
elif flag.flag_type == 'suppress':
if flag.type is None:
# A syntactically invalid suppress tag will get tokenized as a normal
# flag, indicating an error.
self._HandleError(errors.INCORRECT_SUPPRESS_SYNTAX,
'Invalid suppress syntax: should be @suppress {errortype}. '
'Spaces matter.', token)
elif flag.type not in state.GetDocFlag().SUPPRESS_TYPES:
self._HandleError(errors.INVALID_SUPPRESS_TYPE,
'Invalid suppression type: %s' % flag.type,
token)
elif FLAGS.strict and flag.flag_type == 'author':
# TODO(user): In non-strict mode, check as much of the author tag as
# exists, though the full form checked below isn't required.
string = token.next.string
result = self.AUTHOR_SPEC.match(string)
if not result:
self._HandleError(errors.INVALID_AUTHOR_TAG_DESCRIPTION,
'Author tag line should be of the form: '
'@author foo@somewhere.com (Your Name)',
token.next)
else:
# Check spacing between email address and name. Do this before
# checking earlier spacing so positions are easier to calculate for
# autofixing.
num_spaces = len(result.group(2))
if num_spaces < 1:
self._HandleError(errors.MISSING_SPACE,
'Missing space after email address',
token.next, Position(result.start(2), 0))
elif num_spaces > 1:
self._HandleError(errors.EXTRA_SPACE,
'Extra space after email address',
token.next,
Position(result.start(2) + 1, num_spaces - 1))
# Check for extra spaces before the email address. There can't be too few:
# without at least one space the @author tag wouldn't have matched.
num_spaces = len(result.group(1))
if num_spaces > 1:
self._HandleError(errors.EXTRA_SPACE,
'Extra space before email address',
token.next, Position(1, num_spaces - 1))
elif (flag.flag_type in state.GetDocFlag().HAS_DESCRIPTION and
not self._limited_doc_checks):
if flag.flag_type == 'param':
if flag.name is None:
self._HandleError(errors.MISSING_JSDOC_PARAM_NAME,
'Missing name in @param tag', token)
if not flag.description:
flag_name = token.type
if 'name' in token.values:
flag_name = '@' + token.values['name']
self._HandleError(errors.MISSING_JSDOC_TAG_DESCRIPTION,
'Missing description in %s tag' % flag_name, token)
else:
self._CheckForMissingSpaceBeforeToken(flag.description_start_token)
# We want punctuation to be inside of any tags ending a description,
# so strip tags before checking description. See bug 1127192. Note
# that depending on how lines break, the real description end token
# may consist only of stripped html and the effective end token can
# be different.
end_token = flag.description_end_token
end_string = htmlutil.StripTags(end_token.string).strip()
while (end_string == '' and
end_token.type not in Type.FLAG_ENDING_TYPES):
end_token = end_token.previous
if end_token.type in Type.FLAG_DESCRIPTION_TYPES:
end_string = htmlutil.StripTags(end_token.string).rstrip()
if not (end_string.endswith('.') or end_string.endswith('?') or
end_string.endswith('!')):
# Find the position for the missing punctuation, inside of any html
# tags.
desc_str = end_token.string.rstrip()
while desc_str.endswith('>'):
start_tag_index = desc_str.rfind('<')
if start_tag_index < 0:
break
desc_str = desc_str[:start_tag_index].rstrip()
end_position = Position(len(desc_str), 0)
self._HandleError(
errors.JSDOC_TAG_DESCRIPTION_ENDS_WITH_INVALID_CHARACTER,
('%s descriptions must end with valid punctuation such as a '
'period.' % token.string),
end_token, end_position)
if flag.flag_type in state.GetDocFlag().HAS_TYPE:
if flag.type_start_token is not None:
self._CheckForMissingSpaceBeforeToken(
token.attached_object.type_start_token)
if flag.type and not flag.type.isspace():
self._CheckJsDocType(token)
if type in (Type.DOC_FLAG, Type.DOC_INLINE_FLAG):
if (token.values['name'] not in state.GetDocFlag().LEGAL_DOC and
token.values['name'] not in FLAGS.custom_jsdoc_tags):
self._HandleError(errors.INVALID_JSDOC_TAG,
'Invalid JsDoc tag: %s' % token.values['name'], token)
if (FLAGS.strict and token.values['name'] == 'inheritDoc' and
type == Type.DOC_INLINE_FLAG):
self._HandleError(errors.UNNECESSARY_BRACES_AROUND_INHERIT_DOC,
'Unnecessary braces around @inheritDoc',
token)
elif type == Type.SIMPLE_LVALUE:
identifier = token.values['identifier']
if ((not state.InFunction() or state.InConstructor()) and
not state.InParentheses() and not state.InObjectLiteralDescendant()):
jsdoc = state.GetDocComment()
if not state.HasDocComment(identifier):
# Only test for documentation on identifiers with .s in them to
# avoid checking things like simple variables. We don't require
# documenting assignments to .prototype itself (bug 1880803).
if (not state.InConstructor() and
identifier.find('.') != -1 and not
identifier.endswith('.prototype') and not
self._limited_doc_checks):
comment = state.GetLastComment()
if not (comment and comment.lower().count('jsdoc inherited')):
self._HandleError(errors.MISSING_MEMBER_DOCUMENTATION,
"No docs found for member '%s'" % identifier,
token)
elif jsdoc and (not state.InConstructor() or
identifier.startswith('this.')):
# We are at the top level and the function/member is documented.
if identifier.endswith('_') and not identifier.endswith('__'):
if jsdoc.HasFlag('override'):
self._HandleError(errors.INVALID_OVERRIDE_PRIVATE,
'%s should not override a private member.' % identifier,
jsdoc.GetFlag('override').flag_token)
# Can have a private class which inherits documentation from a
# public superclass.
if jsdoc.HasFlag('inheritDoc') and not jsdoc.HasFlag('constructor'):
self._HandleError(errors.INVALID_INHERIT_DOC_PRIVATE,
'%s should not inherit from a private member.' % identifier,
jsdoc.GetFlag('inheritDoc').flag_token)
if (not jsdoc.HasFlag('private') and
not ('underscore' in jsdoc.suppressions)):
self._HandleError(errors.MISSING_PRIVATE,
'Member "%s" must have @private JsDoc.' %
identifier, token)
if jsdoc.HasFlag('private') and 'underscore' in jsdoc.suppressions:
self._HandleError(errors.UNNECESSARY_SUPPRESS,
'@suppress {underscore} is not necessary with @private',
jsdoc.suppressions['underscore'])
elif jsdoc.HasFlag('private'):
self._HandleError(errors.EXTRA_PRIVATE,
'Member "%s" must not have @private JsDoc' %
identifier, token)
if ((jsdoc.HasFlag('desc') or jsdoc.HasFlag('hidden'))
and not identifier.startswith('MSG_')
and identifier.find('.MSG_') == -1):
# TODO(user): Update error message to show the actual invalid
# tag, either @desc or @hidden.
self._HandleError(errors.INVALID_USE_OF_DESC_TAG,
'Member "%s" should not have @desc JsDoc' % identifier,
token)
# Check for illegally assigning live objects as prototype property values.
index = identifier.find('.prototype.')
# Ignore anything with additional .s after the prototype.
if index != -1 and identifier.find('.', index + 11) == -1:
equal_operator = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES)
next_code = tokenutil.SearchExcept(equal_operator, Type.NON_CODE_TYPES)
if next_code and (
next_code.type in (Type.START_BRACKET, Type.START_BLOCK) or
next_code.IsOperator('new')):
self._HandleError(errors.ILLEGAL_PROTOTYPE_MEMBER_VALUE,
'Member %s cannot have a non-primitive value' % identifier,
token)
elif type == Type.END_PARAMETERS:
# Find extra space at the end of parameter lists. We check the token
# prior to the current one when it is a closing paren.
if (token.previous and token.previous.type == Type.PARAMETERS
and self.ENDS_WITH_SPACE.search(token.previous.string)):
self._HandleError(errors.EXTRA_SPACE, 'Extra space before ")"',
token.previous)
jsdoc = state.GetDocComment()
if state.GetFunction().is_interface:
if token.previous and token.previous.type == Type.PARAMETERS:
self._HandleError(errors.INTERFACE_CONSTRUCTOR_CANNOT_HAVE_PARAMS,
'Interface constructor cannot have parameters',
token.previous)
elif (state.InTopLevel() and jsdoc and not jsdoc.HasFlag('see')
and not jsdoc.InheritsDocumentation()
and not state.InObjectLiteralDescendant() and not
jsdoc.IsInvalidated()):
distance, edit = jsdoc.CompareParameters(state.GetParams())
if distance:
params_iter = iter(state.GetParams())
docs_iter = iter(jsdoc.ordered_params)
for op in edit:
if op == 'I':
# Insertion.
# Parsing doc comments is the same for all languages
# but some languages care about parameters that don't have
# doc comments and some languages don't care.
# Languages that don't allow variables to be typed, such as
# JavaScript, care; languages that do, such as ActionScript or
# Java, don't.
self.HandleMissingParameterDoc(token, params_iter.next())
elif op == 'D':
# Deletion
self._HandleError(errors.EXTRA_PARAMETER_DOCUMENTATION,
'Found docs for non-existing parameter: "%s"' %
docs_iter.next(), token)
elif op == 'S':
# Substitution
self._HandleError(errors.WRONG_PARAMETER_DOCUMENTATION,
'Parameter mismatch: got "%s", expected "%s"' %
(params_iter.next(), docs_iter.next()), token)
else:
# Equality - just advance the iterators
params_iter.next()
docs_iter.next()
elif type == Type.STRING_TEXT:
# If this is the first token after the start of the string, but it's at
# the end of a line, we know we have a multi-line string.
if token.previous.type in (Type.SINGLE_QUOTE_STRING_START,
Type.DOUBLE_QUOTE_STRING_START) and last_in_line:
self._HandleError(errors.MULTI_LINE_STRING,
'Multi-line strings are not allowed', token)
# This check is orthogonal to the ones above, and repeats some types, so
# it is a plain if and not an elif.
if token.type in Type.COMMENT_TYPES:
if self.ILLEGAL_TAB.search(token.string):
self._HandleError(errors.ILLEGAL_TAB,
'Illegal tab in comment "%s"' % token.string, token)
trimmed = token.string.rstrip()
if last_in_line and token.string != trimmed:
# Check for extra whitespace at the end of a line.
self._HandleError(errors.EXTRA_SPACE, 'Extra space at end of line',
token, Position(len(trimmed), len(token.string) - len(trimmed)))
# This check is also orthogonal since it is based on metadata.
if token.metadata.is_implied_semicolon:
self._HandleError(errors.MISSING_SEMICOLON,
'Missing semicolon at end of line', token)
def Finalize(self, state, tokenizer_mode):
"""Perform all checks that need to occur after all lines are processed."""
last_non_space_token = state.GetLastNonSpaceToken()
# Check last line for ending with newline.
if state.GetLastLine() and not (state.GetLastLine().isspace() or
state.GetLastLine().rstrip('\n\r\f') != state.GetLastLine()):
self._HandleError(
errors.FILE_MISSING_NEWLINE,
'File does not end with new line. (%s)' % state.GetLastLine(),
last_non_space_token)
# Check that the mode is not mid comment, argument list, etc.
if not tokenizer_mode == Modes.TEXT_MODE:
self._HandleError(
errors.FILE_IN_BLOCK,
'File ended in mode "%s".' % tokenizer_mode,
last_non_space_token)
try:
self._indentation.Finalize()
except Exception, e:
self._HandleError(
errors.FILE_DOES_NOT_PARSE,
str(e),
last_non_space_token)
def GetLongLineExceptions(self):
"""Gets a list of regexps for lines which can be longer than the limit."""
return []
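# A standalone sketch of the "?Type" preference check from _CheckJsDocType
# above; prefers_question_mark is a hypothetical helper name, the regex is
# the same TYPE_SPLIT used by the class:
import re
TYPE_SPLIT = re.compile(r'[,<>()]')
def prefers_question_mark(js_type):
  # True when the whole type is written "Type|null" or "null|Type",
  # which the linter asks to be written "?Type" instead.
  pieces = TYPE_SPLIT.split(js_type)
  return (len(pieces) == 1 and js_type.count('|') == 1 and
          (js_type.endswith('|null') or js_type.startswith('null|')))
assert prefers_question_mark('Object|null')
assert not prefers_question_mark('Array.<string|null>')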

521
tools/closure_linter/closure_linter/ecmametadatapass.py

@@ -0,0 +1,521 @@
#!/usr/bin/env python
#
# Copyright 2010 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Metadata pass for annotating tokens in EcmaScript files."""
__author__ = ('robbyw@google.com (Robert Walker)')
from closure_linter import javascripttokens
from closure_linter import tokenutil
TokenType = javascripttokens.JavaScriptTokenType
class ParseError(Exception):
"""Exception indicating a parse error at the given token.
Attributes:
token: The token where the parse error occurred.
"""
def __init__(self, token, message=None):
"""Initialize a parse error at the given token with an optional message.
Args:
token: The token where the parse error occurred.
message: A message describing the parse error.
"""
Exception.__init__(self, message)
self.token = token
class EcmaContext(object):
"""Context object for EcmaScript languages.
Attributes:
type: The context type.
start_token: The token where this context starts.
end_token: The token where this context ends.
parent: The parent context.
"""
# The root context.
ROOT = 'root'
# A block of code.
BLOCK = 'block'
# A pseudo-block of code for a given case or default section.
CASE_BLOCK = 'case_block'
# Block of statements in a for loop's parentheses.
FOR_GROUP_BLOCK = 'for_block'
# An implied block of code for one-line if, while, and for statements.
IMPLIED_BLOCK = 'implied_block'
# An index into an array or object.
INDEX = 'index'
# An array literal in [].
ARRAY_LITERAL = 'array_literal'
# An object literal in {}.
OBJECT_LITERAL = 'object_literal'
# An individual element in an array or object literal.
LITERAL_ELEMENT = 'literal_element'
# The portion of a ternary statement between ? and :
TERNARY_TRUE = 'ternary_true'
# The portion of a ternary statement after :
TERNARY_FALSE = 'ternary_false'
# The entire switch statement. This will contain a GROUP with the variable
# and a BLOCK with the code.
# Since that BLOCK is not a normal block, it cannot contain statements except
# for case and default.
SWITCH = 'switch'
# A normal comment.
COMMENT = 'comment'
# A JsDoc comment.
DOC = 'doc'
# An individual statement.
STATEMENT = 'statement'
# Code within parentheses.
GROUP = 'group'
# Parameter names in a function declaration.
PARAMETERS = 'parameters'
# A set of variable declarations appearing after the 'var' keyword.
VAR = 'var'
# Context types that are blocks.
BLOCK_TYPES = frozenset([
ROOT, BLOCK, CASE_BLOCK, FOR_GROUP_BLOCK, IMPLIED_BLOCK])
def __init__(self, type, start_token, parent):
"""Initializes the context object.
Args:
type: The context type.
start_token: The token where this context starts.
parent: The parent context.
"""
self.type = type
self.start_token = start_token
self.end_token = None
self.parent = parent
def __repr__(self):
"""Returns a string representation of the context object."""
stack = []
context = self
while context:
stack.append(context.type)
context = context.parent
return 'Context(%s)' % ' > '.join(stack)
class EcmaMetaData(object):
"""Token metadata for EcmaScript languages.
Attributes:
last_code: The last code token to appear before this one.
context: The context this token appears in.
operator_type: The operator type, will be one of the *_OPERATOR constants
defined below.
"""
UNARY_OPERATOR = 'unary'
UNARY_POST_OPERATOR = 'unary_post'
BINARY_OPERATOR = 'binary'
TERNARY_OPERATOR = 'ternary'
def __init__(self):
"""Initializes a token metadata object."""
self.last_code = None
self.context = None
self.operator_type = None
self.is_implied_semicolon = False
self.is_implied_block = False
self.is_implied_block_close = False
def __repr__(self):
"""Returns a string representation of the context object."""
parts = ['%r' % self.context]
if self.operator_type:
parts.append('optype: %r' % self.operator_type)
if self.is_implied_semicolon:
parts.append('implied;')
return 'MetaData(%s)' % ', '.join(parts)
def IsUnaryOperator(self):
return self.operator_type in (EcmaMetaData.UNARY_OPERATOR,
EcmaMetaData.UNARY_POST_OPERATOR)
def IsUnaryPostOperator(self):
return self.operator_type == EcmaMetaData.UNARY_POST_OPERATOR
class EcmaMetaDataPass(object):
"""A pass that iterates over all tokens and builds metadata about them."""
def __init__(self):
"""Initialize the meta data pass object."""
self.Reset()
def Reset(self):
"""Resets the metadata pass to prepare for the next file."""
self._token = None
self._context = None
self._AddContext(EcmaContext.ROOT)
self._last_code = None
def _CreateContext(self, type):
"""Overridable by subclasses to create the appropriate context type."""
return EcmaContext(type, self._token, self._context)
def _CreateMetaData(self):
"""Overridable by subclasses to create the appropriate metadata type."""
return EcmaMetaData()
def _AddContext(self, type):
"""Adds a context of the given type to the context stack.
Args:
type: The type of context to create
"""
self._context = self._CreateContext(type)
def _PopContext(self):
"""Moves up one level in the context stack.
Returns:
The former context.
Raises:
ParseError: If the root context is popped.
"""
top_context = self._context
top_context.end_token = self._token
self._context = top_context.parent
if self._context:
return top_context
else:
raise ParseError(self._token)
def _PopContextType(self, *stop_types):
"""Pops the context stack until a context of the given type is popped.
Args:
stop_types: The types of context to pop to - stops at the first match.
Returns:
The context object of the given type that was popped.
"""
last = None
while not last or last.type not in stop_types:
last = self._PopContext()
return last
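# For example (illustrative): with a context stack of
# group > statement > root, _PopContextType(EcmaContext.STATEMENT) pops the
# group context, then the statement context, and returns the statement
# context object.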
def _EndStatement(self):
"""Process the end of a statement."""
self._PopContextType(EcmaContext.STATEMENT)
if self._context.type == EcmaContext.IMPLIED_BLOCK:
self._token.metadata.is_implied_block_close = True
self._PopContext()
def _ProcessContext(self):
"""Process the context at the current token.
Returns:
The context that should be assigned to the current token, or None if
the current context after this method should be used.
Raises:
ParseError: When the token appears in an invalid context.
"""
token = self._token
token_type = token.type
if self._context.type in EcmaContext.BLOCK_TYPES:
# Whenever we're in a block, we add a statement context. We make an
# exception for switch statements since they can only contain case: and
# default: and therefore don't directly contain statements.
# The block we add here may be immediately removed in some cases, but
# that causes no harm.
parent = self._context.parent
if not parent or parent.type != EcmaContext.SWITCH:
self._AddContext(EcmaContext.STATEMENT)
elif self._context.type == EcmaContext.ARRAY_LITERAL:
self._AddContext(EcmaContext.LITERAL_ELEMENT)
if token_type == TokenType.START_PAREN:
if self._last_code and self._last_code.IsKeyword('for'):
# for loops contain multiple statements in the group unlike while,
# switch, if, etc.
self._AddContext(EcmaContext.FOR_GROUP_BLOCK)
else:
self._AddContext(EcmaContext.GROUP)
elif token_type == TokenType.END_PAREN:
result = self._PopContextType(EcmaContext.GROUP,
EcmaContext.FOR_GROUP_BLOCK)
keyword_token = result.start_token.metadata.last_code
# keyword_token will not exist if the open paren is at the start of the
# file, for example if all code is wrapped in an immediately executed
# anonymous function.
if keyword_token and keyword_token.string in ('if', 'for', 'while'):
next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES)
if next_code.type != TokenType.START_BLOCK:
# Check for do-while.
is_do_while = False
pre_keyword_token = keyword_token.metadata.last_code
if (pre_keyword_token and
pre_keyword_token.type == TokenType.END_BLOCK):
start_block_token = pre_keyword_token.metadata.context.start_token
is_do_while = start_block_token.metadata.last_code.string == 'do'
# If it's not do-while, it's an implied block.
if not is_do_while:
self._AddContext(EcmaContext.IMPLIED_BLOCK)
token.metadata.is_implied_block = True
return result
# else (not else if) with no open brace after it should be considered the
# start of an implied block, similar to the case with if, for, and while
# above.
elif (token_type == TokenType.KEYWORD and
token.string == 'else'):
next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES)
if (next_code.type != TokenType.START_BLOCK and
(next_code.type != TokenType.KEYWORD or next_code.string != 'if')):
self._AddContext(EcmaContext.IMPLIED_BLOCK)
token.metadata.is_implied_block = True
elif token_type == TokenType.START_PARAMETERS:
self._AddContext(EcmaContext.PARAMETERS)
elif token_type == TokenType.END_PARAMETERS:
return self._PopContextType(EcmaContext.PARAMETERS)
elif token_type == TokenType.START_BRACKET:
if (self._last_code and
self._last_code.type in TokenType.EXPRESSION_ENDER_TYPES):
self._AddContext(EcmaContext.INDEX)
else:
self._AddContext(EcmaContext.ARRAY_LITERAL)
elif token_type == TokenType.END_BRACKET:
return self._PopContextType(EcmaContext.INDEX, EcmaContext.ARRAY_LITERAL)
elif token_type == TokenType.START_BLOCK:
if (self._last_code.type in (TokenType.END_PAREN,
TokenType.END_PARAMETERS) or
self._last_code.IsKeyword('else') or
self._last_code.IsKeyword('do') or
self._last_code.IsKeyword('try') or
self._last_code.IsKeyword('finally') or
(self._last_code.IsOperator(':') and
self._last_code.metadata.context.type == EcmaContext.CASE_BLOCK)):
# else, do, try, and finally all might have no () before {.
# Also, handle the bizarre syntax case 10: {...}.
self._AddContext(EcmaContext.BLOCK)
else:
self._AddContext(EcmaContext.OBJECT_LITERAL)
elif token_type == TokenType.END_BLOCK:
context = self._PopContextType(EcmaContext.BLOCK,
EcmaContext.OBJECT_LITERAL)
if self._context.type == EcmaContext.SWITCH:
# The end of the block also means the end of the switch statement it
# applies to.
return self._PopContext()
return context
elif token.IsKeyword('switch'):
self._AddContext(EcmaContext.SWITCH)
elif (token_type == TokenType.KEYWORD and
token.string in ('case', 'default')):
# Pop up to but not including the switch block.
while self._context.parent.type != EcmaContext.SWITCH:
self._PopContext()
elif token.IsOperator('?'):
self._AddContext(EcmaContext.TERNARY_TRUE)
elif token.IsOperator(':'):
if self._context.type == EcmaContext.OBJECT_LITERAL:
self._AddContext(EcmaContext.LITERAL_ELEMENT)
elif self._context.type == EcmaContext.TERNARY_TRUE:
self._PopContext()
self._AddContext(EcmaContext.TERNARY_FALSE)
# Handle nested ternary statements like:
# foo = bar ? baz ? 1 : 2 : 3
# When we encounter the second ":" the context is
# ternary_false > ternary_true > statement > root
elif (self._context.type == EcmaContext.TERNARY_FALSE and
self._context.parent.type == EcmaContext.TERNARY_TRUE):
self._PopContext() # Leave current ternary false context.
self._PopContext() # Leave current parent ternary true
self._AddContext(EcmaContext.TERNARY_FALSE)
elif self._context.parent.type == EcmaContext.SWITCH:
self._AddContext(EcmaContext.CASE_BLOCK)
elif token.IsKeyword('var'):
self._AddContext(EcmaContext.VAR)
elif token.IsOperator(','):
while self._context.type not in (EcmaContext.VAR,
EcmaContext.ARRAY_LITERAL,
EcmaContext.OBJECT_LITERAL,
EcmaContext.STATEMENT,
EcmaContext.PARAMETERS,
EcmaContext.GROUP):
self._PopContext()
elif token_type == TokenType.SEMICOLON:
self._EndStatement()
def Process(self, first_token):
"""Processes the token stream starting with the given token."""
self._token = first_token
while self._token:
self._ProcessToken()
if self._token.IsCode():
self._last_code = self._token
self._token = self._token.next
try:
self._PopContextType(EcmaContext.ROOT)
except ParseError:
# Ignore the "popped to root" error.
pass
def _ProcessToken(self):
"""Process the given token."""
token = self._token
token.metadata = self._CreateMetaData()
context = (self._ProcessContext() or self._context)
token.metadata.context = context
token.metadata.last_code = self._last_code
# Determine the operator type of the token, if applicable.
if token.type == TokenType.OPERATOR:
token.metadata.operator_type = self._GetOperatorType(token)
# Determine if there is an implied semicolon after the token.
if token.type != TokenType.SEMICOLON:
next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES)
# A statement like if (x) does not need a semicolon after it
is_implied_block = self._context.type == EcmaContext.IMPLIED_BLOCK
is_last_code_in_line = token.IsCode() and (
not next_code or next_code.line_number != token.line_number)
is_continued_identifier = (token.type == TokenType.IDENTIFIER and
token.string.endswith('.'))
is_continued_operator = (token.type == TokenType.OPERATOR and
not token.metadata.IsUnaryPostOperator())
is_continued_dot = token.string == '.'
next_code_is_operator = next_code and next_code.type == TokenType.OPERATOR
next_code_is_dot = next_code and next_code.string == '.'
is_end_of_block = (token.type == TokenType.END_BLOCK and
token.metadata.context.type != EcmaContext.OBJECT_LITERAL)
is_multiline_string = token.type == TokenType.STRING_TEXT
next_code_is_block = next_code and next_code.type == TokenType.START_BLOCK
if (is_last_code_in_line and
self._StatementCouldEndInContext() and
not is_multiline_string and
not is_end_of_block and
not is_continued_identifier and
not is_continued_operator and
not is_continued_dot and
not next_code_is_dot and
not next_code_is_operator and
not is_implied_block and
not next_code_is_block):
token.metadata.is_implied_semicolon = True
self._EndStatement()
def _StatementCouldEndInContext(self):
"""Returns whether the current statement (if any) may end in this context."""
# In the basic statement or variable declaration context, a statement can
# always end here.
if self._context.type in (EcmaContext.STATEMENT, EcmaContext.VAR):
return True
# End of a ternary false branch inside a statement can also be the
# end of the statement, for example:
# var x = foo ? foo.bar() : null
# In this case the statement ends after the null, when the context stack
# looks like ternary_false > var > statement > root.
if (self._context.type == EcmaContext.TERNARY_FALSE and
self._context.parent.type in (EcmaContext.STATEMENT, EcmaContext.VAR)):
return True
# In all other contexts like object and array literals, ternary true, etc.
# the statement can't yet end.
return False
def _GetOperatorType(self, token):
"""Returns the operator type of the given operator token.
Args:
token: The token to get arity for.
Returns:
The type of the operator. One of the *_OPERATOR constants defined in
EcmaMetaData.
"""
if token.string == '?':
return EcmaMetaData.TERNARY_OPERATOR
if token.string in TokenType.UNARY_OPERATORS:
return EcmaMetaData.UNARY_OPERATOR
last_code = token.metadata.last_code
if not last_code or last_code.type == TokenType.END_BLOCK:
return EcmaMetaData.UNARY_OPERATOR
if (token.string in TokenType.UNARY_POST_OPERATORS and
last_code.type in TokenType.EXPRESSION_ENDER_TYPES):
return EcmaMetaData.UNARY_POST_OPERATOR
if (token.string in TokenType.UNARY_OK_OPERATORS and
last_code.type not in TokenType.EXPRESSION_ENDER_TYPES and
last_code.string not in TokenType.UNARY_POST_OPERATORS):
return EcmaMetaData.UNARY_OPERATOR
return EcmaMetaData.BINARY_OPERATOR
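# A minimal sketch of how contexts chain through their parents; the pass
# normally constructs these itself, here they are built directly:
from closure_linter import ecmametadatapass
Context = ecmametadatapass.EcmaContext
root = Context(Context.ROOT, None, None)
stmt = Context(Context.STATEMENT, None, root)
group = Context(Context.GROUP, None, stmt)
print repr(group)  # -> Context(group > statement > root)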

336
tools/closure_linter/closure_linter/error_fixer.py

@@ -0,0 +1,336 @@
#!/usr/bin/env python
#
# Copyright 2007 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Main class responsible for automatically fixing simple style violations."""
__author__ = 'robbyw@google.com (Robert Walker)'
import re
import gflags as flags
from closure_linter import errors
from closure_linter import javascriptstatetracker
from closure_linter import javascripttokens
from closure_linter import tokenutil
from closure_linter.common import errorhandler
# Shorthand
Token = javascripttokens.JavaScriptToken
Type = javascripttokens.JavaScriptTokenType
END_OF_FLAG_TYPE = re.compile(r'(}?\s*)$')
FLAGS = flags.FLAGS
flags.DEFINE_boolean('disable_indentation_fixing', False,
'Whether to disable automatic fixing of indentation.')
class ErrorFixer(errorhandler.ErrorHandler):
"""Object that fixes simple style errors."""
def __init__(self, external_file=None):
"""Initialize the error fixer.
Args:
external_file: If included, all output will be directed to this file
instead of overwriting the files the errors are found in.
"""
self._file_name = None
self._file_token = None
self._external_file = external_file
def HandleFile(self, filename, first_token):
"""Notifies this ErrorPrinter that subsequent errors are in filename.
Args:
filename: The name of the file about to be checked.
first_token: The first token in the file.
"""
self._file_name = filename
self._file_token = first_token
self._file_fix_count = 0
self._file_changed_lines = set()
def _AddFix(self, tokens):
"""Adds the fix to the internal count.
Args:
tokens: The token or sequence of tokens changed to fix an error.
"""
self._file_fix_count += 1
if hasattr(tokens, 'line_number'):
self._file_changed_lines.add(tokens.line_number)
else:
for token in tokens:
self._file_changed_lines.add(token.line_number)
def HandleError(self, error):
"""Attempts to fix the error.
Args:
error: The error object
"""
code = error.code
token = error.token
if code == errors.JSDOC_PREFER_QUESTION_TO_PIPE_NULL:
iterator = token.attached_object.type_start_token
if iterator.type == Type.DOC_START_BRACE or iterator.string.isspace():
iterator = iterator.next
leading_space = len(iterator.string) - len(iterator.string.lstrip())
iterator.string = '%s?%s' % (' ' * leading_space,
iterator.string.lstrip())
# Cover the no outer brace case where the end token is part of the type.
while iterator and iterator != token.attached_object.type_end_token.next:
iterator.string = iterator.string.replace(
'null|', '').replace('|null', '')
iterator = iterator.next
# Create a new flag object with updated type info.
token.attached_object = javascriptstatetracker.JsDocFlag(token)
self._AddFix(token)
elif code in (errors.MISSING_SEMICOLON_AFTER_FUNCTION,
errors.MISSING_SEMICOLON):
semicolon_token = Token(';', Type.SEMICOLON, token.line,
token.line_number)
tokenutil.InsertTokenAfter(semicolon_token, token)
token.metadata.is_implied_semicolon = False
semicolon_token.metadata.is_implied_semicolon = False
self._AddFix(token)
elif code in (errors.ILLEGAL_SEMICOLON_AFTER_FUNCTION,
errors.REDUNDANT_SEMICOLON,
errors.COMMA_AT_END_OF_LITERAL):
tokenutil.DeleteToken(token)
self._AddFix(token)
elif code == errors.INVALID_JSDOC_TAG:
if token.string == '@returns':
token.string = '@return'
self._AddFix(token)
elif code == errors.FILE_MISSING_NEWLINE:
# This error is fixed implicitly by the way we restore the file
self._AddFix(token)
elif code == errors.MISSING_SPACE:
if error.position:
if error.position.IsAtBeginning():
tokenutil.InsertSpaceTokenAfter(token.previous)
elif error.position.IsAtEnd(token.string):
tokenutil.InsertSpaceTokenAfter(token)
else:
token.string = error.position.Set(token.string, ' ')
self._AddFix(token)
elif code == errors.EXTRA_SPACE:
if error.position:
token.string = error.position.Set(token.string, '')
self._AddFix(token)
elif code == errors.JSDOC_TAG_DESCRIPTION_ENDS_WITH_INVALID_CHARACTER:
token.string = error.position.Set(token.string, '.')
self._AddFix(token)
elif code == errors.MISSING_LINE:
if error.position.IsAtBeginning():
tokenutil.InsertLineAfter(token.previous)
else:
tokenutil.InsertLineAfter(token)
self._AddFix(token)
elif code == errors.EXTRA_LINE:
tokenutil.DeleteToken(token)
self._AddFix(token)
elif code == errors.WRONG_BLANK_LINE_COUNT:
if not token.previous:
# TODO(user): Add an insertBefore method to tokenutil.
return
num_lines = error.fix_data
should_delete = False
if num_lines < 0:
num_lines = num_lines * -1
should_delete = True
for i in xrange(1, num_lines + 1):
if should_delete:
# TODO(user): DeleteToken should update line numbers.
tokenutil.DeleteToken(token.previous)
else:
tokenutil.InsertLineAfter(token.previous)
self._AddFix(token)
elif code == errors.UNNECESSARY_DOUBLE_QUOTED_STRING:
end_quote = tokenutil.Search(token, Type.DOUBLE_QUOTE_STRING_END)
if end_quote:
single_quote_start = Token("'", Type.SINGLE_QUOTE_STRING_START,
token.line, token.line_number)
single_quote_end = Token("'", Type.SINGLE_QUOTE_STRING_END,
end_quote.line, end_quote.line_number)
tokenutil.InsertTokenAfter(single_quote_start, token)
tokenutil.InsertTokenAfter(single_quote_end, end_quote)
tokenutil.DeleteToken(token)
tokenutil.DeleteToken(end_quote)
self._AddFix([token, end_quote])
elif code == errors.MISSING_BRACES_AROUND_TYPE:
fixed_tokens = []
start_token = token.attached_object.type_start_token
if start_token.type != Type.DOC_START_BRACE:
leading_space = (len(start_token.string) -
len(start_token.string.lstrip()))
if leading_space:
start_token = tokenutil.SplitToken(start_token, leading_space)
# Fix case where start and end token were the same.
if token.attached_object.type_end_token == start_token.previous:
token.attached_object.type_end_token = start_token
new_token = Token("{", Type.DOC_START_BRACE, start_token.line,
start_token.line_number)
tokenutil.InsertTokenAfter(new_token, start_token.previous)
token.attached_object.type_start_token = new_token
fixed_tokens.append(new_token)
end_token = token.attached_object.type_end_token
if end_token.type != Type.DOC_END_BRACE:
# If the start token was a brace, the end token will be a
# FLAG_ENDING_TYPE token; if there wasn't a starting brace then
# the end token is the last token of the actual type.
last_type = end_token
if not fixed_tokens:
last_type = end_token.previous
while last_type.string.isspace():
last_type = last_type.previous
# If there was no starting brace then a lone end brace wouldn't have
# been the type end token. Now that we've added any missing start brace,
# see if the last effective type token was an end brace.
if last_type.type != Type.DOC_END_BRACE:
trailing_space = (len(last_type.string) -
len(last_type.string.rstrip()))
if trailing_space:
tokenutil.SplitToken(last_type,
len(last_type.string) - trailing_space)
new_token = Token("}", Type.DOC_END_BRACE, last_type.line,
last_type.line_number)
tokenutil.InsertTokenAfter(new_token, last_type)
token.attached_object.type_end_token = new_token
fixed_tokens.append(new_token)
self._AddFix(fixed_tokens)
elif code in (errors.GOOG_REQUIRES_NOT_ALPHABETIZED,
errors.GOOG_PROVIDES_NOT_ALPHABETIZED):
tokens = error.fix_data
strings = map(lambda x: x.string, tokens)
sorted_strings = sorted(strings)
index = 0
changed_tokens = []
for token in tokens:
if token.string != sorted_strings[index]:
token.string = sorted_strings[index]
changed_tokens.append(token)
index += 1
self._AddFix(changed_tokens)
elif code == errors.UNNECESSARY_BRACES_AROUND_INHERIT_DOC:
if token.previous.string == '{' and token.next.string == '}':
tokenutil.DeleteToken(token.previous)
tokenutil.DeleteToken(token.next)
self._AddFix([token])
elif (code == errors.WRONG_INDENTATION and
not FLAGS.disable_indentation_fixing):
token = tokenutil.GetFirstTokenInSameLine(token)
actual = error.position.start
expected = error.position.length
if token.type in (Type.WHITESPACE, Type.PARAMETERS):
token.string = token.string.lstrip() + (' ' * expected)
self._AddFix([token])
else:
# We need to add indentation.
new_token = Token(' ' * expected, Type.WHITESPACE,
token.line, token.line_number)
# Note that we'll never need to add indentation at the first line,
# since it will never be indented. Therefore it's safe to assume
# token.previous exists.
tokenutil.InsertTokenAfter(new_token, token.previous)
self._AddFix([token])
elif code == errors.EXTRA_GOOG_REQUIRE:
fixed_tokens = []
while token:
if token.type == Type.IDENTIFIER:
if token.string not in ['goog.require', 'goog.provide']:
# Stop iterating over tokens once we're out of the requires and
# provides.
break
if token.string == 'goog.require':
# Text of form: goog.require('required'), skipping past open paren
# and open quote to the string text.
required = token.next.next.next.string
if required in error.fix_data:
fixed_tokens.append(token)
# Want to delete: goog.require + open paren + open single-quote +
# text + close single-quote + close paren + semi-colon = 7.
tokenutil.DeleteTokens(token, 7)
token = token.next
self._AddFix(fixed_tokens)
def FinishFile(self):
"""Called when the current file has finished style checking.
Used to go back and fix any errors in the file.
"""
if self._file_fix_count:
f = self._external_file
if not f:
print "Fixed %d errors in %s" % (self._file_fix_count, self._file_name)
f = open(self._file_name, 'w')
token = self._file_token
char_count = 0
while token:
f.write(token.string)
char_count += len(token.string)
if token.IsLastInLine():
f.write('\n')
if char_count > 80 and token.line_number in self._file_changed_lines:
print "WARNING: Line %d of %s is now longer than 80 characters." % (
token.line_number, self._file_name)
char_count = 0
token = token.next
if not self._external_file:
# Close the file if we created it
f.close()
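# A sketch of using ErrorFixer with an external output file, mirroring how
# fixjsstyle_test.py (below) drives it; 'foo.js' is a hypothetical input:
import StringIO
from closure_linter import checker
from closure_linter import error_fixer
out = StringIO.StringIO()
style_checker = checker.JavaScriptStyleChecker(error_fixer.ErrorFixer(out))
style_checker.Check('foo.js')  # Fixed output lands in `out`; foo.js is untouched.
print out.getvalue()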

42
tools/closure_linter/closure_linter/errorrules.py

@@ -0,0 +1,42 @@
#!/usr/bin/env python
#
# Copyright 2010 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Linter error rules class for Closure Linter."""
__author__ = 'robbyw@google.com (Robert Walker)'
import gflags as flags
from closure_linter import errors
FLAGS = flags.FLAGS
flags.DEFINE_boolean('jsdoc', True,
'Whether to report errors for missing JsDoc.')
def ShouldReportError(error):
"""Whether the given error should be reported.
Returns:
True for all errors except missing documentation errors. For these,
it returns the value of the jsdoc flag.
"""
return FLAGS.jsdoc or error not in (
errors.MISSING_PARAMETER_DOCUMENTATION,
errors.MISSING_RETURN_DOCUMENTATION,
errors.MISSING_MEMBER_DOCUMENTATION,
errors.MISSING_PRIVATE,
errors.MISSING_JSDOC_TAG_THIS)
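# A sketch of the jsdoc flag's effect, setting the flag directly the way
# the tests do (on the command line this would be --nojsdoc):
import gflags as flags
from closure_linter import errorrules
from closure_linter import errors
flags.FLAGS.jsdoc = False
assert not errorrules.ShouldReportError(errors.MISSING_MEMBER_DOCUMENTATION)
assert errorrules.ShouldReportError(errors.LINE_TOO_LONG)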

131
tools/closure_linter/closure_linter/errors.py

@@ -0,0 +1,131 @@
#!/usr/bin/env python
#
# Copyright 2007 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Error codes for JavaScript style checker."""
__author__ = ('robbyw@google.com (Robert Walker)',
'ajp@google.com (Andy Perelson)')
def ByName(name):
"""Get the error code for the given error name.
Args:
name: The name of the error.
Returns:
The error code.
"""
return globals()[name]
# "File-fatal" errors - these errors stop further parsing of a single file
FILE_NOT_FOUND = -1
FILE_DOES_NOT_PARSE = -2
# Spacing
EXTRA_SPACE = 1
MISSING_SPACE = 2
EXTRA_LINE = 3
MISSING_LINE = 4
ILLEGAL_TAB = 5
WRONG_INDENTATION = 6
WRONG_BLANK_LINE_COUNT = 7
# Semicolons
MISSING_SEMICOLON = 10
MISSING_SEMICOLON_AFTER_FUNCTION = 11
ILLEGAL_SEMICOLON_AFTER_FUNCTION = 12
REDUNDANT_SEMICOLON = 13
# Miscellaneous
ILLEGAL_PROTOTYPE_MEMBER_VALUE = 100
LINE_TOO_LONG = 110
LINE_STARTS_WITH_OPERATOR = 120
COMMA_AT_END_OF_LITERAL = 121
MULTI_LINE_STRING = 130
UNNECESSARY_DOUBLE_QUOTED_STRING = 131
# Requires, provides
GOOG_REQUIRES_NOT_ALPHABETIZED = 140
GOOG_PROVIDES_NOT_ALPHABETIZED = 141
MISSING_GOOG_REQUIRE = 142
MISSING_GOOG_PROVIDE = 143
EXTRA_GOOG_REQUIRE = 144
# JsDoc
INVALID_JSDOC_TAG = 200
INVALID_USE_OF_DESC_TAG = 201
NO_BUG_NUMBER_AFTER_BUG_TAG = 202
MISSING_PARAMETER_DOCUMENTATION = 210
EXTRA_PARAMETER_DOCUMENTATION = 211
WRONG_PARAMETER_DOCUMENTATION = 212
MISSING_JSDOC_TAG_TYPE = 213
MISSING_JSDOC_TAG_DESCRIPTION = 214
MISSING_JSDOC_PARAM_NAME = 215
OUT_OF_ORDER_JSDOC_TAG_TYPE = 216
MISSING_RETURN_DOCUMENTATION = 217
UNNECESSARY_RETURN_DOCUMENTATION = 218
MISSING_BRACES_AROUND_TYPE = 219
MISSING_MEMBER_DOCUMENTATION = 220
MISSING_PRIVATE = 221
EXTRA_PRIVATE = 222
INVALID_OVERRIDE_PRIVATE = 223
INVALID_INHERIT_DOC_PRIVATE = 224
MISSING_JSDOC_TAG_THIS = 225
UNNECESSARY_BRACES_AROUND_INHERIT_DOC = 226
INVALID_AUTHOR_TAG_DESCRIPTION = 227
JSDOC_PREFER_QUESTION_TO_PIPE_NULL = 230
JSDOC_ILLEGAL_QUESTION_WITH_PIPE = 231
JSDOC_TAG_DESCRIPTION_ENDS_WITH_INVALID_CHARACTER = 240
# TODO(robbyw): Split this in to more specific syntax problems.
INCORRECT_SUPPRESS_SYNTAX = 250
INVALID_SUPPRESS_TYPE = 251
UNNECESSARY_SUPPRESS = 252
# File ending
FILE_MISSING_NEWLINE = 300
FILE_IN_BLOCK = 301
# Interfaces
INTERFACE_CONSTRUCTOR_CANNOT_HAVE_PARAMS = 400
INTERFACE_METHOD_CANNOT_HAVE_CODE = 401
# ActionScript specific errors:
# TODO(user): move these errors to their own file and move all JavaScript
# specific errors to their own file as well.
# All ActionScript specific errors should have error number at least 1000.
FUNCTION_MISSING_RETURN_TYPE = 1132
PARAMETER_MISSING_TYPE = 1133
VAR_MISSING_TYPE = 1134
PARAMETER_MISSING_DEFAULT_VALUE = 1135
IMPORTS_NOT_ALPHABETIZED = 1140
IMPORT_CONTAINS_WILDCARD = 1141
UNUSED_IMPORT = 1142
INVALID_TRACE_SEVERITY_LEVEL = 1250
MISSING_TRACE_SEVERITY_LEVEL = 1251
MISSING_TRACE_MESSAGE = 1252
REMOVE_TRACE_BEFORE_SUBMIT = 1253
REMOVE_COMMENT_BEFORE_SUBMIT = 1254
# End of list of ActionScript specific errors.
NEW_ERRORS = frozenset([
# Errors added after 2.0.2:
WRONG_INDENTATION,
MISSING_SEMICOLON,
# Errors added after 2.2.5:
WRONG_BLANK_LINE_COUNT,
EXTRA_GOOG_REQUIRE,
])
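# ByName is a simple reverse lookup over this module's globals, for example:
from closure_linter import errors
assert errors.ByName('LINE_TOO_LONG') == 110
assert errors.MISSING_SEMICOLON in errors.NEW_ERRORS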

47
tools/closure_linter/closure_linter/fixjsstyle.py

@@ -0,0 +1,47 @@
#!/usr/bin/env python
#
# Copyright 2007 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Automatically fix simple style guide violations."""
__author__ = 'robbyw@google.com (Robert Walker)'
import sys
import gflags as flags
from closure_linter import checker
from closure_linter import error_fixer
from closure_linter.common import simplefileflags as fileflags
def main(argv=None):
"""Main function.
Args:
argv: Sequence of command line arguments.
"""
if argv is None:
argv = flags.FLAGS(sys.argv)
files = fileflags.GetFileList(argv, 'JavaScript', ['.js'])
style_checker = checker.JavaScriptStyleChecker(error_fixer.ErrorFixer())
# Check the list of files.
for filename in files:
style_checker.Check(filename)
if __name__ == '__main__':
main()
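# Example invocation (hypothetical file path; the flags are defined by the
# imported checker modules):
#   $ python fixjsstyle.py --strict path/to/file.js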

61
tools/closure_linter/closure_linter/fixjsstyle_test.py

@@ -0,0 +1,61 @@
#!/usr/bin/env python
#
# Copyright 2008 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Medium tests for the gpylint auto-fixer."""
__author__ = 'robbyw@google.com (Robby Walker)'
import StringIO
import gflags as flags
import unittest as googletest
from closure_linter import checker
from closure_linter import error_fixer
_RESOURCE_PREFIX = 'closure_linter/testdata'
flags.FLAGS.strict = True
flags.FLAGS.limited_doc_files = ('dummy.js', 'externs.js')
flags.FLAGS.closurized_namespaces = ('goog', 'dummy')
class FixJsStyleTest(googletest.TestCase):
"""Test case to for gjslint auto-fixing."""
def testFixJsStyle(self):
# The path formatting below cannot raise IOError; missing testdata will
# surface when the checker opens the file.
input_filename = '%s/fixjsstyle.in.js' % _RESOURCE_PREFIX
golden_filename = '%s/fixjsstyle.out.js' % _RESOURCE_PREFIX
# Autofix the file, sending output to a fake file.
actual = StringIO.StringIO()
style_checker = checker.JavaScriptStyleChecker(
error_fixer.ErrorFixer(actual))
style_checker.Check(input_filename)
# Now compare the files.
actual.seek(0)
expected = open(golden_filename, 'r')
self.assertEqual(actual.readlines(), expected.readlines())
if __name__ == '__main__':
googletest.main()

99
tools/closure_linter/closure_linter/full_test.py

@@ -0,0 +1,99 @@
#!/usr/bin/env python
#
# Copyright 2007 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Full regression-type (Medium) tests for gjslint.
Tests every error that can be thrown by gjslint. Based heavily on
devtools/javascript/gpylint/full_test.py
"""
__author__ = ('robbyw@google.com (Robert Walker)',
'ajp@google.com (Andy Perelson)')
import re
import os
import sys
import unittest
import gflags as flags
import unittest as googletest
from closure_linter import checker
from closure_linter import errors
from closure_linter.common import filetestcase
_RESOURCE_PREFIX = 'closure_linter/testdata'
flags.FLAGS.strict = True
flags.FLAGS.custom_jsdoc_tags = ('customtag', 'requires')
flags.FLAGS.closurized_namespaces = ('goog', 'dummy')
flags.FLAGS.limited_doc_files = ('externs.js', 'dummy.js')
# List of files under testdata to test.
# We need to list files explicitly since pyglib can't list directories.
_TEST_FILES = [
'all_js_wrapped.js',
'blank_lines.js',
'ends_with_block.js',
'externs.js',
'html_parse_error.html',
'indentation.js',
'interface.js',
'jsdoc.js',
'minimal.js',
'other.js',
'require_all_caps.js',
'require_extra.js',
'require_function.js',
'require_function_missing.js',
'require_function_through_both.js',
'require_function_through_namespace.js',
'require_interface.js',
'require_lower_case.js',
'require_numeric.js',
'require_provide_ok.js',
'require_provide_missing.js',
'simple.html',
'spaces.js',
'tokenizer.js',
'unparseable.js',
'utf8.html'
]
class GJsLintTestSuite(unittest.TestSuite):
"""Test suite to run a GJsLintTest for each of several files.
If sys.argv[1:] is non-empty, it is interpreted as a list of filenames in
testdata to test. Otherwise, _TEST_FILES is used.
"""
def __init__(self, tests=()):
unittest.TestSuite.__init__(self, tests)
argv = sys.argv and sys.argv[1:] or []
if argv:
test_files = argv
else:
test_files = _TEST_FILES
for test_file in test_files:
resource_path = os.path.join(_RESOURCE_PREFIX, test_file)
self.addTest(filetestcase.AnnotatedFileTestCase(resource_path,
checker.GJsLintRunner(), errors.ByName))
if __name__ == '__main__':
# Don't let main parse args; it happens in the TestSuite.
googletest.main(argv=sys.argv[0:1], defaultTest='GJsLintTestSuite')

142
tools/closure_linter/closure_linter/gjslint.py

@@ -0,0 +1,142 @@
#!/usr/bin/env python
#
# Copyright 2007 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Checks JavaScript files for common style guide violations.
gjslint.py is designed to be used as a PRESUBMIT script to check for JavaScript
style guide violations. As of now, it checks for the following violations:
* Missing and extra spaces
* Lines longer than 80 characters
* Missing newline at end of file
* Missing semicolon after function declaration
* Valid JsDoc including parameter matching
Someday it will validate to the best of its ability against the entirety of the
JavaScript style guide.
This file is a front end that parses arguments and flags. The core of the code
is in tokenizer.py and checker.py.
"""
__author__ = ('robbyw@google.com (Robert Walker)',
'ajp@google.com (Andy Perelson)')
import sys
import time
from closure_linter import checker
from closure_linter import errors
from closure_linter.common import errorprinter
from closure_linter.common import simplefileflags as fileflags
import gflags as flags
FLAGS = flags.FLAGS
flags.DEFINE_boolean('unix_mode', False,
'Whether to emit warnings in standard unix format.')
flags.DEFINE_boolean('beep', True, 'Whether to beep when errors are found.')
flags.DEFINE_boolean('time', False, 'Whether to emit timing statistics.')
flags.DEFINE_boolean('check_html', False,
'Whether to check JavaScript in HTML files.')
flags.DEFINE_boolean('summary', False,
'Whether to show an error count summary.')
GJSLINT_ONLY_FLAGS = ['--unix_mode', '--beep', '--nobeep', '--time',
'--check_html', '--summary']
def FormatTime(t):
"""Formats a duration as a human-readable string.
Args:
t: A duration in seconds.
Returns:
A formatted duration string.
"""
if t < 1:
return '%dms' % round(t * 1000)
else:
return '%.2fs' % t
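# Illustrative values (a sketch, not part of the original source):
#   FormatTime(0.25)  # -> '250ms'
#   FormatTime(2.5)   # -> '2.50s'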
def main(argv=None):
"""Main function.
Args:
argv: Sequence of command line arguments.
"""
if argv is None:
argv = flags.FLAGS(sys.argv)
if FLAGS.time:
start_time = time.time()
suffixes = ['.js']
if FLAGS.check_html:
suffixes += ['.html', '.htm']
files = fileflags.GetFileList(argv, 'JavaScript', suffixes)
error_handler = None
if FLAGS.unix_mode:
error_handler = errorprinter.ErrorPrinter(errors.NEW_ERRORS)
error_handler.SetFormat(errorprinter.UNIX_FORMAT)
runner = checker.GJsLintRunner()
result = runner.Run(files, error_handler)
result.PrintSummary()
exit_code = 0
if result.HasOldErrors():
exit_code += 1
if result.HasNewErrors():
exit_code += 2
if exit_code:
if FLAGS.summary:
result.PrintFileSummary()
if FLAGS.beep:
# Make a beep noise.
sys.stdout.write(chr(7))
# Write out instructions for using fixjsstyle script to fix some of the
# reported errors.
fix_args = []
for flag in sys.argv[1:]:
for f in GJSLINT_ONLY_FLAGS:
if flag.startswith(f):
break
else:
fix_args.append(flag)
print """
Some of the errors reported by GJsLint may be auto-fixable using the script
fixjsstyle. Please double check any changes it makes and report any bugs. The
script can be run by executing:
fixjsstyle %s
""" % ' '.join(fix_args)
if FLAGS.time:
print 'Done in %s.' % FormatTime(time.time() - start_time)
sys.exit(exit_code)
if __name__ == '__main__':
main()

543
tools/closure_linter/closure_linter/indentation.py

@@ -0,0 +1,543 @@
#!/usr/bin/env python
#
# Copyright 2010 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Methods for checking EcmaScript files for indentation issues."""
__author__ = ('robbyw@google.com (Robert Walker)')
from closure_linter import ecmametadatapass
from closure_linter import errors
from closure_linter import javascripttokens
from closure_linter import tokenutil
from closure_linter.common import error
from closure_linter.common import position
import gflags as flags
flags.DEFINE_boolean('debug_indentation', False,
'Whether to print debugging information for indentation.')
# Shorthand
Context = ecmametadatapass.EcmaContext
Error = error.Error
Position = position.Position
Type = javascripttokens.JavaScriptTokenType
# The general approach:
#
# 1. Build a stack of tokens that can affect indentation.
# For each token, we determine if it is a block or continuation token.
# Some tokens need to be temporarily overwritten in case they are removed
# before the end of the line.
# Much of the work here is determining which tokens to keep on the stack
# at each point. Operators, for example, should be removed once their
# expression or line is gone, while parentheses must stay until the matching
# end parentheses is found.
#
# 2. Given that stack, determine the allowable indentations.
# Due to flexible indentation rules in JavaScript, there may be many
# allowable indentations for each stack. We follow the general
# "no false positives" approach of GJsLint and build the most permissive
# set possible.
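#
# As an illustrative sketch (not part of the original source), for:
#   foo(bar,
#       baz);
# the '(' stays on the stack until its matching ')' is popped, so the second
# line may either use a normal four-space continuation indent or line up with
# the column just after the '(' - one of the "hard stops" described below.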
class TokenInfo(object):
"""Stores information about a token.
Attributes:
token: The token
is_block: Whether the token represents a block indentation.
is_transient: Whether the token should be automatically removed without
finding a matching end token.
overridden_by: TokenInfo for a token that overrides the indentation that
this token would require.
is_permanent_override: Whether the override on this token should persist
even after the overriding token is removed from the stack. For example:
x([
1],
2);
needs this to be set so the last line is not required to be a continuation
indent.
line_number: The effective line number of this token. Will either be the
actual line number or the one before it in the case of a mis-wrapped
operator.
"""
def __init__(self, token, is_block=False):
"""Initializes a TokenInfo object.
Args:
token: The token
is_block: Whether the token represents a block indentation.
"""
self.token = token
self.overridden_by = None
self.is_permanent_override = False
self.is_block = is_block
self.is_transient = not is_block and token.type not in (
Type.START_PAREN, Type.START_PARAMETERS)
self.line_number = token.line_number
def __repr__(self):
result = '\n %s' % self.token
if self.overridden_by:
result = '%s OVERRIDDEN [by "%s"]' % (
result, self.overridden_by.token.string)
result += ' {is_block: %s, is_transient: %s}' % (
self.is_block, self.is_transient)
return result
class IndentationRules(object):
"""EmcaScript indentation rules.
Can be used to find common indentation errors in JavaScript, ActionScript and
other Ecma like scripting languages.
"""
def __init__(self):
"""Initializes the IndentationRules checker."""
self._stack = []
# Map from line number to number of characters it is off in indentation.
self._start_index_offset = {}
def Finalize(self):
if self._stack:
old_stack = self._stack
self._stack = []
raise Exception("INTERNAL ERROR: indentation stack is not empty: %r" %
old_stack)
def CheckToken(self, token, state):
"""Checks a token for indentation errors.
Args:
token: The current token under consideration
state: Additional information about the current tree state
Returns:
An error array [error code, error string, error token] if the token is
improperly indented, or None if indentation is correct.
"""
token_type = token.type
indentation_errors = []
stack = self._stack
is_first = self._IsFirstNonWhitespaceTokenInLine(token)
# Add tokens that could decrease indentation before checking.
if token_type == Type.END_PAREN:
self._PopTo(Type.START_PAREN)
elif token_type == Type.END_PARAMETERS:
self._PopTo(Type.START_PARAMETERS)
elif token_type == Type.END_BRACKET:
self._PopTo(Type.START_BRACKET)
elif token_type == Type.END_BLOCK:
self._PopTo(Type.START_BLOCK)
elif token_type == Type.KEYWORD and token.string in ('case', 'default'):
self._Add(self._PopTo(Type.START_BLOCK))
elif is_first and token.string == '.':
# This token should have been on the previous line, so treat it as if it
# was there.
info = TokenInfo(token)
info.line_number = token.line_number - 1
self._Add(info)
elif token_type == Type.SEMICOLON:
self._PopTransient()
not_binary_operator = (token_type != Type.OPERATOR or
token.metadata.IsUnaryOperator())
not_dot = token.string != '.'
if is_first and not_binary_operator and not_dot and token.type not in (
Type.COMMENT, Type.DOC_PREFIX, Type.STRING_TEXT):
if flags.FLAGS.debug_indentation:
print 'Line #%d: stack %r' % (token.line_number, stack)
# Ignore lines that start in JsDoc since we don't check them properly yet.
# TODO(robbyw): Support checking JsDoc indentation.
# Ignore lines that start as multi-line strings since indentation is N/A.
# Ignore lines that start with operators since we report that already.
# Ignore lines with tabs since we report that already.
expected = self._GetAllowableIndentations()
actual = self._GetActualIndentation(token)
# Special case comments describing else, case, and default. Allow them
# to outdent to the parent block.
if token_type in Type.COMMENT_TYPES:
next_code = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES)
if next_code and next_code.type == Type.END_BLOCK:
next_code = tokenutil.SearchExcept(next_code, Type.NON_CODE_TYPES)
if next_code and next_code.string in ('else', 'case', 'default'):
# TODO(robbyw): This almost certainly introduces false negatives.
expected |= self._AddToEach(expected, -2)
if actual >= 0 and actual not in expected:
expected = sorted(expected)
indentation_errors.append([
errors.WRONG_INDENTATION,
'Wrong indentation: expected any of {%s} but got %d' % (
', '.join(
['%d' % x for x in expected]), actual),
token,
Position(actual, expected[0])])
self._start_index_offset[token.line_number] = expected[0] - actual
# Add tokens that could increase indentation.
if token_type == Type.START_BRACKET:
self._Add(TokenInfo(token=token,
is_block=token.metadata.context.type == Context.ARRAY_LITERAL))
elif token_type == Type.START_BLOCK or token.metadata.is_implied_block:
self._Add(TokenInfo(token=token, is_block=True))
elif token_type in (Type.START_PAREN, Type.START_PARAMETERS):
self._Add(TokenInfo(token=token, is_block=False))
elif token_type == Type.KEYWORD and token.string == 'return':
self._Add(TokenInfo(token))
elif not token.IsLastInLine() and (
token.IsAssignment() or token.IsOperator('?')):
self._Add(TokenInfo(token=token))
# Handle implied block closes.
if token.metadata.is_implied_block_close:
self._PopToImpliedBlock()
# Add some tokens only if they appear at the end of the line.
is_last = self._IsLastCodeInLine(token)
if is_last:
if token_type == Type.OPERATOR:
if token.string == ':':
if (stack and stack[-1].token.string == '?'):
# When a ternary : is on a different line than its '?', it doesn't
# add indentation.
if (token.line_number == stack[-1].token.line_number):
self._Add(TokenInfo(token))
elif token.metadata.context.type == Context.CASE_BLOCK:
# Pop transient tokens from say, line continuations, e.g.,
# case x.
# y:
# Want to pop the transient 4 space continuation indent.
self._PopTransient()
# Starting the body of the case statement, which is a type of
# block.
self._Add(TokenInfo(token=token, is_block=True))
elif token.metadata.context.type == Context.LITERAL_ELEMENT:
# When in an object literal, acts as operator indicating line
# continuations.
self._Add(TokenInfo(token))
else:
# ':' might also be a statement label, no effect on indentation in
# this case.
pass
elif token.string != ',':
self._Add(TokenInfo(token))
else:
# The token is a comma.
if token.metadata.context.type == Context.VAR:
self._Add(TokenInfo(token))
elif token.metadata.context.type != Context.PARAMETERS:
self._PopTransient()
elif (token.string.endswith('.')
and token_type in (Type.IDENTIFIER, Type.NORMAL)):
self._Add(TokenInfo(token))
elif token_type == Type.PARAMETERS and token.string.endswith(','):
# Parameter lists.
self._Add(TokenInfo(token))
elif token.metadata.is_implied_semicolon:
self._PopTransient()
elif token.IsAssignment():
self._Add(TokenInfo(token))
return indentation_errors
def _AddToEach(self, original, amount):
"""Returns a new set with the given amount added to each element.
Args:
original: The original set of numbers
amount: The amount to add to each element
Returns:
A new set containing each element of the original set added to the amount.
"""
return set([x + amount for x in original])
_HARD_STOP_TYPES = (Type.START_PAREN, Type.START_PARAMETERS,
Type.START_BRACKET)
_HARD_STOP_STRINGS = ('return', '?')
def _IsHardStop(self, token):
"""Determines if the given token can have a hard stop after it.
Hard stops are indentations defined by the position of another token as in
indentation lined up with return, (, [, and ?.
"""
return (token.type in self._HARD_STOP_TYPES or
token.string in self._HARD_STOP_STRINGS or
token.IsAssignment())
def _GetAllowableIndentations(self):
"""Computes the set of allowable indentations.
Returns:
The set of allowable indentations, given the current stack.
"""
expected = set([0])
hard_stops = set([])
# Whether the tokens are still in the same continuation, meaning additional
# indentation is optional. As an example:
# x = 5 +
# 6 +
# 7;
# The second '+' does not add any required indentation.
in_same_continuation = False
for token_info in self._stack:
token = token_info.token
# Handle normal additive indentation tokens.
if not token_info.overridden_by and token.string != 'return':
if token_info.is_block:
expected = self._AddToEach(expected, 2)
hard_stops = self._AddToEach(hard_stops, 2)
in_same_continuation = False
elif in_same_continuation:
expected |= self._AddToEach(expected, 4)
hard_stops |= self._AddToEach(hard_stops, 4)
else:
expected = self._AddToEach(expected, 4)
hard_stops |= self._AddToEach(hard_stops, 4)
in_same_continuation = True
# Handle hard stops after (, [, return, =, and ?
if self._IsHardStop(token):
override_is_hard_stop = (token_info.overridden_by and
self._IsHardStop(token_info.overridden_by.token))
if not override_is_hard_stop:
start_index = token.start_index
if token.line_number in self._start_index_offset:
start_index += self._start_index_offset[token.line_number]
if (token.type in (Type.START_PAREN, Type.START_PARAMETERS) and
not token_info.overridden_by):
hard_stops.add(start_index + 1)
elif token.string == 'return' and not token_info.overridden_by:
hard_stops.add(start_index + 7)
elif (token.type == Type.START_BRACKET):
hard_stops.add(start_index + 1)
elif token.IsAssignment():
hard_stops.add(start_index + len(token.string) + 1)
elif token.IsOperator('?') and not token_info.overridden_by:
hard_stops.add(start_index + 2)
return (expected | hard_stops) or set([0])
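# Illustrative sketch of the computed sets (assumed example, not from the
# original source): with 'var x = foo(' on the stack, the continuation rule
# allows indents of 4 or 8, and the '=' and '(' each add a hard stop (the
# column just past '= ' and the column just after '(' respectively).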
def _GetActualIndentation(self, token):
"""Gets the actual indentation of the line containing the given token.
Args:
token: Any token on the line.
Returns:
The actual indentation of the line containing the given token. Returns
-1 if this line should be ignored due to the presence of tabs.
"""
# Move to the first token in the line
token = tokenutil.GetFirstTokenInSameLine(token)
# If it is whitespace, it is the indentation.
if token.type == Type.WHITESPACE:
if token.string.find('\t') >= 0:
return -1
else:
return len(token.string)
elif token.type == Type.PARAMETERS:
return len(token.string) - len(token.string.lstrip())
else:
return 0
def _IsFirstNonWhitespaceTokenInLine(self, token):
"""Determines if the given token is the first non-space token on its line.
Args:
token: The token.
Returns:
True if the token is the first non-whitespace token on its line.
"""
if token.type in (Type.WHITESPACE, Type.BLANK_LINE):
return False
if token.IsFirstInLine():
return True
return (token.previous and token.previous.IsFirstInLine() and
token.previous.type == Type.WHITESPACE)
def _IsLastCodeInLine(self, token):
"""Determines if the given token is the last code token on its line.
Args:
token: The token.
Returns:
True if the token is the last code token on its line.
"""
if token.type in Type.NON_CODE_TYPES:
return False
start_token = token
while True:
token = token.next
if not token or token.line_number != start_token.line_number:
return True
if token.type not in Type.NON_CODE_TYPES:
return False
def _Add(self, token_info):
"""Adds the given token info to the stack.
Args:
token_info: The token information to add.
"""
if self._stack and self._stack[-1].token == token_info.token:
# Don't add the same token twice.
return
if token_info.is_block or token_info.token.type == Type.START_PAREN:
index = 1
while index <= len(self._stack):
stack_info = self._stack[-index]
stack_token = stack_info.token
if stack_info.line_number == token_info.line_number:
# In general, tokens only override each other when they are on
# the same line.
stack_info.overridden_by = token_info
if (token_info.token.type == Type.START_BLOCK and
(stack_token.IsAssignment() or
stack_token.type in (Type.IDENTIFIER, Type.START_PAREN))):
# Multi-line blocks have lasting overrides, as in:
# callFn({
# a: 10
# },
# 30);
close_block = token_info.token.metadata.context.end_token
stack_info.is_permanent_override = \
close_block.line_number != token_info.token.line_number
elif (token_info.token.type == Type.START_BLOCK and
token_info.token.metadata.context.type == Context.BLOCK and
(stack_token.IsAssignment() or
stack_token.type == Type.IDENTIFIER)):
# When starting a function block, the override can transcend lines.
# For example
# long.long.name = function(
# a) {
# In this case the { and the = are on different lines. But the
# override should still apply.
stack_info.overridden_by = token_info
stack_info.is_permanent_override = True
else:
break
index += 1
self._stack.append(token_info)
def _Pop(self):
"""Pops the top token from the stack.
Returns:
The popped token info.
"""
token_info = self._stack.pop()
if token_info.token.type not in (Type.START_BLOCK, Type.START_BRACKET):
# Remove any temporary overrides.
self._RemoveOverrides(token_info)
else:
# For braces and brackets, which can be object and array literals, remove
# overrides when the literal is closed on the same line.
token_check = token_info.token
same_type = token_check.type
goal_type = None
if token_info.token.type == Type.START_BRACKET:
goal_type = Type.END_BRACKET
else:
goal_type = Type.END_BLOCK
line_number = token_info.token.line_number
count = 0
while token_check and token_check.line_number == line_number:
if token_check.type == goal_type:
count -= 1
if not count:
self._RemoveOverrides(token_info)
break
if token_check.type == same_type:
count += 1
token_check = token_check.next
return token_info
def _PopToImpliedBlock(self):
"""Pops the stack until an implied block token is found."""
while not self._Pop().token.metadata.is_implied_block:
pass
def _PopTo(self, stop_type):
"""Pops the stack until a token of the given type is popped.
Args:
stop_type: The type of token to pop to.
Returns:
The token info of the given type that was popped.
"""
last = None
while True:
last = self._Pop()
if last.token.type == stop_type:
break
return last
def _RemoveOverrides(self, token_info):
"""Marks any token that was overridden by this token as active again.
Args:
token_info: The token that is being removed from the stack.
"""
for stack_token in self._stack:
if (stack_token.overridden_by == token_info and
not stack_token.is_permanent_override):
stack_token.overridden_by = None
def _PopTransient(self):
"""Pops all transient tokens - i.e. not blocks, literals, or parens."""
while self._stack and self._stack[-1].is_transient:
self._Pop()

395
tools/closure_linter/closure_linter/javascriptlintrules.py

@@ -0,0 +1,395 @@
#!/usr/bin/env python
#
# Copyright 2008 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Methods for checking JS files for common style guide violations.
These style guide violations apply only to JavaScript, not to all Ecma
scripting languages.
"""
__author__ = ('robbyw@google.com (Robert Walker)',
'ajp@google.com (Andy Perelson)',
'jacobr@google.com (Jacob Richman)')
import gflags as flags
from closure_linter import ecmalintrules
from closure_linter import errors
from closure_linter import javascripttokenizer
from closure_linter import javascripttokens
from closure_linter import tokenutil
from closure_linter.common import error
from closure_linter.common import position
FLAGS = flags.FLAGS
flags.DEFINE_list('closurized_namespaces', '',
'Namespace prefixes, used for testing of '
'goog.provide/require.')
flags.DEFINE_list('ignored_extra_namespaces', '',
'Fully qualified namespaces that should not be reported '
'as extra by the linter.')
# Shorthand
Error = error.Error
Position = position.Position
Type = javascripttokens.JavaScriptTokenType
class JavaScriptLintRules(ecmalintrules.EcmaScriptLintRules):
"""JavaScript lint rules that catch JavaScript specific style errors."""
def HandleMissingParameterDoc(self, token, param_name):
"""Handle errors associated with a parameter missing a param tag."""
self._HandleError(errors.MISSING_PARAMETER_DOCUMENTATION,
'Missing docs for parameter: "%s"' % param_name, token)
def __ContainsRecordType(self, token):
"""Check whether the given token contains a record type.
Args:
token: The token being checked
"""
# If we see more than one left-brace in the string of an annotation token,
# then there's a record type in there.
return (token and token.type == Type.DOC_FLAG and
token.attached_object.type is not None and
token.attached_object.type.find('{') != token.string.rfind('{'))
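# Illustrative sketch (assumed example): an annotation such as
#   @param {{name: string, count: number}} info
# carries an extra '{' inside the type spec, so the brace positions above
# disagree and the comment is treated as containing a record type.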
def CheckToken(self, token, state):
"""Checks a token, given the current parser_state, for warnings and errors.
Args:
token: The current token under consideration
state: parser_state object that indicates the current state in the page
"""
if self.__ContainsRecordType(token):
# We should bail out and not emit any warnings for this annotation.
# TODO(nicksantos): Support record types for real.
state.GetDocComment().Invalidate()
return
# Call the base class's CheckToken function.
super(JavaScriptLintRules, self).CheckToken(token, state)
# Store some convenience variables
first_in_line = token.IsFirstInLine()
last_in_line = token.IsLastInLine()
type = token.type
if type == Type.DOC_FLAG:
flag = token.attached_object
if flag.flag_type == 'param' and flag.name_token is not None:
self._CheckForMissingSpaceBeforeToken(
token.attached_object.name_token)
if flag.flag_type in state.GetDocFlag().HAS_TYPE:
# Check for both missing type token and empty type braces '{}'
# Missing suppress types are reported separately and we allow enums
# without types.
if (flag.flag_type not in ('suppress', 'enum') and
(flag.type is None or flag.type == '' or flag.type.isspace())):
self._HandleError(errors.MISSING_JSDOC_TAG_TYPE,
'Missing type in %s tag' % token.string, token)
elif flag.name_token and flag.type_end_token and tokenutil.Compare(
flag.type_end_token, flag.name_token) > 0:
self._HandleError(
errors.OUT_OF_ORDER_JSDOC_TAG_TYPE,
'Type should be immediately after %s tag' % token.string,
token)
elif type == Type.DOUBLE_QUOTE_STRING_START:
next = token.next
while next.type == Type.STRING_TEXT:
if javascripttokenizer.JavaScriptTokenizer.SINGLE_QUOTE.search(
next.string):
break
next = next.next
else:
self._HandleError(
errors.UNNECESSARY_DOUBLE_QUOTED_STRING,
'Single-quoted string preferred over double-quoted string.',
token,
Position.All(token.string))
elif type == Type.END_DOC_COMMENT:
if (FLAGS.strict and not self._is_html and state.InTopLevel() and
not state.InBlock()):
# Check if we're in a fileoverview or constructor JsDoc.
doc_comment = state.GetDocComment()
is_constructor = (doc_comment.HasFlag('constructor') or
doc_comment.HasFlag('interface'))
is_file_overview = doc_comment.HasFlag('fileoverview')
# If the comment is not a file overview, and it does not immediately
# precede some code, skip it.
# NOTE: The tokenutil methods are not used here because of their
# behavior at the top of a file.
next = token.next
if (not next or
(not is_file_overview and next.type in Type.NON_CODE_TYPES)):
return
# Find the start of this block (include comments above the block, unless
# this is a file overview).
block_start = doc_comment.start_token
if not is_file_overview:
token = block_start.previous
while token and token.type in Type.COMMENT_TYPES:
block_start = token
token = token.previous
# Count the number of blank lines before this block.
blank_lines = 0
token = block_start.previous
while token and token.type in [Type.WHITESPACE, Type.BLANK_LINE]:
if token.type == Type.BLANK_LINE:
# A blank line.
blank_lines += 1
elif token.type == Type.WHITESPACE and not token.line.strip():
# A line with only whitespace on it.
blank_lines += 1
token = token.previous
# Log errors.
error_message = False
expected_blank_lines = 0
if is_file_overview and blank_lines == 0:
error_message = 'Should have a blank line before a file overview.'
expected_blank_lines = 1
elif is_constructor and blank_lines != 3:
error_message = ('Should have 3 blank lines before a constructor/'
'interface.')
expected_blank_lines = 3
elif not is_file_overview and not is_constructor and blank_lines != 2:
error_message = 'Should have 2 blank lines between top-level blocks.'
expected_blank_lines = 2
if error_message:
self._HandleError(errors.WRONG_BLANK_LINE_COUNT, error_message,
block_start, Position.AtBeginning(),
expected_blank_lines - blank_lines)
elif type == Type.END_BLOCK:
if state.InFunction() and state.IsFunctionClose():
is_immediately_called = (token.next and
token.next.type == Type.START_PAREN)
function = state.GetFunction()
if not self._limited_doc_checks:
if (function.has_return and function.doc and
not is_immediately_called and
not function.doc.HasFlag('return') and
not function.doc.InheritsDocumentation() and
not function.doc.HasFlag('constructor')):
# Check for proper documentation of return value.
self._HandleError(
errors.MISSING_RETURN_DOCUMENTATION,
'Missing @return JsDoc in function with non-trivial return',
function.doc.end_token, Position.AtBeginning())
elif (not function.has_return and function.doc and
function.doc.HasFlag('return') and
not state.InInterfaceMethod()):
return_flag = function.doc.GetFlag('return')
if (return_flag.type is None or (
'undefined' not in return_flag.type and
'void' not in return_flag.type and
'*' not in return_flag.type)):
self._HandleError(
errors.UNNECESSARY_RETURN_DOCUMENTATION,
'Found @return JsDoc on function that returns nothing',
return_flag.flag_token, Position.AtBeginning())
if state.InFunction() and state.IsFunctionClose():
is_immediately_called = (token.next and
token.next.type == Type.START_PAREN)
if (function.has_this and function.doc and
not function.doc.HasFlag('this') and
not function.is_constructor and
not function.is_interface and
'.prototype.' not in function.name):
self._HandleError(
errors.MISSING_JSDOC_TAG_THIS,
'Missing @this JsDoc in function referencing "this". ('
'this usually means you are trying to reference "this" in '
'a static function, or you have forgotten to mark a '
'constructor with @constructor)',
function.doc.end_token, Position.AtBeginning())
elif type == Type.IDENTIFIER:
if token.string == 'goog.inherits' and not state.InFunction():
if state.GetLastNonSpaceToken().line_number == token.line_number:
self._HandleError(
errors.MISSING_LINE,
'Missing newline between constructor and goog.inherits',
token,
Position.AtBeginning())
extra_space = state.GetLastNonSpaceToken().next
while extra_space != token:
if extra_space.type == Type.BLANK_LINE:
self._HandleError(
errors.EXTRA_LINE,
'Extra line between constructor and goog.inherits',
extra_space)
extra_space = extra_space.next
# TODO(robbyw): Test the last function was a constructor.
# TODO(robbyw): Test correct @extends and @implements documentation.
elif type == Type.OPERATOR:
# If the token is unary and appears to be used in a unary context
# it's ok. Otherwise, if it's at the end of the line or immediately
# before a comment, it's ok.
# Don't report an error before a start bracket - it will be reported
# by that token's space checks.
if (not token.metadata.IsUnaryOperator() and not last_in_line
and not token.next.IsComment()
and not token.next.IsOperator(',')
and not token.next.type in (Type.WHITESPACE, Type.END_PAREN,
Type.END_BRACKET, Type.SEMICOLON,
Type.START_BRACKET)):
self._HandleError(
errors.MISSING_SPACE,
'Missing space after "%s"' % token.string,
token,
Position.AtEnd(token.string))
elif type == Type.WHITESPACE:
# Check whitespace length if it's not the first token of the line and
# if it's not immediately before a comment.
if not last_in_line and not first_in_line and not token.next.IsComment():
# Ensure there is no space after opening parentheses.
if (token.previous.type in (Type.START_PAREN, Type.START_BRACKET,
Type.FUNCTION_NAME)
or token.next.type == Type.START_PARAMETERS):
self._HandleError(
errors.EXTRA_SPACE,
'Extra space after "%s"' % token.previous.string,
token,
Position.All(token.string))
def Finalize(self, state, tokenizer_mode):
"""Perform all checks that need to occur after all lines are processed."""
# Call the base class's Finalize function.
super(JavaScriptLintRules, self).Finalize(state, tokenizer_mode)
# Check for sorted requires statements.
goog_require_tokens = state.GetGoogRequireTokens()
requires = [require_token.string for require_token in goog_require_tokens]
sorted_requires = sorted(requires)
index = 0
bad = False
for item in requires:
if item != sorted_requires[index]:
bad = True
break
index += 1
if bad:
self._HandleError(
errors.GOOG_REQUIRES_NOT_ALPHABETIZED,
'goog.require classes must be alphabetized. The correct code is:\n' +
'\n'.join(map(lambda x: 'goog.require(\'%s\');' % x,
sorted_requires)),
goog_require_tokens[index],
position=Position.AtBeginning(),
fix_data=goog_require_tokens)
# Check for sorted provides statements.
goog_provide_tokens = state.GetGoogProvideTokens()
provides = [provide_token.string for provide_token in goog_provide_tokens]
sorted_provides = sorted(provides)
index = 0
bad = False
for item in provides:
if item != sorted_provides[index]:
bad = True
break
index += 1
if bad:
self._HandleError(
errors.GOOG_PROVIDES_NOT_ALPHABETIZED,
'goog.provide classes must be alphabetized. The correct code is:\n' +
'\n'.join(map(lambda x: 'goog.provide(\'%s\');' % x,
sorted_provides)),
goog_provide_tokens[index],
position=Position.AtBeginning(),
fix_data=goog_provide_tokens)
if FLAGS.closurized_namespaces:
# Check that we provide everything we need.
provided_namespaces = state.GetProvidedNamespaces()
missing_provides = provided_namespaces - set(provides)
if missing_provides:
self._HandleError(
errors.MISSING_GOOG_PROVIDE,
'Missing the following goog.provide statements:\n' +
'\n'.join(map(lambda x: 'goog.provide(\'%s\');' % x,
sorted(missing_provides))),
state.GetFirstToken(), position=Position.AtBeginning(),
fix_data=missing_provides)
# Compose a set of all available namespaces. Explicitly omit goog
# because if you can call goog.require, you already have goog.
available_namespaces = (set(requires) | set(provides) | set(['goog']) |
provided_namespaces)
# Check that we require everything we need.
missing_requires = set()
for namespace_variants in state.GetUsedNamespaces():
# Namespace variants is a list of potential things to require. If we
# find we're missing one, we are lazy and choose to require the first
# in the sequence - which should be the namespace.
if not set(namespace_variants) & available_namespaces:
missing_requires.add(namespace_variants[0])
if missing_requires:
self._HandleError(
errors.MISSING_GOOG_REQUIRE,
'Missing the following goog.require statements:\n' +
'\n'.join(map(lambda x: 'goog.require(\'%s\');' % x,
sorted(missing_requires))),
state.GetFirstToken(), position=Position.AtBeginning(),
fix_data=missing_requires)
# Check that we don't require things we don't actually use.
namespace_variants = state.GetUsedNamespaces()
used_namespaces = set()
for a, b in namespace_variants:
used_namespaces.add(a)
used_namespaces.add(b)
extra_requires = set()
for i in requires:
baseNamespace = i.split('.')[0]
if (i not in used_namespaces and
baseNamespace in FLAGS.closurized_namespaces and
i not in FLAGS.ignored_extra_namespaces):
extra_requires.add(i)
if extra_requires:
self._HandleError(
errors.EXTRA_GOOG_REQUIRE,
'The following goog.require statements appear unnecessary:\n' +
'\n'.join(map(lambda x: 'goog.require(\'%s\');' % x,
sorted(extra_requires))),
state.GetFirstToken(), position=Position.AtBeginning(),
fix_data=extra_requires)

238
tools/closure_linter/closure_linter/javascriptstatetracker.py

@@ -0,0 +1,238 @@
#!/usr/bin/env python
#
# Copyright 2008 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Parser for JavaScript files."""
from closure_linter import javascripttokens
from closure_linter import statetracker
from closure_linter import tokenutil
# Shorthand
Type = javascripttokens.JavaScriptTokenType
class JsDocFlag(statetracker.DocFlag):
"""Javascript doc flag object.
Attributes:
flag_type: param, return, define, type, etc.
flag_token: The flag token.
type_start_token: The first token specifying the flag JS type,
including braces.
type_end_token: The last token specifying the flag JS type,
including braces.
type: The JavaScript type spec.
name_token: The token specifying the flag name.
name: The flag name
description_start_token: The first token in the description.
description_end_token: The end token in the description.
description: The description.
"""
# Please keep these lists alphabetized.
# Some projects use the following extensions to JsDoc.
# TODO(robbyw): determine which of these, if any, should be illegal.
EXTENDED_DOC = frozenset([
'class', 'code', 'desc', 'final', 'hidden', 'inheritDoc', 'link',
'protected', 'notypecheck', 'throws'])
LEGAL_DOC = EXTENDED_DOC | statetracker.DocFlag.LEGAL_DOC
def __init__(self, flag_token):
"""Creates the JsDocFlag object and attaches it to the given start token.
Args:
flag_token: The starting token of the flag.
"""
statetracker.DocFlag.__init__(self, flag_token)
class JavaScriptStateTracker(statetracker.StateTracker):
"""JavaScript state tracker.
Inherits from the core EcmaScript StateTracker adding extra state tracking
functionality needed for JavaScript.
"""
def __init__(self, closurized_namespaces=''):
"""Initializes a JavaScript token stream state tracker.
Args:
closurized_namespaces: An optional list of namespace prefixes used for
testing of goog.provide/require.
"""
statetracker.StateTracker.__init__(self, JsDocFlag)
self.__closurized_namespaces = closurized_namespaces
def Reset(self):
"""Resets the state tracker to prepare for processing a new page."""
super(JavaScriptStateTracker, self).Reset()
self.__goog_require_tokens = []
self.__goog_provide_tokens = []
self.__provided_namespaces = set()
self.__used_namespaces = []
def InTopLevel(self):
"""Compute whether we are at the top level in the class.
This function call is language specific. In some languages like
JavaScript, a function is top level if it is not inside any parenthesis.
In languages such as ActionScript, a function is top level if it is directly
within a class.
Returns:
Whether we are at the top level in the class.
"""
return not self.InParentheses()
def GetGoogRequireTokens(self):
"""Returns list of require tokens."""
return self.__goog_require_tokens
def GetGoogProvideTokens(self):
"""Returns list of provide tokens."""
return self.__goog_provide_tokens
def GetProvidedNamespaces(self):
"""Returns list of provided namespaces."""
return self.__provided_namespaces
def GetUsedNamespaces(self):
"""Returns list of used namespaces, is a list of sequences."""
return self.__used_namespaces
def GetBlockType(self, token):
"""Determine the block type given a START_BLOCK token.
Code blocks come after parameters, keywords like else, and closing parens.
Args:
token: The current token. Can be assumed to be type START_BLOCK
Returns:
Code block type for current token.
"""
last_code = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES, None,
True)
if last_code.type in (Type.END_PARAMETERS, Type.END_PAREN,
Type.KEYWORD) and not last_code.IsKeyword('return'):
return self.CODE
else:
return self.OBJECT_LITERAL
def HandleToken(self, token, last_non_space_token):
"""Handles the given token and updates state.
Args:
token: The token to handle.
last_non_space_token: The last non-space token encountered before this one.
"""
super(JavaScriptStateTracker, self).HandleToken(token,
last_non_space_token)
if token.IsType(Type.IDENTIFIER):
if token.string == 'goog.require':
class_token = tokenutil.Search(token, Type.STRING_TEXT)
self.__goog_require_tokens.append(class_token)
elif token.string == 'goog.provide':
class_token = tokenutil.Search(token, Type.STRING_TEXT)
self.__goog_provide_tokens.append(class_token)
elif self.__closurized_namespaces:
self.__AddUsedNamespace(token.string)
if token.IsType(Type.SIMPLE_LVALUE) and not self.InFunction():
identifier = token.values['identifier']
if self.__closurized_namespaces:
namespace = self.GetClosurizedNamespace(identifier)
if namespace and identifier == namespace:
self.__provided_namespaces.add(namespace)
if (self.__closurized_namespaces and
token.IsType(Type.DOC_FLAG) and
token.attached_object.flag_type == 'implements'):
# Interfaces should be goog.require'd.
doc_start = tokenutil.Search(token, Type.DOC_START_BRACE)
interface = tokenutil.Search(doc_start, Type.COMMENT)
self.__AddUsedNamespace(interface.string)
def __AddUsedNamespace(self, identifier):
"""Adds the namespace of an identifier to the list of used namespaces.
Args:
identifier: An identifier which has been used.
"""
namespace = self.GetClosurizedNamespace(identifier)
if namespace:
# We add token.string as a 'namespace' as it is something that could
# potentially be provided to satisfy this dependency.
self.__used_namespaces.append([namespace, identifier])
def GetClosurizedNamespace(self, identifier):
"""Given an identifier, returns the namespace that identifier is from.
Args:
identifier: The identifier to extract a namespace from.
Returns:
The namespace the given identifier resides in, or None if one could not
be found.
"""
parts = identifier.split('.')
for part in parts:
if part.endswith('_'):
# Ignore private variables / inner classes.
return None
if identifier.startswith('goog.global'):
# Ignore goog.global, since it is, by definition, global.
return None
for namespace in self.__closurized_namespaces:
if identifier.startswith(namespace + '.'):
last_part = parts[-1]
if not last_part:
# TODO(robbyw): Handle this: it's a multi-line identifier.
return None
if last_part in ('apply', 'inherits', 'call'):
# Calling one of Function's methods usually indicates use of a
# superclass.
parts.pop()
last_part = parts[-1]
for i in xrange(1, len(parts)):
part = parts[i]
if part.isupper():
# If an identifier is of the form foo.bar.BAZ.x or foo.bar.BAZ,
# the namespace is foo.bar.
return '.'.join(parts[:i])
if part == 'prototype':
# If an identifier is of the form foo.bar.prototype.x, the
# namespace is foo.bar.
return '.'.join(parts[:i])
if last_part.isupper() or not last_part[0].isupper():
# Strip off the last part of an enum or constant reference.
parts.pop()
return '.'.join(parts)
return None

53
tools/closure_linter/closure_linter/javascriptstatetracker_test.py

@@ -0,0 +1,53 @@
#!/usr/bin/env python
#
# Copyright 2010 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Unit tests for JavaScriptStateTracker."""
import unittest as googletest
from closure_linter import javascriptstatetracker
class JavaScriptStateTrackerTest(googletest.TestCase):
__test_cases = {
'package.CONSTANT' : 'package',
'package.methodName' : 'package',
'package.subpackage.methodName' : 'package.subpackage',
'package.ClassName.something' : 'package.ClassName',
'package.ClassName.Enum.VALUE.methodName' : 'package.ClassName.Enum',
'package.ClassName.CONSTANT' : 'package.ClassName',
'package.ClassName.inherits' : 'package.ClassName',
'package.ClassName.apply' : 'package.ClassName',
'package.ClassName.methodName.apply' : 'package.ClassName',
'package.ClassName.methodName.call' : 'package.ClassName',
'package.ClassName.prototype.methodName' : 'package.ClassName',
'package.ClassName.privateMethod_' : None,
'package.ClassName.prototype.methodName.apply' : 'package.ClassName'
}
def testGetClosurizedNamespace(self):
stateTracker = javascriptstatetracker.JavaScriptStateTracker(['package'])
for identifier, expected_namespace in self.__test_cases.items():
actual_namespace = stateTracker.GetClosurizedNamespace(identifier)
self.assertEqual(expected_namespace, actual_namespace,
'expected namespace "' + str(expected_namespace) +
'" for identifier "' + str(identifier) + '" but was "' +
str(actual_namespace) + '"')
if __name__ == '__main__':
googletest.main()

365
tools/closure_linter/closure_linter/javascripttokenizer.py

@@ -0,0 +1,365 @@
#!/usr/bin/env python
#
# Copyright 2007 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Regular expression based JavaScript parsing classes."""
__author__ = ('robbyw@google.com (Robert Walker)',
'ajp@google.com (Andy Perelson)')
import copy
import re
from closure_linter import javascripttokens
from closure_linter.common import matcher
from closure_linter.common import tokenizer
# Shorthand
Type = javascripttokens.JavaScriptTokenType
Matcher = matcher.Matcher
class JavaScriptModes(object):
"""Enumeration of the different matcher modes used for JavaScript."""
TEXT_MODE = 'text'
SINGLE_QUOTE_STRING_MODE = 'single_quote_string'
DOUBLE_QUOTE_STRING_MODE = 'double_quote_string'
BLOCK_COMMENT_MODE = 'block_comment'
DOC_COMMENT_MODE = 'doc_comment'
DOC_COMMENT_LEX_SPACES_MODE = 'doc_comment_spaces'
LINE_COMMENT_MODE = 'line_comment'
PARAMETER_MODE = 'parameter'
FUNCTION_MODE = 'function'
class JavaScriptTokenizer(tokenizer.Tokenizer):
"""JavaScript tokenizer.
Converts JavaScript code into an array of tokens.
"""
# Useful patterns for JavaScript parsing.
IDENTIFIER_CHAR = r'A-Za-z0-9_$.'
# Number patterns based on:
# http://www.mozilla.org/js/language/js20-2000-07/formal/lexer-grammar.html
MANTISSA = r"""
(\d+(?!\.)) | # Matches '10'
(\d+\.(?!\d)) | # Matches '10.'
(\d*\.\d+) # Matches '.5' or '10.5'
"""
DECIMAL_LITERAL = r'(%s)([eE][-+]?\d+)?' % MANTISSA
HEX_LITERAL = r'0[xX][0-9a-fA-F]+'
NUMBER = re.compile(r"""
((%s)|(%s))
""" % (HEX_LITERAL, DECIMAL_LITERAL), re.VERBOSE)
# Strings come in three parts - first we match the start of the string, then
# the contents, then the end. The contents consist of any character except a
# backslash or end of string, or a backslash followed by any character, or a
# backslash followed by end of line to support correct parsing of multi-line
# strings.
SINGLE_QUOTE = re.compile(r"'")
SINGLE_QUOTE_TEXT = re.compile(r"([^'\\]|\\(.|$))+")
DOUBLE_QUOTE = re.compile(r'"')
DOUBLE_QUOTE_TEXT = re.compile(r'([^"\\]|\\(.|$))+')
START_SINGLE_LINE_COMMENT = re.compile(r'//')
END_OF_LINE_SINGLE_LINE_COMMENT = re.compile(r'//$')
START_DOC_COMMENT = re.compile(r'/\*\*')
START_BLOCK_COMMENT = re.compile(r'/\*')
END_BLOCK_COMMENT = re.compile(r'\*/')
BLOCK_COMMENT_TEXT = re.compile(r'([^*]|\*(?!/))+')
# Comment text is anything that we are not going to parse into another special
# token like (inline) flags or end comments. Complicated regex to match
# most normal characters, and '*', '{', '}', and '@' when we are sure that
# it is safe. Expression [^*{\s]@ must come first, or the other options will
# match everything before @, and we won't match @'s that aren't part of flags
# like in email addresses in the @author tag.
DOC_COMMENT_TEXT = re.compile(r'([^*{}\s]@|[^*{}@]|\*(?!/))+')
DOC_COMMENT_NO_SPACES_TEXT = re.compile(r'([^*{}\s]@|[^*{}@\s]|\*(?!/))+')
# Match the prefix ' * ' that starts every line of jsdoc. Want to include
# spaces after the '*', but nothing else that occurs after a '*', and don't
# want to match the '*' in '*/'.
DOC_PREFIX = re.compile(r'\s*\*(\s+|(?!/))')
START_BLOCK = re.compile('{')
END_BLOCK = re.compile('}')
REGEX_CHARACTER_CLASS = r"""
\[ # Opening bracket
([^\]\\]|\\.)* # Anything but a ] or \,
# or a backslash followed by anything
\] # Closing bracket
"""
# We ensure the regex is followed by one of the above tokens to avoid
# incorrectly parsing something like x / y / z as x REGEX(/ y /) z
POST_REGEX_LIST = [
';', ',', r'\.', r'\)', r'\]', '$', r'\/\/', r'\/\*', ':', '}']
REGEX = re.compile(r"""
/ # opening slash
(?!\*) # not the start of a comment
(\\.|[^\[\/\\]|(%s))* # a backslash followed by anything,
# or anything but a / or [ or \,
# or a character class
/ # closing slash
[gimsx]* # optional modifiers
(?=\s*(%s))
""" % (REGEX_CHARACTER_CLASS, '|'.join(POST_REGEX_LIST)),
re.VERBOSE)
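# Sketch of the ambiguity the lookahead resolves (illustrative, not part of
# the original source):
#   x = a / b / c;       // division twice; '/ b /' is rejected as a regex
#                        // because the ' c' after it is not in POST_REGEX_LIST
#   x = /ab+c/.test(s);  // accepted: the '.' after the closing slash is in
#                        // POST_REGEX_LIST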
ANYTHING = re.compile(r'.*')
PARAMETERS = re.compile(r'[^\)]+')
CLOSING_PAREN_WITH_SPACE = re.compile(r'\)\s*')
FUNCTION_DECLARATION = re.compile(r'\bfunction\b')
OPENING_PAREN = re.compile(r'\(')
CLOSING_PAREN = re.compile(r'\)')
OPENING_BRACKET = re.compile(r'\[')
CLOSING_BRACKET = re.compile(r'\]')
# We omit these JS keywords from the list:
# function - covered by FUNCTION_DECLARATION.
# delete, in, instanceof, new, typeof - included as operators.
# this - included in identifiers.
# null, undefined - not included, should go in some "special constant" list.
KEYWORD_LIST = ['break', 'case', 'catch', 'continue', 'default', 'do', 'else',
'finally', 'for', 'if', 'return', 'switch', 'throw', 'try', 'var',
'while', 'with']
# Match a keyword string followed by a non-identifier character in order to
# not match something like doSomething as do + Something.
KEYWORD = re.compile('(%s)((?=[^%s])|$)' % (
'|'.join(KEYWORD_LIST), IDENTIFIER_CHAR))
# List of regular expressions to match as operators. Some notes: for our
# purposes, the comma behaves similarly enough to a normal operator that we
# include it here. r'\bin\b' actually matches 'in' surrounded by boundary
# characters - this may not match some very esoteric uses of the in operator.
# Operators that are subsets of larger operators must come later in this list
# for proper matching, e.g., '>>' must come AFTER '>>>'.
OPERATOR_LIST = [',', r'\+\+', '===', '!==', '>>>=', '>>>', '==', '>=', '<=',
'!=', '<<=', '>>=', '<<', '>>', '>', '<', r'\+=', r'\+',
'--', '\^=', '-=', '-', '/=', '/', r'\*=', r'\*', '%=', '%',
'&&', r'\|\|', '&=', '&', r'\|=', r'\|', '=', '!', ':', '\?',
r'\bdelete\b', r'\bin\b', r'\binstanceof\b', r'\bnew\b',
r'\btypeof\b', r'\bvoid\b']
OPERATOR = re.compile('|'.join(OPERATOR_LIST))
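# Ordering sketch (illustrative): because '>>>' appears before '>>' and '>'
# in OPERATOR_LIST, the alternation tries the longest form first, so
# 'a >>> b' lexes as a single '>>>' token rather than '>>' followed by '>'.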
WHITESPACE = re.compile(r'\s+')
SEMICOLON = re.compile(r';')
# Technically JavaScript identifiers can't contain '.', but we treat a set of
# nested identifiers as a single identifier.
NESTED_IDENTIFIER = r'[a-zA-Z_$][%s.]*' % IDENTIFIER_CHAR
IDENTIFIER = re.compile(NESTED_IDENTIFIER)
SIMPLE_LVALUE = re.compile(r"""
(?P<identifier>%s) # a valid identifier
(?=\s* # optional whitespace
\= # look ahead to equal sign
(?!=)) # not followed by equal
""" % NESTED_IDENTIFIER, re.VERBOSE)
# A doc flag is a @ sign followed by non-space characters that appears at the
# beginning of the line, after whitespace, or after a '{'. The look-behind
# check is necessary to not match someone@google.com as a flag.
DOC_FLAG = re.compile(r'(^|(?<=\s))@(?P<name>[a-zA-Z]+)')
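# Illustrative behaviour (sketch): in ' * @param {string} name' the '@param'
# is matched as a flag, while in 'someone@google.com' the '@' follows a
# non-space character, so no flag is matched.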
# To properly parse parameter names, we need to tokenize whitespace into a
# token.
DOC_FLAG_LEX_SPACES = re.compile(r'(^|(?<=\s))@(?P<name>%s)\b' %
'|'.join(['param']))
DOC_INLINE_FLAG = re.compile(r'(?<={)@(?P<name>[a-zA-Z]+)')
# Star followed by non-slash, i.e. a star that does not end a comment.
# This is used for TYPE_GROUP below.
SAFE_STAR = r'(\*(?!/))'
COMMON_DOC_MATCHERS = [
# Find the end of the comment.
Matcher(END_BLOCK_COMMENT, Type.END_DOC_COMMENT,
JavaScriptModes.TEXT_MODE),
# Tokenize documented flags like @private.
Matcher(DOC_INLINE_FLAG, Type.DOC_INLINE_FLAG),
Matcher(DOC_FLAG_LEX_SPACES, Type.DOC_FLAG,
JavaScriptModes.DOC_COMMENT_LEX_SPACES_MODE),
Matcher(DOC_FLAG, Type.DOC_FLAG),
# Tokenize braces so we can find types.
Matcher(START_BLOCK, Type.DOC_START_BRACE),
Matcher(END_BLOCK, Type.DOC_END_BRACE),
Matcher(DOC_PREFIX, Type.DOC_PREFIX, None, True)]
# The token matcher groups work as follows: it is a list of Matcher objects.
# The matchers will be tried in this order, and the first to match will be
# returned. Hence the order is important because the matchers that come first
# overrule the matchers that come later.
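# For example (illustrative), START_DOC_COMMENT ('/**') must be tried before
# START_BLOCK_COMMENT ('/*'); if the order were reversed, every doc comment
# would be tokenized as a plain block comment.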
JAVASCRIPT_MATCHERS = {
# Matchers for basic text mode.
JavaScriptModes.TEXT_MODE: [
# Check a big group - strings, starting comments, and regexes - all
# of which could be intertwined. 'string with /regex/',
# /regex with 'string'/, /* comment with /regex/ and string */ (and so on)
Matcher(START_DOC_COMMENT, Type.START_DOC_COMMENT,
JavaScriptModes.DOC_COMMENT_MODE),
Matcher(START_BLOCK_COMMENT, Type.START_BLOCK_COMMENT,
JavaScriptModes.BLOCK_COMMENT_MODE),
Matcher(END_OF_LINE_SINGLE_LINE_COMMENT,
Type.START_SINGLE_LINE_COMMENT),
Matcher(START_SINGLE_LINE_COMMENT, Type.START_SINGLE_LINE_COMMENT,
JavaScriptModes.LINE_COMMENT_MODE),
Matcher(SINGLE_QUOTE, Type.SINGLE_QUOTE_STRING_START,
JavaScriptModes.SINGLE_QUOTE_STRING_MODE),
Matcher(DOUBLE_QUOTE, Type.DOUBLE_QUOTE_STRING_START,
JavaScriptModes.DOUBLE_QUOTE_STRING_MODE),
Matcher(REGEX, Type.REGEX),
# Next we check for start blocks appearing outside any of the items above.
Matcher(START_BLOCK, Type.START_BLOCK),
Matcher(END_BLOCK, Type.END_BLOCK),
# Then we search for function declarations.
Matcher(FUNCTION_DECLARATION, Type.FUNCTION_DECLARATION,
JavaScriptModes.FUNCTION_MODE),
# Next, we convert non-function related parens to tokens.
Matcher(OPENING_PAREN, Type.START_PAREN),
Matcher(CLOSING_PAREN, Type.END_PAREN),
# Next, we convert brackets to tokens.
Matcher(OPENING_BRACKET, Type.START_BRACKET),
Matcher(CLOSING_BRACKET, Type.END_BRACKET),
# Find numbers. This has to happen before operators because scientific
# notation numbers can have + and - in them.
Matcher(NUMBER, Type.NUMBER),
# Find operators and simple assignments
Matcher(SIMPLE_LVALUE, Type.SIMPLE_LVALUE),
Matcher(OPERATOR, Type.OPERATOR),
# Find key words and whitespace
Matcher(KEYWORD, Type.KEYWORD),
Matcher(WHITESPACE, Type.WHITESPACE),
# Find identifiers
Matcher(IDENTIFIER, Type.IDENTIFIER),
# Finally, we convert semicolons to tokens.
Matcher(SEMICOLON, Type.SEMICOLON)],
# Matchers for single quote strings.
JavaScriptModes.SINGLE_QUOTE_STRING_MODE: [
Matcher(SINGLE_QUOTE_TEXT, Type.STRING_TEXT),
Matcher(SINGLE_QUOTE, Type.SINGLE_QUOTE_STRING_END,
JavaScriptModes.TEXT_MODE)],
# Matchers for double quote strings.
JavaScriptModes.DOUBLE_QUOTE_STRING_MODE: [
Matcher(DOUBLE_QUOTE_TEXT, Type.STRING_TEXT),
Matcher(DOUBLE_QUOTE, Type.DOUBLE_QUOTE_STRING_END,
JavaScriptModes.TEXT_MODE)],
# Matchers for block comments.
JavaScriptModes.BLOCK_COMMENT_MODE: [
# First we check for exiting a block comment.
Matcher(END_BLOCK_COMMENT, Type.END_BLOCK_COMMENT,
JavaScriptModes.TEXT_MODE),
# Match non-comment-ending text.
Matcher(BLOCK_COMMENT_TEXT, Type.COMMENT)],
# Matchers for doc comments.
JavaScriptModes.DOC_COMMENT_MODE: COMMON_DOC_MATCHERS + [
Matcher(DOC_COMMENT_TEXT, Type.COMMENT)],
JavaScriptModes.DOC_COMMENT_LEX_SPACES_MODE: COMMON_DOC_MATCHERS + [
Matcher(WHITESPACE, Type.COMMENT),
Matcher(DOC_COMMENT_NO_SPACES_TEXT, Type.COMMENT)],
# Matchers for single line comments.
JavaScriptModes.LINE_COMMENT_MODE: [
# We greedily match until the end of the line in line comment mode.
Matcher(ANYTHING, Type.COMMENT, JavaScriptModes.TEXT_MODE)],
# Matchers for code after the function keyword.
JavaScriptModes.FUNCTION_MODE: [
# Must match open paren before anything else and move into parameter mode,
# otherwise everything inside the parameter list is parsed incorrectly.
Matcher(OPENING_PAREN, Type.START_PARAMETERS,
JavaScriptModes.PARAMETER_MODE),
Matcher(WHITESPACE, Type.WHITESPACE),
Matcher(IDENTIFIER, Type.FUNCTION_NAME)],
# Matchers for function parameters
JavaScriptModes.PARAMETER_MODE: [
# When in function parameter mode, a closing paren is treated specially.
# Everything else is treated as lines of parameters.
Matcher(CLOSING_PAREN_WITH_SPACE, Type.END_PARAMETERS,
JavaScriptModes.TEXT_MODE),
Matcher(PARAMETERS, Type.PARAMETERS, JavaScriptModes.PARAMETER_MODE)]}
# When text is not matched by any matcher, it is given a default type based
# on the current mode. If a mode is unspecified in this map, the default is
# Type.NORMAL.
JAVASCRIPT_DEFAULT_TYPES = {
JavaScriptModes.DOC_COMMENT_MODE: Type.COMMENT,
JavaScriptModes.DOC_COMMENT_LEX_SPACES_MODE: Type.COMMENT
}
def __init__(self, parse_js_doc = True):
"""Create a tokenizer object.
Args:
parse_js_doc: Whether to do detailed parsing of JavaScript doc comments,
or simply treat them as normal comments. Defaults to parsing JsDoc.
"""
matchers = self.JAVASCRIPT_MATCHERS
if not parse_js_doc:
# Make a copy so the original doesn't get modified.
matchers = copy.deepcopy(matchers)
matchers[JavaScriptModes.DOC_COMMENT_MODE] = matchers[
JavaScriptModes.BLOCK_COMMENT_MODE]
tokenizer.Tokenizer.__init__(self, JavaScriptModes.TEXT_MODE, matchers,
self.JAVASCRIPT_DEFAULT_TYPES)
def _CreateToken(self, string, token_type, line, line_number, values=None):
"""Creates a new JavaScriptToken object.
Args:
string: The string of input the token contains.
token_type: The type of token.
line: The text of the line this token is in.
line_number: The line number of the token.
values: A dict of named values within the token. For instance, a
function declaration may have a value called 'name' which captures the
name of the function.
Returns:
The newly created JavaScriptToken.
"""
return javascripttokens.JavaScriptToken(string, token_type, line,
line_number, values)

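For orientation, a brief usage sketch of the tokenizer above (not part of this commit; it assumes the tools/closure_linter tree is importable and that TokenizeFile, inherited from the common Tokenizer class, returns the first token of the resulting linked list):

import StringIO

from closure_linter import javascripttokenizer

source = StringIO.StringIO("var x = 1; // done\n")
tokenizer = javascripttokenizer.JavaScriptTokenizer()
token = tokenizer.TokenizeFile(source)
while token:
  # Walk the doubly-linked token chain the tokenizer produced.
  print token.line_number, token.type, repr(token.string)
  token = token.next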
147
tools/closure_linter/closure_linter/javascripttokens.py

@ -0,0 +1,147 @@
#!/usr/bin/env python
#
# Copyright 2008 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Classes to represent JavaScript tokens."""
__author__ = ('robbyw@google.com (Robert Walker)',
'ajp@google.com (Andy Perelson)')
from closure_linter.common import tokens
class JavaScriptTokenType(tokens.TokenType):
"""Enumeration of JavaScript token types, and useful sets of token types."""
NUMBER = 'number'
START_SINGLE_LINE_COMMENT = '//'
START_BLOCK_COMMENT = '/*'
START_DOC_COMMENT = '/**'
END_BLOCK_COMMENT = '*/'
END_DOC_COMMENT = 'doc */'
COMMENT = 'comment'
SINGLE_QUOTE_STRING_START = "'string"
SINGLE_QUOTE_STRING_END = "string'"
DOUBLE_QUOTE_STRING_START = '"string'
DOUBLE_QUOTE_STRING_END = 'string"'
STRING_TEXT = 'string'
START_BLOCK = '{'
END_BLOCK = '}'
START_PAREN = '('
END_PAREN = ')'
START_BRACKET = '['
END_BRACKET = ']'
REGEX = '/regex/'
FUNCTION_DECLARATION = 'function(...)'
FUNCTION_NAME = 'function functionName(...)'
START_PARAMETERS = 'startparams('
PARAMETERS = 'pa,ra,ms'
END_PARAMETERS = ')endparams'
SEMICOLON = ';'
DOC_FLAG = '@flag'
DOC_INLINE_FLAG = '{@flag ...}'
DOC_START_BRACE = 'doc {'
DOC_END_BRACE = 'doc }'
DOC_PREFIX = 'comment prefix: * '
SIMPLE_LVALUE = 'lvalue='
KEYWORD = 'keyword'
OPERATOR = 'operator'
IDENTIFIER = 'identifier'
STRING_TYPES = frozenset([
SINGLE_QUOTE_STRING_START, SINGLE_QUOTE_STRING_END,
DOUBLE_QUOTE_STRING_START, DOUBLE_QUOTE_STRING_END, STRING_TEXT])
COMMENT_TYPES = frozenset([START_SINGLE_LINE_COMMENT, COMMENT,
START_BLOCK_COMMENT, START_DOC_COMMENT,
END_BLOCK_COMMENT, END_DOC_COMMENT,
DOC_START_BRACE, DOC_END_BRACE,
DOC_FLAG, DOC_INLINE_FLAG, DOC_PREFIX])
FLAG_DESCRIPTION_TYPES = frozenset([
DOC_INLINE_FLAG, COMMENT, DOC_START_BRACE, DOC_END_BRACE])
FLAG_ENDING_TYPES = frozenset([DOC_FLAG, END_DOC_COMMENT])
NON_CODE_TYPES = COMMENT_TYPES | frozenset([
tokens.TokenType.WHITESPACE, tokens.TokenType.BLANK_LINE])
UNARY_OPERATORS = ['!', 'new', 'delete', 'typeof', 'void']
UNARY_OK_OPERATORS = ['--', '++', '-', '+'] + UNARY_OPERATORS
UNARY_POST_OPERATORS = ['--', '++']
# An expression ender is any token that can end an object - i.e. we could have
# x.y or [1, 2], or (10 + 9) or {a: 10}.
EXPRESSION_ENDER_TYPES = [tokens.TokenType.NORMAL, IDENTIFIER, NUMBER,
SIMPLE_LVALUE, END_BRACKET, END_PAREN, END_BLOCK,
SINGLE_QUOTE_STRING_END, DOUBLE_QUOTE_STRING_END]
class JavaScriptToken(tokens.Token):
"""JavaScript token subclass of Token, provides extra instance checks.
The following token types have data in attached_object:
- All JsDoc flags: a statetracker.DocFlag object.
"""
def IsKeyword(self, keyword):
"""Tests if this token is the given keyword.
Args:
keyword: The keyword to compare to.
Returns:
True if this token is a keyword token with the given name.
"""
return self.type == JavaScriptTokenType.KEYWORD and self.string == keyword
def IsOperator(self, operator):
"""Tests if this token is the given operator.
Args:
operator: The operator to compare to.
Returns:
True if this token is an operator token with the given name.
"""
return self.type == JavaScriptTokenType.OPERATOR and self.string == operator
def IsAssignment(self):
"""Tests if this token is an assignment operator.
Returns:
True if this token is an assignment operator.
"""
return (self.type == JavaScriptTokenType.OPERATOR and
self.string.endswith('=') and
self.string not in ('==', '!=', '>=', '<=', '===', '!=='))
def IsComment(self):
"""Tests if this token is any part of a comment.
Returns:
True if this token is any part of a comment.
"""
return self.type in JavaScriptTokenType.COMMENT_TYPES
def IsCode(self):
"""Tests if this token is code, as opposed to a comment or whitespace."""
return self.type not in JavaScriptTokenType.NON_CODE_TYPES
def __repr__(self):
return '<JavaScriptToken: %d, %s, "%s", %r, %r>' % (self.line_number,
self.type, self.string,
self.values,
self.metadata)

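A short illustration of the predicate helpers above (hypothetical, not part of this commit); the constructor signature matches the _CreateToken call in the tokenizer:

from closure_linter import javascripttokens

Type = javascripttokens.JavaScriptTokenType
tok = javascripttokens.JavaScriptToken('+=', Type.OPERATOR, 'a += 1;', 1)
print tok.IsOperator('+=')  # True
print tok.IsAssignment()    # True: ends with '=' and is not a comparison
print tok.IsCode()          # True: operators are not in NON_CODE_TYPES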
964
tools/closure_linter/closure_linter/statetracker.py

@ -0,0 +1,964 @@
#!/usr/bin/env python
#
# Copyright 2007 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Light weight EcmaScript state tracker that reads tokens and tracks state."""
__author__ = ('robbyw@google.com (Robert Walker)',
'ajp@google.com (Andy Perelson)')
import re
from closure_linter import javascripttokenizer
from closure_linter import javascripttokens
from closure_linter import tokenutil
# Shorthand
Type = javascripttokens.JavaScriptTokenType
class DocFlag(object):
"""Generic doc flag object.
Attributes:
flag_type: param, return, define, type, etc.
flag_token: The flag token.
type_start_token: The first token specifying the flag type,
including braces.
type_end_token: The last token specifying the flag type,
including braces.
type: The type spec.
name_token: The token specifying the flag name.
name: The flag name.
description_start_token: The first token in the description.
description_end_token: The last token in the description.
description: The description.
"""
# Please keep these lists alphabetized.
# The list of standard jsdoc tags is from
STANDARD_DOC = frozenset([
'author',
'bug',
'const',
'constructor',
'define',
'deprecated',
'enum',
'export',
'extends',
'externs',
'fileoverview',
'implements',
'implicitCast',
'interface',
'license',
'noalias',
'nocompile',
'nosideeffects',
'override',
'owner',
'param',
'preserve',
'private',
'return',
'see',
'supported',
'template',
'this',
'type',
'typedef',
])
ANNOTATION = frozenset(['preserveTry', 'suppress'])
LEGAL_DOC = STANDARD_DOC | ANNOTATION
# Includes all Closure Compiler @suppress types.
# Not all of these annotations are interpreted by Closure Linter.
SUPPRESS_TYPES = frozenset([
'accessControls',
'checkRegExp',
'checkTypes',
'checkVars',
'deprecated',
'duplicate',
'fileoverviewTags',
'invalidCasts',
'missingProperties',
'nonStandardJsDocs',
'strictModuleDepCheck',
'undefinedVars',
'underscore',
'unknownDefines',
'uselessCode',
'visibility',
'with'])
HAS_DESCRIPTION = frozenset([
'define', 'deprecated', 'desc', 'fileoverview', 'license', 'param',
'preserve', 'return', 'supported'])
HAS_TYPE = frozenset([
'define', 'enum', 'extends', 'implements', 'param', 'return', 'type',
'suppress'])
TYPE_ONLY = frozenset(['enum', 'extends', 'implements', 'suppress', 'type'])
HAS_NAME = frozenset(['param'])
EMPTY_COMMENT_LINE = re.compile(r'^\s*\*?\s*$')
EMPTY_STRING = re.compile(r'^\s*$')
def __init__(self, flag_token):
"""Creates the DocFlag object and attaches it to the given start token.
Args:
flag_token: The starting token of the flag.
"""
self.flag_token = flag_token
self.flag_type = flag_token.string.strip().lstrip('@')
# Extract type, if applicable.
self.type = None
self.type_start_token = None
self.type_end_token = None
if self.flag_type in self.HAS_TYPE:
brace = tokenutil.SearchUntil(flag_token, [Type.DOC_START_BRACE],
Type.FLAG_ENDING_TYPES)
if brace:
end_token, contents = _GetMatchingEndBraceAndContents(brace)
self.type = contents
self.type_start_token = brace
self.type_end_token = end_token
elif (self.flag_type in self.TYPE_ONLY and
flag_token.next.type not in Type.FLAG_ENDING_TYPES):
self.type_start_token = flag_token.next
self.type_end_token, self.type = _GetEndTokenAndContents(
self.type_start_token)
if self.type is not None:
self.type = self.type.strip()
# Extract name, if applicable.
self.name_token = None
self.name = None
if self.flag_type in self.HAS_NAME:
# Handle bad case, name could be immediately after flag token.
self.name_token = _GetNextIdentifierToken(flag_token)
# Handle good case, if found token is after type start, look for
# identifier after type end, since types contain identifiers.
if (self.type and self.name_token and
tokenutil.Compare(self.name_token, self.type_start_token) > 0):
self.name_token = _GetNextIdentifierToken(self.type_end_token)
if self.name_token:
self.name = self.name_token.string
# Extract description, if applicable.
self.description_start_token = None
self.description_end_token = None
self.description = None
if self.flag_type in self.HAS_DESCRIPTION:
search_start_token = flag_token
if self.name_token and self.type_end_token:
if tokenutil.Compare(self.type_end_token, self.name_token) > 0:
search_start_token = self.type_end_token
else:
search_start_token = self.name_token
elif self.name_token:
search_start_token = self.name_token
elif self.type:
search_start_token = self.type_end_token
interesting_token = tokenutil.Search(search_start_token,
Type.FLAG_DESCRIPTION_TYPES | Type.FLAG_ENDING_TYPES)
if interesting_token.type in Type.FLAG_DESCRIPTION_TYPES:
self.description_start_token = interesting_token
self.description_end_token, self.description = (
_GetEndTokenAndContents(interesting_token))
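# Illustrative example: for a doc comment line '* @param {string} name The
# name.', the resulting DocFlag has flag_type 'param', type 'string', name
# 'name', and description 'The name.'.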
class DocComment(object):
"""JavaScript doc comment object.
Attributes:
ordered_params: Ordered list of parameters documented.
start_token: The token that starts the doc comment.
end_token: The token that ends the doc comment.
suppressions: Map of suppression type to the token that added it.
"""
def __init__(self, start_token):
"""Create the doc comment object.
Args:
start_token: The first token in the doc comment.
"""
self.__params = {}
self.ordered_params = []
self.__flags = {}
self.start_token = start_token
self.end_token = None
self.suppressions = {}
self.invalidated = False
def Invalidate(self):
"""Indicate that the JSDoc is well-formed but we had problems parsing it.
This is a short-circuiting mechanism so that we don't emit false
positives about well-formed doc comments just because we don't support
hot new syntaxes.
"""
self.invalidated = True
def IsInvalidated(self):
"""Test whether Invalidate() has been called."""
return self.invalidated
def AddParam(self, name, param_type):
"""Add a new documented parameter.
Args:
name: The name of the parameter to document.
param_type: The parameter's declared JavaScript type.
"""
self.ordered_params.append(name)
self.__params[name] = param_type
def AddSuppression(self, token):
"""Add a new error suppression flag.
Args:
token: The suppression flag token.
"""
# TODO(user): Error if no braces.
brace = tokenutil.SearchUntil(token, [Type.DOC_START_BRACE],
[Type.DOC_FLAG])
if brace:
end_token, contents = _GetMatchingEndBraceAndContents(brace)
self.suppressions[contents] = token
def AddFlag(self, flag):
"""Add a new document flag.
Args:
flag: DocFlag object.
"""
self.__flags[flag.flag_type] = flag
def InheritsDocumentation(self):
"""Test if the jsdoc implies documentation inheritance.
Returns:
True if documentation may be pulled off the superclass.
"""
return (self.HasFlag('inheritDoc') or
(self.HasFlag('override') and
not self.HasFlag('return') and
not self.HasFlag('param')))
def HasFlag(self, flag_type):
"""Test if the given flag has been set.
Args:
flag_type: The type of the flag to check.
Returns:
True if the flag is set.
"""
return flag_type in self.__flags
def GetFlag(self, flag_type):
"""Gets the last flag of the given type.
Args:
flag_type: The type of the flag to get.
Returns:
The last instance of the given flag type in this doc comment.
"""
return self.__flags[flag_type]
def CompareParameters(self, params):
"""Computes the edit distance and list from the function params to the docs.
Uses the Levenshtein edit distance algorithm, with code modified from
http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Levenshtein_distance#Python
Args:
params: The parameter list for the function declaration.
Returns:
The edit distance, the edit list.
"""
source_len, target_len = len(self.ordered_params), len(params)
edit_lists = [[]]
distance = [[]]
for i in range(target_len+1):
edit_lists[0].append(['I'] * i)
distance[0].append(i)
for j in range(1, source_len+1):
edit_lists.append([['D'] * j])
distance.append([j])
for i in range(source_len):
for j in range(target_len):
cost = 1
if self.ordered_params[i] == params[j]:
cost = 0
deletion = distance[i][j+1] + 1
insertion = distance[i+1][j] + 1
substitution = distance[i][j] + cost
edit_list = None
best = None
if deletion <= insertion and deletion <= substitution:
# Deletion is best.
best = deletion
edit_list = list(edit_lists[i][j+1])
edit_list.append('D')
elif insertion <= substitution:
# Insertion is best.
best = insertion
edit_list = list(edit_lists[i+1][j])
edit_list.append('I')
edit_lists[i+1].append(edit_list)
else:
# Substitution is best.
best = substitution
edit_list = list(edit_lists[i][j])
if cost:
edit_list.append('S')
else:
edit_list.append('=')
edit_lists[i+1].append(edit_list)
distance[i+1].append(best)
return distance[source_len][target_len], edit_lists[source_len][target_len]
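# Illustrative example: documented params ['a', 'b', 'c'] compared against a
# declaration ['a', 'c'] yield distance 1 with edit list ['=', 'D', '='] -
# the documented 'b' must be deleted to match the declaration.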
def __repr__(self):
"""Returns a string representation of this object.
Returns:
A string representation of this object.
"""
return '<DocComment: %s, %s>' % (str(self.__params), str(self.__flags))
#
# Helper methods used by DocFlag and DocComment to parse out flag information.
#
def _GetMatchingEndBraceAndContents(start_brace):
"""Returns the matching end brace and contents between the two braces.
If any FLAG_ENDING_TYPE token is encountered before a matching end brace, then
that token is used as the matching ending token. Contents will have all
comment prefixes stripped out of them, and all comment prefixes in between the
start and end tokens will be split out into separate DOC_PREFIX tokens.
Args:
start_brace: The DOC_START_BRACE token immediately before desired contents.
Returns:
The matching ending token (DOC_END_BRACE or FLAG_ENDING_TYPE) and a string
of the contents between the matching tokens, minus any comment prefixes.
"""
open_count = 1
close_count = 0
contents = []
# We don't consider the start brace part of the type string.
token = start_brace.next
while open_count != close_count:
if token.type == Type.DOC_START_BRACE:
open_count += 1
elif token.type == Type.DOC_END_BRACE:
close_count += 1
if token.type != Type.DOC_PREFIX:
contents.append(token.string)
if token.type in Type.FLAG_ENDING_TYPES:
break
token = token.next
# Don't include the end token (end brace, end doc comment, etc.) in type.
token = token.previous
contents = contents[:-1]
return token, ''.join(contents)
def _GetNextIdentifierToken(start_token):
"""Searches for and returns the first identifier at the beginning of a token.
Searches each token after the start to see if it starts with an identifier.
If found, will split the token into at most 3 pieces: leading whitespace,
identifier, rest of token, returning the identifier token. If no identifier
is found, returns None and changes no tokens. Search is abandoned when a
FLAG_ENDING_TYPE token is found.
Args:
start_token: The token to start searching after.
Returns:
The identifier token if found, None otherwise.
"""
token = start_token.next
while token and not token.type in Type.FLAG_ENDING_TYPES:
match = javascripttokenizer.JavaScriptTokenizer.IDENTIFIER.match(
token.string)
if (match is not None and token.type == Type.COMMENT and
len(token.string) == len(match.group(0))):
return token
token = token.next
return None
def _GetEndTokenAndContents(start_token):
"""Returns last content token and all contents before FLAG_ENDING_TYPE token.
Comment prefixes are split into DOC_PREFIX tokens and stripped from the
returned contents.
Args:
start_token: The token immediately before the first content token.
Returns:
The last content token and a string of all contents including start and
end tokens, with comment prefixes stripped.
"""
iterator = start_token
last_line = iterator.line_number
last_token = None
contents = ''
while not iterator.type in Type.FLAG_ENDING_TYPES:
if (iterator.IsFirstInLine() and
DocFlag.EMPTY_COMMENT_LINE.match(iterator.line)):
# If we have a blank comment line, consider that an implicit
# ending of the description. This handles a case like:
#
# * @return {boolean} True
# *
# * Note: This is a sentence.
#
# The note is not part of the @return description, but there was
# no definitive ending token. Rather there was a line containing
# only a doc comment prefix or whitespace.
break
if iterator.type in Type.FLAG_DESCRIPTION_TYPES:
contents += iterator.string
last_token = iterator
iterator = iterator.next
if iterator.line_number != last_line:
contents += '\n'
last_line = iterator.line_number
end_token = last_token
if DocFlag.EMPTY_STRING.match(contents):
contents = None
else:
# Strip trailing newline.
contents = contents[:-1]
return end_token, contents
class Function(object):
"""Data about a JavaScript function.
Attributes:
block_depth: Block depth the function began at.
doc: The DocComment associated with the function.
has_return: If the function has a return value.
has_this: If the function references the 'this' object.
is_assigned: If the function is part of an assignment.
is_constructor: If the function is a constructor.
name: The name of the function, whether given in the function keyword or
as the lvalue the function is assigned to.
"""
def __init__(self, block_depth, is_assigned, doc, name):
self.block_depth = block_depth
self.is_assigned = is_assigned
self.is_constructor = doc and doc.HasFlag('constructor')
self.is_interface = doc and doc.HasFlag('interface')
self.has_return = False
self.has_this = False
self.name = name
self.doc = doc
class StateTracker(object):
"""EcmaScript state tracker.
Tracks block depth, function names, etc. within an EcmaScript token stream.
"""
OBJECT_LITERAL = 'o'
CODE = 'c'
def __init__(self, doc_flag=DocFlag):
"""Initializes a JavaScript token stream state tracker.
Args:
doc_flag: An optional custom DocFlag used for validating
documentation flags.
"""
self._doc_flag = doc_flag
self.Reset()
def Reset(self):
"""Resets the state tracker to prepare for processing a new page."""
self._block_depth = 0
self._is_block_close = False
self._paren_depth = 0
self._functions = []
self._functions_by_name = {}
self._last_comment = None
self._doc_comment = None
self._cumulative_params = None
self._block_types = []
self._last_non_space_token = None
self._last_line = None
self._first_token = None
self._documented_identifiers = set()
def InFunction(self):
"""Returns true if the current token is within a function.
Returns:
True if the current token is within a function.
"""
return bool(self._functions)
def InConstructor(self):
"""Returns true if the current token is within a constructor.
Returns:
True if the current token is within a constructor.
"""
return self.InFunction() and self._functions[-1].is_constructor
def InInterfaceMethod(self):
"""Returns true if the current token is within an interface method.
Returns:
True if the current token is within an interface method.
"""
if self.InFunction():
if self._functions[-1].is_interface:
return True
else:
name = self._functions[-1].name
prototype_index = name.find('.prototype.')
if prototype_index != -1:
class_function_name = name[0:prototype_index]
if (class_function_name in self._functions_by_name and
self._functions_by_name[class_function_name].is_interface):
return True
return False
def InTopLevelFunction(self):
"""Returns true if the current token is within a top level function.
Returns:
True if the current token is within a top level function.
"""
return len(self._functions) == 1 and self.InTopLevel()
def InAssignedFunction(self):
"""Returns true if the current token is within a function variable.
Returns:
True if the current token is within a function variable.
"""
return self.InFunction() and self._functions[-1].is_assigned
def IsFunctionOpen(self):
"""Returns true if the current token is a function block open.
Returns:
True if the current token is a function block open.
"""
return (self._functions and
self._functions[-1].block_depth == self._block_depth - 1)
def IsFunctionClose(self):
"""Returns true if the current token is a function block close.
Returns:
True if the current token is a function block close.
"""
return (self._functions and
self._functions[-1].block_depth == self._block_depth)
def InBlock(self):
"""Returns true if the current token is within a block.
Returns:
True if the current token is within a block.
"""
return bool(self._block_depth)
def IsBlockClose(self):
"""Returns true if the current token is a block close.
Returns:
True if the current token is a block close.
"""
return self._is_block_close
def InObjectLiteral(self):
"""Returns true if the current token is within an object literal.
Returns:
True if the current token is within an object literal.
"""
return self._block_depth and self._block_types[-1] == self.OBJECT_LITERAL
def InObjectLiteralDescendant(self):
"""Returns true if the current token has an object literal ancestor.
Returns:
True if the current token has an object literal ancestor.
"""
return self.OBJECT_LITERAL in self._block_types
def InParentheses(self):
"""Returns true if the current token is within parentheses.
Returns:
True if the current token is within parentheses.
"""
return bool(self._paren_depth)
def InTopLevel(self):
"""Whether we are at the top level in the class.
This check is language specific. In some languages, like JavaScript, a
function is top level if it is not inside any parentheses. In languages
such as ActionScript, a function is top level if it is directly within a
class.
"""
raise TypeError('Abstract method InTopLevel not implemented')
def GetBlockType(self, token):
"""Determine the block type given a START_BLOCK token.
Code blocks come after parameters, keywords like else, and closing parens.
Args:
token: The current token. Can be assumed to be type START_BLOCK.
Returns:
Code block type for current token.
"""
raise TypeError('Abstract method GetBlockType not implemented')
def GetParams(self):
"""Returns the accumulated input params as an array.
In some EcmaScript languages, input params are specified like
(param:Type, param2:Type2, ...)
in others they are specified just as
(param, param2)
We handle both formats for specifying parameters here and leave
it to the compilers for each language to detect compile errors.
This allows more code to be reused between lint checkers for various
EcmaScript languages.
Returns:
The accumulated input params as an array.
"""
params = []
if self._cumulative_params:
params = re.compile(r'\s+').sub('', self._cumulative_params).split(',')
# Strip out the type from parameters of the form name:Type.
params = map(lambda param: param.split(':')[0], params)
return params
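# Illustrative example: with accumulated params 'a : Number, b', GetParams
# returns ['a', 'b'] - whitespace is removed and ActionScript-style type
# annotations are stripped.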
def GetLastComment(self):
"""Return the last plain comment that could be used as documentation.
Returns:
The last plain comment that could be used as documentation.
"""
return self._last_comment
def GetDocComment(self):
"""Return the most recent applicable documentation comment.
Returns:
The last applicable documentation comment.
"""
return self._doc_comment
def HasDocComment(self, identifier):
"""Returns whether the identifier has been documented yet.
Args:
identifier: The identifier.
Returns:
Whether the identifier has been documented yet.
"""
return identifier in self._documented_identifiers
def InDocComment(self):
"""Returns whether the current token is in a doc comment.
Returns:
Whether the current token is in a doc comment.
"""
return self._doc_comment and self._doc_comment.end_token is None
def GetDocFlag(self):
"""Returns the current documentation flags.
Returns:
The current documentation flags.
"""
return self._doc_flag
def IsTypeToken(self, t):
if self.InDocComment() and t.type not in (Type.START_DOC_COMMENT,
Type.DOC_FLAG, Type.DOC_INLINE_FLAG, Type.DOC_PREFIX):
f = tokenutil.SearchUntil(t, [Type.DOC_FLAG], [Type.START_DOC_COMMENT],
None, True)
if f and f.attached_object.type_start_token is not None:
return (tokenutil.Compare(t, f.attached_object.type_start_token) > 0 and
tokenutil.Compare(t, f.attached_object.type_end_token) < 0)
return False
def GetFunction(self):
"""Return the function the current code block is a part of.
Returns:
The current Function object.
"""
if self._functions:
return self._functions[-1]
def GetBlockDepth(self):
"""Return the block depth.
Returns:
The current block depth.
"""
return self._block_depth
def GetLastNonSpaceToken(self):
"""Return the last non whitespace token."""
return self._last_non_space_token
def GetLastLine(self):
"""Return the last line."""
return self._last_line
def GetFirstToken(self):
"""Return the very first token in the file."""
return self._first_token
def HandleToken(self, token, last_non_space_token):
"""Handles the given token and updates state.
Args:
token: The token to handle.
last_non_space_token: The last token processed that was not whitespace.
"""
self._is_block_close = False
if not self._first_token:
self._first_token = token
# Track block depth.
type = token.type
if type == Type.START_BLOCK:
self._block_depth += 1
# Subclasses need to handle block start very differently because
# whether a block is a CODE or OBJECT_LITERAL block varies significantly
# by language.
self._block_types.append(self.GetBlockType(token))
# Track block depth.
elif type == Type.END_BLOCK:
self._is_block_close = not self.InObjectLiteral()
self._block_depth -= 1
self._block_types.pop()
# Track parentheses depth.
elif type == Type.START_PAREN:
self._paren_depth += 1
# Track parentheses depth.
elif type == Type.END_PAREN:
self._paren_depth -= 1
elif type == Type.COMMENT:
self._last_comment = token.string
elif type == Type.START_DOC_COMMENT:
self._last_comment = None
self._doc_comment = DocComment(token)
elif type == Type.END_DOC_COMMENT:
self._doc_comment.end_token = token
elif type in (Type.DOC_FLAG, Type.DOC_INLINE_FLAG):
flag = self._doc_flag(token)
token.attached_object = flag
self._doc_comment.AddFlag(flag)
if flag.flag_type == 'param' and flag.name:
self._doc_comment.AddParam(flag.name, flag.type)
elif flag.flag_type == 'suppress':
self._doc_comment.AddSuppression(token)
elif type == Type.FUNCTION_DECLARATION:
last_code = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES, None,
True)
doc = None
# Only functions outside of parens are eligible for documentation.
if not self._paren_depth:
doc = self._doc_comment
name = ''
is_assigned = last_code and (last_code.IsOperator('=') or
last_code.IsOperator('||') or last_code.IsOperator('&&') or
(last_code.IsOperator(':') and not self.InObjectLiteral()))
if is_assigned:
# TODO(robbyw): This breaks for x[2] = ...
# Must use loop to find full function name in the case of line-wrapped
# declarations (bug 1220601) like:
# my.function.foo.
# bar = function() ...
identifier = tokenutil.Search(last_code, Type.SIMPLE_LVALUE, None, True)
while identifier and identifier.type in (
Type.IDENTIFIER, Type.SIMPLE_LVALUE):
name = identifier.string + name
# Traverse behind us, skipping whitespace and comments.
while True:
identifier = identifier.previous
if not identifier or not identifier.type in Type.NON_CODE_TYPES:
break
else:
next_token = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES)
while next_token and next_token.IsType(Type.FUNCTION_NAME):
name += next_token.string
next_token = tokenutil.Search(next_token, Type.FUNCTION_NAME, 2)
function = Function(self._block_depth, is_assigned, doc, name)
self._functions.append(function)
self._functions_by_name[name] = function
elif type == Type.START_PARAMETERS:
self._cumulative_params = ''
elif type == Type.PARAMETERS:
self._cumulative_params += token.string
elif type == Type.KEYWORD and token.string == 'return':
next_token = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES)
if not next_token.IsType(Type.SEMICOLON):
function = self.GetFunction()
if function:
function.has_return = True
elif type == Type.SIMPLE_LVALUE:
identifier = token.values['identifier']
jsdoc = self.GetDocComment()
if jsdoc:
self._documented_identifiers.add(identifier)
self._HandleIdentifier(identifier, True)
elif type == Type.IDENTIFIER:
self._HandleIdentifier(token.string, False)
# Detect documented non-assignments.
next_token = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES)
if next_token.IsType(Type.SEMICOLON):
if (self._last_non_space_token and
self._last_non_space_token.IsType(Type.END_DOC_COMMENT)):
self._documented_identifiers.add(token.string)
def _HandleIdentifier(self, identifier, is_assignment):
"""Process the given identifier.
Currently checks if it references 'this' and annotates the function
accordingly.
Args:
identifier: The identifier to process.
is_assignment: Whether the identifier is being written to.
"""
if identifier == 'this' or identifier.startswith('this.'):
function = self.GetFunction()
if function:
function.has_this = True
def HandleAfterToken(self, token):
"""Handle updating state after a token has been checked.
This function should be used for destructive state changes such as
deleting a tracked object.
Args:
token: The token to handle.
"""
type = token.type
if type == Type.SEMICOLON or type == Type.END_PAREN or (
type == Type.END_BRACKET and
self._last_non_space_token.type not in (
Type.SINGLE_QUOTE_STRING_END, Type.DOUBLE_QUOTE_STRING_END)):
# We end on any numeric array index, but keep going for string based
# array indices so that we pick up manually exported identifiers.
self._doc_comment = None
self._last_comment = None
elif type == Type.END_BLOCK:
self._doc_comment = None
self._last_comment = None
if self.InFunction() and self.IsFunctionClose():
# TODO(robbyw): Detect the function's name for better errors.
self._functions.pop()
elif type == Type.END_PARAMETERS and self._doc_comment:
self._doc_comment = None
self._last_comment = None
if not token.IsAnyType(Type.WHITESPACE, Type.BLANK_LINE):
self._last_non_space_token = token
self._last_line = token.line

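StateTracker leaves InTopLevel and GetBlockType abstract. A toy subclass (hypothetical, for illustration only; the real JavaScript rules live in javascriptstatetracker.py) might fill them in like this:

from closure_linter import statetracker

class ToyStateTracker(statetracker.StateTracker):
  """Illustrative tracker with JavaScript-like top-level rules."""

  def InTopLevel(self):
    # Treat any code outside parentheses as top level.
    return not self.InParentheses()

  def GetBlockType(self, token):
    # Naively classify every block as a code block.
    return self.CODE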
285
tools/closure_linter/closure_linter/tokenutil.py

@ -0,0 +1,285 @@
#!/usr/bin/env python
#
# Copyright 2007 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Token utility functions."""
__author__ = ('robbyw@google.com (Robert Walker)',
'ajp@google.com (Andy Perelson)')
from closure_linter.common import tokens
from closure_linter import javascripttokens
import copy
# Shorthand
JavaScriptToken = javascripttokens.JavaScriptToken
Type = tokens.TokenType
def GetFirstTokenInSameLine(token):
"""Returns the first token in the same line as token.
Args:
token: Any token in the line.
Returns:
The first token in the same line as token.
"""
while not token.IsFirstInLine():
token = token.previous
return token
def CustomSearch(start_token, func, end_func=None, distance=None,
reverse=False):
"""Returns the first token where func is True within distance of this token.
Args:
start_token: The token to start searching from
func: The function to call to test a token for applicability
end_func: The function to call to test a token to determine whether to abort
the search.
distance: The number of tokens to look through before failing search. Must
be positive. If unspecified, will search until the end of the token
chain
reverse: When true, search the tokens before this one instead of the tokens
after it
Returns:
The first token matching func within distance of this token, or None if no
such token is found.
"""
token = start_token
if reverse:
while token and (distance is None or distance > 0):
previous = token.previous
if previous:
if func(previous):
return previous
if end_func and end_func(previous):
return None
token = previous
if distance is not None:
distance -= 1
else:
while token and (distance is None or distance > 0):
next = token.next
if next:
if func(next):
return next
if end_func and end_func(next):
return None
token = next
if distance is not None:
distance -= 1
return None
def Search(start_token, token_types, distance=None, reverse=False):
"""Returns the first token of type in token_types within distance.
Args:
start_token: The token to start searching from
token_types: The allowable types of the token being searched for
distance: The number of tokens to look through before failing search. Must
be positive. If unspecified, will search until the end of the token
chain
reverse: When true, search the tokens before this one instead of the tokens
after it
Returns:
The first token of any type in token_types within distance of this token, or
None if no such token is found.
"""
return CustomSearch(start_token, lambda token: token.IsAnyType(token_types),
None, distance, reverse)
def SearchExcept(start_token, token_types, distance=None, reverse=False):
"""Returns the first token not of any type in token_types within distance.
Args:
start_token: The token to start searching from
token_types: The unallowable types of the token being searched for
distance: The number of tokens to look through before failing search. Must
be positive. If unspecified, will search until the end of the token
chain
reverse: When true, search the tokens before this one instead of the tokens
after it
Returns:
The first token not of any type in token_types within distance of this
token, or None if no such token is found.
"""
return CustomSearch(start_token,
lambda token: not token.IsAnyType(token_types),
None, distance, reverse)
def SearchUntil(start_token, token_types, end_types, distance=None,
reverse=False):
"""Returns the first token of type in token_types before a token of end_type.
Args:
start_token: The token to start searching from.
token_types: The allowable types of the token being searched for.
end_types: Types of tokens to abort search if we find.
distance: The number of tokens to look through before failing search. Must
be positive. If unspecified, will search until the end of the token
chain
reverse: When true, search the tokens before this one instead of the tokens
after it
Returns:
The first token of any type in token_types within distance of this token
and before any token of a type in end_types, or None if no such token is found.
"""
return CustomSearch(start_token, lambda token: token.IsAnyType(token_types),
lambda token: token.IsAnyType(end_types),
distance, reverse)
def DeleteToken(token):
"""Deletes the given token from the linked list.
Args:
token: The token to delete
"""
if token.previous:
token.previous.next = token.next
if token.next:
token.next.previous = token.previous
following_token = token.next
while following_token and following_token.metadata.last_code == token:
following_token.metadata.last_code = token.metadata.last_code
following_token = following_token.next
def DeleteTokens(token, tokenCount):
"""Deletes the given number of tokens starting with the given token.
Args:
token: The token to start deleting at.
tokenCount: The total number of tokens to delete.
"""
for i in xrange(1, tokenCount):
DeleteToken(token.next)
DeleteToken(token)
def InsertTokenAfter(new_token, token):
"""Insert new_token after token
Args:
new_token: A token to be added to the stream
token: A token already in the stream
"""
new_token.previous = token
new_token.next = token.next
new_token.metadata = copy.copy(token.metadata)
if token.IsCode():
new_token.metadata.last_code = token
if new_token.IsCode():
following_token = token.next
while following_token and following_token.metadata.last_code == token:
following_token.metadata.last_code = new_token
following_token = following_token.next
token.next = new_token
if new_token.next:
new_token.next.previous = new_token
if new_token.start_index is None:
if new_token.line_number == token.line_number:
new_token.start_index = token.start_index + len(token.string)
else:
new_token.start_index = 0
iterator = new_token.next
while iterator and iterator.line_number == new_token.line_number:
iterator.start_index += len(new_token.string)
iterator = iterator.next
def InsertSpaceTokenAfter(token):
"""Inserts a space token after the given token.
Args:
token: The token to insert a space token after
Returns:
The inserted space token."""
space_token = JavaScriptToken(' ', Type.WHITESPACE, token.line,
token.line_number)
InsertTokenAfter(space_token, token)
return space_token
def InsertLineAfter(token):
"""Inserts a blank line after the given token.
Args:
token: The token to insert a blank line after
Returns:
The inserted blank line token."""
blank_token = JavaScriptToken('', Type.BLANK_LINE, '',
token.line_number + 1)
InsertTokenAfter(blank_token, token)
# Update all subsequent line numbers.
following = blank_token.next
while following:
following.line_number += 1
following = following.next
return blank_token
def SplitToken(token, position):
"""Splits the token into two tokens at position.
Args:
token: The token to split
position: The position to split at. Will be the beginning of second token.
Returns:
The new second token.
"""
new_string = token.string[position:]
token.string = token.string[:position]
new_token = JavaScriptToken(new_string, token.type, token.line,
token.line_number)
InsertTokenAfter(new_token, token)
return new_token
def Compare(token1, token2):
"""Compares two tokens and determines their relative order.
Returns:
A negative integer, zero, or a positive integer as the first token is
before, equal, or after the second in the token stream.
"""
if token2.line_number != token1.line_number:
return token1.line_number - token2.line_number
else:
return token1.start_index - token2.start_index

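A few illustrative calls to the helpers above, mirroring how statetracker.py uses them (a sketch, not part of the commit; `token` stands for any token taken from a tokenized stream):

from closure_linter import javascripttokens
from closure_linter import tokenutil

Type = javascripttokens.JavaScriptTokenType

# The next semicolon at or after `token`, searching forward without a limit.
semicolon = tokenutil.Search(token, Type.SEMICOLON)

# The nearest preceding code token, skipping comments and whitespace.
last_code = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES, None, True)

# Compare returns a negative number when its first argument comes first.
if last_code and semicolon:
  print tokenutil.Compare(last_code, semicolon)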
2489
tools/closure_linter/gflags.py

File diff suppressed because it is too large

5
tools/closure_linter/setup.cfg

@ -0,0 +1,5 @@
[egg_info]
tag_build =
tag_date = 0
tag_svn_revision = 0

38
tools/closure_linter/setup.py

@ -0,0 +1,38 @@
#!/usr/bin/env python
#
# Copyright 2010 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
try:
from setuptools import setup
except ImportError:
from distutils.core import setup
setup(name='closure_linter',
version='2.2.6',
description='Closure Linter',
license='Apache',
author='The Closure Linter Authors',
author_email='opensource@google.com',
url='http://code.google.com/p/closure-linter',
install_requires=['python-gflags'],
package_dir={'closure_linter': 'closure_linter'},
packages=['closure_linter', 'closure_linter.common'],
entry_points = {
'console_scripts': [
'gjslint = closure_linter.gjslint:main',
'fixjsstyle = closure_linter.fixjsstyle:main'
]
}
)
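With the entry points above, a standard install (for example, 'python setup.py install') should place two console scripts on the PATH, so the linter and fixer can be invoked directly:

gjslint myscript.js
fixjsstyle myscript.js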