• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python
2# -*- coding: utf-8 -*-
3#
4# Copyright (C) 2009 Google Inc. All rights reserved.
5# Copyright (C) 2009 Torch Mobile Inc.
6#
7# Redistribution and use in source and binary forms, with or without
8# modification, are permitted provided that the following conditions are
9# met:
10#
11#    * Redistributions of source code must retain the above copyright
12# notice, this list of conditions and the following disclaimer.
13#    * Redistributions in binary form must reproduce the above
14# copyright notice, this list of conditions and the following disclaimer
15# in the documentation and/or other materials provided with the
16# distribution.
17#    * Neither the name of Google Inc. nor the names of its
18# contributors may be used to endorse or promote products derived from
19# this software without specific prior written permission.
20#
21# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
33# This is the modified version of Google's cpplint. The original code is
34# http://google-styleguide.googlecode.com/svn/trunk/cpplint/cpplint.py
35
36"""Does WebKit-lint on c++ files.
37
38The goal of this script is to identify places in the code that *may*
39be in non-compliance with WebKit style.  It does not attempt to fix
40up these problems -- the point is to educate.  It does also not
41attempt to find all problems, or to ensure that everything it does
42find is legitimately a problem.
43
44In particular, we can get very confused by /* and // inside strings!
45We do a small hack, which is to ignore //'s with "'s after them on the
46same line, but it is far from perfect (in either direction).
47"""
48
49import codecs
50import getopt
51import math  # for log
52import os
53import os.path
54import re
55import sre_compile
56import string
57import sys
58import unicodedata
59
60
# Command-line help text.  The %(program_name)s placeholders are filled in
# once, when this module is loaded, with sys.argv[0].
_USAGE = """
Syntax: %(program_name)s [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
        <file> [file] ...

  The style guidelines this tries to follow are those in
    http://webkit.org/coding/coding-style.html

  Every problem is given a confidence score from 1-5, with 5 meaning we are
  certain of the problem, and 1 meaning it could be a legitimate construct.
  This will miss some errors, and is not a substitute for a code review.

  To prevent specific lines from being linted, add a '// NOLINT' comment to the
  end of the line.

  The files passed in will be linted; at least one file must be provided.
  Linted extensions are .cpp, .c and .h.  Other file types will be ignored.

  Flags:

    output=vs7
      By default, the output is formatted to ease emacs parsing.  Visual Studio
      compatible output (vs7) may also be used.  Other formats are unsupported.

    verbose=#
      Specify a number 0-5 to restrict errors to certain verbosity levels.

    filter=-x,+y,...
      Specify a comma-separated list of category-filters to apply: only
      error messages whose category names pass the filters will be printed.
      (Category names are printed with the message and look like
      "[whitespace/indent]".)  Filters are evaluated left to right.
      "-FOO" and "FOO" means "do not print categories that start with FOO".
      "+FOO" means "do print categories that start with FOO".

      Examples: --filter=-whitespace,+whitespace/braces
                --filter=whitespace,runtime/printf,+runtime/printf_format
                --filter=-,+build/include_what_you_use

      To see a list of all the categories used in %(program_name)s, pass no arg:
         --filter=
""" % {'program_name': sys.argv[0]}
102
# We categorize each error message we print.  Here are the categories.
# We want an explicit list so we can list them all in cpp_style --filter=.
# If you add a new error message with a new category, add it to the list
# here!  cpp_style_unittest.py should tell you if you forget to do this.
# The list is a single string with one indented category name per line; the
# backslash after the opening quotes keeps the string from starting with a
# blank line.
# \ used for clearer layout -- pylint: disable-msg=C6013
_ERROR_CATEGORIES = '''\
    build/class
    build/deprecated
    build/endif_comment
    build/forward_decl
    build/header_guard
    build/include
    build/include_order
    build/include_what_you_use
    build/namespaces
    build/printf_format
    build/storage_class
    legal/copyright
    readability/braces
    readability/casting
    readability/check
    readability/comparison_to_zero
    readability/constructors
    readability/control_flow
    readability/fn_size
    readability/function
    readability/multiline_comment
    readability/multiline_string
    readability/null
    readability/streams
    readability/todo
    readability/utf8
    runtime/arrays
    runtime/casting
    runtime/explicit
    runtime/int
    runtime/init
    runtime/invalid_increment
    runtime/memset
    runtime/printf
    runtime/printf_format
    runtime/references
    runtime/rtti
    runtime/sizeof
    runtime/string
    runtime/threadsafe_fn
    runtime/virtual
    whitespace/blank_line
    whitespace/braces
    whitespace/comma
    whitespace/comments
    whitespace/declaration
    whitespace/end_of_line
    whitespace/ending_newline
    whitespace/indent
    whitespace/labels
    whitespace/line_length
    whitespace/newline
    whitespace/operators
    whitespace/parens
    whitespace/semicolon
    whitespace/tab
    whitespace/todo
'''
167
# The default state of the category filter. This is overridden by the
# --filter= flag. By default all errors are on, so only add here categories
# that should be off by default (i.e., categories that must be enabled by the
# --filter= flags).
# All entries here should start with a '-' or '+', as in the --filter= flag.
_DEFAULT_FILTERS = []
173
# Header names that are treated as belonging to the STL.
_STL_HEADERS = frozenset("""
    algobase.h algorithm alloc.h bitset deque exception
    function.h functional hash_map hash_map.h hash_set
    hash_set.h iterator list list.h map memory pair.h
    pthread_alloc queue set set.h sstream stack
    stl_alloc.h stl_relops.h type_traits.h
    utility vector vector.h
    """.split())
183
184
# C++ system header names that are not counted as STL headers.
_CPP_HEADERS = frozenset("""
    algo.h builtinbuf.h bvector.h cassert cctype
    cerrno cfloat ciso646 climits clocale cmath
    complex complex.h csetjmp csignal cstdarg cstddef
    cstdio cstdlib cstring ctime cwchar cwctype
    defalloc.h deque.h editbuf.h exception fstream
    fstream.h hashtable.h heap.h indstream.h iomanip
    iomanip.h ios iosfwd iostream iostream.h istream.h
    iterator.h limits map.h multimap.h multiset.h
    numeric ostream.h parsestream.h pfstream.h PlotFile.h
    procbuf.h pthread_alloc.h rope rope.h ropeimpl.h
    SFile.h slist slist.h stack.h stdexcept
    stdiostream.h streambuf.h stream.h strfile.h string
    strstream strstream.h tempbuf.h tree.h typeinfo valarray
    """.split())
201
202
# Assertion macros, as defined in base/logging.h and testing/base/gunit.h.
# Ordering matters: substring matching is used on these names, so every _M
# variant has to be listed ahead of the shorter name it contains.
_CHECK_MACROS = [
    'DCHECK',
    'CHECK',
    'EXPECT_TRUE_M',
    'EXPECT_TRUE',
    'ASSERT_TRUE_M',
    'ASSERT_TRUE',
    'EXPECT_FALSE_M',
    'EXPECT_FALSE',
    'ASSERT_FALSE_M',
    'ASSERT_FALSE',
]

# For every check macro: a dict from comparison operator to the name of the
# more specific macro that should replace CHECK/DCHECK/EXPECT_*/ASSERT_*.
_CHECK_REPLACEMENT = dict((m, {}) for m in _CHECK_MACROS)

# Macros asserting that the condition holds map an operator straight to the
# macro named after it.
for op, replacement in (
        ('==', 'EQ'),
        ('!=', 'NE'),
        ('>=', 'GE'),
        ('>', 'GT'),
        ('<=', 'LE'),
        ('<', 'LT')):
    _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
    _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
    _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement
    _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement
    _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
    _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement

# Macros asserting that the condition is false map an operator to the macro
# named after the opposite comparison.
for op, inv_replacement in (
        ('==', 'NE'),
        ('!=', 'EQ'),
        ('>=', 'LT'),
        ('>', 'LE'),
        ('<=', 'GT'),
        ('<', 'GE')):
    _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement
    _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement
    _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement
    _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement
234
235
# Header type codes accepted by _IncludeState.check_next_include_order().
_CONFIG_HEADER, _PRIMARY_HEADER, _OTHER_HEADER = range(3)
241
242
# Compiled regular expressions, keyed by their pattern string.
_regexp_compile_cache = {}


def match(pattern, s):
    """Matches the string with the pattern, caching the compiled regexp.

    Args:
      pattern: The regular expression, as a pattern string.
      s: The string to match against; matching is anchored at its start.

    Returns:
      The match object, or None if the pattern does not match.
    """
    # The regexp compilation caching is inlined in both match and search for
    # performance reasons; factoring it out into a separate function turns out
    # to be noticeably expensive.
    # Patterns are compiled with the public re.compile() API rather than the
    # undocumented sre_compile module, which is deprecated in recent Pythons.
    if pattern not in _regexp_compile_cache:
        _regexp_compile_cache[pattern] = re.compile(pattern)
    return _regexp_compile_cache[pattern].match(s)


def search(pattern, s):
    """Searches the string for the pattern, caching the compiled regexp.

    Args:
      pattern: The regular expression, as a pattern string.
      s: The string to scan; the pattern may match at any position.

    Returns:
      The match object for the first match, or None if there is none.
    """
    # Caching is inlined here for the same reason as in match().
    if pattern not in _regexp_compile_cache:
        _regexp_compile_cache[pattern] = re.compile(pattern)
    return _regexp_compile_cache[pattern].search(s)
261
262
class _IncludeState(dict):
    """Tracks include line numbers and checks the order of includes.

    Being a dict, an _IncludeState object serves as a mapping from include
    filename to the line number on which that file was included.

    Call check_next_include_order() once for each header in the file, passing
    in one of the header type constants defined above.  A header that appears
    out of order is reported through that method's return value, which is
    then a non-empty error message.

    """
    # For a correctly ordered file, self._section steps forward through these
    # values.  check_next_include_order() reports an error whenever an include
    # would require it to step backwards.
    _INITIAL_SECTION = 0
    _CONFIG_SECTION = 1
    _PRIMARY_SECTION = 2
    _OTHER_SECTION = 3

    # Human-readable names used when building error messages.
    _TYPE_NAMES = {
        _CONFIG_HEADER: 'WebCore config.h',
        _PRIMARY_HEADER: 'header this file implements',
        _OTHER_HEADER: 'other header',
        }
    _SECTION_NAMES = {
        _INITIAL_SECTION: "... nothing.",
        _CONFIG_SECTION: "WebCore config.h.",
        _PRIMARY_SECTION: 'a header this file implements.',
        _OTHER_SECTION: 'other header.',
        }

    def __init__(self):
        dict.__init__(self)
        self._section = self._INITIAL_SECTION
        self._visited_primary_section = False
        self.header_types = {}

    def visited_primary_section(self):
        """Returns True once a primary header has been passed to the checker."""
        return self._visited_primary_section

    def check_next_include_order(self, header_type, file_is_header):
        """Checks one include against the expected order and advances the state.

        Args:
          header_type: One of the _XXX_HEADER constants defined above.
          file_is_header: Whether the file that owns this _IncludeState is
                          itself a header.

        Returns:
          The empty string if the header is in the right order, or an
          error message describing what's wrong.

        """
        # A header file must include neither config.h nor itself; these cases
        # are rejected without touching the section state.
        if file_is_header:
            if header_type == _CONFIG_HEADER:
                return 'Header file should not contain WebCore config.h.'
            if header_type == _PRIMARY_HEADER:
                return 'Header file should not contain itself.'

        current_section = self._section
        type_name = self._TYPE_NAMES[header_type]

        # There is no section after the last one, so the "before" message only
        # exists while we have not yet reached _OTHER_SECTION.
        if current_section != self._OTHER_SECTION:
            found_before = 'Found %s before %s' % (
                type_name, self._SECTION_NAMES[current_section + 1])
        found_after = 'Found %s after %s' % (
            type_name, self._SECTION_NAMES[current_section])

        message = ''
        if header_type == _CONFIG_HEADER:
            if current_section >= self._CONFIG_SECTION:
                message = found_after
            self._section = self._CONFIG_SECTION
        elif header_type == _PRIMARY_HEADER:
            if current_section >= self._PRIMARY_SECTION:
                message = found_after
            elif current_section < self._CONFIG_SECTION:
                message = found_before
            self._section = self._PRIMARY_SECTION
            self._visited_primary_section = True
        else:
            assert header_type == _OTHER_HEADER
            if not file_is_header and current_section < self._PRIMARY_SECTION:
                message = found_before
            self._section = self._OTHER_SECTION

        return message
349
350
class _CppStyleState(object):
    """Maintains module-wide state."""

    def __init__(self):
        self.verbose_level = 1  # global setting.
        self.error_count = 0    # global count of reported errors
        # filters to apply when emitting error messages
        self.filters = _DEFAULT_FILTERS[:]

        # output format:
        # "emacs" - format that emacs can parse (default)
        # "vs7" - format that Microsoft Visual Studio 7 can parse
        self.output_format = 'emacs'

    def set_output_format(self, output_format):
        """Sets the output format for errors."""
        self.output_format = output_format

    def set_verbose_level(self, level):
        """Sets the module's verbosity, and returns the previous setting."""
        last_verbose_level = self.verbose_level
        self.verbose_level = level
        return last_verbose_level

    def set_filters(self, filters):
        """Sets the error-message filters.

        These filters are applied when deciding whether to emit a given
        error message.  The new list is the default filters followed by the
        given ones.  It is validated before being installed, so a rejected
        value leaves the previously active filters untouched.

        Args:
          filters: A string of comma-separated filters (eg "+whitespace/indent").
                   Each filter should start with + or -; else we die.
                   Whitespace around each filter and empty entries are ignored.

        Raises:
          ValueError: The comma-separated filters did not all start with '+' or '-'.
                      E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
        """
        # Default filters always have less priority than the flag ones.
        new_filters = _DEFAULT_FILTERS[:]
        for raw_filter in filters.split(','):
            clean_filter = raw_filter.strip()
            if clean_filter:
                new_filters.append(clean_filter)

        # Validate the complete list first; self.filters is only replaced
        # once every entry is known to be well-formed.
        for one_filter in new_filters:
            if not one_filter.startswith(('+', '-')):
                raise ValueError('Every filter in --filter must start with '
                                 '+ or - (%s does not)' % one_filter)
        self.filters = new_filters

    def reset_error_count(self):
        """Sets the module's error statistic back to zero."""
        self.error_count = 0

    def increment_error_count(self):
        """Bumps the module's error statistic."""
        self.error_count += 1
407
408
# The single module-wide state object; the module-level helper functions in
# this file read and update it.
_cpp_style_state = _CppStyleState()
410
411
def _output_format():
    """Gets the module's output format.

    Returns:
      The output_format value currently stored on _cpp_style_state.
    """
    return _cpp_style_state.output_format
415
416
def _set_output_format(output_format):
    """Sets the module's output format.

    Args:
      output_format: The new format, passed on unchanged to
                     _cpp_style_state.set_output_format().
    """
    _cpp_style_state.set_output_format(output_format)
420
421
def _verbose_level():
    """Returns the module's verbosity setting.

    Returns:
      The verbose_level value currently stored on _cpp_style_state.
    """
    return _cpp_style_state.verbose_level
425
426
def _set_verbose_level(level):
    """Sets the module's verbosity, and returns the previous setting.

    Args:
      level: The new verbosity level.

    Returns:
      Whatever _cpp_style_state.set_verbose_level() returns, i.e. the
      verbosity level that was in effect before this call.
    """
    return _cpp_style_state.set_verbose_level(level)
430
431
def _filters():
    """Returns the module's list of output filters, as a list.

    The list object stored on _cpp_style_state is returned directly, not a
    copy of it.
    """
    return _cpp_style_state.filters
435
436
def _set_filters(filters):
    """Sets the module's error-message filters.

    These filters are applied when deciding whether to emit a given
    error message.  The work is delegated to _cpp_style_state.set_filters().

    Args:
      filters: A string of comma-separated filters (eg "+whitespace/indent").
               Each filter should start with + or -; else we die.
    """
    _cpp_style_state.set_filters(filters)
448
449
def error_count():
    """Returns the global count of reported errors.

    Returns:
      The error_count value currently stored on _cpp_style_state.
    """
    return _cpp_style_state.error_count
453
454
class _FunctionState(object):
    """Remembers the function being scanned and how many body lines it has."""

    # Base line-count thresholds; check() doubles them for every verbosity
    # level before comparing.
    _NORMAL_TRIGGER = 250  # ordinary functions
    _TEST_TRIGGER = 400    # functions whose name starts with TEST or Test

    def __init__(self):
        self.in_a_function = False
        self.lines_in_function = 0
        self.current_function = ''

    def begin(self, function_name):
        """Starts tracking a new function body.

        Args:
            function_name: The name of the function being tracked.
        """
        self.current_function = function_name
        self.lines_in_function = 0
        self.in_a_function = True

    def count(self):
        """Adds one line to the current function body, if inside one."""
        if not self.in_a_function:
            return
        self.lines_in_function += 1

    def check(self, error, filename, line_number):
        """Reports an error if the function body has grown too long.

        Args:
          error: The function to call with any errors found.
          filename: The name of the current file.
          line_number: The number of the line to check.
        """
        is_test = match(r'T(EST|est)', self.current_function)
        base_trigger = self._TEST_TRIGGER if is_test else self._NORMAL_TRIGGER
        trigger = base_trigger * 2 ** _verbose_level()

        if self.lines_in_function <= trigger:
            return

        # The level grows by one each time the body doubles in size relative
        # to the base threshold, and is capped at 5.
        ratio = self.lines_in_function / base_trigger
        error_level = min(int(math.log(ratio, 2)), 5)
        details = (self.current_function, self.lines_in_function, trigger)
        error(filename, line_number, 'readability/fn_size', error_level,
              'Small and focused functions are preferred:'
              ' %s has %d non-comment lines'
              ' (error triggered by exceeding %d lines).' % details)

    def end(self):
        """Stops tracking the current function body."""
        self.in_a_function = False
509
510
class _IncludeError(Exception):
    """Exception type for a problem with the include order in a file."""
514
515
class FileInfo:
    """Provides utility functions for filenames.

    FileInfo provides easy access to the components of a file's path
    relative to the project root.
    """

    def __init__(self, filename):
        self._filename = filename

    def full_name(self):
        """Returns the absolute path, with Windows separators made Unix-like."""
        return os.path.abspath(self._filename).replace('\\', '/')

    def repository_name(self):
        r"""Full name after removing the local path to the repository.

        If we have a real absolute path name here we can try to do something smart:
        detecting the root of the checkout and truncating /path/to/checkout from
        the name so that we get header guards that don't include things like
        "C:\Documents and Settings\..." or "/home/username/..." in them and thus
        people on different computers who have checked the source out to different
        locations won't see bogus errors.

        Returns:
          The path relative to the SVN or git checkout root when the file
          exists and such a root is found; otherwise the full name.
        """
        fullname = self.full_name()

        if os.path.exists(fullname):
            project_dir = os.path.dirname(fullname)

            if os.path.exists(os.path.join(project_dir, ".svn")):
                # If there's a .svn file in the current directory, we
                # recursively look up the directory tree for the top
                # of the SVN checkout
                root_dir = project_dir
                one_up_dir = os.path.dirname(root_dir)
                # Stop at the filesystem root, where dirname() returns its
                # argument and the walk would otherwise never end.
                while (one_up_dir != root_dir
                       and os.path.exists(os.path.join(one_up_dir, ".svn"))):
                    root_dir = one_up_dir
                    one_up_dir = os.path.dirname(one_up_dir)

                prefix = os.path.commonprefix([root_dir, project_dir])
                return fullname[len(prefix) + 1:]

            # Not SVN? Try to find a git top level directory by
            # searching up from the current path.  The file's own directory
            # is checked first so that files sitting directly in the
            # checkout root are recognized as well.
            root_dir = project_dir
            while True:
                if os.path.exists(os.path.join(root_dir, ".git")):
                    # root_dir is an ancestor of (or equal to) project_dir,
                    # so it is a literal prefix of fullname.
                    return fullname[len(root_dir):].lstrip('/')
                parent_dir = os.path.dirname(root_dir)
                if parent_dir == root_dir:
                    # Reached the filesystem root without finding .git.
                    break
                root_dir = parent_dir

        # Don't know what to do; header guard warnings may be wrong...
        return fullname

    def split(self):
        """Splits the file into the directory, basename, and extension.

        For 'chrome/browser/browser.cpp', Split() would
        return ('chrome/browser', 'browser', '.cpp')

        Returns:
          A tuple of (directory, basename, extension).
        """

        googlename = self.repository_name()
        project, rest = os.path.split(googlename)
        return (project,) + os.path.splitext(rest)

    def base_name(self):
        """File base name - text after the final slash, before the final period."""
        return self.split()[1]

    def extension(self):
        """File extension - the final period and the text following it."""
        return self.split()[2]

    def no_extension(self):
        """Repository-relative name with the extension removed."""
        return '/'.join(self.split()[0:2])

    def is_source(self):
        """File has a source file extension."""
        return self.extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
600
601
def _should_print_error(category, confidence):
    """Returns true iff confidence >= verbose, and category passes filter."""
    # An error is dropped either because its confidence is below the
    # verbosity level or because the filters suppress its category.
    if confidence < _cpp_style_state.verbose_level:
        return False

    # Filters are applied in order; the last one whose prefix matches the
    # category decides the outcome.
    suppressed = False
    for rule in _filters():
        sign, prefix = rule[:1], rule[1:]
        if sign == '-':
            if category.startswith(prefix):
                suppressed = True
        elif sign == '+':
            if category.startswith(prefix):
                suppressed = False
        else:
            assert False  # should have been checked for in set_filter.

    return not suppressed
623
624
def error(filename, line_number, category, confidence, message):
    """Records a lint error and writes it to stderr.

    Besides the location, the report carries our confidence in the error,
    that is, how certain we are that this is a legitimate style regression
    and not a misidentification or a use that's sometimes justified.

    Args:
      filename: The name of the file containing the error.
      line_number: The number of the line containing the error.
      category: A string used to describe the "category" this bug
                falls under: "whitespace", say, or "runtime".  Categories
                may have a hierarchy separated by slashes: "whitespace/indent".
      confidence: A number from 1-5 representing a confidence score for
                  the error, with 5 meaning that we are certain of the problem,
                  and 1 meaning that it could be a legitimate construct.
      message: The error message.
    """
    # Nothing is counted or printed when the verbosity level or the filters
    # rule this error out.
    if not _should_print_error(category, confidence):
        return

    _cpp_style_state.increment_error_count()
    if _cpp_style_state.output_format == 'vs7':
        template = '%s(%s):  %s  [%s] [%d]\n'
    else:
        template = '%s:%s:  %s  [%s] [%d]\n'
    sys.stderr.write(template % (
        filename, line_number, message, category, confidence))
653
654
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
# Matches strings.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
# Matches characters.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
# Matches C-style /* ... */ comments that start and end on the same line.
# This RE is a little bit more complicated than one might expect, because we
# have to take care of removing the surrounding spaces too, so that we can
# handle comments inside statements better.
# The current rule is: We only clear spaces from both sides when we're at the
# end of the line. Otherwise, we try to remove spaces from the right side,
# if this doesn't work we try on left side but only if there's a non-character
# on the right.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r"""(\s*/\*.*\*/\s*$|
            /\*.*\*/\s+|
         \s+/\*.*\*/(?=\W)|
            /\*.*\*/)""", re.VERBOSE)
675
676
def is_cpp_string(line):
    """Tells whether the text right after 'line' would be inside a string.

    Comments, single-line or multi-line, are not taken into account.

    Args:
      line: is a partial line of code starting from the 0..n.

    Returns:
      True, if next character appended to 'line' is inside a
      string constant.
    """
    # Neutralize escaped backslashes first, so that \\" is not read as \".
    line = line.replace(r'\\', 'XX')

    all_quotes = line.count('"')
    escaped_quotes = line.count(r'\"')
    char_literal_quotes = line.count("'\"'")

    # An odd number of real string delimiters means a string is still open.
    return (all_quotes - escaped_quotes - char_literal_quotes) % 2 == 1
692
693
def find_next_multi_line_comment_start(lines, line_index):
    """Returns the index of the next line that opens a multi-line comment.

    A line counts when, ignoring surrounding whitespace, it starts with the
    comment opener and does not close the comment again on the same line.
    len(lines) is returned when no such line exists at or after line_index.
    """
    for index in range(line_index, len(lines)):
        stripped = lines[index].strip()
        # Skip comments that are closed again before the line ends.
        if stripped.startswith('/*') and stripped.find('*/', 2) < 0:
            return index
    return len(lines)
703
704
def find_next_multi_line_comment_end(lines, line_index):
    """Returns the index of the next line ending with the comment closer.

    The search starts at line_index, which is taken to be inside a comment.
    len(lines) is returned when no line ends with the closing marker.
    """
    for index in range(line_index, len(lines)):
        if lines[index].strip().endswith('*/'):
            return index
    return len(lines)
712
713
def remove_multi_line_comments_from_range(lines, begin, end):
    """Overwrites lines[begin] up to, but excluding, lines[end] in place."""
    # A dummy // comment is used instead of an empty string so the replaced
    # lines stay non-empty and do not cause unnecessary blank line warnings
    # later in the code.
    index = begin
    while index < end:
        lines[index] = '// dummy'
        index += 1
720
721
def remove_multi_line_comments(filename, lines, error):
    """Blanks out every multi-line (c-style) comment in lines, in place.

    Args:
      filename: The name of the current file, used when reporting an error.
      lines: The list of lines to process; it is modified in place.
      error: The function to call when a comment is never closed.
    """
    search_from = 0
    while search_from < len(lines):
        comment_start = find_next_multi_line_comment_start(lines, search_from)
        if comment_start >= len(lines):
            # No further multi-line comment in the file.
            break

        comment_end = find_next_multi_line_comment_end(lines, comment_start)
        if comment_end >= len(lines):
            error(filename, comment_start + 1, 'readability/multiline_comment', 5,
                  'Could not find end of multi-line comment')
            break

        remove_multi_line_comments_from_range(lines, comment_start, comment_end + 1)
        search_from = comment_end + 1
736
737
def cleanse_comments(line):
    """Strips a trailing //-comment and any single-line /* */ comments.

    Args:
      line: A line of C++ source.

    Returns:
      The line with single-line comments removed.
    """
    slashes_at = line.find('//')
    if slashes_at >= 0:
        before_slashes = line[:slashes_at]
        # Leave the line alone when the // sits inside a string constant.
        if not is_cpp_string(before_slashes):
            line = before_slashes
    # Finally drop the /* ... */ comments.
    return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
752
753
class CleansedLines(object):
    """Keeps three parallel views of the lines of a file.

    1) elided holds the lines with strings collapsed and comments removed,
    2) lines holds the lines with only the comments removed, and
    3) raw_lines is the list that was passed in, without any processing.
    All three are lists of the same length.
    """

    def __init__(self, lines):
        self.elided = []
        self.lines = []
        self.raw_lines = lines
        self._num_lines = len(lines)
        for raw_line in lines:
            self.lines.append(cleanse_comments(raw_line))
            collapsed = self.collapse_strings(raw_line)
            self.elided.append(cleanse_comments(collapsed))

    def num_lines(self):
        """Returns the number of lines represented."""
        return self._num_lines

    @staticmethod
    def collapse_strings(elided):
        """Collapses strings and chars on a line to simple "" or '' blocks.

        Lines matched by _RE_PATTERN_INCLUDE are returned unchanged.

        Args:
          elided: The line being processed.

        Returns:
          The line with collapsed strings.
        """
        if _RE_PATTERN_INCLUDE.match(elided):
            return elided

        # Escaped characters go first, which keeps the quote collapsing
        # basic.  Things that look like escaped characters shouldn't occur
        # outside of strings and chars.
        substitutions = (
            (_RE_PATTERN_CLEANSE_LINE_ESCAPES, ''),
            (_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES, "''"),
            (_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES, '""'),
        )
        for pattern, collapsed_form in substitutions:
            elided = pattern.sub(collapsed_form, elided)
        return elided
797
798
def close_expression(clean_lines, line_number, pos):
    """If input points to ( or { or [, finds the position that closes it.

    If clean_lines.elided[line_number][pos] is a '(' or '{' or '[', scans
    forward from that character, across lines if necessary, and finds the
    line_number/pos of the bracket that closes it.  Only brackets of the
    same kind as the opening one are counted while tracking nesting.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      pos: A position on the line.

    Returns:
      A tuple (line, line_number, pos) pointing *past* the closing bracket,
      or (line, clean_lines.num_lines(), -1) if the character at pos is not
      an opening bracket or if no close is ever found.  Note we ignore
      strings and comments when matching; and the line we return is the
      'cleansed' (elided) line at the returned line_number, or the last
      line examined when nothing was found.
    """
    closing_for = {'(': ')', '[': ']', '{': '}'}

    line = clean_lines.elided[line_number]
    start_character = line[pos]
    total_lines = clean_lines.num_lines()
    if start_character not in closing_for:
        return (line, total_lines, -1)
    end_character = closing_for[start_character]

    num_open = 0
    # The first line is scanned from pos so that brackets in front of the
    # one we were asked about cannot skew the nesting depth; every later
    # line is scanned from its beginning.
    scan_from = pos
    while line_number < total_lines:
        line = clean_lines.elided[line_number]
        for index in range(scan_from, len(line)):
            character = line[index]
            if character == start_character:
                num_open += 1
            elif character == end_character:
                num_open -= 1
                if not num_open:
                    return (line, line_number, index + 1)
        line_number += 1
        scan_from = 0

    # Ran off the end of the file without getting back to depth zero.
    return (line, total_lines, -1)
839
840
def check_for_copyright(filename, lines, error):
    """Logs an error if no Copyright message appears at the top of the file.

    Args:
      filename: The name of the current file.
      lines: An array of strings, each representing a line of the file.
             lines[0] is a dummy entry and is never examined.
      error: The function to call with any errors found.
    """
    # We'll say it should occur by line 10.  Because of the dummy line at
    # the front, that means lines[1] through lines[10]; slicing copes with
    # shorter files on its own.
    for line in lines[1:11]:
        if re.search(r'Copyright', line, re.I):
            return

    error(filename, 0, 'legal/copyright', 5,
          'No copyright message found.  '
          'You should have a line: "Copyright [year] <Copyright Owner>"')
853
854
def get_header_guard_cpp_variable(filename):
    """Builds the preprocessor symbol expected as the header guard of a file.

    Args:
      filename: The name of a C++ header file.

    Returns:
      The CPP variable that should be used as a header guard in the
      named file.
    """
    repository_name = FileInfo(filename).repository_name()
    # Dashes, dots, slashes and whitespace all become underscores; the
    # result is upper-cased and given one trailing underscore.
    guard = re.sub(r'[-./\s]', '_', repository_name)
    return guard.upper() + '_'
869
870
def check_for_header_guard(filename, lines, error):
    """Verifies that a header is wrapped in a correctly named #ifndef guard.

    A missing guard is reported when the first #ifndef and the first #define
    of the file do not name the same symbol.  Otherwise the #ifndef symbol
    and the last #endif line are each compared against the expected
    spelling.

    Args:
      filename: The name of the C++ header file.
      lines: An array of strings, each representing a line of the file.
      error: The function to call with any errors found.
    """
    expected_guard = get_header_guard_cpp_variable(filename)

    guard_symbol = None
    guard_line_number = 0
    defined_symbol = None
    last_endif = None
    last_endif_line_number = 0
    for index, text in enumerate(lines):
        words = text.split()
        if len(words) >= 2:
            directive, argument = words[:2]
            # Only the first #ifndef and the first #define are remembered.
            if directive == '#ifndef' and not guard_symbol:
                guard_symbol = argument
                guard_line_number = index
            if directive == '#define' and not defined_symbol:
                defined_symbol = argument
        # The last line starting with #endif wins; the whole line is kept.
        if text.startswith('#endif'):
            last_endif = text
            last_endif_line_number = index

    guard_is_paired = (guard_symbol and defined_symbol
                       and guard_symbol == defined_symbol)
    if not guard_is_paired:
        error(filename, 0, 'build/header_guard', 5,
              'No #ifndef header guard found, suggested CPP variable is: %s' %
              expected_guard)
        return

    # The guard should be PATH_FILE_H_, but PATH_FILE_H__ is also allowed
    # for backward compatibility: it is still reported, at level 0.
    if guard_symbol != expected_guard:
        if guard_symbol == expected_guard + '_':
            severity = 0
        else:
            severity = 5
        error(filename, guard_line_number, 'build/header_guard', severity,
              '#ifndef header guard has wrong style, please use: %s' % expected_guard)

    # Same leniency for the comment on the closing #endif line.
    if last_endif != ('#endif  // %s' % expected_guard):
        if last_endif == ('#endif  // %s' % (expected_guard + '_')):
            severity = 0
        else:
            severity = 5
        error(filename, last_endif_line_number, 'build/header_guard', severity,
              '#endif line should be "#endif  // %s"' % expected_guard)
928
929
def check_for_unicode_replacement_characters(filename, lines, error):
    """Reports every line that contains a Unicode replacement character.

    Such a character means that either the file contained invalid UTF-8
    (likely) or a literal replacement character (which it shouldn't).  Note
    that line numbering can be thrown off if the invalid UTF-8 occurred
    adjacent to a newline.

    Args:
      filename: The name of the current file.
      lines: An array of strings, each representing a line of the file.
      error: The function to call with any errors found.
    """
    offending_line_numbers = [index for index, text in enumerate(lines)
                              if u'\ufffd' in text]
    for offending_line_number in offending_line_numbers:
        error(filename, offending_line_number, 'readability/utf8', 5,
              'Line contains invalid UTF-8 (or Unicode replacement character).')
947
948
def check_for_new_line_at_eof(filename, lines, error):
    """Reports a file whose last line is not terminated by a newline.

    Args:
      filename: The name of the current file.
      lines: An array of strings, each representing a line of the file.
      error: The function to call with any errors found.
    """
    # lines was built by appending two newlines to the original file and
    # then splitting on \n, so a file that ends in \n leaves an empty
    # string in the last-but-one slot.  That slot has to exist, too.
    ends_with_newline = len(lines) >= 3 and not lines[-2]
    if ends_with_newline:
        return

    error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
          'Could not find a newline character at the end of the file.')
965
966
def check_for_multiline_comments_and_strings(filename, clean_lines, line_number, error):
    """Reports /* ... */ comments and "..." strings left open on a line.

    /* ... */ comments are legit inside macros, for one line.  Otherwise
    // comments are preferred, so warning about the rest is fine.  Strings
    may legally continue over several lines when each line ends with a
    backslash, but that is ugly and unnecessary.  This lint program does
    not cope well with either construct, so both are reported.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """
    # Escaped backslashes (\\) are fine in themselves, but their second
    # slash could be mistaken for the start of a \" below, so drop them.
    text = clean_lines.elided[line_number].replace('\\\\', '')

    opened_comments = text.count('/*')
    closed_comments = text.count('*/')
    if opened_comments > closed_comments:
        error(filename, line_number, 'readability/multiline_comment', 5,
              'Complex multi-line /*...*/-style comment found. '
              'Lint may give bogus warnings.  '
              'Consider replacing these with //-style comments, '
              'with #if 0...#endif, '
              'or with more clearly structured multi-line comments.')

    # An odd number of quotes that are not escaped means a string was
    # opened (or closed) without its partner on this line.
    unescaped_quotes = text.count('"') - text.count('\\"')
    if unescaped_quotes % 2:
        error(filename, line_number, 'readability/multiline_string', 5,
              'Multi-line string ("...") found.  This lint script doesn\'t '
              'do well with such strings, and may give bogus warnings.  They\'re '
              'ugly and unnecessary, and you should use concatenation instead".')
1003
1004
# Pairs of (thread-unsafe call prefix, thread-safe replacement prefix).
# Each entry ends with '(' so that only calls are looked for.
_THREADING_LIST = (
    ('asctime(', 'asctime_r('),
    ('ctime(', 'ctime_r('),
    ('getgrgid(', 'getgrgid_r('),
    ('getgrnam(', 'getgrnam_r('),
    ('getlogin(', 'getlogin_r('),
    ('getpwnam(', 'getpwnam_r('),
    ('getpwuid(', 'getpwuid_r('),
    ('gmtime(', 'gmtime_r('),
    ('localtime(', 'localtime_r('),
    ('rand(', 'rand_r('),
    ('readdir(', 'readdir_r('),
    ('strtok(', 'strtok_r('),
    ('ttyname(', 'ttyname_r('),
    )


def check_posix_threading(filename, clean_lines, line_number, error):
    """Checks for calls to thread-unsafe functions.

    Much code has been originally written without consideration of
    multi-threading. Also, engineers are relying on their old experience;
    they have learned posix before threading extensions were added. These
    tests guide the engineers to use thread-safe functions (when using
    posix directly).

    Every occurrence of a listed function on the line is examined, so a
    real call is still found when an earlier occurrence is only the tail of
    a longer identifier (e.g. "my_rand() + rand()").  At most one error is
    reported per listed function per line.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """
    line = clean_lines.elided[line_number]
    for single_thread_function, multithread_safe_function in _THREADING_LIST:
        index = line.find(single_thread_function)
        while index >= 0:
            # An occurrence counts as a call to the unsafe function only
            # when the character in front of it is not alphanumeric and is
            # none of '_', '.', '>' (so my_rand(, obj.rand( and ptr->rand(
            # are left alone).
            if index == 0 or not (line[index - 1].isalnum()
                                  or line[index - 1] in ('_', '.', '>')):
                error(filename, line_number, 'runtime/threadsafe_fn', 2,
                      'Consider using ' + multithread_safe_function +
                      '...) instead of ' + single_thread_function +
                      '...) for improved thread safety.')
                break
            index = line.find(single_thread_function, index + 1)
1047
1048
# A statement of the form "*name++;" or "*name--;" at the start of a line
# (after optional whitespace): it moves the pointer instead of changing the
# value it points at.
_RE_PATTERN_INVALID_INCREMENT = re.compile(r'^\s*\*\w+(\+\+|--);')


def check_invalid_increment(filename, clean_lines, line_number, error):
    """Reports statements such as *count++ that step the pointer itself.

    For example, in the following function:
    void increment_counter(int* count) {
        *count++;
    }
    the statement effectively does count++, moving the pointer; it should
    be replaced with ++*count, (*count)++ or *count += 1.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """
    statement = clean_lines.elided[line_number]
    if not _RE_PATTERN_INVALID_INCREMENT.match(statement):
        return
    error(filename, line_number, 'runtime/invalid_increment', 5,
          'Changing pointer instead of value (or unused value of operator*).')
1075
1076
1077class _ClassInfo(object):
1078    """Stores information about a class."""
1079
1080    def __init__(self, name, line_number):
1081        self.name = name
1082        self.line_number = line_number
1083        self.seen_open_brace = False
1084        self.is_derived = False
1085        self.virtual_method_line_number = None
1086        self.has_virtual_destructor = False
1087        self.brace_depth = 0
1088
1089
1090class _ClassState(object):
1091    """Holds the current state of the parse relating to class declarations.
1092
1093    It maintains a stack of _ClassInfos representing the parser's guess
1094    as to the current nesting of class declarations. The innermost class
1095    is at the top (back) of the stack. Typically, the stack will either
1096    be empty or have exactly one entry.
1097    """
1098
1099    def __init__(self):
1100        self.classinfo_stack = []
1101
1102    def check_finished(self, filename, error):
1103        """Checks that all classes have been completely parsed.
1104
1105        Call this when all lines in a file have been processed.
1106        Args:
1107          filename: The name of the current file.
1108          error: The function to call with any errors found.
1109        """
1110        if self.classinfo_stack:
1111            # Note: This test can result in false positives if #ifdef constructs
1112            # get in the way of brace matching. See the testBuildClass test in
1113            # cpp_style_unittest.py for an example of this.
1114            error(filename, self.classinfo_stack[0].line_number, 'build/class', 5,
1115                  'Failed to find complete declaration of class %s' %
1116                  self.classinfo_stack[0].name)
1117
1118
def check_for_non_standard_constructs(filename, clean_lines, line_number,
                                      class_state, error):
    """Logs an error if we see certain non-ANSI constructs ignored by gcc-2.

    Complain about several constructs which gcc-2 accepts, but which are
    not standard C++.  Warning about these in lint is one way to ease the
    transition to new compilers.
    - put storage class first (e.g. "static const" instead of "const static").
    - "%ll" instead of "%q" in printf-type functions.
    - "%1$d" is non-standard in printf-type functions.
    - "\\%" is an undefined character escape sequence.
    - text after #endif is not allowed.
    - invalid inner-style forward declaration.
    - >? and <? operators, and their >?= and <?= cousins.
    - classes with virtual methods need virtual destructors (compiler warning
        available, but not turned on yet.)

    Additionally, check for constructor/destructor style violations as it
    is very convenient to do so while checking for gcc-2 compliance.

    The function is called once per line.  The per-line checks come first;
    the class-related checks follow and rely on class_state, which is
    updated as class declarations are entered and left.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      class_state: A _ClassState instance which maintains information about
                   the current stack of nested class declarations being parsed.
      error: A callable to which errors are reported.  It is called here
             with 5 arguments: filename, line number, a category string
             (e.g. 'build/storage_class'), a numeric level, and a message.
    """

    # Remove comments from the line, but leave in strings for now.
    line = clean_lines.lines[line_number]

    if search(r'printf\s*\(.*".*%[-+ ]?\d*q', line):
        error(filename, line_number, 'runtime/printf_format', 3,
              '%q in format strings is deprecated.  Use %ll instead.')

    if search(r'printf\s*\(.*".*%\d+\$', line):
        error(filename, line_number, 'runtime/printf_format', 2,
              '%N$ formats are unconventional.  Try rewriting to avoid them.')

    # Remove escaped backslashes before looking for undefined escapes.
    line = line.replace('\\\\', '')

    # A backslash followed by %, [, ( or { anywhere after a quote character.
    if search(r'("|\').*\\(%|\[|\(|{)', line):
        error(filename, line_number, 'build/printf_format', 3,
              '%, [, (, and { are undefined character escapes.  Unescape them.')

    # For the rest, work with both comments and strings removed.
    line = clean_lines.elided[line_number]

    # A type or qualifier keyword directly followed by a storage class.
    if search(r'\b(const|volatile|void|char|short|int|long'
              r'|float|double|signed|unsigned'
              r'|schar|u?int8|u?int16|u?int32|u?int64)'
              r'\s+(auto|register|static|extern|typedef)\b',
              line):
        error(filename, line_number, 'build/storage_class', 5,
              'Storage class (static, extern, typedef, etc) should be first.')

    if match(r'\s*#\s*endif\s*[^/\s]+', line):
        error(filename, line_number, 'build/endif_comment', 5,
              'Uncommented text after #endif is non-standard.  Use a comment.')

    # "class Outer::Inner;" -- a forward declaration with a qualified name.
    if match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line):
        error(filename, line_number, 'build/forward_decl', 5,
              'Inner-style forward declarations are invalid.  Remove this line.')

    if search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?', line):
        error(filename, line_number, 'build/deprecated', 3,
              '>? and <? (max and min) operators are non-standard and deprecated.')

    # Track class entry and exit, and attempt to find cases within the
    # class declaration that don't meet the C++ style
    # guidelines. Tracking is very dependent on the code matching Google
    # style guidelines, but it seems to perform well enough in testing
    # to be a worthwhile addition to the checks.
    classinfo_stack = class_state.classinfo_stack
    # Look for a class declaration
    # (an optional template<...> prefix, then class/struct and a possibly
    # ::-qualified name, which is captured as group 3).
    class_decl_match = match(
        r'\s*(template\s*<[\w\s<>,:]*>\s*)?(class|struct)\s+(\w+(::\w+)*)', line)
    if class_decl_match:
        classinfo_stack.append(_ClassInfo(class_decl_match.group(3), line_number))

    # Everything else in this function uses the top of the stack if it's
    # not empty.
    if not classinfo_stack:
        return

    classinfo = classinfo_stack[-1]

    # If the opening brace hasn't been seen look for it and also
    # parent class declarations.
    if not classinfo.seen_open_brace:
        # If the line has a ';' in it, assume it's a forward declaration or
        # a single-line class declaration, which we won't process.
        if line.find(';') != -1:
            classinfo_stack.pop()
            return
        classinfo.seen_open_brace = (line.find('{') != -1)
        # Look for a bare ':'
        # (one that is not part of a '::'), taken to introduce base classes.
        if search('(^|[^:]):($|[^:])', line):
            classinfo.is_derived = True
        if not classinfo.seen_open_brace:
            return  # Everything else in this function is for after open brace

    # The class may have been declared with namespace or classname qualifiers.
    # The constructor and destructor will not have those qualifiers.
    base_classname = classinfo.name.split('::')[-1]

    # Look for single-argument constructors that aren't marked explicit.
    # Technically a valid construct, but against style.
    # The pattern requires leading whitespace before the class name and a
    # parenthesized argument list containing no comma or nested parens.
    args = match(r'(?<!explicit)\s+%s\s*\(([^,()]+)\)'
                 % re.escape(base_classname),
                 line)
    # A lone 'void' argument and an argument that is a (const) reference to
    # the class itself (i.e. a copy constructor) are not reported.
    if (args
        and args.group(1) != 'void'
        and not match(r'(const\s+)?%s\s*&' % re.escape(base_classname),
                      args.group(1).strip())):
        error(filename, line_number, 'runtime/explicit', 5,
              'Single-argument constructors should be marked explicit.')

    # Look for methods declared virtual.
    if search(r'\bvirtual\b', line):
        # The most recent line mentioning 'virtual' is the one remembered.
        classinfo.virtual_method_line_number = line_number
        # Only look for a destructor declaration on the same line. It would
        # be extremely unlikely for the destructor declaration to occupy
        # more than one line.
        if search(r'~%s\s*\(' % base_classname, line):
            classinfo.has_virtual_destructor = True

    # Look for class end.
    # brace_depth is the running count of '{' minus '}' since the opening
    # brace line; the class is considered finished once it is back at zero
    # (or below).
    brace_depth = classinfo.brace_depth
    brace_depth = brace_depth + line.count('{') - line.count('}')
    if brace_depth <= 0:
        classinfo = classinfo_stack.pop()
        # Try to detect missing virtual destructor declarations.
        # For now, only warn if a non-derived class with virtual methods lacks
        # a virtual destructor. This is to make it less likely that people will
        # declare derived virtual destructors without declaring the base
        # destructor virtual.
        if ((classinfo.virtual_method_line_number is not None)
            and (not classinfo.has_virtual_destructor)
            and (not classinfo.is_derived)):  # Only warn for base classes
            error(filename, classinfo.line_number, 'runtime/virtual', 4,
                  'The class %s probably needs a virtual destructor due to '
                  'having virtual method(s), one declared at line %d.'
                  % (classinfo.name, classinfo.virtual_method_line_number))
    else:
        classinfo.brace_depth = brace_depth
1268
1269
def check_spacing_for_function_call(filename, line, line_number, error):
    """Checks the spacing around the parentheses of function calls.

    Args:
      filename: The name of the current file.
      line: The text of the line to check.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """

    # Function calls often sit inside if/for/foreach/while/switch
    # expressions, which have their own, more liberal conventions.  When the
    # line is such a statement, only the text between its parentheses is
    # held to the stricter function-call rules; otherwise the whole line is.
    control_statements = (r'\bif\s*\((.*)\)\s*{',
                          r'\bfor\s*\((.*)\)\s*{',
                          r'\bforeach\s*\((.*)\)\s*{',
                          r'\bwhile\s*\((.*)\)\s*[{;]',
                          r'\bswitch\s*\((.*)\)\s*{')
    call_text = line
    for control_statement in control_statements:
        condition = search(control_statement, line)
        if condition:
            call_text = condition.group(1)
            break

    # Except in if/for/foreach/while/switch, there should never be space
    # immediately inside parens (eg "f( 3, 4 )"), with an exception for
    # nested parens ( (a+b) + c ).  Likewise, there should never be a space
    # before a ( when it's a function argument, which is assumed when the
    # char before the whitespace is legal in a function name (alnum + _)
    # and we're not starting a macro.  Pointers and references to arrays and
    # functions are too tricky and are recognized in a very simple way:
    # " (something)(maybe-something)" or
    # " (something)(maybe-something," or
    # " (something)[something]"
    # The contents of [] are assumed to be short enough that they'll never
    # need to wrap.
    if search(r'\b(if|for|foreach|while|switch|return|new|delete)\b', call_text):
        return  # Control structures are ignored.
    if search(r' \([^)]+\)\([^)]*(\)|,$)', call_text):
        return  # Pointers/references to functions are ignored.
    if search(r' \([^)]+\)\[[^\]]+\]', call_text):
        return  # Pointers/references to arrays are ignored.

    if search(r'\w\s*\([ \t](?!\s*\\$)', call_text):      # a ( used for a fn call
        error(filename, line_number, 'whitespace/parens', 4,
              'Extra space after ( in function call')
    elif search(r'\([ \t]+(?!(\s*\\)|\()', call_text):
        error(filename, line_number, 'whitespace/parens', 2,
              'Extra space after (')

    has_space_before_paren = search(r'\w\s+\(', call_text)
    if has_space_before_paren and not search(r'#\s*define|typedef', call_text):
        error(filename, line_number, 'whitespace/parens', 4,
              'Extra space before ( in function call')

    # If the ) is followed only by a newline or a { + newline, assume it's
    # part of a control statement (if/while/etc), and don't complain
    if search(r'[^)\s]\s+\)(?!\s*$|{\s*$)', call_text):
        error(filename, line_number, 'whitespace/parens', 2,
              'Extra space before )')
1329
1330
def is_blank_line(line):
    """Tells whether a line has no visible content.

    A line counts as blank when it is empty or is made up of white space
    only.

    Args:
      line: A line of a string.

    Returns:
      True, if the given line is blank.
    """
    if not line:
        return True
    return line.isspace()
1344
1345
def check_for_function_lengths(filename, clean_lines, line_number,
                               function_state, error):
    """Feeds function_state so that overly long function bodies get reported.

    For an overview why this is done, see:
    http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions

    Uses a simplistic algorithm assuming other style guidelines
    (especially spacing) are followed.
    Only checks unindented functions, so class members are unchecked.
    Trivial bodies are unchecked, so constructors with huge initializer lists
    may be missed.
    Blank/comment lines are not counted so as to avoid encouraging the removal
    of vertical space and comments just to get through a lint check.
    NOLINT *on the last line of a function* disables this check.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      function_state: Current function name and lines in body so far.
      error: The function to call with any errors found.
    """
    code_lines = clean_lines.lines
    current = code_lines[line_number]
    raw_current = clean_lines.raw_lines[line_number]

    # decls * & space::name( ...
    declaration = match(r'(\w(\w|::|\*|\&|\s)*)\(', current)
    starting_func = False
    if declaration:
        # If the name is all caps and underscores, figure it's a macro and
        # ignore it, unless it's TEST or TEST_F.
        function_name = declaration.group(1).split()[-1]
        is_test_macro = function_name == 'TEST' or function_name == 'TEST_F'
        starting_func = is_test_macro or not match(r'[A-Z_]+$', function_name)

    if starting_func:
        joined = ''
        for candidate in xrange(line_number, clean_lines.num_lines()):
            text = code_lines[candidate]
            joined += ' ' + text.lstrip()
            if search(r'(;|})', text):
                # Declarations and trivial functions ... ignore
                break
            if search(r'{', text):
                function = search(r'((\w|:)*)\(', current).group(1)
                if match(r'TEST', function):    # Handle TEST... macros
                    parameters = search(r'(\(.*\))', joined)
                    if parameters:              # Ignore bad syntax
                        function += parameters.group(1)
                else:
                    function += '()'
                function_state.begin(function)
                break
        else:
            # The file ended without a ';', '}' or '{': no body for the
            # function (or evidence of a non-function) was found.
            error(filename, line_number, 'readability/fn_size', 5,
                  'Lint failed to find start of function body.')
    elif match(r'^\}\s*$', current):  # function end
        if not search(r'\bNOLINT\b', raw_current):
            function_state.check(error, filename, line_number)
        function_state.end()
    elif not match(r'^\s*$', current):
        function_state.count()  # Count non-blank/non-comment lines.
1414
1415
1416def check_spacing(filename, clean_lines, line_number, error):
1417    """Checks for the correctness of various spacing issues in the code.
1418
1419    Things we check for: spaces around operators, spaces after
1420    if/for/while/switch, no spaces around parens in function calls, two
1421    spaces between code and comment, don't start a block with a blank
1422    line, don't end a function with a blank line, don't have too many
1423    blank lines in a row.
1424
1425    Args:
1426      filename: The name of the current file.
1427      clean_lines: A CleansedLines instance containing the file.
1428      line_number: The number of the line to check.
1429      error: The function to call with any errors found.
1430    """
1431
1432    raw = clean_lines.raw_lines
1433    line = raw[line_number]
1434
1435    # Before nixing comments, check if the line is blank for no good
1436    # reason.  This includes the first line after a block is opened, and
1437    # blank lines at the end of a function (ie, right before a line like '}').
1438    if is_blank_line(line):
1439        elided = clean_lines.elided
1440        previous_line = elided[line_number - 1]
1441        previous_brace = previous_line.rfind('{')
1442        # FIXME: Don't complain if line before blank line, and line after,
1443        #        both start with alnums and are indented the same amount.
1444        #        This ignores whitespace at the start of a namespace block
1445        #        because those are not usually indented.
1446        if (previous_brace != -1 and previous_line[previous_brace:].find('}') == -1
1447            and previous_line[:previous_brace].find('namespace') == -1):
1448            # OK, we have a blank line at the start of a code block.  Before we
1449            # complain, we check if it is an exception to the rule: The previous
1450            # non-empty line has the parameters of a function header that are indented
1451            # 4 spaces (because they did not fit in a 80 column line when placed on
1452            # the same line as the function name).  We also check for the case where
1453            # the previous line is indented 6 spaces, which may happen when the
1454            # initializers of a constructor do not fit into a 80 column line.
1455            exception = False
1456            if match(r' {6}\w', previous_line):  # Initializer list?
1457                # We are looking for the opening column of initializer list, which
1458                # should be indented 4 spaces to cause 6 space indentation afterwards.
1459                search_position = line_number - 2
1460                while (search_position >= 0
1461                       and match(r' {6}\w', elided[search_position])):
1462                    search_position -= 1
1463                exception = (search_position >= 0
1464                             and elided[search_position][:5] == '    :')
1465            else:
1466                # Search for the function arguments or an initializer list.  We use a
1467                # simple heuristic here: If the line is indented 4 spaces; and we have a
1468                # closing paren, without the opening paren, followed by an opening brace
1469                # or colon (for initializer lists) we assume that it is the last line of
1470                # a function header.  If we have a colon indented 4 spaces, it is an
1471                # initializer list.
1472                exception = (match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
1473                                   previous_line)
1474                             or match(r' {4}:', previous_line))
1475
1476            if not exception:
1477                error(filename, line_number, 'whitespace/blank_line', 2,
1478                      'Blank line at the start of a code block.  Is this needed?')
1479        # This doesn't ignore whitespace at the end of a namespace block
1480        # because that is too hard without pairing open/close braces;
1481        # however, a special exception is made for namespace closing
1482        # brackets which have a comment containing "namespace".
1483        #
1484        # Also, ignore blank lines at the end of a block in a long if-else
1485        # chain, like this:
1486        #   if (condition1) {
1487        #     // Something followed by a blank line
1488        #
1489        #   } else if (condition2) {
1490        #     // Something else
1491        #   }
1492        if line_number + 1 < clean_lines.num_lines():
1493            next_line = raw[line_number + 1]
1494            if (next_line
1495                and match(r'\s*}', next_line)
1496                and next_line.find('namespace') == -1
1497                and next_line.find('} else ') == -1):
1498                error(filename, line_number, 'whitespace/blank_line', 3,
1499                      'Blank line at the end of a code block.  Is this needed?')
1500
1501    # Next, we complain if there's a comment too near the text
1502    comment_position = line.find('//')
1503    if comment_position != -1:
1504        # Check if the // may be in quotes.  If so, ignore it
1505        # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
1506        if (line.count('"', 0, comment_position) - line.count('\\"', 0, comment_position)) % 2 == 0:   # not in quotes
1507            # Allow one space for new scopes, two spaces otherwise:
1508            if (not match(r'^\s*{ //', line)
1509                and ((comment_position >= 1
1510                      and line[comment_position-1] not in string.whitespace)
1511                     or (comment_position >= 2
1512                         and line[comment_position-2] not in string.whitespace))):
1513                error(filename, line_number, 'whitespace/comments', 2,
1514                      'At least two spaces is best between code and comments')
1515            # There should always be a space between the // and the comment
1516            commentend = comment_position + 2
1517            if commentend < len(line) and not line[commentend] == ' ':
1518                # but some lines are exceptions -- e.g. if they're big
1519                # comment delimiters like:
1520                # //----------------------------------------------------------
1521                # or they begin with multiple slashes followed by a space:
1522                # //////// Header comment
1523                matched = (search(r'[=/-]{4,}\s*$', line[commentend:])
1524                           or search(r'^/+ ', line[commentend:]))
1525                if not matched:
1526                    error(filename, line_number, 'whitespace/comments', 4,
1527                          'Should have a space between // and comment')
1528
1529    line = clean_lines.elided[line_number]  # get rid of comments and strings
1530
1531    # Don't try to do spacing checks for operator methods
1532    line = re.sub(r'operator(==|!=|<|<<|<=|>=|>>|>)\(', 'operator\(', line)
1533
1534    # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
1535    # Otherwise not.  Note we only check for non-spaces on *both* sides;
1536    # sometimes people put non-spaces on one side when aligning ='s among
1537    # many lines (not that this is behavior that I approve of...)
1538    if search(r'[\w.]=[\w.]', line) and not search(r'\b(if|while) ', line):
1539        error(filename, line_number, 'whitespace/operators', 4,
1540              'Missing spaces around =')
1541
1542    # FIXME: It's not ok to have spaces around binary operators like + - * / .
1543
1544    # You should always have whitespace around binary operators.
1545    # Alas, we can't test < or > because they're legitimately used sans spaces
1546    # (a->b, vector<int> a).  The only time we can tell is a < with no >, and
1547    # only if it's not template params list spilling into the next line.
1548    matched = search(r'[^<>=!\s](==|!=|<=|>=)[^<>=!\s]', line)
1549    if not matched:
1550        # Note that while it seems that the '<[^<]*' term in the following
1551        # regexp could be simplified to '<.*', which would indeed match
1552        # the same class of strings, the [^<] means that searching for the
1553        # regexp takes linear rather than quadratic time.
1554        if not search(r'<[^<]*,\s*$', line):  # template params spill
1555            matched = search(r'[^<>=!\s](<)[^<>=!\s]([^>]|->)*$', line)
1556    if matched:
1557        error(filename, line_number, 'whitespace/operators', 3,
1558              'Missing spaces around %s' % matched.group(1))
1559    # We allow no-spaces around << and >> when used like this: 10<<20, but
1560    # not otherwise (particularly, not when used as streams)
1561    matched = search(r'[^0-9\s](<<|>>)[^0-9\s]', line)
1562    if matched:
1563        error(filename, line_number, 'whitespace/operators', 3,
1564              'Missing spaces around %s' % matched.group(1))
1565
1566    # There shouldn't be space around unary operators
1567    matched = search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
1568    if matched:
1569        error(filename, line_number, 'whitespace/operators', 4,
1570              'Extra space for operator %s' % matched.group(1))
1571
1572    # A pet peeve of mine: no spaces after an if, while, switch, or for
1573    matched = search(r' (if\(|for\(|foreach\(|while\(|switch\()', line)
1574    if matched:
1575        error(filename, line_number, 'whitespace/parens', 5,
1576              'Missing space before ( in %s' % matched.group(1))
1577
1578    # For if/for/foreach/while/switch, the left and right parens should be
1579    # consistent about how many spaces are inside the parens, and
1580    # there should either be zero or one spaces inside the parens.
1581    # We don't want: "if ( foo)" or "if ( foo   )".
1582    # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
1583    matched = search(r'\b(if|for|foreach|while|switch)\s*\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$',
1584                     line)
1585    if matched:
1586        if len(matched.group(2)) != len(matched.group(4)):
1587            if not (matched.group(3) == ';'
1588                    and len(matched.group(2)) == 1 + len(matched.group(4))
1589                    or not matched.group(2) and search(r'\bfor\s*\(.*; \)', line)):
1590                error(filename, line_number, 'whitespace/parens', 5,
1591                      'Mismatching spaces inside () in %s' % matched.group(1))
1592        if not len(matched.group(2)) in [0, 1]:
1593            error(filename, line_number, 'whitespace/parens', 5,
1594                  'Should have zero or one spaces inside ( and ) in %s' %
1595                  matched.group(1))
1596
1597    # You should always have a space after a comma (either as fn arg or operator)
1598    if search(r',[^\s]', line):
1599        error(filename, line_number, 'whitespace/comma', 3,
1600              'Missing space after ,')
1601
1602    if filename.endswith('.cpp'):
1603        # C++ should have the & or * beside the type not the variable name.
1604        matched = match(r'\s*\w+(?<!\breturn)\s+(?P<pointer_operator>\*|\&)\w+', line)
1605        if matched:
1606            error(filename, line_number, 'whitespace/declaration', 3,
1607                  'Declaration has space between type name and %s in %s' % (matched.group('pointer_operator'), matched.group(0).strip()))
1608
1609    elif filename.endswith('.c'):
1610        # C Pointer declaration should have the * beside the variable not the type name.
1611        matched = search(r'^\s*\w+\*\s+\w+', line)
1612        if matched:
1613            error(filename, line_number, 'whitespace/declaration', 3,
1614                  'Declaration has space between * and variable name in %s' % matched.group(0).strip())
1615
1616    # Next we will look for issues with function calls.
1617    check_spacing_for_function_call(filename, line, line_number, error)
1618
1619    # Except after an opening paren, you should have spaces before your braces.
1620    # And since you should never have braces at the beginning of a line, this is
1621    # an easy test.
1622    if search(r'[^ ({]{', line):
1623        error(filename, line_number, 'whitespace/braces', 5,
1624              'Missing space before {')
1625
1626    # Make sure '} else {' has spaces.
1627    if search(r'}else', line):
1628        error(filename, line_number, 'whitespace/braces', 5,
1629              'Missing space before else')
1630
1631    # You shouldn't have spaces before your brackets, except maybe after
1632    # 'delete []' or 'new char * []'.
1633    if search(r'\w\s+\[', line) and not search(r'delete\s+\[', line):
1634        error(filename, line_number, 'whitespace/braces', 5,
1635              'Extra space before [')
1636
1637    # You shouldn't have a space before a semicolon at the end of the line.
1638    # There's a special case for "for" since the style guide allows space before
1639    # the semicolon there.
1640    if search(r':\s*;\s*$', line):
1641        error(filename, line_number, 'whitespace/semicolon', 5,
1642              'Semicolon defining empty statement. Use { } instead.')
1643    elif search(r'^\s*;\s*$', line):
1644        error(filename, line_number, 'whitespace/semicolon', 5,
1645              'Line contains only semicolon. If this should be an empty statement, '
1646              'use { } instead.')
1647    elif (search(r'\s+;\s*$', line) and not search(r'\bfor\b', line)):
1648        error(filename, line_number, 'whitespace/semicolon', 5,
1649              'Extra space before last semicolon. If this should be an empty '
1650              'statement, use { } instead.')
1651    elif (search(r'\b(for|while)\s*\(.*\)\s*;\s*$', line)
1652          and line.count('(') == line.count(')')
1653          # Allow do {} while();
1654          and not search(r'}\s*while', line)):
1655        error(filename, line_number, 'whitespace/semicolon', 5,
1656              'Semicolon defining empty statement for this loop. Use { } instead.')
1657
1658
def get_previous_non_blank_line(clean_lines, line_number):
    """Find the closest non-blank line above the given line.

    Args:
      clean_lines: A CleansedLines instance containing the file contents.
      line_number: The number of the line to start searching above.

    Returns:
      A (line, line_number) tuple describing the nearest line of
      clean_lines.elided before line_number that is not blank.  If there is
      no such line, the tuple is ('', -1).
    """

    # Walk upwards from the line directly above until something non-blank
    # turns up.
    for candidate_number in range(line_number - 1, -1, -1):
        candidate = clean_lines.elided[candidate_number]
        if is_blank_line(candidate):
            continue
        return (candidate, candidate_number)
    return ('', -1)
1680
1681
def check_namespace_indentation(filename, clean_lines, line_number, file_extension, error):
    """Reports wrongly indented code at the start of a namespace block.

    Nothing happens unless the given line opens a namespace
    ("namespace Foo {").  In header files (extension 'h') the code inside the
    namespace is expected to be indented by four spaces; in every other file
    it is expected not to be indented relative to the namespace.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      file_extension: The extension (dot not included) of the file.
      error: The function to call with any errors found.
    """

    opening_line = clean_lines.elided[line_number]  # Comments and strings removed.
    opening = match(r'(?P<namespace_indentation>\s*)namespace\s+\S+\s*{\s*$', opening_line)
    if not opening:
        return

    outer_indent = opening.group('namespace_indentation')
    following_lines = clean_lines.raw_lines[line_number + 1:]

    if file_extension == 'h':
        body_indent = outer_indent + ' ' * 4
        closing_prefix = outer_indent + '}'

        for offset, text in enumerate(following_lines):
            # Empty lines and preprocessor directives are ignored.  Goto
            # labels don't occur in header files, so they need no handling.
            if text.strip() == '' or text.startswith('#'):
                continue

            # Properly indented lines are fine; keep scanning.
            if text.startswith(body_indent):
                continue

            # The first unindented line ends the scan.  It is only
            # acceptable if it is the closing brace.
            if not text.startswith(closing_prefix):
                error(filename, line_number + offset + 1, 'whitespace/indent', 4,
                      'In a header, code inside a namespace should be indented.')
            break
        return

    for offset, text in enumerate(following_lines):
        # Empty lines and (goto) labels are ignored.  The label regexp
        # accepts whitespace or the start of a comment after the colon.
        if text.strip() == '' or match(r'\w+\s*:([\s\/].*)?$', text):
            continue

        if not match(r'\S', text[len(outer_indent):]):
            error(filename, line_number + offset + 1, 'whitespace/indent', 4,
                  'In an implementation file, code inside a namespace should not be indented.')

        # Only the first relevant line is inspected; checking more would
        # require counting opened and closed braces.
        break
1742
1743
def check_switch_indentation(filename, clean_lines, line_number, error):
    """Reports wrongly indented lines inside a switch statement.

    Nothing happens unless the given line opens a switch statement.  Case
    and default labels are expected to line up with the switch itself, and
    all other code is expected to be indented at least four spaces further.
    At most one error is reported per switch statement.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """

    opening_line = clean_lines.elided[line_number]  # Comments and strings removed.
    opening = match(r'(?P<switch_indentation>\s*)switch\s*\(.+\)\s*{\s*$', opening_line)
    if not opening:
        return

    switch_indent = opening.group('switch_indentation')
    body_indent = switch_indent + ' ' * 4
    saw_nested_switch = False

    for offset, text in enumerate(clean_lines.elided[line_number + 1:]):
        current_number = line_number + offset + 1

        # Empty lines and preprocessor directives are ignored.
        if text.strip() == '' or text.startswith('#'):
            continue

        # A switch nested inside this one would require tracking where the
        # inner switch ends for the label tests to stay correct.  Instead,
        # this line is still checked and the scan stops right after it.
        if match(r'\s*switch\s*\(.+\)\s*{\s*$', text):
            saw_nested_switch = True

        pieces = match(r'(?P<indentation>\s*)(?P<remaining_line>.*)$', text)
        indent = pieces.group('indentation')
        rest = pieces.group('remaining_line')

        # A closing brace at the switch's own indentation ends the statement.
        if rest.startswith('}') and indent == switch_indent:
            return

        if match(r'(default|case\s+.*)\s*:([^:].*)?$', rest):
            # Labels, including single-line ones like "default: break;"
            # (but not things like "Document::Foo();"), must line up with
            # the switch.  One report is enough to spot the problem.
            if indent != switch_indent:
                error(filename, current_number, 'whitespace/indent', 4,
                      'A case label should not be indented, but line up with its switch statement.')
                return
        elif match(r'\w+\s*:\s*$', rest):
            # Goto labels are ignored.
            continue
        elif not indent.startswith(body_indent):
            # Ordinary code must be indented at least one level deeper than
            # the switch.  Again, one report is enough.
            error(filename, current_number, 'whitespace/indent', 4,
                  'Non-label code inside switch statements should be indented.')
            return

        if saw_nested_switch:
            return
1813
1814
def check_braces(filename, clean_lines, line_number, error):
    """Looks for misplaced braces (e.g. at the end of line).

    The checks performed on the given line are:
      - an opening brace on its own line that should be at the end of the
        previous line, or a function definition whose opening brace is not
        on its own line;
      - braces around what looks like a one line control clause;
      - an else that is not on the same line as the preceding closing brace;
      - an else or do whose clause is on the same line;
      - a semicolon after a closing brace where it is not needed.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """

    line = clean_lines.elided[line_number] # Get rid of comments and strings.

    if match(r'\s*{\s*$', line):
        # We allow an open brace to start a line in the case where someone
        # is using braces for function definition or in a block to
        # explicitly create a new scope, which is commonly used to control
        # the lifetime of stack-allocated variables.  We don't detect this
        # perfectly: we just don't complain if the last non-whitespace
        # character on the previous non-blank line is ';', ':', '{', '}',
        # ')', or ') const' and doesn't begin with 'if|for|while|switch|else'.
        # We also allow '#' for #endif and '=' for array initialization.
        previous_line = get_previous_non_blank_line(clean_lines, line_number)[0]
        if ((not search(r'[;:}{)=]\s*$|\)\s*const\s*$', previous_line)
             or search(r'\b(if|for|foreach|while|switch|else)\b', previous_line))
            and previous_line.find('#') < 0):
            error(filename, line_number, 'whitespace/braces', 4,
                  'This { should be at the end of the previous line')
    elif (search(r'\)\s*(const\s*)?{\s*$', line)
          and line.count('(') == line.count(')')
          and not search(r'\b(if|for|foreach|while|switch)\b', line)):
        error(filename, line_number, 'whitespace/braces', 4,
              'Place brace on its own line for function definitions.')

    if (match(r'\s*}\s*(else\s*({\s*)?)?$', line) and line_number > 1):
        # We check if a closed brace has started a line to see if a
        # one line control statement was previous.
        previous_line = clean_lines.elided[line_number - 2]
        if (previous_line.find('{') > 0
            and search(r'\b(if|for|foreach|while|else)\b', previous_line)):
            error(filename, line_number, 'whitespace/braces', 4,
                  'One line control clauses should not use braces.')

    # An else clause should be on the same line as the preceding closing brace.
    if match(r'\s*else\s*', line):
        previous_line = get_previous_non_blank_line(clean_lines, line_number)[0]
        if match(r'\s*}\s*$', previous_line):
            error(filename, line_number, 'whitespace/newline', 4,
                  'An else should appear on the same line as the preceding }')

    # Likewise, an else should never have the else clause on the same line
    if search(r'\belse [^\s{]', line) and not search(r'\belse if\b', line):
        error(filename, line_number, 'whitespace/newline', 4,
              'Else clause should never be on same line as else (use 2 lines)')

    # In the same way, a do/while should never be on one line
    if match(r'\s*do [^\s{]', line):
        error(filename, line_number, 'whitespace/newline', 4,
              'do/while clauses should not be on a single line')

    # Braces shouldn't be followed by a ; unless they're defining a struct
    # or initializing an array.
    # We can't tell in general, but we can for some common cases.
    #
    # To recognize those cases, preceding non-blank lines that contain no
    # semicolon are prepended to the line for as long as the combined text
    # still starts with whitespace followed by '{...};'.
    previous_line_number = line_number
    while True:
        (previous_line, previous_line_number) = get_previous_non_blank_line(clean_lines, previous_line_number)
        # get_previous_non_blank_line() reports ('', -1) once there is nothing
        # left above.  Prepending the empty string would leave the line
        # unchanged and the search would restart from -1 forever, so stop.
        if previous_line_number < 0:
            break
        if match(r'\s+{.*}\s*;', line) and not previous_line.count(';'):
            line = previous_line + line
        else:
            break
    if (search(r'{.*}\s*;', line)
        and line.count('{') == line.count('}')
        and not search(r'struct|class|enum|\s*=\s*{', line)):
        error(filename, line_number, 'readability/braces', 4,
              "You don't need a ; after a }")
1889
1890
def check_exit_statement_simplifications(filename, clean_lines, line_number, error):
    """Reports an else or else-if that follows a block ending in an exit statement.

    When the block before an "else" or "else if" ends with a return, break,
    continue or goto statement and that block belongs to an "if", the else
    is redundant and an error is reported.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """

    elided = clean_lines.elided  # Comments and strings removed.

    else_header = match(r'(?P<else_indentation>\s*)(\}\s*)?else(\s+if\s*\(|(?P<else>\s*(\{\s*)?\Z))',
                        elided[line_number])
    if not else_header:
        return

    else_indent = else_header.group('else_indentation')
    block_indent = else_indent + ' ' * 4
    # The 'else' group only takes part in the match for a plain "else".
    is_plain_else = else_header.start('else') != -1
    found_exit_statement = False

    # Walk upwards, starting with the line right above the else.
    for index in range(line_number - 1, -1, -1):
        text = elided[index]

        # Empty lines, preprocessor directives and goto labels are ignored.
        if text.strip() == '' or text.startswith('#') or match(r'\w+\s*:\s*$', text):
            continue

        # A closing brace on the else's indentation level is ignored too, so
        # that code which puts "}" and "else" on separate lines is still
        # checked.
        if text == else_indent + '}':
            continue

        pieces = match(r'(?P<indentation>\s*)(?P<remaining_line>.*)$', text)
        indent = pieces.group('indentation')
        rest = pieces.group('remaining_line')

        if not found_exit_statement:
            # The first real statement found has to be an exit statement
            # indented exactly one level deeper than the else.  Any other
            # indentation might be a nested block, which is ignored.
            if indent != block_indent:
                return
            if not match(r'(return(\W+.*)|(break|continue)\s*;|goto\s*\w+;)$', rest):
                return
            found_exit_statement = True
            continue

        # From here on the previous block is known to end with an exit
        # statement; what remains is to find out whether it belongs to an if.

        # An opening brace on its own line ("if (condition)\n{") is ignored.
        if text == else_indent + '{':
            continue

        # Everything indented further than the else is part of the block.
        if indent.startswith(else_indent) and indent != else_indent:
            continue

        # This line has the same (or less) indentation as the else.  Report
        # an error if it is an "if"; either way the search ends here.
        if match(r'if\s*\(', rest):
            if is_plain_else:
                error(filename, index, 'readability/control_flow', 4,
                      'An else statement can be removed when the prior "if" '
                      'concludes with a return, break, continue or goto statement.')
            else:
                error(filename, index, 'readability/control_flow', 4,
                      'An else if statement should be written as an if statement '
                      'when the prior "if" concludes with a return, break, '
                      'continue or goto statement.')
        return
1979
1980
def replaceable_check(operator, macro, line):
    """Tell whether a plain CHECK could use a more specific macro instead.

    For example CHECK(a == b) could be written as CHECK_EQ, and likewise
    for CHECK_GE, CHECK_GT, CHECK_LE, CHECK_LT, CHECK_NE.

    Args:
      operator: The C++ operator used in the CHECK.
      macro: The CHECK or EXPECT macro being called.
      line: The current source line.

    Returns:
      A true value if the CHECK can be replaced with a more specific one,
      a false value otherwise.
    """

    # Decimal and hex integers, strings, and chars (in that order).
    constant = r'([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')'

    # Only expressions with something that looks like a literal on one side
    # of the operator are accepted, since CHECK(x == iterator) won't
    # compile.  This misses some cases where a more specific CHECK would be
    # possible, but that is less annoying than extraneous warnings.
    constant_on_left = r'\s*' + constant + r'\s*' + operator + r'[^<>].*'
    constant_on_right = r'.*[^<>]' + operator + r'\s*' + constant + r'\s*\)'
    pattern = (r'\s*' + macro + r'\(('
               + constant_on_left + r'|' + constant_on_right
               + r')')

    # CHECK(x == NULL) and the like are left alone because
    # CHECK_EQ(x, NULL) won't compile (requires a cast).  The same goes for
    # more complex boolean expressions involving && or ||, such as
    # CHECK(a == b || c == d).
    return match(pattern, line) and not search(r'NULL|&&|\|\|', line)
2014
2015
def check_check(filename, clean_lines, line_number, error):
    """Suggests more specific macros for plain CHECK and EXPECT calls.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """

    # The first known macro that occurs in the raw line decides which set
    # of replacement macros gets suggested.
    raw_line = clean_lines.raw_lines[line_number]
    macros_in_line = [macro for macro in _CHECK_MACROS if macro in raw_line]
    current_macro = ''
    if macros_in_line:
        current_macro = macros_in_line[0]
    if not current_macro:
        # No time is wasted on lines that contain none of the macros.
        return

    line = clean_lines.elided[line_number]  # Comments and strings removed.

    # Encourage replacing plain CHECKs with CHECK_EQ/CHECK_NE/etc.  Only the
    # first operator that qualifies is reported.
    for operator in ('==', '!=', '>=', '>', '<=', '<'):
        if not replaceable_check(operator, current_macro, line):
            continue
        replacement = _CHECK_REPLACEMENT[current_macro][operator]
        error(filename, line_number, 'readability/check', 2,
              'Consider using %s instead of %s(a %s b)' % (
                  replacement, current_macro, operator))
        break
2047
2048
def check_for_comparisons_to_zero(filename, clean_lines, line_number, error):
    """Reports equality comparisons against NULL, 0, true or false.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """

    # Get the line without comments and strings.
    line = clean_lines.elided[line_number]

    # Include NULL here so that users don't have to convert NULL to 0 first and then get this error.
    #
    # The character next to the literal must be neither a word character nor
    # a '.', so that floating point literals such as 0.5 (in "x == 0.5") or
    # 1.0 (in "1.0 != y") are not mistaken for a comparison to zero.
    if (search(r'[=!]=\s*(NULL|0|true|false)[^\w.]', line)
        or search(r'[^\w.](NULL|0|true|false)\s*[=!]=', line)):
        error(filename, line_number, 'readability/comparison_to_zero', 5,
              'Tests for true/false, null/non-null, and zero/non-zero should all be done without equality comparisons.')
2057
2058
def check_for_null(filename, clean_lines, line_number, error):
    """Reports uses of NULL where 0 should be used instead.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """

    # C and Objective-C implementation files are exempt from this check.
    if filename.endswith(('.c', '.m')):
        return

    null_pattern = r'\bNULL\b'

    if search(null_pattern, clean_lines.elided[line_number]):
        error(filename, line_number, 'readability/null', 5, 'Use 0 instead of NULL.')
        return

    # The elided line had no NULL, but a comment in the line might.  Only if
    # the raw line mentions NULL is the search repeated with strings
    # collapsed, which avoids errors for NULLs occurring in strings.
    raw_line = clean_lines.raw_lines[line_number]
    if not search(null_pattern, raw_line):
        return
    if search(null_pattern, CleansedLines.collapse_strings(raw_line)):
        error(filename, line_number, 'readability/null', 4, 'Use 0 instead of NULL.')
2075
def get_line_width(line):
    """Determines the width of the line in column positions.

    Args:
      line: A string, which may be a Unicode string.

    Returns:
      The width of the line in column positions, accounting for Unicode
      combining characters and wide characters. Non-text input (e.g. a byte
      string) is measured with len().
    """
    # Python 3 has no separate `unicode` type (every str is Unicode text), so
    # referring to `unicode` unconditionally raises NameError there. Resolve
    # the text type defensively so the function works on both interpreters.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    if not isinstance(line, text_type):
        return len(line)

    width = 0
    # Normalize to NFC first so that composable base+combining sequences
    # collapse into a single character before being measured.
    for c in unicodedata.normalize('NFC', line):
        if unicodedata.east_asian_width(c) in ('W', 'F'):
            # Wide and fullwidth characters occupy two columns.
            width += 2
        elif not unicodedata.combining(c):
            # Combining marks take no column of their own.
            width += 1
    return width
2095
2096
def check_style(filename, clean_lines, line_number, file_extension, error):
    """Runs the whitespace and layout checks for a single line.

    These come from the 'C++ style rules' section of cppguide.html. Most of
    those rules are hard to test (naming, comment style), so this looks at
    what it can: tabs, trailing whitespace, odd indentation, unindented
    labels, several statements on one line and boolean operators left at the
    end of a line. It then delegates to the more specific check_* helpers.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      file_extension: The extension (without the dot) of the filename.
      error: The function to call with any errors found.
    """
    raw_line = clean_lines.raw_lines[line_number]

    if '\t' in raw_line:
        error(filename, line_number, 'whitespace/tab', 1,
              'Tab found; better to use spaces')

    # One or three blank spaces at the beginning of the line is weird; it's
    # hard to reconcile that with 4-space indents.
    # NOTE: here are the conditions rob pike used for his tests.  Mine aren't
    # as sophisticated, but it may be worth becoming so:  RLENGTH==initial_spaces
    # if(RLENGTH > 20) complain = 0;
    # if(match($0, " +(error|private|public|protected):")) complain = 0;
    # if(match(prev, "&& *$")) complain = 0;
    # if(match(prev, "\\|\\| *$")) complain = 0;
    # if(match(prev, "[\",=><] *$")) complain = 0;
    # if(match($0, " <<")) complain = 0;
    # if(match(prev, " +for \\(")) complain = 0;
    # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
    elided_line = clean_lines.elided[line_number]
    # Number of leading space characters (tabs are deliberately not counted).
    indent_width = len(raw_line) - len(raw_line.lstrip(' '))

    if raw_line and raw_line[-1].isspace():
        error(filename, line_number, 'whitespace/end_of_line', 4,
              'Line ends in whitespace.  Consider deleting these extra spaces.')
    elif 1 <= indent_width <= 3 and not match(r'\s*\w+\s*:\s*$', elided_line):
        # There are certain situations we allow one space, notably for labels;
        # anything else with 1-3 leading spaces is suspicious.
        error(filename, line_number, 'whitespace/indent', 3,
              'Weird number of spaces at line-start.  '
              'Are you using a 4-space indent?')
    elif indent_width == 0 and not raw_line.startswith('//'):
        # Labels should always be indented at least one space.
        label_match = match(r'(?P<label>[^:]+):\s*$', raw_line)
        if label_match:
            label = label_match.group('label')
            # Only throw errors for stuff that is definitely not a goto label,
            # because goto labels can in fact occur at the start of the line.
            if label in ('public', 'private', 'protected') or ' ' in label:
                error(filename, line_number, 'whitespace/labels', 4,
                      'Labels should always be indented at least one space.  '
                      'If this is a member-initializer list in a constructor, '
                      'the colon should be on the line after the definition header.')

    # Lines mentioning 'for' are exempt: for loops are allowed two ;'s.
    if elided_line.count(';') > 1 and 'for' not in elided_line:
        previous_line = get_previous_non_blank_line(clean_lines, line_number)[0]
        # A for loop may run over two lines; its header is still open when the
        # previous line mentions 'for' but holds no ';' yet.
        continues_for_header = 'for' in previous_line and ';' not in previous_line
        # It's ok to have many commands in a switch case that fits in 1 line.
        one_line_case = (('case ' in elided_line or 'default:' in elided_line)
                         and 'break;' in elided_line)
        if not continues_for_header and not one_line_case:
            error(filename, line_number, 'whitespace/newline', 4,
                  'More than one command on the same line')

    if elided_line.strip().endswith(('||', '&&')):
        error(filename, line_number, 'whitespace/operators', 4,
              'Boolean expressions that span multiple lines should have their '
              'operators on the left side of the line instead of the right side.')

    # Some more style checks
    check_namespace_indentation(filename, clean_lines, line_number, file_extension, error)
    check_switch_indentation(filename, clean_lines, line_number, error)
    check_braces(filename, clean_lines, line_number, error)
    check_exit_statement_simplifications(filename, clean_lines, line_number, error)
    check_spacing(filename, clean_lines, line_number, error)
    check_check(filename, clean_lines, line_number, error)
    check_for_comparisons_to_zero(filename, clean_lines, line_number, error)
    check_for_null(filename, clean_lines, line_number, error)
2184
2185
# Matches a quoted #include of a '.h' file whose quoted path contains no '/',
# e.g. '#include "Foo.h"'.
_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
# Matches a whole #include line. Group 1 is the opening delimiter ('<' or '"')
# and group 2 is the included path.
_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
# Matches the first component of a filename delimited by -s and _s. That is:
#  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo.cpp').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo-bar_baz.cpp').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo_bar-baz.cpp').group(0) == 'foo'
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
2194
2195
2196def _drop_common_suffixes(filename):
2197    """Drops common suffixes like _test.cpp or -inl.h from filename.
2198
2199    For example:
2200      >>> _drop_common_suffixes('foo/foo-inl.h')
2201      'foo/foo'
2202      >>> _drop_common_suffixes('foo/bar/foo.cpp')
2203      'foo/bar/foo'
2204      >>> _drop_common_suffixes('foo/foo_internal.h')
2205      'foo/foo'
2206      >>> _drop_common_suffixes('foo/foo_unusualinternal.h')
2207      'foo/foo_unusualinternal'
2208
2209    Args:
2210      filename: The input filename.
2211
2212    Returns:
2213      The filename with the common suffix removed.
2214    """
2215    for suffix in ('test.cpp', 'regtest.cpp', 'unittest.cpp',
2216                   'inl.h', 'impl.h', 'internal.h'):
2217        if (filename.endswith(suffix) and len(filename) > len(suffix)
2218            and filename[-len(suffix) - 1] in ('-', '_')):
2219            return filename[:-len(suffix) - 1]
2220    return os.path.splitext(filename)[0]
2221
2222
2223def _is_test_filename(filename):
2224    """Determines if the given filename has a suffix that identifies it as a test.
2225
2226    Args:
2227      filename: The input filename.
2228
2229    Returns:
2230      True if 'filename' looks like a test, False otherwise.
2231    """
2232    if (filename.endswith('_test.cpp')
2233        or filename.endswith('_unittest.cpp')
2234        or filename.endswith('_regtest.cpp')):
2235        return True
2236    return False
2237
2238
def _classify_include(filename, include, is_system, include_state):
    """Figures out what kind of header 'include' is.

    Args:
      filename: The current file cpp_style is running over.
      include: The path to a #included file.
      is_system: True if the #include used <> rather than "".
      include_state: An _IncludeState instance in which the headers are inserted.

    Returns:
      One of the _XXX_HEADER constants.

    For example (include_state being the current _IncludeState):
      _classify_include('foo.cpp', 'config.h', False, include_state)
          gives _CONFIG_HEADER
      _classify_include('foo.cpp', 'foo.h', False, include_state)
          gives _PRIMARY_HEADER
      _classify_include('foo.cpp', 'bar.h', False, include_state)
          gives _OTHER_HEADER
    """
    # A system header is always classified as _OTHER_HEADER.
    if is_system:
        return _OTHER_HEADER

    # An include named config.h is WebCore/config.h.
    if include == "config.h":
        return _CONFIG_HEADER

    # Header files cannot have a primary include. Only an include that exactly
    # matches the header's own filename is allowed through, so that it gets
    # flagged as primary and triggers the "don't include yourself" check.
    if filename.endswith('.h') and filename != include:
        return _OTHER_HEADER

    target_base = FileInfo(filename).base_name()
    include_base = FileInfo(include).base_name()

    if include_state.visited_primary_section():
        # A primary header was already seen, so compare strictly. If the two
        # bases are identical here, the earlier lenient match was probably a
        # false positive.
        is_primary = (target_base == include_base)
    else:
        # No primary header yet: be lenient and accept any include whose base
        # name is a prefix of the target file's base name.
        is_primary = target_base.startswith(include_base)

    if is_primary:
        return _PRIMARY_HEADER
    return _OTHER_HEADER
2289
2290
2291
def check_include_line(filename, clean_lines, line_number, include_state, error):
    """Check rules that are applicable to #include lines.

    Strings on #include lines are NOT removed from elided line, to make
    certain tasks easier. However, to prevent false positives, checks
    applicable to #include lines in CheckLanguage must be put here.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      include_state: An _IncludeState instance in which the headers are inserted.
      error: The function to call with any errors found.
    """
    line = clean_lines.lines[line_number]

    include_match = _RE_PATTERN_INCLUDE.search(line)
    if not include_match:
        return

    delimiter, include = include_match.group(1, 2)
    is_system = (delimiter == '<')

    # Look for any of the stream classes that are part of standard C++.
    # Many unit tests use cout, so test files are exempt.
    if (match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include)
            and not _is_test_filename(filename)):
        error(filename, line_number, 'readability/streams', 3,
              'Streams are highly discouraged.')

    # Look for specific includes to fix.
    if not is_system and include.startswith('wtf/'):
        error(filename, line_number, 'build/include', 4,
              'wtf includes should be <wtf/file.h> instead of "wtf/file.h".')

    # Report repeated includes; otherwise remember where this one was seen.
    duplicate_header = include in include_state
    if duplicate_header:
        error(filename, line_number, 'build/include', 4,
              '"%s" already included at %s:%s' %
              (include, filename, include_state[include]))
    else:
        include_state[include] = line_number

    # The header type is recorded even for duplicates, since the alphabetical
    # check on later lines looks it up by line number.
    header_type = _classify_include(filename, include, is_system, include_state)
    include_state.header_types[line_number] = header_type

    # Only proceed if this isn't a duplicate header.
    if duplicate_header:
        return

    # We want to ensure that headers appear in the right order:
    # 1) for implementation files: config.h, primary header, blank line, alphabetically sorted
    # 2) for header files: alphabetically sorted
    # The include_state object keeps track of the last type seen
    # and complains if the header types are out of order or missing.
    error_message = include_state.check_next_include_order(header_type, filename.endswith('.h'))

    if not error_message:
        # Check to make sure we have a blank line after primary header.
        if header_type == _PRIMARY_HEADER:
            following_line = clean_lines.raw_lines[line_number + 1]
            if not is_blank_line(following_line):
                error(filename, line_number, 'build/include_order', 4,
                      "You should add a blank line after implementation file's own header.")

        # Check to make sure all headers besides config.h and the primary
        # header are alphabetically sorted.
        if header_type == _OTHER_HEADER:
            # Walk backwards to the nearest earlier #include, giving up at the
            # top of the file or at a preprocessor conditional.
            candidate_number = line_number
            while True:
                candidate_number -= 1
                candidate_line = clean_lines.lines[candidate_number]
                candidate_match = _RE_PATTERN_INCLUDE.search(candidate_line)
                if (candidate_match or candidate_number <= 0
                        or search(r'\A(#if|#ifdef|#ifndef|#else|#elif|#endif)', candidate_line)):
                    break
            if candidate_match:
                candidate_type = include_state.header_types[candidate_number]
                if candidate_type == _OTHER_HEADER and candidate_line.strip() > line.strip():
                    error(filename, line_number, 'build/include_order', 4,
                          'Alphabetical sorting problem.')
    else:
        if filename.endswith('.h'):
            error(filename, line_number, 'build/include_order', 4,
                  '%s Should be: alphabetically sorted.' %
                  error_message)
        else:
            error(filename, line_number, 'build/include_order', 4,
                  '%s Should be: config.h, primary header, blank line, and then alphabetically sorted.' %
                  error_message)
2383
2384
def check_language(filename, clean_lines, line_number, file_extension, include_state,
                   error):
    """Checks rules from the 'C++ language rules' section of cppguide.html.

    Some of these rules are hard to test (function overloading, using
    uint32 inappropriately), but we do the best we can.

    #include lines are handed to check_include_line and get none of the
    other checks in this function.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      file_extension: The extension (without the dot) of the filename.
      include_state: An _IncludeState instance in which the headers are inserted.
      error: The function to call with any errors found.
    """
    # If the line is empty or consists of entirely a comment, no need to
    # check it.
    line = clean_lines.elided[line_number]
    if not line:
        return

    matched = _RE_PATTERN_INCLUDE.search(line)
    if matched:
        check_include_line(filename, clean_lines, line_number, include_state, error)
        return

    # FIXME: figure out if they're using default arguments in fn proto.

    # Check to see if they're using a conversion function cast.
    # I just try to capture the most common basic types, though there are more.
    # Parameterless conversion functions, such as bool(), are allowed as they are
    # probably a member operator declaration or default constructor.
    matched = search(
        r'\b(int|float|double|bool|char|int32|uint32|int64|uint64)\([^)]', line)
    if matched:
        # gMock methods are defined using some variant of MOCK_METHODx(name, type)
        # where type may be float(), int(string), etc.  Without context they are
        # virtually indistinguishable from int(x) casts.
        if not match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line):
            error(filename, line_number, 'readability/casting', 4,
                  'Using deprecated casting style.  '
                  'Use static_cast<%s>(...) instead' %
                  matched.group(1))

    check_c_style_cast(filename, line_number, line, clean_lines.raw_lines[line_number],
                       'static_cast',
                       r'\((int|float|double|bool|char|u?int(16|32|64))\)',
                       error)
    # This doesn't catch all cases.  Consider (const char * const)"hello".
    check_c_style_cast(filename, line_number, line, clean_lines.raw_lines[line_number],
                       'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)

    # In addition, we look for people taking the address of a cast.  This
    # is dangerous -- casts can assign to temporaries, so the pointer doesn't
    # point where you think.
    if search(
        r'(&\([^)]+\)[\w(])|(&(static|dynamic|reinterpret)_cast\b)', line):
        error(filename, line_number, 'runtime/casting', 4,
              ('Are you taking an address of a cast?  '
               'This is dangerous: could be a temp var.  '
               'Take the address before doing the cast, rather than after'))

    # Check for people declaring static/global STL strings at the top level.
    # This is dangerous because the C++ language does not guarantee that
    # globals with constructors are initialized before the first access.
    matched = match(
        r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
        line)
    # Make sure it's not a function.
    # Function template specialization looks like: "string foo<Type>(...".
    # Class template definitions look like: "string Foo<Type>::Method(...".
    if matched and not match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)',
                             matched.group(3)):
        error(filename, line_number, 'runtime/string', 4,
              'For a static/global string constant, use a C style string instead: '
              '"%schar %s[]".' %
              (matched.group(1), matched.group(2)))

    # Check that we're not using RTTI outside of testing code.
    if search(r'\bdynamic_cast<', line) and not _is_test_filename(filename):
        error(filename, line_number, 'runtime/rtti', 5,
              'Do not use dynamic_cast<>.  If you need to cast within a class '
              "hierarchy, use static_cast<> to upcast.  Google doesn't support "
              'RTTI.')

    # The backreference \1 requires the very same member name inside the
    # parentheses, e.g. "foo_(foo_)".
    if search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
        error(filename, line_number, 'runtime/init', 4,
              'You seem to be initializing a member variable with itself.')

    # FIXME: for header files (file_extension == 'h'), check that 1-arg
    #        constructors are explicit.  How to tell it's a constructor?
    #        (handled in check_for_non_standard_constructs for now)

    # Check if people are using the verboten C basic types.  The only exception
    # we regularly allow is "unsigned short port" for port.
    if search(r'\bshort port\b', line):
        if not search(r'\bunsigned short port\b', line):
            error(filename, line_number, 'runtime/int', 4,
                  'Use "unsigned short" for ports, not "short"')

    # When snprintf is used, the second argument shouldn't be a literal.
    matched = search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
    if matched:
        error(filename, line_number, 'runtime/printf', 3,
              'If you can, use sizeof(%s) instead of %s as the 2nd arg '
              'to snprintf.' % (matched.group(1), matched.group(2)))

    # Check if some verboten C functions are being used.
    if search(r'\bsprintf\b', line):
        error(filename, line_number, 'runtime/printf', 5,
              'Never use sprintf.  Use snprintf instead.')
    matched = search(r'\b(strcpy|strcat)\b', line)
    if matched:
        error(filename, line_number, 'runtime/printf', 4,
              'Almost always, snprintf is better than %s' % matched.group(1))

    if search(r'\bsscanf\b', line):
        error(filename, line_number, 'runtime/printf', 1,
              'sscanf can be ok, but is slow and can overflow buffers.')

    # Check for suspicious usage of "if" like
    # } if (a == b) {
    if search(r'\}\s*if\s*\(', line):
        error(filename, line_number, 'readability/braces', 4,
              'Did you mean "else if"? If not, start a new line for "if".')

    # Check for potential format string bugs like printf(foo).
    # We constrain the pattern not to pick things like DocidForPrintf(foo).
    # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
    matched = re.search(r'\b((?:string)?printf)\s*\(([\w.\->()]+)\)', line, re.I)
    if matched:
        error(filename, line_number, 'runtime/printf', 4,
              'Potential format string bug. Do %s("%%s", %s) instead.'
              % (matched.group(1), matched.group(2)))

    # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
    matched = search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
    if matched and not match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", matched.group(2)):
        error(filename, line_number, 'runtime/memset', 4,
              'Did you mean "memset(%s, 0, %s)"?'
              % (matched.group(1), matched.group(2)))

    # Detect variable-length arrays.
    matched = match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
    if (matched and matched.group(2) != 'return' and matched.group(2) != 'delete' and
        matched.group(3).find(']') == -1):
        # Split the size using space and arithmetic operators as delimiters.
        # If any of the resulting tokens are not compile time constants then
        # report the error.
        # The pattern used to end in a stray ']' ('>>]'), so '>>' was never
        # treated as a delimiter and sizes such as 'kSize >> 1' were reported
        # as variable-length arrays.
        tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>', matched.group(3))
        is_const = True
        skip_next = False
        for tok in tokens:
            if skip_next:
                skip_next = False
                continue

            if search(r'sizeof\(.+\)', tok):
                continue
            if search(r'arraysize\(\w+\)', tok):
                continue

            tok = tok.lstrip('(')
            tok = tok.rstrip(')')
            if not tok:
                continue
            if match(r'\d+', tok):
                continue
            if match(r'0[xX][0-9a-fA-F]+', tok):
                continue
            if match(r'k[A-Z0-9]\w*', tok):
                continue
            if match(r'(.+::)?k[A-Z0-9]\w*', tok):
                continue
            if match(r'(.+::)?[A-Z][A-Z0-9_]*', tok):
                continue
            # A catch all for tricky sizeof cases, including 'sizeof expression',
            # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
            # requires skipping the next token because we split on ' ' and '*'.
            if tok.startswith('sizeof'):
                skip_next = True
                continue
            is_const = False
            break
        if not is_const:
            error(filename, line_number, 'runtime/arrays', 1,
                  'Do not use variable-length arrays.  Use an appropriately named '
                  "('k' followed by CamelCase) compile-time constant for the size.")

    # Check for use of unnamed namespaces in header files.  Registration
    # macros are typically OK, so we allow use of "namespace {" on lines
    # that end with backslashes.
    if (file_extension == 'h'
        and search(r'\bnamespace\s*{', line)
        and line[-1] != '\\'):
        error(filename, line_number, 'build/namespaces', 4,
              'Do not use unnamed namespaces in header files.  See '
              'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
              ' for more information.')
2586
2587
def check_c_style_cast(filename, line_number, line, raw_line, cast_type, pattern,
                       error):
    """Looks for a C-style cast matching 'pattern' and reports it.

    Because the text looks so similar, this also issues the sizeof(type)
    warning and the unnamed-function-parameter warning; when one of those
    applies, no cast is reported.

    Args:
      filename: The name of the current file.
      line_number: The number of the line to check.
      line: The line of code to check.
      raw_line: The raw line of code to check, with comments.
      cast_type: The string for the C++ cast to recommend.  This is either
                 reinterpret_cast or static_cast, depending.
      pattern: The regular expression used to find C-style casts.
      error: The function to call with any errors found.
    """
    cast_match = search(pattern, line)
    if not cast_match:
        return

    # e.g., sizeof(int)
    # Group 1 starts one character after the slice end; with the patterns
    # passed by check_language that skipped character is the opening paren.
    text_before = line[0:cast_match.start(1) - 1]
    if match(r'.*sizeof\s*$', text_before):
        error(filename, line_number, 'runtime/sizeof', 1,
              'Using sizeof(type).  Use sizeof(varname) instead if possible')
        return

    text_after = line[cast_match.end(0):]

    # The close paren is for function pointers as arguments to a function.
    # eg, void foo(void (*bar)(int));
    # The semicolon check is a more basic function check; also possibly a
    # function pointer typedef.
    # eg, void foo(int); or void foo(int) const;
    # The equals check is for function pointer assignment.
    # eg, void *(*foo)(int) = ...
    #
    # Right now, this will only catch cases where there's a single argument, and
    # it's unnamed.  It should probably be expanded to check for multiple
    # arguments with some unnamed.
    function_match = match(r'\s*(\)|=|(const)?\s*(;|\{|throw\(\)))', text_after)
    if not function_match:
        # At this point, all that should be left is actual casts.
        error(filename, line_number, 'readability/casting', 4,
              'Using C-style cast.  Use %s<%s>(...) instead' %
              (cast_type, cast_match.group(1)))
        return

    # Group 3 is the ';', '{' or 'throw()' that follows the parameter list.
    # It is empty when the match was a ')' or '='.
    terminator = function_match.group(3)
    if not terminator or terminator == ';' or '/*' not in raw_line:
        error(filename, line_number, 'readability/function', 3,
              'All parameters should be named in a function')
2641
2642
# Pairs of (header, names of the templates associated with that header).
# Used below to build _re_pattern_templates.
_HEADERS_CONTAINING_TEMPLATES = (
    ('<deque>', ('deque',)),
    ('<functional>', ('unary_function', 'binary_function',
                      'plus', 'minus', 'multiplies', 'divides', 'modulus',
                      'negate',
                      'equal_to', 'not_equal_to', 'greater', 'less',
                      'greater_equal', 'less_equal',
                      'logical_and', 'logical_or', 'logical_not',
                      'unary_negate', 'not1', 'binary_negate', 'not2',
                      'bind1st', 'bind2nd',
                      'pointer_to_unary_function',
                      'pointer_to_binary_function',
                      'ptr_fun',
                      'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
                      'mem_fun_ref_t',
                      'const_mem_fun_t', 'const_mem_fun1_t',
                      'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
                      'mem_fun_ref',
                     )),
    ('<limits>', ('numeric_limits',)),
    ('<list>', ('list',)),
    ('<map>', ('map', 'multimap',)),
    ('<memory>', ('allocator',)),
    ('<queue>', ('queue', 'priority_queue',)),
    ('<set>', ('set', 'multiset',)),
    ('<stack>', ('stack',)),
    ('<string>', ('char_traits', 'basic_string',)),
    ('<utility>', ('pair',)),
    ('<vector>', ('vector',)),

    # gcc extensions.
    # Note: std::hash is their hash, ::hash is our hash
    ('<hash_map>', ('hash_map', 'hash_multimap',)),
    ('<hash_set>', ('hash_set', 'hash_multiset',)),
    ('<slist>', ('slist',)),
    )

# Maps a template (in 'name<>' form) to the header names that are accepted as
# providing it as well.
_HEADERS_ACCEPTED_BUT_NOT_PROMOTED = {
    # We can trust with reasonable confidence that map gives us pair<>, too.
    'pair<>': ('map', 'multimap', 'hash_map', 'hash_multimap')
}

# Matches 'string' as a whole word.
_RE_PATTERN_STRING = re.compile(r'\bstring\b')

# List of (compiled pattern, function name, '<algorithm>') triples, one per
# <algorithm> function looked for.
_re_pattern_algorithm_header = []
for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
                  'transform'):
    # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
    # type::max().
    _re_pattern_algorithm_header.append(
        (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
         _template,
         '<algorithm>'))

# List of (compiled pattern, 'name<>', header) triples, one per template
# listed in _HEADERS_CONTAINING_TEMPLATES. The pattern matches the template
# name followed by optional whitespace and '<'.
_re_pattern_templates = []
for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
    for _template in _templates:
        _re_pattern_templates.append(
            (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
             _template + '<>',
             _header))
2704
2705
def files_belong_to_same_module(filename_cpp, filename_h):
    """Decides whether a .cpp file and a .h file are part of one module.

    A 'module' here means the following:
    foo.h, foo-inl.h, foo.cpp, foo_test.cpp and foo_unittest.cpp belong to the
    same 'module' if they are in the same directory.
    some/path/public/xyzzy and some/path/internal/xyzzy are also considered
    to belong to the same module here.

    When filename_cpp carries a longer path than filename_h, for example
    '/absolute/path/to/base/sysinfo.cpp' including 'base/sysinfo.h', the
    extra leading part of the path is returned as well. The caller uses it to
    open the header file more robustly. We don't have access to the real
    include paths in this context, so this guesswork is needed.

    Known bugs: tools/base/bar.cpp and base/bar.h belong to the same module
    according to this implementation. Because of this, this function gives
    some false positives. This should be sufficiently rare in practice.

    Args:
      filename_cpp: is the path for the .cpp file
      filename_h: is the path for the header path

    Returns:
      Tuple with a bool and a string:
      bool: True if filename_cpp and filename_h belong to the same module.
      string: the additional prefix needed to open the header file.
    """
    if not filename_cpp.endswith('.cpp'):
        return (False, '')
    if not filename_h.endswith('.h'):
        return (False, '')

    # Reduce the implementation file to its module stem: drop the extension
    # and at most one test suffix.
    cpp_stem = filename_cpp[:-len('.cpp')]
    for test_suffix in ('_unittest', '_test'):
        if cpp_stem.endswith(test_suffix):
            cpp_stem = cpp_stem[:-len(test_suffix)]
            break

    # Same for the header: drop the extension and an '-inl' marker.
    header_stem = filename_h[:-len('.h')]
    if header_stem.endswith('-inl'):
        header_stem = header_stem[:-len('-inl')]

    # public/ and internal/ directories are treated as the same place.
    cpp_stem = cpp_stem.replace('/public/', '/').replace('/internal/', '/')
    header_stem = header_stem.replace('/public/', '/').replace('/internal/', '/')

    if not cpp_stem.endswith(header_stem):
        return False, ''
    # Whatever precedes the header's stem is the prefix needed to open it.
    return True, cpp_stem[:-len(header_stem)]
2759
2760
def update_include_state(filename, include_state, io=codecs):
    """Fill up the include_state with new includes found from the file.

    Each line of the file is passed through cleanse_comments() and matched
    against _RE_PATTERN_INCLUDE; every include found is recorded in
    include_state unless it is already present.

    Args:
      filename: the name of the header to read.
      include_state: an _IncludeState instance in which the headers are inserted.
      io: The io factory to use to read the file. Provided for testability.

    Returns:
      True if the header file could be opened and was scanned. False if
      opening it raised IOError.
    """
    try:
        header_file = io.open(filename, 'r', 'utf8', 'replace')
    except IOError:
        return False

    try:
        for index, line in enumerate(header_file):
            clean_line = cleanse_comments(line)
            matched = _RE_PATTERN_INCLUDE.search(clean_line)
            if matched:
                include = matched.group(2)
                # The value formatting is cute, but not really used right now.
                # What matters here is that the key is in include_state.
                include_state.setdefault(include,
                                         '%s:%d' % (filename, index + 1))
    finally:
        # Release the file handle. The io factory is injectable, so do not
        # assume the object it returned has a close() method.
        close = getattr(header_file, 'close', None)
        if close is not None:
            close()
    return True
2788
2789
def check_for_include_what_you_use(filename, clean_lines, include_state, error,
                                   io=codecs):
    """Reports for missing stl includes.

    Scans the elided lines for uses of string, of the known <algorithm>
    functions and of the known templates, and reports each header that is
    needed but was not included. Only one reason is kept per header: if both
    equal_to<> and less<> are used, only the later of the two in the file is
    named as the reason to include <functional>.

    Headers included by this file's own header (as located through
    files_belong_to_same_module) count as included. When checking a .cpp file
    whose header could not be loaded, nothing is reported.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      include_state: An _IncludeState instance.
      error: The function to call with any errors found.
      io: The IO factory to use to read the header file. Provided for unittest
          injection.
    """
    # Maps a header name to (line_number, entity that needs it), for example
    # { '<functional>': (1219, 'less<>') }.
    needed = {}

    for line_number in xrange(clean_lines.num_lines()):
        text = clean_lines.elided[line_number]
        # Empty lines and lines starting with '#' are not scanned.
        if not text or text[0] == '#':
            continue

        # String is special -- it is a non-templatized type in STL.
        if _RE_PATTERN_STRING.search(text):
            needed['<string>'] = (line_number, 'string')

        for pattern, template, header in _re_pattern_algorithm_header:
            if pattern.search(text):
                needed[header] = (line_number, template)

        # Every template pattern requires a '<', so lines without one are
        # skipped purely to save cpu time; no semantics are changed.
        if '<' in text:
            for pattern, template, header in _re_pattern_templates:
                if pattern.search(text):
                    needed[header] = (line_number, template)

    # The policy is that if you #include something in foo.h you don't need to
    # include it again in foo.cpp. Work on a copy of include_state so the
    # caller's instance is left untouched.
    include_state = include_state.copy()

    # Use the absolute path so that matching works properly.
    abs_filename = os.path.abspath(filename)

    # For Emacs's flymake.
    # When cpp_style is invoked from flymake, the file being checked may be a
    # temporary copy whose name ends with '_flymake.cpp'. Map it back to the
    # original name so that we look for 'foo.h' rather than 'foo_flymake.h'.
    flymake_suffix = '_flymake.cpp'
    if abs_filename.endswith(flymake_suffix):
        abs_filename = abs_filename[:-len(flymake_suffix)] + '.cpp'

    # Did we find the header for this file (if any) and successfully load it?
    # include_state grows while headers are loaded, so iterate over a
    # snapshot of its keys.
    own_header_loaded = False
    for header in list(include_state.keys()):
        same_module, prefix = files_belong_to_same_module(abs_filename, header)
        if same_module and update_include_state(prefix + header,
                                                include_state, io):
            own_header_loaded = True

    # If we can't find the header file for a .cpp, assume it's because we don't
    # know where to look. In that case we'll give up as we're not sure they
    # didn't include it in the .h file.
    # FIXME: Do a better job of finding .h files so we are confident that
    #        not having the .h file means there isn't one.
    if filename.endswith('.cpp') and not own_header_loaded:
        return

    # All the lines have been processed, report the errors found.
    for required_header_unstripped, (line_number, template) in needed.items():
        if template in _HEADERS_ACCEPTED_BUT_NOT_PROMOTED:
            alternatives = _HEADERS_ACCEPTED_BUT_NOT_PROMOTED[template]
            # Any one of the accepted alternative headers is good enough.
            if any(alternative in include_state
                   for alternative in alternatives):
                continue
        if required_header_unstripped.strip('<>"') in include_state:
            continue
        error(filename, line_number,
              'build/include_what_you_use', 4,
              'Add #include ' + required_header_unstripped + ' for ' + template)
2881
2882
def process_line(filename, file_extension,
                 clean_lines, line, include_state, function_state,
                 class_state, error):
    """Runs the per-line checks on a single line of the file.

    The function-length check always runs. Every other check is skipped when
    the raw text of the line contains the word NOLINT.

    Args:
      filename: Filename of the file that is being processed.
      file_extension: The extension (dot not included) of the file.
      clean_lines: The cleansed lines of the file; its raw_lines attribute is
                   consulted for the NOLINT marker.
      line: Number of line being processed.
      include_state: An _IncludeState instance in which the headers are inserted.
      function_state: A _FunctionState instance which counts function lines, etc.
      class_state: A _ClassState instance which maintains information about
                   the current stack of nested class declarations being parsed.
      error: A callable to which errors are reported. Elsewhere in this file
             it is called with filename, line number, category, error level
             and message.

    """
    unprocessed_lines = clean_lines.raw_lines
    check_for_function_lengths(filename, clean_lines, line, function_state, error)

    # Lines marked NOLINT are exempt from all of the checks below.
    if search(r'\bNOLINT\b', unprocessed_lines[line]):
        return

    check_for_multiline_comments_and_strings(filename, clean_lines, line, error)
    check_style(filename, clean_lines, line, file_extension, error)
    check_language(filename, clean_lines, line, file_extension,
                   include_state, error)
    check_for_non_standard_constructs(filename, clean_lines, line, class_state,
                                      error)
    check_posix_threading(filename, clean_lines, line, error)
    check_invalid_increment(filename, clean_lines, line, error)
2914
2915
def process_file_data(filename, file_extension, lines, error):
    """Performs lint checks and reports any errors to the given error function.

    Runs the whole-file checks (copyright, header guard for .h files,
    include-what-you-use, unicode replacement characters, newline at EOF) and
    calls process_line() for every cleansed line.

    Args:
      filename: Filename of the file that is being processed.
      file_extension: The extension (dot not included) of the file.
      lines: An array of strings, each representing a line of the file, with the
             last element being empty if the file is terminated with a newline.
             The caller's list is not modified; a padded copy is used.
      error: A callable to which errors are reported. Elsewhere in this file
             it is called with filename, line number, category, error level
             and message.
    """
    # Pad the file with one marker line at each end.
    first_marker = '// marker so line numbers and indices both start at 1'
    last_marker = '// marker so line numbers end in a known way'
    lines = [first_marker] + lines + [last_marker]

    include_state = _IncludeState()
    function_state = _FunctionState()
    class_state = _ClassState()

    check_for_copyright(filename, lines, error)
    if file_extension == 'h':
        check_for_header_guard(filename, lines, error)

    remove_multi_line_comments(filename, lines, error)
    clean_lines = CleansedLines(lines)
    for line_index in xrange(clean_lines.num_lines()):
        process_line(filename, file_extension, clean_lines, line_index,
                     include_state, function_state, class_state, error)
    class_state.check_finished(filename, error)

    check_for_include_what_you_use(filename, clean_lines, include_state, error)

    # This check runs here rather than inside process_line so that it sees
    # raw lines rather than "cleaned" lines.
    check_for_unicode_replacement_characters(filename, lines, error)

    check_for_new_line_at_eof(filename, lines, error)
2952
2953
def process_file(filename, error=error):
    """Performs cpp_style on a single file.

    Reads the file (or stdin when filename is '-'), strips trailing carriage
    returns and hands the lines to process_file_data(). Files whose extension
    is not h, cpp or c are ignored. Progress and skip messages are written to
    sys.stderr.

    Args:
      filename: The name of the file to parse.
      error: The function to call with any errors found.
    """
    try:
        # Support the UNIX convention of using "-" for stdin.  Note that
        # we are not opening the file with universal newline support
        # (which codecs doesn't support anyway), so the resulting lines do
        # contain trailing '\r' characters if we are reading a file that
        # has CRLF endings.
        # If after the split a trailing '\r' is present, it is removed
        # below. If it is not expected to be present (i.e. os.linesep !=
        # '\r\n' as in Windows), a warning is issued below if this file
        # is processed.
        if filename == '-':
            stdin_reader = codecs.StreamReaderWriter(sys.stdin,
                                                     codecs.getreader('utf8'),
                                                     codecs.getwriter('utf8'),
                                                     'replace')
            contents = stdin_reader.read()
        else:
            source_file = codecs.open(filename, 'r', 'utf8', 'replace')
            try:
                contents = source_file.read()
            finally:
                # Always release the file handle, even if reading fails.
                source_file.close()
    except IOError:
        sys.stderr.write(
            "Skipping input '%s': Can't open for reading\n" % filename)
        return

    lines = contents.split('\n')

    # Remove trailing '\r' and remember whether any was seen.
    carriage_return_found = False
    for index, text in enumerate(lines):
        if text.endswith('\r'):
            lines[index] = text.rstrip('\r')
            carriage_return_found = True

    # Note, if no dot is found, this will give the entire filename as the ext.
    file_extension = filename[filename.rfind('.') + 1:]

    # When reading from stdin, the extension is unknown, so no cpp_style tests
    # should rely on the extension.
    if filename != '-' and file_extension not in ('h', 'cpp', 'c'):
        sys.stderr.write('Ignoring %s; not a .cpp, .c or .h file\n' % filename)
    else:
        process_file_data(filename, file_extension, lines, error)
        if carriage_return_found and os.linesep != '\r\n':
            # Use 0 for line_number since outputting only one error for
            # potentially several lines.
            error(filename, 0, 'whitespace/newline', 1,
                  'One or more unexpected \\r (^M) found; '
                  'better to use only a \\n')

    sys.stderr.write('Done processing %s\n' % filename)
3010
3011
def print_usage(message):
    """Writes the usage text to stderr and exits the process.

    Args:
      message: The optional error message. When it is given, the process
               exits through sys.exit() with a FATAL ERROR string built from
               it; otherwise it exits with status 1.
    """
    sys.stderr.write(_USAGE)
    if not message:
        sys.exit(1)
    sys.exit('\nFATAL ERROR: ' + message)
3023
3024
def print_categories():
    """Prints a list of all the error-categories used by error messages.

    These are the categories used to filter messages via --filter. The list
    is written to stderr, after which the process exits with status 0.
    """
    sys.stderr.write(_ERROR_CATEGORIES)
    sys.exit(0)
3032
3033
def parse_arguments(args, additional_flags=None):
    """Parses the command line arguments.

    This may set the output format and verbosity level as side-effects.
    Invalid arguments, an invalid --output value and --help end the process
    through print_usage(); an empty --filter value ends it through
    print_categories().

    Args:
      args: The command line arguments:
      additional_flags: A list of strings which specifies flags we allow, in
                        getopt long-option form. Defaults to no extra flags.

    Returns:
      A tuple of (filenames, flags)

      filenames: The list of filenames to lint.
      flags: The dict of the flag names and the flag values, holding only the
             options that were enabled through additional_flags.
    """
    # None stands in for "no extra flags" so that no mutable list object is
    # shared between calls as a default value.
    if additional_flags is None:
        additional_flags = []
    flags = ['help', 'output=', 'verbose=', 'filter='] + additional_flags
    additional_flag_values = {}
    try:
        (opts, filenames) = getopt.getopt(args, '', flags)
    except getopt.GetoptError:
        print_usage('Invalid arguments.')

    # Start from the current settings; options below override them.
    verbosity = _verbose_level()
    output_format = _output_format()
    filters = ''

    for (opt, val) in opts:
        if opt == '--help':
            print_usage(None)
        elif opt == '--output':
            if val not in ('emacs', 'vs7'):
                print_usage('The only allowed output formats are emacs and vs7.')
            output_format = val
        elif opt == '--verbose':
            verbosity = int(val)
        elif opt == '--filter':
            filters = val
            # An empty --filter= asks for the list of categories.
            if not filters:
                print_categories()
        else:
            # Anything else was enabled through additional_flags.
            additional_flag_values[opt] = val

    _set_output_format(output_format)
    _set_verbose_level(verbosity)
    _set_filters(filters)

    return (filenames, additional_flag_values)
3081
3082
def use_webkit_styles():
    """Replaces the default filters with the set used for WebKit.

    Every entry starts with '-', i.e. it names a category to switch off.
    """
    # FIXME: For filters we will never want to have, remove them.
    #        For filters we want to have similar functionalities,
    #        modify the implementation and enable them.
    global _DEFAULT_FILTERS
    disabled_categories = [
        '-whitespace/comments',
        '-whitespace/blank_line',
        '-runtime/explicit',  # explicit
        '-runtime/virtual',  # virtual dtor
        '-runtime/printf',
        '-runtime/threadsafe_fn',
        '-runtime/rtti',
        '-build/include_what_you_use',  # <string> for std::string
        '-legal/copyright',
        '-readability/multiline_comment',
        '-readability/braces',  # int foo() {};
        '-readability/fn_size',
        '-build/storage_class',  # const static
        '-build/endif_comment',
        '-whitespace/labels',
        '-runtime/arrays',  # variable length array
        '-build/header_guard',
        '-readability/casting',
        '-readability/function',
        '-runtime/casting',
        '-runtime/sizeof',
    ]
    _DEFAULT_FILTERS = disabled_categories
3112
3113
def main():
    """Command-line entry point.

    Writes a warning banner to stderr, switches to the WebKit filters, lints
    every file named on the command line and finally calls sys.exit() with
    whether the error count is greater than zero.
    """
    banner = '''********************* WARNING WARNING WARNING *********************

This tool is in the process of development and may give inaccurate
results at present.  Please file bugs (and/or patches) for things
that you notice that it flags incorrectly.

********************* WARNING WARNING WARNING *********************

'''
    sys.stderr.write(banner)

    use_webkit_styles()

    filenames, unused_flags = parse_arguments(sys.argv[1:])
    if not filenames:
        print_usage('No files were specified.')

    # Change stderr to write with replacement characters so we don't die
    # if we try to print something containing non-ASCII characters.
    utf8_reader = codecs.getreader('utf8')
    utf8_writer = codecs.getwriter('utf8')
    sys.stderr = codecs.StreamReaderWriter(sys.stderr, utf8_reader,
                                           utf8_writer, 'replace')

    _cpp_style_state.reset_error_count()
    for filename in filenames:
        process_file(filename)
    sys.stderr.write('Total errors found: %d\n' % _cpp_style_state.error_count)

    found_errors = _cpp_style_state.error_count > 0
    sys.exit(found_errors)
3144
3145
# Run the linter only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()
3148