• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python
2# -*- coding: utf-8 -*-
3#
4# Copyright (C) 2009 Google Inc. All rights reserved.
5# Copyright (C) 2009 Torch Mobile Inc.
6# Copyright (C) 2009 Apple Inc. All rights reserved.
7# Copyright (C) 2010 Chris Jerdonek (cjerdonek@webkit.org)
8#
9# Redistribution and use in source and binary forms, with or without
10# modification, are permitted provided that the following conditions are
11# met:
12#
13#    * Redistributions of source code must retain the above copyright
14# notice, this list of conditions and the following disclaimer.
15#    * Redistributions in binary form must reproduce the above
16# copyright notice, this list of conditions and the following disclaimer
17# in the documentation and/or other materials provided with the
18# distribution.
19#    * Neither the name of Google Inc. nor the names of its
20# contributors may be used to endorse or promote products derived from
21# this software without specific prior written permission.
22#
23# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34
35# This is the modified version of Google's cpplint. The original code is
36# http://google-styleguide.googlecode.com/svn/trunk/cpplint/cpplint.py
37
38"""Support for check-webkit-style."""
39
40import codecs
41import math  # for log
42import os
43import os.path
44import re
45import sre_compile
46import string
47import sys
48import unicodedata
49
50
# Names of the headers this checker classifies as STL headers
# (grouped alphabetically for easier scanning).
_STL_HEADERS = frozenset((
    'algobase.h', 'algorithm', 'alloc.h',
    'bitset',
    'deque',
    'exception',
    'function.h', 'functional',
    'hash_map', 'hash_map.h', 'hash_set', 'hash_set.h',
    'iterator',
    'list', 'list.h',
    'map', 'memory',
    'pair.h', 'pthread_alloc',
    'queue',
    'set', 'set.h', 'sstream', 'stack', 'stl_alloc.h', 'stl_relops.h',
    'type_traits.h',
    'utility',
    'vector', 'vector.h',
))
60
61
# Names of C++ system headers that are not counted as STL headers
# (grouped alphabetically, ignoring case, for easier scanning).
_CPP_HEADERS = frozenset((
    'algo.h',
    'builtinbuf.h', 'bvector.h',
    'cassert', 'cctype', 'cerrno', 'cfloat', 'ciso646', 'climits',
    'clocale', 'cmath', 'complex', 'complex.h', 'csetjmp', 'csignal',
    'cstdarg', 'cstddef', 'cstdio', 'cstdlib', 'cstring', 'ctime',
    'cwchar', 'cwctype',
    'defalloc.h', 'deque.h',
    'editbuf.h', 'exception',
    'fstream', 'fstream.h',
    'hashtable.h', 'heap.h',
    'indstream.h', 'iomanip', 'iomanip.h', 'ios', 'iosfwd', 'iostream',
    'iostream.h', 'istream.h', 'iterator.h',
    'limits',
    'map.h', 'multimap.h', 'multiset.h',
    'numeric',
    'ostream.h',
    'parsestream.h', 'pfstream.h', 'PlotFile.h', 'procbuf.h',
    'pthread_alloc.h',
    'rope', 'rope.h', 'ropeimpl.h',
    'SFile.h', 'slist', 'slist.h', 'stack.h', 'stdexcept',
    'stdiostream.h', 'streambuf.h', 'stream.h', 'strfile.h', 'string',
    'strstream', 'strstream.h',
    'tempbuf.h', 'tree.h', 'typeinfo',
    'valarray',
))
78
79
# Assertion macros (declared in base/logging.h and testing/base/gunit.h).
# Each *_M variant is listed ahead of its plain counterpart because the
# list is used for substring matching.
_CHECK_MACROS = [
    'DCHECK',
    'CHECK',
    'EXPECT_TRUE_M',
    'EXPECT_TRUE',
    'ASSERT_TRUE_M',
    'ASSERT_TRUE',
    'EXPECT_FALSE_M',
    'EXPECT_FALSE',
    'ASSERT_FALSE_M',
    'ASSERT_FALSE',
]

# For every macro above: a map from comparison operator to the name of the
# specialised macro that should replace "MACRO(a op b)".
_CHECK_REPLACEMENT = dict((_macro, {}) for _macro in _CHECK_MACROS)

# Macros asserting that the condition holds keep the operator's own suffix.
for op, replacement in (('==', 'EQ'), ('!=', 'NE'),
                        ('>=', 'GE'), ('>', 'GT'),
                        ('<=', 'LE'), ('<', 'LT')):
    for _macro, _template in (('DCHECK', 'DCHECK_%s'),
                              ('CHECK', 'CHECK_%s'),
                              ('EXPECT_TRUE', 'EXPECT_%s'),
                              ('ASSERT_TRUE', 'ASSERT_%s'),
                              ('EXPECT_TRUE_M', 'EXPECT_%s_M'),
                              ('ASSERT_TRUE_M', 'ASSERT_%s_M')):
        _CHECK_REPLACEMENT[_macro][op] = _template % replacement

# Macros asserting that the condition does NOT hold map each operator to
# the suffix of its logical inverse.
for op, inv_replacement in (('==', 'NE'), ('!=', 'EQ'),
                            ('>=', 'LT'), ('>', 'LE'),
                            ('<=', 'GT'), ('<', 'GE')):
    for _macro, _template in (('EXPECT_FALSE', 'EXPECT_%s'),
                              ('ASSERT_FALSE', 'ASSERT_%s'),
                              ('EXPECT_FALSE_M', 'EXPECT_%s_M'),
                              ('ASSERT_FALSE_M', 'ASSERT_%s_M')):
        _CHECK_REPLACEMENT[_macro][op] = _template % inv_replacement
111
112
# Header-type codes accepted by _IncludeState.check_next_include_order():
# 0 = config header, 1 = primary header, 2 = other header, 3 = moc header.
_CONFIG_HEADER, _PRIMARY_HEADER, _OTHER_HEADER, _MOC_HEADER = range(4)
119
120
# Cache of compiled regular expressions, keyed by the pattern string.  It is
# filled lazily by match(), search(), sub() and subn() below.
#
# The regexp compilation caching is inlined in all regexp functions for
# performance reasons; factoring it out into a separate function turns out
# to be noticeably expensive.
_regexp_compile_cache = {}
125
126
def match(pattern, s):
    """Matches the string with the pattern, caching the compiled regexp.

    The pattern is compiled with the public re.compile() API (rather than
    the private, deprecated sre_compile module) the first time it is seen
    and stored in _regexp_compile_cache for later calls.

    Args:
      pattern: The regular expression pattern string.
      s: The string to match against, anchored at its beginning.

    Returns:
      The match object, or None if the start of s does not match.
    """
    compiled = _regexp_compile_cache.get(pattern)
    if compiled is None:
        compiled = _regexp_compile_cache[pattern] = re.compile(pattern)
    return compiled.match(s)
132
133
def search(pattern, s):
    """Searches the string for the pattern, caching the compiled regexp.

    The pattern is compiled with the public re.compile() API (rather than
    the private, deprecated sre_compile module) the first time it is seen
    and stored in _regexp_compile_cache for later calls.

    Args:
      pattern: The regular expression pattern string.
      s: The string to scan.

    Returns:
      The match object for the first location where the pattern matches,
      or None if it matches nowhere in s.
    """
    compiled = _regexp_compile_cache.get(pattern)
    if compiled is None:
        compiled = _regexp_compile_cache[pattern] = re.compile(pattern)
    return compiled.search(s)
139
140
def sub(pattern, replacement, s):
    """Substitutes occurrences of a pattern, caching the compiled regexp.

    The pattern is compiled with the public re.compile() API (rather than
    the private, deprecated sre_compile module) the first time it is seen
    and stored in _regexp_compile_cache for later calls.

    Args:
      pattern: The regular expression pattern string.
      replacement: The replacement passed on to the compiled pattern's sub().
      s: The string in which to substitute.

    Returns:
      The string obtained by replacing the occurrences of pattern in s.
    """
    compiled = _regexp_compile_cache.get(pattern)
    if compiled is None:
        compiled = _regexp_compile_cache[pattern] = re.compile(pattern)
    return compiled.sub(replacement, s)
146
147
def subn(pattern, replacement, s):
    """Substitutes occurrences of a pattern and counts the substitutions.

    Like sub(), but returns the substitution count as well.  The pattern is
    compiled with the public re.compile() API (rather than the private,
    deprecated sre_compile module) the first time it is seen and stored in
    _regexp_compile_cache for later calls.

    Args:
      pattern: The regular expression pattern string.
      replacement: The replacement passed on to the compiled pattern's subn().
      s: The string in which to substitute.

    Returns:
      A tuple (new_string, number_of_substitutions_made).
    """
    compiled = _regexp_compile_cache.get(pattern)
    if compiled is None:
        compiled = _regexp_compile_cache[pattern] = re.compile(pattern)
    return compiled.subn(replacement, s)
153
154
def up_to_unmatched_closing_paren(s):
    """Cuts a string at its first ')' that has no matching '(' inside s.

    The caller is expected to pass the text that follows an already-opened
    '(' (e.g., "a == (b + c))"), so the nesting depth starts at one.

    Args:
      s: The text following an opening parenthesis.

    Returns:
      A pair (text before the unmatched ')', text after it); for example
      "a == (b + c)) { " yields ("a == (b + c)", " { ").
      Returns (None, None) when s contains no unmatched ')'.
    """
    depth = 1
    for index, char in enumerate(s):
        if char == '(':
            depth += 1
            continue
        if char != ')':
            continue
        depth -= 1
        if not depth:
            # This ')' closes the parenthesis opened before s began.
            return s[:index], s[index + 1:]
    return None, None
179
class _IncludeState(dict):
    """Remembers which headers were included and checks their ordering.

    The dict part of an _IncludeState maps an include filename to the line
    number on which that file was included.

    check_next_include_order() is meant to be called once per header in the
    file, passing one of the _XXX_HEADER type constants.  It returns an
    error message (an empty string when the order is fine) and advances the
    internal section marker.

    """
    # The current section only ever advances through these values; an
    # include that belongs to an earlier section is reported as an error.
    _INITIAL_SECTION, _CONFIG_SECTION, _PRIMARY_SECTION, _OTHER_SECTION = range(4)

    # Human-readable names used when building error messages.
    _TYPE_NAMES = {
        _CONFIG_HEADER: 'WebCore config.h',
        _PRIMARY_HEADER: 'header this file implements',
        _OTHER_HEADER: 'other header',
        _MOC_HEADER: 'moc file',
    }
    _SECTION_NAMES = {
        _INITIAL_SECTION: "... nothing.",
        _CONFIG_SECTION: "WebCore config.h.",
        _PRIMARY_SECTION: 'a header this file implements.',
        _OTHER_SECTION: 'other header.',
    }

    def __init__(self):
        super(_IncludeState, self).__init__()
        self._section = self._INITIAL_SECTION
        self._visited_primary_section = False
        self.header_types = {}

    def visited_primary_section(self):
        """Returns whether a primary header has been seen so far."""
        return self._visited_primary_section

    def check_next_include_order(self, header_type, file_is_header):
        """Validates the position of the next include and updates the state.

        Args:
          header_type: One of the _XXX_HEADER constants defined above.
          file_is_header: Whether the file that owns this _IncludeState is
                          itself a header.

        Returns:
          An empty string when the header appears in an acceptable position,
          otherwise a message describing the ordering problem.

        """
        # Header files must include neither config.h nor themselves.
        if file_is_header:
            if header_type == _CONFIG_HEADER:
                return 'Header file should not contain WebCore config.h.'
            if header_type == _PRIMARY_HEADER:
                return 'Header file should not contain itself.'
        # moc files are accepted anywhere and do not move the section marker.
        if header_type == _MOC_HEADER:
            return ''

        current = self._section
        type_name = self._TYPE_NAMES[header_type]
        if current != self._OTHER_SECTION:
            # There is no section after the last one, so the "before"
            # message only exists while a later section is still possible.
            found_before = ('Found %s before %s' %
                            (type_name, self._SECTION_NAMES[current + 1]))
        found_after = ('Found %s after %s' %
                       (type_name, self._SECTION_NAMES[current]))

        message = ''
        if header_type == _CONFIG_HEADER:
            if current >= self._CONFIG_SECTION:
                message = found_after
            self._section = self._CONFIG_SECTION
        elif header_type == _PRIMARY_HEADER:
            if current >= self._PRIMARY_SECTION:
                message = found_after
            elif current < self._CONFIG_SECTION:
                message = found_before
            self._section = self._PRIMARY_SECTION
            self._visited_primary_section = True
        else:
            assert header_type == _OTHER_HEADER
            if not file_is_header and current < self._PRIMARY_SECTION:
                message = found_before
            self._section = self._OTHER_SECTION

        return message
269
270
class _FunctionState(object):
    """Keeps the name of the function being scanned and its body line count.

    Attributes:
      verbosity: The verbosity level to use while checking style.
      in_a_function: True between begin() and end().
      lines_in_function: Lines counted since the last begin().
      current_function: The name handed to the last begin().

    """

    _NORMAL_TRIGGER = 250  # threshold at --v=0; doubled per verbosity level.
    _TEST_TRIGGER = 400    # higher threshold used for test functions.

    def __init__(self, verbosity):
        self.verbosity = verbosity
        self.in_a_function = False
        self.lines_in_function = 0
        self.current_function = ''

    def begin(self, function_name):
        """Starts tracking a new function body.

        Args:
            function_name: The name of the function being tracked.
        """
        self.current_function = function_name
        self.lines_in_function = 0
        self.in_a_function = True

    def count(self):
        """Adds one line to the tally when inside a function body."""
        if not self.in_a_function:
            return
        self.lines_in_function += 1

    def check(self, error, line_number):
        """Reports the function if its body exceeds the size threshold.

        Args:
          error: The function to call with any errors found.
          line_number: The number of the line to check.
        """
        is_test = match(r'T(EST|est)', self.current_function)
        base_trigger = self._TEST_TRIGGER if is_test else self._NORMAL_TRIGGER
        trigger = base_trigger * 2 ** self.verbosity

        if self.lines_in_function <= trigger:
            return

        # The level grows by one for each doubling over the base threshold
        # and is capped at 5.
        error_level = min(
            int(math.log(self.lines_in_function / base_trigger, 2)), 5)
        message = ('Small and focused functions are preferred:'
                   ' %s has %d non-comment lines'
                   ' (error triggered by exceeding %d lines).'  % (
                       self.current_function, self.lines_in_function, trigger))
        error(line_number, 'readability/fn_size', error_level, message)

    def end(self):
        """Stops tracking the current function body."""
        self.in_a_function = False
330
331
class _IncludeError(Exception):
    """Signals that the includes of a file appear in a wrong order."""
335
336
def is_c_or_objective_c(file_extension):
    """Tells whether an extension denotes a C or an Objective-C file.

    Args:
      file_extension: The file extension without the leading dot.

    Returns:
      True for 'c' and 'm', False for anything else.

    """
    return file_extension in ('c', 'm')
345
346
class FileInfo(object):
    """Provides utility functions for filenames.

    FileInfo provides easy access to the components of a file's path
    relative to the project root.
    """

    def __init__(self, filename):
        self._filename = filename

    def full_name(self):
        """Returns the absolute path, with Windows separators made Unix-like."""
        return os.path.abspath(self._filename).replace('\\', '/')

    def repository_name(self):
        r"""Returns the full name with the local checkout path removed.

        If the file exists on disk, the root of the enclosing SVN or git
        checkout is detected and /path/to/checkout is truncated from the
        name, so that header guards do not include things like
        "C:\Documents and Settings\..." or "/home/username/..." and people
        who have checked the source out to different locations do not see
        bogus errors.

        Returns:
          The path relative to the checkout root, or the full name when the
          file does not exist or no checkout root can be found.
        """
        fullname = self.full_name()

        if os.path.exists(fullname):
            project_dir = os.path.dirname(fullname)

            if os.path.exists(os.path.join(project_dir, ".svn")):
                # If there's a .svn file in the current directory, we
                # walk up the directory tree for the top of the SVN
                # checkout (the last directory that still has a .svn).
                root_dir = project_dir
                one_up_dir = os.path.dirname(root_dir)
                while os.path.exists(os.path.join(one_up_dir, ".svn")):
                    root_dir = os.path.dirname(root_dir)
                    one_up_dir = os.path.dirname(one_up_dir)

                prefix = os.path.commonprefix([root_dir, project_dir])
                return fullname[len(prefix) + 1:]

            # Not SVN? Try to find a git top level directory by
            # searching up from the current path.
            root_dir = project_dir
            while (root_dir != os.path.dirname(root_dir)
                   and not os.path.exists(os.path.join(root_dir, ".git"))):
                root_dir = os.path.dirname(root_dir)

            # The check must happen after the loop: when the file lives
            # directly in the git root the loop body never runs.
            if os.path.exists(os.path.join(root_dir, ".git")):
                prefix = os.path.commonprefix([root_dir, project_dir])
                return fullname[len(prefix) + 1:]

        # Don't know what to do; header guard warnings may be wrong...
        return fullname

    def split(self):
        """Splits the file into the directory, basename, and extension.

        For 'chrome/browser/browser.cpp', split() would
        return ('chrome/browser', 'browser', '.cpp')

        Returns:
          A tuple of (directory, basename, extension).
        """
        googlename = self.repository_name()
        project, rest = os.path.split(googlename)
        return (project,) + os.path.splitext(rest)

    def base_name(self):
        """File base name - text after the final slash, before the final period."""
        return self.split()[1]

    def extension(self):
        """File extension - the final period and the text following it."""
        return self.split()[2]

    def no_extension(self):
        """Repository-relative path of the file with the extension removed."""
        return '/'.join(self.split()[0:2])

    def is_source(self):
        """File has a source file extension."""
        return self.extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
431
432
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard:
# a backslash followed by a simple escape character, a run of digits, or
# 'x' plus hexadecimal digits.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
# Matches strings.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
# Matches single-character literals.  Escape codes should already be removed
# by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
# Matches C-style /* ... */ comments that open and close on the same line
# ('.' does not match newlines here).
# This RE is a little bit more complicated than one might expect, because we
# have to take care of removing the surrounding spaces so we can handle
# comments inside statements better.
# The current rule is: We only clear spaces from both sides when we're at the
# end of the line. Otherwise, we try to remove spaces from the right side,
# if this doesn't work we try on left side but only if there's a non-word
# character on the right.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r"""(\s*/\*.*\*/\s*$|
            /\*.*\*/\s+|
         \s+/\*.*\*/(?=\W)|
            /\*.*\*/)""", re.VERBOSE)
453
454
def is_cpp_string(line):
    """Tells whether the text right after 'line' would lie inside a string.

    Comments (single-line or multi-line) are not taken into account.

    Args:
      line: A partial line of code, from its start up to some position.

    Returns:
      True if a character appended to 'line' would be part of a string
      constant, i.e. the line holds an odd number of string-delimiting
      double quotes.
    """
    # Neutralise escaped backslashes first so that \\" is not read as \".
    neutralised = line.replace(r'\\', 'XX')
    all_quotes = neutralised.count('"')
    escaped_quotes = neutralised.count(r'\"')
    char_literal_quotes = neutralised.count("'\"'")
    delimiters = all_quotes - escaped_quotes - char_literal_quotes
    return delimiters % 2 == 1
470
471
def find_next_multi_line_comment_start(lines, line_index):
    """Returns the index of the next line that opens a multi-line comment.

    A line qualifies when, stripped of surrounding whitespace, it starts
    with '/*' and contains no closing '*/' after that marker.  Returns
    len(lines) when no such line exists at or after line_index.
    """
    total = len(lines)
    for index in range(line_index, total):
        stripped = lines[index].strip()
        # Comments that also end on this line are not multi-line comments.
        if stripped.startswith('/*') and stripped.find('*/', 2) < 0:
            return index
    return total
481
482
def find_next_multi_line_comment_end(lines, line_index):
    """Returns the index of the next line that closes a comment.

    Meant to be called while inside a comment: the first line at or after
    line_index whose stripped text ends with '*/' is the end marker.
    Returns len(lines) when no such line exists.
    """
    total = len(lines)
    for index in range(line_index, total):
        if lines[index].strip().endswith('*/'):
            return index
    return total
490
491
def remove_multi_line_comments_from_range(lines, begin, end):
    """Overwrites lines[begin:end] in place with placeholder comments."""
    # A '// dummy' placeholder keeps each line non-empty, which avoids
    # unnecessary blank line warnings later in the code.
    index = begin
    while index < end:
        lines[index] = '// dummy'
        index += 1
498
499
def remove_multi_line_comments(lines, error):
    """Blanks out every multi-line (C-style) comment in lines, in place.

    Args:
      lines: The list of source lines; modified in place.
      error: The function to call when a comment is never closed.
    """
    cursor = 0
    while cursor < len(lines):
        first = find_next_multi_line_comment_start(lines, cursor)
        if first >= len(lines):
            # No further multi-line comment opens.
            return
        last = find_next_multi_line_comment_end(lines, first)
        if last >= len(lines):
            error(first + 1, 'readability/multiline_comment', 5,
                  'Could not find end of multi-line comment')
            return
        cursor = last + 1
        remove_multi_line_comments_from_range(lines, first, cursor)
514
515
def cleanse_comments(line):
    """Strips a //-comment and single-line /* */ comments from a line.

    Args:
      line: A line of C++ source.

    Returns:
      The line with single-line comments removed.
    """
    slashes_at = line.find('//')
    if slashes_at != -1:
        code_part = line[:slashes_at]
        # Keep the text when the '//' sits inside a string constant.
        if not is_cpp_string(code_part):
            line = code_part
    # Finally drop any /* ... */ that opens and closes on this line.
    return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
530
531
class CleansedLines(object):
    """Keeps three parallel views of the same source lines.

    1) elided: the lines with strings collapsed and comments removed,
    2) lines: the lines with comments removed, and
    3) raw_lines: the lines exactly as they were passed in.
    All three are sequences of the same length.
    """

    def __init__(self, lines):
        self.elided = []
        self.lines = []
        self.raw_lines = lines
        self._num_lines = len(lines)
        for raw_line in lines:
            self.lines.append(cleanse_comments(raw_line))
            collapsed = self.collapse_strings(raw_line)
            self.elided.append(cleanse_comments(collapsed))

    def num_lines(self):
        """Returns how many lines this object holds."""
        return self._num_lines

    @staticmethod
    def collapse_strings(elided):
        """Reduces string and character constants on a line to "" and ''.

        Lines matching _RE_PATTERN_INCLUDE are returned unchanged.

        Args:
          elided: The line being processed.

        Returns:
          The line with collapsed strings.
        """
        if _RE_PATTERN_INCLUDE.match(elided):
            return elided
        # Escapes go first so the quote patterns can stay simple; text that
        # looks like an escape should not occur outside strings and chars.
        # Character constants are collapsed before string constants.
        steps = (
            (_RE_PATTERN_CLEANSE_LINE_ESCAPES, ''),
            (_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES, "''"),
            (_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES, '""'),
        )
        for pattern, replacement in steps:
            elided = pattern.sub(replacement, elided)
        return elided
575
576
def close_expression(clean_lines, line_number, pos):
    """If input points to ( or { or [, finds the position that closes it.

    If lines[line_number][pos] points to a '(' or '{' or '[', finds the
    line_number/pos that correspond to the closing of the expression.

    The scan starts at pos and tracks the nesting depth character by
    character, so brackets that appear before pos on the starting line, or
    after the closing bracket on its line, do not disturb the result, and
    the closing character searched for always matches the opening one.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      pos: A position on the line.

    Returns:
      A tuple (line, line_number, pos) pointer *past* the closing brace, or
      (line, len(lines), -1) if we never find a close (or if the character
      at pos is not an opening bracket).  Note we ignore strings and
      comments when matching; and the line we return is the 'cleansed'
      line at line_number (the last line scanned when no close is found).
    """
    closers = {'(': ')', '[': ']', '{': '}'}

    line = clean_lines.elided[line_number]
    start_character = line[pos]
    if start_character not in closers:
        return (line, clean_lines.num_lines(), -1)
    end_character = closers[start_character]

    if pos < 0:
        # Normalize a from-the-end index so the forward scan is well defined.
        pos += len(line)

    last_line_number = clean_lines.num_lines() - 1
    depth = 0
    scan_from = pos
    while True:
        for index in range(scan_from, len(line)):
            character = line[index]
            if character == start_character:
                depth += 1
            elif character == end_character:
                depth -= 1
                if depth == 0:
                    return (line, line_number, index + 1)
        if line_number >= last_line_number:
            # Ran out of lines without balancing the opening bracket.
            break
        line_number += 1
        line = clean_lines.elided[line_number]
        scan_from = 0

    return (line, clean_lines.num_lines(), -1)
617
618
def check_for_copyright(lines, error):
    """Logs an error if no Copyright message appears at the top of the file.

    Args:
      lines: An array of strings, each representing a line of the file,
             with a dummy line at index 0.
      error: The function to call with any errors found.
    """

    # We'll say it should occur by line 10. Don't forget there's a
    # dummy line at the front.  range() is used instead of the
    # Python-2-only xrange(); at most ten indices are produced.
    for line_number in range(1, min(len(lines), 11)):
        if re.search(r'Copyright', lines[line_number], re.I):
            return

    # No copyright line was found among the first ten lines.
    error(0, 'legal/copyright', 5,
          'No copyright message found.  '
          'You should have a line: "Copyright [year] <Copyright Owner>"')
631
632
def get_header_guard_cpp_variable(filename):
    """Derives the header-guard CPP variable for a header file.

    Args:
      filename: The name of a C++ header file.

    Returns:
      The base name of the file with every '-', '.' and whitespace
      character replaced by '_'.

    """
    base_name = os.path.basename(filename)
    return sub(r'[-.\s]', '_', base_name)
646
647
def check_for_header_guard(filename, lines, error):
    """Verifies that a header file is protected by a proper header guard.

    An error is logged when the first #ifndef and the first #define do not
    name the same symbol (or either is missing), and another one when the
    guard symbol differs from the variable suggested for this file.

    Args:
      filename: The name of the C++ header file.
      lines: An array of strings, each representing a line of the file.
      error: The function to call with any errors found.
    """

    cppvar = get_header_guard_cpp_variable(filename)

    ifndef = None
    ifndef_line_number = 0
    define = None
    for line_number, line in enumerate(lines):
        tokens = line.split()
        if len(tokens) < 2:
            continue
        directive, argument = tokens[0], tokens[1]
        # Only the first #ifndef and the first #define are remembered.
        if directive == '#ifndef' and not ifndef:
            ifndef = argument
            ifndef_line_number = line_number
        if directive == '#define' and not define:
            define = argument
        if define and ifndef:
            break

    guard_present = bool(ifndef) and bool(define) and ifndef == define
    if not guard_present:
        error(0, 'build/header_guard', 5,
              'No #ifndef header guard found, suggested CPP variable is: %s' %
              cppvar)
        return

    # The guard should be File_h.
    if ifndef != cppvar:
        error(ifndef_line_number, 'build/header_guard', 5,
              '#ifndef header guard has wrong style, please use: %s' % cppvar)
688
689
def check_for_unicode_replacement_characters(lines, error):
    """Reports every line that holds a Unicode replacement character.

    Such a character means that either the file contained invalid UTF-8
    (likely) or actual Unicode replacement characters (which it
    shouldn't).  Line numbering may be thrown off if the invalid UTF-8
    occurred adjacent to a newline.

    Args:
      lines: An array of strings, each representing a line of the file.
      error: The function to call with any errors found.
    """
    for index, text in enumerate(lines):
        if u'\ufffd' not in text:
            continue
        error(index, 'readability/utf8', 5,
              'Line contains invalid UTF-8 (or Unicode replacement character).')
706
707
def check_for_new_line_at_eof(lines, error):
    """Reports a file whose last line is not terminated by a newline.

    Args:
      lines: An array of strings, each representing a line of the file.
      error: The function to call with any errors found.
    """

    # lines was built by adding two newlines to the original file and then
    # splitting on \n, so a file that ends in \n shows up here as an empty
    # last-but-one element.
    line_count = len(lines)
    ends_with_newline = line_count >= 3 and not lines[-2]
    if not ends_with_newline:
        error(line_count - 2, 'whitespace/ending_newline', 5,
              'Could not find a newline character at the end of the file.')
723
724
def check_for_multiline_comments_and_strings(clean_lines, line_number, error):
    """Reports /* ... */ comments and "..." strings left open on a line.

    A line that opens more /* comments than it closes, or that holds an
    odd number of unescaped double quotes, continues onto the following
    line.  This lint program does not do well with either, so both are
    reported.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """
    # Escaped backslashes (\\) are harmless, but their second slash could
    # be mistaken for the start of an escaped quote (\") below.
    text = clean_lines.elided[line_number].replace('\\\\', '')

    opened_comments = text.count('/*')
    closed_comments = text.count('*/')
    if opened_comments > closed_comments:
        error(line_number, 'readability/multiline_comment', 5,
              'Complex multi-line /*...*/-style comment found. '
              'Lint may give bogus warnings.  '
              'Consider replacing these with //-style comments, '
              'with #if 0...#endif, '
              'or with more clearly structured multi-line comments.')

    unescaped_quotes = text.count('"') - text.count('\\"')
    if unescaped_quotes % 2:
        error(line_number, 'readability/multiline_string', 5,
              'Multi-line string ("...") found.  This lint script doesn\'t '
              'do well with such strings, and may give bogus warnings.  They\'re '
              'ugly and unnecessary, and you should use concatenation instead".')
760
761
# Pairs of (thread-unsafe call prefix, suggested call prefix).  Every
# suggestion is the unsafe function's name with an '_r' suffix.
_THREADING_LIST = tuple(
    (name + '(', name + '_r(')
    for name in (
        'asctime',
        'ctime',
        'getgrgid',
        'getgrnam',
        'getlogin',
        'getpwnam',
        'getpwuid',
        'gmtime',
        'localtime',
        'rand',
        'readdir',
        'strtok',
        'ttyname',
    ))
777
778
779def check_posix_threading(clean_lines, line_number, error):
780    """Checks for calls to thread-unsafe functions.
781
782    Much code has been originally written without consideration of
783    multi-threading. Also, engineers are relying on their old experience;
784    they have learned posix before threading extensions were added. These
785    tests guide the engineers to use thread-safe functions (when using
786    posix directly).
787
788    Args:
789      clean_lines: A CleansedLines instance containing the file.
790      line_number: The number of the line to check.
791      error: The function to call with any errors found.
792    """
793    line = clean_lines.elided[line_number]
794    for single_thread_function, multithread_safe_function in _THREADING_LIST:
795        index = line.find(single_thread_function)
796        # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
797        if index >= 0 and (index == 0 or (not line[index - 1].isalnum()
798                                          and line[index - 1] not in ('_', '.', '>'))):
799            error(line_number, 'runtime/threadsafe_fn', 2,
800                  'Consider using ' + multithread_safe_function +
801                  '...) instead of ' + single_thread_function +
802                  '...) for improved thread safety.')
803
804
# A whole statement of the form "*count++;" or "*count--;".  Such a statement
# moves the pointer itself instead of changing the value it points to.
_RE_PATTERN_INVALID_INCREMENT = re.compile(
    r'^\s*\*\w+(\+\+|--);')


def check_invalid_increment(clean_lines, line_number, error):
    """Reports statements such as "*count++;" that step the pointer.

    In a function like
        void increment_counter(int* count) {
            *count++;
        }
    the statement is parsed as *(count++): the pointer is advanced and the
    dereferenced value is thrown away.  The intended code is ++*count,
    (*count)++ or *count += 1.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """
    statement = clean_lines.elided[line_number]
    if not _RE_PATTERN_INVALID_INCREMENT.match(statement):
        return
    error(line_number, 'runtime/invalid_increment', 5,
          'Changing pointer instead of value (or unused value of operator*).')
830
831
class _ClassInfo(object):
    """Record of what has been learned so far about one class declaration."""

    def __init__(self, name, line_number):
        # Where and under which name the class was declared.
        self.name, self.line_number = name, line_number

        # Parse progress: whether the opening '{' has been seen yet, and the
        # running count of unmatched '{' since then.
        self.seen_open_brace = False
        self.brace_depth = 0

        # Facts collected for the virtual-destructor check.
        self.is_derived = False
        self.has_virtual_destructor = False
        # Line number of a 'virtual' declaration, or None if none was seen.
        self.virtual_method_line_number = None
843
844
class _ClassState(object):
    """Tracks which class declarations the parser is currently inside.

    The state is a stack of _ClassInfo objects reflecting the parser's best
    guess at the nesting of class declarations; the innermost class is the
    last element.  In typical code the stack is empty or holds one entry.
    """

    def __init__(self):
        self.classinfo_stack = []

    def check_finished(self, error):
        """Reports a class whose declaration was never closed.

        Call this when all lines in a file have been processed.  Only the
        outermost unfinished class (the bottom of the stack) is reported.

        Args:
          error: The function to call with any errors found.
        """
        if not self.classinfo_stack:
            return

        # Note: This test can result in false positives if #ifdef constructs
        # get in the way of brace matching. See the testBuildClass test in
        # cpp_style_unittest.py for an example of this.
        outermost = self.classinfo_stack[0]
        error(outermost.line_number, 'build/class', 5,
              'Failed to find complete declaration of class %s' %
              outermost.name)
871
872
class _FileState(object):
    """Per-file flag recording that the namespace-indent warning was given."""

    def __init__(self):
        # Starts out False; flipped to True by the setter below and never
        # reset by this class.
        self._did_inside_namespace_indent_warning = False

    def set_did_inside_namespace_indent_warning(self):
        """Records that the inside-namespace indent warning has been given."""
        self._did_inside_namespace_indent_warning = True

    def did_inside_namespace_indent_warning(self):
        """Returns True once the setter has been called, False before."""
        return self._did_inside_namespace_indent_warning
882
def check_for_non_standard_constructs(clean_lines, line_number,
                                      class_state, error):
    """Logs an error if we see certain non-ANSI constructs ignored by gcc-2.

    The following constructs, accepted by gcc-2 but not standard C++, are
    reported:
    - storage class not first ("const static" instead of "static const").
    - a 'q' length modifier in printf-type format strings.
    - "%N$" positional arguments in printf-type format strings.
    - a backslash followed by %, [, ( or { inside a quoted string.
    - uncommented text after #endif.
    - inner-style forward declarations ("class Outer::Inner;").
    - the >? and <? operators, and their >?= and <?= cousins.

    In addition the function tracks class declarations on
    class_state.classinfo_stack and uses that to report:
    - single-argument constructors that are not marked explicit.
    - non-derived classes that declare a virtual method but no virtual
      destructor (reported when the closing brace of the class is reached).

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      class_state: A _ClassState instance which maintains information about
                   the current stack of nested class declarations being parsed.
      error: A callable to which errors are reported.  It is called as
             error(line_number, category, level, message).
    """

    # Start from the version of the line that has comments removed but
    # still contains its string literals.
    line_with_strings = clean_lines.lines[line_number]

    if search(r'printf\s*\(.*".*%[-+ ]?\d*q', line_with_strings):
        error(line_number, 'runtime/printf_format', 3,
              '%q in format strings is deprecated.  Use %ll instead.')

    if search(r'printf\s*\(.*".*%\d+\$', line_with_strings):
        error(line_number, 'runtime/printf_format', 2,
              '%N$ formats are unconventional.  Try rewriting to avoid them.')

    # An escaped backslash is legal; drop those first so that its second
    # character is not mistaken for the start of an undefined escape.
    line_without_double_backslash = line_with_strings.replace('\\\\', '')

    if search(r'("|\').*\\(%|\[|\(|{)', line_without_double_backslash):
        error(line_number, 'build/printf_format', 3,
              '%, [, (, and { are undefined character escapes.  Unescape them.')

    # All remaining checks look at the line with comments and strings removed.
    code = clean_lines.elided[line_number]

    if search(r'\b(const|volatile|void|char|short|int|long'
              r'|float|double|signed|unsigned'
              r'|schar|u?int8|u?int16|u?int32|u?int64)'
              r'\s+(auto|register|static|extern|typedef)\b',
              code):
        error(line_number, 'build/storage_class', 5,
              'Storage class (static, extern, typedef, etc) should be first.')

    if match(r'\s*#\s*endif\s*[^/\s]+', code):
        error(line_number, 'build/endif_comment', 5,
              'Uncommented text after #endif is non-standard.  Use a comment.')

    if match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', code):
        error(line_number, 'build/forward_decl', 5,
              'Inner-style forward declarations are invalid.  Remove this line.')

    if search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?', code):
        error(line_number, 'build/deprecated', 3,
              '>? and <? (max and min) operators are non-standard and deprecated.')

    # Class tracking.  This relies heavily on the code following the style
    # guidelines, but works well enough in practice to be worthwhile.
    open_classes = class_state.classinfo_stack

    # A new class or struct declaration (optionally templated) opens a new
    # entry on the stack.
    declaration = match(
        r'\s*(template\s*<[\w\s<>,:]*>\s*)?(class|struct)\s+(\w+(::\w+)*)', code)
    if declaration:
        open_classes.append(_ClassInfo(declaration.group(3), line_number))

    # Nothing below applies outside of a class.
    if not open_classes:
        return

    current = open_classes[-1]

    # Until the opening brace shows up we are still in the class header:
    # watch for the brace and for a base-class list.
    if not current.seen_open_brace:
        # A ';' before the brace means a forward declaration or a one-line
        # class, neither of which is processed.
        if ';' in code:
            open_classes.pop()
            return
        current.seen_open_brace = ('{' in code)
        # A single ':' (not part of '::') introduces base classes.
        if search('(^|[^:]):($|[^:])', code):
            current.is_derived = True
        if not current.seen_open_brace:
            return  # The remaining checks only apply inside the class body.

    # Constructors and destructors use the unqualified class name even if
    # the class itself was declared with namespace or class qualifiers.
    unqualified_name = current.name.split('::')[-1]

    # Single-argument constructors should be explicit (copy constructors
    # and a lone 'void' parameter are exempt).
    constructor = match(r'(?<!explicit)\s+%s\s*\(([^,()]+)\)'
                        % re.escape(unqualified_name),
                        code)
    if (constructor
        and constructor.group(1) != 'void'
        and not match(r'(const\s+)?%s\s*&' % re.escape(unqualified_name),
                      constructor.group(1).strip())):
        error(line_number, 'runtime/explicit', 5,
              'Single-argument constructors should be marked explicit.')

    # Remember virtual declarations, and whether one of them is the
    # destructor.  The destructor is only looked for on this same line; it
    # is very unlikely to span several lines.
    if search(r'\bvirtual\b', code):
        current.virtual_method_line_number = line_number
        if search(r'~%s\s*\(' % unqualified_name, code):
            current.has_virtual_destructor = True

    # Update the brace balance; a positive balance means the class body is
    # still open.
    depth = current.brace_depth + code.count('{') - code.count('}')
    if depth > 0:
        current.brace_depth = depth
        return

    # The class has ended.
    finished = open_classes.pop()
    # Only base (non-derived) classes are reported for a missing virtual
    # destructor.  This makes it less likely that people will declare
    # derived virtual destructors without declaring the base destructor
    # virtual.
    if (finished.virtual_method_line_number is None
        or finished.has_virtual_destructor
        or finished.is_derived):
        return
    error(finished.line_number, 'runtime/virtual', 4,
          'The class %s probably needs a virtual destructor due to '
          'having virtual method(s), one declared at line %d.'
          % (finished.name, finished.virtual_method_line_number))
1031
1032
def check_spacing_for_function_call(line, line_number, error):
    """Checks the spacing around the parentheses of function calls.

    Args:
      line: The text of the line to check.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """

    # Function calls frequently sit inside the condition of an
    # if/for/foreach/while/switch, where the outer parentheses follow more
    # liberal rules.  In that case only the text inside those parentheses
    # is inspected; otherwise the whole line is.
    control_flow_patterns = (
        r'\bif\s*\((.*)\)\s*{',
        r'\bfor\s*\((.*)\)\s*{',
        r'\bforeach\s*\((.*)\)\s*{',
        r'\bwhile\s*\((.*)\)\s*[{;]',
        r'\bswitch\s*\((.*)\)\s*{')
    call_text = line
    for control_flow_pattern in control_flow_patterns:
        control_flow = search(control_flow_pattern, line)
        if control_flow:
            call_text = control_flow.group(1)
            break

    # Outside of control statements there should never be a space directly
    # inside parentheses (e.g. "f( 3, 4 )"), except for nested parentheses
    # such as "( (a+b) + c )".  Nor should there be a space before a '('
    # that starts an argument list, which is assumed when the character in
    # front of the whitespace may appear in a function name and the line is
    # not a macro definition.
    #
    # Skipped entirely:
    # - text that still contains a control keyword, return, new or delete;
    # - pointers/references to functions, recognised simply as
    #   " (something)(maybe-something)" or " (something)(maybe-something,";
    # - pointers/references to arrays, recognised as
    #   " (something)[something]" (the [] contents are assumed to be short
    #   enough never to wrap).
    if (search(r'\b(if|for|foreach|while|switch|return|new|delete)\b', call_text)
        or search(r' \([^)]+\)\([^)]*(\)|,$)', call_text)
        or search(r' \([^)]+\)\[[^\]]+\]', call_text)):
        return

    if search(r'\w\s*\([ \t](?!\s*\\$)', call_text):      # a ( used for a fn call
        error(line_number, 'whitespace/parens', 4,
              'Extra space after ( in function call')
    elif search(r'\([ \t]+(?!(\s*\\)|\()', call_text):
        error(line_number, 'whitespace/parens', 2,
              'Extra space after (')

    space_before_paren = search(r'\w\s+\(', call_text)
    if space_before_paren and not search(r'#\s*define|typedef', call_text):
        error(line_number, 'whitespace/parens', 4,
              'Extra space before ( in function call')

    # A ')' followed only by end of line, or by '{' and end of line, is
    # taken to close a control statement and is not reported.
    if search(r'[^)\s]\s+\)(?!\s*$|{\s*$)', call_text):
        error(line_number, 'whitespace/parens', 2,
              'Extra space before )')
1091
1092
def is_blank_line(line):
    """Tells whether a line has no visible content.

    A line is blank when it is empty (or otherwise falsy) or when it
    contains nothing but whitespace characters.

    Args:
      line: A line of a string.

    Returns:
      True, if the given line is blank.
    """
    if not line:
        return True
    return line.isspace()
1106
1107
def check_for_function_lengths(clean_lines, line_number, function_state, error):
    """Reports for long function bodies.

    For an overview why this is done, see:
    http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions

    The algorithm is deliberately simple and assumes the other style rules
    (spacing in particular) are followed:
    - Only lines that start a function at column 0 are considered, so class
      members are not checked.
    - Declarations and trivial bodies are ignored, so a constructor with a
      huge initializer list may be missed.
    - Blank and comment-only lines are not counted, so that nobody is
      tempted to delete vertical space or comments to satisfy lint.
    - NOLINT *on the last line of a function* disables the check.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      function_state: Current function name and lines in body so far.
      error: The function to call with any errors found.
    """
    code_lines = clean_lines.lines
    line = code_lines[line_number]
    raw_line = clean_lines.raw_lines[line_number]

    # Does this line look like the start of a function?
    header = match(r'(\w(\w|::|\*|\&|\s)*)\(', line)  # decls * & space::name( ...
    starts_function = False
    if header:
        # A name made only of capitals and underscores is taken to be a
        # macro and ignored, with the exception of TEST and TEST_F.
        name = header.group(1).split()[-1]
        starts_function = (name in ('TEST', 'TEST_F')
                           or not match(r'[A-Z_]+$', name))

    if starts_function:
        joined_header = ''
        for candidate_number in xrange(line_number, clean_lines.num_lines()):
            candidate = code_lines[candidate_number]
            joined_header += ' ' + candidate.lstrip()
            if search(r'(;|})', candidate):
                # Declarations and trivial functions are ignored.
                break
            if search(r'{', candidate):
                function = search(r'((\w|:)*)\(', line).group(1)
                if match(r'TEST', function):    # Handle TEST... macros
                    parameters = search(r'(\(.*\))', joined_header)
                    if parameters:               # Ignore bad syntax
                        function += parameters.group(1)
                else:
                    function += '()'
                function_state.begin(function)
                break
        else:
            # The file ended without a body (or any sign of a non-function).
            error(line_number, 'readability/fn_size', 5,
                  'Lint failed to find start of function body.')
        return

    if match(r'^\}\s*$', line):  # function end
        if not search(r'\bNOLINT\b', raw_line):
            function_state.check(error, line_number)
        function_state.end()
        return

    if not match(r'^\s*$', line):
        function_state.count()  # Count non-blank/non-comment lines.
1174
1175
def check_spacing(file_extension, clean_lines, line_number, error):
    """Checks for the correctness of various spacing issues in the code.

    The checks, in the order they are made:
    - blank lines at the start or at the end of a code block;
    - spacing before and after '//' of an end-of-line comment;
    - missing spaces around '=' and around binary operators;
    - extra space next to unary operators;
    - missing space between if/for/foreach/while/switch and '(';
    - inconsistent or excessive spaces just inside the parentheses of those
      statements, and more than one command on the same line;
    - missing space after a comma;
    - placement of '*' and '&' in declarations (depends on file_extension);
    - spacing in function calls (see check_spacing_for_function_call);
    - spacing before '{', 'else' and '[';
    - stray or misplaced semicolons.

    Args:
      file_extension: The current file extension, without the leading dot.
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """

    raw_lines = clean_lines.raw_lines
    raw_line = raw_lines[line_number]

    # While comments are still present, look for blank lines that serve no
    # purpose: the first line after a block is opened, and blank lines at
    # the end of a function (ie, right before a line like '}').
    if is_blank_line(raw_line):
        elided_lines = clean_lines.elided
        line_above = elided_lines[line_number - 1]
        brace_column = line_above.rfind('{')
        # FIXME: Don't complain if line before blank line, and line after,
        #        both start with alnums and are indented the same amount.
        #        This ignores whitespace at the start of a namespace block
        #        because those are not usually indented.
        opens_block = (brace_column != -1
                       and '}' not in line_above[brace_column:]
                       and 'namespace' not in line_above[:brace_column])
        if opens_block:
            # A blank line at the start of a block is tolerated when the
            # previous non-empty line is the tail of a wrapped function
            # header: parameters indented 4 spaces (they did not fit on the
            # line with the function name), or constructor initializers
            # indented 6 spaces.
            if match(r' {6}\w', line_above):  # Initializer list?
                # Walk upwards to the line that opens the initializer list;
                # it should be indented 4 spaces and start with ':'.
                position = line_number - 2
                while (position >= 0
                       and match(r' {6}\w', elided_lines[position])):
                    position -= 1
                is_exception = (position >= 0
                                and elided_lines[position][:5] == '    :')
            else:
                # Heuristic for the last line of a function header: indented
                # 4 spaces, with a closing paren but no opening paren,
                # followed by an opening brace or a colon (initializer
                # list).  A colon indented 4 spaces is an initializer list.
                is_exception = (match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
                                      line_above)
                                or match(r' {4}:', line_above))

            if not is_exception:
                error(line_number, 'whitespace/blank_line', 2,
                      'Blank line at the start of a code block.  Is this needed?')
        # Blank lines at the end of a namespace block are not recognised in
        # general, because that would need brace pairing; closing brackets
        # whose line mentions "namespace" are exempted instead.
        #
        # Blank lines at the end of a block in a long if-else chain are
        # also accepted:
        #   if (condition1) {
        #     // Something followed by a blank line
        #
        #   } else if (condition2) {
        #     // Something else
        #   }
        if line_number + 1 < clean_lines.num_lines():
            line_below = raw_lines[line_number + 1]
            if (line_below
                and match(r'\s*}', line_below)
                and 'namespace' not in line_below
                and '} else ' not in line_below):
                error(line_number, 'whitespace/blank_line', 3,
                      'Blank line at the end of a code block.  Is this needed?')

    # Next, look at the spacing around an end-of-line comment.
    comment_start = raw_line.find('//')
    if comment_start != -1:
        # An odd number of unescaped quotes before the // means that it is
        # probably inside a string; ignore it then.
        # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
        quotes_before = raw_line.count('"', 0, comment_start)
        escaped_quotes_before = raw_line.count('\\"', 0, comment_start)
        if (quotes_before - escaped_quotes_before) % 2 == 0:   # not in quotes
            # Exactly one space is expected between code and the comment.
            if (not match(r'^\s*$', raw_line[:comment_start])
                and comment_start >= 1):
                if (raw_line[comment_start - 1] not in string.whitespace
                    or (comment_start >= 2
                        and raw_line[comment_start - 2] in string.whitespace)):
                    error(line_number, 'whitespace/comments', 5,
                          'One space before end of line comments')
            # There should always be a space between the // and the comment
            text_start = comment_start + 2
            if text_start < len(raw_line) and raw_line[text_start] != ' ':
                # but some lines are exceptions -- e.g. if they're big
                # comment delimiters like:
                # //----------------------------------------------------------
                # or they begin with multiple slashes followed by a space:
                # //////// Header comment
                comment_text = raw_line[text_start:]
                if (not search(r'[=/-]{4,}\s*$', comment_text)
                    and not search(r'^/+ ', comment_text)):
                    error(line_number, 'whitespace/comments', 4,
                          'Should have a space between // and comment')

    line = clean_lines.elided[line_number]  # get rid of comments and strings

    # Don't try to do spacing checks for operator methods
    line = sub(r'operator(==|!=|<|<<|<=|>=|>>|>)\(', 'operator\(', line)
    # Don't try to do spacing checks for #include or #import statements at
    # minimum because it messes up checks for spacing around /
    if match(r'\s*#\s*(?:include|import)', line):
        return
    if search(r'[\w.]=[\w.]', line):
        error(line_number, 'whitespace/operators', 4,
              'Missing spaces around =')

    # FIXME: It's not ok to have spaces around binary operators like .

    # Binary operators need whitespace on both sides.  '<' and '>' cannot
    # be tested in general because they legitimately appear without spaces
    # (a->b, vector<int> a); the only detectable case is a '<' with no
    # matching '>', and only when it is not a template parameter list
    # spilling into the next line.
    operator_match = search(r'[^<>=!\s](==|!=|\+=|-=|\*=|/=|/|\|=|&=|<<=|>>=|<=|>=|\|\||\||&&|>>|<<)[^<>=!\s]', line)
    # Note that while it seems that the '<[^<]*' term in the following
    # regexp could be simplified to '<.*', which would indeed match
    # the same class of strings, the [^<] means that searching for the
    # regexp takes linear rather than quadratic time.
    if not operator_match and not search(r'<[^<]*,\s*$', line):  # template params spill
        operator_match = search(r'[^<>=!\s](<)[^<>=!\s]([^>]|->)*$', line)
    if operator_match:
        error(line_number, 'whitespace/operators', 3,
              'Missing spaces around %s' % operator_match.group(1))

    # There shouldn't be space around unary operators
    unary_match = search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
    if unary_match:
        error(line_number, 'whitespace/operators', 4,
              'Extra space for operator %s' % unary_match.group(1))

    # A pet peeve of mine: no spaces after an if, while, switch, or for
    keyword_match = search(r' (if\(|for\(|foreach\(|while\(|switch\()', line)
    if keyword_match:
        error(line_number, 'whitespace/parens', 5,
              'Missing space before ( in %s' % keyword_match.group(1))

    # For if/for/foreach/while/switch, the left and right parens should be
    # consistent about how many spaces are inside the parens, and
    # there should either be zero or one spaces inside the parens.
    # We don't want: "if ( foo)" or "if ( foo   )".
    # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
    statement_match = search(r'\b(?P<statement>if|for|foreach|while|switch)\s*\((?P<reminder>.*)$', line)
    if statement_match:
        statement = statement_match.group('statement')
        condition, rest = up_to_unmatched_closing_paren(statement_match.group('reminder'))
        if condition is not None:
            padding = search(r'(?P<leading>[ ]*)(?P<separator>.).*[^ ]+(?P<trailing>[ ]*)', condition)
            if padding:
                leading_spaces = len(padding.group('leading'))
                trailing_spaces = len(padding.group('trailing'))
                if leading_spaces != trailing_spaces:
                    allowed_for_form = (
                        statement == 'for'
                        and ((condition.startswith(' ;') and trailing_spaces == 0)
                             or (condition.endswith('; ') and leading_spaces == 0)))
                    if not allowed_for_form:
                        error(line_number, 'whitespace/parens', 5,
                              'Mismatching spaces inside () in %s' % statement)
                if leading_spaces > 1:
                    error(line_number, 'whitespace/parens', 5,
                          'Should have zero or one spaces inside ( and ) in %s' %
                          statement)

            # Do not check for more than one command in macros
            if (not match(r'\s*#define', line)
                and not match(r'((\s*{\s*}?)|(\s*;?))\s*\\?$', rest)):
                error(line_number, 'whitespace/parens', 4,
                      'More than one command on the same line in %s' % statement)

    # You should always have a space after a comma (either as fn arg or operator)
    if search(r',[^\s]', line):
        error(line_number, 'whitespace/comma', 3,
              'Missing space after ,')

    if file_extension == 'cpp':
        # C++ should have the & or * beside the type not the variable name.
        declaration = match(r'\s*\w+(?<!\breturn)\s+(?P<pointer_operator>\*|\&)\w+', line)
        if declaration:
            error(line_number, 'whitespace/declaration', 3,
                  'Declaration has space between type name and %s in %s' % (declaration.group('pointer_operator'), declaration.group(0).strip()))

    elif file_extension == 'c':
        # C Pointer declaration should have the * beside the variable not the type name.
        declaration = search(r'^\s*\w+\*\s+\w+', line)
        if declaration:
            error(line_number, 'whitespace/declaration', 3,
                  'Declaration has space between * and variable name in %s' % declaration.group(0).strip())

    # Next we will look for issues with function calls.
    check_spacing_for_function_call(line, line_number, error)

    # Except after an opening paren, you should have spaces before your braces.
    # And since you should never have braces at the beginning of a line, this is
    # an easy test.
    if search(r'[^ ({]{', line):
        error(line_number, 'whitespace/braces', 5,
              'Missing space before {')

    # Make sure '} else {' has spaces.
    if search(r'}else', line):
        error(line_number, 'whitespace/braces', 5,
              'Missing space before else')

    # You shouldn't have spaces before your brackets, except maybe after
    # 'delete []' or 'new char * []'.
    if search(r'\w\s+\[', line) and not search(r'delete\s+\[', line):
        error(line_number, 'whitespace/braces', 5,
              'Extra space before [')

    # You shouldn't have a space before a semicolon at the end of the line.
    # There's a special case for "for" since the style guide allows space before
    # the semicolon there.
    if search(r':\s*;\s*$', line):
        error(line_number, 'whitespace/semicolon', 5,
              'Semicolon defining empty statement. Use { } instead.')
    elif search(r'^\s*;\s*$', line):
        error(line_number, 'whitespace/semicolon', 5,
              'Line contains only semicolon. If this should be an empty statement, '
              'use { } instead.')
    elif search(r'\s+;\s*$', line) and not search(r'\bfor\b', line):
        error(line_number, 'whitespace/semicolon', 5,
              'Extra space before last semicolon. If this should be an empty '
              'statement, use { } instead.')
    elif (search(r'\b(for|while)\s*\(.*\)\s*;\s*$', line)
          and line.count('(') == line.count(')')
          # Allow do {} while();
          and not search(r'}\s*while', line)):
        error(line_number, 'whitespace/semicolon', 5,
              'Semicolon defining empty statement for this loop. Use { } instead.')
1424
def get_previous_non_blank_line(clean_lines, line_number):
    """Find the closest non-blank line above the given line.

    Args:
      clean_lines: A CleansedLines instance containing the file contents.
      line_number: The number of the line to start searching above.

    Returns:
      A (line, line_number) tuple.  The first element is the nearest entry of
      clean_lines.elided before line_number that is_blank_line() does not
      report as blank; the second element is the index of that entry.  When
      no such line exists, the tuple is ('', -1).
    """

    # Walk upwards from the line directly above the current one.
    for candidate_number in range(line_number - 1, -1, -1):
        candidate = clean_lines.elided[candidate_number]
        if is_blank_line(candidate):
            continue
        return (candidate, candidate_number)
    return ('', -1)
1446
1447
def check_namespace_indentation(clean_lines, line_number, file_extension, file_state, error):
    """Reports indented namespaces and indented code directly inside them.

    The check only runs for lines that open a namespace ("namespace Foo {").
    An indented namespace line is reported right away.  Otherwise the lines
    that follow are scanned until the namespace's closing brace has been
    passed, and the first indented line found at the top level of the
    namespace body is reported.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      file_extension: The extension (dot not included) of the file.
      file_state: A _FileState instance which maintains information about
                  the state of things in the file.
      error: The function to call with any errors found.
    """

    # The elided line has comments and strings removed.
    opening = match(r'(?P<namespace_indentation>\s*)namespace\s+\S+\s*{\s*$',
                    clean_lines.elided[line_number])
    if not opening:
        return

    if len(opening.group('namespace_indentation')) > 0:
        # Don't warn about an indented namespace if we already warned about indented code.
        if not file_state.did_inside_namespace_indent_warning():
            error(line_number, 'whitespace/indent', 4,
                  'namespace should never be indented.')
        return

    # Number of braces opened, and not yet closed, inside the namespace body.
    brace_depth = 0
    # True while the previous top-level line did not finish its statement, in
    # which case the current line is a continuation and may be indented.
    statement_is_unfinished = False
    # True while inside a (possibly backslash-continued) preprocessor directive.
    inside_directive = False

    for index, current_line in enumerate(clean_lines.elided[line_number + 1:]):
        stripped = current_line.strip()
        if not stripped:
            continue

        if brace_depth == 0:
            is_fresh_statement = not inside_directive and not statement_is_unfinished
            if (is_fresh_statement
                and not match(r'\S', current_line)
                and not file_state.did_inside_namespace_indent_warning()):
                file_state.set_did_inside_namespace_indent_warning()
                error(line_number + 1 + index, 'whitespace/indent', 4,
                      'Code inside a namespace should not be indented.')

            if inside_directive or stripped.startswith('#'):
                # A directive continues on the next line only after a trailing backslash.
                inside_directive = current_line.endswith('\\')
            else:
                lacks_terminator = ';' not in current_line and not stripped.endswith('}')
                statement_is_unfinished = lacks_terminator or current_line.endswith('\\')
        else:
            # If we have a brace we may not need a semicolon.
            statement_is_unfinished = False

        brace_depth += current_line.count('{') - current_line.count('}')
        if brace_depth < 0:
            # The closing brace of the namespace itself has been passed.
            break
1495
def check_using_std(file_extension, clean_lines, line_number, error):
    """Reports 'using std::foo;' statements, suggesting 'using namespace std;'.

    Args:
      file_extension: The extension of the current file, without the leading dot.
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """

    # C and Objective-C implementation files are exempt from this rule.
    if is_c_or_objective_c(file_extension):
        return

    # The elided line has comments and strings removed.
    statement = match(r'\s*using\s+std::(?P<method_name>\S+)\s*;\s*$',
                      clean_lines.elided[line_number])
    if statement:
        error(line_number, 'build/using_std', 4,
              "Use 'using namespace std;' instead of 'using std::%s;'."
              % statement.group('method_name'))
1519
1520
def check_max_min_macros(file_extension, clean_lines, line_number, error):
    """Looks for uses of the MAX() and MIN() macros, suggesting std::max() and std::min().

    Args:
      file_extension: The extension of the current file, without the leading dot.
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """

    # C and Objective-C implementation files are exempt from this rule.
    if is_c_or_objective_c(file_extension):
        return

    # The elided line has comments and strings removed.
    macro_use = search(r'\b(?P<max_min_macro>(MAX|MIN))\s*\(',
                       clean_lines.elided[line_number])
    if macro_use:
        macro_name = macro_use.group('max_min_macro')
        replacement = macro_name.lower()
        error(line_number, 'runtime/max_min_macros', 4,
              'Use std::%s() or std::%s<type>() instead of the %s() macro.'
              % (replacement, replacement, macro_name))
1546
1547
def check_switch_indentation(clean_lines, line_number, error):
    """Reports badly indented case labels and statements in a switch.

    The check only runs for lines that open a switch statement.  Case and
    default labels are expected at the indentation of the switch itself,
    and other code at least four spaces deeper.  At most one error is
    reported per switch statement.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """

    # The elided line has comments and strings removed.
    opening = match(r'(?P<switch_indentation>\s*)switch\s*\(.+\)\s*{\s*$',
                    clean_lines.elided[line_number])
    if not opening:
        return

    label_indentation = opening.group('switch_indentation')
    body_indentation = label_indentation + '    '

    for index, current_line in enumerate(clean_lines.elided[line_number + 1:]):
        # Skip not only empty lines but also those with preprocessor directives.
        if current_line.strip() == '' or current_line.startswith('#'):
            continue

        current_number = line_number + 1 + index

        # A nested switch would require tracking where it ends for the label
        # tests below to stay correct.  Instead, this line is still checked
        # and the scan stops right after it.  Assuming the indentation is
        # done consistently (even if incorrectly), this still catches all
        # indentation issues in practice.
        opens_nested_switch = match(r'\s*switch\s*\(.+\)\s*{\s*$', current_line)

        pieces = match(r'(?P<indentation>\s*)(?P<remaining_line>.*)$', current_line)
        indentation = pieces.group('indentation')
        code = pieces.group('remaining_line')

        # The closing brace of the switch statement ends the check.
        if code.startswith('}') and indentation == label_indentation:
            return

        # The label regexp also catches single-line cases like
        # "default: break;" but does not trigger on stuff like "Document::Foo();".
        if match(r'(default|case\s+.*)\s*:([^:].*)?$', code):
            if indentation != label_indentation:
                error(current_number, 'whitespace/indent', 4,
                      'A case label should not be indented, but line up with its switch statement.')
                # One error is enough to figure out the problem.
                return
        elif match(r'\w+\s*:\s*$', code):
            # Goto labels may sit at any indentation; ignore them.
            continue
        elif not indentation.startswith(body_indentation):
            # Ordinary code must be indented one level deeper than the switch.
            error(current_number, 'whitespace/indent', 4,
                  'Non-label code inside switch statements should be indented.')
            # One error is enough to figure out the problem.
            return

        if opens_nested_switch:
            return
1616
1617
def check_braces(clean_lines, line_number, error):
    """Looks for misplaced braces (e.g. at the end of line).

    The following checks run on the elided line, in this order:
      * an opening brace on its own line that belongs on the previous line,
        or a function definition whose brace is not on its own line;
      * braces around a one-line control clause;
      * an else that is not on the same line as the preceding closing brace;
      * an else clause or do/while body sharing a line with its keyword;
      * a semicolon directly after a closing brace.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """

    line = clean_lines.elided[line_number] # Get rid of comments and strings.

    if match(r'\s*{\s*$', line):
        # We allow an open brace to start a line in the case where someone
        # is using braces for function definition or in a block to
        # explicitly create a new scope, which is commonly used to control
        # the lifetime of stack-allocated variables.  We don't detect this
        # perfectly: we just don't complain if the last non-whitespace
        # character on the previous non-blank line is ';', ':', '{', '}',
        # ')', or ') const' and doesn't begin with 'if|for|while|switch|else'.
        # We also allow '#' for #endif and '=' for array initialization.
        previous_line = get_previous_non_blank_line(clean_lines, line_number)[0]
        if ((not search(r'[;:}{)=]\s*$|\)\s*const\s*$', previous_line)
             or search(r'\b(if|for|foreach|while|switch|else)\b', previous_line))
            and previous_line.find('#') < 0):
            error(line_number, 'whitespace/braces', 4,
                  'This { should be at the end of the previous line')
    elif (search(r'\)\s*(const\s*)?{\s*$', line)
          and line.count('(') == line.count(')')
          and not search(r'\b(if|for|foreach|while|switch)\b', line)
          and not match(r'\s+[A-Z_][A-Z_0-9]+\b', line)):
        # A line ending in ') {' or ') const {' with balanced parentheses and
        # no control keyword is treated as a function definition.  Indented
        # lines starting with an all-caps identifier are exempt.
        error(line_number, 'whitespace/braces', 4,
              'Place brace on its own line for function definitions.')

    if (match(r'\s*}\s*(else\s*({\s*)?)?$', line) and line_number > 1):
        # We check if a closed brace has started a line to see if a
        # one line control statement was previous.  The line two above the
        # closing brace is the one that would hold the control statement
        # and its opening brace.
        previous_line = clean_lines.elided[line_number - 2]
        if (previous_line.find('{') > 0
            and search(r'\b(if|for|foreach|while|else)\b', previous_line)):
            error(line_number, 'whitespace/braces', 4,
                  'One line control clauses should not use braces.')

    # An else clause should be on the same line as the preceding closing brace.
    if match(r'\s*else\s*', line):
        previous_line = get_previous_non_blank_line(clean_lines, line_number)[0]
        if match(r'\s*}\s*$', previous_line):
            error(line_number, 'whitespace/newline', 4,
                  'An else should appear on the same line as the preceding }')

    # Likewise, an else should never have the else clause on the same line
    if search(r'\belse [^\s{]', line) and not search(r'\belse if\b', line):
        error(line_number, 'whitespace/newline', 4,
              'Else clause should never be on same line as else (use 2 lines)')

    # In the same way, a do/while should never be on one line
    if match(r'\s*do [^\s{]', line):
        error(line_number, 'whitespace/newline', 4,
              'do/while clauses should not be on a single line')

    # Braces shouldn't be followed by a ; unless they're defining a struct
    # or initializing an array.
    # We can't tell in general, but we can for some common cases.
    # While the text starts with an indented '{...};' and the preceding
    # non-blank line holds no ';', that line is prepended so that a
    # struct/class/enum keyword or '=' on an earlier line is seen below.
    previous_line_number = line_number
    while True:
        (previous_line, previous_line_number) = get_previous_non_blank_line(clean_lines, previous_line_number)
        # Stop once there is no earlier non-blank line.  Without this guard a
        # line such as '    { };' preceded only by blank lines would keep
        # prepending the empty string and never leave the loop.
        if previous_line_number < 0:
            break
        if match(r'\s+{.*}\s*;', line) and not previous_line.count(';'):
            line = previous_line + line
        else:
            break
    if (search(r'{.*}\s*;', line)
        and line.count('{') == line.count('}')
        and not search(r'struct|class|enum|\s*=\s*{', line)):
        error(line_number, 'readability/braces', 4,
              "You don't need a ; after a }")
1692
1693
def check_exit_statement_simplifications(clean_lines, line_number, error):
    """Flags an else or else-if that follows an "if" block ending in an exit statement.

    When the block before the else concludes with a return, break, continue
    or goto statement, the else (or else if) can be written as a plain
    statement (or if).  The check only runs for lines that start an else or
    else-if clause and walks upwards from there.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """

    elided = clean_lines.elided  # Lines with comments and strings removed.

    else_match = match(r'(?P<else_indentation>\s*)(\}\s*)?else(\s+if\s*\(|(?P<else>\s*(\{\s*)?\Z))',
                       elided[line_number])
    if not else_match:
        return

    outer_indentation = else_match.group('else_indentation')
    block_indentation = outer_indentation + '    '
    closing_brace_line = outer_indentation + '}'
    opening_brace_line = outer_indentation + '{'

    exit_statement_seen = False
    for candidate_number in range(line_number - 1, -1, -1):
        candidate = elided[candidate_number]

        # Skip not only empty lines but also those with preprocessor directives
        # and goto labels.
        if not candidate.strip() or candidate.startswith('#') or match(r'\w+\s*:\s*$', candidate):
            continue

        # A closing brace on its own line at the else's indentation is also
        # skipped.  The styleguide wants it on the same line as the "else",
        # but code that does not comply should still be checked.
        if candidate == closing_brace_line:
            continue

        pieces = match(r'(?P<indentation>\s*)(?P<remaining_line>.*)$', candidate)
        indentation = pieces.group('indentation')
        code = pieces.group('remaining_line')

        if not exit_statement_seen:
            # Going upwards, the first real statement has to be an exit
            # statement at exactly the indentation expected inside the
            # block.  Anything else (e.g. a nested if) means this check
            # doesn't apply.
            if indentation != block_indentation:
                return
            if not match(r'(return(\W+.*)|(break|continue)\s*;|goto\s*\w+;)$', code):
                return
            exit_statement_seen = True
            continue

        # From here on, the previous block is known to end with an exit
        # statement.  What remains is to find out whether it belongs to an "if".

        # Skip an opening brace on its own line ("if (condition)\n{").
        if candidate == opening_brace_line:
            continue

        # Skip everything that's further indented than our "else" or "else if".
        if indentation != outer_indentation and indentation.startswith(outer_indentation):
            continue

        # This line has the same (or less) indentation, so the search ends
        # here.  An error is thrown only if the line is an "if".
        if match(r'if\s*\(', code):
            if else_match.start('else') != -1:
                message = ('An else statement can be removed when the prior "if" '
                           'concludes with a return, break, continue or goto statement.')
            else:
                message = ('An else if statement should be written as an if statement '
                           'when the prior "if" concludes with a return, break, '
                           'continue or goto statement.')
            error(candidate_number, 'readability/control_flow', 4, message)
        return
1781
1782
def replaceable_check(operator, macro, line):
    """Tell whether a basic CHECK could use a more specific macro instead.

    For example CHECK(a == b) could be written as CHECK_EQ, and similarly
    for CHECK_GE, CHECK_GT, CHECK_LE, CHECK_LT and CHECK_NE.

    Args:
      operator: The C++ operator used in the CHECK.
      macro: The CHECK or EXPECT macro being called.
      line: The current source line.

    Returns:
      A true value if the CHECK can be replaced with a more specific one,
      and a false value (None or False) otherwise.
    """

    # Decimal and hex integers, strings, and chars (in that order).
    constant = r'([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')'

    # Only comparisons with something that looks like a literal on one side
    # are matched, since CHECK(x == iterator) won't compile.  This misses
    # some cases where a more specific CHECK is possible, but it's less
    # annoying than dealing with extraneous warnings.
    literal_on_left = r'\s*' + constant + r'\s*' + operator + r'[^<>].*'
    literal_on_right = r'.*[^<>]' + operator + r'\s*' + constant + r'\s*\)'
    pattern = r'\s*' + macro + r'\((' + literal_on_left + '|' + literal_on_right + ')'

    comparison = match(pattern, line)
    if not comparison:
        return comparison

    # Don't complain about CHECK(x == NULL) or similar because
    # CHECK_EQ(x, NULL) won't compile (requires a cast).  More complex
    # boolean expressions involving && or ||, such as
    # CHECK(a == b || c == d), are left alone as well.
    return not search(r'NULL|&&|\|\|', line)
1816
1817
def check_check(clean_lines, line_number, error):
    """Suggests more specific CHECK/EXPECT macros where one applies.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """

    # The first macro of _CHECK_MACROS found in the raw line decides which
    # set of replacement macros is suggested.
    raw_line = clean_lines.raw_lines[line_number]
    present_macros = [name for name in _CHECK_MACROS if name in raw_line]
    current_macro = ''
    if present_macros:
        current_macro = present_macros[0]
    if not current_macro:
        # Don't waste time here if line doesn't contain 'CHECK' or 'EXPECT'
        return

    code = clean_lines.elided[line_number]  # Comments and strings removed.

    # Encourage replacing plain CHECKs with CHECK_EQ/CHECK_NE/etc.  Only the
    # first operator that applies is reported.
    for operator in ('==', '!=', '>=', '>', '<=', '<'):
        if not replaceable_check(operator, current_macro, code):
            continue
        suggestion = _CHECK_REPLACEMENT[current_macro][operator]
        error(line_number, 'readability/check', 2,
              'Consider using %s instead of %s(a %s b)'
              % (suggestion, current_macro, operator))
        return
1848
1849
def check_for_comparisons_to_zero(clean_lines, line_number, error):
    """Reports == and != comparisons against NULL, 0, true or false.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """

    # The elided line has comments and strings removed.
    code = clean_lines.elided[line_number]

    # NULL is included so that users don't have to convert NULL to 0 first
    # and then get this error.
    constant_on_right = r'[=!]=\s*(NULL|0|true|false)\W'
    constant_on_left = r'\W(NULL|0|true|false)\s*[=!]='
    if not (search(constant_on_right, code) or search(constant_on_left, code)):
        return

    error(line_number, 'readability/comparison_to_zero', 5,
          'Tests for true/false, null/non-null, and zero/non-zero should all be done without equality comparisons.')
1858
1859
def check_for_null(file_extension, clean_lines, line_number, error):
    """Reports uses of NULL, in code and in comments, where 0 should be used.

    Args:
      file_extension: The extension of the current file, without the leading dot.
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """

    # C and Objective-C implementation files are exempt from this rule.
    if is_c_or_objective_c(file_extension):
        return

    null_pattern = r'\bNULL\b'
    code = clean_lines.elided[line_number]

    # Don't warn about NULL usage in g_object_{get,set}(). See Bug 32858
    if search(r'\bg_object_[sg]et\b', code):
        return

    # NULL in actual code is reported with the higher confidence.
    if search(null_pattern, code):
        error(line_number, 'readability/null', 5, 'Use 0 instead of NULL.')
        return

    # NULL may still occur in a comment on this line.  If the raw line
    # matches, the check is repeated with strings collapsed so that NULLs
    # occurring in strings do not produce errors.
    raw_line = clean_lines.raw_lines[line_number]
    if not search(null_pattern, raw_line):
        return
    if search(null_pattern, CleansedLines.collapse_strings(raw_line)):
        error(line_number, 'readability/null', 4, 'Use 0 instead of NULL.')
1881
def get_line_width(line):
    """Determines the width of the line in column positions.

    Args:
      line: A string, which may be a Unicode string.

    Returns:
      The width of the line in column positions, accounting for Unicode
      combining characters and wide characters.  For anything that is not a
      Unicode string, len(line) is returned.
    """
    # type(u'') is `unicode` on Python 2 and `str` on Python 3, so this test
    # also works on interpreters that have no `unicode` builtin.
    if not isinstance(line, type(u'')):
        return len(line)

    width = 0
    for character in unicodedata.normalize('NFC', line):
        if unicodedata.east_asian_width(character) in ('W', 'F'):
            # Wide and fullwidth characters occupy two columns.
            width += 2
        elif not unicodedata.combining(character):
            # Combining characters occupy no column of their own.
            width += 1
    return width
1901
1902
def check_style(clean_lines, line_number, file_extension, file_state, error):
    """Checks rules from the 'C++ style rules' section of cppguide.html.

    Most of these rules are hard to test (naming, comment style), but we
    do what we can.  This function checks tab usage, trailing whitespace,
    odd indentation, unindented labels, multiple commands on one line and
    trailing boolean operators itself, then runs the more specific
    check_* functions on the same line.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      file_extension: The extension (without the dot) of the filename.
      file_state: A _FileState instance which maintains information about
                  the state of things in the file.
      error: The function to call with any errors found.
    """

    raw_line = clean_lines.raw_lines[line_number]

    if '\t' in raw_line:
        error(line_number, 'whitespace/tab', 1,
              'Tab found; better to use spaces')

    # One to three blank spaces at the beginning of the line is weird; it's
    # hard to reconcile that with 4-space indents.
    # NOTE: here are the conditions rob pike used for his tests.  Ours aren't
    # as sophisticated, but it may be worth becoming so:  RLENGTH==initial_spaces
    # if(RLENGTH > 20) complain = 0;
    # if(match($0, " +(error|private|public|protected):")) complain = 0;
    # if(match(prev, "&& *$")) complain = 0;
    # if(match(prev, "\\|\\| *$")) complain = 0;
    # if(match(prev, "[\",=><] *$")) complain = 0;
    # if(match($0, " <<")) complain = 0;
    # if(match(prev, " +for \\(")) complain = 0;
    # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
    cleansed_line = clean_lines.elided[line_number]
    leading_spaces = len(raw_line) - len(raw_line.lstrip(' '))

    if raw_line and raw_line[-1].isspace():
        error(line_number, 'whitespace/end_of_line', 4,
              'Line ends in whitespace.  Consider deleting these extra spaces.')
    elif 1 <= leading_spaces <= 3:
        # There are certain situations we allow one space, notably for labels
        if not match(r'\s*\w+\s*:\s*$', cleansed_line):
            error(line_number, 'whitespace/indent', 3,
                  'Weird number of spaces at line-start.  '
                  'Are you using a 4-space indent?')
    elif leading_spaces == 0 and raw_line[:2] != '//':
        # Labels should always be indented at least one space.
        label_match = match(r'(?P<label>[^:]+):\s*$', raw_line)
        if label_match:
            label = label_match.group('label')
            # Only throw errors for stuff that is definitely not a goto label,
            # because goto labels can in fact occur at the start of the line.
            if label in ('public', 'private', 'protected') or ' ' in label:
                error(line_number, 'whitespace/labels', 4,
                      'Labels should always be indented at least one space.  '
                      'If this is a member-initializer list in a constructor, '
                      'the colon should be on the line after the definition header.')

    # for loops are allowed two ;'s (and may run over two lines).
    if cleansed_line.count(';') > 1 and 'for' not in cleansed_line:
        previous_line = get_previous_non_blank_line(clean_lines, line_number)[0]
        continues_for_header = 'for' in previous_line and ';' not in previous_line
        # It's ok to have many commands in a switch case that fits in 1 line
        one_line_case = (('case ' in cleansed_line or 'default:' in cleansed_line)
                         and 'break;' in cleansed_line)
        if not (continues_for_header
                or one_line_case
                or cleansed_line.startswith('#define ')):
            error(line_number, 'whitespace/newline', 4,
                  'More than one command on the same line')

    if cleansed_line.strip().endswith(('||', '&&')):
        error(line_number, 'whitespace/operators', 4,
              'Boolean expressions that span multiple lines should have their '
              'operators on the left side of the line instead of the right side.')

    # Some more style checks
    check_namespace_indentation(clean_lines, line_number, file_extension, file_state, error)
    check_using_std(file_extension, clean_lines, line_number, error)
    check_max_min_macros(file_extension, clean_lines, line_number, error)
    check_switch_indentation(clean_lines, line_number, error)
    check_braces(clean_lines, line_number, error)
    check_exit_statement_simplifications(clean_lines, line_number, error)
    check_spacing(file_extension, clean_lines, line_number, error)
    check_check(clean_lines, line_number, error)
    check_for_comparisons_to_zero(clean_lines, line_number, error)
    check_for_null(file_extension, clean_lines, line_number, error)
1994
1995
# Pattern for a quoted #include of a .h file whose name contains no '/',
# i.e. a header given without any directory component.
_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
# Pattern for a whole #include line.  Group 1 is the opening delimiter
# ('<' or '"') and group 2 is the text between the delimiters.
_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
# Matches the first component of a filename delimited by -s and _s. That is:
#  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo.cpp').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo-bar_baz.cpp').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo_bar-baz.cpp').group(0) == 'foo'
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
2004
2005
2006def _drop_common_suffixes(filename):
2007    """Drops common suffixes like _test.cpp or -inl.h from filename.
2008
2009    For example:
2010      >>> _drop_common_suffixes('foo/foo-inl.h')
2011      'foo/foo'
2012      >>> _drop_common_suffixes('foo/bar/foo.cpp')
2013      'foo/bar/foo'
2014      >>> _drop_common_suffixes('foo/foo_internal.h')
2015      'foo/foo'
2016      >>> _drop_common_suffixes('foo/foo_unusualinternal.h')
2017      'foo/foo_unusualinternal'
2018
2019    Args:
2020      filename: The input filename.
2021
2022    Returns:
2023      The filename with the common suffix removed.
2024    """
2025    for suffix in ('test.cpp', 'regtest.cpp', 'unittest.cpp',
2026                   'inl.h', 'impl.h', 'internal.h'):
2027        if (filename.endswith(suffix) and len(filename) > len(suffix)
2028            and filename[-len(suffix) - 1] in ('-', '_')):
2029            return filename[:-len(suffix) - 1]
2030    return os.path.splitext(filename)[0]
2031
2032
def _classify_include(filename, include, is_system, include_state):
    """Decides which header category an #include belongs to.

    Args:
      filename: The current file cpp_style is running over.
      include: The path to a #included file.
      is_system: True if the #include used <> rather than "".
      include_state: An _IncludeState instance in which the headers are inserted.

    Returns:
      One of the _XXX_HEADER constants:
        _OTHER_HEADER for system includes, for includes found in a header
          file (unless the include is exactly the file's own name), and
          whenever nothing more specific applies.
        _CONFIG_HEADER for an include named "config.h".
        _MOC_HEADER for Qt moc output ("moc_*.cpp" and "*.moc").
        _PRIMARY_HEADER when the include's base name matches the base name
          of the file being checked (see the comments below for how strict
          that match is).
    """
    # A system header is always classified as _OTHER_HEADER.
    if is_system:
        return _OTHER_HEADER

    # An include named config.h is WebCore/config.h.
    if include == "config.h":
        return _CONFIG_HEADER

    # Header files cannot have a primary include.  The single exception is
    # an include that exactly matches the header's own filename: it is let
    # through so that it can be flagged as primary and trigger the
    # "don't include yourself" check.
    checking_header_file = filename.endswith('.h')
    if checking_header_file and filename != include:
        return _OTHER_HEADER

    # Qt's moc files do not follow the naming and ordering rules, so they
    # get their own category and are skipped by those checks.
    is_moc_source = include.startswith('moc_') and include.endswith('.cpp')
    if is_moc_source or include.endswith('.moc'):
        return _MOC_HEADER

    target_base = FileInfo(filename).base_name()
    include_base = FileInfo(include).base_name()

    if include_state.visited_primary_section():
        # A primary header was already encountered, so compare strictly.
        # If the two bases are still identical, the earlier lenient match
        # was probably a false positive.
        if target_base != include_base:
            return _OTHER_HEADER
        if include == "ResourceHandleWin.h":
            # FIXME: Thus far, we've only seen one example of these, but if we
            # start to see more, please consider generalizing this check
            # somehow.
            return _OTHER_HEADER
        return _PRIMARY_HEADER

    # No primary header seen yet: be lenient and accept any include whose
    # base name occurs inside the target file's base name.
    if include_base in target_base:
        return _PRIMARY_HEADER
    return _OTHER_HEADER
2095
2096
def check_include_line(filename, file_extension, clean_lines, line_number, include_state, error):
    """Check rules that are applicable to #include lines.

    Strings on #include lines are NOT removed from elided line, to make
    certain tasks easier. However, to prevent false positives, checks
    applicable to #include lines in check_language must be put here.

    Besides reporting errors, this records the include in include_state
    (first line number per include path) and stores the header type of the
    line in include_state.header_types.

    Args:
      filename: The name of the current file.
      file_extension: The current file extension, without the leading dot.
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      include_state: An _IncludeState instance in which the headers are inserted.
      error: The function to call with any errors found.
    """
    # FIXME: For readability or as a possible optimization, consider
    #        exiting early here by checking whether the "build/include"
    #        category should be checked for the given filename.  This
    #        may involve having the error handler classes expose a
    #        should_check() method, in addition to the usual __call__
    #        method.
    line = clean_lines.lines[line_number]

    matched = _RE_PATTERN_INCLUDE.search(line)
    if not matched:
        return

    include = matched.group(2)
    is_system = (matched.group(1) == '<')

    # Look for any of the stream classes that are part of standard C++.
    if match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include):
        error(line_number, 'readability/streams', 3,
              'Streams are highly discouraged.')

    # Look for specific includes to fix.
    if include.startswith('wtf/') and not is_system:
        error(line_number, 'build/include', 4,
              'wtf includes should be <wtf/file.h> instead of "wtf/file.h".')

    duplicate_header = include in include_state
    if duplicate_header:
        error(line_number, 'build/include', 4,
              '"%s" already included at %s:%s' %
              (include, filename, include_state[include]))
    else:
        include_state[include] = line_number

    # The header type is recorded even for duplicates because the sorting
    # check below looks up the type of the previous #include line.
    header_type = _classify_include(filename, include, is_system, include_state)
    include_state.header_types[line_number] = header_type

    # Only proceed if this isn't a duplicate header.
    if duplicate_header:
        return

    # We want to ensure that headers appear in the right order:
    # 1) for implementation files: config.h, primary header, blank line, alphabetically sorted
    # 2) for header files: alphabetically sorted
    # The include_state object keeps track of the last type seen
    # and complains if the header types are out of order or missing.
    error_message = include_state.check_next_include_order(header_type, file_extension == "h")

    # Check to make sure we have a blank line after primary header.
    if not error_message and header_type == _PRIMARY_HEADER:
        next_line_number = line_number + 1
        # When the primary header is the very last line there is nothing
        # after it to separate it from, so there is nothing to report (and
        # indexing past the end would raise IndexError).
        if (next_line_number < len(clean_lines.raw_lines)
            and not is_blank_line(clean_lines.raw_lines[next_line_number])):
            error(line_number, 'build/include_order', 4,
                  'You should add a blank line after implementation file\'s own header.')

    # Check to make sure all headers besides config.h and the primary header are
    # alphabetically sorted. Skip Qt's moc files.
    # The line_number > 0 guard keeps index -1 from silently wrapping around
    # to the last line of the file when there is no previous line at all.
    if not error_message and header_type == _OTHER_HEADER and line_number > 0:
        previous_line_number = line_number - 1
        previous_line = clean_lines.lines[previous_line_number]
        previous_match = _RE_PATTERN_INCLUDE.search(previous_line)
        # Walk backwards to the closest preceding #include, but stop at a
        # preprocessor conditional: sorting is not compared across it.
        while (not previous_match and previous_line_number > 0
               and not search(r'\A(#if|#ifdef|#ifndef|#else|#elif|#endif)', previous_line)):
            previous_line_number -= 1
            previous_line = clean_lines.lines[previous_line_number]
            previous_match = _RE_PATTERN_INCLUDE.search(previous_line)
        if previous_match:
            previous_header_type = include_state.header_types[previous_line_number]
            if previous_header_type == _OTHER_HEADER and previous_line.strip() > line.strip():
                error(line_number, 'build/include_order', 4,
                      'Alphabetical sorting problem.')

    if error_message:
        if file_extension == 'h':
            error(line_number, 'build/include_order', 4,
                  '%s Should be: alphabetically sorted.' %
                  error_message)
        else:
            error(line_number, 'build/include_order', 4,
                  '%s Should be: config.h, primary header, blank line, and then alphabetically sorted.' %
                  error_message)
2192
2193
def check_language(filename, clean_lines, line_number, file_extension, include_state,
                   error):
    """Checks rules from the 'C++ language rules' section of cppguide.html.

    Some of these rules are hard to test (function overloading, using
    uint32 inappropriately), but we do the best we can.

    A line that is an #include is handed to check_include_line() and gets
    none of the other checks performed here.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      file_extension: The extension (without the dot) of the filename.
      include_state: An _IncludeState instance in which the headers are inserted.
      error: The function to call with any errors found.
    """
    # If the line is empty or consists of entirely a comment, no need to
    # check it.
    line = clean_lines.elided[line_number]
    if not line:
        return

    matched = _RE_PATTERN_INCLUDE.search(line)
    if matched:
        check_include_line(filename, file_extension, clean_lines, line_number, include_state, error)
        return

    # FIXME: figure out if they're using default arguments in fn proto.

    # Check to see if they're using an conversion function cast.
    # I just try to capture the most common basic types, though there are more.
    # Parameterless conversion functions, such as bool(), are allowed as they are
    # probably a member operator declaration or default constructor.
    matched = search(
        r'\b(int|float|double|bool|char|int32|uint32|int64|uint64)\([^)]', line)
    if matched:
        # gMock methods are defined using some variant of MOCK_METHODx(name, type)
        # where type may be float(), int(string), etc.  Without context they are
        # virtually indistinguishable from int(x) casts.
        if not match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line):
            error(line_number, 'readability/casting', 4,
                  'Using deprecated casting style.  '
                  'Use static_cast<%s>(...) instead' %
                  matched.group(1))

    check_c_style_cast(line_number, line, clean_lines.raw_lines[line_number],
                       'static_cast',
                       r'\((int|float|double|bool|char|u?int(16|32|64))\)',
                       error)
    # This doesn't catch all cases.  Consider (const char * const)"hello".
    check_c_style_cast(line_number, line, clean_lines.raw_lines[line_number],
                       'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)

    # In addition, we look for people taking the address of a cast.  This
    # is dangerous -- casts can assign to temporaries, so the pointer doesn't
    # point where you think.
    if search(
        r'(&\([^)]+\)[\w(])|(&(static|dynamic|reinterpret)_cast\b)', line):
        error(line_number, 'runtime/casting', 4,
              ('Are you taking an address of a cast?  '
               'This is dangerous: could be a temp var.  '
               'Take the address before doing the cast, rather than after'))

    # Check for people declaring static/global STL strings at the top level.
    # This is dangerous because the C++ language does not guarantee that
    # globals with constructors are initialized before the first access.
    matched = match(
        r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
        line)
    # Make sure it's not a function.
    # Function template specialization looks like: "string foo<Type>(...".
    # Class template definitions look like: "string Foo<Type>::Method(...".
    if matched and not match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)',
                             matched.group(3)):
        error(line_number, 'runtime/string', 4,
              'For a static/global string constant, use a C style string instead: '
              '"%schar %s[]".' %
              (matched.group(1), matched.group(2)))

    # Check that we're not using RTTI outside of testing code.
    if search(r'\bdynamic_cast<', line):
        error(line_number, 'runtime/rtti', 5,
              'Do not use dynamic_cast<>.  If you need to cast within a class '
              "hierarchy, use static_cast<> to upcast.  Google doesn't support "
              'RTTI.')

    # Catches initializers of the form "member_(member_)".
    if search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
        error(line_number, 'runtime/init', 4,
              'You seem to be initializing a member variable with itself.')

    # FIXME: for header files, check that 1-arg constructors are explicit.
    #        How to tell it's a constructor?
    #        (handled in check_for_non_standard_constructs for now)

    # Check if people are using the verboten C basic types.  The only exception
    # we regularly allow is "unsigned short port" for port.
    if search(r'\bshort port\b', line):
        if not search(r'\bunsigned short port\b', line):
            error(line_number, 'runtime/int', 4,
                  'Use "unsigned short" for ports, not "short"')

    # When snprintf is used, the second argument shouldn't be a literal.
    matched = search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
    if matched:
        error(line_number, 'runtime/printf', 3,
              'If you can, use sizeof(%s) instead of %s as the 2nd arg '
              'to snprintf.' % (matched.group(1), matched.group(2)))

    # Check if some verboten C functions are being used.
    if search(r'\bsprintf\b', line):
        error(line_number, 'runtime/printf', 5,
              'Never use sprintf.  Use snprintf instead.')
    matched = search(r'\b(strcpy|strcat)\b', line)
    if matched:
        error(line_number, 'runtime/printf', 4,
              'Almost always, snprintf is better than %s' % matched.group(1))

    if search(r'\bsscanf\b', line):
        error(line_number, 'runtime/printf', 1,
              'sscanf can be ok, but is slow and can overflow buffers.')

    # Check for suspicious usage of "if" like
    # } if (a == b) {
    if search(r'\}\s*if\s*\(', line):
        error(line_number, 'readability/braces', 4,
              'Did you mean "else if"? If not, start a new line for "if".')

    # Check for potential format string bugs like printf(foo).
    # We constrain the pattern not to pick things like DocidForPrintf(foo).
    # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
    matched = re.search(r'\b((?:string)?printf)\s*\(([\w.\->()]+)\)', line, re.I)
    if matched:
        error(line_number, 'runtime/printf', 4,
              'Potential format string bug. Do %s("%%s", %s) instead.'
              % (matched.group(1), matched.group(2)))

    # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
    matched = search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
    if matched and not match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", matched.group(2)):
        error(line_number, 'runtime/memset', 4,
              'Did you mean "memset(%s, 0, %s)"?'
              % (matched.group(1), matched.group(2)))

    # Detect variable-length arrays.
    matched = match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
    if (matched and matched.group(2) != 'return' and matched.group(2) != 'delete' and
        matched.group(3).find(']') == -1):
        # Split the size using space and arithmetic operators as delimiters.
        # If any of the resulting tokens are not compile time constants then
        # report the error.
        # Both shift operators are delimiters.  (The size expression is
        # known to contain no ']' here, so a delimiter spelled '>>]' could
        # never match and '>>' would be left behind as a bogus token.)
        tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>', matched.group(3))
        is_const = True
        skip_next = False
        for tok in tokens:
            if skip_next:
                skip_next = False
                continue

            if search(r'sizeof\(.+\)', tok):
                continue
            if search(r'arraysize\(\w+\)', tok):
                continue

            tok = tok.lstrip('(')
            tok = tok.rstrip(')')
            if not tok:
                continue
            if match(r'\d+', tok):
                continue
            if match(r'0[xX][0-9a-fA-F]+', tok):
                continue
            if match(r'k[A-Z0-9]\w*', tok):
                continue
            if match(r'(.+::)?k[A-Z0-9]\w*', tok):
                continue
            if match(r'(.+::)?[A-Z][A-Z0-9_]*', tok):
                continue
            # A catch all for tricky sizeof cases, including 'sizeof expression',
            # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
            # requires skipping the next token because we split on ' ' and '*'.
            if tok.startswith('sizeof'):
                skip_next = True
                continue
            is_const = False
            break
        if not is_const:
            error(line_number, 'runtime/arrays', 1,
                  'Do not use variable-length arrays.  Use an appropriately named '
                  "('k' followed by CamelCase) compile-time constant for the size.")

    # Check for use of unnamed namespaces in header files.  Registration
    # macros are typically OK, so we allow use of "namespace {" on lines
    # that end with backslashes.
    if (file_extension == 'h'
        and search(r'\bnamespace\s*{', line)
        and line[-1] != '\\'):
        error(line_number, 'build/namespaces', 4,
              'Do not use unnamed namespaces in header files.  See '
              'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
              ' for more information.')

    check_identifier_name_in_declaration(filename, line_number, line, error)
2397
2398
def check_identifier_name_in_declaration(filename, line_number, line, error):
    """Checks if identifier names contain any underscores.

    As identifiers in libraries we are using have a bunch of
    underscores, we only warn about the declarations of identifiers
    and don't check use of identifiers.

    The line is first simplified (type adjectives, template parameters and
    a leading control-flow keyword are stripped), then declarations are
    matched one after another from the start of what remains.  Problems
    are reported through error(); nothing is returned.

    Args:
      filename: The name of the current file.
      line_number: The number of the line to check.
      line: The line of code to check.
      error: The function to call with any errors found.
    """
    # We don't check return or delete statements.
    if match(r'\s*(return|delete)\b', line):
        return

    # Basically, a declaration is a type name followed by whitespaces
    # followed by an identifier. The type name can be complicated
    # due to type adjectives and templates. We remove them first to
    # simplify the process to find declarations of identifiers.

    # Convert "long long", "long double", and "long long int" to
    # simple types, but don't remove simple "long".
    line = sub(r'long (long )?(?=long|double|int)', '', line)
    line = sub(r'\b(unsigned|signed|inline|using|static|const|volatile|auto|register|extern|typedef|restrict|struct|class|virtual)(?=\W)', '', line)

    # Remove all template parameters by removing matching < and >.
    # Loop until no templates are removed to remove nested templates.
    while True:
        line, number_of_replacements = subn(r'<([\w\s:]|::)+\s*[*&]*\s*>', '', line)
        if not number_of_replacements:
            break

    # Declarations of local variables can be in condition expressions
    # of control flow statements (e.g., "if (RenderObject* p = o->parent())").
    # We remove the keywords and the first parenthesis.
    #
    # Declarations in "while", "if", and "switch" are different from
    # other declarations in two aspects:
    #
    # - There can be only one declaration between the parentheses.
    #   (i.e., you cannot write "if (int i = 0, j = 1) {}")
    # - The variable must be initialized.
    #   (i.e., you cannot write "if (int i) {}")
    #
    # and we will need different treatments for them.
    line = sub(r'^\s*for\s*\(', '', line)
    # control_statement is the number of replacements made, so it is truthy
    # exactly when a while/else if/if/switch prefix was stripped.
    line, control_statement = subn(r'^\s*(while|else if|if|switch)\s*\(', '', line)

    # Detect variable and functions.
    type_regexp = r'\w([\w]|\s*[*&]\s*|::)+'
    identifier_regexp = r'(?P<identifier>[\w:]+)'
    # The character following the identifier tells what kind of declaration
    # this is; the (?!=) lookahead rejects "==".
    character_after_identifier_regexp = r'(?P<character_after_identifier>[[;()=,])(?!=)'
    declaration_without_type_regexp = r'\s*' + identifier_regexp + r'\s*' + character_after_identifier_regexp
    declaration_with_type_regexp = r'\s*' + type_regexp + r'\s' + declaration_without_type_regexp
    is_function_arguments = False
    number_of_identifiers = 0
    # Each iteration consumes one declaration from the front of line.
    while True:
        # If we are seeing the first identifier or arguments of a
        # function, there should be a type name before an identifier.
        if not number_of_identifiers or is_function_arguments:
            declaration_regexp = declaration_with_type_regexp
        else:
            declaration_regexp = declaration_without_type_regexp

        matched = match(declaration_regexp, line)
        if not matched:
            return
        identifier = matched.group('identifier')
        character_after_identifier = matched.group('character_after_identifier')

        # If we removed a non-for-control statement, the character after
        # the identifier should be '='. With this rule, we can avoid
        # warning for cases like "if (val & INT_MAX) {".
        if control_statement and character_after_identifier != '=':
            return

        # Once an opening parenthesis has followed an identifier, everything
        # after it is treated as function arguments (each needs a type).
        is_function_arguments = is_function_arguments or character_after_identifier == '('

        # Remove "m_" and "s_" to allow them.
        modified_identifier = sub(r'(^|(?<=::))[ms]_', '', identifier)
        if modified_identifier.find('_') >= 0:
            # Various exceptions to the rule: JavaScript op codes functions, const_iterator.
            # Identifiers starting with tst_, webkit_dom_object_ or qt_ (or
            # containing ::qt_) are exempt as well.
            if (not (filename.find('JavaScriptCore') >= 0 and modified_identifier.find('_op_') >= 0)
                and not modified_identifier.startswith('tst_')
                and not modified_identifier.startswith('webkit_dom_object_')
                and not modified_identifier.startswith('qt_')
                and not modified_identifier.find('::qt_') >= 0
                and not modified_identifier == "const_iterator"):
                error(line_number, 'readability/naming', 4, identifier + " is incorrectly named. Don't use underscores in your identifier names.")

        # There can be only one declaration in non-for-control statements.
        if control_statement:
            return
        # We should continue checking if this is a function
        # declaration because we need to check its arguments.
        # Also, we need to check multiple declarations.
        if character_after_identifier != '(' and character_after_identifier != ',':
            return

        number_of_identifiers += 1
        # Drop the declaration just handled and continue with the rest.
        line = line[matched.end():]
2502
2503
def check_c_style_cast(line_number, line, raw_line, cast_type, pattern,
                       error):
    """Reports a C-style cast found on the line via the given pattern.

    Because the text looks so similar, this is also where sizeof(type) and
    unnamed function parameters are reported; at most one error is emitted
    per call.

    Args:
      line_number: The number of the line to check.
      line: The line of code to check.
      raw_line: The raw line of code to check, with comments.
      cast_type: The string for the C++ cast to recommend.  This is either
                 reinterpret_cast or static_cast, depending.
      pattern: The regular expression used to find C-style casts.
      error: The function to call with any errors found.
    """
    cast_match = search(pattern, line)
    if not cast_match:
        return

    # sizeof(int) and friends: look at the text that ends one character
    # before group 1 (for the patterns passed by check_language that
    # character is the opening parenthesis).
    text_before_cast = line[0:cast_match.start(1) - 1]
    if match(r'.*sizeof\s*$', text_before_cast):
        error(line_number, 'runtime/sizeof', 1,
              'Using sizeof(type).  Use sizeof(varname) instead if possible')
        return

    text_after_cast = line[cast_match.end(0):]

    # What follows the parenthesized type tells a cast apart from a
    # function signature with a single unnamed parameter:
    #   ')'  function pointer as an argument, eg, void foo(void (*bar)(int));
    #   ';'  plain declaration or function pointer typedef,
    #        eg, void foo(int); or void foo(int) const;
    #   '='  function pointer assignment, eg, void *(*foo)(int) = ...
    #
    # Right now, this will only catch cases where there's a single argument, and
    # it's unnamed.  It should probably be expanded to check for multiple
    # arguments with some unnamed.
    signature_match = match(r'\s*(\)|=|(const)?\s*(;|\{|throw\(\)))', text_after_cast)
    if not signature_match:
        # At this point, all that should be left is actual casts.
        error(line_number, 'readability/casting', 4,
              'Using C-style cast.  Use %s<%s>(...) instead' %
              (cast_type, cast_match.group(1)))
        return

    terminator = signature_match.group(3)
    # A definition ('{' or 'throw()') is only let off when the raw line
    # contains a block comment opener.
    if not terminator or terminator == ';' or '/*' not in raw_line:
        error(line_number, 'readability/function', 3,
              'All parameters should be named in a function')
2556
2557
# Pairs of (header, names of the templates that header provides).  The order
# is significant: it determines the order of _re_pattern_templates below.
_HEADERS_CONTAINING_TEMPLATES = (
    ('<deque>', ('deque',)),
    ('<functional>', (
        'unary_function', 'binary_function',
        'plus', 'minus', 'multiplies', 'divides', 'modulus',
        'negate',
        'equal_to', 'not_equal_to', 'greater', 'less',
        'greater_equal', 'less_equal',
        'logical_and', 'logical_or', 'logical_not',
        'unary_negate', 'not1', 'binary_negate', 'not2',
        'bind1st', 'bind2nd',
        'pointer_to_unary_function',
        'pointer_to_binary_function',
        'ptr_fun',
        'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
        'mem_fun_ref_t',
        'const_mem_fun_t', 'const_mem_fun1_t',
        'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
        'mem_fun_ref',
    )),
    ('<limits>', ('numeric_limits',)),
    ('<list>', ('list',)),
    ('<map>', ('map', 'multimap')),
    ('<memory>', ('allocator',)),
    ('<queue>', ('queue', 'priority_queue')),
    ('<set>', ('set', 'multiset')),
    ('<stack>', ('stack',)),
    ('<string>', ('char_traits', 'basic_string')),
    ('<utility>', ('pair',)),
    ('<vector>', ('vector',)),

    # gcc extensions.
    # Note: std::hash is their hash, ::hash is our hash
    ('<hash_map>', ('hash_map', 'hash_multimap')),
    ('<hash_set>', ('hash_set', 'hash_multiset')),
    ('<slist>', ('slist',)),
)

_HEADERS_ACCEPTED_BUT_NOT_PROMOTED = {
    # We can trust with reasonable confidence that map gives us pair<>, too.
    'pair<>': ('map', 'multimap', 'hash_map', 'hash_multimap'),
}

# Finds the bare word "string".
_RE_PATTERN_STRING = re.compile(r'\bstring\b')

# List of (compiled pattern, function name, '<algorithm>') triples.
_re_pattern_algorithm_header = []
for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap', 'transform'):
    # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
    # type::max().
    _re_pattern_algorithm_header += [(
        re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
        _template,
        '<algorithm>',
    )]

# List of (compiled pattern, 'name<>', header) triples, one per template
# listed in _HEADERS_CONTAINING_TEMPLATES and in the same order.
_re_pattern_templates = []
for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
    for _template in _templates:
        _re_pattern_templates += [(
            re.compile(r'(\<|\b)' + _template + r'\s*\<'),
            _template + '<>',
            _header,
        )]
2619
2620
def files_belong_to_same_module(filename_cpp, filename_h):
    """Tells whether a .cpp file and a header belong to the same module.

    A 'module' here means the following:
    foo.h, foo-inl.h, foo.cpp, foo_test.cpp and foo_unittest.cpp belong to
    the same 'module' if they are in the same directory.
    some/path/public/xyzzy and some/path/internal/xyzzy are also considered
    to belong to the same module.

    The .cpp path may be longer than the header path: for example
    '/absolute/path/to/base/sysinfo.cpp' may include 'base/sysinfo.h'.  In
    that case the leading part of the .cpp path that the header path lacks
    is returned as well, so that the caller can use it to open the header
    more robustly.  The real include paths are not available in this
    context, hence this guesswork.

    Known bugs: tools/base/bar.cpp and base/bar.h belong to the same module
    according to this implementation. Because of this, this function gives
    some false positives. This should be sufficiently rare in practice.

    Args:
      filename_cpp: is the path for the .cpp file
      filename_h: is the path for the header path

    Returns:
      Tuple with a bool and a string:
      bool: True if filename_cpp and filename_h belong to the same module.
      string: the additional prefix needed to open the header file.
    """
    if not filename_cpp.endswith('.cpp') or not filename_h.endswith('.h'):
        return (False, '')

    # Reduce the source path to its module stem: drop the extension and
    # then at most one test marker.
    source_stem = filename_cpp[:-len('.cpp')]
    for test_marker in ('_unittest', '_test'):
        if source_stem.endswith(test_marker):
            source_stem = source_stem[:-len(test_marker)]
            break

    # Same for the header: drop the extension and an "-inl" marker.
    header_stem = filename_h[:-len('.h')]
    if header_stem.endswith('-inl'):
        header_stem = header_stem[:-len('-inl')]

    # public/ and internal/ directories are treated as the same place.
    for visibility_dir in ('/public/', '/internal/'):
        source_stem = source_stem.replace(visibility_dir, '/')
        header_stem = header_stem.replace(visibility_dir, '/')

    if not source_stem.endswith(header_stem):
        return False, ''
    # Whatever precedes the header stem in the source stem is the prefix.
    return True, source_stem[:-len(header_stem)]
2674
2675
def update_include_state(filename, include_state, io=codecs):
    """Fill up the include_state with new includes found from the file.

    Every #include found in the file is added to include_state unless an
    entry for that include already exists.  The file is closed again before
    returning.

    Args:
      filename: the name of the header to read.
      include_state: an _IncludeState instance in which the headers are inserted.
      io: The io factory to use to read the file. Provided for testability.

    Returns:
      True if the header could be opened and was processed. False if
      opening it raised IOError.
    """
    try:
        header_file = io.open(filename, 'r', 'utf8', 'replace')
    except IOError:
        return False

    try:
        line_number = 0
        for line in header_file:
            line_number += 1
            clean_line = cleanse_comments(line)
            matched = _RE_PATTERN_INCLUDE.search(clean_line)
            if matched:
                include = matched.group(2)
                # The value formatting is cute, but not really used right now.
                # What matters here is that the key is in include_state.
                include_state.setdefault(include, '%s:%d' % (filename, line_number))
    finally:
        # Release the file handle instead of leaking it.  An injected io
        # factory may hand back something without close() (for example a
        # plain list of lines), so only close what can be closed.
        close = getattr(header_file, 'close', None)
        if close is not None:
            close()
    return True
2703
2704
def check_for_include_what_you_use(filename, clean_lines, include_state, error,
                                   io=codecs):
    """Reports STL entities that are used without including their header.

    The elided lines of the file are scanned for uses of string, of the
    known algorithm functions and of the known templates. For every header
    those uses call for, a warning is emitted unless the header already
    appears in the include state. Only one reason is kept per header: when
    several entities map to the same header (e.g. equal_to<> and less<> for
    <functional>), the one found last in the file is the one reported.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      include_state: An _IncludeState instance.
      error: The function to call with any errors found.
      io: The IO factory to use to read the header file. Provided for unittest
          injection.
    """
    # Header name -> (line_number, entity that needs it),
    # e.g. { '<functional>': (1219, 'less<>') }
    required = {}

    for line_number in xrange(clean_lines.num_lines()):
        line = clean_lines.elided[line_number]
        # Nothing to learn from empty lines or preprocessor directives.
        if not line or line[0] == '#':
            continue

        # String is special -- it is a non-templatized type in STL.
        if _RE_PATTERN_STRING.search(line):
            required['<string>'] = (line_number, 'string')

        for pattern, template, header in _re_pattern_algorithm_header:
            if pattern.search(line):
                required[header] = (line_number, template)

        # Pure speed-up: the template patterns are only tried on lines
        # that contain a '<' at all.
        if '<' in line:
            for pattern, template, header in _re_pattern_templates:
                if pattern.search(line):
                    required[header] = (line_number, template)

    # Whatever foo.h includes does not have to be included again by foo.cpp,
    # so the includes of the matching header are merged in below. Work on a
    # copy so that the caller's include state is left untouched.
    include_state = include_state.copy()

    # Set once the header belonging to this file was found and loaded.
    header_found = False

    # Matching against header paths is done on the absolute path.
    abs_filename = os.path.abspath(filename)

    # Emacs's flymake runs the checker on a temporary copy whose name may end
    # with '_flymake.cpp'. Map it back to the real name so that we look for
    # 'foo.h' rather than 'foo_flymake.h'.
    emacs_flymake_suffix = '_flymake.cpp'
    if abs_filename.endswith(emacs_flymake_suffix):
        abs_filename = abs_filename[:-len(emacs_flymake_suffix)] + '.cpp'

    # update_include_state() adds entries to include_state while we loop,
    # hence the iteration over a copy of the keys.
    for header in include_state.keys():  #NOLINT
        same_module, common_path = files_belong_to_same_module(abs_filename, header)
        if not same_module:
            continue
        if update_include_state(common_path + header, include_state, io):
            header_found = True

    # If we can't find the header file for a .cpp, assume it's because we don't
    # know where to look. In that case we'll give up as we're not sure they
    # didn't include it in the .h file.
    # FIXME: Do a better job of finding .h files so we are confident that
    #        not having the .h file means there isn't one.
    if not header_found and filename.endswith('.cpp'):
        return

    # Scanning is complete; emit one warning per header that is still missing.
    for required_header in required:
        line_number, template = required[required_header]
        if template in _HEADERS_ACCEPTED_BUT_NOT_PROMOTED:
            # Some entities are also satisfied by alternative headers.
            alternatives = _HEADERS_ACCEPTED_BUT_NOT_PROMOTED[template]
            if any(alternative in include_state for alternative in alternatives):
                continue
        if required_header.strip('<>"') in include_state:
            continue
        error(line_number,
              'build/include_what_you_use', 4,
              'Add #include ' + required_header + ' for ' + template)
2796
2797
def process_line(filename, file_extension,
                 clean_lines, line, include_state, function_state,
                 class_state, file_state, error):
    """Runs the per-line checks on a single line of the file.

    Function-length accounting is performed for every line. All remaining
    checks are skipped when the raw text of the line contains the word
    NOLINT.

    Args:
      filename: Filename of the file that is being processed.
      file_extension: The extension (dot not included) of the file.
      clean_lines: The cleansed lines of the file. Its raw_lines attribute
                   is consulted to detect NOLINT markers.
      line: Index of the line being processed.
      include_state: An _IncludeState instance in which the headers are inserted.
      function_state: A _FunctionState instance which counts function lines, etc.
      class_state: A _ClassState instance which maintains information about
                   the current stack of nested class declarations being parsed.
      file_state: A _FileState instance which maintains information about
                  the state of things in the file.
      error: A callable to which errors are reported.

    """
    unprocessed_lines = clean_lines.raw_lines

    # This one runs unconditionally, even for NOLINT lines.
    check_for_function_lengths(clean_lines, line, function_state, error)

    # A NOLINT marker on the raw line silences every check below.
    if search(r'\bNOLINT\b', unprocessed_lines[line]):
        return

    check_for_multiline_comments_and_strings(clean_lines, line, error)
    check_style(clean_lines, line, file_extension, file_state, error)
    check_language(filename, clean_lines, line, file_extension, include_state,
                   error)
    check_for_non_standard_constructs(clean_lines, line, class_state, error)
    check_posix_threading(clean_lines, line, error)
    check_invalid_increment(clean_lines, line, error)
2830
2831
def _process_lines(filename, file_extension, lines, error, verbosity):
    """Runs every lint check on a file and reports errors to the callback.

    Args:
      filename: Filename of the file that is being processed.
      file_extension: The extension (dot not included) of the file.
      lines: An array of strings, each representing a line of the file, with the
             last element being empty if the file is terminated with a newline.
      error: A callable to which errors are reported.
      verbosity: Value handed to the _FunctionState constructor.
    """
    # Sentinel lines are added at both ends: the first makes line numbers
    # coincide with list indices, the last gives the file a known ending.
    leading_marker = '// marker so line numbers and indices both start at 1'
    trailing_marker = '// marker so line numbers end in a known way'
    lines = [leading_marker] + lines + [trailing_marker]

    include_state = _IncludeState()
    function_state = _FunctionState(verbosity)
    class_state = _ClassState()
    file_state = _FileState()

    check_for_copyright(lines, error)

    # Header guards are only checked for .h files.
    if file_extension == 'h':
        check_for_header_guard(filename, lines, error)

    remove_multi_line_comments(lines, error)
    clean_lines = CleansedLines(lines)
    for line_number in xrange(clean_lines.num_lines()):
        process_line(filename, file_extension, clean_lines, line_number,
                     include_state, function_state, class_state, file_state, error)
    class_state.check_finished(error)

    check_for_include_what_you_use(filename, clean_lines, include_state, error)

    # These run here, not in process_line, so that they are given the
    # lines list rather than the CleansedLines instance.
    check_for_unicode_replacement_characters(lines, error)

    check_for_new_line_at_eof(lines, error)
2869
2870
class CppProcessor(object):

    """Processes C++ lines for checking style."""

    # This list is used to--
    #
    # (1) generate an explicit list of all possible categories,
    # (2) unit test that all checked categories have valid names, and
    # (3) unit test that all categories are getting unit tested.
    #
    categories = set([
        'build/class',
        'build/deprecated',
        'build/endif_comment',
        'build/forward_decl',
        'build/header_guard',
        'build/include',
        'build/include_order',
        'build/include_what_you_use',
        'build/namespaces',
        'build/printf_format',
        'build/storage_class',
        'build/using_std',
        'legal/copyright',
        'readability/braces',
        'readability/casting',
        'readability/check',
        'readability/comparison_to_zero',
        'readability/constructors',
        'readability/control_flow',
        'readability/fn_size',
        'readability/function',
        'readability/multiline_comment',
        'readability/multiline_string',
        'readability/naming',
        'readability/null',
        'readability/streams',
        'readability/todo',
        'readability/utf8',
        'runtime/arrays',
        'runtime/casting',
        'runtime/explicit',
        'runtime/init',
        'runtime/int',
        'runtime/invalid_increment',
        'runtime/max_min_macros',
        'runtime/memset',
        'runtime/printf',
        'runtime/printf_format',
        'runtime/references',
        'runtime/rtti',
        'runtime/sizeof',
        'runtime/string',
        'runtime/threadsafe_fn',
        'runtime/virtual',
        'whitespace/blank_line',
        'whitespace/braces',
        'whitespace/comma',
        'whitespace/comments',
        'whitespace/declaration',
        'whitespace/end_of_line',
        'whitespace/ending_newline',
        'whitespace/indent',
        'whitespace/labels',
        'whitespace/line_length',
        'whitespace/newline',
        'whitespace/operators',
        'whitespace/parens',
        'whitespace/semicolon',
        'whitespace/tab',
        'whitespace/todo',
        ])

    def __init__(self, file_path, file_extension, handle_style_error, verbosity):
        """Create a CppProcessor instance.

        Args:
          file_path: The path of the file to process; passed to the lint
                     checks as the file name.
          file_extension: A string that is the file extension, without
                          the leading dot.
          handle_style_error: The callable to which style errors are
                              reported.
          verbosity: The verbosity value forwarded to the lint checks.

        """
        self.file_extension = file_extension
        self.file_path = file_path
        self.handle_style_error = handle_style_error
        self.verbosity = verbosity

    # Useful for unit testing.
    def __eq__(self, other):
        """Return whether this CppProcessor instance is equal to another.

        Two processors are equal when their file extension, file path,
        error handler and verbosity all compare equal. An object that is
        not a CppProcessor is never equal to one.
        """
        # Guard against None and unrelated types, which would otherwise
        # raise AttributeError on the attribute lookups below.
        if not isinstance(other, CppProcessor):
            return False
        if self.file_extension != other.file_extension:
            return False
        if self.file_path != other.file_path:
            return False
        if self.handle_style_error != other.handle_style_error:
            return False
        if self.verbosity != other.verbosity:
            return False

        return True

    # Useful for unit testing.
    def __ne__(self, other):
        """Return whether this CppProcessor instance differs from another."""
        # Python does not automatically deduce __ne__() from __eq__().
        return not self.__eq__(other)

    def process(self, lines):
        """Run the lint checks on the given lines of the file.

        Args:
          lines: A list of strings, each one line of the file.
        """
        _process_lines(self.file_path, self.file_extension, lines,
                       self.handle_style_error, self.verbosity)
2979
2980
2981# FIXME: Remove this function (requires refactoring unit tests).
def process_file_data(filename, file_extension, lines, error, verbosity):
    """Lint the given lines by way of a throwaway CppProcessor.

    Args:
      filename: Path of the file being processed.
      file_extension: The extension (dot not included) of the file.
      lines: The lines of the file, as a list of strings.
      error: The callable to which style errors are reported.
      verbosity: The verbosity value handed to the processor.
    """
    CppProcessor(filename, file_extension, error, verbosity).process(lines)
2985
2986