1#!/usr/bin/python 2# -*- coding: utf-8 -*- 3# 4# Copyright (C) 2009 Google Inc. All rights reserved. 5# Copyright (C) 2009 Torch Mobile Inc. 6# 7# Redistribution and use in source and binary forms, with or without 8# modification, are permitted provided that the following conditions are 9# met: 10# 11# * Redistributions of source code must retain the above copyright 12# notice, this list of conditions and the following disclaimer. 13# * Redistributions in binary form must reproduce the above 14# copyright notice, this list of conditions and the following disclaimer 15# in the documentation and/or other materials provided with the 16# distribution. 17# * Neither the name of Google Inc. nor the names of its 18# contributors may be used to endorse or promote products derived from 19# this software without specific prior written permission. 20# 21# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 33# This is the modified version of Google's cpplint. The original code is 34# http://google-styleguide.googlecode.com/svn/trunk/cpplint/cpplint.py 35 36"""Does WebKit-lint on c++ files. 37 38The goal of this script is to identify places in the code that *may* 39be in non-compliance with WebKit style. 
It does not attempt to fix 40up these problems -- the point is to educate. It does also not 41attempt to find all problems, or to ensure that everything it does 42find is legitimately a problem. 43 44In particular, we can get very confused by /* and // inside strings! 45We do a small hack, which is to ignore //'s with "'s after them on the 46same line, but it is far from perfect (in either direction). 47""" 48 49import codecs 50import getopt 51import math # for log 52import os 53import os.path 54import re 55import sre_compile 56import string 57import sys 58import unicodedata 59 60 61_USAGE = """ 62Syntax: %(program_name)s [--verbose=#] [--output=vs7] [--filter=-x,+y,...] 63 <file> [file] ... 64 65 The style guidelines this tries to follow are those in 66 http://webkit.org/coding/coding-style.html 67 68 Every problem is given a confidence score from 1-5, with 5 meaning we are 69 certain of the problem, and 1 meaning it could be a legitimate construct. 70 This will miss some errors, and is not a substitute for a code review. 71 72 To prevent specific lines from being linted, add a '// NOLINT' comment to the 73 end of the line. 74 75 The files passed in will be linted; at least one file must be provided. 76 Linted extensions are .cpp, .c and .h. Other file types will be ignored. 77 78 Flags: 79 80 output=vs7 81 By default, the output is formatted to ease emacs parsing. Visual Studio 82 compatible output (vs7) may also be used. Other formats are unsupported. 83 84 verbose=# 85 Specify a number 0-5 to restrict errors to certain verbosity levels. 86 87 filter=-x,+y,... 88 Specify a comma-separated list of category-filters to apply: only 89 error messages whose category names pass the filters will be printed. 90 (Category names are printed with the message and look like 91 "[whitespace/indent]".) Filters are evaluated left to right. 92 "-FOO" and "FOO" means "do not print categories that start with FOO". 93 "+FOO" means "do print categories that start with FOO". 
94 95 Examples: --filter=-whitespace,+whitespace/braces 96 --filter=whitespace,runtime/printf,+runtime/printf_format 97 --filter=-,+build/include_what_you_use 98 99 To see a list of all the categories used in %(program_name)s, pass no arg: 100 --filter= 101""" % {'program_name': sys.argv[0]} 102 103# We categorize each error message we print. Here are the categories. 104# We want an explicit list so we can list them all in cpp_style --filter=. 105# If you add a new error message with a new category, add it to the list 106# here! cpp_style_unittest.py should tell you if you forget to do this. 107# \ used for clearer layout -- pylint: disable-msg=C6013 108_ERROR_CATEGORIES = '''\ 109 build/class 110 build/deprecated 111 build/endif_comment 112 build/forward_decl 113 build/header_guard 114 build/include 115 build/include_order 116 build/include_what_you_use 117 build/namespaces 118 build/printf_format 119 build/storage_class 120 legal/copyright 121 readability/braces 122 readability/casting 123 readability/check 124 readability/comparison_to_zero 125 readability/constructors 126 readability/control_flow 127 readability/fn_size 128 readability/function 129 readability/multiline_comment 130 readability/multiline_string 131 readability/null 132 readability/streams 133 readability/todo 134 readability/utf8 135 runtime/arrays 136 runtime/casting 137 runtime/explicit 138 runtime/int 139 runtime/init 140 runtime/invalid_increment 141 runtime/memset 142 runtime/printf 143 runtime/printf_format 144 runtime/references 145 runtime/rtti 146 runtime/sizeof 147 runtime/string 148 runtime/threadsafe_fn 149 runtime/virtual 150 whitespace/blank_line 151 whitespace/braces 152 whitespace/comma 153 whitespace/comments 154 whitespace/declaration 155 whitespace/end_of_line 156 whitespace/ending_newline 157 whitespace/indent 158 whitespace/labels 159 whitespace/line_length 160 whitespace/newline 161 whitespace/operators 162 whitespace/parens 163 whitespace/semicolon 164 whitespace/tab 165 
whitespace/todo 166''' 167 168# The default state of the category filter. This is overridden by the --filter= 169# flag. By default all errors are on, so only add here categories that should be 170# off by default (i.e., categories that must be enabled by the --filter= flags). 171# All entries here should start with a '-' or '+', as in the --filter= flag. 172_DEFAULT_FILTERS = [] 173 174# Headers that we consider STL headers. 175_STL_HEADERS = frozenset([ 176 'algobase.h', 'algorithm', 'alloc.h', 'bitset', 'deque', 'exception', 177 'function.h', 'functional', 'hash_map', 'hash_map.h', 'hash_set', 178 'hash_set.h', 'iterator', 'list', 'list.h', 'map', 'memory', 'pair.h', 179 'pthread_alloc', 'queue', 'set', 'set.h', 'sstream', 'stack', 180 'stl_alloc.h', 'stl_relops.h', 'type_traits.h', 181 'utility', 'vector', 'vector.h', 182 ]) 183 184 185# Non-STL C++ system headers. 186_CPP_HEADERS = frozenset([ 187 'algo.h', 'builtinbuf.h', 'bvector.h', 'cassert', 'cctype', 188 'cerrno', 'cfloat', 'ciso646', 'climits', 'clocale', 'cmath', 189 'complex', 'complex.h', 'csetjmp', 'csignal', 'cstdarg', 'cstddef', 190 'cstdio', 'cstdlib', 'cstring', 'ctime', 'cwchar', 'cwctype', 191 'defalloc.h', 'deque.h', 'editbuf.h', 'exception', 'fstream', 192 'fstream.h', 'hashtable.h', 'heap.h', 'indstream.h', 'iomanip', 193 'iomanip.h', 'ios', 'iosfwd', 'iostream', 'iostream.h', 'istream.h', 194 'iterator.h', 'limits', 'map.h', 'multimap.h', 'multiset.h', 195 'numeric', 'ostream.h', 'parsestream.h', 'pfstream.h', 'PlotFile.h', 196 'procbuf.h', 'pthread_alloc.h', 'rope', 'rope.h', 'ropeimpl.h', 197 'SFile.h', 'slist', 'slist.h', 'stack.h', 'stdexcept', 198 'stdiostream.h', 'streambuf.h', 'stream.h', 'strfile.h', 'string', 199 'strstream', 'strstream.h', 'tempbuf.h', 'tree.h', 'typeinfo', 'valarray', 200 ]) 201 202 203# Assertion macros. These are defined in base/logging.h and 204# testing/base/gunit.h. Note that the _M versions need to come first 205# for substring matching to work.
206_CHECK_MACROS = [ 207 'DCHECK', 'CHECK', 208 'EXPECT_TRUE_M', 'EXPECT_TRUE', 209 'ASSERT_TRUE_M', 'ASSERT_TRUE', 210 'EXPECT_FALSE_M', 'EXPECT_FALSE', 211 'ASSERT_FALSE_M', 'ASSERT_FALSE', 212 ] 213 214# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE 215_CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS]) 216 217for op, replacement in [('==', 'EQ'), ('!=', 'NE'), 218 ('>=', 'GE'), ('>', 'GT'), 219 ('<=', 'LE'), ('<', 'LT')]: 220 _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement 221 _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement 222 _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement 223 _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement 224 _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement 225 _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement 226 227for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'), 228 ('>=', 'LT'), ('>', 'LE'), 229 ('<=', 'GT'), ('<', 'GE')]: 230 _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement 231 _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement 232 _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement 233 _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement 234 235 236# These constants define types of headers for use with 237# _IncludeState.check_next_include_order(). 238_CONFIG_HEADER = 0 239_PRIMARY_HEADER = 1 240_OTHER_HEADER = 2 241 242 243_regexp_compile_cache = {} 244 245 246def match(pattern, s): 247 """Matches the string with the pattern, caching the compiled regexp.""" 248 # The regexp compilation caching is inlined in both match and search for 249 # performance reasons; factoring it out into a separate function turns out 250 # to be noticeably expensive. 
251 if not pattern in _regexp_compile_cache: 252 _regexp_compile_cache[pattern] = sre_compile.compile(pattern) 253 return _regexp_compile_cache[pattern].match(s) 254 255 256def search(pattern, s): 257 """Searches the string for the pattern, caching the compiled regexp.""" 258 if not pattern in _regexp_compile_cache: 259 _regexp_compile_cache[pattern] = sre_compile.compile(pattern) 260 return _regexp_compile_cache[pattern].search(s) 261 262 263class _IncludeState(dict): 264 """Tracks line numbers for includes, and the order in which includes appear. 265 266 As a dict, an _IncludeState object serves as a mapping between include 267 filename and line number on which that file was included. 268 269 Call check_next_include_order() once for each header in the file, passing 270 in the type constants defined above. Calls in an illegal order will 271 raise an _IncludeError with an appropriate error message. 272 273 """ 274 # self._section will move monotonically through this set. If it ever 275 # needs to move backwards, check_next_include_order will raise an error. 276 _INITIAL_SECTION = 0 277 _CONFIG_SECTION = 1 278 _PRIMARY_SECTION = 2 279 _OTHER_SECTION = 3 280 281 _TYPE_NAMES = { 282 _CONFIG_HEADER: 'WebCore config.h', 283 _PRIMARY_HEADER: 'header this file implements', 284 _OTHER_HEADER: 'other header', 285 } 286 _SECTION_NAMES = { 287 _INITIAL_SECTION: "... nothing.", 288 _CONFIG_SECTION: "WebCore config.h.", 289 _PRIMARY_SECTION: 'a header this file implements.', 290 _OTHER_SECTION: 'other header.', 291 } 292 293 def __init__(self): 294 dict.__init__(self) 295 self._section = self._INITIAL_SECTION 296 self._visited_primary_section = False 297 self.header_types = dict(); 298 299 def visited_primary_section(self): 300 return self._visited_primary_section 301 302 def check_next_include_order(self, header_type, file_is_header): 303 """Returns a non-empty error message if the next header is out of order. 
304 305 This function also updates the internal state to be ready to check 306 the next include. 307 308 Args: 309 header_type: One of the _XXX_HEADER constants defined above. 310 file_is_header: Whether the file that owns this _IncludeState is itself a header 311 312 Returns: 313 The empty string if the header is in the right order, or an 314 error message describing what's wrong. 315 316 """ 317 if header_type == _CONFIG_HEADER and file_is_header: 318 return 'Header file should not contain WebCore config.h.' 319 if header_type == _PRIMARY_HEADER and file_is_header: 320 return 'Header file should not contain itself.' 321 322 error_message = '' 323 if self._section != self._OTHER_SECTION: 324 before_error_message = ('Found %s before %s' % 325 (self._TYPE_NAMES[header_type], 326 self._SECTION_NAMES[self._section + 1])) 327 after_error_message = ('Found %s after %s' % 328 (self._TYPE_NAMES[header_type], 329 self._SECTION_NAMES[self._section])) 330 331 if header_type == _CONFIG_HEADER: 332 if self._section >= self._CONFIG_SECTION: 333 error_message = after_error_message 334 self._section = self._CONFIG_SECTION 335 elif header_type == _PRIMARY_HEADER: 336 if self._section >= self._PRIMARY_SECTION: 337 error_message = after_error_message 338 elif self._section < self._CONFIG_SECTION: 339 error_message = before_error_message 340 self._section = self._PRIMARY_SECTION 341 self._visited_primary_section = True 342 else: 343 assert header_type == _OTHER_HEADER 344 if not file_is_header and self._section < self._PRIMARY_SECTION: 345 error_message = before_error_message 346 self._section = self._OTHER_SECTION 347 348 return error_message 349 350 351class _CppStyleState(object): 352 """Maintains module-wide state..""" 353 354 def __init__(self): 355 self.verbose_level = 1 # global setting. 
356 self.error_count = 0 # global count of reported errors 357 # filters to apply when emitting error messages 358 self.filters = _DEFAULT_FILTERS[:] 359 360 # output format: 361 # "emacs" - format that emacs can parse (default) 362 # "vs7" - format that Microsoft Visual Studio 7 can parse 363 self.output_format = 'emacs' 364 365 def set_output_format(self, output_format): 366 """Sets the output format for errors.""" 367 self.output_format = output_format 368 369 def set_verbose_level(self, level): 370 """Sets the module's verbosity, and returns the previous setting.""" 371 last_verbose_level = self.verbose_level 372 self.verbose_level = level 373 return last_verbose_level 374 375 def set_filters(self, filters): 376 """Sets the error-message filters. 377 378 These filters are applied when deciding whether to emit a given 379 error message. 380 381 Args: 382 filters: A string of comma-separated filters (eg "+whitespace/indent"). 383 Each filter should start with + or -; else we die. 384 385 Raises: 386 ValueError: The comma-separated filters did not all start with '+' or '-'. 387 E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter" 388 """ 389 # Default filters always have less priority than the flag ones. 
390 self.filters = _DEFAULT_FILTERS[:] 391 for filter in filters.split(','): 392 clean_filter = filter.strip() 393 if clean_filter: 394 self.filters.append(clean_filter) 395 for filter in self.filters: 396 if not (filter.startswith('+') or filter.startswith('-')): 397 raise ValueError('Every filter in --filter must start with ' 398 '+ or - (%s does not)' % filter) 399 400 def reset_error_count(self): 401 """Sets the module's error statistic back to zero.""" 402 self.error_count = 0 403 404 def increment_error_count(self): 405 """Bumps the module's error statistic.""" 406 self.error_count += 1 407 408 409_cpp_style_state = _CppStyleState() 410 411 412def _output_format(): 413 """Gets the module's output format.""" 414 return _cpp_style_state.output_format 415 416 417def _set_output_format(output_format): 418 """Sets the module's output format.""" 419 _cpp_style_state.set_output_format(output_format) 420 421 422def _verbose_level(): 423 """Returns the module's verbosity setting.""" 424 return _cpp_style_state.verbose_level 425 426 427def _set_verbose_level(level): 428 """Sets the module's verbosity, and returns the previous setting.""" 429 return _cpp_style_state.set_verbose_level(level) 430 431 432def _filters(): 433 """Returns the module's list of output filters, as a list.""" 434 return _cpp_style_state.filters 435 436 437def _set_filters(filters): 438 """Sets the module's error-message filters. 439 440 These filters are applied when deciding whether to emit a given 441 error message. 442 443 Args: 444 filters: A string of comma-separated filters (eg "whitespace/indent"). 445 Each filter should start with + or -; else we die. 
446 """ 447 _cpp_style_state.set_filters(filters) 448 449 450def error_count(): 451 """Returns the global count of reported errors.""" 452 return _cpp_style_state.error_count 453 454 455class _FunctionState(object): 456 """Tracks current function name and the number of lines in its body.""" 457 458 _NORMAL_TRIGGER = 250 # for --v=0, 500 for --v=1, etc. 459 _TEST_TRIGGER = 400 # about 50% more than _NORMAL_TRIGGER. 460 461 def __init__(self): 462 self.in_a_function = False 463 self.lines_in_function = 0 464 self.current_function = '' 465 466 def begin(self, function_name): 467 """Start analyzing function body. 468 469 Args: 470 function_name: The name of the function being tracked. 471 """ 472 self.in_a_function = True 473 self.lines_in_function = 0 474 self.current_function = function_name 475 476 def count(self): 477 """Count line in current function body.""" 478 if self.in_a_function: 479 self.lines_in_function += 1 480 481 def check(self, error, filename, line_number): 482 """Report if too many lines in function body. 483 484 Args: 485 error: The function to call with any errors found. 486 filename: The name of the current file. 487 line_number: The number of the line to check. 488 """ 489 if match(r'T(EST|est)', self.current_function): 490 base_trigger = self._TEST_TRIGGER 491 else: 492 base_trigger = self._NORMAL_TRIGGER 493 trigger = base_trigger * 2 ** _verbose_level() 494 495 if self.lines_in_function > trigger: 496 error_level = int(math.log(self.lines_in_function / base_trigger, 2)) 497 # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ... 498 if error_level > 5: 499 error_level = 5 500 error(filename, line_number, 'readability/fn_size', error_level, 501 'Small and focused functions are preferred:' 502 ' %s has %d non-comment lines' 503 ' (error triggered by exceeding %d lines).' 
% ( 504 self.current_function, self.lines_in_function, trigger)) 505 506 def end(self): 507 """Stop analizing function body.""" 508 self.in_a_function = False 509 510 511class _IncludeError(Exception): 512 """Indicates a problem with the include order in a file.""" 513 pass 514 515 516class FileInfo: 517 """Provides utility functions for filenames. 518 519 FileInfo provides easy access to the components of a file's path 520 relative to the project root. 521 """ 522 523 def __init__(self, filename): 524 self._filename = filename 525 526 def full_name(self): 527 """Make Windows paths like Unix.""" 528 return os.path.abspath(self._filename).replace('\\', '/') 529 530 def repository_name(self): 531 """Full name after removing the local path to the repository. 532 533 If we have a real absolute path name here we can try to do something smart: 534 detecting the root of the checkout and truncating /path/to/checkout from 535 the name so that we get header guards that don't include things like 536 "C:\Documents and Settings\..." or "/home/username/..." in them and thus 537 people on different computers who have checked the source out to different 538 locations won't see bogus errors. 539 """ 540 fullname = self.full_name() 541 542 if os.path.exists(fullname): 543 project_dir = os.path.dirname(fullname) 544 545 if os.path.exists(os.path.join(project_dir, ".svn")): 546 # If there's a .svn file in the current directory, we 547 # recursively look up the directory tree for the top 548 # of the SVN checkout 549 root_dir = project_dir 550 one_up_dir = os.path.dirname(root_dir) 551 while os.path.exists(os.path.join(one_up_dir, ".svn")): 552 root_dir = os.path.dirname(root_dir) 553 one_up_dir = os.path.dirname(one_up_dir) 554 555 prefix = os.path.commonprefix([root_dir, project_dir]) 556 return fullname[len(prefix) + 1:] 557 558 # Not SVN? Try to find a git top level directory by 559 # searching up from the current path. 
560 root_dir = os.path.dirname(fullname) 561 while (root_dir != os.path.dirname(root_dir) 562 and not os.path.exists(os.path.join(root_dir, ".git"))): 563 root_dir = os.path.dirname(root_dir) 564 if os.path.exists(os.path.join(root_dir, ".git")): 565 prefix = os.path.commonprefix([root_dir, project_dir]) 566 return fullname[len(prefix) + 1:] 567 568 # Don't know what to do; header guard warnings may be wrong... 569 return fullname 570 571 def split(self): 572 """Splits the file into the directory, basename, and extension. 573 574 For 'chrome/browser/browser.cpp', Split() would 575 return ('chrome/browser', 'browser', '.cpp') 576 577 Returns: 578 A tuple of (directory, basename, extension). 579 """ 580 581 googlename = self.repository_name() 582 project, rest = os.path.split(googlename) 583 return (project,) + os.path.splitext(rest) 584 585 def base_name(self): 586 """File base name - text after the final slash, before the final period.""" 587 return self.split()[1] 588 589 def extension(self): 590 """File extension - text following the final period.""" 591 return self.split()[2] 592 593 def no_extension(self): 594 """File has no source file extension.""" 595 return '/'.join(self.split()[0:2]) 596 597 def is_source(self): 598 """File has a source file extension.""" 599 return self.extension()[1:] in ('c', 'cc', 'cpp', 'cxx') 600 601 602def _should_print_error(category, confidence): 603 """Returns true iff confidence >= verbose, and category passes filter.""" 604 # There are two ways we might decide not to print an error message: 605 # the verbosity level isn't high enough, or the filters filter it out. 
606 if confidence < _cpp_style_state.verbose_level: 607 return False 608 609 is_filtered = False 610 for one_filter in _filters(): 611 if one_filter.startswith('-'): 612 if category.startswith(one_filter[1:]): 613 is_filtered = True 614 elif one_filter.startswith('+'): 615 if category.startswith(one_filter[1:]): 616 is_filtered = False 617 else: 618 assert False # should have been checked for in set_filter. 619 if is_filtered: 620 return False 621 622 return True 623 624 625def error(filename, line_number, category, confidence, message): 626 """Logs the fact we've found a lint error. 627 628 We log where the error was found, and also our confidence in the error, 629 that is, how certain we are this is a legitimate style regression, and 630 not a misidentification or a use that's sometimes justified. 631 632 Args: 633 filename: The name of the file containing the error. 634 line_number: The number of the line containing the error. 635 category: A string used to describe the "category" this bug 636 falls under: "whitespace", say, or "runtime". Categories 637 may have a hierarchy separated by slashes: "whitespace/indent". 638 confidence: A number from 1-5 representing a confidence score for 639 the error, with 5 meaning that we are certain of the problem, 640 and 1 meaning that it could be a legitimate construct. 641 message: The error message. 642 """ 643 # There are two ways we might decide not to print an error message: 644 # the verbosity level isn't high enough, or the filters filter it out. 645 if _should_print_error(category, confidence): 646 _cpp_style_state.increment_error_count() 647 if _cpp_style_state.output_format == 'vs7': 648 sys.stderr.write('%s(%s): %s [%s] [%d]\n' % ( 649 filename, line_number, message, category, confidence)) 650 else: 651 sys.stderr.write('%s:%s: %s [%s] [%d]\n' % ( 652 filename, line_number, message, category, confidence)) 653 654 655# Matches standard C++ escape esequences per 2.13.2.3 of the C++ standard. 
656_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile( 657 r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)') 658# Matches strings. Escape codes should already be removed by ESCAPES. 659_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"') 660# Matches characters. Escape codes should already be removed by ESCAPES. 661_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'") 662# Matches multi-line C++ comments. 663# This RE is a little bit more complicated than one might expect, because we 664# have to take care of space removals tools so we can handle comments inside 665# statements better. 666# The current rule is: We only clear spaces from both sides when we're at the 667# end of the line. Otherwise, we try to remove spaces from the right side, 668# if this doesn't work we try on left side but only if there's a non-character 669# on the right. 670_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile( 671 r"""(\s*/\*.*\*/\s*$| 672 /\*.*\*/\s+| 673 \s+/\*.*\*/(?=\W)| 674 /\*.*\*/)""", re.VERBOSE) 675 676 677def is_cpp_string(line): 678 """Does line terminate so, that the next symbol is in string constant. 679 680 This function does not consider single-line nor multi-line comments. 681 682 Args: 683 line: is a partial line of code starting from the 0..n. 684 685 Returns: 686 True, if next character appended to 'line' is inside a 687 string constant. 
688 """ 689 690 line = line.replace(r'\\', 'XX') # after this, \\" does not match to \" 691 return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1 692 693 694def find_next_multi_line_comment_start(lines, line_index): 695 """Find the beginning marker for a multiline comment.""" 696 while line_index < len(lines): 697 if lines[line_index].strip().startswith('/*'): 698 # Only return this marker if the comment goes beyond this line 699 if lines[line_index].strip().find('*/', 2) < 0: 700 return line_index 701 line_index += 1 702 return len(lines) 703 704 705def find_next_multi_line_comment_end(lines, line_index): 706 """We are inside a comment, find the end marker.""" 707 while line_index < len(lines): 708 if lines[line_index].strip().endswith('*/'): 709 return line_index 710 line_index += 1 711 return len(lines) 712 713 714def remove_multi_line_comments_from_range(lines, begin, end): 715 """Clears a range of lines for multi-line comments.""" 716 # Having // dummy comments makes the lines non-empty, so we will not get 717 # unnecessary blank line warnings later in the code. 718 for i in range(begin, end): 719 lines[i] = '// dummy' 720 721 722def remove_multi_line_comments(filename, lines, error): 723 """Removes multiline (c-style) comments from lines.""" 724 line_index = 0 725 while line_index < len(lines): 726 line_index_begin = find_next_multi_line_comment_start(lines, line_index) 727 if line_index_begin >= len(lines): 728 return 729 line_index_end = find_next_multi_line_comment_end(lines, line_index_begin) 730 if line_index_end >= len(lines): 731 error(filename, line_index_begin + 1, 'readability/multiline_comment', 5, 732 'Could not find end of multi-line comment') 733 return 734 remove_multi_line_comments_from_range(lines, line_index_begin, line_index_end + 1) 735 line_index = line_index_end + 1 736 737 738def cleanse_comments(line): 739 """Removes //-comments and single-line C-style /* */ comments. 740 741 Args: 742 line: A line of C++ source. 
743 744 Returns: 745 The line with single-line comments removed. 746 """ 747 comment_position = line.find('//') 748 if comment_position != -1 and not is_cpp_string(line[:comment_position]): 749 line = line[:comment_position] 750 # get rid of /* ... */ 751 return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line) 752 753 754class CleansedLines(object): 755 """Holds 3 copies of all lines with different preprocessing applied to them. 756 757 1) elided member contains lines without strings and comments, 758 2) lines member contains lines without comments, and 759 3) raw member contains all the lines without processing. 760 All these three members are of <type 'list'>, and of the same length. 761 """ 762 763 def __init__(self, lines): 764 self.elided = [] 765 self.lines = [] 766 self.raw_lines = lines 767 self._num_lines = len(lines) 768 for line_number in range(len(lines)): 769 self.lines.append(cleanse_comments(lines[line_number])) 770 elided = self.collapse_strings(lines[line_number]) 771 self.elided.append(cleanse_comments(elided)) 772 773 def num_lines(self): 774 """Returns the number of lines represented.""" 775 return self._num_lines 776 777 @staticmethod 778 def collapse_strings(elided): 779 """Collapses strings and chars on a line to simple "" or '' blocks. 780 781 We nix strings first so we're not fooled by text like '"http://"' 782 783 Args: 784 elided: The line being processed. 785 786 Returns: 787 The line with collapsed strings. 788 """ 789 if not _RE_PATTERN_INCLUDE.match(elided): 790 # Remove escaped characters first to make quote/single quote collapsing 791 # basic. Things that look like escaped characters shouldn't occur 792 # outside of strings and chars. 
793 elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided) 794 elided = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub("''", elided) 795 elided = _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', elided) 796 return elided 797 798 799def close_expression(clean_lines, line_number, pos): 800 """If input points to ( or { or [, finds the position that closes it. 801 802 If lines[line_number][pos] points to a '(' or '{' or '[', finds the the 803 line_number/pos that correspond to the closing of the expression. 804 805 Args: 806 clean_lines: A CleansedLines instance containing the file. 807 line_number: The number of the line to check. 808 pos: A position on the line. 809 810 Returns: 811 A tuple (line, line_number, pos) pointer *past* the closing brace, or 812 (line, len(lines), -1) if we never find a close. Note we ignore 813 strings and comments when matching; and the line we return is the 814 'cleansed' line at line_number. 815 """ 816 817 line = clean_lines.elided[line_number] 818 start_character = line[pos] 819 if start_character not in '({[': 820 return (line, clean_lines.num_lines(), -1) 821 if start_character == '(': 822 end_character = ')' 823 if start_character == '[': 824 end_character = ']' 825 if start_character == '{': 826 end_character = '}' 827 828 num_open = line.count(start_character) - line.count(end_character) 829 while line_number < clean_lines.num_lines() and num_open > 0: 830 line_number += 1 831 line = clean_lines.elided[line_number] 832 num_open += line.count(start_character) - line.count(end_character) 833 # OK, now find the end_character that actually got us back to even 834 endpos = len(line) 835 while num_open >= 0: 836 endpos = line.rfind(')', 0, endpos) 837 num_open -= 1 # chopped off another ) 838 return (line, line_number, endpos + 1) 839 840 841def check_for_copyright(filename, lines, error): 842 """Logs an error if no Copyright message appears at the top of the file.""" 843 844 # We'll say it should occur by line 10. 
Don't forget there's a 845 # dummy line at the front. 846 for line in xrange(1, min(len(lines), 11)): 847 if re.search(r'Copyright', lines[line], re.I): 848 break 849 else: # means no copyright line was found 850 error(filename, 0, 'legal/copyright', 5, 851 'No copyright message found. ' 852 'You should have a line: "Copyright [year] <Copyright Owner>"') 853 854 855def get_header_guard_cpp_variable(filename): 856 """Returns the CPP variable that should be used as a header guard. 857 858 Args: 859 filename: The name of a C++ header file. 860 861 Returns: 862 The CPP variable that should be used as a header guard in the 863 named file. 864 865 """ 866 867 fileinfo = FileInfo(filename) 868 return re.sub(r'[-./\s]', '_', fileinfo.repository_name()).upper() + '_' 869 870 871def check_for_header_guard(filename, lines, error): 872 """Checks that the file contains a header guard. 873 874 Logs an error if no #ifndef header guard is present. For other 875 headers, checks that the full pathname is used. 876 877 Args: 878 filename: The name of the C++ header file. 879 lines: An array of strings, each representing a line of the file. 880 error: The function to call with any errors found. 881 """ 882 883 cppvar = get_header_guard_cpp_variable(filename) 884 885 ifndef = None 886 ifndef_line_number = 0 887 define = None 888 endif = None 889 endif_line_number = 0 890 for line_number, line in enumerate(lines): 891 line_split = line.split() 892 if len(line_split) >= 2: 893 # find the first occurrence of #ifndef and #define, save arg 894 if not ifndef and line_split[0] == '#ifndef': 895 # set ifndef to the header guard presented on the #ifndef line. 
def check_for_unicode_replacement_characters(filename, lines, error):
    """Reports every line that contains the Unicode replacement character.

    The presence of U+FFFD means that either the file contained invalid
    UTF-8 (likely) or it contained literal replacement characters (which
    it shouldn't).  Line numbering may be thrown off if the invalid UTF-8
    occurred adjacent to a newline.

    Args:
      filename: The name of the current file.
      lines: An array of strings, each representing a line of the file.
      error: The function to call with any errors found.
    """
    replacement_character = u'\ufffd'
    # A lazy generator keeps the reports interleaved with the scan, one
    # line at a time.
    offending_line_numbers = (index for index, text in enumerate(lines)
                              if replacement_character in text)
    for index in offending_line_numbers:
        error(filename, index, 'readability/utf8', 5,
              'Line contains invalid UTF-8 (or Unicode replacement character).')
# Pairs of (thread-unsafe call prefix, suggested thread-safe replacement).
_THREADING_LIST = (
    ('asctime(', 'asctime_r('),
    ('ctime(', 'ctime_r('),
    ('getgrgid(', 'getgrgid_r('),
    ('getgrnam(', 'getgrnam_r('),
    ('getlogin(', 'getlogin_r('),
    ('getpwnam(', 'getpwnam_r('),
    ('getpwuid(', 'getpwuid_r('),
    ('gmtime(', 'gmtime_r('),
    ('localtime(', 'localtime_r('),
    ('rand(', 'rand_r('),
    ('readdir(', 'readdir_r('),
    ('strtok(', 'strtok_r('),
    ('ttyname(', 'ttyname_r('),
    )


def check_posix_threading(filename, clean_lines, line_number, error):
    """Flags calls to POSIX functions that have a thread-safe _r variant.

    Much code was written without multi-threading in mind, and many
    engineers learned posix before the threading extensions existed.
    This check nudges them toward the thread-safe functions (when using
    posix directly).

    Only the first occurrence of each function name on the line is
    examined, and it is ignored when the preceding character shows it to
    be part of a longer identifier or a member access.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """
    text = clean_lines.elided[line_number]
    for unsafe_call, safe_call in _THREADING_LIST:
        position = text.find(unsafe_call)
        if position < 0:
            continue
        if position > 0:
            preceding = text[position - 1]
            # Part of another identifier (foo_rand, myrand) or a member
            # call (obj.rand, ptr->rand): not the libc function.
            if preceding.isalnum() or preceding in ('_', '.', '>'):
                continue
        message = ''.join(['Consider using ', safe_call,
                           '...) instead of ', unsafe_call,
                           '...) for improved thread safety.'])
        error(filename, line_number, 'runtime/threadsafe_fn', 2, message)
1070 """ 1071 line = clean_lines.elided[line_number] 1072 if _RE_PATTERN_INVALID_INCREMENT.match(line): 1073 error(filename, line_number, 'runtime/invalid_increment', 5, 1074 'Changing pointer instead of value (or unused value of operator*).') 1075 1076 1077class _ClassInfo(object): 1078 """Stores information about a class.""" 1079 1080 def __init__(self, name, line_number): 1081 self.name = name 1082 self.line_number = line_number 1083 self.seen_open_brace = False 1084 self.is_derived = False 1085 self.virtual_method_line_number = None 1086 self.has_virtual_destructor = False 1087 self.brace_depth = 0 1088 1089 1090class _ClassState(object): 1091 """Holds the current state of the parse relating to class declarations. 1092 1093 It maintains a stack of _ClassInfos representing the parser's guess 1094 as to the current nesting of class declarations. The innermost class 1095 is at the top (back) of the stack. Typically, the stack will either 1096 be empty or have exactly one entry. 1097 """ 1098 1099 def __init__(self): 1100 self.classinfo_stack = [] 1101 1102 def check_finished(self, filename, error): 1103 """Checks that all classes have been completely parsed. 1104 1105 Call this when all lines in a file have been processed. 1106 Args: 1107 filename: The name of the current file. 1108 error: The function to call with any errors found. 1109 """ 1110 if self.classinfo_stack: 1111 # Note: This test can result in false positives if #ifdef constructs 1112 # get in the way of brace matching. See the testBuildClass test in 1113 # cpp_style_unittest.py for an example of this. 1114 error(filename, self.classinfo_stack[0].line_number, 'build/class', 5, 1115 'Failed to find complete declaration of class %s' % 1116 self.classinfo_stack[0].name) 1117 1118 1119def check_for_non_standard_constructs(filename, clean_lines, line_number, 1120 class_state, error): 1121 """Logs an error if we see certain non-ANSI constructs ignored by gcc-2. 
1122 1123 Complain about several constructs which gcc-2 accepts, but which are 1124 not standard C++. Warning about these in lint is one way to ease the 1125 transition to new compilers. 1126 - put storage class first (e.g. "static const" instead of "const static"). 1127 - "%lld" instead of %qd" in printf-type functions. 1128 - "%1$d" is non-standard in printf-type functions. 1129 - "\%" is an undefined character escape sequence. 1130 - text after #endif is not allowed. 1131 - invalid inner-style forward declaration. 1132 - >? and <? operators, and their >?= and <?= cousins. 1133 - classes with virtual methods need virtual destructors (compiler warning 1134 available, but not turned on yet.) 1135 1136 Additionally, check for constructor/destructor style violations as it 1137 is very convenient to do so while checking for gcc-2 compliance. 1138 1139 Args: 1140 filename: The name of the current file. 1141 clean_lines: A CleansedLines instance containing the file. 1142 line_number: The number of the line to check. 1143 class_state: A _ClassState instance which maintains information about 1144 the current stack of nested class declarations being parsed. 1145 error: A callable to which errors are reported, which takes 4 arguments: 1146 filename, line number, error level, and message 1147 """ 1148 1149 # Remove comments from the line, but leave in strings for now. 1150 line = clean_lines.lines[line_number] 1151 1152 if search(r'printf\s*\(.*".*%[-+ ]?\d*q', line): 1153 error(filename, line_number, 'runtime/printf_format', 3, 1154 '%q in format strings is deprecated. Use %ll instead.') 1155 1156 if search(r'printf\s*\(.*".*%\d+\$', line): 1157 error(filename, line_number, 'runtime/printf_format', 2, 1158 '%N$ formats are unconventional. Try rewriting to avoid them.') 1159 1160 # Remove escaped backslashes before looking for undefined escapes. 
1161 line = line.replace('\\\\', '') 1162 1163 if search(r'("|\').*\\(%|\[|\(|{)', line): 1164 error(filename, line_number, 'build/printf_format', 3, 1165 '%, [, (, and { are undefined character escapes. Unescape them.') 1166 1167 # For the rest, work with both comments and strings removed. 1168 line = clean_lines.elided[line_number] 1169 1170 if search(r'\b(const|volatile|void|char|short|int|long' 1171 r'|float|double|signed|unsigned' 1172 r'|schar|u?int8|u?int16|u?int32|u?int64)' 1173 r'\s+(auto|register|static|extern|typedef)\b', 1174 line): 1175 error(filename, line_number, 'build/storage_class', 5, 1176 'Storage class (static, extern, typedef, etc) should be first.') 1177 1178 if match(r'\s*#\s*endif\s*[^/\s]+', line): 1179 error(filename, line_number, 'build/endif_comment', 5, 1180 'Uncommented text after #endif is non-standard. Use a comment.') 1181 1182 if match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line): 1183 error(filename, line_number, 'build/forward_decl', 5, 1184 'Inner-style forward declarations are invalid. Remove this line.') 1185 1186 if search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?', line): 1187 error(filename, line_number, 'build/deprecated', 3, 1188 '>? and <? (max and min) operators are non-standard and deprecated.') 1189 1190 # Track class entry and exit, and attempt to find cases within the 1191 # class declaration that don't meet the C++ style 1192 # guidelines. Tracking is very dependent on the code matching Google 1193 # style guidelines, but it seems to perform well enough in testing 1194 # to be a worthwhile addition to the checks. 
1195 classinfo_stack = class_state.classinfo_stack 1196 # Look for a class declaration 1197 class_decl_match = match( 1198 r'\s*(template\s*<[\w\s<>,:]*>\s*)?(class|struct)\s+(\w+(::\w+)*)', line) 1199 if class_decl_match: 1200 classinfo_stack.append(_ClassInfo(class_decl_match.group(3), line_number)) 1201 1202 # Everything else in this function uses the top of the stack if it's 1203 # not empty. 1204 if not classinfo_stack: 1205 return 1206 1207 classinfo = classinfo_stack[-1] 1208 1209 # If the opening brace hasn't been seen look for it and also 1210 # parent class declarations. 1211 if not classinfo.seen_open_brace: 1212 # If the line has a ';' in it, assume it's a forward declaration or 1213 # a single-line class declaration, which we won't process. 1214 if line.find(';') != -1: 1215 classinfo_stack.pop() 1216 return 1217 classinfo.seen_open_brace = (line.find('{') != -1) 1218 # Look for a bare ':' 1219 if search('(^|[^:]):($|[^:])', line): 1220 classinfo.is_derived = True 1221 if not classinfo.seen_open_brace: 1222 return # Everything else in this function is for after open brace 1223 1224 # The class may have been declared with namespace or classname qualifiers. 1225 # The constructor and destructor will not have those qualifiers. 1226 base_classname = classinfo.name.split('::')[-1] 1227 1228 # Look for single-argument constructors that aren't marked explicit. 1229 # Technically a valid construct, but against style. 1230 args = match(r'(?<!explicit)\s+%s\s*\(([^,()]+)\)' 1231 % re.escape(base_classname), 1232 line) 1233 if (args 1234 and args.group(1) != 'void' 1235 and not match(r'(const\s+)?%s\s*&' % re.escape(base_classname), 1236 args.group(1).strip())): 1237 error(filename, line_number, 'runtime/explicit', 5, 1238 'Single-argument constructors should be marked explicit.') 1239 1240 # Look for methods declared virtual. 
def check_spacing_for_function_call(filename, line, line_number, error):
    """Checks for the correctness of various spacing around function calls.

    If the line contains an if/for/foreach/while/switch header, only the
    text inside that header's parentheses is examined; otherwise the whole
    line is.  Reports, under the 'whitespace/parens' category, a space
    right after '(', a space between a name and '(', and a space right
    before ')'.

    Args:
      filename: The name of the current file.
      line: The text of the line to check.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """

    # NOTE(review): search() is a helper defined elsewhere in this file;
    # presumably a thin wrapper around re.search -- confirm.

    # Since function calls often occur inside if/for/foreach/while/switch
    # expressions - which have their own, more liberal conventions - we
    # first see if we should be looking inside such an expression for a
    # function call, to which we can apply more strict standards.
    function_call = line  # if there's no control flow construct, look at whole line
    for pattern in (r'\bif\s*\((.*)\)\s*{',
                    r'\bfor\s*\((.*)\)\s*{',
                    r'\bforeach\s*\((.*)\)\s*{',
                    r'\bwhile\s*\((.*)\)\s*[{;]',
                    r'\bswitch\s*\((.*)\)\s*{'):
        matched = search(pattern, line)
        if matched:
            function_call = matched.group(1)  # look inside the parens for function calls
            break  # Only the first matching construct is used.

    # Except in if/for/foreach/while/switch, there should never be space
    # immediately inside parens (eg "f( 3, 4 )").  We make an exception
    # for nested parens ( (a+b) + c ).  Likewise, there should never be
    # a space before a ( when it's a function argument.  I assume it's a
    # function argument when the char before the whitespace is legal in
    # a function name (alnum + _) and we're not starting a macro.  Also ignore
    # pointers and references to arrays and functions coz they're too tricky:
    # we use a very simple way to recognize these:
    # " (something)(maybe-something)" or
    # " (something)(maybe-something," or
    # " (something)[something]"
    # Note that we assume the contents of [] to be short enough that
    # they'll never need to wrap.
    if (  # Ignore control structures.
        not search(r'\b(if|for|foreach|while|switch|return|new|delete)\b', function_call)
        # Ignore pointers/references to functions.
        and not search(r' \([^)]+\)\([^)]*(\)|,$)', function_call)
        # Ignore pointers/references to arrays.
        and not search(r' \([^)]+\)\[[^\]]+\]', function_call)):
        # The two "space after (" checks are mutually exclusive: the more
        # specific function-call form is tried first.
        if search(r'\w\s*\([ \t](?!\s*\\$)', function_call):  # a ( used for a fn call
            error(filename, line_number, 'whitespace/parens', 4,
                  'Extra space after ( in function call')
        elif search(r'\([ \t]+(?!(\s*\\)|\()', function_call):
            error(filename, line_number, 'whitespace/parens', 2,
                  'Extra space after (')
        # Macro definitions and typedefs are exempt from the
        # "space before (" check.
        if (search(r'\w\s+\(', function_call)
            and not search(r'#\s*define|typedef', function_call)):
            error(filename, line_number, 'whitespace/parens', 4,
                  'Extra space before ( in function call')
        # If the ) is followed only by a newline or a { + newline, assume it's
        # part of a control statement (if/while/etc), and don't complain
        if search(r'[^)\s]\s+\)(?!\s*$|{\s*$)', function_call):
            error(filename, line_number, 'whitespace/parens', 2,
                  'Extra space before )')
1360 NOLINT *on the last line of a function* disables this check. 1361 1362 Args: 1363 filename: The name of the current file. 1364 clean_lines: A CleansedLines instance containing the file. 1365 line_number: The number of the line to check. 1366 function_state: Current function name and lines in body so far. 1367 error: The function to call with any errors found. 1368 """ 1369 lines = clean_lines.lines 1370 line = lines[line_number] 1371 raw = clean_lines.raw_lines 1372 raw_line = raw[line_number] 1373 joined_line = '' 1374 1375 starting_func = False 1376 regexp = r'(\w(\w|::|\*|\&|\s)*)\(' # decls * & space::name( ... 1377 match_result = match(regexp, line) 1378 if match_result: 1379 # If the name is all caps and underscores, figure it's a macro and 1380 # ignore it, unless it's TEST or TEST_F. 1381 function_name = match_result.group(1).split()[-1] 1382 if function_name == 'TEST' or function_name == 'TEST_F' or (not match(r'[A-Z_]+$', function_name)): 1383 starting_func = True 1384 1385 if starting_func: 1386 body_found = False 1387 for start_line_number in xrange(line_number, clean_lines.num_lines()): 1388 start_line = lines[start_line_number] 1389 joined_line += ' ' + start_line.lstrip() 1390 if search(r'(;|})', start_line): # Declarations and trivial functions 1391 body_found = True 1392 break # ... ignore 1393 if search(r'{', start_line): 1394 body_found = True 1395 function = search(r'((\w|:)*)\(', line).group(1) 1396 if match(r'TEST', function): # Handle TEST... macros 1397 parameter_regexp = search(r'(\(.*\))', joined_line) 1398 if parameter_regexp: # Ignore bad syntax 1399 function += parameter_regexp.group(1) 1400 else: 1401 function += '()' 1402 function_state.begin(function) 1403 break 1404 if not body_found: 1405 # No body for the function (or evidence of a non-function) was found. 
1406 error(filename, line_number, 'readability/fn_size', 5, 1407 'Lint failed to find start of function body.') 1408 elif match(r'^\}\s*$', line): # function end 1409 if not search(r'\bNOLINT\b', raw_line): 1410 function_state.check(error, filename, line_number) 1411 function_state.end() 1412 elif not match(r'^\s*$', line): 1413 function_state.count() # Count non-blank/non-comment lines. 1414 1415 1416def check_spacing(filename, clean_lines, line_number, error): 1417 """Checks for the correctness of various spacing issues in the code. 1418 1419 Things we check for: spaces around operators, spaces after 1420 if/for/while/switch, no spaces around parens in function calls, two 1421 spaces between code and comment, don't start a block with a blank 1422 line, don't end a function with a blank line, don't have too many 1423 blank lines in a row. 1424 1425 Args: 1426 filename: The name of the current file. 1427 clean_lines: A CleansedLines instance containing the file. 1428 line_number: The number of the line to check. 1429 error: The function to call with any errors found. 1430 """ 1431 1432 raw = clean_lines.raw_lines 1433 line = raw[line_number] 1434 1435 # Before nixing comments, check if the line is blank for no good 1436 # reason. This includes the first line after a block is opened, and 1437 # blank lines at the end of a function (ie, right before a line like '}'). 1438 if is_blank_line(line): 1439 elided = clean_lines.elided 1440 previous_line = elided[line_number - 1] 1441 previous_brace = previous_line.rfind('{') 1442 # FIXME: Don't complain if line before blank line, and line after, 1443 # both start with alnums and are indented the same amount. 1444 # This ignores whitespace at the start of a namespace block 1445 # because those are not usually indented. 1446 if (previous_brace != -1 and previous_line[previous_brace:].find('}') == -1 1447 and previous_line[:previous_brace].find('namespace') == -1): 1448 # OK, we have a blank line at the start of a code block. 
Before we 1449 # complain, we check if it is an exception to the rule: The previous 1450 # non-empty line has the parameters of a function header that are indented 1451 # 4 spaces (because they did not fit in a 80 column line when placed on 1452 # the same line as the function name). We also check for the case where 1453 # the previous line is indented 6 spaces, which may happen when the 1454 # initializers of a constructor do not fit into a 80 column line. 1455 exception = False 1456 if match(r' {6}\w', previous_line): # Initializer list? 1457 # We are looking for the opening column of initializer list, which 1458 # should be indented 4 spaces to cause 6 space indentation afterwards. 1459 search_position = line_number - 2 1460 while (search_position >= 0 1461 and match(r' {6}\w', elided[search_position])): 1462 search_position -= 1 1463 exception = (search_position >= 0 1464 and elided[search_position][:5] == ' :') 1465 else: 1466 # Search for the function arguments or an initializer list. We use a 1467 # simple heuristic here: If the line is indented 4 spaces; and we have a 1468 # closing paren, without the opening paren, followed by an opening brace 1469 # or colon (for initializer lists) we assume that it is the last line of 1470 # a function header. If we have a colon indented 4 spaces, it is an 1471 # initializer list. 1472 exception = (match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)', 1473 previous_line) 1474 or match(r' {4}:', previous_line)) 1475 1476 if not exception: 1477 error(filename, line_number, 'whitespace/blank_line', 2, 1478 'Blank line at the start of a code block. Is this needed?') 1479 # This doesn't ignore whitespace at the end of a namespace block 1480 # because that is too hard without pairing open/close braces; 1481 # however, a special exception is made for namespace closing 1482 # brackets which have a comment containing "namespace". 
1483 # 1484 # Also, ignore blank lines at the end of a block in a long if-else 1485 # chain, like this: 1486 # if (condition1) { 1487 # // Something followed by a blank line 1488 # 1489 # } else if (condition2) { 1490 # // Something else 1491 # } 1492 if line_number + 1 < clean_lines.num_lines(): 1493 next_line = raw[line_number + 1] 1494 if (next_line 1495 and match(r'\s*}', next_line) 1496 and next_line.find('namespace') == -1 1497 and next_line.find('} else ') == -1): 1498 error(filename, line_number, 'whitespace/blank_line', 3, 1499 'Blank line at the end of a code block. Is this needed?') 1500 1501 # Next, we complain if there's a comment too near the text 1502 comment_position = line.find('//') 1503 if comment_position != -1: 1504 # Check if the // may be in quotes. If so, ignore it 1505 # Comparisons made explicit for clarity -- pylint: disable-msg=C6403 1506 if (line.count('"', 0, comment_position) - line.count('\\"', 0, comment_position)) % 2 == 0: # not in quotes 1507 # Allow one space for new scopes, two spaces otherwise: 1508 if (not match(r'^\s*{ //', line) 1509 and ((comment_position >= 1 1510 and line[comment_position-1] not in string.whitespace) 1511 or (comment_position >= 2 1512 and line[comment_position-2] not in string.whitespace))): 1513 error(filename, line_number, 'whitespace/comments', 2, 1514 'At least two spaces is best between code and comments') 1515 # There should always be a space between the // and the comment 1516 commentend = comment_position + 2 1517 if commentend < len(line) and not line[commentend] == ' ': 1518 # but some lines are exceptions -- e.g. 
if they're big 1519 # comment delimiters like: 1520 # //---------------------------------------------------------- 1521 # or they begin with multiple slashes followed by a space: 1522 # //////// Header comment 1523 matched = (search(r'[=/-]{4,}\s*$', line[commentend:]) 1524 or search(r'^/+ ', line[commentend:])) 1525 if not matched: 1526 error(filename, line_number, 'whitespace/comments', 4, 1527 'Should have a space between // and comment') 1528 1529 line = clean_lines.elided[line_number] # get rid of comments and strings 1530 1531 # Don't try to do spacing checks for operator methods 1532 line = re.sub(r'operator(==|!=|<|<<|<=|>=|>>|>)\(', 'operator\(', line) 1533 1534 # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )". 1535 # Otherwise not. Note we only check for non-spaces on *both* sides; 1536 # sometimes people put non-spaces on one side when aligning ='s among 1537 # many lines (not that this is behavior that I approve of...) 1538 if search(r'[\w.]=[\w.]', line) and not search(r'\b(if|while) ', line): 1539 error(filename, line_number, 'whitespace/operators', 4, 1540 'Missing spaces around =') 1541 1542 # FIXME: It's not ok to have spaces around binary operators like + - * / . 1543 1544 # You should always have whitespace around binary operators. 1545 # Alas, we can't test < or > because they're legitimately used sans spaces 1546 # (a->b, vector<int> a). The only time we can tell is a < with no >, and 1547 # only if it's not template params list spilling into the next line. 1548 matched = search(r'[^<>=!\s](==|!=|<=|>=)[^<>=!\s]', line) 1549 if not matched: 1550 # Note that while it seems that the '<[^<]*' term in the following 1551 # regexp could be simplified to '<.*', which would indeed match 1552 # the same class of strings, the [^<] means that searching for the 1553 # regexp takes linear rather than quadratic time. 
1554 if not search(r'<[^<]*,\s*$', line): # template params spill 1555 matched = search(r'[^<>=!\s](<)[^<>=!\s]([^>]|->)*$', line) 1556 if matched: 1557 error(filename, line_number, 'whitespace/operators', 3, 1558 'Missing spaces around %s' % matched.group(1)) 1559 # We allow no-spaces around << and >> when used like this: 10<<20, but 1560 # not otherwise (particularly, not when used as streams) 1561 matched = search(r'[^0-9\s](<<|>>)[^0-9\s]', line) 1562 if matched: 1563 error(filename, line_number, 'whitespace/operators', 3, 1564 'Missing spaces around %s' % matched.group(1)) 1565 1566 # There shouldn't be space around unary operators 1567 matched = search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line) 1568 if matched: 1569 error(filename, line_number, 'whitespace/operators', 4, 1570 'Extra space for operator %s' % matched.group(1)) 1571 1572 # A pet peeve of mine: no spaces after an if, while, switch, or for 1573 matched = search(r' (if\(|for\(|foreach\(|while\(|switch\()', line) 1574 if matched: 1575 error(filename, line_number, 'whitespace/parens', 5, 1576 'Missing space before ( in %s' % matched.group(1)) 1577 1578 # For if/for/foreach/while/switch, the left and right parens should be 1579 # consistent about how many spaces are inside the parens, and 1580 # there should either be zero or one spaces inside the parens. 1581 # We don't want: "if ( foo)" or "if ( foo )". 1582 # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed. 
1583 matched = search(r'\b(if|for|foreach|while|switch)\s*\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$', 1584 line) 1585 if matched: 1586 if len(matched.group(2)) != len(matched.group(4)): 1587 if not (matched.group(3) == ';' 1588 and len(matched.group(2)) == 1 + len(matched.group(4)) 1589 or not matched.group(2) and search(r'\bfor\s*\(.*; \)', line)): 1590 error(filename, line_number, 'whitespace/parens', 5, 1591 'Mismatching spaces inside () in %s' % matched.group(1)) 1592 if not len(matched.group(2)) in [0, 1]: 1593 error(filename, line_number, 'whitespace/parens', 5, 1594 'Should have zero or one spaces inside ( and ) in %s' % 1595 matched.group(1)) 1596 1597 # You should always have a space after a comma (either as fn arg or operator) 1598 if search(r',[^\s]', line): 1599 error(filename, line_number, 'whitespace/comma', 3, 1600 'Missing space after ,') 1601 1602 if filename.endswith('.cpp'): 1603 # C++ should have the & or * beside the type not the variable name. 1604 matched = match(r'\s*\w+(?<!\breturn)\s+(?P<pointer_operator>\*|\&)\w+', line) 1605 if matched: 1606 error(filename, line_number, 'whitespace/declaration', 3, 1607 'Declaration has space between type name and %s in %s' % (matched.group('pointer_operator'), matched.group(0).strip())) 1608 1609 elif filename.endswith('.c'): 1610 # C Pointer declaration should have the * beside the variable not the type name. 1611 matched = search(r'^\s*\w+\*\s+\w+', line) 1612 if matched: 1613 error(filename, line_number, 'whitespace/declaration', 3, 1614 'Declaration has space between * and variable name in %s' % matched.group(0).strip()) 1615 1616 # Next we will look for issues with function calls. 1617 check_spacing_for_function_call(filename, line, line_number, error) 1618 1619 # Except after an opening paren, you should have spaces before your braces. 1620 # And since you should never have braces at the beginning of a line, this is 1621 # an easy test. 
def get_previous_non_blank_line(clean_lines, line_number):
    """Find the closest non-blank line above the given line.

    Walks upwards through clean_lines.elided, starting with the line directly
    before line_number, and stops at the first line that is_blank_line() does
    not consider blank.

    Args:
      clean_lines: A CleansedLines instance containing the file contents.
      line_number: The number of the line to check.

    Returns:
      A (line, line_number) tuple describing the nearest preceding non-blank
      line. When no such line exists the result is ('', -1).
    """
    for candidate_number in range(line_number - 1, -1, -1):
        candidate = clean_lines.elided[candidate_number]
        if not is_blank_line(candidate):
            return (candidate, candidate_number)

    # Ran past the top of the file without meeting a non-blank line.
    return ('', -1)
def check_switch_indentation(filename, clean_lines, line_number, error):
    """Checks the indentation of the lines inside a switch statement.

    Nothing happens unless the given line opens a switch statement, i.e. it
    consists of "switch (...) {" with the brace ending the line. The lines
    after it are scanned until a closing brace with the same indentation as
    the switch is reached. Case and default labels must carry exactly the
    indentation of the switch; any other code must start with that
    indentation plus four spaces. Scanning ends after the first reported
    problem and also right after a line that opens a nested switch.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """
    line = clean_lines.elided[line_number]  # Comments and strings are elided.

    switch_match = match(r'(?P<switch_indentation>\s*)switch\s*\(.+\)\s*{\s*$', line)
    if not switch_match:
        return

    switch_indentation = switch_match.group('switch_indentation')
    inner_indentation = switch_indentation + ' ' * 4

    for index, current_line in enumerate(clean_lines.elided[line_number + 1:]):
        current_line_number = line_number + 1 + index

        # Blank lines and preprocessor directives say nothing about indentation.
        if current_line.strip() == '' or current_line.startswith('#'):
            continue

        # A nested switch would require tracking where it ends for the label
        # tests to stay correct. Instead, this line is still checked and the
        # scan stops afterwards; with consistent indentation (even if it is
        # consistently wrong) problems are still caught in practice.
        opens_nested_switch = bool(match(r'\s*switch\s*\(.+\)\s*{\s*$', current_line))

        pieces = match(r'(?P<indentation>\s*)(?P<remaining_line>.*)$', current_line)
        current_indentation = pieces.group('indentation')
        remaining_line = pieces.group('remaining_line')

        # The closing brace of the switch itself ends the check.
        if remaining_line.startswith('}') and current_indentation == switch_indentation:
            return

        # The label regexp also accepts single-line cases like
        # "default: break;" but rejects things like "Document::Foo();".
        if match(r'(default|case\s+.*)\s*:([^:].*)?$', remaining_line):
            if current_indentation != switch_indentation:
                # One report is enough to figure out the problem.
                error(filename, current_line_number, 'whitespace/indent', 4,
                      'A case label should not be indented, but line up with its switch statement.')
                return
        elif match(r'\w+\s*:\s*$', remaining_line):
            # Goto labels are ignored.
            continue
        elif not current_indentation.startswith(inner_indentation):
            # One report is enough to figure out the problem.
            error(filename, current_line_number, 'whitespace/indent', 4,
                  'Non-label code inside switch statements should be indented.')
            return

        if opens_nested_switch:
            return
We don't detect this 1832 # perfectly: we just don't complain if the last non-whitespace 1833 # character on the previous non-blank line is ';', ':', '{', '}', 1834 # ')', or ') const' and doesn't begin with 'if|for|while|switch|else'. 1835 # We also allow '#' for #endif and '=' for array initialization. 1836 previous_line = get_previous_non_blank_line(clean_lines, line_number)[0] 1837 if ((not search(r'[;:}{)=]\s*$|\)\s*const\s*$', previous_line) 1838 or search(r'\b(if|for|foreach|while|switch|else)\b', previous_line)) 1839 and previous_line.find('#') < 0): 1840 error(filename, line_number, 'whitespace/braces', 4, 1841 'This { should be at the end of the previous line') 1842 elif (search(r'\)\s*(const\s*)?{\s*$', line) 1843 and line.count('(') == line.count(')') 1844 and not search(r'\b(if|for|foreach|while|switch)\b', line)): 1845 error(filename, line_number, 'whitespace/braces', 4, 1846 'Place brace on its own line for function definitions.') 1847 1848 if (match(r'\s*}\s*(else\s*({\s*)?)?$', line) and line_number > 1): 1849 # We check if a closed brace has started a line to see if a 1850 # one line control statement was previous. 1851 previous_line = clean_lines.elided[line_number - 2] 1852 if (previous_line.find('{') > 0 1853 and search(r'\b(if|for|foreach|while|else)\b', previous_line)): 1854 error(filename, line_number, 'whitespace/braces', 4, 1855 'One line control clauses should not use braces.') 1856 1857 # An else clause should be on the same line as the preceding closing brace. 
def check_exit_statement_simplifications(filename, clean_lines, line_number, error):
    """Flags an else or else-if that follows an "if" block ending in an exit.

    When the block in front of an "else" / "else if" ends with a return,
    break, continue or goto statement, the else is redundant (or the else-if
    can be written as a plain if). The lines above the given line are walked
    bottom-up: first the exit statement is located, then the statement that
    owns the block. An error is reported only if that statement is an "if".

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """
    line = clean_lines.elided[line_number]  # Comments and strings are elided.

    else_match = match(r'(?P<else_indentation>\s*)(\}\s*)?else(\s+if\s*\(|(?P<else>\s*(\{\s*)?\Z))', line)
    if not else_match:
        return

    else_indentation = else_match.group('else_indentation')
    inner_indentation = else_indentation + ' ' * 4
    lone_closing_brace = else_indentation + '}'
    lone_opening_brace = else_indentation + '{'

    # The named group "else" only takes part in the match for a plain "else";
    # for "else if" its start position is -1.
    if else_match.start('else') != -1:
        message = ('An else statement can be removed when the prior "if" '
                   'concludes with a return, break, continue or goto statement.')
    else:
        message = ('An else if statement should be written as an if statement '
                   'when the prior "if" concludes with a return, break, '
                   'continue or goto statement.')

    found_exit_statement = False
    for distance, current_line in enumerate(reversed(clean_lines.elided[:line_number])):
        # Empty lines, preprocessor directives and goto labels are skipped.
        if (current_line.strip() == '' or current_line.startswith('#')
            or match(r'\w+\s*:\s*$', current_line)):
            continue

        # A closing brace on the indentation level of the else is skipped as
        # well. The style guide wants it on the same line as the else, but
        # code that does not comply should still be checked.
        if current_line == lone_closing_brace:
            continue

        pieces = match(r'(?P<indentation>\s*)(?P<remaining_line>.*)$', current_line)
        current_indentation = pieces.group('indentation')
        remaining_line = pieces.group('remaining_line')

        if not found_exit_statement:
            # Going upwards, the first real statement has to be an exit
            # statement sitting on exactly the indentation expected for code
            # inside the block. Any other indentation hints at a nested
            # construct, for which this check does not apply.
            is_exit_statement = (
                current_indentation == inner_indentation
                and match(r'(return(\W+.*)|(break|continue)\s*;|goto\s*\w+;)$', remaining_line))
            if not is_exit_statement:
                return
            found_exit_statement = True
            continue

        # From here on the block is known to end with an exit statement; what
        # remains is finding out whether the block belongs to an "if".

        # An opening brace on its own line ("if (condition)\n{") is skipped.
        if current_line == lone_opening_brace:
            continue

        # So is everything indented further than the else itself.
        if (current_indentation != else_indentation
            and current_indentation.startswith(else_indentation)):
            continue

        # This line has the same (or less) indentation and therefore decides
        # the outcome: report if it is an "if", stay silent otherwise.
        if match(r'if\s*\(', remaining_line):
            error(filename, line_number - distance - 1, 'readability/control_flow', 4,
                  message)
        return
def check_check(filename, clean_lines, line_number, error):
    """Suggests a more specific macro for plain CHECK / EXPECT comparisons.

    The first entry of _CHECK_MACROS that occurs in the raw line selects the
    macro. The elided line is then tested against each comparison operator
    with replaceable_check(), and the first hit is reported together with the
    replacement looked up in _CHECK_REPLACEMENT.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """
    # Decide which macro (if any) is used on this line.
    raw_lines = clean_lines.raw_lines
    for candidate in _CHECK_MACROS:
        if candidate in raw_lines[line_number]:
            current_macro = candidate
            break
    else:
        # None of the macros occur on this line, so there is nothing to do.
        return
    if not current_macro:
        return

    line = clean_lines.elided[line_number]  # Comments and strings are elided.

    # Encourage replacing plain CHECKs with CHECK_EQ/CHECK_NE/etc.
    for operator in ('==', '!=', '>=', '>', '<=', '<'):
        if not replaceable_check(operator, current_macro, line):
            continue
        replacement = _CHECK_REPLACEMENT[current_macro][operator]
        error(filename, line_number, 'readability/check', 2,
              'Consider using %s instead of %s(a %s b)' % (
                  replacement, current_macro, operator))
        return
def get_line_width(line):
    """Determines the width of the line in column positions.

    Text strings are normalized to NFC first. Every character whose East
    Asian width is wide ('W') or fullwidth ('F') then counts as two columns,
    combining characters count as zero columns and everything else as one.
    Any other kind of string (a Python 2 byte string, Python 3 bytes) is
    measured with len().

    Args:
      line: A string, which may be a Unicode string.

    Returns:
      The width of the line in column positions, accounting for Unicode
      combining characters and wide characters.
    """
    # The name "unicode" only exists on Python 2. On Python 3 the text type
    # is "str"; resolving it here keeps the function from raising NameError.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    if not isinstance(line, text_type):
        return len(line)

    width = 0
    for character in unicodedata.normalize('NFC', line):
        if unicodedata.east_asian_width(character) in ('W', 'F'):
            width += 2
        elif not unicodedata.combining(character):
            width += 1
    return width
2121 # NOTE: here are the conditions rob pike used for his tests. Mine aren't 2122 # as sophisticated, but it may be worth becoming so: RLENGTH==initial_spaces 2123 # if(RLENGTH > 20) complain = 0; 2124 # if(match($0, " +(error|private|public|protected):")) complain = 0; 2125 # if(match(prev, "&& *$")) complain = 0; 2126 # if(match(prev, "\\|\\| *$")) complain = 0; 2127 # if(match(prev, "[\",=><] *$")) complain = 0; 2128 # if(match($0, " <<")) complain = 0; 2129 # if(match(prev, " +for \\(")) complain = 0; 2130 # if(prevodd && match(prevprev, " +for \\(")) complain = 0; 2131 initial_spaces = 0 2132 cleansed_line = clean_lines.elided[line_number] 2133 while initial_spaces < len(line) and line[initial_spaces] == ' ': 2134 initial_spaces += 1 2135 if line and line[-1].isspace(): 2136 error(filename, line_number, 'whitespace/end_of_line', 4, 2137 'Line ends in whitespace. Consider deleting these extra spaces.') 2138 # There are certain situations we allow one space, notably for labels 2139 elif ((initial_spaces >= 1 and initial_spaces <= 3) 2140 and not match(r'\s*\w+\s*:\s*$', cleansed_line)): 2141 error(filename, line_number, 'whitespace/indent', 3, 2142 'Weird number of spaces at line-start. ' 2143 'Are you using a 4-space indent?') 2144 # Labels should always be indented at least one space. 2145 elif not initial_spaces and line[:2] != '//': 2146 label_match = match(r'(?P<label>[^:]+):\s*$', line) 2147 2148 if label_match: 2149 label = label_match.group('label') 2150 # Only throw errors for stuff that is definitely not a goto label, 2151 # because goto labels can in fact occur at the start of the line. 2152 if label in ['public', 'private', 'protected'] or label.find(' ') != -1: 2153 error(filename, line_number, 'whitespace/labels', 4, 2154 'Labels should always be indented at least one space. 
' 2155 'If this is a member-initializer list in a constructor, ' 2156 'the colon should be on the line after the definition header.') 2157 2158 if (cleansed_line.count(';') > 1 2159 # for loops are allowed two ;'s (and may run over two lines). 2160 and cleansed_line.find('for') == -1 2161 and (get_previous_non_blank_line(clean_lines, line_number)[0].find('for') == -1 2162 or get_previous_non_blank_line(clean_lines, line_number)[0].find(';') != -1) 2163 # It's ok to have many commands in a switch case that fits in 1 line 2164 and not ((cleansed_line.find('case ') != -1 2165 or cleansed_line.find('default:') != -1) 2166 and cleansed_line.find('break;') != -1)): 2167 error(filename, line_number, 'whitespace/newline', 4, 2168 'More than one command on the same line') 2169 2170 if cleansed_line.strip().endswith('||') or cleansed_line.strip().endswith('&&'): 2171 error(filename, line_number, 'whitespace/operators', 4, 2172 'Boolean expressions that span multiple lines should have their ' 2173 'operators on the left side of the line instead of the right side.') 2174 2175 # Some more style checks 2176 check_namespace_indentation(filename, clean_lines, line_number, file_extension, error) 2177 check_switch_indentation(filename, clean_lines, line_number, error) 2178 check_braces(filename, clean_lines, line_number, error) 2179 check_exit_statement_simplifications(filename, clean_lines, line_number, error) 2180 check_spacing(filename, clean_lines, line_number, error) 2181 check_check(filename, clean_lines, line_number, error) 2182 check_for_comparisons_to_zero(filename, clean_lines, line_number, error) 2183 check_for_null(filename, clean_lines, line_number, error) 2184 2185 2186_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"') 2187_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$') 2188# Matches the first component of a filename delimited by -s and _s. 
def _drop_common_suffixes(filename):
    """Strips well-known endings such as _test.cpp or -inl.h from a filename.

    A known ending is only removed when a '-' or '_' separates it from the
    rest of the name; the separator is removed as well. Filenames without
    such an ending merely lose their extension.

    For example:
      >>> _drop_common_suffixes('foo/foo-inl.h')
      'foo/foo'
      >>> _drop_common_suffixes('foo/bar/foo.cpp')
      'foo/bar/foo'
      >>> _drop_common_suffixes('foo/foo_internal.h')
      'foo/foo'
      >>> _drop_common_suffixes('foo/foo_unusualinternal.h')
      'foo/foo_unusualinternal'

    Args:
      filename: The input filename.

    Returns:
      The filename with the common suffix removed.
    """
    known_suffixes = ('test.cpp', 'regtest.cpp', 'unittest.cpp',
                      'inl.h', 'impl.h', 'internal.h')
    for suffix in known_suffixes:
        if filename.endswith(('-' + suffix, '_' + suffix)):
            # Cut off the suffix together with the separator in front of it.
            return filename[:-(len(suffix) + 1)]

    base, _ = os.path.splitext(filename)
    return base


def _is_test_filename(filename):
    """Tells whether the filename carries one of the test-file endings.

    Args:
      filename: The input filename.

    Returns:
      True if 'filename' ends in _test.cpp, _unittest.cpp or _regtest.cpp,
      False otherwise.
    """
    return filename.endswith(('_test.cpp', '_unittest.cpp', '_regtest.cpp'))
2246 include_state: An _IncludeState instance in which the headers are inserted. 2247 2248 Returns: 2249 One of the _XXX_HEADER constants. 2250 2251 For example: 2252 >>> _classify_include('foo.cpp', 'config.h', False) 2253 _CONFIG_HEADER 2254 >>> _classify_include('foo.cpp', 'foo.h', False) 2255 _PRIMARY_HEADER 2256 >>> _classify_include('foo.cpp', 'bar.h', False) 2257 _OTHER_HEADER 2258 """ 2259 2260 # If it is a system header we know it is classified as _OTHER_HEADER. 2261 if is_system: 2262 return _OTHER_HEADER 2263 2264 # If the include is named config.h then this is WebCore/config.h. 2265 if include == "config.h": 2266 return _CONFIG_HEADER 2267 2268 # There cannot be primary includes in header files themselves. Only an 2269 # include exactly matches the header filename will be is flagged as 2270 # primary, so that it triggers the "don't include yourself" check. 2271 if filename.endswith('.h') and filename != include: 2272 return _OTHER_HEADER; 2273 2274 # If the target file basename starts with the include we're checking 2275 # then we consider it the primary header. 2276 target_base = FileInfo(filename).base_name() 2277 include_base = FileInfo(include).base_name() 2278 2279 # If we haven't encountered a primary header, then be lenient in checking. 2280 if not include_state.visited_primary_section() and target_base.startswith(include_base): 2281 return _PRIMARY_HEADER 2282 # If we already encountered a primary header, perform a strict comparison. 2283 # In case the two filename bases are the same then the above lenient check 2284 # probably was a false positive. 2285 elif include_state.visited_primary_section() and target_base == include_base: 2286 return _PRIMARY_HEADER 2287 2288 return _OTHER_HEADER 2289 2290 2291 2292def check_include_line(filename, clean_lines, line_number, include_state, error): 2293 """Check rules that are applicable to #include lines. 
2294 2295 Strings on #include lines are NOT removed from elided line, to make 2296 certain tasks easier. However, to prevent false positives, checks 2297 applicable to #include lines in CheckLanguage must be put here. 2298 2299 Args: 2300 filename: The name of the current file. 2301 clean_lines: A CleansedLines instance containing the file. 2302 line_number: The number of the line to check. 2303 include_state: An _IncludeState instance in which the headers are inserted. 2304 error: The function to call with any errors found. 2305 """ 2306 2307 line = clean_lines.lines[line_number] 2308 2309 matched = _RE_PATTERN_INCLUDE.search(line) 2310 if not matched: 2311 return 2312 2313 include = matched.group(2) 2314 is_system = (matched.group(1) == '<') 2315 2316 # Look for any of the stream classes that are part of standard C++. 2317 if match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include): 2318 # Many unit tests use cout, so we exempt them. 2319 if not _is_test_filename(filename): 2320 error(filename, line_number, 'readability/streams', 3, 2321 'Streams are highly discouraged.') 2322 2323 # Look for specific includes to fix. 2324 if include.startswith('wtf/') and not is_system: 2325 error(filename, line_number, 'build/include', 4, 2326 'wtf includes should be <wtf/file.h> instead of "wtf/file.h".') 2327 2328 duplicate_header = include in include_state 2329 if duplicate_header: 2330 error(filename, line_number, 'build/include', 4, 2331 '"%s" already included at %s:%s' % 2332 (include, filename, include_state[include])) 2333 else: 2334 include_state[include] = line_number 2335 2336 header_type = _classify_include(filename, include, is_system, include_state) 2337 include_state.header_types[line_number] = header_type 2338 2339 # Only proceed if this isn't a duplicate header. 
2340 if duplicate_header: 2341 return 2342 2343 # We want to ensure that headers appear in the right order: 2344 # 1) for implementation files: config.h, primary header, blank line, alphabetically sorted 2345 # 2) for header files: alphabetically sorted 2346 # The include_state object keeps track of the last type seen 2347 # and complains if the header types are out of order or missing. 2348 error_message = include_state.check_next_include_order(header_type, filename.endswith('.h')) 2349 2350 # Check to make sure we have a blank line after primary header. 2351 if not error_message and header_type == _PRIMARY_HEADER: 2352 next_line = clean_lines.raw_lines[line_number + 1] 2353 if not is_blank_line(next_line): 2354 error(filename, line_number, 'build/include_order', 4, 2355 'You should add a blank line after implementation file\'s own header.') 2356 2357 # Check to make sure all headers besides config.h and the primary header are 2358 # alphabetically sorted. 2359 if not error_message and header_type == _OTHER_HEADER: 2360 previous_line_number = line_number - 1; 2361 previous_line = clean_lines.lines[previous_line_number] 2362 previous_match = _RE_PATTERN_INCLUDE.search(previous_line) 2363 while (not previous_match and previous_line_number > 0 2364 and not search(r'\A(#if|#ifdef|#ifndef|#else|#elif|#endif)', previous_line)): 2365 previous_line_number -= 1; 2366 previous_line = clean_lines.lines[previous_line_number] 2367 previous_match = _RE_PATTERN_INCLUDE.search(previous_line) 2368 if previous_match: 2369 previous_header_type = include_state.header_types[previous_line_number] 2370 if previous_header_type == _OTHER_HEADER and previous_line.strip() > line.strip(): 2371 error(filename, line_number, 'build/include_order', 4, 2372 'Alphabetical sorting problem.') 2373 2374 if error_message: 2375 if filename.endswith('.h'): 2376 error(filename, line_number, 'build/include_order', 4, 2377 '%s Should be: alphabetically sorted.' 
def _array_size_is_compile_time_constant(size_expression):
    """Tells whether an array size expression only uses compile-time constants.

    The expression is split on whitespace and on arithmetic/shift operators;
    every resulting token must look like a numeric literal, a sizeof or
    arraysize expression, a 'k'-prefixed constant, or an ALL_CAPS identifier.

    Args:
      size_expression: The text found between '[' and ']' in a declaration.

    Returns:
      True if no token looks like a run-time value, False otherwise.
    """
    # Split the size using space and arithmetic operators as delimiters.
    # The last alternative used to read '>>]', which could never match (the
    # caller rejects sizes containing ']'), so a spaced right shift such as
    # 'kFoo >> 1' left a bare '>>' token that was reported as non-constant.
    tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>', size_expression)
    skip_next = False
    for tok in tokens:
        if skip_next:
            skip_next = False
            continue

        if search(r'sizeof\(.+\)', tok):
            continue
        if search(r'arraysize\(\w+\)', tok):
            continue

        tok = tok.lstrip('(')
        tok = tok.rstrip(')')
        if not tok:
            continue
        if match(r'\d+', tok):
            continue
        if match(r'0[xX][0-9a-fA-F]+', tok):
            continue
        if match(r'k[A-Z0-9]\w*', tok):
            continue
        if match(r'(.+::)?k[A-Z0-9]\w*', tok):
            continue
        if match(r'(.+::)?[A-Z][A-Z0-9_]*', tok):
            continue
        # A catch all for tricky sizeof cases, including 'sizeof expression',
        # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
        # requires skipping the next token because we split on ' ' and '*'.
        if tok.startswith('sizeof'):
            skip_next = True
            continue
        return False
    return True


def check_language(filename, clean_lines, line_number, file_extension, include_state,
                   error):
    """Checks rules from the 'C++ language rules' section of cppguide.html.

    Some of these rules are hard to test (function overloading, using
    uint32 inappropriately), but we do the best we can.

    #include lines are handed to check_include_line and receive none of the
    other checks performed here.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      file_extension: The extension (without the dot) of the filename.
      include_state: An _IncludeState instance in which the headers are inserted.
      error: The function to call with any errors found.
    """
    # If the line is empty or consists of entirely a comment, no need to
    # check it.
    line = clean_lines.elided[line_number]
    if not line:
        return

    matched = _RE_PATTERN_INCLUDE.search(line)
    if matched:
        check_include_line(filename, clean_lines, line_number, include_state, error)
        return

    # FIXME: figure out if they're using default arguments in fn proto.

    # Check to see if they're using an conversion function cast.
    # I just try to capture the most common basic types, though there are more.
    # Parameterless conversion functions, such as bool(), are allowed as they are
    # probably a member operator declaration or default constructor.
    matched = search(
        r'\b(int|float|double|bool|char|int32|uint32|int64|uint64)\([^)]', line)
    if matched:
        # gMock methods are defined using some variant of MOCK_METHODx(name, type)
        # where type may be float(), int(string), etc. Without context they are
        # virtually indistinguishable from int(x) casts.
        if not match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line):
            error(filename, line_number, 'readability/casting', 4,
                  'Using deprecated casting style. '
                  'Use static_cast<%s>(...) instead' %
                  matched.group(1))

    check_c_style_cast(filename, line_number, line, clean_lines.raw_lines[line_number],
                       'static_cast',
                       r'\((int|float|double|bool|char|u?int(16|32|64))\)',
                       error)
    # This doesn't catch all cases. Consider (const char * const)"hello".
    check_c_style_cast(filename, line_number, line, clean_lines.raw_lines[line_number],
                       'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)

    # In addition, we look for people taking the address of a cast. This
    # is dangerous -- casts can assign to temporaries, so the pointer doesn't
    # point where you think.
    if search(
        r'(&\([^)]+\)[\w(])|(&(static|dynamic|reinterpret)_cast\b)', line):
        error(filename, line_number, 'runtime/casting', 4,
              ('Are you taking an address of a cast? '
               'This is dangerous: could be a temp var. '
               'Take the address before doing the cast, rather than after'))

    # Check for people declaring static/global STL strings at the top level.
    # This is dangerous because the C++ language does not guarantee that
    # globals with constructors are initialized before the first access.
    matched = match(
        r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
        line)
    # Make sure it's not a function.
    # Function template specialization looks like: "string foo<Type>(...".
    # Class template definitions look like: "string Foo<Type>::Method(...".
    if matched and not match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)',
                             matched.group(3)):
        error(filename, line_number, 'runtime/string', 4,
              'For a static/global string constant, use a C style string instead: '
              '"%schar %s[]".' %
              (matched.group(1), matched.group(2)))

    # Check that we're not using RTTI outside of testing code.
    if search(r'\bdynamic_cast<', line) and not _is_test_filename(filename):
        error(filename, line_number, 'runtime/rtti', 5,
              'Do not use dynamic_cast<>. If you need to cast within a class '
              "hierarchy, use static_cast<> to upcast. Google doesn't support "
              'RTTI.')

    # The backreference requires the identifier ending in '_' to be
    # initialized from exactly itself, e.g. "foo_(foo_)".
    if search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
        error(filename, line_number, 'runtime/init', 4,
              'You seem to be initializing a member variable with itself.')

    # FIXME: for header files, check that 1-arg constructors are explicit.
    #        How to tell it's a constructor?
    #        (handled in check_for_non_standard_constructs for now)

    # Check if people are using the verboten C basic types. The only exception
    # we regularly allow is "unsigned short port" for port.
    if search(r'\bshort port\b', line):
        if not search(r'\bunsigned short port\b', line):
            error(filename, line_number, 'runtime/int', 4,
                  'Use "unsigned short" for ports, not "short"')

    # When snprintf is used, the second argument shouldn't be a literal.
    matched = search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
    if matched:
        error(filename, line_number, 'runtime/printf', 3,
              'If you can, use sizeof(%s) instead of %s as the 2nd arg '
              'to snprintf.' % (matched.group(1), matched.group(2)))

    # Check if some verboten C functions are being used.
    if search(r'\bsprintf\b', line):
        error(filename, line_number, 'runtime/printf', 5,
              'Never use sprintf. Use snprintf instead.')
    matched = search(r'\b(strcpy|strcat)\b', line)
    if matched:
        error(filename, line_number, 'runtime/printf', 4,
              'Almost always, snprintf is better than %s' % matched.group(1))

    if search(r'\bsscanf\b', line):
        error(filename, line_number, 'runtime/printf', 1,
              'sscanf can be ok, but is slow and can overflow buffers.')

    # Check for suspicious usage of "if" like
    # } if (a == b) {
    if search(r'\}\s*if\s*\(', line):
        error(filename, line_number, 'readability/braces', 4,
              'Did you mean "else if"? If not, start a new line for "if".')

    # Check for potential format string bugs like printf(foo).
    # We constrain the pattern not to pick things like DocidForPrintf(foo).
    # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
    matched = re.search(r'\b((?:string)?printf)\s*\(([\w.\->()]+)\)', line, re.I)
    if matched:
        error(filename, line_number, 'runtime/printf', 4,
              'Potential format string bug. Do %s("%%s", %s) instead.'
              % (matched.group(1), matched.group(2)))

    # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
    # NOTE(review): the alternation in the second pattern is not grouped, so
    # '^' applies only to the first alternative and '$' only to the last one.
    # Confirm whether "second argument is a literal" was meant to be anchored
    # as a whole before tightening it.
    matched = search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
    if matched and not match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", matched.group(2)):
        error(filename, line_number, 'runtime/memset', 4,
              'Did you mean "memset(%s, 0, %s)"?'
              % (matched.group(1), matched.group(2)))

    # Detect variable-length arrays.
    matched = match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
    if (matched and matched.group(2) != 'return' and matched.group(2) != 'delete' and
        matched.group(3).find(']') == -1):
        # If any token of the size is not a compile time constant then
        # report the error.
        if not _array_size_is_compile_time_constant(matched.group(3)):
            error(filename, line_number, 'runtime/arrays', 1,
                  'Do not use variable-length arrays. Use an appropriately named '
                  "('k' followed by CamelCase) compile-time constant for the size.")

    # Check for use of unnamed namespaces in header files. Registration
    # macros are typically OK, so we allow use of "namespace {" on lines
    # that end with backslashes.
    if (file_extension == 'h'
        and search(r'\bnamespace\s*{', line)
        and line[-1] != '\\'):
        error(filename, line_number, 'build/namespaces', 4,
              'Do not use unnamed namespaces in header files. See '
              'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
              ' for more information.')
def check_c_style_cast(filename, line_number, line, raw_line, cast_type, pattern,
                       error):
    """Reports a C-style cast found on the line, if any.

    The same pattern also matches sizeof(type) and unnamed function
    parameters, so those are recognized first and reported under their own
    categories instead of as casts.

    Args:
      filename: The name of the current file.
      line_number: The number of the line to check.
      line: The line of code to check.
      raw_line: The raw line of code to check, with comments.
      cast_type: The string for the C++ cast to recommend. This is either
        reinterpret_cast or static_cast, depending.
      pattern: The regular expression used to find C-style casts.
      error: The function to call with any errors found.
    """
    cast = search(pattern, line)
    if not cast:
        return

    # Everything before the opening parenthesis of the candidate cast.
    # e.g., sizeof(int)
    text_before_paren = line[:cast.start(1) - 1]
    if match(r'.*sizeof\s*$', text_before_paren):
        error(filename, line_number, 'runtime/sizeof', 1,
              'Using sizeof(type). Use sizeof(varname) instead if possible')
        return

    # The close paren is for function pointers as arguments to a function.
    # eg, void foo(void (*bar)(int));
    # The semicolon check is a more basic function check; also possibly a
    # function pointer typedef.
    # eg, void foo(int); or void foo(int) const;
    # The equals check is for function pointer assignment.
    # eg, void *(*foo)(int) = ...
    #
    # Right now, this will only catch cases where there's a single argument, and
    # it's unnamed. It should probably be expanded to check for multiple
    # arguments with some unnamed.
    text_after_cast = line[cast.end(0):]
    declaration = match(r'\s*(\)|=|(const)?\s*(;|\{|throw\(\)))', text_after_cast)
    if declaration:
        terminator = declaration.group(3)
        # A definition ('{' or 'throw()') whose raw line carries a /* */
        # comment is not reported; every other case is.
        is_reportable = (not terminator
                         or terminator == ';'
                         or '/*' not in raw_line)
        if is_reportable:
            error(filename, line_number, 'readability/function', 3,
                  'All parameters should be named in a function')
        return

    # At this point, all that should be left is actual casts.
    suggestion = (cast_type, cast.group(1))
    error(filename, line_number, 'readability/casting', 4,
          'Using C-style cast. Use %s<%s>(...) instead' % suggestion)
# Pairs of (standard header, names of the templates that header provides).
# Used below to build the patterns that detect uses of those templates.
_HEADERS_CONTAINING_TEMPLATES = (
    ('<deque>', ('deque',)),
    ('<functional>', ('unary_function', 'binary_function',
                      'plus', 'minus', 'multiplies', 'divides', 'modulus',
                      'negate',
                      'equal_to', 'not_equal_to', 'greater', 'less',
                      'greater_equal', 'less_equal',
                      'logical_and', 'logical_or', 'logical_not',
                      'unary_negate', 'not1', 'binary_negate', 'not2',
                      'bind1st', 'bind2nd',
                      'pointer_to_unary_function',
                      'pointer_to_binary_function',
                      'ptr_fun',
                      'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
                      'mem_fun_ref_t',
                      'const_mem_fun_t', 'const_mem_fun1_t',
                      'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
                      'mem_fun_ref',
                      )),
    ('<limits>', ('numeric_limits',)),
    ('<list>', ('list',)),
    ('<map>', ('map', 'multimap',)),
    ('<memory>', ('allocator',)),
    ('<queue>', ('queue', 'priority_queue',)),
    ('<set>', ('set', 'multiset',)),
    ('<stack>', ('stack',)),
    ('<string>', ('char_traits', 'basic_string',)),
    ('<utility>', ('pair',)),
    ('<vector>', ('vector',)),

    # gcc extensions.
    # Note: std::hash is their hash, ::hash is our hash
    ('<hash_map>', ('hash_map', 'hash_multimap',)),
    ('<hash_set>', ('hash_set', 'hash_multiset',)),
    ('<slist>', ('slist',)),
    )

# Maps a template entity to header names that are accepted as providing it,
# even though they are not the header suggested in the error message.
_HEADERS_ACCEPTED_BUT_NOT_PROMOTED = {
    # We can trust with reasonable confidence that map gives us pair<>, too.
    'pair<>': ('map', 'multimap', 'hash_map', 'hash_multimap')
}

# Matches the bare word "string"; it is a non-templatized type, so it cannot
# be found by the template patterns built below.
_RE_PATTERN_STRING = re.compile(r'\bstring\b')

# List of (compiled pattern, function name, '<algorithm>') tuples, one per
# <algorithm> function that is looked for.
_re_pattern_algorithm_header = []
for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
                  'transform'):
    # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
    # type::max().
    # NOTE(review): the leading character class only excludes '>' and '.', so
    # "type::max(x" still appears to match -- confirm against the unit tests.
    _re_pattern_algorithm_header.append(
        (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
         _template,
         '<algorithm>'))

# List of (compiled pattern, 'name<>', header) tuples, one per template listed
# in _HEADERS_CONTAINING_TEMPLATES. A pattern matches the template name
# followed by optional whitespace and '<'.
_re_pattern_templates = []
for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
    for _template in _templates:
        _re_pattern_templates.append(
            (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
             _template + '<>',
             _header))
def files_belong_to_same_module(filename_cpp, filename_h):
    """Decides whether a .cpp file and a header are part of one 'module'.

    foo.h, foo-inl.h, foo.cpp, foo_test.cpp and foo_unittest.cpp are treated
    as one module when they live in the same directory. The directory
    components '/public/' and '/internal/' are ignored, so
    some/path/public/xyzzy and some/path/internal/xyzzy also count as the
    same module.

    When filename_cpp carries a longer path than filename_h (for example
    '/absolute/path/to/base/sysinfo.cpp' including 'base/sysinfo.h'), the
    leading part of the .cpp path that the header path lacks is returned as
    well, so that the caller can use it to open the header. The real include
    paths are not available here, hence this guesswork.

    Known bugs: tools/base/bar.cpp and base/bar.h belong to the same module
    according to this implementation. Because of this, this function gives
    some false positives. This should be sufficiently rare in practice.

    Args:
      filename_cpp: is the path for the .cpp file
      filename_h: is the path for the header path

    Returns:
      Tuple with a bool and a string:
      bool: True if filename_cpp and filename_h belong to the same module.
      string: the additional prefix needed to open the header file.
    """
    not_same_module = (False, '')
    if not filename_cpp.endswith('.cpp'):
        return not_same_module
    if not filename_h.endswith('.h'):
        return not_same_module

    # Reduce the implementation file to its module stem; at most one test
    # suffix is removed.
    source_stem = filename_cpp[:-len('.cpp')]
    for test_suffix in ('_unittest', '_test'):
        if source_stem.endswith(test_suffix):
            source_stem = source_stem[:-len(test_suffix)]
            break

    # Reduce the header to its module stem.
    header_stem = filename_h[:-len('.h')]
    if header_stem.endswith('-inl'):
        header_stem = header_stem[:-len('-inl')]

    for visibility_dir in ('/public/', '/internal/'):
        source_stem = source_stem.replace(visibility_dir, '/')
        header_stem = header_stem.replace(visibility_dir, '/')

    if not source_stem.endswith(header_stem):
        return False, ''
    # Whatever precedes the header stem in the source path is the prefix.
    return True, source_stem[:-len(header_stem)]
def update_include_state(filename, include_state, io=codecs):
    """Fill up the include_state with new includes found from the file.

    Every #include found in the file is recorded in include_state unless the
    same include is already present there.

    Args:
      filename: the name of the header to read.
      include_state: an _IncludeState instance in which the headers are inserted.
      io: The io factory to use to read the file. Provided for testability.

    Returns:
      True if a header was successfully added. False otherwise.
    """
    try:
        header_file = io.open(filename, 'r', 'utf8', 'replace')
    except IOError:
        return False

    try:
        line_number = 0
        for line in header_file:
            line_number += 1
            clean_line = cleanse_comments(line)
            matched = _RE_PATTERN_INCLUDE.search(clean_line)
            if matched:
                include = matched.group(2)
                # The value formatting is cute, but not really used right now.
                # What matters here is that the key is in include_state.
                include_state.setdefault(include, '%s:%d' % (filename, line_number))
    finally:
        # Release the file handle instead of leaving it to the garbage
        # collector. An injected io factory may hand back a plain iterable of
        # lines, so only close what can be closed.
        close = getattr(header_file, 'close', None)
        if close:
            close()
    return True
def check_for_include_what_you_use(filename, clean_lines, include_state, error,
                                   io=codecs):
    """Warns about STL headers that are used but not included.

    Every non-preprocessor line is scanned for "string", for a handful of
    <algorithm> functions and for the known STL templates. Only one reason is
    kept per header: if both equal_to<> and less<> are used, only the one
    appearing last in the file is reported as the reason to include
    <functional>.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      include_state: An _IncludeState instance.
      error: The function to call with any errors found.
      io: The IO factory to use to read the header file. Provided for unittest
          injection.
    """
    # Maps a header name to (line_number, template entity).
    # Example of required: { '<functional>': (1219, 'less<>') }
    required = {}

    for line_number in xrange(clean_lines.num_lines()):
        code = clean_lines.elided[line_number]
        if not code or code[0] == '#':
            continue

        # String is special -- it is a non-templatized type in STL.
        if _RE_PATTERN_STRING.search(code):
            required['<string>'] = (line_number, 'string')

        # Template patterns all need a '<', so they are only tried on lines
        # containing one. This is just a speed up, no semantics are changed.
        pattern_groups = [_re_pattern_algorithm_header]
        if '<' in code:
            pattern_groups.append(_re_pattern_templates)
        for pattern_group in pattern_groups:
            for pattern, template, header in pattern_group:
                if pattern.search(code):
                    required[header] = (line_number, template)

    # The policy is that if you #include something in foo.h you don't need to
    # include it again in foo.cpp. Here, we will look at possible includes.
    # Work on a copy so the caller's include_state is left untouched.
    known_includes = include_state.copy()

    # Use the absolute path so that matching works properly.
    module_path = os.path.abspath(filename)

    # For Emacs's flymake.
    # If cpp_style is invoked from Emacs's flymake, a temporary file is generated
    # by flymake and that file name might end with '_flymake.cpp'. In that case,
    # restore original file name here so that the corresponding header file can be
    # found.
    # e.g. If the file name is 'foo_flymake.cpp', we should search for 'foo.h'
    # instead of 'foo_flymake.h'
    flymake_suffix = '_flymake.cpp'
    if module_path.endswith(flymake_suffix):
        module_path = module_path[:-len(flymake_suffix)] + '.cpp'

    # Did we find the header for this file (if any) and successfully load it?
    header_found = False

    # known_includes is modified during iteration, so we iterate over a copy of
    # the keys.
    for header in known_includes.keys():  #NOLINT
        same_module, prefix = files_belong_to_same_module(module_path, header)
        if same_module and update_include_state(prefix + header, known_includes, io):
            header_found = True

    # If we can't find the header file for a .cpp, assume it's because we don't
    # know where to look. In that case we'll give up as we're not sure they
    # didn't include it in the .h file.
    # FIXME: Do a better job of finding .h files so we are confident that
    #        not having the .h file means there isn't one.
    if not header_found and filename.endswith('.cpp'):
        return

    # All the lines have been processed, report the errors found.
    for required_header, (required_line, template) in required.items():
        accepted_headers = _HEADERS_ACCEPTED_BUT_NOT_PROMOTED.get(template, ())
        if any(accepted in known_includes for accepted in accepted_headers):
            continue
        if required_header.strip('<>"') in known_includes:
            continue
        error(filename, required_line,
              'build/include_what_you_use', 4,
              'Add #include %s for %s' % (required_header, template))
def process_line(filename, file_extension,
                 clean_lines, line, include_state, function_state,
                 class_state, error):
    """Runs every per-line check on one line of the file.

    Function lengths are always tracked; all other checks are skipped when
    the raw line contains the word NOLINT.

    Args:
      filename: Filename of the file that is being processed.
      file_extension: The extension (dot not included) of the file.
      clean_lines: An array of strings, each representing a line of the file,
                   with comments stripped.
      line: Number of line being processed.
      include_state: An _IncludeState instance in which the headers are inserted.
      function_state: A _FunctionState instance which counts function lines, etc.
      class_state: A _ClassState instance which maintains information about
                   the current stack of nested class declarations being parsed.
      error: A callable to which errors are reported, which takes 4 arguments:
             filename, line number, error level, and message

    """
    unprocessed_lines = clean_lines.raw_lines
    check_for_function_lengths(filename, clean_lines, line, function_state, error)

    lint_is_suppressed = search(r'\bNOLINT\b', unprocessed_lines[line])
    if lint_is_suppressed:  # ignore nolint lines
        return

    check_for_multiline_comments_and_strings(filename, clean_lines, line, error)
    check_style(filename, clean_lines, line, file_extension, error)
    check_language(filename, clean_lines, line, file_extension, include_state,
                   error)
    check_for_non_standard_constructs(filename, clean_lines, line,
                                      class_state, error)
    # The remaining checks share one signature.
    for simple_check in (check_posix_threading, check_invalid_increment):
        simple_check(filename, clean_lines, line, error)
def process_file_data(filename, file_extension, lines, error):
    """Lints the given lines and reports every problem through error.

    A marker comment is added before the first and after the last line, so
    that list indices and 1-based line numbers coincide and the file ends in
    a known way. The caller's list itself is not extended.

    Args:
      filename: Filename of the file that is being processed.
      file_extension: The extension (dot not included) of the file.
      lines: An array of strings, each representing a line of the file, with the
             last element being empty if the file is terminated with a newline.
      error: A callable to which errors are reported.
    """
    leading_marker = ['// marker so line numbers and indices both start at 1']
    trailing_marker = ['// marker so line numbers end in a known way']
    lines = leading_marker + lines + trailing_marker

    include_state = _IncludeState()
    function_state = _FunctionState()
    class_state = _ClassState()

    # Whole-file checks that look at the lines before comments are removed.
    check_for_copyright(filename, lines, error)
    if file_extension == 'h':
        check_for_header_guard(filename, lines, error)

    remove_multi_line_comments(filename, lines, error)
    clean_lines = CleansedLines(lines)

    # Per-line checks.
    line_count = clean_lines.num_lines()
    for line in xrange(line_count):
        process_line(filename, file_extension, clean_lines, line,
                     include_state, function_state, class_state, error)
    class_state.check_finished(filename, error)

    check_for_include_what_you_use(filename, clean_lines, include_state, error)

    # We check here rather than inside process_line so that we see raw
    # lines rather than "cleaned" lines.
    check_for_unicode_replacement_characters(filename, lines, error)

    check_for_new_line_at_eof(filename, lines, error)
def process_file(filename, error=error):
    """Performs cpp_style on a single file.

    The file (or stdin, when filename is "-") is read as UTF-8 with
    undecodable bytes replaced. Files whose extension is not h, cpp or c are
    reported on stderr and not linted; stdin is always linted.

    Args:
      filename: The name of the file to parse.
      error: The function to call with any errors found.
    """
    try:
        # Support the UNIX convention of using "-" for stdin. Note that
        # we are not opening the file with universal newline support
        # (which codecs doesn't support anyway), so the resulting lines do
        # contain trailing '\r' characters if we are reading a file that
        # has CRLF endings.
        # If after the split a trailing '\r' is present, it is removed
        # below. If it is not expected to be present (i.e. os.linesep !=
        # '\r\n' as in Windows), a warning is issued below if this file
        # is processed.

        if filename == '-':
            lines = codecs.StreamReaderWriter(sys.stdin,
                                              codecs.getreader('utf8'),
                                              codecs.getwriter('utf8'),
                                              'replace').read().split('\n')
        else:
            source_file = codecs.open(filename, 'r', 'utf8', 'replace')
            try:
                lines = source_file.read().split('\n')
            finally:
                # Close the handle explicitly rather than leaking it until
                # garbage collection; many files may be linted in one run.
                source_file.close()

        carriage_return_found = False
        # Remove trailing '\r'.
        for line_number, text in enumerate(lines):
            if text.endswith('\r'):
                lines[line_number] = text.rstrip('\r')
                carriage_return_found = True

    except IOError:
        sys.stderr.write(
            "Skipping input '%s': Can't open for reading\n" % filename)
        return

    # Note, if no dot is found, this will give the entire filename as the ext.
    file_extension = filename[filename.rfind('.') + 1:]

    # When reading from stdin, the extension is unknown, so no cpp_style tests
    # should rely on the extension.
    if (filename != '-' and file_extension != 'h' and file_extension != 'cpp'
        and file_extension != 'c'):
        sys.stderr.write('Ignoring %s; not a .cpp, .c or .h file\n' % filename)
    else:
        process_file_data(filename, file_extension, lines, error)
        if carriage_return_found and os.linesep != '\r\n':
            # Use 0 for line_number since outputting only one error for
            # potentially several lines.
            error(filename, 0, 'whitespace/newline', 1,
                  'One or more unexpected \\r (^M) found;'
                  'better to use only a \\n')

    sys.stderr.write('Done processing %s\n' % filename)
def print_usage(message):
    """Writes the usage text to stderr and exits.

    Args:
      message: The optional error message. When given, it is used as the exit
               message prefixed with "FATAL ERROR: "; otherwise the exit
               status is 1.
    """
    sys.stderr.write(_USAGE)
    if not message:
        sys.exit(1)
    sys.exit('\nFATAL ERROR: ' + message)


def print_categories():
    """Writes the list of all error categories to stderr and exits with status 0.

    These are the categories used to filter messages via --filter.
    """
    error_stream = sys.stderr
    error_stream.write(_ERROR_CATEGORIES)
    sys.exit(0)
def parse_arguments(args, additional_flags=[]):
    """Parses the command line arguments.

    This may set the output format and verbosity level as side-effects.
    Invalid arguments, an unsupported output format or a non-integer
    verbosity end the program through print_usage.

    Args:
      args: The command line arguments:
      additional_flags: A list of strings which specifies flags we allow.
                        The list is only read, never modified.

    Returns:
      A tuple of (filenames, flags)

      filenames: The list of filenames to lint.
      flags: The dict of the flag names and the flag values.
    """
    flags = ['help', 'output=', 'verbose=', 'filter='] + additional_flags
    additional_flag_values = {}
    try:
        (opts, filenames) = getopt.getopt(args, '', flags)
    except getopt.GetoptError:
        print_usage('Invalid arguments.')

    verbosity = _verbose_level()
    output_format = _output_format()
    filters = ''

    for (opt, val) in opts:
        if opt == '--help':
            print_usage(None)
        elif opt == '--output':
            if not val in ('emacs', 'vs7'):
                print_usage('The only allowed output formats are emacs and vs7.')
            output_format = val
        elif opt == '--verbose':
            # A non-numeric value used to escape as an uncaught ValueError;
            # report it like every other invalid argument instead.
            try:
                verbosity = int(val)
            except ValueError:
                print_usage('The verbosity level must be an integer.')
        elif opt == '--filter':
            filters = val
            # An empty --filter= lists the available categories and exits.
            if not filters:
                print_categories()
        else:
            additional_flag_values[opt] = val

    _set_output_format(output_format)
    _set_verbose_level(verbosity)
    _set_filters(filters)

    return (filenames, additional_flag_values)


def use_webkit_styles():
    """Disables some features which are not suitable for WebKit.

    Replaces the module-level _DEFAULT_FILTERS list with the WebKit set of
    disabled categories.
    """
    # FIXME: For filters we will never want to have, remove them.
    #        For filters we want to have similar functionalities,
    #        modify the implementation and enable them.
    global _DEFAULT_FILTERS
    _DEFAULT_FILTERS = [
        '-whitespace/comments',
        '-whitespace/blank_line',
        '-runtime/explicit',  # explicit
        '-runtime/virtual',  # virtual dtor
        '-runtime/printf',
        '-runtime/threadsafe_fn',
        '-runtime/rtti',
        '-build/include_what_you_use',  # <string> for std::string
        '-legal/copyright',
        '-readability/multiline_comment',
        '-readability/braces',  # int foo() {};
        '-readability/fn_size',
        '-build/storage_class',  # const static
        '-build/endif_comment',
        '-whitespace/labels',
        '-runtime/arrays',  # variable length array
        '-build/header_guard',
        '-readability/casting',
        '-readability/function',
        '-runtime/casting',
        '-runtime/sizeof',
    ]
def main():
    """Command line entry point: lints every file named in sys.argv.

    Exits with a true status when at least one error was found.
    """
    development_notice = '''********************* WARNING WARNING WARNING *********************

This tool is in the process of development and may give inaccurate
results at present. Please file bugs (and/or patches) for things
that you notice that it flags incorrectly.

********************* WARNING WARNING WARNING *********************

'''
    sys.stderr.write(development_notice)

    use_webkit_styles()

    filenames, _unused_flag_values = parse_arguments(sys.argv[1:])
    if not filenames:
        print_usage('No files were specified.')

    # Change stderr to write with replacement characters so we don't die
    # if we try to print something containing non-ASCII characters.
    utf8_reader = codecs.getreader('utf8')
    utf8_writer = codecs.getwriter('utf8')
    sys.stderr = codecs.StreamReaderWriter(sys.stderr, utf8_reader, utf8_writer,
                                           'replace')

    _cpp_style_state.reset_error_count()
    for filename in filenames:
        process_file(filename)

    total_errors = _cpp_style_state.error_count
    sys.stderr.write('Total errors found: %d\n' % total_errors)
    sys.exit(_cpp_style_state.error_count > 0)


if __name__ == '__main__':
    main()