• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2# pylint: skip-file
3#
4# Copyright (c) 2009 Google Inc. All rights reserved.
5#
6# Redistribution and use in source and binary forms, with or without
7# modification, are permitted provided that the following conditions are
8# met:
9#
10#    * Redistributions of source code must retain the above copyright
11# notice, this list of conditions and the following disclaimer.
12#    * Redistributions in binary form must reproduce the above
13# copyright notice, this list of conditions and the following disclaimer
14# in the documentation and/or other materials provided with the
15# distribution.
16#    * Neither the name of Google Inc. nor the names of its
17# contributors may be used to endorse or promote products derived from
18# this software without specific prior written permission.
19#
20# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
32"""Does google-lint on c++ files.
33
34The goal of this script is to identify places in the code that *may*
35be in non-compliance with google style.  It does not attempt to fix
36up these problems -- the point is to educate.  It does also not
37attempt to find all problems, or to ensure that everything it does
38find is legitimately a problem.
39
40In particular, we can get very confused by /* and // inside strings!
41We do a small hack, which is to ignore //'s with "'s after them on the
42same line, but it is far from perfect (in either direction).
43"""
44
45import codecs
46import collections
47import copy
48import getopt
49import glob
50import itertools
51import math  # for log
52import os
53import re
54import string
55import sys
56import sysconfig
57import unicodedata
58import xml.etree.ElementTree
59
60# if empty, use defaults
61_valid_extensions = set([])
62
63__VERSION__ = '2.0.0'
64
65_USAGE = """
66Syntax: cpplint.py [--verbose=#] [--output=emacs|eclipse|vs7|junit|sed|gsed]
67                   [--filter=-x,+y,...]
68                   [--counting=total|toplevel|detailed] [--root=subdir]
69                   [--repository=path]
70                   [--linelength=digits] [--headers=x,y,...]
71                   [--recursive]
72                   [--exclude=path]
73                   [--extensions=hpp,cpp,...]
74                   [--includeorder=default|standardcfirst]
75                   [--config=filename]
76                   [--quiet]
77                   [--version]
78        <file> [file] ...
79
80  Style checker for C/C++ source files.
81  This is a fork of the Google style checker with minor extensions.
82
83  The style guidelines this tries to follow are those in
84    https://google.github.io/styleguide/cppguide.html
85
86  Every problem is given a confidence score from 1-5, with 5 meaning we are
87  certain of the problem, and 1 meaning it could be a legitimate construct.
88  This will miss some errors, and is not a substitute for a code review.
89
90  To suppress false-positive errors of certain categories, add a
91  'NOLINT(category[, category...])' comment to the line.  NOLINT or NOLINT(*)
92  suppresses errors of all categories on that line. To suppress categories
93  on the next line use NOLINTNEXTLINE instead of NOLINT. To suppress errors in
94  a block of code 'NOLINTBEGIN(category[, category...])' comment to a line at
95  the start of the block and to end the block add a comment with 'NOLINTEND'.
96  NOLINT blocks are inclusive so any statements on the same line as a BEGIN
97  or END will have the error suppression applied.
98
99  The files passed in will be linted; at least one file must be provided.
100  Default linted extensions are %s.
101  Other file types will be ignored.
102  Change the extensions with the --extensions flag.
103
104  Flags:
105
106    output=emacs|eclipse|vs7|junit|sed|gsed
107      By default, the output is formatted to ease emacs parsing.  Visual Studio
108      compatible output (vs7) may also be used.  Further support exists for
109      eclipse (eclipse), and JUnit (junit). XML parsers such as those used
110      in Jenkins and Bamboo may also be used.
111      The sed format outputs sed commands that should fix some of the errors.
112      Note that this requires gnu sed. If that is installed as gsed on your
113      system (common e.g. on macOS with homebrew) you can use the gsed output
114      format. Sed commands are written to stdout, not stderr, so you should be
115      able to pipe output straight to a shell to run the fixes.
116
117    verbose=#
118      Specify a number 0-5 to restrict errors to certain verbosity levels.
119      Errors with lower verbosity levels have lower confidence and are more
120      likely to be false positives.
121
122    quiet
123      Don't print anything if no errors are found.
124
125    filter=-x,+y,...
126      Specify a comma-separated list of category-filters to apply: only
127      error messages whose category names pass the filters will be printed.
128      (Category names are printed with the message and look like
129      "[whitespace/indent]".)  Filters are evaluated left to right.
130      "-FOO" means "do not print categories that start with FOO".
131      "+FOO" means "do print categories that start with FOO".
132
133      Examples: --filter=-whitespace,+whitespace/braces
134                --filter=-whitespace,-runtime/printf,+runtime/printf_format
135                --filter=-,+build/include_what_you_use
136
137      To see a list of all the categories used in cpplint, pass no arg:
138         --filter=
139
140      Filters can directly be limited to files and also line numbers. The
141      syntax is category:file:line , where line is optional. The filter limitation
142      works for both + and - and can be combined with ordinary filters:
143
144      Examples: --filter=-whitespace:foo.h,+whitespace/braces:foo.h
145                --filter=-whitespace,-runtime/printf:foo.h:14,+runtime/printf_format:foo.h
146                --filter=-,+build/include_what_you_use:foo.h:321
147
148    counting=total|toplevel|detailed
149      The total number of errors found is always printed. If
150      'toplevel' is provided, then the count of errors in each of
151      the top-level categories like 'build' and 'whitespace' will
152      also be printed. If 'detailed' is provided, then a count
153      is provided for each category like 'legal/copyright'.
154
155    repository=path
156      The top level directory of the repository, used to derive the header
157      guard CPP variable. By default, this is determined by searching for a
158      path that contains .git, .hg, or .svn. When this flag is specified, the
159      given path is used instead. This option allows the header guard CPP
160      variable to remain consistent even if members of a team have different
161      repository root directories (such as when checking out a subdirectory
162      with SVN). In addition, users of non-mainstream version control systems
163      can use this flag to ensure readable header guard CPP variables.
164
165      Examples:
166        Assuming that Alice checks out ProjectName and Bob checks out
167        ProjectName/trunk and trunk contains src/chrome/ui/browser.h, then
168        with no --repository flag, the header guard CPP variable will be:
169
170        Alice => TRUNK_SRC_CHROME_BROWSER_UI_BROWSER_H_
171        Bob   => SRC_CHROME_BROWSER_UI_BROWSER_H_
172
173        If Alice uses the --repository=trunk flag and Bob omits the flag or
174        uses --repository=. then the header guard CPP variable will be:
175
176        Alice => SRC_CHROME_BROWSER_UI_BROWSER_H_
177        Bob   => SRC_CHROME_BROWSER_UI_BROWSER_H_
178
179    root=subdir
180      The root directory used for deriving header guard CPP variable.
181      This directory is relative to the top level directory of the repository
182      which by default is determined by searching for a directory that contains
183      .git, .hg, or .svn but can also be controlled with the --repository flag.
184      If the specified directory does not exist, this flag is ignored.
185
186      Examples:
187        Assuming that src is the top level directory of the repository (and
188        cwd=top/src), the header guard CPP variables for
189        src/chrome/browser/ui/browser.h are:
190
191        No flag => CHROME_BROWSER_UI_BROWSER_H_
192        --root=chrome => BROWSER_UI_BROWSER_H_
193        --root=chrome/browser => UI_BROWSER_H_
194        --root=.. => SRC_CHROME_BROWSER_UI_BROWSER_H_
195
196    linelength=digits
197      This is the allowed line length for the project. The default value is
198      80 characters.
199
200      Examples:
201        --linelength=120
202
203    recursive
204      Search for files to lint recursively. Each directory given in the list
205      of files to be linted is replaced by all files that descend from that
206      directory. Files with extensions not in the valid extensions list are
207      excluded.
208
209    exclude=path
210      Exclude the given path from the list of files to be linted. Relative
211      paths are evaluated relative to the current directory and shell globbing
212      is performed. This flag can be provided multiple times to exclude
213      multiple files.
214
215      Examples:
216        --exclude=one.cc
217        --exclude=src/*.cc
218        --exclude=src/*.cc --exclude=test/*.cc
219
220    extensions=extension,extension,...
221      The allowed file extensions that cpplint will check
222
223      Examples:
224        --extensions=%s
225
226    includeorder=default|standardcfirst
227      For the build/include_order rule, the default is to blindly assume angle
228      bracket includes with file extension are c-system-headers (default),
229      even knowing this will have false classifications.
230      The default is established at google.
231      standardcfirst means to instead use an allow-list of known c headers and
232      treat all others as separate group of "other system headers". The C headers
233      included are those of the C-standard lib and closely related ones.
234
235    config=filename
236      Search for config files with the specified name instead of CPPLINT.cfg
237
238    headers=x,y,...
239      The header extensions that cpplint will treat as .h in checks. Values are
240      automatically added to --extensions list.
241     (by default, only files with extensions %s will be assumed to be headers)
242
243      Examples:
244        --headers=%s
245        --headers=hpp,hxx
246        --headers=hpp
247
248    cpplint.py supports per-directory configurations specified in CPPLINT.cfg
249    files. CPPLINT.cfg file can contain a number of key=value pairs.
250    Currently the following options are supported:
251
252      set noparent
253      filter=+filter1,-filter2,...
254      exclude_files=regex
255      linelength=80
256      root=subdir
257      headers=x,y,...
258
259    "set noparent" option prevents cpplint from traversing directory tree
260    upwards looking for more .cfg files in parent directories. This option
261    is usually placed in the top-level project directory.
262
263    The "filter" option is similar in function to --filter flag. It specifies
264    message filters in addition to the |_DEFAULT_FILTERS| and those specified
265    through --filter command-line flag.
266
267    "exclude_files" allows to specify a regular expression to be matched against
268    a file name. If the expression matches, the file is skipped and not run
269    through the linter.
270
271    "linelength" allows to specify the allowed line length for the project.
272
273    The "root" option is similar in function to the --root flag (see example
274    above). Paths are relative to the directory of the CPPLINT.cfg.
275
276    The "headers" option is similar in function to the --headers flag
277    (see example above).
278
279    CPPLINT.cfg has an effect on files in the same directory and all
280    sub-directories, unless overridden by a nested configuration file.
281
282      Example file:
283        filter=-build/include_order,+build/include_alpha
284        exclude_files=.*\\.cc
285
286    The above example disables build/include_order warning and enables
287    build/include_alpha as well as excludes all .cc from being
288    processed by linter, in the current directory (where the .cfg
289    file is located) and all sub-directories.
290"""
291
292# We categorize each error message we print.  Here are the categories.
293# We want an explicit list so we can list them all in cpplint --filter=.
294# If you add a new error message with a new category, add it to the list
295# here!  cpplint_unittest.py should tell you if you forget to do this.
296_ERROR_CATEGORIES = [
297    'build/c++11',
298    'build/c++17',
299    'build/deprecated',
300    'build/endif_comment',
301    'build/explicit_make_pair',
302    'build/forward_decl',
303    'build/header_guard',
304    'build/include',
305    'build/include_subdir',
306    'build/include_alpha',
307    'build/include_order',
308    'build/include_what_you_use',
309    'build/namespaces_headers',
310    'build/namespaces_literals',
311    'build/namespaces',
312    'build/printf_format',
313    'build/storage_class',
314    'legal/copyright',
315    'readability/alt_tokens',
316    'readability/braces',
317    'readability/casting',
318    'readability/check',
319    'readability/constructors',
320    'readability/fn_size',
321    'readability/inheritance',
322    'readability/multiline_comment',
323    'readability/multiline_string',
324    'readability/namespace',
325    'readability/nolint',
326    'readability/nul',
327    'readability/strings',
328    'readability/todo',
329    'readability/utf8',
330    'runtime/arrays',
331    'runtime/casting',
332    'runtime/explicit',
333    'runtime/int',
334    'runtime/init',
335    'runtime/invalid_increment',
336    'runtime/member_string_references',
337    'runtime/memset',
338    'runtime/operator',
339    'runtime/printf',
340    'runtime/printf_format',
341    'runtime/references',
342    'runtime/string',
343    'runtime/threadsafe_fn',
344    'runtime/vlog',
345    'whitespace/blank_line',
346    'whitespace/braces',
347    'whitespace/comma',
348    'whitespace/comments',
349    'whitespace/empty_conditional_body',
350    'whitespace/empty_if_body',
351    'whitespace/empty_loop_body',
352    'whitespace/end_of_line',
353    'whitespace/ending_newline',
354    'whitespace/forcolon',
355    'whitespace/indent',
356    'whitespace/indent_namespace',
357    'whitespace/line_length',
358    'whitespace/newline',
359    'whitespace/operators',
360    'whitespace/parens',
361    'whitespace/semicolon',
362    'whitespace/tab',
363    'whitespace/todo',
364    ]
365
366# keywords to use with --outputs which generate stdout for machine processing
367_MACHINE_OUTPUTS = [
368  'junit',
369  'sed',
370  'gsed'
371]
372
373# These error categories are no longer enforced by cpplint, but for backwards-
374# compatibility they may still appear in NOLINT comments.
375_LEGACY_ERROR_CATEGORIES = [
376    'build/class',
377    'readability/streams',
378    'readability/function',
379    ]
380
381# These prefixes for categories should be ignored since they relate to other
382# tools which also use the NOLINT syntax, e.g. clang-tidy.
383_OTHER_NOLINT_CATEGORY_PREFIXES = [
384    'clang-analyzer-',
385    'abseil-',
386    'altera-',
387    'android-',
388    'boost-',
389    'bugprone-',
390    'cert-',
391    'concurrency-',
392    'cppcoreguidelines-',
393    'darwin-',
394    'fuchsia-',
395    'google-',
396    'hicpp-',
397    'linuxkernel-',
398    'llvm-',
399    'llvmlibc-',
400    'misc-',
401    'modernize-',
402    'mpi-',
403    'objc-',
404    'openmp-',
405    'performance-',
406    'portability-',
407    'readability-',
408    'zircon-',
409    ]
410
411# The default state of the category filter. This is overridden by the --filter=
412# flag. By default all errors are on, so only add here categories that should be
413# off by default (i.e., categories that must be enabled by the --filter= flags).
414# All entries here should start with a '-' or '+', as in the --filter= flag.
415_DEFAULT_FILTERS = [
416    '-build/include_alpha',
417    '-readability/fn_size',
418    ]
419
420# The default list of categories suppressed for C (not C++) files.
421_DEFAULT_C_SUPPRESSED_CATEGORIES = [
422    'readability/casting',
423    ]
424
425# The default list of categories suppressed for Linux Kernel files.
426_DEFAULT_KERNEL_SUPPRESSED_CATEGORIES = [
427    'whitespace/tab',
428    ]
429
430# We used to check for high-bit characters, but after much discussion we
431# decided those were OK, as long as they were in UTF-8 and didn't represent
432# hard-coded international strings, which belong in a separate i18n file.
433
434# C++ headers
435_CPP_HEADERS = frozenset([
436    # Legacy
437    'algobase.h',
438    'algo.h',
439    'alloc.h',
440    'builtinbuf.h',
441    'bvector.h',
442    # 'complex.h', collides with System C header "complex.h" since C11
443    'defalloc.h',
444    'deque.h',
445    'editbuf.h',
446    'fstream.h',
447    'function.h',
448    'hash_map',
449    'hash_map.h',
450    'hash_set',
451    'hash_set.h',
452    'hashtable.h',
453    'heap.h',
454    'indstream.h',
455    'iomanip.h',
456    'iostream.h',
457    'istream.h',
458    'iterator.h',
459    'list.h',
460    'map.h',
461    'multimap.h',
462    'multiset.h',
463    'ostream.h',
464    'pair.h',
465    'parsestream.h',
466    'pfstream.h',
467    'procbuf.h',
468    'pthread_alloc',
469    'pthread_alloc.h',
470    'rope',
471    'rope.h',
472    'ropeimpl.h',
473    'set.h',
474    'slist',
475    'slist.h',
476    'stack.h',
477    'stdiostream.h',
478    'stl_alloc.h',
479    'stl_relops.h',
480    'streambuf.h',
481    'stream.h',
482    'strfile.h',
483    'strstream.h',
484    'tempbuf.h',
485    'tree.h',
486    'type_traits.h',
487    'vector.h',
488    # C++ library headers
489    'algorithm',
490    'array',
491    'atomic',
492    'bitset',
493    'chrono',
494    'codecvt',
495    'complex',
496    'condition_variable',
497    'deque',
498    'exception',
499    'forward_list',
500    'fstream',
501    'functional',
502    'future',
503    'initializer_list',
504    'iomanip',
505    'ios',
506    'iosfwd',
507    'iostream',
508    'istream',
509    'iterator',
510    'limits',
511    'list',
512    'locale',
513    'map',
514    'memory',
515    'mutex',
516    'new',
517    'numeric',
518    'ostream',
519    'queue',
520    'random',
521    'ratio',
522    'regex',
523    'scoped_allocator',
524    'set',
525    'sstream',
526    'stack',
527    'stdexcept',
528    'streambuf',
529    'string',
530    'strstream',
531    'system_error',
532    'thread',
533    'tuple',
534    'typeindex',
535    'typeinfo',
536    'type_traits',
537    'unordered_map',
538    'unordered_set',
539    'utility',
540    'valarray',
541    'vector',
542    # C++14 headers
543    'shared_mutex',
544    # C++17 headers
545    'any',
546    'charconv',
547    'codecvt',
548    'execution',
549    'filesystem',
550    'memory_resource',
551    'optional',
552    'string_view',
553    'variant',
554    # C++20 headers
555    'barrier',
556    'bit',
557    'compare',
558    'concepts',
559    'coroutine',
560    'format',
561    'latch'
562    'numbers',
563    'ranges',
564    'semaphore',
565    'source_location',
566    'span',
567    'stop_token',
568    'syncstream',
569    'version',
570    # C++23 headers
571    'expected',
572    'flat_map',
573    'flat_set',
574    'generator',
575    'mdspan',
576    'print',
577    'spanstream',
578    'stacktrace',
579    'stdfloat',
580    # C++ headers for C library facilities
581    'cassert',
582    'ccomplex',
583    'cctype',
584    'cerrno',
585    'cfenv',
586    'cfloat',
587    'cinttypes',
588    'ciso646',
589    'climits',
590    'clocale',
591    'cmath',
592    'csetjmp',
593    'csignal',
594    'cstdalign',
595    'cstdarg',
596    'cstdbool',
597    'cstddef',
598    'cstdint',
599    'cstdio',
600    'cstdlib',
601    'cstring',
602    'ctgmath',
603    'ctime',
604    'cuchar',
605    'cwchar',
606    'cwctype',
607    ])
608
609# C headers
610_C_HEADERS = frozenset([
611    # System C headers
612    'assert.h',
613    'complex.h',
614    'ctype.h',
615    'errno.h',
616    'fenv.h',
617    'float.h',
618    'inttypes.h',
619    'iso646.h',
620    'limits.h',
621    'locale.h',
622    'math.h',
623    'setjmp.h',
624    'signal.h',
625    'stdalign.h',
626    'stdarg.h',
627    'stdatomic.h',
628    'stdbool.h',
629    'stddef.h',
630    'stdint.h',
631    'stdio.h',
632    'stdlib.h',
633    'stdnoreturn.h',
634    'string.h',
635    'tgmath.h',
636    'threads.h',
637    'time.h',
638    'uchar.h',
639    'wchar.h',
640    'wctype.h',
641    # C23 headers
642    'stdbit.h',
643    'stdckdint.h',
644    # additional POSIX C headers
645    'aio.h',
646    'arpa/inet.h',
647    'cpio.h',
648    'dirent.h',
649    'dlfcn.h',
650    'fcntl.h',
651    'fmtmsg.h',
652    'fnmatch.h',
653    'ftw.h',
654    'glob.h',
655    'grp.h',
656    'iconv.h',
657    'langinfo.h',
658    'libgen.h',
659    'monetary.h',
660    'mqueue.h',
661    'ndbm.h',
662    'net/if.h',
663    'netdb.h',
664    'netinet/in.h',
665    'netinet/tcp.h',
666    'nl_types.h',
667    'poll.h',
668    'pthread.h',
669    'pwd.h',
670    'regex.h',
671    'sched.h',
672    'search.h',
673    'semaphore.h',
674    'setjmp.h',
675    'signal.h',
676    'spawn.h',
677    'strings.h',
678    'stropts.h',
679    'syslog.h',
680    'tar.h',
681    'termios.h',
682    'trace.h',
683    'ulimit.h',
684    'unistd.h',
685    'utime.h',
686    'utmpx.h',
687    'wordexp.h',
688    # additional GNUlib headers
689    'a.out.h',
690    'aliases.h',
691    'alloca.h',
692    'ar.h',
693    'argp.h',
694    'argz.h',
695    'byteswap.h',
696    'crypt.h',
697    'endian.h',
698    'envz.h',
699    'err.h',
700    'error.h',
701    'execinfo.h',
702    'fpu_control.h',
703    'fstab.h',
704    'fts.h',
705    'getopt.h',
706    'gshadow.h',
707    'ieee754.h',
708    'ifaddrs.h',
709    'libintl.h',
710    'mcheck.h',
711    'mntent.h',
712    'obstack.h',
713    'paths.h',
714    'printf.h',
715    'pty.h',
716    'resolv.h',
717    'shadow.h',
718    'sysexits.h',
719    'ttyent.h',
720    # Additional linux glibc headers
721    'dlfcn.h',
722    'elf.h',
723    'features.h',
724    'gconv.h',
725    'gnu-versions.h',
726    'lastlog.h',
727    'libio.h',
728    'link.h',
729    'malloc.h',
730    'memory.h',
731    'netash/ash.h',
732    'netatalk/at.h',
733    'netax25/ax25.h',
734    'neteconet/ec.h',
735    'netipx/ipx.h',
736    'netiucv/iucv.h',
737    'netpacket/packet.h',
738    'netrom/netrom.h',
739    'netrose/rose.h',
740    'nfs/nfs.h',
741    'nl_types.h',
742    'nss.h',
743    're_comp.h',
744    'regexp.h',
745    'sched.h',
746    'sgtty.h',
747    'stab.h',
748    'stdc-predef.h',
749    'stdio_ext.h',
750    'syscall.h',
751    'termio.h',
752    'thread_db.h',
753    'ucontext.h',
754    'ustat.h',
755    'utmp.h',
756    'values.h',
757    'wait.h',
758    'xlocale.h',
759    # Hardware specific headers
760    'arm_neon.h',
761    'emmintrin.h',
762    'xmmintin.h',
763    ])
764
765# Folders of C libraries so commonly used in C++,
766# that they have parity with standard C libraries.
767C_STANDARD_HEADER_FOLDERS = frozenset([
768    # standard C library
769    "sys",
770    # glibc for linux
771    "arpa",
772    "asm-generic",
773    "bits",
774    "gnu",
775    "net",
776    "netinet",
777    "protocols",
778    "rpc",
779    "rpcsvc",
780    "scsi",
781    # linux kernel header
782    "drm",
783    "linux",
784    "misc",
785    "mtd",
786    "rdma",
787    "sound",
788    "video",
789    "xen",
790  ])
791
792# Type names
793_TYPES = re.compile(
794    r'^(?:'
795    # [dcl.type.simple]
796    r'(char(16_t|32_t)?)|wchar_t|'
797    r'bool|short|int|long|signed|unsigned|float|double|'
798    # [support.types]
799    r'(ptrdiff_t|size_t|max_align_t|nullptr_t)|'
800    # [cstdint.syn]
801    r'(u?int(_fast|_least)?(8|16|32|64)_t)|'
802    r'(u?int(max|ptr)_t)|'
803    r')$')
804
805
806# These headers are excluded from [build/include] and [build/include_order]
807# checks:
808# - Anything not following google file name conventions (containing an
809#   uppercase character, such as Python.h or nsStringAPI.h, for example).
810# - Lua headers.
811_THIRD_PARTY_HEADERS_PATTERN = re.compile(
812    r'^(?:[^/]*[A-Z][^/]*\.h|lua\.h|lauxlib\.h|lualib\.h)$')
813
814# Pattern for matching FileInfo.BaseName() against test file name
815_test_suffixes = ['_test', '_regtest', '_unittest']
816_TEST_FILE_SUFFIX = '(' + '|'.join(_test_suffixes) + r')$'
817
818# Pattern that matches only complete whitespace, possibly across multiple lines.
819_EMPTY_CONDITIONAL_BODY_PATTERN = re.compile(r'^\s*$', re.DOTALL)
820
821# Assertion macros.  These are defined in base/logging.h and
822# testing/base/public/gunit.h.
823_CHECK_MACROS = [
824    'DCHECK', 'CHECK',
825    'EXPECT_TRUE', 'ASSERT_TRUE',
826    'EXPECT_FALSE', 'ASSERT_FALSE',
827    ]
828
829# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE
830_CHECK_REPLACEMENT = dict([(macro_var, {}) for macro_var in _CHECK_MACROS])
831
832for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
833                        ('>=', 'GE'), ('>', 'GT'),
834                        ('<=', 'LE'), ('<', 'LT')]:
835  _CHECK_REPLACEMENT['DCHECK'][op] = f'DCHECK_{replacement}'
836  _CHECK_REPLACEMENT['CHECK'][op] = f'CHECK_{replacement}'
837  _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = f'EXPECT_{replacement}'
838  _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = f'ASSERT_{replacement}'
839
840for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
841                            ('>=', 'LT'), ('>', 'LE'),
842                            ('<=', 'GT'), ('<', 'GE')]:
843  _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = f'EXPECT_{inv_replacement}'
844  _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = f'ASSERT_{inv_replacement}'
845
846# Alternative tokens and their replacements.  For full list, see section 2.5
847# Alternative tokens [lex.digraph] in the C++ standard.
848#
849# Digraphs (such as '%:') are not included here since it's a mess to
850# match those on a word boundary.
851_ALT_TOKEN_REPLACEMENT = {
852    'and': '&&',
853    'bitor': '|',
854    'or': '||',
855    'xor': '^',
856    'compl': '~',
857    'bitand': '&',
858    'and_eq': '&=',
859    'or_eq': '|=',
860    'xor_eq': '^=',
861    'not': '!',
862    'not_eq': '!='
863    }
864
865# Compile regular expression that matches all the above keywords.  The "[ =()]"
866# bit is meant to avoid matching these keywords outside of boolean expressions.
867#
868# False positives include C-style multi-line comments and multi-line strings
869# but those have always been troublesome for cpplint.
870_ALT_TOKEN_REPLACEMENT_PATTERN = re.compile(
871    r'([ =()])(' + ('|'.join(_ALT_TOKEN_REPLACEMENT.keys())) + r')([ (]|$)')
872
873
874# These constants define types of headers for use with
875# _IncludeState.CheckNextIncludeOrder().
876_C_SYS_HEADER = 1
877_CPP_SYS_HEADER = 2
878_OTHER_SYS_HEADER = 3
879_LIKELY_MY_HEADER = 4
880_POSSIBLE_MY_HEADER = 5
881_OTHER_HEADER = 6
882
883# These constants define the current inline assembly state
884_NO_ASM = 0       # Outside of inline assembly block
885_INSIDE_ASM = 1   # Inside inline assembly block
886_END_ASM = 2      # Last line of inline assembly block
887_BLOCK_ASM = 3    # The whole block is an inline assembly block
888
889# Match start of assembly blocks
890_MATCH_ASM = re.compile(r'^\s*(?:asm|_asm|__asm|__asm__)'
891                        r'(?:\s+(volatile|__volatile__))?'
892                        r'\s*[{(]')
893
894# Match strings that indicate we're working on a C (not C++) file.
895_SEARCH_C_FILE = re.compile(r'\b(?:LINT_C_FILE|'
896                            r'vim?:\s*.*(\s*|:)filetype=c(\s*|:|$))')
897
898# Match string that indicates we're working on a Linux Kernel file.
899_SEARCH_KERNEL_FILE = re.compile(r'\b(?:LINT_KERNEL_FILE)')
900
901# Commands for sed to fix the problem
902_SED_FIXUPS = {
903  'Remove spaces around =': r's/ = /=/',
904  'Remove spaces around !=': r's/ != /!=/',
905  'Remove space before ( in if (': r's/if (/if(/',
906  'Remove space before ( in for (': r's/for (/for(/',
907  'Remove space before ( in while (': r's/while (/while(/',
908  'Remove space before ( in switch (': r's/switch (/switch(/',
909  'Should have a space between // and comment': r's/\/\//\/\/ /',
910  'Missing space before {': r's/\([^ ]\){/\1 {/',
911  'Tab found, replace by spaces': r's/\t/  /g',
912  'Line ends in whitespace.  Consider deleting these extra spaces.': r's/\s*$//',
913  'You don\'t need a ; after a }': r's/};/}/',
914  'Missing space after ,': r's/,\([^ ]\)/, \1/g',
915}
916
917# {str, set(int)}: a map from error categories to sets of linenumbers
918# on which those errors are expected and should be suppressed.
919_error_suppressions = {}
920
921# The root directory used for deriving header guard CPP variable.
922# This is set by --root flag.
923_root = None
924_root_debug = False
925
926# The top level repository directory. If set, _root is calculated relative to
927# this directory instead of the directory containing version control artifacts.
928# This is set by the --repository flag.
929_repository = None
930
931# Files to exclude from linting. This is set by the --exclude flag.
932_excludes = None
933
934# Whether to suppress all PrintInfo messages, UNRELATED to --quiet flag
935_quiet = False
936
937# The allowed line length of files.
938# This is set by --linelength flag.
939_line_length = 80
940
941# This allows to use different include order rule than default
942_include_order = "default"
943
944# This allows different config files to be used
945_config_filename = "CPPLINT.cfg"
946
947# Treat all headers starting with 'h' equally: .h, .hpp, .hxx etc.
948# This is set by --headers flag.
949_hpp_headers = set([])
950
951class ErrorSuppressions:
952  """Class to track all error suppressions for cpplint"""
953
954  class LineRange:
955    """Class to represent a range of line numbers for which an error is suppressed"""
956    def __init__(self, begin, end):
957      self.begin = begin
958      self.end = end
959
960    def __str__(self):
961      return f'[{self.begin}-{self.end}]'
962
963    def __contains__(self, obj):
964      return self.begin <= obj <= self.end
965
966    def ContainsRange(self, other):
967      return self.begin <= other.begin and self.end >= other.end
968
969  def __init__(self):
970    self._suppressions = collections.defaultdict(list)
971    self._open_block_suppression = None
972
973  def _AddSuppression(self, category, line_range):
974    suppressed = self._suppressions[category]
975    if not (suppressed and suppressed[-1].ContainsRange(line_range)):
976      suppressed.append(line_range)
977
978  def GetOpenBlockStart(self):
979    """:return: The start of the current open block or `-1` if there is not an open block"""
980    return self._open_block_suppression.begin if self._open_block_suppression else -1
981
982  def AddGlobalSuppression(self, category):
983    """Add a suppression for `category` which is suppressed for the whole file"""
984    self._AddSuppression(category, self.LineRange(0, math.inf))
985
986  def AddLineSuppression(self, category, linenum):
987    """Add a suppression for `category` which is suppressed only on `linenum`"""
988    self._AddSuppression(category, self.LineRange(linenum, linenum))
989
990  def StartBlockSuppression(self, category, linenum):
991    """Start a suppression block for `category` on `linenum`. inclusive"""
992    if self._open_block_suppression is None:
993      self._open_block_suppression = self.LineRange(linenum, math.inf)
994    self._AddSuppression(category, self._open_block_suppression)
995
996  def EndBlockSuppression(self, linenum):
997    """End the current block suppression on `linenum`. inclusive"""
998    if self._open_block_suppression:
999      self._open_block_suppression.end = linenum
1000      self._open_block_suppression = None
1001
1002  def IsSuppressed(self, category, linenum):
1003    """:return: `True` if `category` is suppressed for `linenum`"""
1004    suppressed = self._suppressions[category] + self._suppressions[None]
1005    return any(linenum in lr for lr in suppressed)
1006
1007  def HasOpenBlock(self):
1008    """:return: `True` if a block suppression was started but not ended"""
1009    return self._open_block_suppression is not None
1010
1011  def Clear(self):
1012    """Clear all current error suppressions"""
1013    self._suppressions.clear()
1014    self._open_block_suppression = None
1015
1016_error_suppressions = ErrorSuppressions()
1017
1018def ProcessHppHeadersOption(val):
1019  global _hpp_headers
1020  try:
1021    _hpp_headers = {ext.strip() for ext in val.split(',')}
1022  except ValueError:
1023    PrintUsage('Header extensions must be comma separated list.')
1024
1025def ProcessIncludeOrderOption(val):
1026  if val is None or val == "default":
1027    pass
1028  elif val == "standardcfirst":
1029    global _include_order
1030    _include_order = val
1031  else:
1032    PrintUsage('Invalid includeorder value %s. Expected default|standardcfirst')
1033
1034def IsHeaderExtension(file_extension):
1035  return file_extension in GetHeaderExtensions()
1036
1037def GetHeaderExtensions():
1038  if _hpp_headers:
1039    return _hpp_headers
1040  if _valid_extensions:
1041    return {h for h in _valid_extensions if 'h' in h}
1042  return set(['h', 'hh', 'hpp', 'hxx', 'h++', 'cuh'])
1043
1044# The allowed extensions for file names
1045# This is set by --extensions flag
1046def GetAllExtensions():
1047  return GetHeaderExtensions().union(_valid_extensions or set(
1048    ['c', 'cc', 'cpp', 'cxx', 'c++', 'cu']))
1049
1050def ProcessExtensionsOption(val):
1051  global _valid_extensions
1052  try:
1053    extensions = [ext.strip() for ext in val.split(',')]
1054    _valid_extensions = set(extensions)
1055  except ValueError:
1056    PrintUsage('Extensions should be a comma-separated list of values;'
1057               'for example: extensions=hpp,cpp\n'
1058              f'This could not be parsed: "{val}"')
1059
1060def GetNonHeaderExtensions():
1061  return GetAllExtensions().difference(GetHeaderExtensions())
1062
1063def ParseNolintSuppressions(filename, raw_line, linenum, error):
1064  """Updates the global list of line error-suppressions.
1065
1066  Parses any NOLINT comments on the current line, updating the global
1067  error_suppressions store.  Reports an error if the NOLINT comment
1068  was malformed.
1069
1070  Args:
1071    filename: str, the name of the input file.
1072    raw_line: str, the line of input text, with comments.
1073    linenum: int, the number of the current line.
1074    error: function, an error handler.
1075  """
1076  matched = re.search(r'\bNOLINT(NEXTLINE|BEGIN|END)?\b(\([^)]+\))?', raw_line)
1077  if matched:
1078    no_lint_type = matched.group(1)
1079    if no_lint_type == 'NEXTLINE':
1080      def ProcessCategory(category):
1081        _error_suppressions.AddLineSuppression(category, linenum + 1)
1082    elif no_lint_type == 'BEGIN':
1083      if _error_suppressions.HasOpenBlock():
1084        error(filename, linenum, 'readability/nolint', 5,
1085              f'NONLINT block already defined on line {_error_suppressions.GetOpenBlockStart()}')
1086
1087      def ProcessCategory(category):
1088        _error_suppressions.StartBlockSuppression(category, linenum)
1089    elif no_lint_type == 'END':
1090      if not _error_suppressions.HasOpenBlock():
1091        error(filename, linenum, 'readability/nolint', 5, 'Not in a NOLINT block')
1092
1093      def ProcessCategory(category):
1094        if category is not None:
1095          error(filename, linenum, 'readability/nolint', 5,
1096                f'NOLINT categories not supported in block END: {category}')
1097        _error_suppressions.EndBlockSuppression(linenum)
1098    else:
1099      def ProcessCategory(category):
1100        _error_suppressions.AddLineSuppression(category, linenum)
1101    categories = matched.group(2)
1102    if categories in (None, '(*)'):  # => "suppress all"
1103      ProcessCategory(None)
1104    elif categories.startswith('(') and categories.endswith(')'):
1105      for category in set(map(lambda c: c.strip(), categories[1:-1].split(','))):
1106        if category in _ERROR_CATEGORIES:
1107          ProcessCategory(category)
1108        elif any(c for c in _OTHER_NOLINT_CATEGORY_PREFIXES if category.startswith(c)):
1109          # Ignore any categories from other tools.
1110          pass
1111        elif category not in _LEGACY_ERROR_CATEGORIES:
1112          error(filename, linenum, 'readability/nolint', 5,
1113                f'Unknown NOLINT error category: {category}')
1114
1115def ProcessGlobalSuppresions(lines):
1116  """Deprecated; use ProcessGlobalSuppressions."""
1117  ProcessGlobalSuppressions(lines)
1118
1119def ProcessGlobalSuppressions(lines):
1120  """Updates the list of global error suppressions.
1121
1122  Parses any lint directives in the file that have global effect.
1123
1124  Args:
1125    lines: An array of strings, each representing a line of the file, with the
1126           last element being empty if the file is terminated with a newline.
1127  """
1128  for line in lines:
1129    if _SEARCH_C_FILE.search(line):
1130      for category in _DEFAULT_C_SUPPRESSED_CATEGORIES:
1131        _error_suppressions.AddGlobalSuppression(category)
1132    if _SEARCH_KERNEL_FILE.search(line):
1133      for category in _DEFAULT_KERNEL_SUPPRESSED_CATEGORIES:
1134        _error_suppressions.AddGlobalSuppression(category)
1135
1136
1137def ResetNolintSuppressions():
1138  """Resets the set of NOLINT suppressions to empty."""
1139  _error_suppressions.Clear()
1140
1141
1142def IsErrorSuppressedByNolint(category, linenum):
1143  """Returns true if the specified error category is suppressed on this line.
1144
1145  Consults the global error_suppressions map populated by
1146  ParseNolintSuppressions/ProcessGlobalSuppressions/ResetNolintSuppressions.
1147
1148  Args:
1149    category: str, the category of the error.
1150    linenum: int, the current line number.
1151  Returns:
1152    bool, True iff the error should be suppressed due to a NOLINT comment,
1153    block suppression or global suppression.
1154  """
1155  return _error_suppressions.IsSuppressed(category, linenum)
1156
1157
1158def _IsSourceExtension(s):
1159  """File extension (excluding dot) matches a source file extension."""
1160  return s in GetNonHeaderExtensions()
1161
1162
1163class _IncludeState(object):
1164  """Tracks line numbers for includes, and the order in which includes appear.
1165
1166  include_list contains list of lists of (header, line number) pairs.
1167  It's a lists of lists rather than just one flat list to make it
1168  easier to update across preprocessor boundaries.
1169
1170  Call CheckNextIncludeOrder() once for each header in the file, passing
1171  in the type constants defined above. Calls in an illegal order will
1172  raise an _IncludeError with an appropriate error message.
1173
1174  """
1175  # self._section will move monotonically through this set. If it ever
1176  # needs to move backwards, CheckNextIncludeOrder will raise an error.
1177  _INITIAL_SECTION = 0
1178  _MY_H_SECTION = 1
1179  _C_SECTION = 2
1180  _CPP_SECTION = 3
1181  _OTHER_SYS_SECTION = 4
1182  _OTHER_H_SECTION = 5
1183
1184  _TYPE_NAMES = {
1185      _C_SYS_HEADER: 'C system header',
1186      _CPP_SYS_HEADER: 'C++ system header',
1187      _OTHER_SYS_HEADER: 'other system header',
1188      _LIKELY_MY_HEADER: 'header this file implements',
1189      _POSSIBLE_MY_HEADER: 'header this file may implement',
1190      _OTHER_HEADER: 'other header',
1191      }
1192  _SECTION_NAMES = {
1193      _INITIAL_SECTION: "... nothing. (This can't be an error.)",
1194      _MY_H_SECTION: 'a header this file implements',
1195      _C_SECTION: 'C system header',
1196      _CPP_SECTION: 'C++ system header',
1197      _OTHER_SYS_SECTION: 'other system header',
1198      _OTHER_H_SECTION: 'other header',
1199      }
1200
1201  def __init__(self):
1202    self.include_list = [[]]
1203    self._section = None
1204    self._last_header = None
1205    self.ResetSection('')
1206
1207  def FindHeader(self, header):
1208    """Check if a header has already been included.
1209
1210    Args:
1211      header: header to check.
1212    Returns:
1213      Line number of previous occurrence, or -1 if the header has not
1214      been seen before.
1215    """
1216    for section_list in self.include_list:
1217      for f in section_list:
1218        if f[0] == header:
1219          return f[1]
1220    return -1
1221
1222  def ResetSection(self, directive):
1223    """Reset section checking for preprocessor directive.
1224
1225    Args:
1226      directive: preprocessor directive (e.g. "if", "else").
1227    """
1228    # The name of the current section.
1229    self._section = self._INITIAL_SECTION
1230    # The path of last found header.
1231    self._last_header = ''
1232
1233    # Update list of includes.  Note that we never pop from the
1234    # include list.
1235    if directive in ('if', 'ifdef', 'ifndef'):
1236      self.include_list.append([])
1237    elif directive in ('else', 'elif'):
1238      self.include_list[-1] = []
1239
1240  def SetLastHeader(self, header_path):
1241    self._last_header = header_path
1242
1243  def CanonicalizeAlphabeticalOrder(self, header_path):
1244    """Returns a path canonicalized for alphabetical comparison.
1245
1246    - replaces "-" with "_" so they both cmp the same.
1247    - removes '-inl' since we don't require them to be after the main header.
1248    - lowercase everything, just in case.
1249
1250    Args:
1251      header_path: Path to be canonicalized.
1252
1253    Returns:
1254      Canonicalized path.
1255    """
1256    return header_path.replace('-inl.h', '.h').replace('-', '_').lower()
1257
1258  def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path):
1259    """Check if a header is in alphabetical order with the previous header.
1260
1261    Args:
1262      clean_lines: A CleansedLines instance containing the file.
1263      linenum: The number of the line to check.
1264      header_path: Canonicalized header to be checked.
1265
1266    Returns:
1267      Returns true if the header is in alphabetical order.
1268    """
1269    # If previous section is different from current section, _last_header will
1270    # be reset to empty string, so it's always less than current header.
1271    #
1272    # If previous line was a blank line, assume that the headers are
1273    # intentionally sorted the way they are.
1274    if (self._last_header > header_path and
1275        re.match(r'^\s*#\s*include\b', clean_lines.elided[linenum - 1])):
1276      return False
1277    return True
1278
1279  def CheckNextIncludeOrder(self, header_type):
1280    """Returns a non-empty error message if the next header is out of order.
1281
1282    This function also updates the internal state to be ready to check
1283    the next include.
1284
1285    Args:
1286      header_type: One of the _XXX_HEADER constants defined above.
1287
1288    Returns:
1289      The empty string if the header is in the right order, or an
1290      error message describing what's wrong.
1291
1292    """
1293    error_message = (f'Found {self._TYPE_NAMES[header_type]}'
1294                     f' after {self._SECTION_NAMES[self._section]}')
1295
1296    last_section = self._section
1297
1298    if header_type == _C_SYS_HEADER:
1299      if self._section <= self._C_SECTION:
1300        self._section = self._C_SECTION
1301      else:
1302        self._last_header = ''
1303        return error_message
1304    elif header_type == _CPP_SYS_HEADER:
1305      if self._section <= self._CPP_SECTION:
1306        self._section = self._CPP_SECTION
1307      else:
1308        self._last_header = ''
1309        return error_message
1310    elif header_type == _OTHER_SYS_HEADER:
1311      if self._section <= self._OTHER_SYS_SECTION:
1312        self._section = self._OTHER_SYS_SECTION
1313      else:
1314        self._last_header = ''
1315        return error_message
1316    elif header_type == _LIKELY_MY_HEADER:
1317      if self._section <= self._MY_H_SECTION:
1318        self._section = self._MY_H_SECTION
1319      else:
1320        self._section = self._OTHER_H_SECTION
1321    elif header_type == _POSSIBLE_MY_HEADER:
1322      if self._section <= self._MY_H_SECTION:
1323        self._section = self._MY_H_SECTION
1324      else:
1325        # This will always be the fallback because we're not sure
1326        # enough that the header is associated with this file.
1327        self._section = self._OTHER_H_SECTION
1328    else:
1329      assert header_type == _OTHER_HEADER
1330      self._section = self._OTHER_H_SECTION
1331
1332    if last_section != self._section:
1333      self._last_header = ''
1334
1335    return ''
1336
1337
1338class _CppLintState(object):
1339  """Maintains module-wide state.."""
1340
1341  def __init__(self):
1342    self.verbose_level = 1  # global setting.
1343    self.error_count = 0    # global count of reported errors
1344    # filters to apply when emitting error messages
1345    self.filters = _DEFAULT_FILTERS[:]
1346    # backup of filter list. Used to restore the state after each file.
1347    self._filters_backup = self.filters[:]
1348    self.counting = 'total'  # In what way are we counting errors?
1349    self.errors_by_category = {}  # string to int dict storing error counts
1350    self.quiet = False  # Suppress non-error messages?
1351
1352    # output format:
1353    # "emacs" - format that emacs can parse (default)
1354    # "eclipse" - format that eclipse can parse
1355    # "vs7" - format that Microsoft Visual Studio 7 can parse
1356    # "junit" - format that Jenkins, Bamboo, etc can parse
1357    # "sed" - returns a gnu sed command to fix the problem
1358    # "gsed" - like sed, but names the command gsed, e.g. for macOS homebrew users
1359    self.output_format = 'emacs'
1360
1361    # For JUnit output, save errors and failures until the end so that they
1362    # can be written into the XML
1363    self._junit_errors = []
1364    self._junit_failures = []
1365
1366  def SetOutputFormat(self, output_format):
1367    """Sets the output format for errors."""
1368    self.output_format = output_format
1369
1370  def SetQuiet(self, quiet):
1371    """Sets the module's quiet settings, and returns the previous setting."""
1372    last_quiet = self.quiet
1373    self.quiet = quiet
1374    return last_quiet
1375
1376  def SetVerboseLevel(self, level):
1377    """Sets the module's verbosity, and returns the previous setting."""
1378    last_verbose_level = self.verbose_level
1379    self.verbose_level = level
1380    return last_verbose_level
1381
1382  def SetCountingStyle(self, counting_style):
1383    """Sets the module's counting options."""
1384    self.counting = counting_style
1385
1386  def SetFilters(self, filters):
1387    """Sets the error-message filters.
1388
1389    These filters are applied when deciding whether to emit a given
1390    error message.
1391
1392    Args:
1393      filters: A string of comma-separated filters (eg "+whitespace/indent").
1394               Each filter should start with + or -; else we die.
1395
1396    Raises:
1397      ValueError: The comma-separated filters did not all start with '+' or '-'.
1398                  E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
1399    """
1400    # Default filters always have less priority than the flag ones.
1401    self.filters = _DEFAULT_FILTERS[:]
1402    self.AddFilters(filters)
1403
1404  def AddFilters(self, filters):
1405    """ Adds more filters to the existing list of error-message filters. """
1406    for filt in filters.split(','):
1407      clean_filt = filt.strip()
1408      if clean_filt:
1409        self.filters.append(clean_filt)
1410    for filt in self.filters:
1411      if not (filt.startswith('+') or filt.startswith('-')):
1412        raise ValueError('Every filter in --filters must start with + or -'
1413                        f' ({filt} does not)')
1414
1415  def BackupFilters(self):
1416    """ Saves the current filter list to backup storage."""
1417    self._filters_backup = self.filters[:]
1418
1419  def RestoreFilters(self):
1420    """ Restores filters previously backed up."""
1421    self.filters = self._filters_backup[:]
1422
1423  def ResetErrorCounts(self):
1424    """Sets the module's error statistic back to zero."""
1425    self.error_count = 0
1426    self.errors_by_category = {}
1427
1428  def IncrementErrorCount(self, category):
1429    """Bumps the module's error statistic."""
1430    self.error_count += 1
1431    if self.counting in ('toplevel', 'detailed'):
1432      if self.counting != 'detailed':
1433        category = category.split('/')[0]
1434      if category not in self.errors_by_category:
1435        self.errors_by_category[category] = 0
1436      self.errors_by_category[category] += 1
1437
1438  def PrintErrorCounts(self):
1439    """Print a summary of errors by category, and the total."""
1440    for category, count in sorted(dict.items(self.errors_by_category)):
1441      self.PrintInfo(f'Category \'{category}\' errors found: {count}\n')
1442    if self.error_count > 0:
1443      self.PrintInfo(f'Total errors found: {self.error_count}\n')
1444
1445  def PrintInfo(self, message):
1446    # _quiet does not represent --quiet flag.
1447    # Hide infos from stdout to keep stdout pure for machine consumption
1448    if not _quiet and self.output_format not in _MACHINE_OUTPUTS:
1449      sys.stdout.write(message)
1450
1451  def PrintError(self, message):
1452    if self.output_format == 'junit':
1453      self._junit_errors.append(message)
1454    else:
1455      sys.stderr.write(message)
1456
1457  def AddJUnitFailure(self, filename, linenum, message, category, confidence):
1458    self._junit_failures.append((filename, linenum, message, category,
1459        confidence))
1460
1461  def FormatJUnitXML(self):
1462    num_errors = len(self._junit_errors)
1463    num_failures = len(self._junit_failures)
1464
1465    testsuite = xml.etree.ElementTree.Element('testsuite')
1466    testsuite.attrib['errors'] = str(num_errors)
1467    testsuite.attrib['failures'] = str(num_failures)
1468    testsuite.attrib['name'] = 'cpplint'
1469
1470    if num_errors == 0 and num_failures == 0:
1471      testsuite.attrib['tests'] = str(1)
1472      xml.etree.ElementTree.SubElement(testsuite, 'testcase', name='passed')
1473
1474    else:
1475      testsuite.attrib['tests'] = str(num_errors + num_failures)
1476      if num_errors > 0:
1477        testcase = xml.etree.ElementTree.SubElement(testsuite, 'testcase')
1478        testcase.attrib['name'] = 'errors'
1479        error = xml.etree.ElementTree.SubElement(testcase, 'error')
1480        error.text = '\n'.join(self._junit_errors)
1481      if num_failures > 0:
1482        # Group failures by file
1483        failed_file_order = []
1484        failures_by_file = {}
1485        for failure in self._junit_failures:
1486          failed_file = failure[0]
1487          if failed_file not in failed_file_order:
1488            failed_file_order.append(failed_file)
1489            failures_by_file[failed_file] = []
1490          failures_by_file[failed_file].append(failure)
1491        # Create a testcase for each file
1492        for failed_file in failed_file_order:
1493          failures = failures_by_file[failed_file]
1494          testcase = xml.etree.ElementTree.SubElement(testsuite, 'testcase')
1495          testcase.attrib['name'] = failed_file
1496          failure = xml.etree.ElementTree.SubElement(testcase, 'failure')
1497          template = '{0}: {1} [{2}] [{3}]'
1498          texts = [template.format(f[1], f[2], f[3], f[4]) for f in failures]
1499          failure.text = '\n'.join(texts)
1500
1501    xml_decl = '<?xml version="1.0" encoding="UTF-8" ?>\n'
1502    return xml_decl + xml.etree.ElementTree.tostring(testsuite, 'utf-8').decode('utf-8')
1503
1504
1505_cpplint_state = _CppLintState()
1506
1507
1508def _OutputFormat():
1509  """Gets the module's output format."""
1510  return _cpplint_state.output_format
1511
1512
1513def _SetOutputFormat(output_format):
1514  """Sets the module's output format."""
1515  _cpplint_state.SetOutputFormat(output_format)
1516
1517def _Quiet():
1518  """Return's the module's quiet setting."""
1519  return _cpplint_state.quiet
1520
1521def _SetQuiet(quiet):
1522  """Set the module's quiet status, and return previous setting."""
1523  return _cpplint_state.SetQuiet(quiet)
1524
1525
1526def _VerboseLevel():
1527  """Returns the module's verbosity setting."""
1528  return _cpplint_state.verbose_level
1529
1530
1531def _SetVerboseLevel(level):
1532  """Sets the module's verbosity, and returns the previous setting."""
1533  return _cpplint_state.SetVerboseLevel(level)
1534
1535
1536def _SetCountingStyle(level):
1537  """Sets the module's counting options."""
1538  _cpplint_state.SetCountingStyle(level)
1539
1540
1541def _Filters():
1542  """Returns the module's list of output filters, as a list."""
1543  return _cpplint_state.filters
1544
1545
1546def _SetFilters(filters):
1547  """Sets the module's error-message filters.
1548
1549  These filters are applied when deciding whether to emit a given
1550  error message.
1551
1552  Args:
1553    filters: A string of comma-separated filters (eg "whitespace/indent").
1554             Each filter should start with + or -; else we die.
1555  """
1556  _cpplint_state.SetFilters(filters)
1557
1558def _AddFilters(filters):
1559  """Adds more filter overrides.
1560
1561  Unlike _SetFilters, this function does not reset the current list of filters
1562  available.
1563
1564  Args:
1565    filters: A string of comma-separated filters (eg "whitespace/indent").
1566             Each filter should start with + or -; else we die.
1567  """
1568  _cpplint_state.AddFilters(filters)
1569
1570def _BackupFilters():
1571  """ Saves the current filter list to backup storage."""
1572  _cpplint_state.BackupFilters()
1573
1574def _RestoreFilters():
1575  """ Restores filters previously backed up."""
1576  _cpplint_state.RestoreFilters()
1577
1578class _FunctionState(object):
1579  """Tracks current function name and the number of lines in its body."""
1580
1581  _NORMAL_TRIGGER = 250  # for --v=0, 500 for --v=1, etc.
1582  _TEST_TRIGGER = 400    # about 50% more than _NORMAL_TRIGGER.
1583
1584  def __init__(self):
1585    self.in_a_function = False
1586    self.lines_in_function = 0
1587    self.current_function = ''
1588
1589  def Begin(self, function_name):
1590    """Start analyzing function body.
1591
1592    Args:
1593      function_name: The name of the function being tracked.
1594    """
1595    self.in_a_function = True
1596    self.lines_in_function = 0
1597    self.current_function = function_name
1598
1599  def Count(self):
1600    """Count line in current function body."""
1601    if self.in_a_function:
1602      self.lines_in_function += 1
1603
1604  def Check(self, error, filename, linenum):
1605    """Report if too many lines in function body.
1606
1607    Args:
1608      error: The function to call with any errors found.
1609      filename: The name of the current file.
1610      linenum: The number of the line to check.
1611    """
1612    if not self.in_a_function:
1613      return
1614
1615    if re.match(r'T(EST|est)', self.current_function):
1616      base_trigger = self._TEST_TRIGGER
1617    else:
1618      base_trigger = self._NORMAL_TRIGGER
1619    trigger = base_trigger * 2**_VerboseLevel()
1620
1621    if self.lines_in_function > trigger:
1622      error_level = int(math.log(self.lines_in_function / base_trigger, 2))
1623      # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ...
1624      if error_level > 5:
1625        error_level = 5
1626      error(filename, linenum, 'readability/fn_size', error_level,
1627            'Small and focused functions are preferred:'
1628            f' {self.current_function} has {self.lines_in_function} non-comment lines'
1629            f' (error triggered by exceeding {trigger} lines).')
1630
1631  def End(self):
1632    """Stop analyzing function body."""
1633    self.in_a_function = False
1634
1635
1636class _IncludeError(Exception):
1637  """Indicates a problem with the include order in a file."""
1638  pass
1639
1640
1641class FileInfo(object):
1642  """Provides utility functions for filenames.
1643
1644  FileInfo provides easy access to the components of a file's path
1645  relative to the project root.
1646  """
1647
1648  def __init__(self, filename):
1649    self._filename = filename
1650
1651  def FullName(self):
1652    """Make Windows paths like Unix."""
1653    return os.path.abspath(self._filename).replace('\\', '/')
1654
1655  def RepositoryName(self):
1656    r"""FullName after removing the local path to the repository.
1657
1658    If we have a real absolute path name here we can try to do something smart:
1659    detecting the root of the checkout and truncating /path/to/checkout from
1660    the name so that we get header guards that don't include things like
1661    "C:\\Documents and Settings\\..." or "/home/username/..." in them and thus
1662    people on different computers who have checked the source out to different
1663    locations won't see bogus errors.
1664    """
1665    fullname = self.FullName()
1666
1667    if os.path.exists(fullname):
1668      project_dir = os.path.dirname(fullname)
1669
1670      # If the user specified a repository path, it exists, and the file is
1671      # contained in it, use the specified repository path
1672      if _repository:
1673        repo = FileInfo(_repository).FullName()
1674        root_dir = project_dir
1675        while os.path.exists(root_dir):
1676          # allow case insensitive compare on Windows
1677          if os.path.normcase(root_dir) == os.path.normcase(repo):
1678            return os.path.relpath(fullname, root_dir).replace('\\', '/')
1679          one_up_dir = os.path.dirname(root_dir)
1680          if one_up_dir == root_dir:
1681            break
1682          root_dir = one_up_dir
1683
1684      if os.path.exists(os.path.join(project_dir, ".svn")):
1685        # If there's a .svn file in the current directory, we recursively look
1686        # up the directory tree for the top of the SVN checkout
1687        root_dir = project_dir
1688        one_up_dir = os.path.dirname(root_dir)
1689        while os.path.exists(os.path.join(one_up_dir, ".svn")):
1690          root_dir = os.path.dirname(root_dir)
1691          one_up_dir = os.path.dirname(one_up_dir)
1692
1693        prefix = os.path.commonprefix([root_dir, project_dir])
1694        return fullname[len(prefix) + 1:]
1695
1696      # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by
1697      # searching up from the current path.
1698      root_dir = current_dir = os.path.dirname(fullname)
1699      while current_dir != os.path.dirname(current_dir):
1700        if (os.path.exists(os.path.join(current_dir, ".git")) or
1701            os.path.exists(os.path.join(current_dir, ".hg")) or
1702            os.path.exists(os.path.join(current_dir, ".svn"))):
1703          root_dir = current_dir
1704          break
1705        current_dir = os.path.dirname(current_dir)
1706
1707      if (os.path.exists(os.path.join(root_dir, ".git")) or
1708          os.path.exists(os.path.join(root_dir, ".hg")) or
1709          os.path.exists(os.path.join(root_dir, ".svn"))):
1710        prefix = os.path.commonprefix([root_dir, project_dir])
1711        return fullname[len(prefix) + 1:]
1712
1713    # Don't know what to do; header guard warnings may be wrong...
1714    return fullname
1715
1716  def Split(self):
1717    """Splits the file into the directory, basename, and extension.
1718
1719    For 'chrome/browser/browser.cc', Split() would
1720    return ('chrome/browser', 'browser', '.cc')
1721
1722    Returns:
1723      A tuple of (directory, basename, extension).
1724    """
1725
1726    googlename = self.RepositoryName()
1727    project, rest = os.path.split(googlename)
1728    return (project,) + os.path.splitext(rest)
1729
1730  def BaseName(self):
1731    """File base name - text after the final slash, before the final period."""
1732    return self.Split()[1]
1733
1734  def Extension(self):
1735    """File extension - text following the final period, includes that period."""
1736    return self.Split()[2]
1737
1738  def NoExtension(self):
1739    """File has no source file extension."""
1740    return '/'.join(self.Split()[0:2])
1741
1742  def IsSource(self):
1743    """File has a source file extension."""
1744    return _IsSourceExtension(self.Extension()[1:])
1745
1746
1747def _ShouldPrintError(category, confidence, filename, linenum):
1748  """If confidence >= verbose, category passes filter and is not suppressed."""
1749
1750  # There are three ways we might decide not to print an error message:
1751  # a "NOLINT(category)" comment appears in the source,
1752  # the verbosity level isn't high enough, or the filters filter it out.
1753  if IsErrorSuppressedByNolint(category, linenum):
1754    return False
1755
1756  if confidence < _cpplint_state.verbose_level:
1757    return False
1758
1759  is_filtered = False
1760  for one_filter in _Filters():
1761    filter_cat, filter_file, filter_line = _ParseFilterSelector(one_filter[1:])
1762    category_match = category.startswith(filter_cat)
1763    file_match = filter_file == "" or filter_file == filename
1764    line_match = filter_line == linenum or filter_line == -1
1765
1766    if one_filter.startswith('-'):
1767      if category_match and file_match and line_match:
1768        is_filtered = True
1769    elif one_filter.startswith('+'):
1770      if category_match and file_match and line_match:
1771        is_filtered = False
1772    else:
1773      assert False  # should have been checked for in SetFilter.
1774  if is_filtered:
1775    return False
1776
1777  return True
1778
1779
1780def Error(filename, linenum, category, confidence, message):
1781  """Logs the fact we've found a lint error.
1782
1783  We log where the error was found, and also our confidence in the error,
1784  that is, how certain we are this is a legitimate style regression, and
1785  not a misidentification or a use that's sometimes justified.
1786
1787  False positives can be suppressed by the use of "NOLINT(category)"
1788  comments, NOLINTNEXTLINE or in blocks started by NOLINTBEGIN.  These
1789  are parsed into _error_suppressions.
1790
1791  Args:
1792    filename: The name of the file containing the error.
1793    linenum: The number of the line containing the error.
1794    category: A string used to describe the "category" this bug
1795      falls under: "whitespace", say, or "runtime".  Categories
1796      may have a hierarchy separated by slashes: "whitespace/indent".
1797    confidence: A number from 1-5 representing a confidence score for
1798      the error, with 5 meaning that we are certain of the problem,
1799      and 1 meaning that it could be a legitimate construct.
1800    message: The error message.
1801  """
1802  if _ShouldPrintError(category, confidence, filename, linenum):
1803    _cpplint_state.IncrementErrorCount(category)
1804    if _cpplint_state.output_format == 'vs7':
1805      _cpplint_state.PrintError(f'{filename}({linenum}): error cpplint:'
1806                                f' [{category}] {message} [{confidence}]\n')
1807    elif _cpplint_state.output_format == 'eclipse':
1808      sys.stderr.write(f'{filename}:{linenum}: warning:'
1809                       f' {message}  [{category}] [{confidence}]\n')
1810    elif _cpplint_state.output_format == 'junit':
1811      _cpplint_state.AddJUnitFailure(filename, linenum, message, category, confidence)
1812    elif _cpplint_state.output_format in ['sed', 'gsed']:
1813      if message in _SED_FIXUPS:
1814        sys.stdout.write(f"{_cpplint_state.output_format} -i"
1815                         f" '{linenum}{_SED_FIXUPS[message]}' {filename}"
1816                         f" # {message}  [{category}] [{confidence}]\n")
1817      else:
1818        sys.stderr.write(f'# {filename}:{linenum}: '
1819                         f' "{message}"  [{category}] [{confidence}]\n')
1820    else:
1821      final_message = (f'{filename}:{linenum}: '
1822                       f' {message}  [{category}] [{confidence}]\n')
1823      sys.stderr.write(final_message)
1824
1825
1826# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
1827_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
1828    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
1829# Match a single C style comment on the same line.
1830_RE_PATTERN_C_COMMENTS = r'/\*(?:[^*]|\*(?!/))*\*/'
1831# Matches multi-line C style comments.
1832# This RE is a little bit more complicated than one might expect, because we
1833# have to take care of space removals tools so we can handle comments inside
1834# statements better.
1835# The current rule is: We only clear spaces from both sides when we're at the
1836# end of the line. Otherwise, we try to remove spaces from the right side,
1837# if this doesn't work we try on left side but only if there's a non-character
1838# on the right.
1839_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
1840    r'(\s*' + _RE_PATTERN_C_COMMENTS + r'\s*$|' +
1841    _RE_PATTERN_C_COMMENTS + r'\s+|' +
1842    r'\s+' + _RE_PATTERN_C_COMMENTS + r'(?=\W)|' +
1843    _RE_PATTERN_C_COMMENTS + r')')
1844
1845
1846def IsCppString(line):
1847  """Does line terminate so, that the next symbol is in string constant.
1848
1849  This function does not consider single-line nor multi-line comments.
1850
1851  Args:
1852    line: is a partial line of code starting from the 0..n.
1853
1854  Returns:
1855    True, if next character appended to 'line' is inside a
1856    string constant.
1857  """
1858
1859  line = line.replace(r'\\', 'XX')  # after this, \\" does not match to \"
1860  return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1
1861
1862
1863def CleanseRawStrings(raw_lines):
1864  """Removes C++11 raw strings from lines.
1865
1866    Before:
1867      static const char kData[] = R"(
1868          multi-line string
1869          )";
1870
1871    After:
1872      static const char kData[] = ""
1873          (replaced by blank line)
1874          "";
1875
1876  Args:
1877    raw_lines: list of raw lines.
1878
1879  Returns:
1880    list of lines with C++11 raw strings replaced by empty strings.
1881  """
1882
1883  delimiter = None
1884  lines_without_raw_strings = []
1885  for line in raw_lines:
1886    if delimiter:
1887      # Inside a raw string, look for the end
1888      end = line.find(delimiter)
1889      if end >= 0:
1890        # Found the end of the string, match leading space for this
1891        # line and resume copying the original lines, and also insert
1892        # a "" on the last line.
1893        leading_space = re.match(r'^(\s*)\S', line)
1894        line = leading_space.group(1) + '""' + line[end + len(delimiter):]
1895        delimiter = None
1896      else:
1897        # Haven't found the end yet, append a blank line.
1898        line = '""'
1899
1900    # Look for beginning of a raw string, and replace them with
1901    # empty strings.  This is done in a loop to handle multiple raw
1902    # strings on the same line.
1903    while delimiter is None:
1904      # Look for beginning of a raw string.
1905      # See 2.14.15 [lex.string] for syntax.
1906      #
1907      # Once we have matched a raw string, we check the prefix of the
1908      # line to make sure that the line is not part of a single line
1909      # comment.  It's done this way because we remove raw strings
1910      # before removing comments as opposed to removing comments
1911      # before removing raw strings.  This is because there are some
1912      # cpplint checks that requires the comments to be preserved, but
1913      # we don't want to check comments that are inside raw strings.
1914      matched = re.match(r'^(.*?)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', line)
1915      if (matched and
1916          not re.match(r'^([^\'"]|\'(\\.|[^\'])*\'|"(\\.|[^"])*")*//',
1917                    matched.group(1))):
1918        delimiter = ')' + matched.group(2) + '"'
1919
1920        end = matched.group(3).find(delimiter)
1921        if end >= 0:
1922          # Raw string ended on same line
1923          line = (matched.group(1) + '""' +
1924                  matched.group(3)[end + len(delimiter):])
1925          delimiter = None
1926        else:
1927          # Start of a multi-line raw string
1928          line = matched.group(1) + '""'
1929      else:
1930        break
1931
1932    lines_without_raw_strings.append(line)
1933
1934  # TODO(unknown): if delimiter is not None here, we might want to
1935  # emit a warning for unterminated string.
1936  return lines_without_raw_strings
1937
1938
1939def FindNextMultiLineCommentStart(lines, lineix):
1940  """Find the beginning marker for a multiline comment."""
1941  while lineix < len(lines):
1942    if lines[lineix].strip().startswith('/*'):
1943      # Only return this marker if the comment goes beyond this line
1944      if lines[lineix].strip().find('*/', 2) < 0:
1945        return lineix
1946    lineix += 1
1947  return len(lines)
1948
1949
1950def FindNextMultiLineCommentEnd(lines, lineix):
1951  """We are inside a comment, find the end marker."""
1952  while lineix < len(lines):
1953    if lines[lineix].strip().endswith('*/'):
1954      return lineix
1955    lineix += 1
1956  return len(lines)
1957
1958
1959def RemoveMultiLineCommentsFromRange(lines, begin, end):
1960  """Clears a range of lines for multi-line comments."""
1961  # Having // <empty> comments makes the lines non-empty, so we will not get
1962  # unnecessary blank line warnings later in the code.
1963  for i in range(begin, end):
1964    lines[i] = '/**/'
1965
1966
1967def RemoveMultiLineComments(filename, lines, error):
1968  """Removes multiline (c-style) comments from lines."""
1969  lineix = 0
1970  while lineix < len(lines):
1971    lineix_begin = FindNextMultiLineCommentStart(lines, lineix)
1972    if lineix_begin >= len(lines):
1973      return
1974    lineix_end = FindNextMultiLineCommentEnd(lines, lineix_begin)
1975    if lineix_end >= len(lines):
1976      error(filename, lineix_begin + 1, 'readability/multiline_comment', 5,
1977            'Could not find end of multi-line comment')
1978      return
1979    RemoveMultiLineCommentsFromRange(lines, lineix_begin, lineix_end + 1)
1980    lineix = lineix_end + 1
1981
1982
1983def CleanseComments(line):
1984  """Removes //-comments and single-line C-style /* */ comments.
1985
1986  Args:
1987    line: A line of C++ source.
1988
1989  Returns:
1990    The line with single-line comments removed.
1991  """
1992  commentpos = line.find('//')
1993  if commentpos != -1 and not IsCppString(line[:commentpos]):
1994    line = line[:commentpos].rstrip()
1995  # get rid of /* ... */
1996  return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
1997
1998
1999def ReplaceAlternateTokens(line):
2000  """Replace any alternate token by its original counterpart.
2001
2002  In order to comply with the google rule stating that unary operators should
2003  never be followed by a space, an exception is made for the 'not' and 'compl'
2004  alternate tokens. For these, any trailing space is removed during the
2005  conversion.
2006
2007  Args:
2008    line: The line being processed.
2009
2010  Returns:
2011    The line with alternate tokens replaced.
2012  """
2013  for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line):
2014    token = _ALT_TOKEN_REPLACEMENT[match.group(2)]
2015    tail = '' if match.group(2) in ['not', 'compl'] and match.group(3) == ' ' \
2016           else r'\3'
2017    line = re.sub(match.re, rf'\1{token}{tail}', line, count=1)
2018  return line
2019
2020
2021class CleansedLines(object):
2022  """Holds 4 copies of all lines with different preprocessing applied to them.
2023
2024  1) elided member contains lines without strings and comments.
2025  2) lines member contains lines without comments.
2026  3) raw_lines member contains all the lines without processing.
2027  4) lines_without_raw_strings member is same as raw_lines, but with C++11 raw
2028     strings removed.
2029  All these members are of <type 'list'>, and of the same length.
2030  """
2031
2032  def __init__(self, lines):
2033    if '-readability/alt_tokens' in _cpplint_state.filters:
2034      for i, line in enumerate(lines):
2035        lines[i] = ReplaceAlternateTokens(line)
2036    self.elided = []
2037    self.lines = []
2038    self.raw_lines = lines
2039    self.num_lines = len(lines)
2040    self.lines_without_raw_strings = CleanseRawStrings(lines)
2041    for line in self.lines_without_raw_strings:
2042      self.lines.append(CleanseComments(line))
2043      elided = self._CollapseStrings(line)
2044      self.elided.append(CleanseComments(elided))
2045
2046  def NumLines(self):
2047    """Returns the number of lines represented."""
2048    return self.num_lines
2049
2050  @staticmethod
2051  def _CollapseStrings(elided):
2052    """Collapses strings and chars on a line to simple "" or '' blocks.
2053
2054    We nix strings first so we're not fooled by text like '"http://"'
2055
2056    Args:
2057      elided: The line being processed.
2058
2059    Returns:
2060      The line with collapsed strings.
2061    """
2062    if _RE_PATTERN_INCLUDE.match(elided):
2063      return elided
2064
2065    # Remove escaped characters first to make quote/single quote collapsing
2066    # basic.  Things that look like escaped characters shouldn't occur
2067    # outside of strings and chars.
2068    elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
2069
2070    # Replace quoted strings and digit separators.  Both single quotes
2071    # and double quotes are processed in the same loop, otherwise
2072    # nested quotes wouldn't work.
2073    collapsed = ''
2074    while True:
2075      # Find the first quote character
2076      match = re.match(r'^([^\'"]*)([\'"])(.*)$', elided)
2077      if not match:
2078        collapsed += elided
2079        break
2080      head, quote, tail = match.groups()
2081
2082      if quote == '"':
2083        # Collapse double quoted strings
2084        second_quote = tail.find('"')
2085        if second_quote >= 0:
2086          collapsed += head + '""'
2087          elided = tail[second_quote + 1:]
2088        else:
2089          # Unmatched double quote, don't bother processing the rest
2090          # of the line since this is probably a multiline string.
2091          collapsed += elided
2092          break
2093      else:
2094        # Found single quote, check nearby text to eliminate digit separators.
2095        #
2096        # There is no special handling for floating point here, because
2097        # the integer/fractional/exponent parts would all be parsed
2098        # correctly as long as there are digits on both sides of the
2099        # separator.  So we are fine as long as we don't see something
2100        # like "0.'3" (gcc 4.9.0 will not allow this literal).
2101        if re.search(r'\b(?:0[bBxX]?|[1-9])[0-9a-fA-F]*$', head):
2102          match_literal = re.match(r'^((?:\'?[0-9a-zA-Z_])*)(.*)$', "'" + tail)
2103          collapsed += head + match_literal.group(1).replace("'", '')
2104          elided = match_literal.group(2)
2105        else:
2106          second_quote = tail.find('\'')
2107          if second_quote >= 0:
2108            collapsed += head + "''"
2109            elided = tail[second_quote + 1:]
2110          else:
2111            # Unmatched single quote
2112            collapsed += elided
2113            break
2114
2115    return collapsed
2116
2117
2118def FindEndOfExpressionInLine(line, startpos, stack):
2119  """Find the position just after the end of current parenthesized expression.
2120
2121  Args:
2122    line: a CleansedLines line.
2123    startpos: start searching at this position.
2124    stack: nesting stack at startpos.
2125
2126  Returns:
2127    On finding matching end: (index just after matching end, None)
2128    On finding an unclosed expression: (-1, None)
2129    Otherwise: (-1, new stack at end of this line)
2130  """
2131  for i in range(startpos, len(line)):
2132    char = line[i]
2133    if char in '([{':
2134      # Found start of parenthesized expression, push to expression stack
2135      stack.append(char)
2136    elif char == '<':
2137      # Found potential start of template argument list
2138      if i > 0 and line[i - 1] == '<':
2139        # Left shift operator
2140        if stack and stack[-1] == '<':
2141          stack.pop()
2142          if not stack:
2143            return (-1, None)
2144      elif i > 0 and re.search(r'\boperator\s*$', line[0:i]):
2145        # operator<, don't add to stack
2146        continue
2147      else:
2148        # Tentative start of template argument list
2149        stack.append('<')
2150    elif char in ')]}':
2151      # Found end of parenthesized expression.
2152      #
2153      # If we are currently expecting a matching '>', the pending '<'
2154      # must have been an operator.  Remove them from expression stack.
2155      while stack and stack[-1] == '<':
2156        stack.pop()
2157      if not stack:
2158        return (-1, None)
2159      if ((stack[-1] == '(' and char == ')') or
2160          (stack[-1] == '[' and char == ']') or
2161          (stack[-1] == '{' and char == '}')):
2162        stack.pop()
2163        if not stack:
2164          return (i + 1, None)
2165      else:
2166        # Mismatched parentheses
2167        return (-1, None)
2168    elif char == '>':
2169      # Found potential end of template argument list.
2170
2171      # Ignore "->" and operator functions
2172      if (i > 0 and
2173          (line[i - 1] == '-' or re.search(r'\boperator\s*$', line[0:i - 1]))):
2174        continue
2175
2176      # Pop the stack if there is a matching '<'.  Otherwise, ignore
2177      # this '>' since it must be an operator.
2178      if stack:
2179        if stack[-1] == '<':
2180          stack.pop()
2181          if not stack:
2182            return (i + 1, None)
2183    elif char == ';':
2184      # Found something that look like end of statements.  If we are currently
2185      # expecting a '>', the matching '<' must have been an operator, since
2186      # template argument list should not contain statements.
2187      while stack and stack[-1] == '<':
2188        stack.pop()
2189      if not stack:
2190        return (-1, None)
2191
2192  # Did not find end of expression or unbalanced parentheses on this line
2193  return (-1, stack)
2194
2195
2196def CloseExpression(clean_lines, linenum, pos):
2197  """If input points to ( or { or [ or <, finds the position that closes it.
2198
2199  If lines[linenum][pos] points to a '(' or '{' or '[' or '<', finds the
2200  linenum/pos that correspond to the closing of the expression.
2201
2202  TODO(unknown): cpplint spends a fair bit of time matching parentheses.
2203  Ideally we would want to index all opening and closing parentheses once
2204  and have CloseExpression be just a simple lookup, but due to preprocessor
2205  tricks, this is not so easy.
2206
2207  Args:
2208    clean_lines: A CleansedLines instance containing the file.
2209    linenum: The number of the line to check.
2210    pos: A position on the line.
2211
2212  Returns:
2213    A tuple (line, linenum, pos) pointer *past* the closing brace, or
2214    (line, len(lines), -1) if we never find a close.  Note we ignore
2215    strings and comments when matching; and the line we return is the
2216    'cleansed' line at linenum.
2217  """
2218
2219  line = clean_lines.elided[linenum]
2220  if (line[pos] not in '({[<') or re.match(r'<[<=]', line[pos:]):
2221    return (line, clean_lines.NumLines(), -1)
2222
2223  # Check first line
2224  (end_pos, stack) = FindEndOfExpressionInLine(line, pos, [])
2225  if end_pos > -1:
2226    return (line, linenum, end_pos)
2227
2228  # Continue scanning forward
2229  while stack and linenum < clean_lines.NumLines() - 1:
2230    linenum += 1
2231    line = clean_lines.elided[linenum]
2232    (end_pos, stack) = FindEndOfExpressionInLine(line, 0, stack)
2233    if end_pos > -1:
2234      return (line, linenum, end_pos)
2235
2236  # Did not find end of expression before end of file, give up
2237  return (line, clean_lines.NumLines(), -1)
2238
2239
2240def FindStartOfExpressionInLine(line, endpos, stack):
2241  """Find position at the matching start of current expression.
2242
2243  This is almost the reverse of FindEndOfExpressionInLine, but note
2244  that the input position and returned position differs by 1.
2245
2246  Args:
2247    line: a CleansedLines line.
2248    endpos: start searching at this position.
2249    stack: nesting stack at endpos.
2250
2251  Returns:
2252    On finding matching start: (index at matching start, None)
2253    On finding an unclosed expression: (-1, None)
2254    Otherwise: (-1, new stack at beginning of this line)
2255  """
2256  i = endpos
2257  while i >= 0:
2258    char = line[i]
2259    if char in ')]}':
2260      # Found end of expression, push to expression stack
2261      stack.append(char)
2262    elif char == '>':
2263      # Found potential end of template argument list.
2264      #
2265      # Ignore it if it's a "->" or ">=" or "operator>"
2266      if (i > 0 and
2267          (line[i - 1] == '-' or
2268           re.match(r'\s>=\s', line[i - 1:]) or
2269           re.search(r'\boperator\s*$', line[0:i]))):
2270        i -= 1
2271      else:
2272        stack.append('>')
2273    elif char == '<':
2274      # Found potential start of template argument list
2275      if i > 0 and line[i - 1] == '<':
2276        # Left shift operator
2277        i -= 1
2278      else:
2279        # If there is a matching '>', we can pop the expression stack.
2280        # Otherwise, ignore this '<' since it must be an operator.
2281        if stack and stack[-1] == '>':
2282          stack.pop()
2283          if not stack:
2284            return (i, None)
2285    elif char in '([{':
2286      # Found start of expression.
2287      #
2288      # If there are any unmatched '>' on the stack, they must be
2289      # operators.  Remove those.
2290      while stack and stack[-1] == '>':
2291        stack.pop()
2292      if not stack:
2293        return (-1, None)
2294      if ((char == '(' and stack[-1] == ')') or
2295          (char == '[' and stack[-1] == ']') or
2296          (char == '{' and stack[-1] == '}')):
2297        stack.pop()
2298        if not stack:
2299          return (i, None)
2300      else:
2301        # Mismatched parentheses
2302        return (-1, None)
2303    elif char == ';':
2304      # Found something that look like end of statements.  If we are currently
2305      # expecting a '<', the matching '>' must have been an operator, since
2306      # template argument list should not contain statements.
2307      while stack and stack[-1] == '>':
2308        stack.pop()
2309      if not stack:
2310        return (-1, None)
2311
2312    i -= 1
2313
2314  return (-1, stack)
2315
2316
2317def ReverseCloseExpression(clean_lines, linenum, pos):
2318  """If input points to ) or } or ] or >, finds the position that opens it.
2319
2320  If lines[linenum][pos] points to a ')' or '}' or ']' or '>', finds the
2321  linenum/pos that correspond to the opening of the expression.
2322
2323  Args:
2324    clean_lines: A CleansedLines instance containing the file.
2325    linenum: The number of the line to check.
2326    pos: A position on the line.
2327
2328  Returns:
2329    A tuple (line, linenum, pos) pointer *at* the opening brace, or
2330    (line, 0, -1) if we never find the matching opening brace.  Note
2331    we ignore strings and comments when matching; and the line we
2332    return is the 'cleansed' line at linenum.
2333  """
2334  line = clean_lines.elided[linenum]
2335  if line[pos] not in ')}]>':
2336    return (line, 0, -1)
2337
2338  # Check last line
2339  (start_pos, stack) = FindStartOfExpressionInLine(line, pos, [])
2340  if start_pos > -1:
2341    return (line, linenum, start_pos)
2342
2343  # Continue scanning backward
2344  while stack and linenum > 0:
2345    linenum -= 1
2346    line = clean_lines.elided[linenum]
2347    (start_pos, stack) = FindStartOfExpressionInLine(line, len(line) - 1, stack)
2348    if start_pos > -1:
2349      return (line, linenum, start_pos)
2350
2351  # Did not find start of expression before beginning of file, give up
2352  return (line, 0, -1)
2353
2354
2355def CheckForCopyright(filename, lines, error):
2356  """Logs an error if no Copyright message appears at the top of the file."""
2357
2358  # We'll say it should occur by line 10. Don't forget there's a
2359  # placeholder line at the front.
2360  for line in range(1, min(len(lines), 11)):
2361    if re.search(r'Copyright', lines[line], re.I): break
2362  else:                       # means no copyright line was found
2363    error(filename, 0, 'legal/copyright', 5,
2364          'No copyright message found.  '
2365          'You should have a line: "Copyright [year] <Copyright Owner>"')
2366
2367
2368def GetIndentLevel(line):
2369  """Return the number of leading spaces in line.
2370
2371  Args:
2372    line: A string to check.
2373
2374  Returns:
2375    An integer count of leading spaces, possibly zero.
2376  """
2377  indent = re.match(r'^( *)\S', line)
2378  if indent:
2379    return len(indent.group(1))
2380  else:
2381    return 0
2382
2383def PathSplitToList(path):
2384  """Returns the path split into a list by the separator.
2385
2386  Args:
2387    path: An absolute or relative path (e.g. '/a/b/c/' or '../a')
2388
2389  Returns:
2390    A list of path components (e.g. ['a', 'b', 'c]).
2391  """
2392  lst = []
2393  while True:
2394    (head, tail) = os.path.split(path)
2395    if head == path:  # absolute paths end
2396      lst.append(head)
2397      break
2398    if tail == path:  # relative paths end
2399      lst.append(tail)
2400      break
2401
2402    path = head
2403    lst.append(tail)
2404
2405  lst.reverse()
2406  return lst
2407
2408def GetHeaderGuardCPPVariable(filename):
2409  """Returns the CPP variable that should be used as a header guard.
2410
2411  Args:
2412    filename: The name of a C++ header file.
2413
2414  Returns:
2415    The CPP variable that should be used as a header guard in the
2416    named file.
2417
2418  """
2419
2420  # Restores original filename in case that cpplint is invoked from Emacs's
2421  # flymake.
2422  filename = re.sub(r'_flymake\.h$', '.h', filename)
2423  filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename)
2424  # Replace 'c++' with 'cpp'.
2425  filename = filename.replace('C++', 'cpp').replace('c++', 'cpp')
2426
2427  fileinfo = FileInfo(filename)
2428  file_path_from_root = fileinfo.RepositoryName()
2429
2430  def FixupPathFromRoot():
2431    if _root_debug:
2432      sys.stderr.write(f"\n_root fixup, _root = '{_root}',"
2433                       f" repository name = '{fileinfo.RepositoryName()}'\n")
2434
2435    # Process the file path with the --root flag if it was set.
2436    if not _root:
2437      if _root_debug:
2438        sys.stderr.write("_root unspecified\n")
2439      return file_path_from_root
2440
2441    def StripListPrefix(lst, prefix):
2442      # f(['x', 'y'], ['w, z']) -> None  (not a valid prefix)
2443      if lst[:len(prefix)] != prefix:
2444        return None
2445      # f(['a, 'b', 'c', 'd'], ['a', 'b']) -> ['c', 'd']
2446      return lst[(len(prefix)):]
2447
2448    # root behavior:
2449    #   --root=subdir , lstrips subdir from the header guard
2450    maybe_path = StripListPrefix(PathSplitToList(file_path_from_root),
2451                                 PathSplitToList(_root))
2452
2453    if _root_debug:
2454      sys.stderr.write(("_root lstrip (maybe_path=%s, file_path_from_root=%s," +
2455          " _root=%s)\n") % (maybe_path, file_path_from_root, _root))
2456
2457    if maybe_path:
2458      return os.path.join(*maybe_path)
2459
2460    #   --root=.. , will prepend the outer directory to the header guard
2461    full_path = fileinfo.FullName()
2462    # adapt slashes for windows
2463    root_abspath = os.path.abspath(_root).replace('\\', '/')
2464
2465    maybe_path = StripListPrefix(PathSplitToList(full_path),
2466                                 PathSplitToList(root_abspath))
2467
2468    if _root_debug:
2469      sys.stderr.write(("_root prepend (maybe_path=%s, full_path=%s, " +
2470          "root_abspath=%s)\n") % (maybe_path, full_path, root_abspath))
2471
2472    if maybe_path:
2473      return os.path.join(*maybe_path)
2474
2475    if _root_debug:
2476      sys.stderr.write(f"_root ignore, returning {file_path_from_root}\n")
2477
2478    #   --root=FAKE_DIR is ignored
2479    return file_path_from_root
2480
2481  file_path_from_root = FixupPathFromRoot()
2482  return re.sub(r'[^a-zA-Z0-9]', '_', file_path_from_root).upper() + '_'
2483
2484
2485def CheckForHeaderGuard(filename, clean_lines, error, cppvar):
2486  """Checks that the file contains a header guard.
2487
2488  Logs an error if no #ifndef header guard is present.  For other
2489  headers, checks that the full pathname is used.
2490
2491  Args:
2492    filename: The name of the C++ header file.
2493    clean_lines: A CleansedLines instance containing the file.
2494    error: The function to call with any errors found.
2495  """
2496
2497  # Don't check for header guards if there are error suppression
2498  # comments somewhere in this file.
2499  #
2500  # Because this is silencing a warning for a nonexistent line, we
2501  # only support the very specific NOLINT(build/header_guard) syntax,
2502  # and not the general NOLINT or NOLINT(*) syntax.
2503  raw_lines = clean_lines.lines_without_raw_strings
2504  for i in raw_lines:
2505    if re.search(r'//\s*NOLINT\(build/header_guard\)', i):
2506      return
2507
2508  # Allow pragma once instead of header guards
2509  for i in raw_lines:
2510    if re.search(r'^\s*#pragma\s+once', i):
2511      return
2512
2513  ifndef = ''
2514  ifndef_linenum = 0
2515  define = ''
2516  endif = ''
2517  endif_linenum = 0
2518  for linenum, line in enumerate(raw_lines):
2519    linesplit = line.split()
2520    if len(linesplit) >= 2:
2521      # find the first occurrence of #ifndef and #define, save arg
2522      if not ifndef and linesplit[0] == '#ifndef':
2523        # set ifndef to the header guard presented on the #ifndef line.
2524        ifndef = linesplit[1]
2525        ifndef_linenum = linenum
2526      if not define and linesplit[0] == '#define':
2527        define = linesplit[1]
2528    # find the last occurrence of #endif, save entire line
2529    if line.startswith('#endif'):
2530      endif = line
2531      endif_linenum = linenum
2532
2533  if not ifndef or not define or ifndef != define:
2534    error(filename, 0, 'build/header_guard', 5,
2535          f'No #ifndef header guard found, suggested CPP variable is: {cppvar}')
2536    return
2537
2538  # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
2539  # for backward compatibility.
2540  if ifndef != cppvar:
2541    error_level = 0
2542    if ifndef != cppvar + '_':
2543      error_level = 5
2544
2545    ParseNolintSuppressions(filename, raw_lines[ifndef_linenum], ifndef_linenum,
2546                            error)
2547    error(filename, ifndef_linenum, 'build/header_guard', error_level,
2548          f'#ifndef header guard has wrong style, please use: {cppvar}')
2549
2550  # Check for "//" comments on endif line.
2551  ParseNolintSuppressions(filename, raw_lines[endif_linenum], endif_linenum,
2552                          error)
2553  match = re.match(r'#endif\s*//\s*' + cppvar + r'(_)?\b', endif)
2554  if match:
2555    if match.group(1) == '_':
2556      # Issue low severity warning for deprecated double trailing underscore
2557      error(filename, endif_linenum, 'build/header_guard', 0,
2558            f'#endif line should be "#endif  // {cppvar}"')
2559    return
2560
2561  # Didn't find the corresponding "//" comment.  If this file does not
2562  # contain any "//" comments at all, it could be that the compiler
2563  # only wants "/**/" comments, look for those instead.
2564  no_single_line_comments = True
2565  for i in range(1, len(raw_lines) - 1):
2566    line = raw_lines[i]
2567    if re.match(r'^(?:(?:\'(?:\.|[^\'])*\')|(?:"(?:\.|[^"])*")|[^\'"])*//', line):
2568      no_single_line_comments = False
2569      break
2570
2571  if no_single_line_comments:
2572    match = re.match(r'#endif\s*/\*\s*' + cppvar + r'(_)?\s*\*/', endif)
2573    if match:
2574      if match.group(1) == '_':
2575        # Low severity warning for double trailing underscore
2576        error(filename, endif_linenum, 'build/header_guard', 0,
2577              f'#endif line should be "#endif  /* {cppvar} */"')
2578      return
2579
2580  # Didn't find anything
2581  error(filename, endif_linenum, 'build/header_guard', 5,
2582        f'#endif line should be "#endif  // {cppvar}"')
2583
2584
2585def CheckHeaderFileIncluded(filename, include_state, error):
2586  """Logs an error if a source file does not include its header."""
2587
2588  # Do not check test files
2589  fileinfo = FileInfo(filename)
2590  if re.search(_TEST_FILE_SUFFIX, fileinfo.BaseName()):
2591    return
2592
2593  first_include = message = None
2594  basefilename = filename[0:len(filename) - len(fileinfo.Extension())]
2595  for ext in GetHeaderExtensions():
2596    headerfile = basefilename + '.' + ext
2597    if not os.path.exists(headerfile):
2598      continue
2599    headername = FileInfo(headerfile).RepositoryName()
2600    include_uses_unix_dir_aliases = False
2601    for section_list in include_state.include_list:
2602      for f in section_list:
2603        include_text = f[0]
2604        if "./" in include_text:
2605          include_uses_unix_dir_aliases = True
2606        if headername in include_text or include_text in headername:
2607          return
2608        if not first_include:
2609          first_include = f[1]
2610
2611    message = f'{fileinfo.RepositoryName()} should include its header file {headername}'
2612    if include_uses_unix_dir_aliases:
2613      message += ". Relative paths like . and .. are not allowed."
2614
2615  if message:
2616    error(filename, first_include, 'build/include', 5, message)
2617
2618
2619def CheckForBadCharacters(filename, lines, error):
2620  """Logs an error for each line containing bad characters.
2621
2622  Two kinds of bad characters:
2623
2624  1. Unicode replacement characters: These indicate that either the file
2625  contained invalid UTF-8 (likely) or Unicode replacement characters (which
2626  it shouldn't).  Note that it's possible for this to throw off line
2627  numbering if the invalid UTF-8 occurred adjacent to a newline.
2628
2629  2. NUL bytes.  These are problematic for some tools.
2630
2631  Args:
2632    filename: The name of the current file.
2633    lines: An array of strings, each representing a line of the file.
2634    error: The function to call with any errors found.
2635  """
2636  for linenum, line in enumerate(lines):
2637    if '\ufffd' in line:
2638      error(filename, linenum, 'readability/utf8', 5,
2639            'Line contains invalid UTF-8 (or Unicode replacement character).')
2640    if '\0' in line:
2641      error(filename, linenum, 'readability/nul', 5, 'Line contains NUL byte.')
2642
2643
2644def CheckForNewlineAtEOF(filename, lines, error):
2645  """Logs an error if there is no newline char at the end of the file.
2646
2647  Args:
2648    filename: The name of the current file.
2649    lines: An array of strings, each representing a line of the file.
2650    error: The function to call with any errors found.
2651  """
2652
2653  # The array lines() was created by adding two newlines to the
2654  # original file (go figure), then splitting on \n.
2655  # To verify that the file ends in \n, we just have to make sure the
2656  # last-but-two element of lines() exists and is empty.
2657  if len(lines) < 3 or lines[-2]:
2658    error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
2659          'Could not find a newline character at the end of the file.')
2660
2661
2662def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
2663  """Logs an error if we see /* ... */ or "..." that extend past one line.
2664
2665  /* ... */ comments are legit inside macros, for one line.
2666  Otherwise, we prefer // comments, so it's ok to warn about the
2667  other.  Likewise, it's ok for strings to extend across multiple
2668  lines, as long as a line continuation character (backslash)
2669  terminates each line. Although not currently prohibited by the C++
2670  style guide, it's ugly and unnecessary. We don't do well with either
2671  in this lint program, so we warn about both.
2672
2673  Args:
2674    filename: The name of the current file.
2675    clean_lines: A CleansedLines instance containing the file.
2676    linenum: The number of the line to check.
2677    error: The function to call with any errors found.
2678  """
2679  line = clean_lines.elided[linenum]
2680
2681  # Remove all \\ (escaped backslashes) from the line. They are OK, and the
2682  # second (escaped) slash may trigger later \" detection erroneously.
2683  line = line.replace('\\\\', '')
2684
2685  if line.count('/*') > line.count('*/'):
2686    error(filename, linenum, 'readability/multiline_comment', 5,
2687          'Complex multi-line /*...*/-style comment found. '
2688          'Lint may give bogus warnings.  '
2689          'Consider replacing these with //-style comments, '
2690          'with #if 0...#endif, '
2691          'or with more clearly structured multi-line comments.')
2692
2693  if (line.count('"') - line.count('\\"')) % 2:
2694    error(filename, linenum, 'readability/multiline_string', 5,
2695          'Multi-line string ("...") found.  This lint script doesn\'t '
2696          'do well with such strings, and may give bogus warnings.  '
2697          'Use C++11 raw strings or concatenation instead.')
2698
2699
2700# (non-threadsafe name, thread-safe alternative, validation pattern)
2701#
2702# The validation pattern is used to eliminate false positives such as:
2703#  _rand();               // false positive due to substring match.
2704#  ->rand();              // some member function rand().
2705#  ACMRandom rand(seed);  // some variable named rand.
2706#  ISAACRandom rand();    // another variable named rand.
2707#
2708# Basically we require the return value of these functions to be used
2709# in some expression context on the same line by matching on some
2710# operator before the function name.  This eliminates constructors and
2711# member function calls.
2712_UNSAFE_FUNC_PREFIX = r'(?:[-+*/=%^&|(<]\s*|>\s+)'
2713_THREADING_LIST = (
2714    ('asctime(', 'asctime_r(', _UNSAFE_FUNC_PREFIX + r'asctime\([^)]+\)'),
2715    ('ctime(', 'ctime_r(', _UNSAFE_FUNC_PREFIX + r'ctime\([^)]+\)'),
2716    ('getgrgid(', 'getgrgid_r(', _UNSAFE_FUNC_PREFIX + r'getgrgid\([^)]+\)'),
2717    ('getgrnam(', 'getgrnam_r(', _UNSAFE_FUNC_PREFIX + r'getgrnam\([^)]+\)'),
2718    ('getlogin(', 'getlogin_r(', _UNSAFE_FUNC_PREFIX + r'getlogin\(\)'),
2719    ('getpwnam(', 'getpwnam_r(', _UNSAFE_FUNC_PREFIX + r'getpwnam\([^)]+\)'),
2720    ('getpwuid(', 'getpwuid_r(', _UNSAFE_FUNC_PREFIX + r'getpwuid\([^)]+\)'),
2721    ('gmtime(', 'gmtime_r(', _UNSAFE_FUNC_PREFIX + r'gmtime\([^)]+\)'),
2722    ('localtime(', 'localtime_r(', _UNSAFE_FUNC_PREFIX + r'localtime\([^)]+\)'),
2723    ('rand(', 'rand_r(', _UNSAFE_FUNC_PREFIX + r'rand\(\)'),
2724    ('strtok(', 'strtok_r(',
2725     _UNSAFE_FUNC_PREFIX + r'strtok\([^)]+\)'),
2726    ('ttyname(', 'ttyname_r(', _UNSAFE_FUNC_PREFIX + r'ttyname\([^)]+\)'),
2727    )
2728
2729
2730def CheckPosixThreading(filename, clean_lines, linenum, error):
2731  """Checks for calls to thread-unsafe functions.
2732
2733  Much code has been originally written without consideration of
2734  multi-threading. Also, engineers are relying on their old experience;
2735  they have learned posix before threading extensions were added. These
2736  tests guide the engineers to use thread-safe functions (when using
2737  posix directly).
2738
2739  Args:
2740    filename: The name of the current file.
2741    clean_lines: A CleansedLines instance containing the file.
2742    linenum: The number of the line to check.
2743    error: The function to call with any errors found.
2744  """
2745  line = clean_lines.elided[linenum]
2746  for single_thread_func, multithread_safe_func, pattern in _THREADING_LIST:
2747    # Additional pattern matching check to confirm that this is the
2748    # function we are looking for
2749    if re.search(pattern, line):
2750      error(filename, linenum, 'runtime/threadsafe_fn', 2,
2751            'Consider using ' + multithread_safe_func +
2752            '...) instead of ' + single_thread_func +
2753            '...) for improved thread safety.')
2754
2755
2756def CheckVlogArguments(filename, clean_lines, linenum, error):
2757  """Checks that VLOG() is only used for defining a logging level.
2758
2759  For example, VLOG(2) is correct. VLOG(INFO), VLOG(WARNING), VLOG(ERROR), and
2760  VLOG(FATAL) are not.
2761
2762  Args:
2763    filename: The name of the current file.
2764    clean_lines: A CleansedLines instance containing the file.
2765    linenum: The number of the line to check.
2766    error: The function to call with any errors found.
2767  """
2768  line = clean_lines.elided[linenum]
2769  if re.search(r'\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)', line):
2770    error(filename, linenum, 'runtime/vlog', 5,
2771          'VLOG() should be used with numeric verbosity level.  '
2772          'Use LOG() if you want symbolic severity levels.')
2773
2774# Matches invalid increment: *count++, which moves pointer instead of
2775# incrementing a value.
2776_RE_PATTERN_INVALID_INCREMENT = re.compile(
2777    r'^\s*\*\w+(\+\+|--);')
2778
2779
2780def CheckInvalidIncrement(filename, clean_lines, linenum, error):
2781  """Checks for invalid increment *count++.
2782
2783  For example following function:
2784  void increment_counter(int* count) {
2785    *count++;
2786  }
2787  is invalid, because it effectively does count++, moving pointer, and should
2788  be replaced with ++*count, (*count)++ or *count += 1.
2789
2790  Args:
2791    filename: The name of the current file.
2792    clean_lines: A CleansedLines instance containing the file.
2793    linenum: The number of the line to check.
2794    error: The function to call with any errors found.
2795  """
2796  line = clean_lines.elided[linenum]
2797  if _RE_PATTERN_INVALID_INCREMENT.match(line):
2798    error(filename, linenum, 'runtime/invalid_increment', 5,
2799          'Changing pointer instead of value (or unused value of operator*).')
2800
2801
2802def IsMacroDefinition(clean_lines, linenum):
2803  if re.search(r'^#define', clean_lines[linenum]):
2804    return True
2805
2806  if linenum > 0 and re.search(r'\\$', clean_lines[linenum - 1]):
2807    return True
2808
2809  return False
2810
2811
2812def IsForwardClassDeclaration(clean_lines, linenum):
2813  return re.match(r'^\s*(\btemplate\b)*.*class\s+\w+;\s*$', clean_lines[linenum])
2814
2815
2816class _BlockInfo(object):
2817  """Stores information about a generic block of code."""
2818
2819  def __init__(self, linenum, seen_open_brace):
2820    self.starting_linenum = linenum
2821    self.seen_open_brace = seen_open_brace
2822    self.open_parentheses = 0
2823    self.inline_asm = _NO_ASM
2824    self.check_namespace_indentation = False
2825
2826  def CheckBegin(self, filename, clean_lines, linenum, error):
2827    """Run checks that applies to text up to the opening brace.
2828
2829    This is mostly for checking the text after the class identifier
2830    and the "{", usually where the base class is specified.  For other
2831    blocks, there isn't much to check, so we always pass.
2832
2833    Args:
2834      filename: The name of the current file.
2835      clean_lines: A CleansedLines instance containing the file.
2836      linenum: The number of the line to check.
2837      error: The function to call with any errors found.
2838    """
2839    pass
2840
2841  def CheckEnd(self, filename, clean_lines, linenum, error):
2842    """Run checks that applies to text after the closing brace.
2843
2844    This is mostly used for checking end of namespace comments.
2845
2846    Args:
2847      filename: The name of the current file.
2848      clean_lines: A CleansedLines instance containing the file.
2849      linenum: The number of the line to check.
2850      error: The function to call with any errors found.
2851    """
2852    pass
2853
2854  def IsBlockInfo(self):
2855    """Returns true if this block is a _BlockInfo.
2856
2857    This is convenient for verifying that an object is an instance of
2858    a _BlockInfo, but not an instance of any of the derived classes.
2859
2860    Returns:
2861      True for this class, False for derived classes.
2862    """
2863    return self.__class__ == _BlockInfo
2864
2865
2866class _ExternCInfo(_BlockInfo):
2867  """Stores information about an 'extern "C"' block."""
2868
2869  def __init__(self, linenum):
2870    _BlockInfo.__init__(self, linenum, True)
2871
2872
2873class _ClassInfo(_BlockInfo):
2874  """Stores information about a class."""
2875
2876  def __init__(self, name, class_or_struct, clean_lines, linenum):
2877    _BlockInfo.__init__(self, linenum, False)
2878    self.name = name
2879    self.is_derived = False
2880    self.check_namespace_indentation = True
2881    if class_or_struct == 'struct':
2882      self.access = 'public'
2883      self.is_struct = True
2884    else:
2885      self.access = 'private'
2886      self.is_struct = False
2887
2888    # Remember initial indentation level for this class.  Using raw_lines here
2889    # instead of elided to account for leading comments.
2890    self.class_indent = GetIndentLevel(clean_lines.raw_lines[linenum])
2891
2892    # Try to find the end of the class.  This will be confused by things like:
2893    #   class A {
2894    #   } *x = { ...
2895    #
2896    # But it's still good enough for CheckSectionSpacing.
2897    self.last_line = 0
2898    depth = 0
2899    for i in range(linenum, clean_lines.NumLines()):
2900      line = clean_lines.elided[i]
2901      depth += line.count('{') - line.count('}')
2902      if not depth:
2903        self.last_line = i
2904        break
2905
2906  def CheckBegin(self, filename, clean_lines, linenum, error):
2907    # Look for a bare ':'
2908    if re.search('(^|[^:]):($|[^:])', clean_lines.elided[linenum]):
2909      self.is_derived = True
2910
2911  def CheckEnd(self, filename, clean_lines, linenum, error):
2912    # If there is a DISALLOW macro, it should appear near the end of
2913    # the class.
2914    seen_last_thing_in_class = False
2915    for i in range(linenum - 1, self.starting_linenum, -1):
2916      match = re.search(
2917          r'\b(DISALLOW_COPY_AND_ASSIGN|DISALLOW_IMPLICIT_CONSTRUCTORS)\(' +
2918          self.name + r'\)',
2919          clean_lines.elided[i])
2920      if match:
2921        if seen_last_thing_in_class:
2922          error(filename, i, 'readability/constructors', 3,
2923                match.group(1) + ' should be the last thing in the class')
2924        break
2925
2926      if not re.match(r'^\s*$', clean_lines.elided[i]):
2927        seen_last_thing_in_class = True
2928
2929    # Check that closing brace is aligned with beginning of the class.
2930    # Only do this if the closing brace is indented by only whitespaces.
2931    # This means we will not check single-line class definitions.
2932    indent = re.match(r'^( *)\}', clean_lines.elided[linenum])
2933    if indent and len(indent.group(1)) != self.class_indent:
2934      if self.is_struct:
2935        parent = 'struct ' + self.name
2936      else:
2937        parent = 'class ' + self.name
2938      error(filename, linenum, 'whitespace/indent', 3,
2939            f'Closing brace should be aligned with beginning of {parent}')
2940
2941
2942class _NamespaceInfo(_BlockInfo):
2943  """Stores information about a namespace."""
2944
2945  def __init__(self, name, linenum):
2946    _BlockInfo.__init__(self, linenum, False)
2947    self.name = name or ''
2948    self.check_namespace_indentation = True
2949
2950  def CheckEnd(self, filename, clean_lines, linenum, error):
2951    """Check end of namespace comments."""
2952    line = clean_lines.raw_lines[linenum]
2953
2954    # Check how many lines is enclosed in this namespace.  Don't issue
2955    # warning for missing namespace comments if there aren't enough
2956    # lines.  However, do apply checks if there is already an end of
2957    # namespace comment and it's incorrect.
2958    #
2959    # TODO(unknown): We always want to check end of namespace comments
2960    # if a namespace is large, but sometimes we also want to apply the
2961    # check if a short namespace contained nontrivial things (something
2962    # other than forward declarations).  There is currently no logic on
2963    # deciding what these nontrivial things are, so this check is
2964    # triggered by namespace size only, which works most of the time.
2965    if (linenum - self.starting_linenum < 10
2966        and not re.match(r'^\s*};*\s*(//|/\*).*\bnamespace\b', line)):
2967      return
2968
2969    # Look for matching comment at end of namespace.
2970    #
2971    # Note that we accept C style "/* */" comments for terminating
2972    # namespaces, so that code that terminate namespaces inside
2973    # preprocessor macros can be cpplint clean.
2974    #
2975    # We also accept stuff like "// end of namespace <name>." with the
2976    # period at the end.
2977    #
2978    # Besides these, we don't accept anything else, otherwise we might
2979    # get false negatives when existing comment is a substring of the
2980    # expected namespace.
2981    if self.name:
2982      # Named namespace
2983      if not re.match((r'^\s*};*\s*(//|/\*).*\bnamespace\s+' +
2984                    re.escape(self.name) + r'[\*/\.\\\s]*$'),
2985                   line):
2986        error(filename, linenum, 'readability/namespace', 5,
2987              f'Namespace should be terminated with "// namespace {self.name}"')
2988    else:
2989      # Anonymous namespace
2990      if not re.match(r'^\s*};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', line):
2991        # If "// namespace anonymous" or "// anonymous namespace (more text)",
2992        # mention "// anonymous namespace" as an acceptable form
2993        if re.match(r'^\s*}.*\b(namespace anonymous|anonymous namespace)\b', line):
2994          error(filename, linenum, 'readability/namespace', 5,
2995                'Anonymous namespace should be terminated with "// namespace"'
2996                ' or "// anonymous namespace"')
2997        else:
2998          error(filename, linenum, 'readability/namespace', 5,
2999                'Anonymous namespace should be terminated with "// namespace"')
3000
3001
3002class _PreprocessorInfo(object):
3003  """Stores checkpoints of nesting stacks when #if/#else is seen."""
3004
3005  def __init__(self, stack_before_if):
3006    # The entire nesting stack before #if
3007    self.stack_before_if = stack_before_if
3008
3009    # The entire nesting stack up to #else
3010    self.stack_before_else = []
3011
3012    # Whether we have already seen #else or #elif
3013    self.seen_else = False
3014
3015
3016class NestingState(object):
3017  """Holds states related to parsing braces."""
3018
3019  def __init__(self):
3020    # Stack for tracking all braces.  An object is pushed whenever we
3021    # see a "{", and popped when we see a "}".  Only 3 types of
3022    # objects are possible:
3023    # - _ClassInfo: a class or struct.
3024    # - _NamespaceInfo: a namespace.
3025    # - _BlockInfo: some other type of block.
3026    self.stack = []
3027
3028    # Top of the previous stack before each Update().
3029    #
3030    # Because the nesting_stack is updated at the end of each line, we
3031    # had to do some convoluted checks to find out what is the current
3032    # scope at the beginning of the line.  This check is simplified by
3033    # saving the previous top of nesting stack.
3034    #
3035    # We could save the full stack, but we only need the top.  Copying
3036    # the full nesting stack would slow down cpplint by ~10%.
3037    self.previous_stack_top = []
3038
3039    # Stack of _PreprocessorInfo objects.
3040    self.pp_stack = []
3041
3042  def SeenOpenBrace(self):
3043    """Check if we have seen the opening brace for the innermost block.
3044
3045    Returns:
3046      True if we have seen the opening brace, False if the innermost
3047      block is still expecting an opening brace.
3048    """
3049    return (not self.stack) or self.stack[-1].seen_open_brace
3050
3051  def InNamespaceBody(self):
3052    """Check if we are currently one level inside a namespace body.
3053
3054    Returns:
3055      True if top of the stack is a namespace block, False otherwise.
3056    """
3057    return self.stack and isinstance(self.stack[-1], _NamespaceInfo)
3058
3059  def InExternC(self):
3060    """Check if we are currently one level inside an 'extern "C"' block.
3061
3062    Returns:
3063      True if top of the stack is an extern block, False otherwise.
3064    """
3065    return self.stack and isinstance(self.stack[-1], _ExternCInfo)
3066
3067  def InClassDeclaration(self):
3068    """Check if we are currently one level inside a class or struct declaration.
3069
3070    Returns:
3071      True if top of the stack is a class/struct, False otherwise.
3072    """
3073    return self.stack and isinstance(self.stack[-1], _ClassInfo)
3074
3075  def InAsmBlock(self):
3076    """Check if we are currently one level inside an inline ASM block.
3077
3078    Returns:
3079      True if the top of the stack is a block containing inline ASM.
3080    """
3081    return self.stack and self.stack[-1].inline_asm != _NO_ASM
3082
3083  def InTemplateArgumentList(self, clean_lines, linenum, pos):
3084    """Check if current position is inside template argument list.
3085
3086    Args:
3087      clean_lines: A CleansedLines instance containing the file.
3088      linenum: The number of the line to check.
3089      pos: position just after the suspected template argument.
3090    Returns:
3091      True if (linenum, pos) is inside template arguments.
3092    """
3093    while linenum < clean_lines.NumLines():
3094      # Find the earliest character that might indicate a template argument
3095      line = clean_lines.elided[linenum]
3096      match = re.match(r'^[^{};=\[\]\.<>]*(.)', line[pos:])
3097      if not match:
3098        linenum += 1
3099        pos = 0
3100        continue
3101      token = match.group(1)
3102      pos += len(match.group(0))
3103
3104      # These things do not look like template argument list:
3105      #   class Suspect {
3106      #   class Suspect x; }
3107      if token in ('{', '}', ';'): return False
3108
3109      # These things look like template argument list:
3110      #   template <class Suspect>
3111      #   template <class Suspect = default_value>
3112      #   template <class Suspect[]>
3113      #   template <class Suspect...>
3114      if token in ('>', '=', '[', ']', '.'): return True
3115
3116      # Check if token is an unmatched '<'.
3117      # If not, move on to the next character.
3118      if token != '<':
3119        pos += 1
3120        if pos >= len(line):
3121          linenum += 1
3122          pos = 0
3123        continue
3124
3125      # We can't be sure if we just find a single '<', and need to
3126      # find the matching '>'.
3127      (_, end_line, end_pos) = CloseExpression(clean_lines, linenum, pos - 1)
3128      if end_pos < 0:
3129        # Not sure if template argument list or syntax error in file
3130        return False
3131      linenum = end_line
3132      pos = end_pos
3133    return False
3134
3135  def UpdatePreprocessor(self, line):
3136    """Update preprocessor stack.
3137
3138    We need to handle preprocessors due to classes like this:
3139      #ifdef SWIG
3140      struct ResultDetailsPageElementExtensionPoint {
3141      #else
3142      struct ResultDetailsPageElementExtensionPoint : public Extension {
3143      #endif
3144
3145    We make the following assumptions (good enough for most files):
3146    - Preprocessor condition evaluates to true from #if up to first
3147      #else/#elif/#endif.
3148
3149    - Preprocessor condition evaluates to false from #else/#elif up
3150      to #endif.  We still perform lint checks on these lines, but
3151      these do not affect nesting stack.
3152
3153    Args:
3154      line: current line to check.
3155    """
3156    if re.match(r'^\s*#\s*(if|ifdef|ifndef)\b', line):
3157      # Beginning of #if block, save the nesting stack here.  The saved
3158      # stack will allow us to restore the parsing state in the #else case.
3159      self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack)))
3160    elif re.match(r'^\s*#\s*(else|elif)\b', line):
3161      # Beginning of #else block
3162      if self.pp_stack:
3163        if not self.pp_stack[-1].seen_else:
3164          # This is the first #else or #elif block.  Remember the
3165          # whole nesting stack up to this point.  This is what we
3166          # keep after the #endif.
3167          self.pp_stack[-1].seen_else = True
3168          self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack)
3169
3170        # Restore the stack to how it was before the #if
3171        self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if)
3172      else:
3173        # TODO(unknown): unexpected #else, issue warning?
3174        pass
3175    elif re.match(r'^\s*#\s*endif\b', line):
3176      # End of #if or #else blocks.
3177      if self.pp_stack:
3178        # If we saw an #else, we will need to restore the nesting
3179        # stack to its former state before the #else, otherwise we
3180        # will just continue from where we left off.
3181        if self.pp_stack[-1].seen_else:
3182          # Here we can just use a shallow copy since we are the last
3183          # reference to it.
3184          self.stack = self.pp_stack[-1].stack_before_else
3185        # Drop the corresponding #if
3186        self.pp_stack.pop()
3187      else:
3188        # TODO(unknown): unexpected #endif, issue warning?
3189        pass
3190
3191  # TODO(unknown): Update() is too long, but we will refactor later.
3192  def Update(self, filename, clean_lines, linenum, error):
3193    """Update nesting state with current line.
3194
3195    Args:
3196      filename: The name of the current file.
3197      clean_lines: A CleansedLines instance containing the file.
3198      linenum: The number of the line to check.
3199      error: The function to call with any errors found.
3200    """
3201    line = clean_lines.elided[linenum]
3202
3203    # Remember top of the previous nesting stack.
3204    #
3205    # The stack is always pushed/popped and not modified in place, so
3206    # we can just do a shallow copy instead of copy.deepcopy.  Using
3207    # deepcopy would slow down cpplint by ~28%.
3208    if self.stack:
3209      self.previous_stack_top = self.stack[-1]
3210    else:
3211      self.previous_stack_top = None
3212
3213    # Update pp_stack
3214    self.UpdatePreprocessor(line)
3215
3216    # Count parentheses.  This is to avoid adding struct arguments to
3217    # the nesting stack.
3218    if self.stack:
3219      inner_block = self.stack[-1]
3220      depth_change = line.count('(') - line.count(')')
3221      inner_block.open_parentheses += depth_change
3222
3223      # Also check if we are starting or ending an inline assembly block.
3224      if inner_block.inline_asm in (_NO_ASM, _END_ASM):
3225        if (depth_change != 0 and
3226            inner_block.open_parentheses == 1 and
3227            _MATCH_ASM.match(line)):
3228          # Enter assembly block
3229          inner_block.inline_asm = _INSIDE_ASM
3230        else:
3231          # Not entering assembly block.  If previous line was _END_ASM,
3232          # we will now shift to _NO_ASM state.
3233          inner_block.inline_asm = _NO_ASM
3234      elif (inner_block.inline_asm == _INSIDE_ASM and
3235            inner_block.open_parentheses == 0):
3236        # Exit assembly block
3237        inner_block.inline_asm = _END_ASM
3238
3239    # Consume namespace declaration at the beginning of the line.  Do
3240    # this in a loop so that we catch same line declarations like this:
3241    #   namespace proto2 { namespace bridge { class MessageSet; } }
3242    while True:
3243      # Match start of namespace.  The "\b\s*" below catches namespace
3244      # declarations even if it weren't followed by a whitespace, this
3245      # is so that we don't confuse our namespace checker.  The
3246      # missing spaces will be flagged by CheckSpacing.
3247      namespace_decl_match = re.match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', line)
3248      if not namespace_decl_match:
3249        break
3250
3251      new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum)
3252      self.stack.append(new_namespace)
3253
3254      line = namespace_decl_match.group(2)
3255      if line.find('{') != -1:
3256        new_namespace.seen_open_brace = True
3257        line = line[line.find('{') + 1:]
3258
3259    # Look for a class declaration in whatever is left of the line
3260    # after parsing namespaces.  The regexp accounts for decorated classes
3261    # such as in:
3262    #   class LOCKABLE API Object {
3263    #   };
3264    class_decl_match = re.match(
3265        r'^(\s*(?:template\s*<[\w\s<>,:=]*>\s*)?'
3266        r'(class|struct)\s+(?:[a-zA-Z0-9_]+\s+)*(\w+(?:::\w+)*))'
3267        r'(.*)$', line)
3268    if (class_decl_match and
3269        (not self.stack or self.stack[-1].open_parentheses == 0)):
3270      # We do not want to accept classes that are actually template arguments:
3271      #   template <class Ignore1,
3272      #             class Ignore2 = Default<Args>,
3273      #             template <Args> class Ignore3>
3274      #   void Function() {};
3275      #
3276      # To avoid template argument cases, we scan forward and look for
3277      # an unmatched '>'.  If we see one, assume we are inside a
3278      # template argument list.
3279      end_declaration = len(class_decl_match.group(1))
3280      if not self.InTemplateArgumentList(clean_lines, linenum, end_declaration):
3281        self.stack.append(_ClassInfo(
3282            class_decl_match.group(3), class_decl_match.group(2),
3283            clean_lines, linenum))
3284        line = class_decl_match.group(4)
3285
3286    # If we have not yet seen the opening brace for the innermost block,
3287    # run checks here.
3288    if not self.SeenOpenBrace():
3289      self.stack[-1].CheckBegin(filename, clean_lines, linenum, error)
3290
3291    # Update access control if we are inside a class/struct
3292    if self.stack and isinstance(self.stack[-1], _ClassInfo):
3293      classinfo = self.stack[-1]
3294      access_match = re.match(
3295          r'^(.*)\b(public|private|protected|signals)(\s+(?:slots\s*)?)?'
3296          r':(?:[^:]|$)',
3297          line)
3298      if access_match:
3299        classinfo.access = access_match.group(2)
3300
3301        # Check that access keywords are indented +1 space.  Skip this
3302        # check if the keywords are not preceded by whitespaces.
3303        indent = access_match.group(1)
3304        if (len(indent) != classinfo.class_indent + 1 and
3305            re.match(r'^\s*$', indent)):
3306          if classinfo.is_struct:
3307            parent = 'struct ' + classinfo.name
3308          else:
3309            parent = 'class ' + classinfo.name
3310          slots = ''
3311          if access_match.group(3):
3312            slots = access_match.group(3)
3313          error(filename, linenum, 'whitespace/indent', 3,
3314                f'{access_match.group(2)}{slots}:'
3315                f' should be indented +1 space inside {parent}')
3316
3317    # Consume braces or semicolons from what's left of the line
3318    while True:
3319      # Match first brace, semicolon, or closed parenthesis.
3320      matched = re.match(r'^[^{;)}]*([{;)}])(.*)$', line)
3321      if not matched:
3322        break
3323
3324      token = matched.group(1)
3325      if token == '{':
3326        # If namespace or class hasn't seen a opening brace yet, mark
3327        # namespace/class head as complete.  Push a new block onto the
3328        # stack otherwise.
3329        if not self.SeenOpenBrace():
3330          self.stack[-1].seen_open_brace = True
3331        elif re.match(r'^extern\s*"[^"]*"\s*\{', line):
3332          self.stack.append(_ExternCInfo(linenum))
3333        else:
3334          self.stack.append(_BlockInfo(linenum, True))
3335          if _MATCH_ASM.match(line):
3336            self.stack[-1].inline_asm = _BLOCK_ASM
3337
3338      elif token == ';' or token == ')':
3339        # If we haven't seen an opening brace yet, but we already saw
3340        # a semicolon, this is probably a forward declaration.  Pop
3341        # the stack for these.
3342        #
3343        # Similarly, if we haven't seen an opening brace yet, but we
3344        # already saw a closing parenthesis, then these are probably
3345        # function arguments with extra "class" or "struct" keywords.
3346        # Also pop these stack for these.
3347        if not self.SeenOpenBrace():
3348          self.stack.pop()
3349      else:  # token == '}'
3350        # Perform end of block checks and pop the stack.
3351        if self.stack:
3352          self.stack[-1].CheckEnd(filename, clean_lines, linenum, error)
3353          self.stack.pop()
3354      line = matched.group(2)
3355
3356  def InnermostClass(self):
3357    """Get class info on the top of the stack.
3358
3359    Returns:
3360      A _ClassInfo object if we are inside a class, or None otherwise.
3361    """
3362    for i in range(len(self.stack), 0, -1):
3363      classinfo = self.stack[i - 1]
3364      if isinstance(classinfo, _ClassInfo):
3365        return classinfo
3366    return None
3367
3368def CheckForNonStandardConstructs(filename, clean_lines, linenum,
3369                                  nesting_state, error):
3370  r"""Logs an error if we see certain non-ANSI constructs ignored by gcc-2.
3371
3372  Complain about several constructs which gcc-2 accepts, but which are
3373  not standard C++.  Warning about these in lint is one way to ease the
3374  transition to new compilers.
3375  - put storage class first (e.g. "static const" instead of "const static").
3376  - "%lld" instead of %qd" in printf-type functions.
3377  - "%1$d" is non-standard in printf-type functions.
3378  - "\%" is an undefined character escape sequence.
3379  - text after #endif is not allowed.
3380  - invalid inner-style forward declaration.
3381  - >? and <? operators, and their >?= and <?= cousins.
3382
3383  Additionally, check for constructor/destructor style violations and reference
3384  members, as it is very convenient to do so while checking for
3385  gcc-2 compliance.
3386
3387  Args:
3388    filename: The name of the current file.
3389    clean_lines: A CleansedLines instance containing the file.
3390    linenum: The number of the line to check.
3391    nesting_state: A NestingState instance which maintains information about
3392                   the current stack of nested blocks being parsed.
3393    error: A callable to which errors are reported, which takes 4 arguments:
3394           filename, line number, error level, and message
3395  """
3396
3397  # Remove comments from the line, but leave in strings for now.
3398  line = clean_lines.lines[linenum]
3399
3400  if re.search(r'printf\s*\(.*".*%[-+ ]?\d*q', line):
3401    error(filename, linenum, 'runtime/printf_format', 3,
3402          '%q in format strings is deprecated.  Use %ll instead.')
3403
3404  if re.search(r'printf\s*\(.*".*%\d+\$', line):
3405    error(filename, linenum, 'runtime/printf_format', 2,
3406          '%N$ formats are unconventional.  Try rewriting to avoid them.')
3407
3408  # Remove escaped backslashes before looking for undefined escapes.
3409  line = line.replace('\\\\', '')
3410
3411  if re.search(r'("|\').*\\(%|\[|\(|{)', line):
3412    error(filename, linenum, 'build/printf_format', 3,
3413          '%, [, (, and { are undefined character escapes.  Unescape them.')
3414
3415  # For the rest, work with both comments and strings removed.
3416  line = clean_lines.elided[linenum]
3417
3418  if re.search(r'\b(const|volatile|void|char|short|int|long'
3419            r'|float|double|signed|unsigned'
3420            r'|schar|u?int8_t|u?int16_t|u?int32_t|u?int64_t)'
3421            r'\s+(register|static|extern|typedef)\b',
3422            line):
3423    error(filename, linenum, 'build/storage_class', 5,
3424          'Storage-class specifier (static, extern, typedef, etc) should be '
3425          'at the beginning of the declaration.')
3426
3427  if re.match(r'\s*#\s*endif\s*[^/\s]+', line):
3428    error(filename, linenum, 'build/endif_comment', 5,
3429          'Uncommented text after #endif is non-standard.  Use a comment.')
3430
3431  if re.match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line):
3432    error(filename, linenum, 'build/forward_decl', 5,
3433          'Inner-style forward declarations are invalid.  Remove this line.')
3434
3435  if re.search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?',
3436            line):
3437    error(filename, linenum, 'build/deprecated', 3,
3438          '>? and <? (max and min) operators are non-standard and deprecated.')
3439
3440  if re.search(r'^\s*const\s*string\s*&\s*\w+\s*;', line):
3441    # TODO(unknown): Could it be expanded safely to arbitrary references,
3442    # without triggering too many false positives? The first
3443    # attempt triggered 5 warnings for mostly benign code in the regtest, hence
3444    # the restriction.
3445    # Here's the original regexp, for the reference:
3446    # type_name = r'\w+((\s*::\s*\w+)|(\s*<\s*\w+?\s*>))?'
3447    # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;'
3448    error(filename, linenum, 'runtime/member_string_references', 2,
3449          'const string& members are dangerous. It is much better to use '
3450          'alternatives, such as pointers or simple constants.')
3451
3452  # Everything else in this function operates on class declarations.
3453  # Return early if the top of the nesting stack is not a class, or if
3454  # the class head is not completed yet.
3455  classinfo = nesting_state.InnermostClass()
3456  if not classinfo or not classinfo.seen_open_brace:
3457    return
3458
3459  # The class may have been declared with namespace or classname qualifiers.
3460  # The constructor and destructor will not have those qualifiers.
3461  base_classname = classinfo.name.split('::')[-1]
3462
3463  # Look for single-argument constructors that aren't marked explicit.
3464  # Technically a valid construct, but against style.
3465  explicit_constructor_match = re.match(
3466      r'\s+(?:(?:inline|constexpr)\s+)*(explicit\s+)?'
3467      rf'(?:(?:inline|constexpr)\s+)*{re.escape(base_classname)}\s*'
3468      r'\(((?:[^()]|\([^()]*\))*)\)', line)
3469
3470  if explicit_constructor_match:
3471    is_marked_explicit = explicit_constructor_match.group(1)
3472
3473    if not explicit_constructor_match.group(2):
3474      constructor_args = []
3475    else:
3476      constructor_args = explicit_constructor_match.group(2).split(',')
3477
3478    # collapse arguments so that commas in template parameter lists and function
3479    # argument parameter lists don't split arguments in two
3480    i = 0
3481    while i < len(constructor_args):
3482      constructor_arg = constructor_args[i]
3483      while (constructor_arg.count('<') > constructor_arg.count('>') or
3484             constructor_arg.count('(') > constructor_arg.count(')')):
3485        constructor_arg += ',' + constructor_args[i + 1]
3486        del constructor_args[i + 1]
3487      constructor_args[i] = constructor_arg
3488      i += 1
3489
3490    variadic_args = [arg for arg in constructor_args if '&&...' in arg]
3491    defaulted_args = [arg for arg in constructor_args if '=' in arg]
3492    noarg_constructor = (not constructor_args or  # empty arg list
3493                         # 'void' arg specifier
3494                         (len(constructor_args) == 1 and
3495                          constructor_args[0].strip() == 'void'))
3496    onearg_constructor = ((len(constructor_args) == 1 and  # exactly one arg
3497                           not noarg_constructor) or
3498                          # all but at most one arg defaulted
3499                          (len(constructor_args) >= 1 and
3500                           not noarg_constructor and
3501                           len(defaulted_args) >= len(constructor_args) - 1) or
3502                          # variadic arguments with zero or one argument
3503                          (len(constructor_args) <= 2 and
3504                           len(variadic_args) >= 1))
3505    initializer_list_constructor = bool(
3506        onearg_constructor and
3507        re.search(r'\bstd\s*::\s*initializer_list\b', constructor_args[0]))
3508    copy_constructor = bool(
3509        onearg_constructor and
3510        re.match(r'((const\s+(volatile\s+)?)?|(volatile\s+(const\s+)?))?'
3511                rf'{re.escape(base_classname)}(\s*<[^>]*>)?(\s+const)?\s*(?:<\w+>\s*)?&',
3512                constructor_args[0].strip())
3513    )
3514
3515    if (not is_marked_explicit and
3516        onearg_constructor and
3517        not initializer_list_constructor and
3518        not copy_constructor):
3519      if defaulted_args or variadic_args:
3520        error(filename, linenum, 'runtime/explicit', 4,
3521              'Constructors callable with one argument '
3522              'should be marked explicit.')
3523      else:
3524        error(filename, linenum, 'runtime/explicit', 4,
3525              'Single-parameter constructors should be marked explicit.')
3526
3527
3528def CheckSpacingForFunctionCall(filename, clean_lines, linenum, error):
3529  """Checks for the correctness of various spacing around function calls.
3530
3531  Args:
3532    filename: The name of the current file.
3533    clean_lines: A CleansedLines instance containing the file.
3534    linenum: The number of the line to check.
3535    error: The function to call with any errors found.
3536  """
3537  line = clean_lines.elided[linenum]
3538
3539  # Since function calls often occur inside if/for/while/switch
3540  # expressions - which have their own, more liberal conventions - we
3541  # first see if we should be looking inside such an expression for a
3542  # function call, to which we can apply more strict standards.
3543  fncall = line    # if there's no control flow construct, look at whole line
3544  for pattern in (r'\bif\s*\((.*)\)\s*{',
3545                  r'\bfor\s*\((.*)\)\s*{',
3546                  r'\bwhile\s*\((.*)\)\s*[{;]',
3547                  r'\bswitch\s*\((.*)\)\s*{'):
3548    match = re.search(pattern, line)
3549    if match:
3550      fncall = match.group(1)    # look inside the parens for function calls
3551      break
3552
3553  # Except in if/for/while/switch, there should never be space
3554  # immediately inside parens (eg "f( 3, 4 )").  We make an exception
3555  # for nested parens ( (a+b) + c ).  Likewise, there should never be
3556  # a space before a ( when it's a function argument.  I assume it's a
3557  # function argument when the char before the whitespace is legal in
3558  # a function name (alnum + _) and we're not starting a macro. Also ignore
3559  # pointers and references to arrays and functions coz they're too tricky:
3560  # we use a very simple way to recognize these:
3561  # " (something)(maybe-something)" or
3562  # " (something)(maybe-something," or
3563  # " (something)[something]"
3564  # Note that we assume the contents of [] to be short enough that
3565  # they'll never need to wrap.
3566  if (  # Ignore control structures.
3567      not re.search(r'\b(if|elif|for|while|switch|return|new|delete|catch|sizeof)\b',
3568                 fncall) and
3569      # Ignore pointers/references to functions.
3570      not re.search(r' \([^)]+\)\([^)]*(\)|,$)', fncall) and
3571      # Ignore pointers/references to arrays.
3572      not re.search(r' \([^)]+\)\[[^\]]+\]', fncall)):
3573    if re.search(r'\w\s*\(\s(?!\s*\\$)', fncall):      # a ( used for a fn call
3574      error(filename, linenum, 'whitespace/parens', 4,
3575            'Extra space after ( in function call')
3576    elif re.search(r'\(\s+(?!(\s*\\)|\()', fncall):
3577      error(filename, linenum, 'whitespace/parens', 2,
3578            'Extra space after (')
3579    if (re.search(r'\w\s+\(', fncall) and
3580        not re.search(r'_{0,2}asm_{0,2}\s+_{0,2}volatile_{0,2}\s+\(', fncall) and
3581        not re.search(r'#\s*define|typedef|using\s+\w+\s*=', fncall) and
3582        not re.search(r'\w\s+\((\w+::)*\*\w+\)\(', fncall) and
3583        not re.search(r'\bcase\s+\(', fncall)):
3584      # TODO(unknown): Space after an operator function seem to be a common
3585      # error, silence those for now by restricting them to highest verbosity.
3586      if re.search(r'\boperator_*\b', line):
3587        error(filename, linenum, 'whitespace/parens', 0,
3588              'Extra space before ( in function call')
3589      else:
3590        error(filename, linenum, 'whitespace/parens', 4,
3591              'Extra space before ( in function call')
3592    # If the ) is followed only by a newline or a { + newline, assume it's
3593    # part of a control statement (if/while/etc), and don't complain
3594    if re.search(r'[^)]\s+\)\s*[^{\s]', fncall):
3595      # If the closing parenthesis is preceded by only whitespaces,
3596      # try to give a more descriptive error message.
3597      if re.search(r'^\s+\)', fncall):
3598        error(filename, linenum, 'whitespace/parens', 2,
3599              'Closing ) should be moved to the previous line')
3600      else:
3601        error(filename, linenum, 'whitespace/parens', 2,
3602              'Extra space before )')
3603
3604
3605def IsBlankLine(line):
3606  """Returns true if the given line is blank.
3607
3608  We consider a line to be blank if the line is empty or consists of
3609  only white spaces.
3610
3611  Args:
3612    line: A line of a string.
3613
3614  Returns:
3615    True, if the given line is blank.
3616  """
3617  return not line or line.isspace()
3618
3619
3620def CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line,
3621                                 error):
3622  is_namespace_indent_item = (
3623      len(nesting_state.stack) >= 1 and
3624      (isinstance(nesting_state.stack[-1], _NamespaceInfo) or
3625      (isinstance(nesting_state.previous_stack_top, _NamespaceInfo)))
3626      )
3627
3628  if ShouldCheckNamespaceIndentation(nesting_state, is_namespace_indent_item,
3629                                     clean_lines.elided, line):
3630    CheckItemIndentationInNamespace(filename, clean_lines.elided,
3631                                    line, error)
3632
3633
3634def CheckForFunctionLengths(filename, clean_lines, linenum,
3635                            function_state, error):
3636  """Reports for long function bodies.
3637
3638  For an overview why this is done, see:
3639  https://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions
3640
3641  Uses a simplistic algorithm assuming other style guidelines
3642  (especially spacing) are followed.
3643  Only checks unindented functions, so class members are unchecked.
3644  Trivial bodies are unchecked, so constructors with huge initializer lists
3645  may be missed.
3646  Blank/comment lines are not counted so as to avoid encouraging the removal
3647  of vertical space and comments just to get through a lint check.
3648  NOLINT *on the last line of a function* disables this check.
3649
3650  Args:
3651    filename: The name of the current file.
3652    clean_lines: A CleansedLines instance containing the file.
3653    linenum: The number of the line to check.
3654    function_state: Current function name and lines in body so far.
3655    error: The function to call with any errors found.
3656  """
3657  lines = clean_lines.lines
3658  line = lines[linenum]
3659  joined_line = ''
3660
3661  starting_func = False
3662  regexp = r'(\w(\w|::|\*|\&|\s)*)\('  # decls * & space::name( ...
3663  match_result = re.match(regexp, line)
3664  if match_result:
3665    # If the name is all caps and underscores, figure it's a macro and
3666    # ignore it, unless it's TEST or TEST_F.
3667    function_name = match_result.group(1).split()[-1]
3668    if function_name == 'TEST' or function_name == 'TEST_F' or (
3669        not re.match(r'[A-Z_]+$', function_name)):
3670      starting_func = True
3671
3672  if starting_func:
3673    body_found = False
3674    for start_linenum in range(linenum, clean_lines.NumLines()):
3675      start_line = lines[start_linenum]
3676      joined_line += ' ' + start_line.lstrip()
3677      if re.search(r'(;|})', start_line):  # Declarations and trivial functions
3678        body_found = True
3679        break                              # ... ignore
3680      if re.search(r'{', start_line):
3681        body_found = True
3682        function = re.search(r'((\w|:)*)\(', line).group(1)
3683        if re.match(r'TEST', function):    # Handle TEST... macros
3684          parameter_regexp = re.search(r'(\(.*\))', joined_line)
3685          if parameter_regexp:             # Ignore bad syntax
3686            function += parameter_regexp.group(1)
3687        else:
3688          function += '()'
3689        function_state.Begin(function)
3690        break
3691    if not body_found:
3692      # No body for the function (or evidence of a non-function) was found.
3693      error(filename, linenum, 'readability/fn_size', 5,
3694            'Lint failed to find start of function body.')
3695  elif re.match(r'^\}\s*$', line):  # function end
3696    function_state.Check(error, filename, linenum)
3697    function_state.End()
3698  elif not re.match(r'^\s*$', line):
3699    function_state.Count()  # Count non-blank/non-comment lines.
3700
3701
3702_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?')
3703
3704
3705def CheckComment(line, filename, linenum, next_line_start, error):
3706  """Checks for common mistakes in comments.
3707
3708  Args:
3709    line: The line in question.
3710    filename: The name of the current file.
3711    linenum: The number of the line to check.
3712    next_line_start: The first non-whitespace column of the next line.
3713    error: The function to call with any errors found.
3714  """
3715  commentpos = line.find('//')
3716  if commentpos != -1:
3717    # Check if the // may be in quotes.  If so, ignore it
3718    if re.sub(r'\\.', '', line[0:commentpos]).count('"') % 2 == 0:
3719      # Allow one space for new scopes, two spaces otherwise:
3720      if (not (re.match(r'^.*{ *//', line) and next_line_start == commentpos) and
3721          ((commentpos >= 1 and
3722            line[commentpos-1] not in string.whitespace) or
3723           (commentpos >= 2 and
3724            line[commentpos-2] not in string.whitespace))):
3725        error(filename, linenum, 'whitespace/comments', 2,
3726              'At least two spaces is best between code and comments')
3727
3728      # Checks for common mistakes in TODO comments.
3729      comment = line[commentpos:]
3730      match = _RE_PATTERN_TODO.match(comment)
3731      if match:
3732        # One whitespace is correct; zero whitespace is handled elsewhere.
3733        leading_whitespace = match.group(1)
3734        if len(leading_whitespace) > 1:
3735          error(filename, linenum, 'whitespace/todo', 2,
3736                'Too many spaces before TODO')
3737
3738        username = match.group(2)
3739        if not username:
3740          error(filename, linenum, 'readability/todo', 2,
3741                'Missing username in TODO; it should look like '
3742                '"// TODO(my_username): Stuff."')
3743
3744        middle_whitespace = match.group(3)
3745        # Comparisons made explicit for correctness
3746        #  -- pylint: disable=g-explicit-bool-comparison
3747        if middle_whitespace != ' ' and middle_whitespace != '':
3748          error(filename, linenum, 'whitespace/todo', 2,
3749                'TODO(my_username) should be followed by a space')
3750
3751      # If the comment contains an alphanumeric character, there
3752      # should be a space somewhere between it and the // unless
3753      # it's a /// or //! Doxygen comment.
3754      if (re.match(r'//[^ ]*\w', comment) and
3755          not re.match(r'(///|//\!)(\s+|$)', comment)):
3756        error(filename, linenum, 'whitespace/comments', 4,
3757              'Should have a space between // and comment')
3758
3759
3760def CheckSpacing(filename, clean_lines, linenum, nesting_state, error):
3761  """Checks for the correctness of various spacing issues in the code.
3762
3763  Things we check for: spaces around operators, spaces after
3764  if/for/while/switch, no spaces around parens in function calls, two
3765  spaces between code and comment, don't start a block with a blank
3766  line, don't end a function with a blank line, don't add a blank line
3767  after public/protected/private, don't have too many blank lines in a row.
3768
3769  Args:
3770    filename: The name of the current file.
3771    clean_lines: A CleansedLines instance containing the file.
3772    linenum: The number of the line to check.
3773    nesting_state: A NestingState instance which maintains information about
3774                   the current stack of nested blocks being parsed.
3775    error: The function to call with any errors found.
3776  """
3777
3778  # Don't use "elided" lines here, otherwise we can't check commented lines.
3779  # Don't want to use "raw" either, because we don't want to check inside C++11
3780  # raw strings,
3781  raw = clean_lines.lines_without_raw_strings
3782  line = raw[linenum]
3783
3784  # Before nixing comments, check if the line is blank for no good
3785  # reason.  This includes the first line after a block is opened, and
3786  # blank lines at the end of a function (ie, right before a line like '}'
3787  #
3788  # Skip all the blank line checks if we are immediately inside a
3789  # namespace body.  In other words, don't issue blank line warnings
3790  # for this block:
3791  #   namespace {
3792  #
3793  #   }
3794  #
3795  # A warning about missing end of namespace comments will be issued instead.
3796  #
3797  # Also skip blank line checks for 'extern "C"' blocks, which are formatted
3798  # like namespaces.
3799  if (IsBlankLine(line) and
3800      not nesting_state.InNamespaceBody() and
3801      not nesting_state.InExternC()):
3802    elided = clean_lines.elided
3803    prev_line = elided[linenum - 1]
3804    prevbrace = prev_line.rfind('{')
3805    # TODO(unknown): Don't complain if line before blank line, and line after,
3806    #                both start with alnums and are indented the same amount.
3807    #                This ignores whitespace at the start of a namespace block
3808    #                because those are not usually indented.
3809    if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1:
3810      # OK, we have a blank line at the start of a code block.  Before we
3811      # complain, we check if it is an exception to the rule: The previous
3812      # non-empty line has the parameters of a function header that are indented
3813      # 4 spaces (because they did not fit in a 80 column line when placed on
3814      # the same line as the function name).  We also check for the case where
3815      # the previous line is indented 6 spaces, which may happen when the
3816      # initializers of a constructor do not fit into a 80 column line.
3817      exception = False
3818      if re.match(r' {6}\w', prev_line):  # Initializer list?
3819        # We are looking for the opening column of initializer list, which
3820        # should be indented 4 spaces to cause 6 space indentation afterwards.
3821        search_position = linenum-2
3822        while (search_position >= 0
3823               and re.match(r' {6}\w', elided[search_position])):
3824          search_position -= 1
3825        exception = (search_position >= 0
3826                     and elided[search_position][:5] == '    :')
3827      else:
3828        # Search for the function arguments or an initializer list.  We use a
3829        # simple heuristic here: If the line is indented 4 spaces; and we have a
3830        # closing paren, without the opening paren, followed by an opening brace
3831        # or colon (for initializer lists) we assume that it is the last line of
3832        # a function header.  If we have a colon indented 4 spaces, it is an
3833        # initializer list.
3834        exception = (re.match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
3835                           prev_line)
3836                     or re.match(r' {4}:', prev_line))
3837
3838      if not exception:
3839        error(filename, linenum, 'whitespace/blank_line', 2,
3840              'Redundant blank line at the start of a code block '
3841              'should be deleted.')
3842    # Ignore blank lines at the end of a block in a long if-else
3843    # chain, like this:
3844    #   if (condition1) {
3845    #     // Something followed by a blank line
3846    #
3847    #   } else if (condition2) {
3848    #     // Something else
3849    #   }
3850    if linenum + 1 < clean_lines.NumLines():
3851      next_line = raw[linenum + 1]
3852      if (next_line
3853          and re.match(r'\s*}', next_line)
3854          and next_line.find('} else ') == -1):
3855        error(filename, linenum, 'whitespace/blank_line', 3,
3856              'Redundant blank line at the end of a code block '
3857              'should be deleted.')
3858
3859    matched = re.match(r'\s*(public|protected|private):', prev_line)
3860    if matched:
3861      error(filename, linenum, 'whitespace/blank_line', 3,
3862            f'Do not leave a blank line after "{matched.group(1)}:"')
3863
3864  # Next, check comments
3865  next_line_start = 0
3866  if linenum + 1 < clean_lines.NumLines():
3867    next_line = raw[linenum + 1]
3868    next_line_start = len(next_line) - len(next_line.lstrip())
3869  CheckComment(line, filename, linenum, next_line_start, error)
3870
3871  # get rid of comments and strings
3872  line = clean_lines.elided[linenum]
3873
3874  # You shouldn't have spaces before your brackets, except for C++11 attributes
3875  # or maybe after 'delete []', 'return []() {};', or 'auto [abc, ...] = ...;'.
3876  if (re.search(r'\w\s+\[(?!\[)', line) and
3877      not re.search(r'(?:auto&?|delete|return)\s+\[', line)):
3878    error(filename, linenum, 'whitespace/braces', 5,
3879          'Extra space before [')
3880
3881  # In range-based for, we wanted spaces before and after the colon, but
3882  # not around "::" tokens that might appear.
3883  if (re.search(r'for *\(.*[^:]:[^: ]', line) or
3884      re.search(r'for *\(.*[^: ]:[^:]', line)):
3885    error(filename, linenum, 'whitespace/forcolon', 2,
3886          'Missing space around colon in range-based for loop')
3887
3888
3889def CheckOperatorSpacing(filename, clean_lines, linenum, error):
3890  """Checks for horizontal spacing around operators.
3891
3892  Args:
3893    filename: The name of the current file.
3894    clean_lines: A CleansedLines instance containing the file.
3895    linenum: The number of the line to check.
3896    error: The function to call with any errors found.
3897  """
3898  line = clean_lines.elided[linenum]
3899
3900  # Don't try to do spacing checks for operator methods.  Do this by
3901  # replacing the troublesome characters with something else,
3902  # preserving column position for all other characters.
3903  #
3904  # The replacement is done repeatedly to avoid false positives from
3905  # operators that call operators.
3906  while True:
3907    match = re.match(r'^(.*\boperator\b)(\S+)(\s*\(.*)$', line)
3908    if match:
3909      line = match.group(1) + ('_' * len(match.group(2))) + match.group(3)
3910    else:
3911      break
3912
3913  # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
3914  # Otherwise not.  Note we only check for non-spaces on *both* sides;
3915  # sometimes people put non-spaces on one side when aligning ='s among
3916  # many lines (not that this is behavior that I approve of...)
3917  if ((re.search(r'[\w.]=', line) or
3918       re.search(r'=[\w.]', line))
3919      and not re.search(r'\b(if|while|for) ', line)
3920      # Operators taken from [lex.operators] in C++11 standard.
3921      and not re.search(r'(>=|<=|==|!=|&=|\^=|\|=|\+=|\*=|\/=|\%=)', line)
3922      and not re.search(r'operator=', line)):
3923    error(filename, linenum, 'whitespace/operators', 4,
3924          'Missing spaces around =')
3925
3926  # It's ok not to have spaces around binary operators like + - * /, but if
3927  # there's too little whitespace, we get concerned.  It's hard to tell,
3928  # though, so we punt on this one for now.  TODO.
3929
3930  # You should always have whitespace around binary operators.
3931  #
3932  # Check <= and >= first to avoid false positives with < and >, then
3933  # check non-include lines for spacing around < and >.
3934  #
3935  # If the operator is followed by a comma, assume it's be used in a
3936  # macro context and don't do any checks.  This avoids false
3937  # positives.
3938  #
3939  # Note that && is not included here.  This is because there are too
3940  # many false positives due to RValue references.
3941  match = re.search(r'[^<>=!\s](==|!=|<=|>=|\|\|)[^<>=!\s,;\)]', line)
3942  if match:
3943    # TODO: support alternate operators
3944    error(filename, linenum, 'whitespace/operators', 3,
3945          f'Missing spaces around {match.group(1)}')
3946  elif not re.match(r'#.*include', line):
3947    # Look for < that is not surrounded by spaces.  This is only
3948    # triggered if both sides are missing spaces, even though
3949    # technically should should flag if at least one side is missing a
3950    # space.  This is done to avoid some false positives with shifts.
3951    match = re.match(r'^(.*[^\s<])<[^\s=<,]', line)
3952    if match:
3953      (_, _, end_pos) = CloseExpression(
3954          clean_lines, linenum, len(match.group(1)))
3955      if end_pos <= -1:
3956        error(filename, linenum, 'whitespace/operators', 3,
3957              'Missing spaces around <')
3958
3959    # Look for > that is not surrounded by spaces.  Similar to the
3960    # above, we only trigger if both sides are missing spaces to avoid
3961    # false positives with shifts.
3962    match = re.match(r'^(.*[^-\s>])>[^\s=>,]', line)
3963    if match:
3964      (_, _, start_pos) = ReverseCloseExpression(
3965          clean_lines, linenum, len(match.group(1)))
3966      if start_pos <= -1:
3967        error(filename, linenum, 'whitespace/operators', 3,
3968              'Missing spaces around >')
3969
3970  # We allow no-spaces around << when used like this: 10<<20, but
3971  # not otherwise (particularly, not when used as streams)
3972  #
3973  # We also allow operators following an opening parenthesis, since
3974  # those tend to be macros that deal with operators.
3975  match = re.search(r'(operator|[^\s(<])(?:L|UL|LL|ULL|l|ul|ll|ull)?<<([^\s,=<])', line)
3976  if (match and not (match.group(1).isdigit() and match.group(2).isdigit()) and
3977      not (match.group(1) == 'operator' and match.group(2) == ';')):
3978    error(filename, linenum, 'whitespace/operators', 3,
3979          'Missing spaces around <<')
3980
3981  # We allow no-spaces around >> for almost anything.  This is because
3982  # C++11 allows ">>" to close nested templates, which accounts for
3983  # most cases when ">>" is not followed by a space.
3984  #
3985  # We still warn on ">>" followed by alpha character, because that is
3986  # likely due to ">>" being used for right shifts, e.g.:
3987  #   value >> alpha
3988  #
3989  # When ">>" is used to close templates, the alphanumeric letter that
3990  # follows would be part of an identifier, and there should still be
3991  # a space separating the template type and the identifier.
3992  #   type<type<type>> alpha
3993  match = re.search(r'>>[a-zA-Z_]', line)
3994  if match:
3995    error(filename, linenum, 'whitespace/operators', 3,
3996          'Missing spaces around >>')
3997
3998  # There shouldn't be space around unary operators
3999  match = re.search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
4000  if match:
4001    error(filename, linenum, 'whitespace/operators', 4,
4002          f'Extra space for operator {match.group(1)}')
4003
4004
4005def CheckParenthesisSpacing(filename, clean_lines, linenum, error):
4006  """Checks for horizontal spacing around parentheses.
4007
4008  Args:
4009    filename: The name of the current file.
4010    clean_lines: A CleansedLines instance containing the file.
4011    linenum: The number of the line to check.
4012    error: The function to call with any errors found.
4013  """
4014  line = clean_lines.elided[linenum]
4015
4016  # No spaces after an if, while, switch, or for
4017  match = re.search(r' (if\(|for\(|while\(|switch\()', line)
4018  if match:
4019    error(filename, linenum, 'whitespace/parens', 5,
4020          f'Missing space before ( in {match.group(1)}')
4021
4022  # For if/for/while/switch, the left and right parens should be
4023  # consistent about how many spaces are inside the parens, and
4024  # there should either be zero or one spaces inside the parens.
4025  # We don't want: "if ( foo)" or "if ( foo   )".
4026  # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
4027  match = re.search(r'\b(if|for|while|switch)\s*'
4028                 r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$',
4029                 line)
4030  if match:
4031    if len(match.group(2)) != len(match.group(4)):
4032      if not (match.group(3) == ';' and
4033              len(match.group(2)) == 1 + len(match.group(4)) or
4034              not match.group(2) and re.search(r'\bfor\s*\(.*; \)', line)):
4035        error(filename, linenum, 'whitespace/parens', 5,
4036              f'Mismatching spaces inside () in {match.group(1)}')
4037    if len(match.group(2)) not in [0, 1]:
4038      error(filename, linenum, 'whitespace/parens', 5,
4039            f'Should have zero or one spaces inside ( and ) in {match.group(1)}')
4040
4041
4042def CheckCommaSpacing(filename, clean_lines, linenum, error):
4043  """Checks for horizontal spacing near commas and semicolons.
4044
4045  Args:
4046    filename: The name of the current file.
4047    clean_lines: A CleansedLines instance containing the file.
4048    linenum: The number of the line to check.
4049    error: The function to call with any errors found.
4050  """
4051  raw = clean_lines.lines_without_raw_strings
4052  line = clean_lines.elided[linenum]
4053
4054  # You should always have a space after a comma (either as fn arg or operator)
4055  #
4056  # This does not apply when the non-space character following the
4057  # comma is another comma, since the only time when that happens is
4058  # for empty macro arguments.
4059  #
4060  # We run this check in two passes: first pass on elided lines to
4061  # verify that lines contain missing whitespaces, second pass on raw
4062  # lines to confirm that those missing whitespaces are not due to
4063  # elided comments.
4064  match = re.search(r',[^,\s]', re.sub(r'\b__VA_OPT__\s*\(,\)', '',
4065                                       re.sub(r'\boperator\s*,\s*\(', 'F(', line)))
4066  if (match and re.search(r',[^,\s]', raw[linenum])):
4067    error(filename, linenum, 'whitespace/comma', 3,
4068          'Missing space after ,')
4069
4070  # You should always have a space after a semicolon
4071  # except for few corner cases
4072  # TODO(unknown): clarify if 'if (1) { return 1;}' is requires one more
4073  # space after ;
4074  if re.search(r';[^\s};\\)/]', line):
4075    error(filename, linenum, 'whitespace/semicolon', 3,
4076          'Missing space after ;')
4077
4078
4079def _IsType(clean_lines, nesting_state, expr):
4080  """Check if expression looks like a type name, returns true if so.
4081
4082  Args:
4083    clean_lines: A CleansedLines instance containing the file.
4084    nesting_state: A NestingState instance which maintains information about
4085                   the current stack of nested blocks being parsed.
4086    expr: The expression to check.
4087  Returns:
4088    True, if token looks like a type.
4089  """
4090  # Keep only the last token in the expression
4091  last_word = re.match(r'^.*(\b\S+)$', expr)
4092  if last_word:
4093    token = last_word.group(1)
4094  else:
4095    token = expr
4096
4097  # Match native types and stdint types
4098  if _TYPES.match(token):
4099    return True
4100
4101  # Try a bit harder to match templated types.  Walk up the nesting
4102  # stack until we find something that resembles a typename
4103  # declaration for what we are looking for.
4104  typename_pattern = (r'\b(?:typename|class|struct)\s+' + re.escape(token) +
4105                      r'\b')
4106  block_index = len(nesting_state.stack) - 1
4107  while block_index >= 0:
4108    if isinstance(nesting_state.stack[block_index], _NamespaceInfo):
4109      return False
4110
4111    # Found where the opening brace is.  We want to scan from this
4112    # line up to the beginning of the function, minus a few lines.
4113    #   template <typename Type1,  // stop scanning here
4114    #             ...>
4115    #   class C
4116    #     : public ... {  // start scanning here
4117    last_line = nesting_state.stack[block_index].starting_linenum
4118
4119    next_block_start = 0
4120    if block_index > 0:
4121      next_block_start = nesting_state.stack[block_index - 1].starting_linenum
4122    first_line = last_line
4123    while first_line >= next_block_start:
4124      if clean_lines.elided[first_line].find('template') >= 0:
4125        break
4126      first_line -= 1
4127    if first_line < next_block_start:
4128      # Didn't find any "template" keyword before reaching the next block,
4129      # there are probably no template things to check for this block
4130      block_index -= 1
4131      continue
4132
4133    # Look for typename in the specified range
4134    for i in range(first_line, last_line + 1, 1):
4135      if re.search(typename_pattern, clean_lines.elided[i]):
4136        return True
4137    block_index -= 1
4138
4139  return False
4140
4141
4142def CheckBracesSpacing(filename, clean_lines, linenum, nesting_state, error):
4143  """Checks for horizontal spacing near commas.
4144
4145  Args:
4146    filename: The name of the current file.
4147    clean_lines: A CleansedLines instance containing the file.
4148    linenum: The number of the line to check.
4149    nesting_state: A NestingState instance which maintains information about
4150                   the current stack of nested blocks being parsed.
4151    error: The function to call with any errors found.
4152  """
4153  line = clean_lines.elided[linenum]
4154
4155  # Except after an opening paren, or after another opening brace (in case of
4156  # an initializer list, for instance), you should have spaces before your
4157  # braces when they are delimiting blocks, classes, namespaces etc.
4158  # And since you should never have braces at the beginning of a line,
4159  # this is an easy test.  Except that braces used for initialization don't
4160  # follow the same rule; we often don't want spaces before those.
4161  match = re.match(r'^(.*[^ ({>]){', line)
4162
4163  if match:
4164    # Try a bit harder to check for brace initialization.  This
4165    # happens in one of the following forms:
4166    #   Constructor() : initializer_list_{} { ... }
4167    #   Constructor{}.MemberFunction()
4168    #   Type variable{};
4169    #   FunctionCall(type{}, ...);
4170    #   LastArgument(..., type{});
4171    #   LOG(INFO) << type{} << " ...";
4172    #   map_of_type[{...}] = ...;
4173    #   ternary = expr ? new type{} : nullptr;
4174    #   OuterTemplate<InnerTemplateConstructor<Type>{}>
4175    #
4176    # We check for the character following the closing brace, and
4177    # silence the warning if it's one of those listed above, i.e.
4178    # "{.;,)<>]:".
4179    #
4180    # To account for nested initializer list, we allow any number of
4181    # closing braces up to "{;,)<".  We can't simply silence the
4182    # warning on first sight of closing brace, because that would
4183    # cause false negatives for things that are not initializer lists.
4184    #   Silence this:         But not this:
4185    #     Outer{                if (...) {
4186    #       Inner{...}            if (...){  // Missing space before {
4187    #     };                    }
4188    #
4189    # There is a false negative with this approach if people inserted
4190    # spurious semicolons, e.g. "if (cond){};", but we will catch the
4191    # spurious semicolon with a separate check.
4192    leading_text = match.group(1)
4193    (endline, endlinenum, endpos) = CloseExpression(
4194        clean_lines, linenum, len(match.group(1)))
4195    trailing_text = ''
4196    if endpos > -1:
4197      trailing_text = endline[endpos:]
4198    for offset in range(endlinenum + 1,
4199                         min(endlinenum + 3, clean_lines.NumLines() - 1)):
4200      trailing_text += clean_lines.elided[offset]
4201    # We also suppress warnings for `uint64_t{expression}` etc., as the style
4202    # guide recommends brace initialization for integral types to avoid
4203    # overflow/truncation.
4204    if (not re.match(r'^[\s}]*[{.;,)<>\]:]', trailing_text)
4205        and not _IsType(clean_lines, nesting_state, leading_text)):
4206      error(filename, linenum, 'whitespace/braces', 5,
4207            'Missing space before {')
4208
4209  # Make sure '} else {' has spaces.
4210  if re.search(r'}else', line):
4211    error(filename, linenum, 'whitespace/braces', 5,
4212          'Missing space before else')
4213
4214  # You shouldn't have a space before a semicolon at the end of the line.
4215  # There's a special case for "for" since the style guide allows space before
4216  # the semicolon there.
4217  if re.search(r':\s*;\s*$', line):
4218    error(filename, linenum, 'whitespace/semicolon', 5,
4219          'Semicolon defining empty statement. Use {} instead.')
4220  elif re.search(r'^\s*;\s*$', line):
4221    error(filename, linenum, 'whitespace/semicolon', 5,
4222          'Line contains only semicolon. If this should be an empty statement, '
4223          'use {} instead.')
4224  elif (re.search(r'\s+;\s*$', line) and
4225        not re.search(r'\bfor\b', line)):
4226    error(filename, linenum, 'whitespace/semicolon', 5,
4227          'Extra space before last semicolon. If this should be an empty '
4228          'statement, use {} instead.')
4229
4230
4231def IsDecltype(clean_lines, linenum, column):
4232  """Check if the token ending on (linenum, column) is decltype().
4233
4234  Args:
4235    clean_lines: A CleansedLines instance containing the file.
4236    linenum: the number of the line to check.
4237    column: end column of the token to check.
4238  Returns:
4239    True if this token is decltype() expression, False otherwise.
4240  """
4241  (text, _, start_col) = ReverseCloseExpression(clean_lines, linenum, column)
4242  if start_col < 0:
4243    return False
4244  if re.search(r'\bdecltype\s*$', text[0:start_col]):
4245    return True
4246  return False
4247
4248def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error):
4249  """Checks for additional blank line issues related to sections.
4250
4251  Currently the only thing checked here is blank line before protected/private.
4252
4253  Args:
4254    filename: The name of the current file.
4255    clean_lines: A CleansedLines instance containing the file.
4256    class_info: A _ClassInfo objects.
4257    linenum: The number of the line to check.
4258    error: The function to call with any errors found.
4259  """
4260  # Skip checks if the class is small, where small means 25 lines or less.
4261  # 25 lines seems like a good cutoff since that's the usual height of
4262  # terminals, and any class that can't fit in one screen can't really
4263  # be considered "small".
4264  #
4265  # Also skip checks if we are on the first line.  This accounts for
4266  # classes that look like
4267  #   class Foo { public: ... };
4268  #
4269  # If we didn't find the end of the class, last_line would be zero,
4270  # and the check will be skipped by the first condition.
4271  if (class_info.last_line - class_info.starting_linenum <= 24 or
4272      linenum <= class_info.starting_linenum):
4273    return
4274
4275  matched = re.match(r'\s*(public|protected|private):', clean_lines.lines[linenum])
4276  if matched:
4277    # Issue warning if the line before public/protected/private was
4278    # not a blank line, but don't do this if the previous line contains
4279    # "class" or "struct".  This can happen two ways:
4280    #  - We are at the beginning of the class.
4281    #  - We are forward-declaring an inner class that is semantically
4282    #    private, but needed to be public for implementation reasons.
4283    # Also ignores cases where the previous line ends with a backslash as can be
4284    # common when defining classes in C macros.
4285    prev_line = clean_lines.lines[linenum - 1]
4286    if (not IsBlankLine(prev_line) and
4287        not re.search(r'\b(class|struct)\b', prev_line) and
4288        not re.search(r'\\$', prev_line)):
4289      # Try a bit harder to find the beginning of the class.  This is to
4290      # account for multi-line base-specifier lists, e.g.:
4291      #   class Derived
4292      #       : public Base {
4293      end_class_head = class_info.starting_linenum
4294      for i in range(class_info.starting_linenum, linenum):
4295        if re.search(r'\{\s*$', clean_lines.lines[i]):
4296          end_class_head = i
4297          break
4298      if end_class_head < linenum - 1:
4299        error(filename, linenum, 'whitespace/blank_line', 3,
4300              f'"{matched.group(1)}:" should be preceded by a blank line')
4301
4302
4303def GetPreviousNonBlankLine(clean_lines, linenum):
4304  """Return the most recent non-blank line and its line number.
4305
4306  Args:
4307    clean_lines: A CleansedLines instance containing the file contents.
4308    linenum: The number of the line to check.
4309
4310  Returns:
4311    A tuple with two elements.  The first element is the contents of the last
4312    non-blank line before the current line, or the empty string if this is the
4313    first non-blank line.  The second is the line number of that line, or -1
4314    if this is the first non-blank line.
4315  """
4316
4317  prevlinenum = linenum - 1
4318  while prevlinenum >= 0:
4319    prevline = clean_lines.elided[prevlinenum]
4320    if not IsBlankLine(prevline):     # if not a blank line...
4321      return (prevline, prevlinenum)
4322    prevlinenum -= 1
4323  return ('', -1)
4324
4325
4326def CheckBraces(filename, clean_lines, linenum, error):
4327  """Looks for misplaced braces (e.g. at the end of line).
4328
4329  Args:
4330    filename: The name of the current file.
4331    clean_lines: A CleansedLines instance containing the file.
4332    linenum: The number of the line to check.
4333    error: The function to call with any errors found.
4334  """
4335
4336  line = clean_lines.elided[linenum]        # get rid of comments and strings
4337
4338  if re.match(r'\s*{\s*$', line):
4339    # We allow an open brace to start a line in the case where someone is using
4340    # braces in a block to explicitly create a new scope, which is commonly used
4341    # to control the lifetime of stack-allocated variables.  Braces are also
4342    # used for brace initializers inside function calls.  We don't detect this
4343    # perfectly: we just don't complain if the last non-whitespace character on
4344    # the previous non-blank line is ',', ';', ':', '(', '{', or '}', or if the
4345    # previous line starts a preprocessor block. We also allow a brace on the
4346    # following line if it is part of an array initialization and would not fit
4347    # within the 80 character limit of the preceding line.
4348    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
4349    if (not re.search(r'[,;:}{(]\s*$', prevline) and
4350        not re.match(r'\s*#', prevline) and
4351        not (GetLineWidth(prevline) > _line_length - 2 and '[]' in prevline)):
4352      error(filename, linenum, 'whitespace/braces', 4,
4353            '{ should almost always be at the end of the previous line')
4354
4355  # An else clause should be on the same line as the preceding closing brace.
4356  if last_wrong := re.match(r'\s*else\b\s*(?:if\b|\{|$)', line):
4357    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
4358    if re.match(r'\s*}\s*$', prevline):
4359      error(filename, linenum, 'whitespace/newline', 4,
4360            'An else should appear on the same line as the preceding }')
4361    else:
4362      last_wrong = False
4363
4364  # If braces come on one side of an else, they should be on both.
4365  # However, we have to worry about "else if" that spans multiple lines!
4366  if re.search(r'else if\s*\(', line):       # could be multi-line if
4367    brace_on_left = bool(re.search(r'}\s*else if\s*\(', line))
4368    # find the ( after the if
4369    pos = line.find('else if')
4370    pos = line.find('(', pos)
4371    if pos > 0:
4372      (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
4373      brace_on_right = endline[endpos:].find('{') != -1
4374      if brace_on_left != brace_on_right:    # must be brace after if
4375        error(filename, linenum, 'readability/braces', 5,
4376              'If an else has a brace on one side, it should have it on both')
4377  # Prevent detection if statement has { and we detected an improper newline after }
4378  elif re.search(r'}\s*else[^{]*$', line) or (re.match(r'[^}]*else\s*{', line) and not last_wrong):
4379    error(filename, linenum, 'readability/braces', 5,
4380          'If an else has a brace on one side, it should have it on both')
4381
4382  # No control clauses with braces should have its contents on the same line
4383  # Exclude } which will be covered by empty-block detect
4384  # Exclude ; which may be used by while in a do-while
4385  if keyword := re.search(
4386      r'\b(else if|if|while|for|switch)'  # These have parens
4387      r'\s*\(.*\)\s*(?:\[\[(?:un)?likely\]\]\s*)?{\s*[^\s\\};]', line):
4388    error(filename, linenum, 'whitespace/newline', 5,
4389          f'Controlled statements inside brackets of {keyword.group(1)} clause'
4390          ' should be on a separate line')
4391  elif keyword := re.search(
4392      r'\b(else|do|try)'  # These don't have parens
4393      r'\s*(?:\[\[(?:un)?likely\]\]\s*)?{\s*[^\s\\}]', line):
4394    error(filename, linenum, 'whitespace/newline', 5,
4395          f'Controlled statements inside brackets of {keyword.group(1)} clause'
4396          ' should be on a separate line')
4397
4398  # TODO: Err on if...else and do...while statements without braces;
4399  # style guide has changed since the below comment was written
4400
4401  # Check single-line if/else bodies. The style guide says 'curly braces are not
4402  # required for single-line statements'. We additionally allow multi-line,
4403  # single statements, but we reject anything with more than one semicolon in
4404  # it. This means that the first semicolon after the if should be at the end of
4405  # its line, and the line after that should have an indent level equal to or
4406  # lower than the if. We also check for ambiguous if/else nesting without
4407  # braces.
4408  if_else_match = re.search(r'\b(if\s*(|constexpr)\s*\(|else\b)', line)
4409  if if_else_match and not re.match(r'\s*#', line):
4410    if_indent = GetIndentLevel(line)
4411    endline, endlinenum, endpos = line, linenum, if_else_match.end()
4412    if_match = re.search(r'\bif\s*(|constexpr)\s*\(', line)
4413    if if_match:
4414      # This could be a multiline if condition, so find the end first.
4415      pos = if_match.end() - 1
4416      (endline, endlinenum, endpos) = CloseExpression(clean_lines, linenum, pos)
4417    # Check for an opening brace, either directly after the if or on the next
4418    # line. If found, this isn't a single-statement conditional.
4419    if (not re.match(r'\s*(?:\[\[(?:un)?likely\]\]\s*)?{', endline[endpos:])
4420        and not (re.match(r'\s*$', endline[endpos:])
4421                 and endlinenum < (len(clean_lines.elided) - 1)
4422                 and re.match(r'\s*{', clean_lines.elided[endlinenum + 1]))):
4423      while (endlinenum < len(clean_lines.elided)
4424             and ';' not in clean_lines.elided[endlinenum][endpos:]):
4425        endlinenum += 1
4426        endpos = 0
4427      if endlinenum < len(clean_lines.elided):
4428        endline = clean_lines.elided[endlinenum]
4429        # We allow a mix of whitespace and closing braces (e.g. for one-liner
4430        # methods) and a single \ after the semicolon (for macros)
4431        endpos = endline.find(';')
4432        if not re.match(r';[\s}]*(\\?)$', endline[endpos:]):
4433          # Semicolon isn't the last character, there's something trailing.
4434          # Output a warning if the semicolon is not contained inside
4435          # a lambda expression.
4436          if not re.match(r'^[^{};]*\[[^\[\]]*\][^{}]*\{[^{}]*\}\s*\)*[;,]\s*$',
4437                       endline):
4438            error(filename, linenum, 'readability/braces', 4,
4439                  'If/else bodies with multiple statements require braces')
4440        elif endlinenum < len(clean_lines.elided) - 1:
4441          # Make sure the next line is dedented
4442          next_line = clean_lines.elided[endlinenum + 1]
4443          next_indent = GetIndentLevel(next_line)
4444          # With ambiguous nested if statements, this will error out on the
4445          # if that *doesn't* match the else, regardless of whether it's the
4446          # inner one or outer one.
4447          if (if_match and re.match(r'\s*else\b', next_line)
4448              and next_indent != if_indent):
4449            error(filename, linenum, 'readability/braces', 4,
4450                  'Else clause should be indented at the same level as if. '
4451                  'Ambiguous nested if/else chains require braces.')
4452          elif next_indent > if_indent:
4453            error(filename, linenum, 'readability/braces', 4,
4454                  'If/else bodies with multiple statements require braces')
4455
4456
4457def CheckTrailingSemicolon(filename, clean_lines, linenum, error):
4458  """Looks for redundant trailing semicolon.
4459
4460  Args:
4461    filename: The name of the current file.
4462    clean_lines: A CleansedLines instance containing the file.
4463    linenum: The number of the line to check.
4464    error: The function to call with any errors found.
4465  """
4466
4467  line = clean_lines.elided[linenum]
4468
4469  # Block bodies should not be followed by a semicolon.  Due to C++11
4470  # brace initialization, there are more places where semicolons are
4471  # required than not, so we explicitly list the allowed rules rather
4472  # than listing the disallowed ones.  These are the places where "};"
4473  # should be replaced by just "}":
4474  # 1. Some flavor of block following closing parenthesis:
4475  #    for (;;) {};
4476  #    while (...) {};
4477  #    switch (...) {};
4478  #    Function(...) {};
4479  #    if (...) {};
4480  #    if (...) else if (...) {};
4481  #
4482  # 2. else block:
4483  #    if (...) else {};
4484  #
4485  # 3. const member function:
4486  #    Function(...) const {};
4487  #
4488  # 4. Block following some statement:
4489  #    x = 42;
4490  #    {};
4491  #
4492  # 5. Block at the beginning of a function:
4493  #    Function(...) {
4494  #      {};
4495  #    }
4496  #
4497  #    Note that naively checking for the preceding "{" will also match
4498  #    braces inside multi-dimensional arrays, but this is fine since
4499  #    that expression will not contain semicolons.
4500  #
4501  # 6. Block following another block:
4502  #    while (true) {}
4503  #    {};
4504  #
4505  # 7. End of namespaces:
4506  #    namespace {};
4507  #
4508  #    These semicolons seems far more common than other kinds of
4509  #    redundant semicolons, possibly due to people converting classes
4510  #    to namespaces.  For now we do not warn for this case.
4511  #
4512  # Try matching case 1 first.
4513  match = re.match(r'^(.*\)\s*)\{', line)
4514  if match:
4515    # Matched closing parenthesis (case 1).  Check the token before the
4516    # matching opening parenthesis, and don't warn if it looks like a
4517    # macro.  This avoids these false positives:
4518    #  - macro that defines a base class
4519    #  - multi-line macro that defines a base class
4520    #  - macro that defines the whole class-head
4521    #
4522    # But we still issue warnings for macros that we know are safe to
4523    # warn, specifically:
4524    #  - TEST, TEST_F, TEST_P, MATCHER, MATCHER_P
4525    #  - TYPED_TEST
4526    #  - INTERFACE_DEF
4527    #  - EXCLUSIVE_LOCKS_REQUIRED, SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED:
4528    #
4529    # We implement a list of safe macros instead of a list of
4530    # unsafe macros, even though the latter appears less frequently in
4531    # google code and would have been easier to implement.  This is because
4532    # the downside for getting the allowed checks wrong means some extra
4533    # semicolons, while the downside for getting disallowed checks wrong
4534    # would result in compile errors.
4535    #
4536    # In addition to macros, we also don't want to warn on
4537    #  - Compound literals
4538    #  - Lambdas
4539    #  - alignas specifier with anonymous structs
4540    #  - decltype
4541    #  - concepts (requires expression)
4542    closing_brace_pos = match.group(1).rfind(')')
4543    opening_parenthesis = ReverseCloseExpression(
4544        clean_lines, linenum, closing_brace_pos)
4545    if opening_parenthesis[2] > -1:
4546      line_prefix = opening_parenthesis[0][0:opening_parenthesis[2]]
4547      macro = re.search(r'\b([A-Z_][A-Z0-9_]*)\s*$', line_prefix)
4548      func = re.match(r'^(.*\])\s*$', line_prefix)
4549      if ((macro and
4550           macro.group(1) not in (
4551               'TEST', 'TEST_F', 'MATCHER', 'MATCHER_P', 'TYPED_TEST',
4552               'EXCLUSIVE_LOCKS_REQUIRED', 'SHARED_LOCKS_REQUIRED',
4553               'LOCKS_EXCLUDED', 'INTERFACE_DEF')) or
4554          (func and not re.search(r'\boperator\s*\[\s*\]', func.group(1))) or
4555          re.search(r'\b(?:struct|union)\s+alignas\s*$', line_prefix) or
4556          re.search(r'\bdecltype$', line_prefix) or
4557          re.search(r'\brequires.*$', line_prefix) or
4558          re.search(r'\s+=\s*$', line_prefix)):
4559        match = None
4560    if (match and
4561        opening_parenthesis[1] > 1 and
4562        re.search(r'\]\s*$', clean_lines.elided[opening_parenthesis[1] - 1])):
4563      # Multi-line lambda-expression
4564      match = None
4565
4566  else:
4567    # Try matching cases 2-3.
4568    match = re.match(r'^(.*(?:else|\)\s*const)\s*)\{', line)
4569    if not match:
4570      # Try matching cases 4-6.  These are always matched on separate lines.
4571      #
4572      # Note that we can't simply concatenate the previous line to the
4573      # current line and do a single match, otherwise we may output
4574      # duplicate warnings for the blank line case:
4575      #   if (cond) {
4576      #     // blank line
4577      #   }
4578      prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
4579      if prevline and re.search(r'[;{}]\s*$', prevline):
4580        match = re.match(r'^(\s*)\{', line)
4581
4582  # Check matching closing brace
4583  if match:
4584    (endline, endlinenum, endpos) = CloseExpression(
4585        clean_lines, linenum, len(match.group(1)))
4586    if endpos > -1 and re.match(r'^\s*;', endline[endpos:]):
4587      # Current {} pair is eligible for semicolon check, and we have found
4588      # the redundant semicolon, output warning here.
4589      #
4590      # Note: because we are scanning forward for opening braces, and
4591      # outputting warnings for the matching closing brace, if there are
4592      # nested blocks with trailing semicolons, we will get the error
4593      # messages in reversed order.
4594
4595      # We need to check the line forward for NOLINT
4596      raw_lines = clean_lines.raw_lines
4597      ParseNolintSuppressions(filename, raw_lines[endlinenum-1], endlinenum-1,
4598                              error)
4599      ParseNolintSuppressions(filename, raw_lines[endlinenum], endlinenum,
4600                              error)
4601
4602      error(filename, endlinenum, 'readability/braces', 4,
4603            "You don't need a ; after a }")
4604
4605
4606def CheckEmptyBlockBody(filename, clean_lines, linenum, error):
4607  """Look for empty loop/conditional body with only a single semicolon.
4608
4609  Args:
4610    filename: The name of the current file.
4611    clean_lines: A CleansedLines instance containing the file.
4612    linenum: The number of the line to check.
4613    error: The function to call with any errors found.
4614  """
4615
4616  # Search for loop keywords at the beginning of the line.  Because only
4617  # whitespaces are allowed before the keywords, this will also ignore most
4618  # do-while-loops, since those lines should start with closing brace.
4619  #
4620  # We also check "if" blocks here, since an empty conditional block
4621  # is likely an error.
4622  line = clean_lines.elided[linenum]
4623  matched = re.match(r'\s*(for|while|if)\s*\(', line)
4624  if matched:
4625    # Find the end of the conditional expression.
4626    (end_line, end_linenum, end_pos) = CloseExpression(
4627        clean_lines, linenum, line.find('('))
4628
4629    # Output warning if what follows the condition expression is a semicolon.
4630    # No warning for all other cases, including whitespace or newline, since we
4631    # have a separate check for semicolons preceded by whitespace.
4632    if end_pos >= 0 and re.match(r';', end_line[end_pos:]):
4633      if matched.group(1) == 'if':
4634        error(filename, end_linenum, 'whitespace/empty_conditional_body', 5,
4635              'Empty conditional bodies should use {}')
4636      else:
4637        error(filename, end_linenum, 'whitespace/empty_loop_body', 5,
4638              'Empty loop bodies should use {} or continue')
4639
4640    # Check for if statements that have completely empty bodies (no comments)
4641    # and no else clauses.
4642    if end_pos >= 0 and matched.group(1) == 'if':
4643      # Find the position of the opening { for the if statement.
4644      # Return without logging an error if it has no brackets.
4645      opening_linenum = end_linenum
4646      opening_line_fragment = end_line[end_pos:]
4647      # Loop until EOF or find anything that's not whitespace or opening {.
4648      while not re.search(r'^\s*\{', opening_line_fragment):
4649        if re.search(r'^(?!\s*$)', opening_line_fragment):
4650          # Conditional has no brackets.
4651          return
4652        opening_linenum += 1
4653        if opening_linenum == len(clean_lines.elided):
4654          # Couldn't find conditional's opening { or any code before EOF.
4655          return
4656        opening_line_fragment = clean_lines.elided[opening_linenum]
4657      # Set opening_line (opening_line_fragment may not be entire opening line).
4658      opening_line = clean_lines.elided[opening_linenum]
4659
4660      # Find the position of the closing }.
4661      opening_pos = opening_line_fragment.find('{')
4662      if opening_linenum == end_linenum:
4663        # We need to make opening_pos relative to the start of the entire line.
4664        opening_pos += end_pos
4665      (closing_line, closing_linenum, closing_pos) = CloseExpression(
4666          clean_lines, opening_linenum, opening_pos)
4667      if closing_pos < 0:
4668        return
4669
4670      # Now construct the body of the conditional. This consists of the portion
4671      # of the opening line after the {, all lines until the closing line,
4672      # and the portion of the closing line before the }.
4673      if (clean_lines.raw_lines[opening_linenum] !=
4674          CleanseComments(clean_lines.raw_lines[opening_linenum])):
4675        # Opening line ends with a comment, so conditional isn't empty.
4676        return
4677      if closing_linenum > opening_linenum:
4678        # Opening line after the {. Ignore comments here since we checked above.
4679        bodylist = list(opening_line[opening_pos+1:])
4680        # All lines until closing line, excluding closing line, with comments.
4681        bodylist.extend(clean_lines.raw_lines[opening_linenum+1:closing_linenum])
4682        # Closing line before the }. Won't (and can't) have comments.
4683        bodylist.append(clean_lines.elided[closing_linenum][:closing_pos-1])
4684        body = '\n'.join(bodylist)
4685      else:
4686        # If statement has brackets and fits on a single line.
4687        body = opening_line[opening_pos+1:closing_pos-1]
4688
4689      # Check if the body is empty
4690      if not _EMPTY_CONDITIONAL_BODY_PATTERN.search(body):
4691        return
4692      # The body is empty. Now make sure there's not an else clause.
4693      current_linenum = closing_linenum
4694      current_line_fragment = closing_line[closing_pos:]
4695      # Loop until EOF or find anything that's not whitespace or else clause.
4696      while re.search(r'^\s*$|^(?=\s*else)', current_line_fragment):
4697        if re.search(r'^(?=\s*else)', current_line_fragment):
4698          # Found an else clause, so don't log an error.
4699          return
4700        current_linenum += 1
4701        if current_linenum == len(clean_lines.elided):
4702          break
4703        current_line_fragment = clean_lines.elided[current_linenum]
4704
4705      # The body is empty and there's no else clause until EOF or other code.
4706      error(filename, end_linenum, 'whitespace/empty_if_body', 4,
4707            ('If statement had no body and no else clause'))
4708
4709
4710def FindCheckMacro(line):
4711  """Find a replaceable CHECK-like macro.
4712
4713  Args:
4714    line: line to search on.
4715  Returns:
4716    (macro name, start position), or (None, -1) if no replaceable
4717    macro is found.
4718  """
4719  for macro in _CHECK_MACROS:
4720    i = line.find(macro)
4721    if i >= 0:
4722      # Find opening parenthesis.  Do a regular expression match here
4723      # to make sure that we are matching the expected CHECK macro, as
4724      # opposed to some other macro that happens to contain the CHECK
4725      # substring.
4726      matched = re.match(r'^(.*\b' + macro + r'\s*)\(', line)
4727      if not matched:
4728        continue
4729      return (macro, len(matched.group(1)))
4730  return (None, -1)
4731
4732
4733def CheckCheck(filename, clean_lines, linenum, error):
4734  """Checks the use of CHECK and EXPECT macros.
4735
4736  Args:
4737    filename: The name of the current file.
4738    clean_lines: A CleansedLines instance containing the file.
4739    linenum: The number of the line to check.
4740    error: The function to call with any errors found.
4741  """
4742
4743  # Decide the set of replacement macros that should be suggested
4744  lines = clean_lines.elided
4745  (check_macro, start_pos) = FindCheckMacro(lines[linenum])
4746  if not check_macro:
4747    return
4748
4749  # Find end of the boolean expression by matching parentheses
4750  (last_line, end_line, end_pos) = CloseExpression(
4751      clean_lines, linenum, start_pos)
4752  if end_pos < 0:
4753    return
4754
4755  # If the check macro is followed by something other than a
4756  # semicolon, assume users will log their own custom error messages
4757  # and don't suggest any replacements.
4758  if not re.match(r'\s*;', last_line[end_pos:]):
4759    return
4760
4761  if linenum == end_line:
4762    expression = lines[linenum][start_pos + 1:end_pos - 1]
4763  else:
4764    expression = lines[linenum][start_pos + 1:]
4765    for i in range(linenum + 1, end_line):
4766      expression += lines[i]
4767    expression += last_line[0:end_pos - 1]
4768
4769  # Parse expression so that we can take parentheses into account.
4770  # This avoids false positives for inputs like "CHECK((a < 4) == b)",
4771  # which is not replaceable by CHECK_LE.
4772  lhs = ''
4773  rhs = ''
4774  operator = None
4775  while expression:
4776    matched = re.match(r'^\s*(<<|<<=|>>|>>=|->\*|->|&&|\|\||'
4777                    r'==|!=|>=|>|<=|<|\()(.*)$', expression)
4778    if matched:
4779      token = matched.group(1)
4780      if token == '(':
4781        # Parenthesized operand
4782        expression = matched.group(2)
4783        (end, _) = FindEndOfExpressionInLine(expression, 0, ['('])
4784        if end < 0:
4785          return  # Unmatched parenthesis
4786        lhs += '(' + expression[0:end]
4787        expression = expression[end:]
4788      elif token in ('&&', '||'):
4789        # Logical and/or operators.  This means the expression
4790        # contains more than one term, for example:
4791        #   CHECK(42 < a && a < b);
4792        #
4793        # These are not replaceable with CHECK_LE, so bail out early.
4794        return
4795      elif token in ('<<', '<<=', '>>', '>>=', '->*', '->'):
4796        # Non-relational operator
4797        lhs += token
4798        expression = matched.group(2)
4799      else:
4800        # Relational operator
4801        operator = token
4802        rhs = matched.group(2)
4803        break
4804    else:
4805      # Unparenthesized operand.  Instead of appending to lhs one character
4806      # at a time, we do another regular expression match to consume several
4807      # characters at once if possible.  Trivial benchmark shows that this
4808      # is more efficient when the operands are longer than a single
4809      # character, which is generally the case.
4810      matched = re.match(r'^([^-=!<>()&|]+)(.*)$', expression)
4811      if not matched:
4812        matched = re.match(r'^(\s*\S)(.*)$', expression)
4813        if not matched:
4814          break
4815      lhs += matched.group(1)
4816      expression = matched.group(2)
4817
4818  # Only apply checks if we got all parts of the boolean expression
4819  if not (lhs and operator and rhs):
4820    return
4821
4822  # Check that rhs do not contain logical operators.  We already know
4823  # that lhs is fine since the loop above parses out && and ||.
4824  if rhs.find('&&') > -1 or rhs.find('||') > -1:
4825    return
4826
4827  # At least one of the operands must be a constant literal.  This is
4828  # to avoid suggesting replacements for unprintable things like
4829  # CHECK(variable != iterator)
4830  #
4831  # The following pattern matches decimal, hex integers, strings, and
4832  # characters (in that order).
4833  lhs = lhs.strip()
4834  rhs = rhs.strip()
4835  match_constant = r'^([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')$'
4836  if re.match(match_constant, lhs) or re.match(match_constant, rhs):
4837    # Note: since we know both lhs and rhs, we can provide a more
4838    # descriptive error message like:
4839    #   Consider using CHECK_EQ(x, 42) instead of CHECK(x == 42)
4840    # Instead of:
4841    #   Consider using CHECK_EQ instead of CHECK(a == b)
4842    #
4843    # We are still keeping the less descriptive message because if lhs
4844    # or rhs gets long, the error message might become unreadable.
4845    error(filename, linenum, 'readability/check', 2,
4846          f'Consider using {_CHECK_REPLACEMENT[check_macro][operator]}'
4847          f' instead of {check_macro}(a {operator} b)')
4848
4849
4850def CheckAltTokens(filename, clean_lines, linenum, error):
4851  """Check alternative keywords being used in boolean expressions.
4852
4853  Args:
4854    filename: The name of the current file.
4855    clean_lines: A CleansedLines instance containing the file.
4856    linenum: The number of the line to check.
4857    error: The function to call with any errors found.
4858  """
4859  line = clean_lines.elided[linenum]
4860
4861  # Avoid preprocessor lines
4862  if re.match(r'^\s*#', line):
4863    return
4864
4865  # Last ditch effort to avoid multi-line comments.  This will not help
4866  # if the comment started before the current line or ended after the
4867  # current line, but it catches most of the false positives.  At least,
4868  # it provides a way to workaround this warning for people who use
4869  # multi-line comments in preprocessor macros.
4870  #
4871  # TODO(unknown): remove this once cpplint has better support for
4872  # multi-line comments.
4873  if line.find('/*') >= 0 or line.find('*/') >= 0:
4874    return
4875
4876  for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line):
4877    error(filename, linenum, 'readability/alt_tokens', 2,
4878          f'Use operator {_ALT_TOKEN_REPLACEMENT[match.group(2)]}'
4879          f' instead of {match.group(2)}')
4880
4881
4882def GetLineWidth(line):
4883  """Determines the width of the line in column positions.
4884
4885  Args:
4886    line: A string, which may be a Unicode string.
4887
4888  Returns:
4889    The width of the line in column positions, accounting for Unicode
4890    combining characters and wide characters.
4891  """
4892  if isinstance(line, str):
4893    width = 0
4894    for uc in unicodedata.normalize('NFC', line):
4895      if unicodedata.east_asian_width(uc) in ('W', 'F'):
4896        width += 2
4897      elif not unicodedata.combining(uc):
4898        # Issue 337
4899        # https://mail.python.org/pipermail/python-list/2012-August/628809.html
4900        if (sys.version_info.major, sys.version_info.minor) <= (3, 2):
4901          # https://github.com/python/cpython/blob/2.7/Include/unicodeobject.h#L81
4902          is_wide_build = sysconfig.get_config_var("Py_UNICODE_SIZE") >= 4
4903          # https://github.com/python/cpython/blob/2.7/Objects/unicodeobject.c#L564
4904          is_low_surrogate = 0xDC00 <= ord(uc) <= 0xDFFF
4905          if not is_wide_build and is_low_surrogate:
4906            width -= 1
4907
4908        width += 1
4909    return width
4910  else:
4911    return len(line)
4912
4913
4914def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state,
4915               error, cppvar=None):
4916  """Checks rules from the 'C++ style rules' section of cppguide.html.
4917
4918  Most of these rules are hard to test (naming, comment style), but we
4919  do what we can.  In particular we check for 2-space indents, line lengths,
4920  tab usage, spaces inside code, etc.
4921
4922  Args:
4923    filename: The name of the current file.
4924    clean_lines: A CleansedLines instance containing the file.
4925    linenum: The number of the line to check.
4926    file_extension: The extension (without the dot) of the filename.
4927    nesting_state: A NestingState instance which maintains information about
4928                   the current stack of nested blocks being parsed.
4929    error: The function to call with any errors found.
4930    cppvar: The header guard variable returned by GetHeaderGuardCPPVar.
4931  """
4932
4933  # Don't use "elided" lines here, otherwise we can't check commented lines.
4934  # Don't want to use "raw" either, because we don't want to check inside C++11
4935  # raw strings,
4936  raw_lines = clean_lines.lines_without_raw_strings
4937  line = raw_lines[linenum]
4938  prev = raw_lines[linenum - 1] if linenum > 0 else ''
4939
4940  if line.find('\t') != -1:
4941    error(filename, linenum, 'whitespace/tab', 1,
4942          'Tab found; better to use spaces')
4943
4944  # One or three blank spaces at the beginning of the line is weird; it's
4945  # hard to reconcile that with 2-space indents.
4946  # NOTE: here are the conditions rob pike used for his tests.  Mine aren't
4947  # as sophisticated, but it may be worth becoming so:  RLENGTH==initial_spaces
4948  # if(RLENGTH > 20) complain = 0;
4949  # if(match($0, " +(error|private|public|protected):")) complain = 0;
4950  # if(match(prev, "&& *$")) complain = 0;
4951  # if(match(prev, "\\|\\| *$")) complain = 0;
4952  # if(match(prev, "[\",=><] *$")) complain = 0;
4953  # if(match($0, " <<")) complain = 0;
4954  # if(match(prev, " +for \\(")) complain = 0;
4955  # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
4956  scope_or_label_pattern = r'\s*(?:public|private|protected|signals)(?:\s+(?:slots\s*)?)?:\s*\\?$'
4957  classinfo = nesting_state.InnermostClass()
4958  initial_spaces = 0
4959  cleansed_line = clean_lines.elided[linenum]
4960  while initial_spaces < len(line) and line[initial_spaces] == ' ':
4961    initial_spaces += 1
4962  # There are certain situations we allow one space, notably for
4963  # section labels, and also lines containing multi-line raw strings.
4964  # We also don't check for lines that look like continuation lines
4965  # (of lines ending in double quotes, commas, equals, or angle brackets)
4966  # because the rules for how to indent those are non-trivial.
4967  if (not re.search(r'[",=><] *$', prev) and
4968      (initial_spaces == 1 or initial_spaces == 3) and
4969      not re.match(scope_or_label_pattern, cleansed_line) and
4970      not (clean_lines.raw_lines[linenum] != line and
4971           re.match(r'^\s*""', line))):
4972    error(filename, linenum, 'whitespace/indent', 3,
4973          'Weird number of spaces at line-start.  '
4974          'Are you using a 2-space indent?')
4975
4976  if line and line[-1].isspace():
4977    error(filename, linenum, 'whitespace/end_of_line', 4,
4978          'Line ends in whitespace.  Consider deleting these extra spaces.')
4979
4980  # Check if the line is a header guard.
4981  is_header_guard = False
4982  if IsHeaderExtension(file_extension):
4983    if (line.startswith(f'#ifndef {cppvar}') or
4984        line.startswith(f'#define {cppvar}') or
4985        line.startswith(f'#endif  // {cppvar}')):
4986      is_header_guard = True
4987  # #include lines and header guards can be long, since there's no clean way to
4988  # split them.
4989  #
4990  # URLs can be long too.  It's possible to split these, but it makes them
4991  # harder to cut&paste.
4992  #
4993  # The "$Id:...$" comment may also get very long without it being the
4994  # developers fault.
4995  #
4996  # Doxygen documentation copying can get pretty long when using an overloaded
4997  # function declaration
4998  if (not line.startswith('#include') and not is_header_guard and
4999      not re.match(r'^\s*//.*http(s?)://\S*$', line) and
5000      not re.match(r'^\s*//\s*[^\s]*$', line) and
5001      not re.match(r'^// \$Id:.*#[0-9]+ \$$', line) and
5002      not re.match(r'^\s*/// [@\\](copydoc|copydetails|copybrief) .*$', line)):
5003    line_width = GetLineWidth(line)
5004    if line_width > _line_length:
5005      error(filename, linenum, 'whitespace/line_length', 2,
5006            f'Lines should be <= {_line_length} characters long')
5007
5008  if (cleansed_line.count(';') > 1 and
5009      # allow simple single line lambdas
5010      not re.match(r'^[^{};]*\[[^\[\]]*\][^{}]*\{[^{}\n\r]*\}',
5011                line) and
5012      # for loops are allowed two ;'s (and may run over two lines).
5013      cleansed_line.find('for') == -1 and
5014      (GetPreviousNonBlankLine(clean_lines, linenum)[0].find('for') == -1 or
5015       GetPreviousNonBlankLine(clean_lines, linenum)[0].find(';') != -1) and
5016      # It's ok to have many commands in a switch case that fits in 1 line
5017      not ((cleansed_line.find('case ') != -1 or
5018            cleansed_line.find('default:') != -1) and
5019           cleansed_line.find('break;') != -1)):
5020    error(filename, linenum, 'whitespace/newline', 0,
5021          'More than one command on the same line')
5022
5023  # Some more style checks
5024  CheckBraces(filename, clean_lines, linenum, error)
5025  CheckTrailingSemicolon(filename, clean_lines, linenum, error)
5026  CheckEmptyBlockBody(filename, clean_lines, linenum, error)
5027  CheckSpacing(filename, clean_lines, linenum, nesting_state, error)
5028  CheckOperatorSpacing(filename, clean_lines, linenum, error)
5029  CheckParenthesisSpacing(filename, clean_lines, linenum, error)
5030  CheckCommaSpacing(filename, clean_lines, linenum, error)
5031  CheckBracesSpacing(filename, clean_lines, linenum, nesting_state, error)
5032  CheckSpacingForFunctionCall(filename, clean_lines, linenum, error)
5033  CheckCheck(filename, clean_lines, linenum, error)
5034  CheckAltTokens(filename, clean_lines, linenum, error)
5035  classinfo = nesting_state.InnermostClass()
5036  if classinfo:
5037    CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error)
5038
5039
5040_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
5041# Matches the first component of a filename delimited by -s and _s. That is:
5042#  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
5043#  _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
5044#  _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
5045#  _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
5046_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
5047
5048
5049def _DropCommonSuffixes(filename):
5050  """Drops common suffixes like _test.cc or -inl.h from filename.
5051
5052  For example:
5053    >>> _DropCommonSuffixes('foo/foo-inl.h')
5054    'foo/foo'
5055    >>> _DropCommonSuffixes('foo/bar/foo.cc')
5056    'foo/bar/foo'
5057    >>> _DropCommonSuffixes('foo/foo_internal.h')
5058    'foo/foo'
5059    >>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
5060    'foo/foo_unusualinternal'
5061
5062  Args:
5063    filename: The input filename.
5064
5065  Returns:
5066    The filename with the common suffix removed.
5067  """
5068  for suffix in itertools.chain(
5069      (f"{test_suffix.lstrip('_')}.{ext}"
5070       for test_suffix, ext in itertools.product(_test_suffixes, GetNonHeaderExtensions())),
5071      (f'{suffix}.{ext}'
5072       for suffix, ext in itertools.product(['inl', 'imp', 'internal'], GetHeaderExtensions()))):
5073    if (filename.endswith(suffix) and len(filename) > len(suffix) and
5074        filename[-len(suffix) - 1] in ('-', '_')):
5075      return filename[:-len(suffix) - 1]
5076  return os.path.splitext(filename)[0]
5077
5078
5079def _ClassifyInclude(fileinfo, include, used_angle_brackets, include_order="default"):
5080  """Figures out what kind of header 'include' is.
5081
5082  Args:
5083    fileinfo: The current file cpplint is running over. A FileInfo instance.
5084    include: The path to a #included file.
5085    used_angle_brackets: True if the #include used <> rather than "".
5086    include_order: "default" or other value allowed in program arguments
5087
5088  Returns:
5089    One of the _XXX_HEADER constants.
5090
5091  For example:
5092    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
5093    _C_SYS_HEADER
5094    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
5095    _CPP_SYS_HEADER
5096    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', True, "standardcfirst")
5097    _OTHER_SYS_HEADER
5098    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
5099    _LIKELY_MY_HEADER
5100    >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
5101    ...                  'bar/foo_other_ext.h', False)
5102    _POSSIBLE_MY_HEADER
5103    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
5104    _OTHER_HEADER
5105  """
5106  # This is a list of all standard c++ header files, except
5107  # those already checked for above.
5108  is_cpp_header = include in _CPP_HEADERS
5109
5110  # Mark include as C header if in list or in a known folder for standard-ish C headers.
5111  is_std_c_header = (include_order == "default") or (include in _C_HEADERS
5112            # additional linux glibc header folders
5113            or re.search(rf'(?:{"|".join(C_STANDARD_HEADER_FOLDERS)})\/.*\.h', include))
5114
5115  # Headers with C++ extensions shouldn't be considered C system headers
5116  include_ext = os.path.splitext(include)[1]
5117  is_system = used_angle_brackets and include_ext not in ['.hh', '.hpp', '.hxx', '.h++']
5118
5119  if is_system:
5120    if is_cpp_header:
5121      return _CPP_SYS_HEADER
5122    if is_std_c_header:
5123      return _C_SYS_HEADER
5124    else:
5125      return _OTHER_SYS_HEADER
5126
5127  # If the target file and the include we're checking share a
5128  # basename when we drop common extensions, and the include
5129  # lives in . , then it's likely to be owned by the target file.
5130  target_dir, target_base = (
5131      os.path.split(_DropCommonSuffixes(fileinfo.RepositoryName())))
5132  include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
5133  target_dir_pub = os.path.normpath(target_dir + '/../public')
5134  target_dir_pub = target_dir_pub.replace('\\', '/')
5135  if target_base == include_base and (
5136      include_dir == target_dir or
5137      include_dir == target_dir_pub):
5138    return _LIKELY_MY_HEADER
5139
5140  # If the target and include share some initial basename
5141  # component, it's possible the target is implementing the
5142  # include, so it's allowed to be first, but we'll never
5143  # complain if it's not there.
5144  target_first_component = _RE_FIRST_COMPONENT.match(target_base)
5145  include_first_component = _RE_FIRST_COMPONENT.match(include_base)
5146  if (target_first_component and include_first_component and
5147      target_first_component.group(0) ==
5148      include_first_component.group(0)):
5149    return _POSSIBLE_MY_HEADER
5150
5151  return _OTHER_HEADER
5152
5153
5154
5155def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
5156  """Check rules that are applicable to #include lines.
5157
5158  Strings on #include lines are NOT removed from elided line, to make
5159  certain tasks easier. However, to prevent false positives, checks
5160  applicable to #include lines in CheckLanguage must be put here.
5161
5162  Args:
5163    filename: The name of the current file.
5164    clean_lines: A CleansedLines instance containing the file.
5165    linenum: The number of the line to check.
5166    include_state: An _IncludeState instance in which the headers are inserted.
5167    error: The function to call with any errors found.
5168  """
5169  fileinfo = FileInfo(filename)
5170  line = clean_lines.lines[linenum]
5171
5172  # "include" should use the new style "foo/bar.h" instead of just "bar.h"
5173  # Only do this check if the included header follows google naming
5174  # conventions.  If not, assume that it's a 3rd party API that
5175  # requires special include conventions.
5176  #
5177  # We also make an exception for Lua headers, which follow google
5178  # naming convention but not the include convention.
5179  match = re.match(r'#include\s*"([^/]+\.(.*))"', line)
5180  if match:
5181    if (IsHeaderExtension(match.group(2)) and
5182        not _THIRD_PARTY_HEADERS_PATTERN.match(match.group(1))):
5183      error(filename, linenum, 'build/include_subdir', 4,
5184            'Include the directory when naming header files')
5185
5186  # we shouldn't include a file more than once. actually, there are a
5187  # handful of instances where doing so is okay, but in general it's
5188  # not.
5189  match = _RE_PATTERN_INCLUDE.search(line)
5190  if match:
5191    include = match.group(2)
5192    used_angle_brackets = match.group(1) == '<'
5193    duplicate_line = include_state.FindHeader(include)
5194    if duplicate_line >= 0:
5195      error(filename, linenum, 'build/include', 4,
5196            f'"{include}" already included at {filename}:{duplicate_line}')
5197      return
5198
5199    for extension in GetNonHeaderExtensions():
5200      if (include.endswith('.' + extension) and
5201          os.path.dirname(fileinfo.RepositoryName()) != os.path.dirname(include)):
5202        error(filename, linenum, 'build/include', 4,
5203              'Do not include .' + extension + ' files from other packages')
5204        return
5205
5206    # We DO want to include a 3rd party looking header if it matches the
5207    # filename. Otherwise we get an erroneous error "...should include its
5208    # header" error later.
5209    third_src_header = False
5210    for ext in GetHeaderExtensions():
5211      basefilename = filename[0:len(filename) - len(fileinfo.Extension())]
5212      headerfile = basefilename + '.' + ext
5213      headername = FileInfo(headerfile).RepositoryName()
5214      if headername in include or include in headername:
5215        third_src_header = True
5216        break
5217
5218    if third_src_header or not _THIRD_PARTY_HEADERS_PATTERN.match(include):
5219      include_state.include_list[-1].append((include, linenum))
5220
5221      # We want to ensure that headers appear in the right order:
5222      # 1) for foo.cc, foo.h  (preferred location)
5223      # 2) c system files
5224      # 3) cpp system files
5225      # 4) for foo.cc, foo.h  (deprecated location)
5226      # 5) other google headers
5227      #
5228      # We classify each include statement as one of those 5 types
5229      # using a number of techniques. The include_state object keeps
5230      # track of the highest type seen, and complains if we see a
5231      # lower type after that.
5232      error_message = include_state.CheckNextIncludeOrder(
5233          _ClassifyInclude(fileinfo, include, used_angle_brackets, _include_order))
5234      if error_message:
5235        error(filename, linenum, 'build/include_order', 4,
5236              f'{error_message}. Should be: {fileinfo.BaseName()}.h, c system,'
5237              ' c++ system, other.')
5238      canonical_include = include_state.CanonicalizeAlphabeticalOrder(include)
5239      if not include_state.IsInAlphabeticalOrder(
5240          clean_lines, linenum, canonical_include):
5241        error(filename, linenum, 'build/include_alpha', 4,
5242              f'Include "{include}" not in alphabetical order')
5243      include_state.SetLastHeader(canonical_include)
5244
5245
5246
5247def _GetTextInside(text, start_pattern):
5248  r"""Retrieves all the text between matching open and close parentheses.
5249
5250  Given a string of lines and a regular expression string, retrieve all the text
5251  following the expression and between opening punctuation symbols like
5252  (, [, or {, and the matching close-punctuation symbol. This properly nested
5253  occurrences of the punctuations, so for the text like
5254    printf(a(), b(c()));
5255  a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'.
5256  start_pattern must match string having an open punctuation symbol at the end.
5257
5258  Args:
5259    text: The lines to extract text. Its comments and strings must be elided.
5260           It can be single line and can span multiple lines.
5261    start_pattern: The regexp string indicating where to start extracting
5262                   the text.
5263  Returns:
5264    The extracted text.
5265    None if either the opening string or ending punctuation could not be found.
5266  """
5267  # TODO(unknown): Audit cpplint.py to see what places could be profitably
5268  # rewritten to use _GetTextInside (and use inferior regexp matching today).
5269
5270  # Give opening punctuations to get the matching close-punctuations.
5271  matching_punctuation = {'(': ')', '{': '}', '[': ']'}
5272  closing_punctuation = set(dict.values(matching_punctuation))
5273
5274  # Find the position to start extracting text.
5275  match = re.search(start_pattern, text, re.M)
5276  if not match:  # start_pattern not found in text.
5277    return None
5278  start_position = match.end(0)
5279
5280  assert start_position > 0, (
5281      'start_pattern must ends with an opening punctuation.')
5282  assert text[start_position - 1] in matching_punctuation, (
5283      'start_pattern must ends with an opening punctuation.')
5284  # Stack of closing punctuations we expect to have in text after position.
5285  punctuation_stack = [matching_punctuation[text[start_position - 1]]]
5286  position = start_position
5287  while punctuation_stack and position < len(text):
5288    if text[position] == punctuation_stack[-1]:
5289      punctuation_stack.pop()
5290    elif text[position] in closing_punctuation:
5291      # A closing punctuation without matching opening punctuations.
5292      return None
5293    elif text[position] in matching_punctuation:
5294      punctuation_stack.append(matching_punctuation[text[position]])
5295    position += 1
5296  if punctuation_stack:
5297    # Opening punctuations left without matching close-punctuations.
5298    return None
5299  # punctuations match.
5300  return text[start_position:position - 1]
5301
5302
5303# Patterns for matching call-by-reference parameters.
5304#
5305# Supports nested templates up to 2 levels deep using this messy pattern:
5306#   < (?: < (?: < [^<>]*
5307#               >
5308#           |   [^<>] )*
5309#         >
5310#     |   [^<>] )*
5311#   >
5312_RE_PATTERN_IDENT = r'[_a-zA-Z]\w*'  # =~ [[:alpha:]][[:alnum:]]*
5313_RE_PATTERN_TYPE = (
5314    r'(?:const\s+)?(?:typename\s+|class\s+|struct\s+|union\s+|enum\s+)?'
5315    r'(?:\w|'
5316    r'\s*<(?:<(?:<[^<>]*>|[^<>])*>|[^<>])*>|'
5317    r'::)+')
5318# A call-by-reference parameter ends with '& identifier'.
5319_RE_PATTERN_REF_PARAM = re.compile(
5320    r'(' + _RE_PATTERN_TYPE + r'(?:\s*(?:\bconst\b|[*]))*\s*'
5321    r'&\s*' + _RE_PATTERN_IDENT + r')\s*(?:=[^,()]+)?[,)]')
5322# A call-by-const-reference parameter either ends with 'const& identifier'
5323# or looks like 'const type& identifier' when 'type' is atomic.
5324_RE_PATTERN_CONST_REF_PARAM = (
5325    r'(?:.*\s*\bconst\s*&\s*' + _RE_PATTERN_IDENT +
5326    r'|const\s+' + _RE_PATTERN_TYPE + r'\s*&\s*' + _RE_PATTERN_IDENT + r')')
5327# Stream types.
5328_RE_PATTERN_REF_STREAM_PARAM = (
5329    r'(?:.*stream\s*&\s*' + _RE_PATTERN_IDENT + r')')
5330
5331
5332def CheckLanguage(filename, clean_lines, linenum, file_extension,
5333                  include_state, nesting_state, error):
5334  """Checks rules from the 'C++ language rules' section of cppguide.html.
5335
5336  Some of these rules are hard to test (function overloading, using
5337  uint32_t inappropriately), but we do the best we can.
5338
5339  Args:
5340    filename: The name of the current file.
5341    clean_lines: A CleansedLines instance containing the file.
5342    linenum: The number of the line to check.
5343    file_extension: The extension (without the dot) of the filename.
5344    include_state: An _IncludeState instance in which the headers are inserted.
5345    nesting_state: A NestingState instance which maintains information about
5346                   the current stack of nested blocks being parsed.
5347    error: The function to call with any errors found.
5348  """
5349  # If the line is empty or consists of entirely a comment, no need to
5350  # check it.
5351  line = clean_lines.elided[linenum]
5352  if not line:
5353    return
5354
5355  match = _RE_PATTERN_INCLUDE.search(line)
5356  if match:
5357    CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
5358    return
5359
5360  # Reset include state across preprocessor directives.  This is meant
5361  # to silence warnings for conditional includes.
5362  match = re.match(r'^\s*#\s*(if|ifdef|ifndef|elif|else|endif)\b', line)
5363  if match:
5364    include_state.ResetSection(match.group(1))
5365
5366
5367  # Perform other checks now that we are sure that this is not an include line
5368  CheckCasts(filename, clean_lines, linenum, error)
5369  CheckGlobalStatic(filename, clean_lines, linenum, error)
5370  CheckPrintf(filename, clean_lines, linenum, error)
5371
5372  if IsHeaderExtension(file_extension):
5373    # TODO(unknown): check that 1-arg constructors are explicit.
5374    #                How to tell it's a constructor?
5375    #                (handled in CheckForNonStandardConstructs for now)
5376    # TODO(unknown): check that classes declare or disable copy/assign
5377    #                (level 1 error)
5378    pass
5379
5380  # Check if people are using the verboten C basic types.  The only exception
5381  # we regularly allow is "unsigned short port" for port.
5382  if re.search(r'\bshort port\b', line):
5383    if not re.search(r'\bunsigned short port\b', line):
5384      error(filename, linenum, 'runtime/int', 4,
5385            'Use "unsigned short" for ports, not "short"')
5386  else:
5387    match = re.search(r'\b(short|long(?! +double)|long long)\b', line)
5388    if match:
5389      error(filename, linenum, 'runtime/int', 4,
5390            f'Use int16_t/int64_t/etc, rather than the C type {match.group(1)}')
5391
5392  # Check if some verboten operator overloading is going on
5393  # TODO(unknown): catch out-of-line unary operator&:
5394  #   class X {};
5395  #   int operator&(const X& x) { return 42; }  // unary operator&
5396  # The trick is it's hard to tell apart from binary operator&:
5397  #   class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
5398  if re.search(r'\boperator\s*&\s*\(\s*\)', line):
5399    error(filename, linenum, 'runtime/operator', 4,
5400          'Unary operator& is dangerous.  Do not use it.')
5401
5402  # Check for suspicious usage of "if" like
5403  # } if (a == b) {
5404  if re.search(r'\}\s*if\s*\(', line):
5405    error(filename, linenum, 'readability/braces', 4,
5406          'Did you mean "else if"? If not, start a new line for "if".')
5407
5408  # Check for potential format string bugs like printf(foo).
5409  # We constrain the pattern not to pick things like DocidForPrintf(foo).
5410  # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
5411  # TODO(unknown): Catch the following case. Need to change the calling
5412  # convention of the whole function to process multiple line to handle it.
5413  #   printf(
5414  #       boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line);
5415  printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(')
5416  if printf_args:
5417    match = re.match(r'([\w.\->()]+)$', printf_args)
5418    if match and match.group(1) != '__VA_ARGS__':
5419      function_name = re.search(r'\b((?:string)?printf)\s*\(',
5420                                line, re.I).group(1)
5421      error(filename, linenum, 'runtime/printf', 4,
5422            'Potential format string bug. Do'
5423            f' {function_name}("%s", {match.group(1)}) instead.')
5424
5425  # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
5426  match = re.search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
5427  if match and not re.match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)):
5428    error(filename, linenum, 'runtime/memset', 4,
5429          f'Did you mean "memset({match.group(1)}, 0, {match.group(2)})"?')
5430
5431  if re.search(r'\busing namespace\b', line):
5432    if re.search(r'\bliterals\b', line):
5433      error(filename, linenum, 'build/namespaces_literals', 5,
5434            'Do not use namespace using-directives.  '
5435            'Use using-declarations instead.')
5436    else:
5437      error(filename, linenum, 'build/namespaces', 5,
5438            'Do not use namespace using-directives.  '
5439            'Use using-declarations instead.')
5440
5441  # Detect variable-length arrays.
5442  match = re.match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
5443  if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
5444      match.group(3).find(']') == -1):
5445    # Split the size using space and arithmetic operators as delimiters.
5446    # If any of the resulting tokens are not compile time constants then
5447    # report the error.
5448    tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>]', match.group(3))
5449    is_const = True
5450    skip_next = False
5451    for tok in tokens:
5452      if skip_next:
5453        skip_next = False
5454        continue
5455
5456      if re.search(r'sizeof\(.+\)', tok): continue
5457      if re.search(r'arraysize\(\w+\)', tok): continue
5458
5459      tok = tok.lstrip('(')
5460      tok = tok.rstrip(')')
5461      if not tok: continue
5462      if re.match(r'\d+', tok): continue
5463      if re.match(r'0[xX][0-9a-fA-F]+', tok): continue
5464      if re.match(r'k[A-Z0-9]\w*', tok): continue
5465      if re.match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
5466      if re.match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
5467      # A catch all for tricky sizeof cases, including 'sizeof expression',
5468      # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
5469      # requires skipping the next token because we split on ' ' and '*'.
5470      if tok.startswith('sizeof'):
5471        skip_next = True
5472        continue
5473      is_const = False
5474      break
5475    if not is_const:
5476      error(filename, linenum, 'runtime/arrays', 1,
5477            'Do not use variable-length arrays.  Use an appropriately named '
5478            "('k' followed by CamelCase) compile-time constant for the size.")
5479
5480  # Check for use of unnamed namespaces in header files.  Registration
5481  # macros are typically OK, so we allow use of "namespace {" on lines
5482  # that end with backslashes.
5483  if (IsHeaderExtension(file_extension)
5484      and re.search(r'\bnamespace\s*{', line)
5485      and line[-1] != '\\'):
5486    error(filename, linenum, 'build/namespaces_headers', 4,
5487          'Do not use unnamed namespaces in header files.  See '
5488          'https://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
5489          ' for more information.')
5490
5491
5492def CheckGlobalStatic(filename, clean_lines, linenum, error):
5493  """Check for unsafe global or static objects.
5494
5495  Args:
5496    filename: The name of the current file.
5497    clean_lines: A CleansedLines instance containing the file.
5498    linenum: The number of the line to check.
5499    error: The function to call with any errors found.
5500  """
5501  line = clean_lines.elided[linenum]
5502
5503  # Match two lines at a time to support multiline declarations
5504  if linenum + 1 < clean_lines.NumLines() and not re.search(r'[;({]', line):
5505    line += clean_lines.elided[linenum + 1].strip()
5506
5507  # Check for people declaring static/global STL strings at the top level.
5508  # This is dangerous because the C++ language does not guarantee that
5509  # globals with constructors are initialized before the first access, and
5510  # also because globals can be destroyed when some threads are still running.
5511  # TODO(unknown): Generalize this to also find static unique_ptr instances.
5512  # TODO(unknown): File bugs for clang-tidy to find these.
5513  match = re.match(
5514      r'((?:|static +)(?:|const +))(?::*std::)?string( +const)? +'
5515      r'([a-zA-Z0-9_:]+)\b(.*)',
5516      line)
5517
5518  # Remove false positives:
5519  # - String pointers (as opposed to values).
5520  #    string *pointer
5521  #    const string *pointer
5522  #    string const *pointer
5523  #    string *const pointer
5524  #
5525  # - Functions and template specializations.
5526  #    string Function<Type>(...
5527  #    string Class<Type>::Method(...
5528  #
5529  # - Operators.  These are matched separately because operator names
5530  #   cross non-word boundaries, and trying to match both operators
5531  #   and functions at the same time would decrease accuracy of
5532  #   matching identifiers.
5533  #    string Class::operator*()
5534  if (match and
5535      not re.search(r'\bstring\b(\s+const)?\s*[\*\&]\s*(const\s+)?\w', line) and
5536      not re.search(r'\boperator\W', line) and
5537      not re.match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)*\s*\(([^"]|$)', match.group(4))):
5538    if re.search(r'\bconst\b', line):
5539      error(filename, linenum, 'runtime/string', 4,
5540            'For a static/global string constant, use a C style string instead:'
5541            f' "{match.group(1)}char{match.group(2) or ""} {match.group(3)}[]".')
5542    else:
5543      error(filename, linenum, 'runtime/string', 4,
5544            'Static/global string variables are not permitted.')
5545
5546  if (re.search(r'\b([A-Za-z0-9_]*_)\(\1\)', line) or
5547      re.search(r'\b([A-Za-z0-9_]*_)\(CHECK_NOTNULL\(\1\)\)', line)):
5548    error(filename, linenum, 'runtime/init', 4,
5549          'You seem to be initializing a member variable with itself.')
5550
5551
5552def CheckPrintf(filename, clean_lines, linenum, error):
5553  """Check for printf related issues.
5554
5555  Args:
5556    filename: The name of the current file.
5557    clean_lines: A CleansedLines instance containing the file.
5558    linenum: The number of the line to check.
5559    error: The function to call with any errors found.
5560  """
5561  line = clean_lines.elided[linenum]
5562
5563  # When snprintf is used, the second argument shouldn't be a literal.
5564  match = re.search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
5565  if match and match.group(2) != '0':
5566    # If 2nd arg is zero, snprintf is used to calculate size.
5567    error(filename, linenum, 'runtime/printf', 3, 'If you can, use'
5568          f' sizeof({match.group(1)}) instead of {match.group(2)}'
5569          ' as the 2nd arg to snprintf.')
5570
5571  # Check if some verboten C functions are being used.
5572  if re.search(r'\bsprintf\s*\(', line):
5573    error(filename, linenum, 'runtime/printf', 5,
5574          'Never use sprintf. Use snprintf instead.')
5575  match = re.search(r'\b(strcpy|strcat)\s*\(', line)
5576  if match:
5577    error(filename, linenum, 'runtime/printf', 4,
5578          f'Almost always, snprintf is better than {match.group(1)}')
5579
5580
5581def IsDerivedFunction(clean_lines, linenum):
5582  """Check if current line contains an inherited function.
5583
5584  Args:
5585    clean_lines: A CleansedLines instance containing the file.
5586    linenum: The number of the line to check.
5587  Returns:
5588    True if current line contains a function with "override"
5589    virt-specifier.
5590  """
5591  # Scan back a few lines for start of current function
5592  for i in range(linenum, max(-1, linenum - 10), -1):
5593    match = re.match(r'^([^()]*\w+)\(', clean_lines.elided[i])
5594    if match:
5595      # Look for "override" after the matching closing parenthesis
5596      line, _, closing_paren = CloseExpression(
5597          clean_lines, i, len(match.group(1)))
5598      return (closing_paren >= 0 and
5599              re.search(r'\boverride\b', line[closing_paren:]))
5600  return False
5601
5602
5603def IsOutOfLineMethodDefinition(clean_lines, linenum):
5604  """Check if current line contains an out-of-line method definition.
5605
5606  Args:
5607    clean_lines: A CleansedLines instance containing the file.
5608    linenum: The number of the line to check.
5609  Returns:
5610    True if current line contains an out-of-line method definition.
5611  """
5612  # Scan back a few lines for start of current function
5613  for i in range(linenum, max(-1, linenum - 10), -1):
5614    if re.match(r'^([^()]*\w+)\(', clean_lines.elided[i]):
5615      return re.match(r'^[^()]*\w+::\w+\(', clean_lines.elided[i]) is not None
5616  return False
5617
5618
5619def IsInitializerList(clean_lines, linenum):
5620  """Check if current line is inside constructor initializer list.
5621
5622  Args:
5623    clean_lines: A CleansedLines instance containing the file.
5624    linenum: The number of the line to check.
5625  Returns:
5626    True if current line appears to be inside constructor initializer
5627    list, False otherwise.
5628  """
5629  for i in range(linenum, 1, -1):
5630    line = clean_lines.elided[i]
5631    if i == linenum:
5632      remove_function_body = re.match(r'^(.*)\{\s*$', line)
5633      if remove_function_body:
5634        line = remove_function_body.group(1)
5635
5636    if re.search(r'\s:\s*\w+[({]', line):
5637      # A lone colon tend to indicate the start of a constructor
5638      # initializer list.  It could also be a ternary operator, which
5639      # also tend to appear in constructor initializer lists as
5640      # opposed to parameter lists.
5641      return True
5642    if re.search(r'\}\s*,\s*$', line):
5643      # A closing brace followed by a comma is probably the end of a
5644      # brace-initialized member in constructor initializer list.
5645      return True
5646    if re.search(r'[{};]\s*$', line):
5647      # Found one of the following:
5648      # - A closing brace or semicolon, probably the end of the previous
5649      #   function.
5650      # - An opening brace, probably the start of current class or namespace.
5651      #
5652      # Current line is probably not inside an initializer list since
5653      # we saw one of those things without seeing the starting colon.
5654      return False
5655
5656  # Got to the beginning of the file without seeing the start of
5657  # constructor initializer list.
5658  return False
5659
5660
5661def CheckForNonConstReference(filename, clean_lines, linenum,
5662                              nesting_state, error):
5663  """Check for non-const references.
5664
5665  Separate from CheckLanguage since it scans backwards from current
5666  line, instead of scanning forward.
5667
5668  Args:
5669    filename: The name of the current file.
5670    clean_lines: A CleansedLines instance containing the file.
5671    linenum: The number of the line to check.
5672    nesting_state: A NestingState instance which maintains information about
5673                   the current stack of nested blocks being parsed.
5674    error: The function to call with any errors found.
5675  """
5676  # Do nothing if there is no '&' on current line.
5677  line = clean_lines.elided[linenum]
5678  if '&' not in line:
5679    return
5680
5681  # If a function is inherited, current function doesn't have much of
5682  # a choice, so any non-const references should not be blamed on
5683  # derived function.
5684  if IsDerivedFunction(clean_lines, linenum):
5685    return
5686
5687  # Don't warn on out-of-line method definitions, as we would warn on the
5688  # in-line declaration, if it isn't marked with 'override'.
5689  if IsOutOfLineMethodDefinition(clean_lines, linenum):
5690    return
5691
5692  # Long type names may be broken across multiple lines, usually in one
5693  # of these forms:
5694  #   LongType
5695  #       ::LongTypeContinued &identifier
5696  #   LongType::
5697  #       LongTypeContinued &identifier
5698  #   LongType<
5699  #       ...>::LongTypeContinued &identifier
5700  #
5701  # If we detected a type split across two lines, join the previous
5702  # line to current line so that we can match const references
5703  # accordingly.
5704  #
5705  # Note that this only scans back one line, since scanning back
5706  # arbitrary number of lines would be expensive.  If you have a type
5707  # that spans more than 2 lines, please use a typedef.
5708  if linenum > 1:
5709    previous = None
5710    if re.match(r'\s*::(?:[\w<>]|::)+\s*&\s*\S', line):
5711      # previous_line\n + ::current_line
5712      previous = re.search(r'\b((?:const\s*)?(?:[\w<>]|::)+[\w<>])\s*$',
5713                        clean_lines.elided[linenum - 1])
5714    elif re.match(r'\s*[a-zA-Z_]([\w<>]|::)+\s*&\s*\S', line):
5715      # previous_line::\n + current_line
5716      previous = re.search(r'\b((?:const\s*)?(?:[\w<>]|::)+::)\s*$',
5717                        clean_lines.elided[linenum - 1])
5718    if previous:
5719      line = previous.group(1) + line.lstrip()
5720    else:
5721      # Check for templated parameter that is split across multiple lines
5722      endpos = line.rfind('>')
5723      if endpos > -1:
5724        (_, startline, startpos) = ReverseCloseExpression(
5725            clean_lines, linenum, endpos)
5726        if startpos > -1 and startline < linenum:
5727          # Found the matching < on an earlier line, collect all
5728          # pieces up to current line.
5729          line = ''
5730          for i in range(startline, linenum + 1):
5731            line += clean_lines.elided[i].strip()
5732
5733  # Check for non-const references in function parameters.  A single '&' may
5734  # found in the following places:
5735  #   inside expression: binary & for bitwise AND
5736  #   inside expression: unary & for taking the address of something
5737  #   inside declarators: reference parameter
5738  # We will exclude the first two cases by checking that we are not inside a
5739  # function body, including one that was just introduced by a trailing '{'.
5740  # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare].
5741  if (nesting_state.previous_stack_top and
5742      not (isinstance(nesting_state.previous_stack_top, _ClassInfo) or
5743           isinstance(nesting_state.previous_stack_top, _NamespaceInfo))):
5744    # Not at toplevel, not within a class, and not within a namespace
5745    return
5746
5747  # Avoid initializer lists.  We only need to scan back from the
5748  # current line for something that starts with ':'.
5749  #
5750  # We don't need to check the current line, since the '&' would
5751  # appear inside the second set of parentheses on the current line as
5752  # opposed to the first set.
5753  if linenum > 0:
5754    for i in range(linenum - 1, max(0, linenum - 10), -1):
5755      previous_line = clean_lines.elided[i]
5756      if not re.search(r'[),]\s*$', previous_line):
5757        break
5758      if re.match(r'^\s*:\s+\S', previous_line):
5759        return
5760
5761  # Avoid preprocessors
5762  if re.search(r'\\\s*$', line):
5763    return
5764
5765  # Avoid constructor initializer lists
5766  if IsInitializerList(clean_lines, linenum):
5767    return
5768
5769  # We allow non-const references in a few standard places, like functions
5770  # called "swap()" or iostream operators like "<<" or ">>".  Do not check
5771  # those function parameters.
5772  #
5773  # We also accept & in static_assert, which looks like a function but
5774  # it's actually a declaration expression.
5775  allowed_functions = (r'(?:[sS]wap(?:<\w:+>)?|'
5776                           r'operator\s*[<>][<>]|'
5777                           r'static_assert|COMPILE_ASSERT'
5778                           r')\s*\(')
5779  if re.search(allowed_functions, line):
5780    return
5781  elif not re.search(r'\S+\([^)]*$', line):
5782    # Don't see an allowed function on this line.  Actually we
5783    # didn't see any function name on this line, so this is likely a
5784    # multi-line parameter list.  Try a bit harder to catch this case.
5785    for i in range(2):
5786      if (linenum > i and
5787          re.search(allowed_functions, clean_lines.elided[linenum - i - 1])):
5788        return
5789
5790  decls = re.sub(r'{[^}]*}', ' ', line)  # exclude function body
5791  for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls):
5792    if (not re.match(_RE_PATTERN_CONST_REF_PARAM, parameter) and
5793        not re.match(_RE_PATTERN_REF_STREAM_PARAM, parameter)):
5794      error(filename, linenum, 'runtime/references', 2,
5795            'Is this a non-const reference? '
5796            'If so, make const or use a pointer: ' +
5797            re.sub(' *<', '<', parameter))
5798
5799
5800def CheckCasts(filename, clean_lines, linenum, error):
5801  """Various cast related checks.
5802
5803  Args:
5804    filename: The name of the current file.
5805    clean_lines: A CleansedLines instance containing the file.
5806    linenum: The number of the line to check.
5807    error: The function to call with any errors found.
5808  """
5809  line = clean_lines.elided[linenum]
5810
5811  # Check to see if they're using an conversion function cast.
5812  # I just try to capture the most common basic types, though there are more.
5813  # Parameterless conversion functions, such as bool(), are allowed as they are
5814  # probably a member operator declaration or default constructor.
5815  match = re.search(
5816      r'(\bnew\s+(?:const\s+)?|\S<\s*(?:const\s+)?)?\b'
5817      r'(int|float|double|bool|char|int16_t|uint16_t|int32_t|uint32_t|int64_t|uint64_t)'
5818      r'(\([^)].*)', line)
5819  expecting_function = ExpectingFunctionArgs(clean_lines, linenum)
5820  if match and not expecting_function:
5821    matched_type = match.group(2)
5822
5823    # matched_new_or_template is used to silence two false positives:
5824    # - New operators
5825    # - Template arguments with function types
5826    #
5827    # For template arguments, we match on types immediately following
5828    # an opening bracket without any spaces.  This is a fast way to
5829    # silence the common case where the function type is the first
5830    # template argument.  False negative with less-than comparison is
5831    # avoided because those operators are usually followed by a space.
5832    #
5833    #   function<double(double)>   // bracket + no space = false positive
5834    #   value < double(42)         // bracket + space = true positive
5835    matched_new_or_template = match.group(1)
5836
5837    # Avoid arrays by looking for brackets that come after the closing
5838    # parenthesis.
5839    if re.match(r'\([^()]+\)\s*\[', match.group(3)):
5840      return
5841
5842    # Other things to ignore:
5843    # - Function pointers
5844    # - Casts to pointer types
5845    # - Placement new
5846    # - Alias declarations
5847    matched_funcptr = match.group(3)
5848    if (matched_new_or_template is None and
5849        not (matched_funcptr and
5850             (re.match(r'\((?:[^() ]+::\s*\*\s*)?[^() ]+\)\s*\(',
5851                    matched_funcptr) or
5852              matched_funcptr.startswith('(*)'))) and
5853        not re.match(r'\s*using\s+\S+\s*=\s*' + matched_type, line) and
5854        not re.search(r'new\(\S+\)\s*' + matched_type, line)):
5855      error(filename, linenum, 'readability/casting', 4,
5856            'Using deprecated casting style.  '
5857            f'Use static_cast<{matched_type}>(...) instead')
5858
5859  if not expecting_function:
5860    CheckCStyleCast(filename, clean_lines, linenum, 'static_cast',
5861                    r'\((int|float|double|bool|char|u?int(16|32|64)_t|size_t)\)', error)
5862
5863  # This doesn't catch all cases. Consider (const char * const)"hello".
5864  #
5865  # (char *) "foo" should always be a const_cast (reinterpret_cast won't
5866  # compile).
5867  if CheckCStyleCast(filename, clean_lines, linenum, 'const_cast',
5868                     r'\((char\s?\*+\s?)\)\s*"', error):
5869    pass
5870  else:
5871    # Check pointer casts for other than string constants
5872    CheckCStyleCast(filename, clean_lines, linenum, 'reinterpret_cast',
5873                    r'\((\w+\s?\*+\s?)\)', error)
5874
5875  # In addition, we look for people taking the address of a cast.  This
5876  # is dangerous -- casts can assign to temporaries, so the pointer doesn't
5877  # point where you think.
5878  #
5879  # Some non-identifier character is required before the '&' for the
5880  # expression to be recognized as a cast.  These are casts:
5881  #   expression = &static_cast<int*>(temporary());
5882  #   function(&(int*)(temporary()));
5883  #
5884  # This is not a cast:
5885  #   reference_type&(int* function_param);
5886  match = re.search(
5887      r'(?:[^\w]&\(([^)*][^)]*)\)[\w(])|'
5888      r'(?:[^\w]&(static|dynamic|down|reinterpret)_cast\b)', line)
5889  if match:
5890    # Try a better error message when the & is bound to something
5891    # dereferenced by the casted pointer, as opposed to the casted
5892    # pointer itself.
5893    parenthesis_error = False
5894    match = re.match(r'^(.*&(?:static|dynamic|down|reinterpret)_cast\b)<', line)
5895    if match:
5896      _, y1, x1 = CloseExpression(clean_lines, linenum, len(match.group(1)))
5897      if x1 >= 0 and clean_lines.elided[y1][x1] == '(':
5898        _, y2, x2 = CloseExpression(clean_lines, y1, x1)
5899        if x2 >= 0:
5900          extended_line = clean_lines.elided[y2][x2:]
5901          if y2 < clean_lines.NumLines() - 1:
5902            extended_line += clean_lines.elided[y2 + 1]
5903          if re.match(r'\s*(?:->|\[)', extended_line):
5904            parenthesis_error = True
5905
5906    if parenthesis_error:
5907      error(filename, linenum, 'readability/casting', 4,
5908            ('Are you taking an address of something dereferenced '
5909             'from a cast?  Wrapping the dereferenced expression in '
5910             'parentheses will make the binding more obvious'))
5911    else:
5912      error(filename, linenum, 'runtime/casting', 4,
5913            ('Are you taking an address of a cast?  '
5914             'This is dangerous: could be a temp var.  '
5915             'Take the address before doing the cast, rather than after'))
5916
5917
5918def CheckCStyleCast(filename, clean_lines, linenum, cast_type, pattern, error):
5919  """Checks for a C-style cast by looking for the pattern.
5920
5921  Args:
5922    filename: The name of the current file.
5923    clean_lines: A CleansedLines instance containing the file.
5924    linenum: The number of the line to check.
5925    cast_type: The string for the C++ cast to recommend.  This is either
5926      reinterpret_cast, static_cast, or const_cast, depending.
5927    pattern: The regular expression used to find C-style casts.
5928    error: The function to call with any errors found.
5929
5930  Returns:
5931    True if an error was emitted.
5932    False otherwise.
5933  """
5934  line = clean_lines.elided[linenum]
5935  match = re.search(pattern, line)
5936  if not match:
5937    return False
5938
5939  # Exclude lines with keywords that tend to look like casts
5940  context = line[0:match.start(1) - 1]
5941  if re.match(r'.*\b(?:sizeof|alignof|alignas|[_A-Z][_A-Z0-9]*)\s*$', context):
5942    return False
5943
5944  # Try expanding current context to see if we one level of
5945  # parentheses inside a macro.
5946  if linenum > 0:
5947    for i in range(linenum - 1, max(0, linenum - 5), -1):
5948      context = clean_lines.elided[i] + context
5949  if re.match(r'.*\b[_A-Z][_A-Z0-9]*\s*\((?:\([^()]*\)|[^()])*$', context):
5950    return False
5951
5952  # operator++(int) and operator--(int)
5953  if (context.endswith(' operator++') or context.endswith(' operator--') or
5954      context.endswith('::operator++') or context.endswith('::operator--')):
5955    return False
5956
5957  # A single unnamed argument for a function tends to look like old style cast.
5958  # If we see those, don't issue warnings for deprecated casts.
5959  remainder = line[match.end(0):]
5960  if re.match(r'^\s*(?:;|const\b|throw\b|final\b|override\b|[=>{),]|->)',
5961           remainder):
5962    return False
5963
5964  # At this point, all that should be left is actual casts.
5965  error(filename, linenum, 'readability/casting', 4,
5966        f'Using C-style cast.  Use {cast_type}<{match.group(1)}>(...) instead')
5967
5968  return True
5969
5970
5971def ExpectingFunctionArgs(clean_lines, linenum):
5972  """Checks whether where function type arguments are expected.
5973
5974  Args:
5975    clean_lines: A CleansedLines instance containing the file.
5976    linenum: The number of the line to check.
5977
5978  Returns:
5979    True if the line at 'linenum' is inside something that expects arguments
5980    of function types.
5981  """
5982  line = clean_lines.elided[linenum]
5983  return (re.match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or
5984          (linenum >= 2 and
5985           (re.match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s*$',
5986                  clean_lines.elided[linenum - 1]) or
5987            re.match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s*$',
5988                  clean_lines.elided[linenum - 2]) or
5989            re.search(r'\bstd::m?function\s*\<\s*$',
5990                   clean_lines.elided[linenum - 1]))))
5991
5992
5993_HEADERS_CONTAINING_TEMPLATES = (
5994    ('<deque>', ('deque',)),
5995    ('<functional>', ('unary_function', 'binary_function',
5996                      'plus', 'minus', 'multiplies', 'divides', 'modulus',
5997                      'negate',
5998                      'equal_to', 'not_equal_to', 'greater', 'less',
5999                      'greater_equal', 'less_equal',
6000                      'logical_and', 'logical_or', 'logical_not',
6001                      'unary_negate', 'not1', 'binary_negate', 'not2',
6002                      'bind1st', 'bind2nd',
6003                      'pointer_to_unary_function',
6004                      'pointer_to_binary_function',
6005                      'ptr_fun',
6006                      'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
6007                      'mem_fun_ref_t',
6008                      'const_mem_fun_t', 'const_mem_fun1_t',
6009                      'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
6010                      'mem_fun_ref',
6011                     )),
6012    ('<limits>', ('numeric_limits',)),
6013    ('<list>', ('list',)),
6014    ('<map>', ('multimap',)),
6015    ('<memory>', ('allocator', 'make_shared', 'make_unique', 'shared_ptr',
6016                  'unique_ptr', 'weak_ptr')),
6017    ('<queue>', ('queue', 'priority_queue',)),
6018    ('<set>', ('set', 'multiset',)),
6019    ('<stack>', ('stack',)),
6020    ('<string>', ('char_traits', 'basic_string',)),
6021    ('<tuple>', ('tuple',)),
6022    ('<unordered_map>', ('unordered_map', 'unordered_multimap')),
6023    ('<unordered_set>', ('unordered_set', 'unordered_multiset')),
6024    ('<utility>', ('pair',)),
6025    ('<vector>', ('vector',)),
6026
6027    # gcc extensions.
6028    # Note: std::hash is their hash, ::hash is our hash
6029    ('<hash_map>', ('hash_map', 'hash_multimap',)),
6030    ('<hash_set>', ('hash_set', 'hash_multiset',)),
6031    ('<slist>', ('slist',)),
6032    )
6033
6034_HEADERS_MAYBE_TEMPLATES = (
6035    ('<algorithm>', ('copy', 'max', 'min', 'min_element', 'sort',
6036                     'transform',
6037                    )),
6038    ('<utility>', ('forward', 'make_pair', 'move', 'swap')),
6039    )
6040
6041# Non templated types or global objects
6042_HEADERS_TYPES_OR_OBJS = (
6043    # String and others are special -- it is a non-templatized type in STL.
6044    ('<string>', ('string',)),
6045    ('<iostream>', ('cin', 'cout', 'cerr', 'clog', 'wcin', 'wcout',
6046                    'wcerr', 'wclog')),
6047    ('<cstdio>', ('FILE', 'fpos_t')))
6048
6049# Non templated functions
6050_HEADERS_FUNCTIONS = (
6051    ('<cstdio>', ('fopen', 'freopen',
6052                  'fclose', 'fflush', 'setbuf', 'setvbuf', 'fread',
6053                  'fwrite', 'fgetc', 'getc', 'fgets', 'fputc', 'putc',
6054                  'fputs', 'getchar', 'gets', 'putchar', 'puts', 'ungetc',
6055                  'scanf', 'fscanf', 'sscanf', 'vscanf', 'vfscanf',
6056                  'vsscanf', 'printf', 'fprintf', 'sprintf', 'snprintf',
6057                  'vprintf', 'vfprintf', 'vsprintf', 'vsnprintf',
6058                  'ftell', 'fgetpos', 'fseek', 'fsetpos',
6059                  'clearerr', 'feof', 'ferror', 'perror',
6060                  'tmpfile', 'tmpnam'),),)
6061
6062_re_pattern_headers_maybe_templates = []
6063for _header, _templates in _HEADERS_MAYBE_TEMPLATES:
6064  for _template in _templates:
6065    # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
6066    # 'type::max()'.
6067    _re_pattern_headers_maybe_templates.append(
6068        (re.compile(r'((\bstd::)|[^>.:])\b' + _template + r'(<.*?>)?\([^\)]'),
6069            _template,
6070            _header))
6071
6072# Map is often overloaded. Only check, if it is fully qualified.
6073# Match 'std::map<type>(...)', but not 'map<type>(...)''
6074_re_pattern_headers_maybe_templates.append(
6075    (re.compile(r'(std\b::\bmap\s*\<)|(^(std\b::\b)map\b\(\s*\<)'),
6076        'map<>',
6077        '<map>'))
6078
6079# Other scripts may reach in and modify this pattern.
6080_re_pattern_templates = []
6081for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
6082  for _template in _templates:
6083    _re_pattern_templates.append(
6084        (re.compile(r'((^|(^|\s|((^|\W)::))std::)|[^>.:]\b)' + _template + r'\s*\<'),
6085         _template + '<>',
6086         _header))
6087
6088_re_pattern_types_or_objs = []
6089for _header, _types_or_objs in _HEADERS_TYPES_OR_OBJS:
6090  for _type_or_obj in _types_or_objs:
6091    _re_pattern_types_or_objs.append(
6092        (re.compile(r'\b' + _type_or_obj + r'\b'),
6093            _type_or_obj,
6094            _header))
6095
6096_re_pattern_functions = []
6097for _header, _functions in _HEADERS_FUNCTIONS:
6098  for _function in _functions:
6099    # Match printf(..., ...), but not foo->printf, foo.printf or
6100    # 'type::printf()'.
6101    _re_pattern_functions.append(
6102        (re.compile(r'([^>.]|^)\b' + _function + r'\([^\)]'),
6103            _function,
6104            _header))
6105
6106def FilesBelongToSameModule(filename_cc, filename_h):
6107  """Check if these two filenames belong to the same module.
6108
6109  The concept of a 'module' here is a as follows:
6110  foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the
6111  same 'module' if they are in the same directory.
6112  some/path/public/xyzzy and some/path/internal/xyzzy are also considered
6113  to belong to the same module here.
6114
6115  If the filename_cc contains a longer path than the filename_h, for example,
6116  '/absolute/path/to/base/sysinfo.cc', and this file would include
6117  'base/sysinfo.h', this function also produces the prefix needed to open the
6118  header. This is used by the caller of this function to more robustly open the
6119  header file. We don't have access to the real include paths in this context,
6120  so we need this guesswork here.
6121
6122  Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
6123  according to this implementation. Because of this, this function gives
6124  some false positives. This should be sufficiently rare in practice.
6125
6126  Args:
6127    filename_cc: is the path for the source (e.g. .cc) file
6128    filename_h: is the path for the header path
6129
6130  Returns:
6131    Tuple with a bool and a string:
6132    bool: True if filename_cc and filename_h belong to the same module.
6133    string: the additional prefix needed to open the header file.
6134  """
6135  fileinfo_cc = FileInfo(filename_cc)
6136  if fileinfo_cc.Extension().lstrip('.') not in GetNonHeaderExtensions():
6137    return (False, '')
6138
6139  fileinfo_h = FileInfo(filename_h)
6140  if not IsHeaderExtension(fileinfo_h.Extension().lstrip('.')):
6141    return (False, '')
6142
6143  filename_cc = filename_cc[:-(len(fileinfo_cc.Extension()))]
6144  matched_test_suffix = re.search(_TEST_FILE_SUFFIX, fileinfo_cc.BaseName())
6145  if matched_test_suffix:
6146    filename_cc = filename_cc[:-len(matched_test_suffix.group(1))]
6147
6148  filename_cc = filename_cc.replace('/public/', '/')
6149  filename_cc = filename_cc.replace('/internal/', '/')
6150
6151  filename_h = filename_h[:-(len(fileinfo_h.Extension()))]
6152  if filename_h.endswith('-inl'):
6153    filename_h = filename_h[:-len('-inl')]
6154  filename_h = filename_h.replace('/public/', '/')
6155  filename_h = filename_h.replace('/internal/', '/')
6156
6157  files_belong_to_same_module = filename_cc.endswith(filename_h)
6158  common_path = ''
6159  if files_belong_to_same_module:
6160    common_path = filename_cc[:-len(filename_h)]
6161  return files_belong_to_same_module, common_path
6162
6163
6164def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
6165                              io=codecs):
6166  """Reports for missing stl includes.
6167
6168  This function will output warnings to make sure you are including the headers
6169  necessary for the stl containers and functions that you use. We only give one
6170  reason to include a header. For example, if you use both equal_to<> and
6171  less<> in a .h file, only one (the latter in the file) of these will be
6172  reported as a reason to include the <functional>.
6173
6174  Args:
6175    filename: The name of the current file.
6176    clean_lines: A CleansedLines instance containing the file.
6177    include_state: An _IncludeState instance.
6178    error: The function to call with any errors found.
6179    io: The IO factory to use to read the header file. Provided for unittest
6180        injection.
6181  """
6182  required = {}  # A map of header name to linenumber and the template entity.
6183                 # Example of required: { '<functional>': (1219, 'less<>') }
6184
6185  for linenum in range(clean_lines.NumLines()):
6186    line = clean_lines.elided[linenum]
6187    if not line or line[0] == '#':
6188      continue
6189
6190    _re_patterns = []
6191    _re_patterns.extend(_re_pattern_types_or_objs)
6192    _re_patterns.extend(_re_pattern_functions)
6193    for pattern, item, header in _re_patterns:
6194      matched = pattern.search(line)
6195      if matched:
6196        # Don't warn about strings in non-STL namespaces:
6197        # (We check only the first match per line; good enough.)
6198        prefix = line[:matched.start()]
6199        if prefix.endswith('std::') or not prefix.endswith('::'):
6200          required[header] = (linenum, item)
6201
6202    for pattern, template, header in _re_pattern_headers_maybe_templates:
6203      if pattern.search(line):
6204        required[header] = (linenum, template)
6205
6206    # The following function is just a speed up, no semantics are changed.
6207    if '<' not in line:  # Reduces the cpu time usage by skipping lines.
6208      continue
6209
6210    for pattern, template, header in _re_pattern_templates:
6211      matched = pattern.search(line)
6212      if matched:
6213        # Don't warn about IWYU in non-STL namespaces:
6214        # (We check only the first match per line; good enough.)
6215        prefix = line[:matched.start()]
6216        if prefix.endswith('std::') or not prefix.endswith('::'):
6217          required[header] = (linenum, template)
6218
6219  # Let's flatten the include_state include_list and copy it into a dictionary.
6220  include_dict = dict([item for sublist in include_state.include_list
6221                       for item in sublist])
6222
6223  # All the lines have been processed, report the errors found.
6224  for required_header_unstripped in sorted(required, key=required.__getitem__):
6225    template = required[required_header_unstripped][1]
6226    if required_header_unstripped.strip('<>"') not in include_dict:
6227      error(filename, required[required_header_unstripped][0],
6228            'build/include_what_you_use', 4,
6229            'Add #include ' + required_header_unstripped + ' for ' + template)
6230
6231
6232_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<')
6233
6234
6235def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error):
6236  """Check that make_pair's template arguments are deduced.
6237
6238  G++ 4.6 in C++11 mode fails badly if make_pair's template arguments are
6239  specified explicitly, and such use isn't intended in any case.
6240
6241  Args:
6242    filename: The name of the current file.
6243    clean_lines: A CleansedLines instance containing the file.
6244    linenum: The number of the line to check.
6245    error: The function to call with any errors found.
6246  """
6247  line = clean_lines.elided[linenum]
6248  match = _RE_PATTERN_EXPLICIT_MAKEPAIR.search(line)
6249  if match:
6250    error(filename, linenum, 'build/explicit_make_pair',
6251          4,  # 4 = high confidence
6252          'For C++11-compatibility, omit template arguments from make_pair'
6253          ' OR use pair directly OR if appropriate, construct a pair directly')
6254
6255
6256def CheckRedundantVirtual(filename, clean_lines, linenum, error):
6257  """Check if line contains a redundant "virtual" function-specifier.
6258
6259  Args:
6260    filename: The name of the current file.
6261    clean_lines: A CleansedLines instance containing the file.
6262    linenum: The number of the line to check.
6263    error: The function to call with any errors found.
6264  """
6265  # Look for "virtual" on current line.
6266  line = clean_lines.elided[linenum]
6267  virtual = re.match(r'^(.*)(\bvirtual\b)(.*)$', line)
6268  if not virtual: return
6269
6270  # Ignore "virtual" keywords that are near access-specifiers.  These
6271  # are only used in class base-specifier and do not apply to member
6272  # functions.
6273  if (re.search(r'\b(public|protected|private)\s+$', virtual.group(1)) or
6274      re.match(r'^\s+(public|protected|private)\b', virtual.group(3))):
6275    return
6276
6277  # Ignore the "virtual" keyword from virtual base classes.  Usually
6278  # there is a column on the same line in these cases (virtual base
6279  # classes are rare in google3 because multiple inheritance is rare).
6280  if re.match(r'^.*[^:]:[^:].*$', line): return
6281
6282  # Look for the next opening parenthesis.  This is the start of the
6283  # parameter list (possibly on the next line shortly after virtual).
6284  # TODO(unknown): doesn't work if there are virtual functions with
6285  # decltype() or other things that use parentheses, but csearch suggests
6286  # that this is rare.
6287  end_col = -1
6288  end_line = -1
6289  start_col = len(virtual.group(2))
6290  for start_line in range(linenum, min(linenum + 3, clean_lines.NumLines())):
6291    line = clean_lines.elided[start_line][start_col:]
6292    parameter_list = re.match(r'^([^(]*)\(', line)
6293    if parameter_list:
6294      # Match parentheses to find the end of the parameter list
6295      (_, end_line, end_col) = CloseExpression(
6296          clean_lines, start_line, start_col + len(parameter_list.group(1)))
6297      break
6298    start_col = 0
6299
6300  if end_col < 0:
6301    return  # Couldn't find end of parameter list, give up
6302
6303  # Look for "override" or "final" after the parameter list
6304  # (possibly on the next few lines).
6305  for i in range(end_line, min(end_line + 3, clean_lines.NumLines())):
6306    line = clean_lines.elided[i][end_col:]
6307    match = re.search(r'\b(override|final)\b', line)
6308    if match:
6309      error(filename, linenum, 'readability/inheritance', 4,
6310            ('"virtual" is redundant since function is '
6311            f'already declared as "{match.group(1)}"'))
6312
6313    # Set end_col to check whole lines after we are done with the
6314    # first line.
6315    end_col = 0
6316    if re.search(r'[^\w]\s*$', line):
6317      break
6318
6319
6320def CheckRedundantOverrideOrFinal(filename, clean_lines, linenum, error):
6321  """Check if line contains a redundant "override" or "final" virt-specifier.
6322
6323  Args:
6324    filename: The name of the current file.
6325    clean_lines: A CleansedLines instance containing the file.
6326    linenum: The number of the line to check.
6327    error: The function to call with any errors found.
6328  """
6329  # Look for closing parenthesis nearby.  We need one to confirm where
6330  # the declarator ends and where the virt-specifier starts to avoid
6331  # false positives.
6332  line = clean_lines.elided[linenum]
6333  declarator_end = line.rfind(')')
6334  if declarator_end >= 0:
6335    fragment = line[declarator_end:]
6336  else:
6337    if linenum > 1 and clean_lines.elided[linenum - 1].rfind(')') >= 0:
6338      fragment = line
6339    else:
6340      return
6341
6342  # Check that at most one of "override" or "final" is present, not both
6343  if re.search(r'\boverride\b', fragment) and re.search(r'\bfinal\b', fragment):
6344    error(filename, linenum, 'readability/inheritance', 4,
6345          ('"override" is redundant since function is '
6346           'already declared as "final"'))
6347
6348
6349
6350
6351# Returns true if we are at a new block, and it is directly
6352# inside of a namespace.
6353def IsBlockInNameSpace(nesting_state, is_forward_declaration):
6354  """Checks that the new block is directly in a namespace.
6355
6356  Args:
6357    nesting_state: The _NestingState object that contains info about our state.
6358    is_forward_declaration: If the class is a forward declared class.
6359  Returns:
6360    Whether or not the new block is directly in a namespace.
6361  """
6362  if is_forward_declaration:
6363    return len(nesting_state.stack) >= 1 and (
6364      isinstance(nesting_state.stack[-1], _NamespaceInfo))
6365
6366  if len(nesting_state.stack) >= 1:
6367    if isinstance(nesting_state.stack[-1], _NamespaceInfo):
6368      return True
6369    elif (len(nesting_state.stack) > 1 and
6370          isinstance(nesting_state.previous_stack_top, _NamespaceInfo) and
6371          isinstance(nesting_state.stack[-2], _NamespaceInfo)):
6372      return True
6373  return False
6374
6375
6376def ShouldCheckNamespaceIndentation(nesting_state, is_namespace_indent_item,
6377                                    raw_lines_no_comments, linenum):
6378  """This method determines if we should apply our namespace indentation check.
6379
6380  Args:
6381    nesting_state: The current nesting state.
6382    is_namespace_indent_item: If we just put a new class on the stack, True.
6383      If the top of the stack is not a class, or we did not recently
6384      add the class, False.
6385    raw_lines_no_comments: The lines without the comments.
6386    linenum: The current line number we are processing.
6387
6388  Returns:
6389    True if we should apply our namespace indentation check. Currently, it
6390    only works for classes and namespaces inside of a namespace.
6391  """
6392
6393  is_forward_declaration = IsForwardClassDeclaration(raw_lines_no_comments,
6394                                                     linenum)
6395
6396  if not (is_namespace_indent_item or is_forward_declaration):
6397    return False
6398
6399  # If we are in a macro, we do not want to check the namespace indentation.
6400  if IsMacroDefinition(raw_lines_no_comments, linenum):
6401    return False
6402
6403  return IsBlockInNameSpace(nesting_state, is_forward_declaration)
6404
6405
6406# Call this method if the line is directly inside of a namespace.
6407# If the line above is blank (excluding comments) or the start of
6408# an inner namespace, it cannot be indented.
6409def CheckItemIndentationInNamespace(filename, raw_lines_no_comments, linenum,
6410                                    error):
6411  line = raw_lines_no_comments[linenum]
6412  if re.match(r'^\s+', line):
6413    error(filename, linenum, 'whitespace/indent_namespace', 4,
6414          'Do not indent within a namespace.')
6415
6416
6417def ProcessLine(filename, file_extension, clean_lines, line,
6418                include_state, function_state, nesting_state, error,
6419                extra_check_functions=None, cppvar=None):
6420  """Processes a single line in the file.
6421
6422  Args:
6423    filename: Filename of the file that is being processed.
6424    file_extension: The extension (dot not included) of the file.
6425    clean_lines: An array of strings, each representing a line of the file,
6426                 with comments stripped.
6427    line: Number of line being processed.
6428    include_state: An _IncludeState instance in which the headers are inserted.
6429    function_state: A _FunctionState instance which counts function lines, etc.
6430    nesting_state: A NestingState instance which maintains information about
6431                   the current stack of nested blocks being parsed.
6432    error: A callable to which errors are reported, which takes 4 arguments:
6433           filename, line number, error level, and message
6434    extra_check_functions: An array of additional check functions that will be
6435                           run on each source line. Each function takes 4
6436                           arguments: filename, clean_lines, line, error
6437    cppvar: The header guard variable returned by GetHeaderGuardCPPVar.
6438  """
6439  raw_lines = clean_lines.raw_lines
6440  ParseNolintSuppressions(filename, raw_lines[line], line, error)
6441  nesting_state.Update(filename, clean_lines, line, error)
6442  CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line,
6443                               error)
6444  if nesting_state.InAsmBlock(): return
6445  CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
6446  CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
6447  CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error, cppvar)
6448  CheckLanguage(filename, clean_lines, line, file_extension, include_state,
6449                nesting_state, error)
6450  CheckForNonConstReference(filename, clean_lines, line, nesting_state, error)
6451  CheckForNonStandardConstructs(filename, clean_lines, line,
6452                                nesting_state, error)
6453  CheckVlogArguments(filename, clean_lines, line, error)
6454  CheckPosixThreading(filename, clean_lines, line, error)
6455  CheckInvalidIncrement(filename, clean_lines, line, error)
6456  CheckMakePairUsesDeduction(filename, clean_lines, line, error)
6457  CheckRedundantVirtual(filename, clean_lines, line, error)
6458  CheckRedundantOverrideOrFinal(filename, clean_lines, line, error)
6459  if extra_check_functions:
6460    for check_fn in extra_check_functions:
6461      check_fn(filename, clean_lines, line, error)
6462
6463
6464def FlagCxxHeaders(filename, clean_lines, linenum, error):
6465  """Flag C++ headers that the styleguide restricts.
6466
6467  Args:
6468    filename: The name of the current file.
6469    clean_lines: A CleansedLines instance containing the file.
6470    linenum: The number of the line to check.
6471    error: The function to call with any errors found.
6472  """
6473  line = clean_lines.elided[linenum]
6474
6475  include = re.match(r'\s*#\s*include\s+[<"]([^<"]+)[">]', line)
6476
6477  # Flag unapproved C++11 headers.
6478  if include and include.group(1) in ('cfenv',
6479                                      'fenv.h',
6480                                      'ratio',
6481                                     ):
6482    error(filename, linenum, 'build/c++11', 5,
6483          f"<{include.group(1)}> is an unapproved C++11 header.")
6484
6485  # filesystem is the only unapproved C++17 header
6486  if include and include.group(1) == 'filesystem':
6487    error(filename, linenum, 'build/c++17', 5,
6488          "<filesystem> is an unapproved C++17 header.")
6489
6490
6491def ProcessFileData(filename, file_extension, lines, error,
6492                    extra_check_functions=None):
6493  """Performs lint checks and reports any errors to the given error function.
6494
6495  Args:
6496    filename: Filename of the file that is being processed.
6497    file_extension: The extension (dot not included) of the file.
6498    lines: An array of strings, each representing a line of the file, with the
6499           last element being empty if the file is terminated with a newline.
6500    error: A callable to which errors are reported, which takes 4 arguments:
6501           filename, line number, error level, and message
6502    extra_check_functions: An array of additional check functions that will be
6503                           run on each source line. Each function takes 4
6504                           arguments: filename, clean_lines, line, error
6505  """
6506  lines = (['// marker so line numbers and indices both start at 1'] + lines +
6507           ['// marker so line numbers end in a known way'])
6508
6509  include_state = _IncludeState()
6510  function_state = _FunctionState()
6511  nesting_state = NestingState()
6512
6513  ResetNolintSuppressions()
6514
6515  CheckForCopyright(filename, lines, error)
6516  ProcessGlobalSuppressions(lines)
6517  RemoveMultiLineComments(filename, lines, error)
6518  clean_lines = CleansedLines(lines)
6519
6520  cppvar = None
6521  if IsHeaderExtension(file_extension):
6522    cppvar = GetHeaderGuardCPPVariable(filename)
6523    CheckForHeaderGuard(filename, clean_lines, error, cppvar)
6524
6525  for line in range(clean_lines.NumLines()):
6526    ProcessLine(filename, file_extension, clean_lines, line,
6527                include_state, function_state, nesting_state, error,
6528                extra_check_functions, cppvar)
6529    FlagCxxHeaders(filename, clean_lines, line, error)
6530  if _error_suppressions.HasOpenBlock():
6531    error(filename, _error_suppressions.GetOpenBlockStart(), 'readability/nolint', 5,
6532          'NONLINT block never ended')
6533
6534  CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)
6535
6536  # Check that the .cc file has included its header if it exists.
6537  if _IsSourceExtension(file_extension):
6538    CheckHeaderFileIncluded(filename, include_state, error)
6539
6540  # We check here rather than inside ProcessLine so that we see raw
6541  # lines rather than "cleaned" lines.
6542  CheckForBadCharacters(filename, lines, error)
6543
6544  CheckForNewlineAtEOF(filename, lines, error)
6545
6546def ProcessConfigOverrides(filename):
6547  """ Loads the configuration files and processes the config overrides.
6548
6549  Args:
6550    filename: The name of the file being processed by the linter.
6551
6552  Returns:
6553    False if the current |filename| should not be processed further.
6554  """
6555
6556  abs_filename = os.path.abspath(filename)
6557  cfg_filters = []
6558  keep_looking = True
6559  while keep_looking:
6560    abs_path, base_name = os.path.split(abs_filename)
6561    if not base_name:
6562      break  # Reached the root directory.
6563
6564    cfg_file = os.path.join(abs_path, _config_filename)
6565    abs_filename = abs_path
6566    if not os.path.isfile(cfg_file):
6567      continue
6568
6569    try:
6570      with codecs.open(cfg_file, 'r', 'utf8', 'replace') as file_handle:
6571        for line in file_handle:
6572          line, _, _ = line.partition('#')  # Remove comments.
6573          if not line.strip():
6574            continue
6575
6576          name, _, val = line.partition('=')
6577          name = name.strip()
6578          val = val.strip()
6579          if name == 'set noparent':
6580            keep_looking = False
6581          elif name == 'filter':
6582            cfg_filters.append(val)
6583          elif name == 'exclude_files':
6584            # When matching exclude_files pattern, use the base_name of
6585            # the current file name or the directory name we are processing.
6586            # For example, if we are checking for lint errors in /foo/bar/baz.cc
6587            # and we found the .cfg file at /foo/CPPLINT.cfg, then the config
6588            # file's "exclude_files" filter is meant to be checked against "bar"
6589            # and not "baz" nor "bar/baz.cc".
6590            if base_name:
6591              pattern = re.compile(val)
6592              if pattern.match(base_name):
6593                if _cpplint_state.quiet:
6594                  # Suppress "Ignoring file" warning when using --quiet.
6595                  return False
6596                _cpplint_state.PrintInfo(f'Ignoring "{filename}": file excluded by "{cfg_file}". '
6597                                 'File path component "%s" matches '
6598                                 'pattern "%s"\n' %
6599                                 (base_name, val))
6600                return False
6601          elif name == 'linelength':
6602            global _line_length
6603            try:
6604              _line_length = int(val)
6605            except ValueError:
6606              _cpplint_state.PrintError('Line length must be numeric.')
6607          elif name == 'extensions':
6608            ProcessExtensionsOption(val)
6609          elif name == 'root':
6610            global _root
6611            # root directories are specified relative to CPPLINT.cfg dir.
6612            _root = os.path.join(os.path.dirname(cfg_file), val)
6613          elif name == 'headers':
6614            ProcessHppHeadersOption(val)
6615          elif name == 'includeorder':
6616            ProcessIncludeOrderOption(val)
6617          else:
6618            _cpplint_state.PrintError(
6619                f'Invalid configuration option ({name}) in file {cfg_file}\n')
6620
6621    except IOError:
6622      _cpplint_state.PrintError(
6623          f"Skipping config file '{cfg_file}': Can't open for reading\n")
6624      keep_looking = False
6625
6626  # Apply all the accumulated filters in reverse order (top-level directory
6627  # config options having the least priority).
6628  for cfg_filter in reversed(cfg_filters):
6629    _AddFilters(cfg_filter)
6630
6631  return True
6632
6633
6634def ProcessFile(filename, vlevel, extra_check_functions=None):
6635  """Does google-lint on a single file.
6636
6637  Args:
6638    filename: The name of the file to parse.
6639
6640    vlevel: The level of errors to report.  Every error of confidence
6641    >= verbose_level will be reported.  0 is a good default.
6642
6643    extra_check_functions: An array of additional check functions that will be
6644                           run on each source line. Each function takes 4
6645                           arguments: filename, clean_lines, line, error
6646  """
6647
6648  _SetVerboseLevel(vlevel)
6649  _BackupFilters()
6650  old_errors = _cpplint_state.error_count
6651
6652  if not ProcessConfigOverrides(filename):
6653    _RestoreFilters()
6654    return
6655
6656  lf_lines = []
6657  crlf_lines = []
6658  try:
6659    # Support the UNIX convention of using "-" for stdin.  Note that
6660    # we are not opening the file with universal newline support
6661    # (which codecs doesn't support anyway), so the resulting lines do
6662    # contain trailing '\r' characters if we are reading a file that
6663    # has CRLF endings.
6664    # If after the split a trailing '\r' is present, it is removed
6665    # below.
6666    if filename == '-':
6667      lines = sys.stdin.read().split('\n')
6668    else:
6669      with codecs.open(filename, 'r', 'utf8', 'replace') as target_file:
6670        lines = target_file.read().split('\n')
6671
6672    # Remove trailing '\r'.
6673    # The -1 accounts for the extra trailing blank line we get from split()
6674    for linenum in range(len(lines) - 1):
6675      if lines[linenum].endswith('\r'):
6676        lines[linenum] = lines[linenum].rstrip('\r')
6677        crlf_lines.append(linenum + 1)
6678      else:
6679        lf_lines.append(linenum + 1)
6680
6681  except IOError:
6682    # TODO: Maybe make this have an exit code of 2 after all is done
6683    _cpplint_state.PrintError(
6684        f"Skipping input '{filename}': Can't open for reading\n")
6685    _RestoreFilters()
6686    return
6687
6688  # Note, if no dot is found, this will give the entire filename as the ext.
6689  file_extension = filename[filename.rfind('.') + 1:]
6690
6691  # When reading from stdin, the extension is unknown, so no cpplint tests
6692  # should rely on the extension.
6693  if filename != '-' and file_extension not in GetAllExtensions():
6694    _cpplint_state.PrintError(f'Ignoring {filename}; not a valid file name'
6695                              f' ({(", ".join(GetAllExtensions()))})\n')
6696  else:
6697    ProcessFileData(filename, file_extension, lines, Error,
6698                    extra_check_functions)
6699
6700    # If end-of-line sequences are a mix of LF and CR-LF, issue
6701    # warnings on the lines with CR.
6702    #
6703    # Don't issue any warnings if all lines are uniformly LF or CR-LF,
6704    # since critique can handle these just fine, and the style guide
6705    # doesn't dictate a particular end of line sequence.
6706    #
6707    # We can't depend on os.linesep to determine what the desired
6708    # end-of-line sequence should be, since that will return the
6709    # server-side end-of-line sequence.
6710    if lf_lines and crlf_lines:
6711      # Warn on every line with CR.  An alternative approach might be to
6712      # check whether the file is mostly CRLF or just LF, and warn on the
6713      # minority, we bias toward LF here since most tools prefer LF.
6714      for linenum in crlf_lines:
6715        Error(filename, linenum, 'whitespace/newline', 1,
6716              'Unexpected \\r (^M) found; better to use only \\n')
6717
6718  # Suppress printing anything if --quiet was passed unless the error
6719  # count has increased after processing this file.
6720  if not _cpplint_state.quiet or old_errors != _cpplint_state.error_count:
6721    _cpplint_state.PrintInfo(f'Done processing {filename}\n')
6722  _RestoreFilters()
6723
6724
6725def PrintUsage(message):
6726  """Prints a brief usage string and exits, optionally with an error message.
6727
6728  Args:
6729    message: The optional error message.
6730  """
6731  sys.stderr.write(_USAGE  % (sorted(list(GetAllExtensions())),
6732       ','.join(sorted(list(GetAllExtensions()))),
6733       sorted(GetHeaderExtensions()),
6734       ','.join(sorted(GetHeaderExtensions()))))
6735
6736  if message:
6737    sys.exit('\nFATAL ERROR: ' + message)
6738  else:
6739    sys.exit(0)
6740
6741def PrintVersion():
6742  sys.stdout.write('Cpplint fork (https://github.com/cpplint/cpplint)\n')
6743  sys.stdout.write('cpplint ' + __VERSION__ + '\n')
6744  sys.stdout.write('Python ' + sys.version + '\n')
6745  sys.exit(0)
6746
6747def PrintCategories():
6748  """Prints a list of all the error-categories used by error messages.
6749
6750  These are the categories used to filter messages via --filter.
6751  """
6752  sys.stderr.write(''.join(f'  {cat}\n' for cat in _ERROR_CATEGORIES))
6753  sys.exit(0)
6754
6755
6756def ParseArguments(args):
6757  """Parses the command line arguments.
6758
6759  This may set the output format and verbosity level as side-effects.
6760
6761  Args:
6762    args: The command line arguments:
6763
6764  Returns:
6765    The list of filenames to lint.
6766  """
6767  try:
6768    (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
6769                                                 'v=',
6770                                                 'version',
6771                                                 'counting=',
6772                                                 'filter=',
6773                                                 'root=',
6774                                                 'repository=',
6775                                                 'linelength=',
6776                                                 'extensions=',
6777                                                 'exclude=',
6778                                                 'recursive',
6779                                                 'headers=',
6780                                                 'includeorder=',
6781                                                 'config=',
6782                                                 'quiet'])
6783  except getopt.GetoptError:
6784    PrintUsage('Invalid arguments.')
6785
6786  verbosity = _VerboseLevel()
6787  output_format = _OutputFormat()
6788  filters = ''
6789  quiet = _Quiet()
6790  counting_style = ''
6791  recursive = False
6792
6793  for (opt, val) in opts:
6794    if opt == '--help':
6795      PrintUsage(None)
6796    if opt == '--version':
6797      PrintVersion()
6798    elif opt == '--output':
6799      if val not in ('emacs', 'vs7', 'eclipse', 'junit', 'sed', 'gsed'):
6800        PrintUsage('The only allowed output formats are emacs, vs7, eclipse '
6801                   'sed, gsed and junit.')
6802      output_format = val
6803    elif opt == '--quiet':
6804      quiet = True
6805    elif opt == '--verbose' or opt == '--v':
6806      verbosity = int(val)
6807    elif opt == '--filter':
6808      filters = val
6809      if not filters:
6810        PrintCategories()
6811    elif opt == '--counting':
6812      if val not in ('total', 'toplevel', 'detailed'):
6813        PrintUsage('Valid counting options are total, toplevel, and detailed')
6814      counting_style = val
6815    elif opt == '--root':
6816      global _root
6817      _root = val
6818    elif opt == '--repository':
6819      global _repository
6820      _repository = val
6821    elif opt == '--linelength':
6822      global _line_length
6823      try:
6824        _line_length = int(val)
6825      except ValueError:
6826        PrintUsage('Line length must be digits.')
6827    elif opt == '--exclude':
6828      global _excludes
6829      if not _excludes:
6830        _excludes = set()
6831      _excludes.update(glob.glob(val))
6832    elif opt == '--extensions':
6833      ProcessExtensionsOption(val)
6834    elif opt == '--headers':
6835      ProcessHppHeadersOption(val)
6836    elif opt == '--recursive':
6837      recursive = True
6838    elif opt == '--includeorder':
6839      ProcessIncludeOrderOption(val)
6840    elif opt == '--config':
6841      global _config_filename
6842      _config_filename = val
6843      if os.path.basename(_config_filename) != _config_filename:
6844        PrintUsage('Config file name must not include directory components.')
6845
6846  if not filenames:
6847    PrintUsage('No files were specified.')
6848
6849  if recursive:
6850    filenames = _ExpandDirectories(filenames)
6851
6852  if _excludes:
6853    filenames = _FilterExcludedFiles(filenames)
6854
6855  _SetOutputFormat(output_format)
6856  _SetQuiet(quiet)
6857  _SetVerboseLevel(verbosity)
6858  _SetFilters(filters)
6859  _SetCountingStyle(counting_style)
6860
6861  filenames.sort()
6862  return filenames
6863
6864def _ParseFilterSelector(parameter):
6865  """Parses the given command line parameter for file- and line-specific
6866  exclusions.
6867  readability/casting:file.cpp
6868  readability/casting:file.cpp:43
6869
6870  Args:
6871    parameter: The parameter value of --filter
6872
6873  Returns:
6874    [category, filename, line].
6875    Category is always given.
6876    Filename is either a filename or empty if all files are meant.
6877    Line is either a line in filename or -1 if all lines are meant.
6878  """
6879  colon_pos = parameter.find(":")
6880  if colon_pos == -1:
6881    return parameter, "", -1
6882  category = parameter[:colon_pos]
6883  second_colon_pos = parameter.find(":", colon_pos + 1)
6884  if second_colon_pos == -1:
6885    return category, parameter[colon_pos + 1:], -1
6886  else:
6887    return category, parameter[colon_pos + 1: second_colon_pos], \
6888      int(parameter[second_colon_pos + 1:])
6889
6890def _ExpandDirectories(filenames):
6891  """Searches a list of filenames and replaces directories in the list with
6892  all files descending from those directories. Files with extensions not in
6893  the valid extensions list are excluded.
6894
6895  Args:
6896    filenames: A list of files or directories
6897
6898  Returns:
6899    A list of all files that are members of filenames or descended from a
6900    directory in filenames
6901  """
6902  expanded = set()
6903  for filename in filenames:
6904    if not os.path.isdir(filename):
6905      expanded.add(filename)
6906      continue
6907
6908    for root, _, files in os.walk(filename):
6909      for loopfile in files:
6910        fullname = os.path.join(root, loopfile)
6911        if fullname.startswith('.' + os.path.sep):
6912          fullname = fullname[len('.' + os.path.sep):]
6913        expanded.add(fullname)
6914
6915  filtered = []
6916  for filename in expanded:
6917    if os.path.splitext(filename)[1][1:] in GetAllExtensions():
6918      filtered.append(filename)
6919  return filtered
6920
6921def _FilterExcludedFiles(fnames):
6922  """Filters out files listed in the --exclude command line switch. File paths
6923  in the switch are evaluated relative to the current working directory
6924  """
6925  exclude_paths = [os.path.abspath(f) for f in _excludes]
6926  # because globbing does not work recursively, exclude all subpath of all excluded entries
6927  return [f for f in fnames
6928          if not any(e for e in exclude_paths
6929                  if _IsParentOrSame(e, os.path.abspath(f)))]
6930
6931def _IsParentOrSame(parent, child):
6932  """Return true if child is subdirectory of parent.
6933  Assumes both paths are absolute and don't contain symlinks.
6934  """
6935  parent = os.path.normpath(parent)
6936  child = os.path.normpath(child)
6937  if parent == child:
6938    return True
6939
6940  prefix = os.path.commonprefix([parent, child])
6941  if prefix != parent:
6942    return False
6943  # Note: os.path.commonprefix operates on character basis, so
6944  # take extra care of situations like '/foo/ba' and '/foo/bar/baz'
6945  child_suffix = child[len(prefix):]
6946  child_suffix = child_suffix.lstrip(os.sep)
6947  return child == os.path.join(prefix, child_suffix)
6948
6949def main():
6950  filenames = ParseArguments(sys.argv[1:])
6951  backup_err = sys.stderr
6952  try:
6953    # Change stderr to write with replacement characters so we don't die
6954    # if we try to print something containing non-ASCII characters.
6955    sys.stderr = codecs.StreamReader(sys.stderr, 'replace')
6956
6957    _cpplint_state.ResetErrorCounts()
6958    for filename in filenames:
6959      ProcessFile(filename, _cpplint_state.verbose_level)
6960    # If --quiet is passed, suppress printing error count unless there are errors.
6961    if not _cpplint_state.quiet or _cpplint_state.error_count > 0:
6962      _cpplint_state.PrintErrorCounts()
6963
6964    if _cpplint_state.output_format == 'junit':
6965      sys.stderr.write(_cpplint_state.FormatJUnitXML())
6966
6967  finally:
6968    sys.stderr = backup_err
6969
6970  sys.exit(_cpplint_state.error_count > 0)
6971
6972
6973if __name__ == '__main__':
6974  main()
6975