• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2#
3# Copyright The Mbed TLS Contributors
4# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
5
6"""
7This script confirms that the naming of all symbols and identifiers in Mbed TLS
8are consistent with the house style and are also self-consistent. It only runs
9on Linux and macOS since it depends on nm.
10
11It contains two major Python classes, CodeParser and NameChecker. They both have
12a comprehensive "run-all" function (comprehensive_parse() and perform_checks())
13but the individual functions can also be used for specific needs.
14
15CodeParser makes heavy use of regular expressions to parse the code, and is
16dependent on the current code formatting. Many Python C parser libraries require
17preprocessed C code, which means no macro parsing. Compiler tools are also not
18very helpful when we want the exact location in the original source (which
19becomes impossible when e.g. comments are stripped).
20
21NameChecker performs the following checks:
22
23- All exported and available symbols in the library object files, are explicitly
24  declared in the header files. This uses the nm command.
25- All macros, constants, and identifiers (function names, struct names, etc)
26  follow the required regex pattern.
27- Typo checking: All words that begin with MBED|PSA exist as macros or constants.
28
29The script returns 0 on success, 1 on test failure, and 2 if there is a script
30error. It must be run from Mbed TLS root.
31"""
32
33import abc
34import argparse
35import fnmatch
36import glob
37import textwrap
38import os
39import sys
40import traceback
41import re
42import enum
43import shutil
44import subprocess
45import logging
46
47import scripts_path # pylint: disable=unused-import
48from mbedtls_dev import build_tree
49
50
# Naming patterns to check against. These are defined outside the NameCheck
# class for ease of modification.
# Macros and enum constants: an MBEDTLS_/PSA_ prefix followed by upper-case
# alphanumerics and underscores, and not ending in an underscore.
MACRO_PATTERN = r"^(MBEDTLS|PSA)_[0-9A-Z_]*[0-9A-Z]$"
CONSTANTS_PATTERN = MACRO_PATTERN
# Identifiers (function names, struct names, etc): the same shape as the
# macro pattern but lower-case.
IDENTIFIER_PATTERN = r"^(mbedtls|psa)_[0-9a-z_]*[0-9a-z]$"
56
class Match(): # pylint: disable=too-few-public-methods
    """
    Container for a single name found in the source, with its location.

    Fields:
    * filename: the file that the match was in.
    * line: the full line containing the match.
    * line_no: the line number.
    * pos: a tuple of (start, end) positions on the line where the match is.
    * name: the match itself.
    """
    def __init__(self, filename, line, line_no, pos, name):
        # pylint: disable=too-many-arguments
        self.filename = filename
        self.line = line
        self.line_no = line_no
        self.pos = pos
        self.name = name

    def __str__(self):
        """
        Render the match as a three-line code listing: a blank gutter row,
        the offending line itself, then a row of carets underlining the
        matched name.
        """
        gutter = format(self.line_no, "4d")
        blank_gutter = " " * len(gutter)
        start, end = self.pos
        underline = " " * start + "^" * (end - start)

        rows = [
            " {0} |\n".format(blank_gutter),
            # self.line is expected to carry its own trailing newline.
            " {0} | {1}".format(gutter, self.line),
            " {0} | {1}\n".format(blank_gutter, underline),
        ]
        return "".join(rows)
88
class Problem(abc.ABC): # pylint: disable=too-few-public-methods
    """
    Abstract base class for every kind of static analysis finding.

    Being an Abstract Base Class, it cannot be instantiated directly, and
    each concrete subclass must provide both quiet_output() and
    verbose_output(). The shared __str__() picks between them based on the
    class-wide quiet flag.
    """
    # Class variable to control the quietness of all problems
    quiet = False

    def __init__(self):
        # Wrapper used by subclasses to format verbose explanations.
        wrapper = textwrap.TextWrapper()
        wrapper.width = 80
        wrapper.initial_indent = "    > "
        wrapper.subsequent_indent = "      "
        self.textwrapper = wrapper

    def __str__(self):
        """
        Unified string representation method for all Problems.
        """
        if not self.__class__.quiet:
            return self.verbose_output()
        return self.quiet_output()

    @abc.abstractmethod
    def quiet_output(self):
        """
        The output when --quiet is enabled.
        """

    @abc.abstractmethod
    def verbose_output(self):
        """
        The default output with explanation and code snippet if appropriate.
        """
124
class SymbolNotInHeader(Problem): # pylint: disable=too-few-public-methods
    """
    A problem that occurs when an exported/available symbol in the object file
    is not explicitly declared in header files. Created with
    NameCheck.check_symbols_declared_in_header()

    Fields:
    * symbol_name: the name of the symbol.
    """
    def __init__(self, symbol_name):
        self.symbol_name = symbol_name
        super().__init__()

    def quiet_output(self):
        # Quiet mode reports just the offending symbol name.
        return "{0}".format(self.symbol_name)

    def verbose_output(self):
        message = (
            "'{0}' was found as an available symbol in the output of nm, "
            "however it was not declared in any header files."
        ).format(self.symbol_name)
        return self.textwrapper.fill(message)
146
class PatternMismatch(Problem): # pylint: disable=too-few-public-methods
    """
    A problem that occurs when something doesn't match the expected pattern.
    Created with NameCheck.check_match_pattern()

    Fields:
    * pattern: the expected regex pattern
    * match: the Match object in question
    """
    def __init__(self, pattern, match):
        self.pattern = pattern
        self.match = match
        super().__init__()

    def quiet_output(self):
        # Quiet mode: machine-friendly "file:line:name" triple.
        return "{0}:{1}:{2}".format(
            self.match.filename, self.match.line_no, self.match.name)

    def verbose_output(self):
        message = (
            "{0}:{1}: '{2}' does not match the required pattern '{3}'."
            .format(
                self.match.filename,
                self.match.line_no,
                self.match.name,
                self.pattern))
        # Append the annotated code listing produced by Match.__str__().
        return "\n".join([self.textwrapper.fill(message), str(self.match)])
178
class Typo(Problem): # pylint: disable=too-few-public-methods
    """
    A problem that occurs when a word using MBED or PSA doesn't
    appear to be defined as constants nor enum values. Created with
    NameCheck.check_for_typos()

    Fields:
    * match: the Match object of the MBED|PSA name in question.
    """
    def __init__(self, match):
        self.match = match
        super().__init__()

    def quiet_output(self):
        # Quiet mode: machine-friendly "file:line:name" triple.
        return "{0}:{1}:{2}".format(
            self.match.filename, self.match.line_no, self.match.name)

    def verbose_output(self):
        message = (
            "{0}:{1}: '{2}' looks like a typo. It was not found in any "
            "macros or any enums. If this is not a typo, put "
            "//no-check-names after it."
        ).format(self.match.filename, self.match.line_no, self.match.name)
        # Append the annotated code listing produced by Match.__str__().
        return self.textwrapper.fill(message) + "\n" + str(self.match)
205
class CodeParser():
    """
    Class for retrieving files and parsing the code. This can be used
    independently of the checks that NameChecker performs, for example for
    list_internal_identifiers.py.
    """
    def __init__(self, log):
        self.log = log
        # The module docstring requires running from the Mbed TLS root;
        # this call checks that precondition before doing any work.
        build_tree.check_repo_path()

        # Memo for storing "glob expression": set(filepaths)
        self.files = {}

        # Globally excluded filenames.
        # Note that "*" can match directory separators in exclude lists.
        self.excluded_files = ["*/bn_mul", "*/compat-1.3.h"]

    def comprehensive_parse(self):
        """
        Comprehensive ("default") function to call each parsing function and
        retrieve various elements of the code, together with the source location.

        Returns a dict of parsed item key to the corresponding List of Matches.
        """
        self.log.info("Parsing source code...")
        self.log.debug(
            "The following files are excluded from the search: {}"
            .format(str(self.excluded_files))
        )

        all_macros = self.parse_macros([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "tests/include/test/drivers/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        private_macros = self.parse_macros([
            "library/*.c",
        ])
        enum_consts = self.parse_enum_consts([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "library/*.c",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        identifiers, excluded_identifiers = self.parse_identifiers([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        mbed_psa_words = self.parse_mbed_psa_words([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h",
            "library/*.c",
            "3rdparty/everest/library/everest.c",
            "3rdparty/everest/library/x25519.c"
        ])
        symbols = self.parse_symbols()

        # Remove identifier macros like mbedtls_printf or mbedtls_calloc
        identifiers_justname = [x.name for x in identifiers]
        actual_macros = []
        for macro in all_macros:
            if macro.name not in identifiers_justname:
                actual_macros.append(macro)

        self.log.debug("Found:")
        # Aligns the counts on the assumption that none exceeds 4 digits
        self.log.debug("  {:4} Total Macros".format(len(all_macros)))
        self.log.debug("  {:4} Non-identifier Macros".format(len(actual_macros)))
        self.log.debug("  {:4} Enum Constants".format(len(enum_consts)))
        self.log.debug("  {:4} Identifiers".format(len(identifiers)))
        self.log.debug("  {:4} Exported Symbols".format(len(symbols)))
        return {
            "macros": actual_macros,
            "private_macros": private_macros,
            "enum_consts": enum_consts,
            "identifiers": identifiers,
            "excluded_identifiers": excluded_identifiers,
            "symbols": symbols,
            "mbed_psa_words": mbed_psa_words
        }

    def is_file_excluded(self, path, exclude_wildcards):
        """Whether the given file path is excluded."""
        # exclude_wildcards may be None. Also, consider the global exclusions.
        exclude_wildcards = (exclude_wildcards or []) + self.excluded_files
        for pattern in exclude_wildcards:
            if fnmatch.fnmatch(path, pattern):
                return True
        return False

    def get_all_files(self, include_wildcards, exclude_wildcards):
        """
        Get all files that match any of the included UNIX-style wildcards
        and filter them into included and excluded lists.
        While the check_names script is designed only for use on UNIX/macOS
        (due to nm), this function alone will work fine on Windows even with
        forward slashes in the wildcard.

        Args:
        * include_wildcards: a List of shell-style wildcards to match filepaths.
        * exclude_wildcards: a List of shell-style wildcards to exclude.

        Returns:
        * inc_files: A List of relative filepaths for included files.
        * exc_files: A List of relative filepaths for excluded files.
        """
        accumulator = set()
        # Excluded paths are globbed too, so they can be returned in
        # exc_files rather than silently dropped.
        all_wildcards = include_wildcards + (exclude_wildcards or [])
        for wildcard in all_wildcards:
            accumulator = accumulator.union(glob.iglob(wildcard))

        inc_files = []
        exc_files = []
        for path in accumulator:
            if self.is_file_excluded(path, exclude_wildcards):
                exc_files.append(path)
            else:
                inc_files.append(path)
        return (inc_files, exc_files)

    def get_included_files(self, include_wildcards, exclude_wildcards):
        """
        Get all files that match any of the included UNIX-style wildcards.
        While the check_names script is designed only for use on UNIX/macOS
        (due to nm), this function alone will work fine on Windows even with
        forward slashes in the wildcard.

        Args:
        * include_wildcards: a List of shell-style wildcards to match filepaths.
        * exclude_wildcards: a List of shell-style wildcards to exclude.

        Returns a List of relative filepaths.
        """
        accumulator = set()

        for include_wildcard in include_wildcards:
            accumulator = accumulator.union(glob.iglob(include_wildcard))

        return list(path for path in accumulator
                    if not self.is_file_excluded(path, exclude_wildcards))

    def parse_macros(self, include, exclude=None):
        """
        Parse all macros defined by #define preprocessor directives.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the found macros.
        """
        macro_regex = re.compile(r"# *define +(?P<macro>\w+)")
        # Macro names beginning with any of these prefixes are skipped
        # (str.startswith accepts a tuple of prefixes).
        exclusions = (
            "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_"
        )

        files = self.get_included_files(include, exclude)
        self.log.debug("Looking for macros in {} files".format(len(files)))

        macros = []
        for header_file in files:
            with open(header_file, "r", encoding="utf-8") as header:
                # NOTE(review): enumerate() counts from 0, so the line
                # numbers stored in Match objects are 0-based -- confirm
                # this offset is intended for the reported locations.
                for line_no, line in enumerate(header):
                    for macro in macro_regex.finditer(line):
                        if macro.group("macro").startswith(exclusions):
                            continue

                        macros.append(Match(
                            header_file,
                            line,
                            line_no,
                            macro.span("macro"),
                            macro.group("macro")))

        return macros

    def parse_mbed_psa_words(self, include, exclude=None):
        """
        Parse all words in the file that begin with MBED|PSA, in and out of
        macros, comments, anything.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for words beginning with MBED|PSA.
        """
        # Typos of TLS are common, hence the broader check below than MBEDTLS.
        mbed_regex = re.compile(r"\b(MBED.+?|PSA)_[A-Z0-9_]*")
        # Lines carrying the opt-out marker or #error are not checked.
        exclusions = re.compile(r"// *no-check-names|#error")

        files = self.get_included_files(include, exclude)
        self.log.debug(
            "Looking for MBED|PSA words in {} files"
            .format(len(files))
        )

        mbed_psa_words = []
        for filename in files:
            with open(filename, "r", encoding="utf-8") as fp:
                for line_no, line in enumerate(fp):
                    if exclusions.search(line):
                        continue

                    for name in mbed_regex.finditer(line):
                        mbed_psa_words.append(Match(
                            filename,
                            line,
                            line_no,
                            name.span(0),
                            name.group(0)))

        return mbed_psa_words

    def parse_enum_consts(self, include, exclude=None):
        """
        Parse all enum value constants that are declared.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the findings.
        """
        files = self.get_included_files(include, exclude)
        self.log.debug("Looking for enum consts in {} files".format(len(files)))

        # Emulate a finite state machine to parse enum declarations.
        # OUTSIDE_KEYWORD = outside the enum keyword
        # IN_BRACES = inside enum opening braces
        # IN_BETWEEN = between enum keyword and opening braces
        states = enum.Enum("FSM", ["OUTSIDE_KEYWORD", "IN_BRACES", "IN_BETWEEN"])
        enum_consts = []
        for header_file in files:
            state = states.OUTSIDE_KEYWORD
            with open(header_file, "r", encoding="utf-8") as header:
                for line_no, line in enumerate(header):
                    # Match typedefs and brackets only when they are at the
                    # beginning of the line -- if they are indented, they might
                    # be sub-structures within structs, etc.
                    optional_c_identifier = r"([_a-zA-Z][_a-zA-Z0-9]*)?"
                    if (state == states.OUTSIDE_KEYWORD and
                            re.search(r"^(typedef +)?enum " + \
                                    optional_c_identifier + \
                                    r" *{", line)):
                        state = states.IN_BRACES
                    elif (state == states.OUTSIDE_KEYWORD and
                          re.search(r"^(typedef +)?enum", line)):
                        state = states.IN_BETWEEN
                    elif (state == states.IN_BETWEEN and
                          re.search(r"^{", line)):
                        state = states.IN_BRACES
                    elif (state == states.IN_BRACES and
                          re.search(r"^}", line)):
                        state = states.OUTSIDE_KEYWORD
                    # Inside the braces: skip preprocessor lines, then take
                    # the first word on the line as an enum constant.
                    elif (state == states.IN_BRACES and
                          not re.search(r"^ *#", line)):
                        enum_const = re.search(r"^ *(?P<enum_const>\w+)", line)
                        if not enum_const:
                            continue

                        enum_consts.append(Match(
                            header_file,
                            line,
                            line_no,
                            enum_const.span("enum_const"),
                            enum_const.group("enum_const")))

        return enum_consts

    IGNORED_CHUNK_REGEX = re.compile('|'.join([
        r'/\*.*?\*/', # block comment entirely on one line
        r'//.*', # line comment
        r'(?P<string>")(?:[^\\\"]|\\.)*"', # string literal
    ]))

    def strip_comments_and_literals(self, line, in_block_comment):
        """Strip comments and string literals from line.

        Continuation lines are not supported.

        If in_block_comment is true, assume that the line starts inside a
        block comment.

        Return updated values of (line, in_block_comment) where:
        * Comments in line have been replaced by a space (or nothing at the
          start or end of the line).
        * String contents have been removed.
        * in_block_comment indicates whether the line ends inside a block
          comment that continues on the next line.
        """

        # Terminate current multiline comment?
        if in_block_comment:
            m = re.search(r"\*/", line)
            if m:
                in_block_comment = False
                line = line[m.end(0):]
            else:
                return '', True

        # Remove full comments and string literals.
        # Do it all together to handle cases like "/*" correctly.
        # Note that continuation lines are not supported.
        line = re.sub(self.IGNORED_CHUNK_REGEX,
                      lambda s: '""' if s.group('string') else ' ',
                      line)

        # Start an unfinished comment?
        # (If `/*` was part of a complete comment, it's already been removed.)
        m = re.search(r"/\*", line)
        if m:
            in_block_comment = True
            line = line[:m.start(0)]

        return line, in_block_comment

    IDENTIFIER_REGEX = re.compile('|'.join([
        # Match " something(a" or " *something(a". Functions.
        # Assumptions:
        # - function definition from return type to one of its arguments is
        #   all on one line
        # - function definition line only contains alphanumeric, asterisk,
        #   underscore, and open bracket
        r".* \**(\w+) *\( *\w",
        # Match "(*something)(".
        r".*\( *\* *(\w+) *\) *\(",
        # Match names of named data structures.
        r"(?:typedef +)?(?:struct|union|enum) +(\w+)(?: *{)?$",
        # Match names of typedef instances, after closing bracket.
        r"}? *(\w+)[;[].*",
    ]))
    # The regex below is indented for clarity.
    EXCLUSION_LINES = re.compile("|".join([
        r"extern +\"C\"",
        r"(typedef +)?(struct|union|enum)( *{)?$",
        r"} *;?$",
        r"$",
        r"//",
        r"#",
    ]))

    def parse_identifiers_in_file(self, header_file, identifiers):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style.

        Append found matches to the list ``identifiers``.
        """

        with open(header_file, "r", encoding="utf-8") as header:
            in_block_comment = False
            # The previous line variable is used for concatenating lines
            # when identifiers are formatted and spread across multiple
            # lines.
            previous_line = ""

            for line_no, line in enumerate(header):
                line, in_block_comment = \
                    self.strip_comments_and_literals(line, in_block_comment)

                if self.EXCLUSION_LINES.match(line):
                    previous_line = ""
                    continue

                # If the line contains only space-separated alphanumeric
                # characters (or underscore, asterisk, or open parenthesis),
                # and nothing else, high chance it's a declaration that
                # continues on the next line
                if re.search(r"^([\w\*\(]+\s+)+$", line):
                    previous_line += line
                    continue

                # If previous line seemed to start an unfinished declaration
                # (as above), concat and treat them as one.
                if previous_line:
                    line = previous_line.strip() + " " + line.strip() + "\n"
                    previous_line = ""

                # Skip parsing if line has a space in front = heuristic to
                # skip function argument lines (highly subject to formatting
                # changes)
                if line[0] == " ":
                    continue

                identifier = self.IDENTIFIER_REGEX.search(line)

                if not identifier:
                    continue

                # Find the group that matched, and append it
                for group in identifier.groups():
                    if not group:
                        continue

                    identifiers.append(Match(
                        header_file,
                        line,
                        line_no,
                        identifier.span(),
                        group))

    def parse_identifiers(self, include, exclude=None):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style. Identifiers in excluded files are still
        parsed

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns: a Tuple of two Lists of Match objects with identifiers.
        * included_identifiers: A List of Match objects with identifiers from
          included files.
        * excluded_identifiers: A List of Match objects with identifiers from
          excluded files.
        """

        included_files, excluded_files = \
            self.get_all_files(include, exclude)

        self.log.debug("Looking for included identifiers in {} files".format \
            (len(included_files)))

        included_identifiers = []
        excluded_identifiers = []
        for header_file in included_files:
            self.parse_identifiers_in_file(header_file, included_identifiers)
        for header_file in excluded_files:
            self.parse_identifiers_in_file(header_file, excluded_identifiers)

        return (included_identifiers, excluded_identifiers)

    def parse_symbols(self):
        """
        Compile the Mbed TLS libraries, and parse the TLS, Crypto, and x509
        object files using nm to retrieve the list of referenced symbols.
        Exceptions thrown here are rethrown because they would be critical
        errors that void several tests, and thus needs to halt the program. This
        is explicitly done for clarity.

        Returns a List of unique symbols defined and used in the libraries.
        """
        self.log.info("Compiling...")
        symbols = []

        # Back up the config and atomically compile with the full configuration.
        shutil.copy(
            "include/mbedtls/config.h",
            "include/mbedtls/config.h.bak"
        )
        try:
            # Use check=True in all subprocess calls so that failures are raised
            # as exceptions and logged.
            subprocess.run(
                ["python3", "scripts/config.py", "full"],
                universal_newlines=True,
                check=True
            )
            my_environment = os.environ.copy()
            my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables"
            # Run make clean separately to lib to prevent unwanted behavior when
            # make is invoked with parallelism.
            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
            subprocess.run(
                ["make", "lib"],
                env=my_environment,
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            )

            # Perform object file analysis using nm
            symbols = self.parse_symbols_from_nm([
                "library/libmbedcrypto.a",
                "library/libmbedtls.a",
                "library/libmbedx509.a"
            ])

            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
        except subprocess.CalledProcessError as error:
            self.log.debug(error.output)
            raise error
        finally:
            # Put back the original config regardless of there being errors.
            # Works also for keyboard interrupts.
            shutil.move(
                "include/mbedtls/config.h.bak",
                "include/mbedtls/config.h"
            )

        return symbols

    def parse_symbols_from_nm(self, object_files):
        """
        Run nm to retrieve the list of referenced symbols in each object file.
        Does not return the position data since it is of no use.

        Args:
        * object_files: a List of compiled object filepaths to search through.

        Returns a List of unique symbols defined and used in any of the object
        files.
        """
        # Lines to ignore: undefined references ("U"), blank lines, and
        # per-object-file banner lines.
        nm_undefined_regex = re.compile(r"^\S+: +U |^$|^\S+:$")
        nm_valid_regex = re.compile(r"^\S+( [0-9A-Fa-f]+)* . _*(?P<symbol>\w+)")
        # NOTE(review): FStar/Hacl prefixes look like symbols from the
        # bundled Everest code -- presumably exempted on purpose; confirm.
        exclusions = ("FStar", "Hacl")

        symbols = []

        # Gather all outputs of nm
        nm_output = ""
        for lib in object_files:
            nm_output += subprocess.run(
                ["nm", "-og", lib],
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            ).stdout

        for line in nm_output.splitlines():
            if not nm_undefined_regex.search(line):
                symbol = nm_valid_regex.search(line)
                if (symbol and not symbol.group("symbol").startswith(exclusions)):
                    symbols.append(symbol.group("symbol"))
                else:
                    # Unparsable nm output is surfaced as an error.
                    self.log.error(line)

        return symbols
759
class NameChecker():
    """
    Representation of the core name checking operation performed by this script.

    Consumes the parse result dictionary produced by CodeParser and runs the
    symbol-declaration, naming-pattern, and typo checks against it.
    """
    def __init__(self, parse_result, log):
        # parse_result: dict of lists produced by CodeParser
        # (keys such as "symbols", "identifiers", "macros", ...).
        # log: shared logging.Logger used for all check output.
        self.parse_result = parse_result
        self.log = log

    def perform_checks(self, quiet=False):
        """
        A comprehensive checker that performs each check in order, and outputs
        a final verdict.

        Args:
        * quiet: whether to hide detailed problem explanation.

        Returns 0 if all checks passed, 1 if any problems were found.
        """
        self.log.info("=============")
        Problem.quiet = quiet
        problems = 0
        problems += self.check_symbols_declared_in_header()

        pattern_checks = [
            ("macros", MACRO_PATTERN),
            ("enum_consts", CONSTANTS_PATTERN),
            ("identifiers", IDENTIFIER_PATTERN)
        ]
        for group, check_pattern in pattern_checks:
            problems += self.check_match_pattern(group, check_pattern)

        problems += self.check_for_typos()

        self.log.info("=============")
        if problems > 0:
            self.log.info("FAIL: {0} problem(s) to fix".format(str(problems)))
            if quiet:
                self.log.info("Remove --quiet to see explanations.")
            else:
                self.log.info("Use --quiet for minimal output.")
            return 1
        else:
            self.log.info("PASS")
            return 0

    def check_symbols_declared_in_header(self):
        """
        Perform a check that all detected symbols in the library object files
        are properly declared in headers.
        Assumes parse_names_in_source() was called before this.

        Returns the number of problems that need fixing.
        """
        problems = []
        # Build the set of declared identifier names once so that each symbol
        # lookup is O(1), instead of re-scanning the whole identifier list for
        # every symbol (previously O(symbols * identifiers)).
        declared_names = {
            identifier.name
            for identifier in self.parse_result["identifiers"] +
            self.parse_result["excluded_identifiers"]
        }

        for symbol in self.parse_result["symbols"]:
            if symbol not in declared_names:
                problems.append(SymbolNotInHeader(symbol))

        self.output_check_result("All symbols in header", problems)
        return len(problems)

    def check_match_pattern(self, group_to_check, check_pattern):
        """
        Perform a check that all items of a group conform to a regex pattern.
        Assumes parse_names_in_source() was called before this.

        Args:
        * group_to_check: string key to index into self.parse_result.
        * check_pattern: the regex to check against.

        Returns the number of problems that need fixing.
        """
        problems = []

        for item_match in self.parse_result[group_to_check]:
            if not re.search(check_pattern, item_match.name):
                problems.append(PatternMismatch(check_pattern, item_match))
            # Double underscore should not be used for names
            # (plain substring test; no regex needed).
            if "__" in item_match.name:
                problems.append(
                    PatternMismatch("no double underscore allowed", item_match))

        self.output_check_result(
            "Naming patterns of {}".format(group_to_check),
            problems)
        return len(problems)

    def check_for_typos(self):
        """
        Perform a check that all words in the source code beginning with MBED are
        either defined as macros, or as enum constants.
        Assumes parse_names_in_source() was called before this.

        Returns the number of problems that need fixing.
        """
        problems = []

        # Set comprehension, equivalent to a list comprehension wrapped by set()
        all_caps_names = {
            match.name
            for match
            in self.parse_result["macros"] +
            self.parse_result["private_macros"] +
            self.parse_result["enum_consts"]
        }
        # Note: the LIBTESTDRIVER alternative previously read
        # "MBEDTLS_TEST_LIBTESTDRIVER*", where the trailing "R*" meant
        # "zero or more 'R' characters" rather than the intended plain
        # prefix match; re.search() makes the literal prefix sufficient.
        typo_exclusion = re.compile(r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$|"
                                    r"MBEDTLS_TEST_LIBTESTDRIVER|"
                                    r"PSA_CRYPTO_DRIVER_TEST")

        for name_match in self.parse_result["mbed_psa_words"]:
            found = name_match.name in all_caps_names

            # Since MBEDTLS_PSA_ACCEL_XXX defines are defined by the
            # PSA driver, they will not exist as macros. However, they
            # should still be checked for typos using the equivalent
            # BUILTINs that exist.
            if "MBEDTLS_PSA_ACCEL_" in name_match.name:
                found = name_match.name.replace(
                    "MBEDTLS_PSA_ACCEL_",
                    "MBEDTLS_PSA_BUILTIN_") in all_caps_names

            if not found and not typo_exclusion.search(name_match.name):
                problems.append(Typo(name_match))

        self.output_check_result("Likely typos", problems)
        return len(problems)

    def output_check_result(self, name, problems):
        """
        Write out the PASS/FAIL status of a performed check depending on whether
        there were problems.

        Args:
        * name: the name of the test
        * problems: a List of encountered Problems
        """
        if problems:
            self.log.info("{}: FAIL\n".format(name))
            for problem in problems:
                self.log.warning(str(problem))
        else:
            self.log.info("{}: PASS".format(name))
909
def main():
    """
    Entry point: parse the command-line options, run the code parser, then
    hand its results to a NameChecker and exit with the checker's verdict.
    """
    arg_parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=(
            "This script confirms that the naming of all symbols and identifiers "
            "in Mbed TLS are consistent with the house style and are also "
            "self-consistent.\n\n"
            "Expected to be run from the MbedTLS root directory.")
    )
    arg_parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="show parse results"
    )
    arg_parser.add_argument(
        "-q", "--quiet",
        action="store_true",
        help="hide unnecessary text, explanations, and highlights"
    )
    options = arg_parser.parse_args()

    # One shared root logger instance is handed to both worker classes.
    log = logging.getLogger()
    log.addHandler(logging.StreamHandler())
    log.setLevel(logging.DEBUG if options.verbose else logging.INFO)

    try:
        parse_result = CodeParser(log).comprehensive_parse()
    except Exception: # pylint: disable=broad-except
        # Parsing errors are script errors, not naming failures: exit code 2.
        traceback.print_exc()
        sys.exit(2)

    checker = NameChecker(parse_result, log)
    sys.exit(checker.perform_checks(quiet=options.quiet))

if __name__ == "__main__":
    main()
955