# Copyright 2023 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Python module to find feature names in source code.

These functions are declared in a separate module to allow multiprocessing to
correctly unpickle the called functions again.
"""

import glob
import itertools
import multiprocessing
import pathlib
import re

# Matches a three-argument BASE_FEATURE(...) declaration. DOTALL lets the
# lazy groups span line breaks, so declarations wrapped over several lines
# are matched as well.
BASE_FEATURE_PATTERN = br'BASE_FEATURE\((.*?),(.*?),(.*?)\);'
BASE_FEATURE_RE = re.compile(BASE_FEATURE_PATTERN,
                             flags=re.MULTILINE | re.DOTALL)

# Only search these directories for flags. If your flag is outside these root
# directories, then add the directory here.
DIRECTORIES_TO_SEARCH = [
    'android_webview',
    'apps',
    'ash',
    'base',
    'cc',
    'chrome',
    'chromecast',
    'chromeos',
    'clank',
    'components',
    'content',
    'courgette',
    'crypto',
    'dbus',
    'device',
    'extensions',
    'fuchsia_web',
    'gin',
    'google_apis',
    'gpu',
    'headless',
    'infra',
    'internal',
    'ios',
    'ipc',
    'media',
    'mojo',
    'native_client',
    'native_client_sdk',
    'net',
    'pdf',
    'ppapi',
    'printing',
    'remoting',
    'rlz',
    'sandbox',
    'services',
    'skia',
    'sql',
    'storage',
    # third_party/blink handled separately in FindDeclaredFeatures
    'ui',
    'url',
    'v8',
    'webkit',
    'weblayer',
]


def _FindFeaturesInFile(filepath):
  """Extracts the feature names declared with BASE_FEATURE in one file.

  Args:
    filepath: Path of the source file to scan.

  Returns:
    List of feature names (str) in the order they appear in the file. The
    list is empty if the file contains no BASE_FEATURE declaration.

  Raises:
    UnicodeDecodeError: If a matched feature name is not valid UTF-8.
  """
  # Work on bytes to avoid utf-8 decode errors outside feature declarations
  file_contents = pathlib.Path(filepath).read_bytes()
  feature_names = []
  for match in BASE_FEATURE_RE.finditer(file_contents):
    # The second macro argument is the feature name. Remove the whitespace
    # and the surrounding " before decoding it.
    raw_name = match.group(2).strip().strip(b'"')
    feature_names.append(raw_name.decode('utf-8'))
  return feature_names


def FindDeclaredFeatures(input_api):
  """Finds all declared feature names in the source code.

  This function scans the *.cc files below the root directories listed in
  DIRECTORIES_TO_SEARCH and below third_party/blink, as well as the *.mm files
  below ios, and looks for features defined with the BASE_FEATURE macro. It
  will extract the feature names.

  Args:
    input_api: InputApi instance; only input_api.change.RepositoryRoot() is
      used, to locate the root of the source tree.

  Returns:
    Set of defined feature names in the source tree.
  """
  # Features are supposed to be defined in .cc files.
  # Iterate over the search folders in the root.
  root = pathlib.Path(input_api.change.RepositoryRoot())
  glob_patterns = [
      str(p / pathlib.Path('**/*.cc')) for p in root.iterdir()
      if p.is_dir() and p.name in DIRECTORIES_TO_SEARCH
  ]

  # blink is the only directory in third_party that should be searched.
  blink_glob = str(root / pathlib.Path('third_party/blink/**/*.cc'))
  glob_patterns.append(blink_glob)

  # Additional features for iOS can be found in mm files in the ios directory.
  mm_glob = str(root / pathlib.Path('ios/**/*.mm'))
  glob_patterns.append(mm_glob)

  # Create glob iterators that lazily go over the files to search
  files_to_search = itertools.chain.from_iterable(
      glob.iglob(pattern, recursive=True) for pattern in glob_patterns)

  # Limit to 4 processes - the disk accesses becomes a bottleneck with just a
  # few processes, but splitting the searching across multiple CPUs does yield
  # a benefit of a few seconds.
  # The exact batch size does not seem to matter much, as long as it is >> 1.
  #
  # Leaving the `with` block terminates the pool, so the results have to be
  # consumed completely inside of it. In exchange the worker processes are
  # cleaned up even if scanning a file raises.
  with multiprocessing.Pool(4) as pool:
    found_features = pool.imap_unordered(_FindFeaturesInFile, files_to_search,
                                         1000)
    feature_names = set(itertools.chain.from_iterable(found_features))
  return feature_names