# python3
# Copyright (C) 2019 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Grep warnings messages and output HTML tables or warning counts in CSV.

Default is to output warnings in HTML tables grouped by warning severity.
Use option --byproject to output tables grouped by source file projects.
Use option --gencsv to output warning counts in CSV format.

Default input file is build.log, which can be changed with the --log flag.
"""

# List of important data structures and functions in this script.
#
# To parse and keep warning message in the input file:
#   severity:                classification of message severity
#   warn_patterns:
#   warn_patterns[w]['category']     tool that issued the warning, not used now
#   warn_patterns[w]['description']  table heading
#   warn_patterns[w]['members']      matched warnings from input
#   warn_patterns[w]['patterns']     regular expressions to match warnings
#   warn_patterns[w]['compiled_patterns']  compiled regular expressions, read
#                                          by classify_one_warning
#   warn_patterns[w]['projects'][p]  number of warnings of pattern w in p
#   warn_patterns[w]['severity']     severity tuple
#   project_list[p][0]               project name
#   project_list[p][1]               regular expression to match a project path
#   project_patterns[p]              re.compile(project_list[p][1])
#   project_names[p]                 project_list[p][0]
#   warning_messages     array of each warning message, without source url
#   warning_links        array of each warning code search link; for 'chrome'
#   warning_records      array of [idx to warn_patterns,
#                                  idx to project_names,
#                                  idx to warning_messages,
#                                  idx to warning_links]
#   parse_input_file
#
import argparse
import io
import multiprocessing
import os
import re
import sys

# pylint:disable=relative-beyond-top-level,no-name-in-module
# suppress false positive of no-name-in-module warnings
from . import android_project_list
from . import chrome_project_list
from . import cpp_warn_patterns as cpp_patterns
from . import html_writer
from . import java_warn_patterns as java_patterns
from . import make_warn_patterns as make_patterns
from . import other_warn_patterns as other_patterns
from . import tidy_warn_patterns as tidy_patterns


def parse_args(use_google3):
  """Define and parse the args. Return the parse_args() result.

  Args:
    use_google3: when false, the log file named by the flags must exist on
      disk, otherwise the process exits with an error message.

  Returns:
    The argparse namespace; flags.log always holds the log path, taken from
    --log when given and from the positional build.log argument otherwise.
  """
  parser = argparse.ArgumentParser(
      description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
  parser.add_argument('--capacitor_path', default='',
                      help='Save capacitor warning file to the passed absolute'
                      ' path')
  # csvpath has a different naming than the above path because historically the
  # original Android script used csvpath, so other scripts rely on it
  parser.add_argument('--csvpath', default='',
                      help='Save CSV warning file to the passed path')
  parser.add_argument('--gencsv', action='store_true',
                      help='Generate CSV file with number of various warnings')
  parser.add_argument('--csvwithdescription', default='',
                      help="""Save CSV warning file to the passed path this csv
                      will contain all the warning descriptions""")
  parser.add_argument('--byproject', action='store_true',
                      help='Separate warnings in HTML output by project names')
  parser.add_argument('--url', default='',
                      help='Root URL of an Android source code tree prefixed '
                      'before files in warnings')
  parser.add_argument('--separator', default='?l=',
                      help='Separator between the end of a URL and the line '
                      'number argument. e.g. #')
  parser.add_argument('--processes', default=multiprocessing.cpu_count(),
                      type=int,
                      help='Number of parallel processes to process warnings')
  # Old Android build scripts call warn.py without --platform,
  # so the default platform is set to 'android'.
  parser.add_argument('--platform', default='android',
                      choices=['chrome', 'android'],
                      help='Platform of the build log')
  # Old Android build scripts call warn.py with only a build.log file path.
  parser.add_argument('--log', help='Path to build log file')
  parser.add_argument(dest='buildlog', metavar='build.log',
                      default='build.log', nargs='?',
                      help='Path to build.log file')
  flags = parser.parse_args()
  if not flags.log:
    flags.log = flags.buildlog
  if not use_google3 and not os.path.exists(flags.log):
    sys.exit('Cannot find log file: ' + flags.log)
  return flags


def get_project_names(project_list):
  """Get project_names from project_list."""
  return [p[0] for p in project_list]


def find_project_index(line, project_patterns):
  """Return the index to the project pattern array, or -1 if none matches."""
  for idx, pattern in enumerate(project_patterns):
    if pattern.match(line):
      return idx
  return -1


def classify_one_warning(warning, link, results, project_patterns,
                         warn_patterns):
  """Classify one warning line.

  Appends [warning, link, warn_patterns index, project index] to results for
  the first warn pattern whose compiled pattern matches the warning. Nothing
  is appended when no pattern matches.
  """
  for idx, pattern in enumerate(warn_patterns):
    for cpat in pattern['compiled_patterns']:
      if cpat.match(warning):
        project_idx = find_project_index(warning, project_patterns)
        results.append([warning, link, idx, project_idx])
        return
  # If we end up here, there was a problem parsing the log
  # probably caused by 'make -j' mixing the output from
  # 2 or more concurrent compiles


def remove_prefix(src, sub):
  """Remove everything before last occurrence of substring sub in string src."""
  if sub in src:
    inc_sub = src.rfind(sub)
    return src[inc_sub:]
  return src


# TODO(emmavukelj): Don't have any generate_*_cs_link functions call
# normalize_path a second time (the first time being in parse_input_file)
def generate_cs_link(warning_line, flags, android_root=None):
  """Try to add code search HTTP URL prefix."""
  if flags.platform == 'chrome':
    return generate_chrome_cs_link(warning_line, flags)
  if flags.platform == 'android':
    return generate_android_cs_link(warning_line, flags, android_root)
  return 'https://cs.corp.google.com/'


def generate_android_cs_link(warning_line, flags, android_root):
  """Generate the code search link for a warning line in Android.

  Returns the normalized path alone when flags.url is empty; otherwise
  flags.url + '/' + path, followed by flags.separator and the line number
  when the second ':'-separated field is all digits.
  """
  # max_splits=2 -> only 3 items
  raw_path, line_number_str, _ = warning_line.split(':', 2)
  normalized_path = normalize_path(raw_path, flags, android_root)
  if not flags.url:
    return normalized_path
  link_path = flags.url + '/' + normalized_path
  if line_number_str.isdigit():
    link_path += flags.separator + line_number_str
  return link_path


def generate_chrome_cs_link(warning_line, flags):
  """Generate the code search link for a warning line in Chrome."""
  split_line = warning_line.split(':')
  raw_path = split_line[0]
  normalized_path = normalize_path(raw_path, flags)
  link_base = 'https://cs.chromium.org/'
  link_add = 'chromium'
  link_path = None

  # Basically just going through a few specific directory cases and specifying
  # the proper behavior for that case. This list of cases was accumulated
  # through trial and error manually going through the warnings.
  #
  # This code pattern of using case-specific "if"s instead of "elif"s looks
  # possibly accidental and mistaken but it is intentional because some paths
  # fall under several cases (e.g. third_party/lib/nghttp2_frame.c) and for
  # those we want the most specific case to be applied. If there is reliable
  # knowledge of exactly where these occur, this could be changed to "elif"s
  # but there is no reliable set of paths falling under multiple cases at the
  # moment.
  if '/src/third_party' in raw_path:
    link_path = remove_prefix(raw_path, '/src/third_party/')
  if '/chrome_root/src_internal/' in raw_path:
    link_path = remove_prefix(raw_path, '/chrome_root/src_internal/')
    link_path = link_path[len('/chrome_root'):]  # remove chrome_root
  if '/chrome_root/src/' in raw_path:
    link_path = remove_prefix(raw_path, '/chrome_root/src/')
    link_path = link_path[len('/chrome_root'):]  # remove chrome_root
  if '/libassistant/' in raw_path:
    link_add = 'eureka_internal/chromium/src'
    link_base = 'https://cs.corp.google.com/'  # internal data
    link_path = remove_prefix(normalized_path, '/libassistant/')
  if raw_path.startswith('gen/'):
    link_path = '/src/out/Debug/gen/' + normalized_path
  if '/gen/' in raw_path:
    return '%s?q=file:%s' % (link_base, remove_prefix(normalized_path, '/gen/'))

  if not link_path and (raw_path.startswith('src/') or
                        raw_path.startswith('src_internal/')):
    link_path = '/%s' % raw_path

  if not link_path:  # can't find specific link, send a query
    return '%s?q=file:%s' % (link_base, normalized_path)

  line_number = int(split_line[1])
  link = '%s%s%s?l=%d' % (link_base, link_add, link_path, line_number)
  return link


def find_warn_py_and_android_root(path):
  """Return android source root path if warn.py is found.

  Tries the ancestors of path from the longest prefix to the shortest and
  returns the first one that contains build/make/tools/warn.py, or '' when
  none does.
  """
  parts = path.split('/')
  for idx in reversed(range(2, len(parts))):
    root_path = '/'.join(parts[:idx])
    # Android root directory should contain this script.
    if os.path.exists(root_path + '/build/make/tools/warn.py'):
      return root_path
  return ''


def find_android_root(buildlog):
  """Guess android source root from common prefix of file paths.

  Args:
    buildlog: iterable of log lines; it is consumed by this function, so a
      file object must be rewound by the caller before it is read again.

  Returns:
    The guessed root directory without a trailing '/', or '' if unknown.
  """
  # Use the longest common prefix of the absolute file paths
  # of the first 10000 warning messages as the android_root.
  warning_lines = []
  warning_pattern = re.compile('^/[^ ]*/[^ ]*: warning: .*')
  count = 0
  for line in buildlog:
    # We want to find android_root of a local build machine.
    # Do not use RBE warning lines, which has '/b/f/w/' path prefix.
    # Do not use /tmp/ file warnings.
    if warning_pattern.match(line) and (
        '/b/f/w' not in line and not line.startswith('/tmp/')):
      warning_lines.append(line)
      count += 1
      if count > 9999:
        break
      # Try to find warn.py and use its location to find
      # the source tree root.
      if count < 100:
        path = os.path.normpath(re.sub(':.*$', '', line))
        android_root = find_warn_py_and_android_root(path)
        if android_root:
          return android_root
  # Do not use common prefix of a small number of paths.
  if count > 10:
    # pytype: disable=wrong-arg-types
    root_path = os.path.commonprefix(warning_lines)
    # pytype: enable=wrong-arg-types
    if len(root_path) > 2 and root_path[len(root_path) - 1] == '/':
      return root_path[:-1]
  return ''


def remove_android_root_prefix(path, android_root):
  """Remove android_root prefix from path if it is found."""
  if path.startswith(android_root):
    # The extra 1 also drops the '/' that follows the root directory.
    return path[1 + len(android_root):]
  return path


def normalize_path(path, flags, android_root=None):
  """Normalize file path relative to src/ or src-internal/ directory."""
  path = os.path.normpath(path)

  if flags.platform == 'android':
    if android_root:
      return remove_android_root_prefix(path, android_root)
    return path

  # Remove known prefix of root path and normalize the suffix.
  idx = path.find('chrome_root/')
  if idx >= 0:
    # remove chrome_root/, we want path relative to that
    return path[idx + len('chrome_root/'):]
  return path


def normalize_warning_line(line, flags, android_root=None):
  """Normalize file path relative to src directory in a warning line."""
  line = re.sub(u'[\u2018\u2019]', '\'', line)
  # replace non-ASCII chars to spaces
  line = re.sub(u'[^\x00-\x7f]', ' ', line)
  line = line.strip()
  # Only the text before the first ':' is treated as the file path.
  first_column = line.find(':')
  return normalize_path(line[:first_column], flags,
                        android_root) + line[first_column:]


def parse_input_file_chrome(infile, flags):
  """Parse Chrome input file, collect parameters and warning lines.

  Returns:
    (unique_warnings, header_str): a dict that maps each normalized warning
    line to its code search link, and a
    'platform_version - board_name - architecture' header string.
  """
  platform_version = 'unknown'
  board_name = 'unknown'
  architecture = 'unknown'

  # only handle warning lines of format 'file_path:line_no:col_no: warning: ...'
  # Bug: http://198657613, This might need change to handle RBE output.
  chrome_warning_pattern = r'^[^ ]*/[^ ]*:[0-9]+:[0-9]+: warning: .*'

  warning_pattern = re.compile(chrome_warning_pattern)

  # Collect all unique warning lines
  # Remove the duplicated warnings save ~8% of time when parsing
  # one typical build log than before
  unique_warnings = dict()
  for line in infile:
    if warning_pattern.match(line):
      normalized_line = normalize_warning_line(line, flags)
      if normalized_line not in unique_warnings:
        unique_warnings[normalized_line] = generate_cs_link(line, flags)
    elif (platform_version == 'unknown' or board_name == 'unknown' or
          architecture == 'unknown'):
      # Keep looking for build parameters until all three are known.
      result = re.match(r'.+Package:.+chromeos-base/chromeos-chrome-', line)
      if result is not None:
        platform_version = 'R' + line.split('chrome-')[1].split('_')[0]
        continue
      result = re.match(r'.+Source\sunpacked\sin\s(.+)', line)
      if result is not None:
        board_name = result.group(1).split('/')[2]
        continue
      result = re.match(r'.+USE:\s*([^\s]*).*', line)
      if result is not None:
        architecture = result.group(1)
        continue

  header_str = '%s - %s - %s' % (platform_version, board_name, architecture)
  return unique_warnings, header_str


def add_normalized_line_to_warnings(line, flags, android_root, unique_warnings):
  """Parse/normalize path, updating warning line and add to warnings dict.

  The dict is updated in place and also returned. A warning that is already
  present keeps its existing link.
  """
  normalized_line = normalize_warning_line(line, flags, android_root)
  if normalized_line not in unique_warnings:
    unique_warnings[normalized_line] = generate_cs_link(line, flags,
                                                        android_root)
  return unique_warnings


def parse_input_file_android(infile, flags):
  """Parse Android input file, collect parameters and warning lines.

  Args:
    infile: seekable file object of the build log; it is read twice, first
      to guess the source root and then to collect warnings.
    flags: parsed command line flags.

  Returns:
    (unique_warnings, header_str): a dict that maps each normalized warning
    line to its link, and a
    'platform_version - target_product - target_variant (build_id)' header.
  """
  # pylint:disable=too-many-locals,too-many-branches
  platform_version = 'unknown'
  target_product = 'unknown'
  target_variant = 'unknown'
  build_id = 'unknown'
  use_rbe = False
  android_root = find_android_root(infile)
  infile.seek(0)

  # rustc warning messages have two lines that should be combined:
  #     warning: description
  #        --> file_path:line_number:column_number
  # Some warning messages have no file name:
  #     warning: macro replacement list ... [bugprone-macro-parentheses]
  # Some makefile warning messages have no line number:
  #     some/path/file.mk: warning: description
  # C/C++ compiler warning messages have line and column numbers:
  #     some/path/file.c:line_number:column_number: warning: description
  warning_pattern = re.compile('(^[^ ]*/[^ ]*: warning: .*)|(^warning: .*)')
  warning_without_file = re.compile('^warning: .*')
  rustc_file_position = re.compile('^[ ]+--> [^ ]*/[^ ]*:[0-9]+:[0-9]+')

  # If RBE was used, try to reclaim some warning lines mixed with some
  # leading chars from other concurrent job's stderr output .
  # The leading characters can be any character, including digits and spaces.
  # It's impossible to correctly identify the starting point of the source
  # file path without the file directory name knowledge.
  # Here we can only be sure to recover lines containing "/b/f/w/".
  rbe_warning_pattern = re.compile('.*/b/f/w/[^ ]*: warning: .*')

  # Collect all unique warning lines
  # Remove the duplicated warnings save ~8% of time when parsing
  # one typical build log than before
  unique_warnings = dict()
  line_counter = 0
  prev_warning = ''
  for line in infile:
    if prev_warning:
      if rustc_file_position.match(line):
        # must be a rustc warning, combine 2 lines into one warning
        line = line.strip().replace('--> ', '') + ': ' + prev_warning
        unique_warnings = add_normalized_line_to_warnings(
            line, flags, android_root, unique_warnings)
        prev_warning = ''
        continue
      # add prev_warning, and then process the current line
      prev_warning = 'unknown_source_file: ' + prev_warning
      unique_warnings = add_normalized_line_to_warnings(
          prev_warning, flags, android_root, unique_warnings)
      prev_warning = ''

    if use_rbe and rbe_warning_pattern.match(line):
      cleaned_up_line = re.sub('.*/b/f/w/', '', line)
      unique_warnings = add_normalized_line_to_warnings(
          cleaned_up_line, flags, android_root, unique_warnings)
      continue

    if warning_pattern.match(line):
      if warning_without_file.match(line):
        # save this line and combine it with the next line
        prev_warning = line
      else:
        unique_warnings = add_normalized_line_to_warnings(
            line, flags, android_root, unique_warnings)
      continue

    if line_counter < 100:
      # save a little bit of time by only doing this for the first few lines
      line_counter += 1
      result = re.search('(?<=^PLATFORM_VERSION=).*', line)
      if result is not None:
        platform_version = result.group(0)
        continue
      result = re.search('(?<=^TARGET_PRODUCT=).*', line)
      if result is not None:
        target_product = result.group(0)
        continue
      result = re.search('(?<=^TARGET_BUILD_VARIANT=).*', line)
      if result is not None:
        target_variant = result.group(0)
        continue
      result = re.search('(?<=^BUILD_ID=).*', line)
      if result is not None:
        build_id = result.group(0)
        continue
      result = re.search('(?<=^TOP=).*', line)
      if result is not None:
        # The look-behind does not capture, so the pattern has no group 1;
        # the value after 'TOP=' is the whole match, as in the cases above.
        android_root = result.group(0)
        continue
      if re.search('USE_RBE=', line) is not None:
        use_rbe = True
        continue

  if prev_warning:
    # The log ended right after a warning without a file name; record it the
    # same way the loop does when no rustc position line follows.
    unique_warnings = add_normalized_line_to_warnings(
        'unknown_source_file: ' + prev_warning, flags, android_root,
        unique_warnings)

  if android_root:
    # android_root may have been learned from a TOP= line after some warnings
    # were already stored, so normalize every collected line once more.
    new_unique_warnings = dict()
    for warning_line in unique_warnings:
      normalized_line = normalize_warning_line(warning_line, flags,
                                               android_root)
      new_unique_warnings[normalized_line] = generate_android_cs_link(
          warning_line, flags, android_root)
    unique_warnings = new_unique_warnings

  header_str = '%s - %s - %s (%s)' % (
      platform_version, target_product, target_variant, build_id)
  return unique_warnings, header_str


def parse_input_file(infile, flags):
  """Parse one input file for chrome or android.

  Raises:
    RuntimeError: if flags.platform is neither 'chrome' nor 'android'.
  """
  if flags.platform == 'chrome':
    return parse_input_file_chrome(infile, flags)
  if flags.platform == 'android':
    return parse_input_file_android(infile, flags)
  raise RuntimeError('parse_input_file not defined for platform %s' %
                     flags.platform)


def parse_compiler_output(compiler_output):
  """Parse compiler output for relevant info.

  Expects 'file_path:line_number:column_number ...:warning_message' and
  returns (file_path, line_number, col_number, warning_message), with the
  two numbers converted to int.
  """
  split_output = compiler_output.split(':', 3)  # 3 = max splits
  file_path = split_output[0]
  line_number = int(split_output[1])
  col_number = int(split_output[2].split(' ')[0])
  warning_message = split_output[3]
  return file_path, line_number, col_number, warning_message


def get_warn_patterns(platform):
  """Get and initialize warn_patterns.

  Every returned pattern gets an empty 'members' list and an empty 'projects'
  dictionary.

  Raises:
    Exception: if platform is neither 'chrome' nor 'android'.
  """
  warn_patterns = []
  if platform == 'chrome':
    warn_patterns = cpp_patterns.warn_patterns
  elif platform == 'android':
    warn_patterns = (make_patterns.warn_patterns + cpp_patterns.warn_patterns +
                     java_patterns.warn_patterns + tidy_patterns.warn_patterns +
                     other_patterns.warn_patterns)
  else:
    raise Exception('platform name %s is not valid' % platform)
  for pattern in warn_patterns:
    pattern['members'] = []
    # Each warning pattern has a 'projects' dictionary, that
    # maps a project name to number of warnings in that project.
    pattern['projects'] = {}
  return warn_patterns


def get_project_list(platform):
  """Return project list for appropriate platform.

  Raises:
    Exception: if platform is neither 'chrome' nor 'android'.
  """
  if platform == 'chrome':
    return chrome_project_list.project_list
  if platform == 'android':
    return android_project_list.project_list
  raise Exception('platform name %s is not valid' % platform)


def parallel_classify_warnings(warning_data, args, project_names,
                               project_patterns, warn_patterns,
                               use_google3, create_launch_subprocs_fn,
                               classify_warnings_fn):
  """Classify all warning lines with num_cpu parallel processes.

  Args:
    warning_data: dict that maps a warning line to its link.
    args: parsed flags; args.processes is the number of processes to use.
    project_names: list of project names, indexed like project_patterns.
    project_patterns: compiled regular expressions of project paths.
    warn_patterns: warning patterns; their 'members' and 'projects' entries
      are updated in place.
    use_google3: when true, the collected results are wrapped in one more
      list before they are walked.
    create_launch_subprocs_fn: callable invoked as
      fn(num_cpu, classify_warnings_fn, arg_groups, group_results) when more
      than one process is requested.
    classify_warnings_fn: callable handed to create_launch_subprocs_fn.

  Returns:
    (warning_messages, warning_links, warning_records)
  """
  # pylint:disable=too-many-arguments,too-many-locals
  num_cpu = args.processes
  group_results = []

  if num_cpu > 1:
    # set up parallel processing for this...
    # Warnings are dealt round-robin into one group per process.
    warning_groups = [[] for _ in range(num_cpu)]
    i = 0
    for warning, link in warning_data.items():
      warning_groups[i].append((warning, link))
      i = (i + 1) % num_cpu
    arg_groups = [[] for _ in range(num_cpu)]
    for i, group in enumerate(warning_groups):
      arg_groups[i] = [{
          'group': group,
          'project_patterns': project_patterns,
          'warn_patterns': warn_patterns,
          'num_processes': num_cpu
      }]

    group_results = create_launch_subprocs_fn(num_cpu,
                                              classify_warnings_fn,
                                              arg_groups,
                                              group_results)
  else:
    group_results = []
    for warning, link in warning_data.items():
      classify_one_warning(warning, link, group_results,
                           project_patterns, warn_patterns)
    group_results = [group_results]

  warning_messages = []
  warning_links = []
  warning_records = []
  if use_google3:
    group_results = [group_results]
  # NOTE(review): the loops below expect three levels of nesting around the
  # [line, link, pattern_idx, project_idx] records. The single-process branch
  # above appears to provide that only together with the use_google3 wrap;
  # confirm against the callers before relying on processes=1 elsewhere.
  for group_result in group_results:
    for result in group_result:
      for line, link, pattern_idx, project_idx in result:
        pattern = warn_patterns[pattern_idx]
        pattern['members'].append(line)
        message_idx = len(warning_messages)
        warning_messages.append(line)
        link_idx = len(warning_links)
        warning_links.append(link)
        warning_records.append([pattern_idx, project_idx, message_idx,
                                link_idx])
        pname = '???' if project_idx < 0 else project_names[project_idx]
        # Count warnings by project.
        if pname in pattern['projects']:
          pattern['projects'][pname] += 1
        else:
          pattern['projects'][pname] = 1
  return warning_messages, warning_links, warning_records


def process_log(logfile, flags, project_names, project_patterns, warn_patterns,
                html_path, use_google3, create_launch_subprocs_fn,
                classify_warnings_fn, logfile_object):
  # pylint does not recognize g-doc-*
  # pylint: disable=bad-option-value,g-doc-args
  # pylint: disable=bad-option-value,g-doc-return-or-yield
  # pylint: disable=too-many-arguments,too-many-locals
  """Function that handles processing of a log.

  This is isolated into its own function (rather than just taking place in main)
  so that it can be used by both warn.py and the borg job process_gs_logs.py, to
  avoid duplication of code.
  Note that if the arguments to this function change, process_gs_logs.py must
  be updated accordingly.

  The log is read from logfile_object when it is not None, otherwise the file
  at path logfile is opened as UTF-8 text. Returns
  (warning_messages, warning_links, warning_records, header_str).
  """
  if logfile_object is None:
    with io.open(logfile, encoding='utf-8') as log:
      warning_lines_and_links, header_str = parse_input_file(log, flags)
  else:
    warning_lines_and_links, header_str = parse_input_file(
        logfile_object, flags)
  warning_messages, warning_links, warning_records = parallel_classify_warnings(
      warning_lines_and_links, flags, project_names, project_patterns,
      warn_patterns, use_google3, create_launch_subprocs_fn,
      classify_warnings_fn)

  html_writer.write_html(flags, project_names, warn_patterns, html_path,
                         warning_messages, warning_links, warning_records,
                         header_str)

  return warning_messages, warning_links, warning_records, header_str


def common_main(use_google3, create_launch_subprocs_fn, classify_warnings_fn,
                logfile_object=None):
  """Shared main function for Google3 and non-Google3 versions of warn.py.

  Returns:
    (flags, warning_messages, warning_records, warn_patterns)
  """
  flags = parse_args(use_google3)
  warn_patterns = get_warn_patterns(flags.platform)
  project_list = get_project_list(flags.platform)

  project_names = get_project_names(project_list)
  project_patterns = [re.compile(p[1]) for p in project_list]

  # html_path=None because we output html below if not outputting CSV
  warning_messages, warning_links, warning_records, header_str = process_log(
      logfile=flags.log, flags=flags, project_names=project_names,
      project_patterns=project_patterns, warn_patterns=warn_patterns,
      html_path=None, use_google3=use_google3,
      create_launch_subprocs_fn=create_launch_subprocs_fn,
      classify_warnings_fn=classify_warnings_fn,
      logfile_object=logfile_object)

  html_writer.write_out_csv(flags, warn_patterns, warning_messages,
                            warning_links, warning_records, header_str,
                            project_names)

  # Return these values, so that caller can use them, if desired.
  return flags, warning_messages, warning_records, warn_patterns