# python3
2# Copyright (C) 2019 The Android Open Source Project
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#      http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16"""Grep warnings messages and output HTML tables or warning counts in CSV.
17
18Default is to output warnings in HTML tables grouped by warning severity.
19Use option --byproject to output tables grouped by source file projects.
20Use option --gencsv to output warning counts in CSV format.
21
22Default input file is build.log, which can be changed with the --log flag.
23"""
24
25# List of important data structures and functions in this script.
26#
27# To parse and keep warning message in the input file:
28#   severity:                classification of message severity
29#   warn_patterns:
30#   warn_patterns[w]['category']     tool that issued the warning, not used now
31#   warn_patterns[w]['description']  table heading
32#   warn_patterns[w]['members']      matched warnings from input
33#   warn_patterns[w]['patterns']     regular expressions to match warnings
34#   warn_patterns[w]['projects'][p]  number of warnings of pattern w in p
35#   warn_patterns[w]['severity']     severity tuple
36#   project_list[p][0]               project name
37#   project_list[p][1]               regular expression to match a project path
38#   project_patterns[p]              re.compile(project_list[p][1])
39#   project_names[p]                 project_list[p][0]
40#   warning_messages     array of each warning message, without source url
41#   warning_links        array of each warning code search link; for 'chrome'
42#   warning_records      array of [idx to warn_patterns,
43#                                  idx to project_names,
44#                                  idx to warning_messages,
45#                                  idx to warning_links]
46#   parse_input_file
47#
48import argparse
49import io
50import multiprocessing
51import os
52import re
53import sys
54
55# pylint:disable=relative-beyond-top-level,no-name-in-module
56# suppress false positive of no-name-in-module warnings
57from . import android_project_list
58from . import chrome_project_list
59from . import cpp_warn_patterns as cpp_patterns
60from . import html_writer
61from . import java_warn_patterns as java_patterns
62from . import make_warn_patterns as make_patterns
63from . import other_warn_patterns as other_patterns
64from . import tidy_warn_patterns as tidy_patterns
65
66
def parse_args(use_google3):
  """Build the command-line parser and return the parsed flags.

  Args:
    use_google3: when true, the check that the log file exists is skipped.

  Returns:
    The argparse namespace. flags.log falls back to the positional build.log
    argument when --log was not given.

  Exits through sys.exit() when use_google3 is false and the log file cannot
  be found.
  """
  parser = argparse.ArgumentParser(
      description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
  add_flag = parser.add_argument

  add_flag('--capacitor_path', default='',
           help='Save capacitor warning file to the passed absolute path')
  # The name csvpath does not follow the naming of the path flag above
  # because the original Android script used csvpath and other scripts
  # still rely on it.
  add_flag('--csvpath', default='',
           help='Save CSV warning file to the passed path')
  add_flag('--gencsv', action='store_true',
           help='Generate CSV file with number of various warnings')
  add_flag('--csvwithdescription', default='',
           help="""Save CSV warning file to the passed path this csv
                            will contain all the warning descriptions""")
  add_flag('--byproject', action='store_true',
           help='Separate warnings in HTML output by project names')
  add_flag('--url', default='',
           help='Root URL of an Android source code tree prefixed before '
           'files in warnings')
  add_flag('--separator', default='?l=',
           help='Separator between the end of a URL and the line number '
           'argument. e.g. #')
  add_flag('--processes', default=multiprocessing.cpu_count(), type=int,
           help='Number of parallel processes to process warnings')
  # Old Android build scripts call warn.py without --platform,
  # so 'android' has to stay the default platform.
  add_flag('--platform', default='android', choices=['chrome', 'android'],
           help='Platform of the build log')
  # Old Android build scripts call warn.py with only a build.log file path,
  # so the log can be given either as --log or as the positional argument.
  add_flag('--log', help='Path to build log file')
  add_flag(dest='buildlog', metavar='build.log', default='build.log',
           nargs='?', help='Path to build.log file')

  flags = parser.parse_args()
  flags.log = flags.log or flags.buildlog
  if use_google3 or os.path.exists(flags.log):
    return flags
  sys.exit('Cannot find log file: ' + flags.log)
110
111
def get_project_names(project_list):
  """Return the first element (the project name) of every project_list entry.

  Args:
    project_list: sequence of entries whose item 0 is the project name.

  Returns:
    A new list of names, in the same order as project_list.
  """
  names = []
  for entry in project_list:
    names.append(entry[0])
  return names
115
116
def find_project_index(line, project_patterns):
  """Return the index of the first project pattern matching line.

  Args:
    line: text handed to each pattern's match() method.
    project_patterns: sequence of objects with a match() method, e.g.
      compiled regular expressions.

  Returns:
    The index of the first pattern whose match() result is truthy, or -1
    when none of them matches.
  """
  matching_indexes = (
      idx for idx, regex in enumerate(project_patterns) if regex.match(line))
  return next(matching_indexes, -1)
123
124
def classify_one_warning(warning, link, results, project_patterns,
                         warn_patterns):
  """Append a classification record for one warning line to results.

  The warning is assigned to the first entry of warn_patterns that has a
  compiled pattern matching it; the appended record is
  [warning, link, index into warn_patterns, project index].

  Args:
    warning: the warning line to classify.
    link: the link that travels with the warning, stored unchanged.
    results: list that receives the record; modified in place.
    project_patterns: patterns passed on to find_project_index().
    warn_patterns: sequence of dicts with a 'compiled_patterns' list.
  """
  for pattern_idx, entry in enumerate(warn_patterns):
    if any(regex.match(warning) for regex in entry['compiled_patterns']):
      project_idx = find_project_index(warning, project_patterns)
      results.append([warning, link, pattern_idx, project_idx])
      return
  # Reaching this point means no pattern matched: there was a problem
  # parsing the log, probably caused by 'make -j' mixing the output from
  # 2 or more concurrent compiles. Such a line is dropped without a record.
137
138
def remove_prefix(src, sub):
  """Drop everything in src that precedes the last occurrence of sub.

  Args:
    src: the string to trim.
    sub: the substring to look for.

  Returns:
    src starting at the last occurrence of sub (sub itself is kept), or src
    unchanged when sub does not occur in it.
  """
  last_start = src.rfind(sub)
  if last_start < 0:
    return src
  return src[last_start:]
145
146
147# TODO(emmavukelj): Don't have any generate_*_cs_link functions call
148# normalize_path a second time (the first time being in parse_input_file)
def generate_cs_link(warning_line, flags, android_root=None):
  """Return the code search link for warning_line on the selected platform.

  Args:
    warning_line: a warning line that starts with a file path.
    flags: parsed flags; flags.platform selects the link generator.
    android_root: source root, only passed on for the 'android' platform.

  Returns:
    The result of the platform specific generator, or the bare
    'https://cs.corp.google.com/' URL for any other platform value.
  """
  platform = flags.platform
  if platform == 'android':
    return generate_android_cs_link(warning_line, flags, android_root)
  if platform == 'chrome':
    return generate_chrome_cs_link(warning_line, flags)
  return 'https://cs.corp.google.com/'
156
157
def generate_android_cs_link(warning_line, flags, android_root):
  """Build the code search link of an Android warning line.

  Args:
    warning_line: text of the form 'path:line:rest'; it must contain at least
      two ':' characters, otherwise the unpacking below raises ValueError.
    flags: parsed flags; flags.url and flags.separator are used.
    android_root: source root passed on to normalize_path().

  Returns:
    The normalized path alone when flags.url is empty. Otherwise
    flags.url + '/' + normalized path, followed by flags.separator and the
    line number when the second field consists only of digits.
  """
  # Splitting at most twice yields exactly the three fields unpacked here.
  source_path, line_field, _ = warning_line.split(':', 2)
  relative_path = normalize_path(source_path, flags, android_root)
  if not flags.url:
    return relative_path
  # Makefile warnings have no line number; they get a link without suffix.
  suffix = flags.separator + line_field if line_field.isdigit() else ''
  return flags.url + '/' + relative_path + suffix
169
170
def generate_chrome_cs_link(warning_line, flags):
  """Build the code search link of a Chrome warning line.

  Args:
    warning_line: text of the form 'path:line:...'.
    flags: parsed flags, passed on to normalize_path().

  Returns:
    A direct link with a '?l=<line>' suffix when one of the known directory
    cases applies, otherwise a '?q=file:<path>' query link.
  """
  fields = warning_line.split(':')
  source_path = fields[0]
  relative_path = normalize_path(source_path, flags)
  host = 'https://cs.chromium.org/'
  repo = 'chromium'
  target = None

  # The checks below are a list of directory cases accumulated through trial
  # and error while manually going through the warnings.
  #
  # They are deliberately independent "if"s rather than an "elif" chain:
  # some paths fall under several cases (e.g. third_party/lib/nghttp2_frame.c)
  # and the later, more specific case has to win. Without reliable knowledge
  # of which paths fall under multiple cases this cannot be turned into
  # "elif"s, so the order of the checks must be kept.
  if '/src/third_party' in source_path:
    target = remove_prefix(source_path, '/src/third_party/')
  for root_marker in ('/chrome_root/src_internal/', '/chrome_root/src/'):
    if root_marker in source_path:
      # Keep the part starting at the marker, minus the '/chrome_root' head.
      target = remove_prefix(source_path, root_marker)[len('/chrome_root'):]
  if '/libassistant/' in source_path:
    repo = 'eureka_internal/chromium/src'
    host = 'https://cs.corp.google.com/'  # internal data
    target = remove_prefix(relative_path, '/libassistant/')
  if source_path.startswith('gen/'):
    target = '/src/out/Debug/gen/' + relative_path
  if '/gen/' in source_path:
    return '%s?q=file:%s' % (host, remove_prefix(relative_path, '/gen/'))

  if not target and source_path.startswith(('src/', 'src_internal/')):
    target = '/%s' % source_path

  if not target:  # no specific link is known, fall back to a query
    return '%s?q=file:%s' % (host, relative_path)

  return '%s%s%s?l=%d' % (host, repo, target, int(fields[1]))
218
219
def find_warn_py_and_android_root(path):
  """Return the ancestor directory of path that contains warn.py.

  Ancestors are tried from the longest to the shortest; the file itself and
  prefixes of fewer than two '/'-separated components are never tried.

  Args:
    path: a '/'-separated file path.

  Returns:
    The first ancestor for which <ancestor>/build/make/tools/warn.py exists,
    or '' when there is none.
  """
  components = path.split('/')
  for end in range(len(components) - 1, 1, -1):
    candidate = '/'.join(components[:end])
    # The Android root directory should contain this script.
    if os.path.exists(candidate + '/build/make/tools/warn.py'):
      return candidate
  return ''
229
230
def find_android_root(buildlog):
  """Guess the android source root from the warning lines of a build log.

  Only warning lines with an absolute file path are considered. For the
  first of them the location of warn.py is searched among the ancestors of
  the file; if that fails, the longest common prefix of up to 10000 such
  lines is used.

  Args:
    buildlog: iterable of log lines.

  Returns:
    The guessed root path without trailing '/', or '' when no guess is made.
  """
  absolute_warning = re.compile('^/[^ ]*/[^ ]*: warning: .*')
  candidates = []
  for line in buildlog:
    if not absolute_warning.match(line):
      continue
    # We want the android_root of a local build machine, so skip RBE
    # warning lines, which have a '/b/f/w/' path prefix, and warnings
    # about /tmp/ files.
    if '/b/f/w' in line or line.startswith('/tmp/'):
      continue
    candidates.append(line)
    if len(candidates) > 9999:
      break
    # Try to find warn.py and use its location to find the source tree root.
    if len(candidates) < 100:
      source_path = os.path.normpath(re.sub(':.*$', '', line))
      root = find_warn_py_and_android_root(source_path)
      if root:
        return root
  # A common prefix of only a few paths is not trusted.
  if len(candidates) > 10:
    # pytype: disable=wrong-arg-types
    prefix = os.path.commonprefix(candidates)
    # pytype: enable=wrong-arg-types
    if len(prefix) > 2 and prefix.endswith('/'):
      return prefix[:-1]
  return ''
263
264
def remove_android_root_prefix(path, android_root):
  """Return path relative to android_root when path lies below that root.

  The prefix is only removed on a directory boundary, so a root of '/a/b'
  is stripped from '/a/b/c.c' but not from '/a/bc/d.c'. Trailing slashes of
  android_root are ignored.

  Args:
    path: the file path to shorten.
    android_root: the source root directory; may be empty.

  Returns:
    The part of path after '<android_root>/', or path unchanged when the
    root is empty (or only slashes) or path is not located below it.
  """
  root = android_root.rstrip('/')
  if not root:
    # Nothing to strip; in particular never cut the first character of path.
    return path
  root_dir = root + '/'
  if path.startswith(root_dir):
    return path[len(root_dir):]
  return path
270
271
def normalize_path(path, flags, android_root=None):
  """Normalize a file path and strip the known source root prefix.

  Args:
    path: the file path to normalize.
    flags: parsed flags; flags.platform selects the prefix handling.
    android_root: source root removed from the path on the 'android'
      platform; nothing is removed when it is empty or None.

  Returns:
    For 'android', the os.path.normpath() result with the android_root
    prefix removed. For any other platform, the part after the first
    'chrome_root/' of the normalized path, or the whole normalized path when
    that marker is absent.
  """
  cleaned = os.path.normpath(path)

  if flags.platform == 'android':
    if not android_root:
      return cleaned
    return remove_android_root_prefix(cleaned, android_root)

  # Paths are wanted relative to chrome_root/, so drop everything up to and
  # including the first occurrence of that marker.
  _, marker, relative = cleaned.partition('chrome_root/')
  return relative if marker else cleaned
287
288
def normalize_warning_line(line, flags, android_root=None):
  """Clean up a warning line and normalize the file path in front of it.

  Args:
    line: a warning line; the text before the first ':' is the file path.
    flags: parsed flags, passed on to normalize_path().
    android_root: source root, passed on to normalize_path().

  Returns:
    The stripped line with typographic single quotes replaced by "'", every
    other non-ASCII character replaced by a space, and the path in front of
    the first ':' normalized.
  """
  with_plain_quotes = re.sub(u'[\u2018\u2019]', '\'', line)
  # Any remaining non-ASCII character becomes a space.
  ascii_line = re.sub(u'[^\x00-\x7f]', ' ', with_plain_quotes).strip()
  colon = ascii_line.find(':')
  path_part = ascii_line[:colon]
  remainder = ascii_line[colon:]
  return normalize_path(path_part, flags, android_root) + remainder
298
299
def parse_input_file_chrome(infile, flags):
  """Collect the unique warning lines and header fields of a Chrome log.

  Args:
    infile: iterable of log lines.
    flags: parsed flags, passed on to the normalization and link helpers.

  Returns:
    A tuple (unique_warnings, header_str). unique_warnings maps each
    normalized warning line to its code search link; header_str is
    '<platform version> - <board name> - <architecture>', where a field that
    was not found in the log reads 'unknown'.
  """
  platform_version = 'unknown'
  board_name = 'unknown'
  architecture = 'unknown'

  # Only lines of format 'file_path:line_no:col_no: warning: ...' are handled.
  # Bug: http://198657613, This might need change to handle RBE output.
  chrome_warning_pattern = r'^[^ ]*/[^ ]*:[0-9]+:[0-9]+: warning: .*'
  is_warning = re.compile(chrome_warning_pattern).match

  # Warnings are keyed by their normalized text so that duplicates are
  # dropped; this saves ~8% of the time when parsing one typical build log.
  unique_warnings = {}
  for line in infile:
    if is_warning(line):
      key = normalize_warning_line(line, flags)
      if key not in unique_warnings:
        unique_warnings[key] = generate_cs_link(line, flags)
      continue

    # Header lines are only looked for while a header field is still missing.
    if 'unknown' not in (platform_version, board_name, architecture):
      continue

    package = re.match(r'.+Package:.+chromeos-base/chromeos-chrome-', line)
    if package is not None:
      platform_version = 'R' + line.split('chrome-')[1].split('_')[0]
      continue
    unpacked = re.match(r'.+Source\sunpacked\sin\s(.+)', line)
    if unpacked is not None:
      board_name = unpacked.group(1).split('/')[2]
      continue
    use_flags = re.match(r'.+USE:\s*([^\s]*).*', line)
    if use_flags is not None:
      architecture = use_flags.group(1)

  header_str = '%s - %s - %s' % (platform_version, board_name, architecture)
  return unique_warnings, header_str
338
339
def add_normalized_line_to_warnings(line, flags, android_root, unique_warnings):
  """Record a warning line in unique_warnings unless it is already known.

  Args:
    line: the raw warning line.
    flags: parsed flags, passed on to the normalization and link helpers.
    android_root: source root, passed on to the same helpers.
    unique_warnings: dict from normalized warning line to link; modified in
      place.

  Returns:
    The same unique_warnings dict that was passed in.
  """
  key = normalize_warning_line(line, flags, android_root)
  if key in unique_warnings:
    return unique_warnings
  # The link is generated from the raw line, and only for new warnings.
  unique_warnings[key] = generate_cs_link(line, flags, android_root)
  return unique_warnings
347
348
def parse_input_file_android(infile, flags):
  """Parse Android input file, collect parameters and warning lines.

  Args:
    infile: the build log as a seekable iterable of lines; it is read once to
      guess the source root, rewound with seek(0), and read a second time.
    flags: parsed flags, passed on to the normalization and link helpers.

  Returns:
    A tuple (unique_warnings, header_str). unique_warnings maps each
    normalized warning line to its code search link; header_str is
    '<platform version> - <target product> - <target variant> (<build id>)',
    where a value that was not found in the log reads 'unknown'.
  """
  # pylint:disable=too-many-locals,too-many-branches
  platform_version = 'unknown'
  target_product = 'unknown'
  target_variant = 'unknown'
  build_id = 'unknown'
  use_rbe = False
  android_root = find_android_root(infile)
  infile.seek(0)

  # rustc warning messages have two lines that should be combined:
  #     warning: description
  #        --> file_path:line_number:column_number
  # Some warning messages have no file name:
  #     warning: macro replacement list ... [bugprone-macro-parentheses]
  # Some makefile warning messages have no line number:
  #     some/path/file.mk: warning: description
  # C/C++ compiler warning messages have line and column numbers:
  #     some/path/file.c:line_number:column_number: warning: description
  warning_pattern = re.compile('(^[^ ]*/[^ ]*: warning: .*)|(^warning: .*)')
  warning_without_file = re.compile('^warning: .*')
  rustc_file_position = re.compile('^[ ]+--> [^ ]*/[^ ]*:[0-9]+:[0-9]+')

  # If RBE was used, try to reclaim some warning lines mixed with some
  # leading chars from other concurrent job's stderr output.
  # The leading characters can be any character, including digits and spaces.
  # It's impossible to correctly identify the starting point of the source
  # file path without the file directory name knowledge.
  # Here we can only be sure to recover lines containing "/b/f/w/".
  rbe_warning_pattern = re.compile('.*/b/f/w/[^ ]*: warning: .*')

  # Collect all unique warning lines.
  # Removing the duplicated warnings saves ~8% of time when parsing
  # one typical build log.
  unique_warnings = dict()
  line_counter = 0
  # A 'warning: ...' line without file name that still waits for a possible
  # rustc '--> file:line:column' line following it.
  prev_warning = ''
  for line in infile:
    if prev_warning:
      if rustc_file_position.match(line):
        # must be a rustc warning, combine 2 lines into one warning
        line = line.strip().replace('--> ', '') + ': ' + prev_warning
        unique_warnings = add_normalized_line_to_warnings(
            line, flags, android_root, unique_warnings)
        prev_warning = ''
        continue
      # add prev_warning, and then process the current line
      prev_warning = 'unknown_source_file: ' + prev_warning
      unique_warnings = add_normalized_line_to_warnings(
          prev_warning, flags, android_root, unique_warnings)
      prev_warning = ''

    if use_rbe and rbe_warning_pattern.match(line):
      cleaned_up_line = re.sub('.*/b/f/w/', '', line)
      unique_warnings = add_normalized_line_to_warnings(
          cleaned_up_line, flags, android_root, unique_warnings)
      continue

    if warning_pattern.match(line):
      if warning_without_file.match(line):
        # save this line and combine it with the next line
        prev_warning = line
      else:
        unique_warnings = add_normalized_line_to_warnings(
            line, flags, android_root, unique_warnings)
      continue

    if line_counter < 100:
      # save a little bit of time by only doing this for the first few lines
      line_counter += 1
      result = re.search('(?<=^PLATFORM_VERSION=).*', line)
      if result is not None:
        platform_version = result.group(0)
        continue
      result = re.search('(?<=^TARGET_PRODUCT=).*', line)
      if result is not None:
        target_product = result.group(0)
        continue
      result = re.search('(?<=^TARGET_BUILD_VARIANT=).*', line)
      if result is not None:
        target_variant = result.group(0)
        continue
      result = re.search('(?<=^BUILD_ID=).*', line)
      if result is not None:
        build_id = result.group(0)
        continue
      result = re.search('(?<=^TOP=).*', line)
      if result is not None:
        # The pattern has no capture group, so the value is the whole match;
        # asking for group(1) here would raise IndexError.
        android_root = result.group(0)
        continue
      if re.search('USE_RBE=', line) is not None:
        use_rbe = True
        continue

  if prev_warning:
    # The log ended with a file-less warning: there is no following line
    # that could complete it, so record it now instead of dropping it.
    unique_warnings = add_normalized_line_to_warnings(
        'unknown_source_file: ' + prev_warning, flags, android_root,
        unique_warnings)

  if android_root:
    # The root may have been learned (from a TOP= line) only after some
    # warnings were stored, so normalize and link all of them once more.
    new_unique_warnings = dict()
    for warning_line in unique_warnings:
      normalized_line = normalize_warning_line(warning_line, flags,
                                               android_root)
      new_unique_warnings[normalized_line] = generate_android_cs_link(
          warning_line, flags, android_root)
    unique_warnings = new_unique_warnings

  header_str = '%s - %s - %s (%s)' % (
      platform_version, target_product, target_variant, build_id)
  return unique_warnings, header_str
456
457
def parse_input_file(infile, flags):
  """Parse one input file with the parser of the selected platform.

  Args:
    infile: the log to parse, handed to the platform parser unchanged.
    flags: parsed flags; flags.platform must be 'chrome' or 'android'.

  Returns:
    Whatever the platform specific parser returns.

  Raises:
    RuntimeError: if flags.platform is neither 'chrome' nor 'android'.
  """
  platform = flags.platform
  if platform not in ('chrome', 'android'):
    raise RuntimeError('parse_input_file not defined for platform %s' %
                       flags.platform)
  if platform == 'android':
    return parse_input_file_android(infile, flags)
  return parse_input_file_chrome(infile, flags)
466
467
def parse_compiler_output(compiler_output):
  """Split a 'file:line:column ...:message' line into its four parts.

  Args:
    compiler_output: one line of compiler output.

  Returns:
    A tuple (file_path, line_number, col_number, warning_message) with both
    numbers converted to int. The message is everything after the third ':'
    and keeps its leading whitespace.
  """
  # At most three splits, so any ':' inside the message is left untouched.
  fields = compiler_output.split(':', 3)
  return (fields[0],
          int(fields[1]),
          # Only the text up to the first space of the third field is used.
          int(fields[2].split(' ', 1)[0]),
          fields[3])
476
477
def get_warn_patterns(platform):
  """Return the warning patterns of platform with empty result fields.

  Args:
    platform: 'chrome' or 'android'.

  Returns:
    The list of warning pattern dicts for the platform. Every dict gets a
    fresh empty 'members' list and 'projects' dict; the dicts are updated in
    place.

  Raises:
    Exception: if platform is neither 'chrome' nor 'android'.
  """
  if platform not in ('chrome', 'android'):
    raise Exception('platform name %s is not valid' % platform)
  if platform == 'chrome':
    selected = cpp_patterns.warn_patterns
  else:
    selected = (make_patterns.warn_patterns + cpp_patterns.warn_patterns +
                java_patterns.warn_patterns + tidy_patterns.warn_patterns +
                other_patterns.warn_patterns)
  for entry in selected:
    entry['members'] = []
    # 'projects' maps a project name to the number of warnings of this
    # pattern found in that project.
    entry['projects'] = {}
  return selected
495
496
def get_project_list(platform):
  """Return the project list that belongs to platform.

  Args:
    platform: 'chrome' or 'android'.

  Returns:
    The project_list of the matching *_project_list module.

  Raises:
    Exception: if platform is neither 'chrome' nor 'android'.
  """
  if platform not in ('chrome', 'android'):
    raise Exception('platform name %s is not valid' % platform)
  if platform == 'android':
    return android_project_list.project_list
  return chrome_project_list.project_list
504
505
def parallel_classify_warnings(warning_data, args, project_names,
                               project_patterns, warn_patterns,
                               use_google3, create_launch_subprocs_fn,
                               classify_warnings_fn):
  """Classify all warning lines, in parallel when more than one process is set.

  Args:
    warning_data: dict from warning line to its link.
    args: parsed flags; args.processes is the number of processes to use.
    project_names: project names, indexed by project index.
    project_patterns: patterns used to find the project of a warning.
    warn_patterns: warning pattern dicts; the 'members' list and 'projects'
      counters of the matched patterns are updated in place.
    use_google3: when true, the launcher's result is wrapped in one more list
      before it is aggregated.
    create_launch_subprocs_fn: called as fn(num_cpu, classify_warnings_fn,
      arg_groups, []) when args.processes > 1; returns the grouped results.
    classify_warnings_fn: the worker function handed to the launcher.

  Returns:
    A tuple (warning_messages, warning_links, warning_records), where each
    record is [pattern index, project index, message index, link index].
  """
  # pylint:disable=too-many-arguments,too-many-locals
  num_cpu = args.processes

  if num_cpu > 1:
    # Deal the warnings out round-robin, one group per process.
    warning_groups = [[] for _ in range(num_cpu)]
    for position, item in enumerate(warning_data.items()):
      warning_groups[position % num_cpu].append(item)
    # Every process gets a list holding a single argument dict.
    arg_groups = [[{
        'group': group,
        'project_patterns': project_patterns,
        'warn_patterns': warn_patterns,
        'num_processes': num_cpu
    }] for group in warning_groups]
    group_results = create_launch_subprocs_fn(num_cpu,
                                              classify_warnings_fn,
                                              arg_groups,
                                              [])
    if use_google3:
      group_results = [group_results]
  else:
    results = []
    for warning, link in warning_data.items():
      classify_one_warning(warning, link, results,
                           project_patterns, warn_patterns)
    # The aggregation below walks three list levels before it reaches a
    # [line, link, pattern_idx, project_idx] record. Always wrapping twice
    # here makes the single-process path work whether or not use_google3 is
    # set; with a single wrap the loop would try to unpack the warning
    # string itself.
    group_results = [[results]]

  warning_messages = []
  warning_links = []
  warning_records = []
  for group_result in group_results:
    for result in group_result:
      for line, link, pattern_idx, project_idx in result:
        pattern = warn_patterns[pattern_idx]
        pattern['members'].append(line)
        message_idx = len(warning_messages)
        warning_messages.append(line)
        link_idx = len(warning_links)
        warning_links.append(link)
        warning_records.append([pattern_idx, project_idx, message_idx,
                                link_idx])
        # Warnings without a matching project are counted under '???'.
        pname = '???' if project_idx < 0 else project_names[project_idx]
        counts = pattern['projects']
        counts[pname] = counts.get(pname, 0) + 1
  return warning_messages, warning_links, warning_records
565
566
def process_log(logfile, flags, project_names, project_patterns, warn_patterns,
                html_path, use_google3, create_launch_subprocs_fn,
                classify_warnings_fn, logfile_object):
  # pylint does not recognize g-doc-*
  # pylint: disable=bad-option-value,g-doc-args
  # pylint: disable=bad-option-value,g-doc-return-or-yield
  # pylint: disable=too-many-arguments,too-many-locals
  """Parse a log, classify its warnings and hand the result to html_writer.

  This lives in its own function (rather than in main) so that warn.py and
  the borg job process_gs_logs.py can share it without duplicating code.
  Note that process_gs_logs.py must be updated whenever the arguments of this
  function change.

  The log is read from logfile_object when one is given; otherwise the file
  named by logfile is opened as UTF-8 text. The function returns the tuple
  (warning_messages, warning_links, warning_records, header_str).
  """
  if logfile_object is not None:
    parsed = parse_input_file(logfile_object, flags)
  else:
    with io.open(logfile, encoding='utf-8') as log:
      parsed = parse_input_file(log, flags)
  warning_lines_and_links, header_str = parsed

  classified = parallel_classify_warnings(
      warning_lines_and_links, flags, project_names, project_patterns,
      warn_patterns, use_google3, create_launch_subprocs_fn,
      classify_warnings_fn)
  warning_messages, warning_links, warning_records = classified

  html_writer.write_html(flags, project_names, warn_patterns, html_path,
                         warning_messages, warning_links, warning_records,
                         header_str)

  return warning_messages, warning_links, warning_records, header_str
598
599
def common_main(use_google3, create_launch_subprocs_fn, classify_warnings_fn,
                logfile_object=None):
  """Shared main function for Google3 and non-Google3 versions of warn.py.

  Args:
    use_google3: passed on to parse_args() and process_log().
    create_launch_subprocs_fn: launcher for the parallel classification.
    classify_warnings_fn: worker function used by the launcher.
    logfile_object: optional already opened log; when None the file named by
      the parsed flags is read.

  Returns:
    A tuple (flags, warning_messages, warning_records, warn_patterns), so
    that the caller can use these values if desired.
  """
  flags = parse_args(use_google3)
  platform = flags.platform
  warn_patterns = get_warn_patterns(platform)
  project_list = get_project_list(platform)

  project_names = get_project_names(project_list)
  # Item 1 of every project entry is the regular expression of its paths.
  project_patterns = [re.compile(entry[1]) for entry in project_list]

  # process_log() is called with html_path=None here.
  processed = process_log(
      logfile=flags.log, flags=flags, project_names=project_names,
      project_patterns=project_patterns, warn_patterns=warn_patterns,
      html_path=None, use_google3=use_google3,
      create_launch_subprocs_fn=create_launch_subprocs_fn,
      classify_warnings_fn=classify_warnings_fn,
      logfile_object=logfile_object)
  warning_messages, warning_links, warning_records, header_str = processed

  html_writer.write_out_csv(flags, warn_patterns, warning_messages,
                            warning_links, warning_records, header_str,
                            project_names)

  return flags, warning_messages, warning_records, warn_patterns
625