• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (c) 2014 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import os
6from threading import Lock
7
8import blame
9from common import utils
10import component_dictionary
11import crash_utils
12import git_repository_parser
13import match_set
14import svn_repository_parser
15
16
17LINE_CHANGE_PRIORITY = 1
18FILE_CHANGE_PRIORITY = 2
19_THIS_DIR = os.path.abspath(os.path.dirname(__file__))
20CONFIG = crash_utils.ParseURLsFromConfig(os.path.join(_THIS_DIR,
21                                                      'config.ini'))
22
23
24def GenerateMatchEntry(
25    matches, revision_info, revision_number, file_path, function,
26    component_path, component_name, crashed_line_numbers, stack_frame_indices,
27    file_change_type, repository_parser):
28  """Generates a match object and adds it to the match set.
29
30  Args:
31    matches: A matchset object, a map from CL to a match object.
32    revision_info: The revision information, a map from fields (message,
33                   changed files, etc) to its values.
34    revision_number: The SVN revision number or git hash.
35    file_path: The path of the file.
36    function: The function that caused an crash.
37    component_path: The path of the component this file is from.
38    component_name: The name of the component the file is from.
39    crashed_line_numbers: The list of the lines in the file that caused
40                          the crash.
41    stack_frame_indices: The list of positions of this file within a stack.
42    file_change_type: Whether file is modified, added or deleted.
43    repository_parser: The parser object to parse line diff.
44  """
45  # Check if this CL should be ignored.
46  with matches.matches_lock:
47    if revision_number in matches.cls_to_ignore:
48      return
49
50    # If this CL is not already identified as suspected, create a new entry.
51    if revision_number not in matches.matches:
52      match = match_set.Match(revision_info, component_name)
53      message = revision_info['message']
54      # TODO(jeun): Don't hold lock while issuing http request.
55      match.ParseMessage(message, matches.codereview_api_url)
56
57      # If this match is a revert, add to the set of CLs to be ignored.
58      if match.is_revert:
59        matches.cls_to_ignore.add(revision_number)
60
61        # If this match has info on what CL it is reverted from, add that CL.
62        if match.revert_of:
63          matches.cls_to_ignore.add(match.revert_of)
64
65        return
66
67      matches.matches[revision_number] = match
68
69    else:
70      match = matches.matches[revision_number]
71
72  (diff_url, changed_line_numbers, changed_line_contents) = (
73      repository_parser.ParseLineDiff(
74          file_path, component_path, file_change_type, revision_number))
75
76  # Ignore this match if the component is not supported for svn.
77  if not diff_url:
78    return
79
80  # Find the intersection between the lines that this file crashed on and
81  # the changed lines.
82  (line_number_intersection, stack_frame_index_intersection, functions) = (
83      crash_utils.Intersection(
84          crashed_line_numbers, stack_frame_indices, changed_line_numbers,
85          function))
86
87  # Find the minimum distance between the changed lines and crashed lines.
88  (min_distance, min_crashed_line, min_changed_line) = \
89      crash_utils.FindMinLineDistance(crashed_line_numbers,
90                                      changed_line_numbers)
91
92  # Check whether this CL changes the crashed lines or not.
93  if line_number_intersection:
94    priority = LINE_CHANGE_PRIORITY
95  else:
96    priority = FILE_CHANGE_PRIORITY
97
98  # Add the parsed information to the object.
99  with matches.matches_lock:
100    match.crashed_line_numbers.append(line_number_intersection)
101
102    file_name = file_path.split('/')[-1]
103    match.changed_files.append(file_name)
104
105    # Update the min distance only if it is less than the current one.
106    if min_distance < match.min_distance:
107      match.min_distance = min_distance
108      match.min_distance_info = (file_name, min_crashed_line, min_changed_line)
109
110    # If this CL does not change the crashed line, all occurrence of this
111    # file in the stack has the same priority.
112    if not stack_frame_index_intersection:
113      stack_frame_index_intersection = stack_frame_indices
114      functions = function
115    match.stack_frame_indices.append(stack_frame_index_intersection)
116    match.changed_file_urls.append(diff_url)
117    match.priorities.append(priority)
118    match.function_list.append(functions)
119
120
121def FindMatch(revisions_info_map, file_to_revision_info, file_to_crash_info,
122              component_path, component_name, repository_parser,
123              codereview_api_url):
124  """Finds a CL that modifies file in the stacktrace.
125
126  Args:
127    revisions_info_map: A dictionary mapping revision number to the CL
128                        information.
129    file_to_revision_info: A dictionary mapping file to the revision that
130                           modifies it.
131    file_to_crash_info: A dictionary mapping file to its occurrence in
132                       stacktrace.
133    component_path: The path of the component to search for.
134    component_name: The name of the component to search for.
135    repository_parser: The parser object to parse the line diff.
136    codereview_api_url: A code review url to retrieve data from.
137
138  Returns:
139    Matches, a set of match objects.
140  """
141  matches = match_set.MatchSet(codereview_api_url)
142
143  tasks = []
144  # Iterate through the crashed files in the stacktrace.
145  for crashed_file_path in file_to_crash_info:
146    # Ignore header file.
147    if crashed_file_path.endswith('.h'):
148      continue
149
150    # If the file in the stacktrace is not changed in any commits, continue.
151    for changed_file_path in file_to_revision_info:
152      changed_file_name = changed_file_path.split('/')[-1].lower()
153      crashed_file_name = crashed_file_path.split('/')[-1].lower()
154      if changed_file_name != crashed_file_name:
155        continue
156
157      if not crash_utils.GuessIfSameSubPath(
158          changed_file_path.lower(), crashed_file_path.lower()):
159        continue
160
161      crashed_line_numbers = file_to_crash_info.GetCrashedLineNumbers(
162          crashed_file_path)
163      stack_frame_nums = file_to_crash_info.GetCrashStackFrameIndices(
164          crashed_file_path)
165      functions = file_to_crash_info.GetCrashFunctions(crashed_file_path)
166
167      # Iterate through the CLs that this file path is changed.
168      for (cl, file_change_type) in file_to_revision_info[changed_file_path]:
169        # If the file change is delete, ignore this CL.
170        if file_change_type == 'D':
171          continue
172
173        revision = revisions_info_map[cl]
174
175        tasks.append({
176            'function': GenerateMatchEntry,
177            'args':[matches, revision, cl, changed_file_path, functions,
178                    component_path, component_name, crashed_line_numbers,
179                    stack_frame_nums, file_change_type,
180                   repository_parser]})
181
182  # Run all the tasks.
183  crash_utils.RunTasks(tasks)
184
185  matches.RemoveRevertedCLs()
186
187  return matches
188
189
190def FindMatchForComponent(component_path, file_to_crash_info, changelog,
191                          callstack_priority, results, results_lock):
192  """Parses changelog and finds suspected CLs for a given component.
193
194  Args:
195    component_path: The path of component to look for the culprit CL.
196    file_to_crash_info: A dictionary mapping file to its occurrence in
197                        stackframe.
198    changelog: The parsed changelog for this component.
199    callstack_priority: The priority of this call stack, 0 if from crash stack,
200                        1 if from freed, 2 if from previously allocated.
201    results: A dictionary to store the result.
202    results_lock: A lock that guards results.
203  """
204  (repository_parser, component_name, revisions, file_to_revision_map) = \
205      changelog
206
207  # Find match for this component.
208  codereview_api_url = CONFIG['codereview']['review_url']
209  component_result = FindMatch(
210      revisions, file_to_revision_map, file_to_crash_info, component_path,
211      component_name, repository_parser, codereview_api_url)
212  matches = component_result.matches
213
214  # For all the match results in a dictionary, add to the list so that it
215  # can be sorted.
216  with results_lock:
217    for cl in matches:
218      match = matches[cl]
219      results.append((callstack_priority, cl, match))
220
221
222def FindMatchForCallstack(
223    callstack, components, component_to_changelog_map, results,
224    results_lock):
225  """Finds culprit cl for a stack within a stacktrace.
226
227  For each components to look for, create new thread that computes the matches
228  and join the results at the end.
229
230  Args:
231    callstack: A callstack in a stacktrace to find the result for.
232    components: A set of components to look for.
233    component_to_changelog_map: A map from component to its parsed changelog.
234    results: A list to aggregrate results from all stacktraces.
235    results_lock: A lock that guards results.
236  """
237  # Create component dictionary from the component and call stack.
238  component_dict = component_dictionary.ComponentDictionary(callstack,
239                                                            components)
240  callstack_priority = callstack.priority
241
242  # Iterate through all components.
243  for component_path in component_dict:
244    # If the component to consider in this callstack is not in the parsed list
245    # of components, ignore this one.
246    if component_path not in component_to_changelog_map:
247      continue
248
249    changelog = component_to_changelog_map[component_path]
250    file_to_crash_info = component_dict.GetFileDict(component_path)
251    FindMatchForComponent(component_path, file_to_crash_info, changelog,
252                          callstack_priority, results, results_lock)
253
254
255def FindMatchForStacktrace(stacktrace, components,
256                           component_to_regression_dict):
257  """Finds the culprit CL for stacktrace.
258
259  The passed stacktrace is either from release build stacktrace
260  or debug build stacktrace.
261
262  Args:
263    stacktrace: A list of parsed stacks within a stacktrace.
264    components: A set of components to look for.
265    component_to_regression_dict: A dictionary mapping component path to
266                                  its regression.
267
268  Returns:
269    A list of match results from all stacks.
270  """
271  # A list to aggregate results from all the callstacks in the stacktrace.
272  results = []
273  results_lock = Lock()
274
275  # Setup parsers.
276  svn_parser = svn_repository_parser.SVNParser(CONFIG['svn'])
277  git_parser = git_repository_parser.GitParser(component_to_regression_dict,
278                                               CONFIG['git'])
279
280  # Create a cache of parsed revisions.
281  component_to_changelog_map = {}
282  for component_path in components:
283    component_object = component_to_regression_dict[component_path]
284    range_start = component_object['old_revision']
285    range_end = component_object['new_revision']
286
287    # If range start is 0, the range is too large and the crash has been
288    # introduced the archived build, so ignore this case.
289    if range_start == '0':
290      continue
291
292    component_name = component_to_regression_dict[component_path]['name']
293
294    is_git = utils.IsGitHash(range_start)
295    if is_git:
296      repository_parser = git_parser
297    else:
298      repository_parser = svn_parser
299
300    (revisions, file_to_revision_map) = repository_parser.ParseChangelog(
301        component_path, range_start, range_end)
302
303    # If the returned map from ParseChangeLog is empty, we don't need to look
304    # further because either the parsing failed or the changelog is empty.
305    if not (revisions and file_to_revision_map):
306      continue
307
308    component_to_changelog_map[component_path] = (repository_parser,
309                                                  component_name,
310                                                  revisions,
311                                                  file_to_revision_map)
312
313  # Analyze each of the call stacks in the stacktrace.
314  for callstack in stacktrace.stack_list:
315    FindMatchForCallstack(callstack, components, component_to_changelog_map,
316                          results, results_lock)
317
318  return results
319
320
321def SortMatchesFunction(match_with_stack_priority):
322  """A function to sort the match triple.
323
324  Currently, it sorts the list by:
325  1) The highest priority file change in the CL (changing crashed line is
326  higher priority than just changing the file).
327  2) The callstack this match is computed (crash stack, freed, allocation).
328  3) The minimum stack frame number of the changed file in the match.
329  4) The number of files this CL changes (higher the better).
330  5) The minimum distance between the lines that the CL changes and crashed
331  lines.
332
333  Args:
334    match_with_stack_priority: A match object, with the CL it is from and what
335                               callstack it is from.
336
337  Returns:
338    A sort key.
339  """
340  (stack_priority, _, match) = match_with_stack_priority
341
342  return (min(match.priorities),
343          stack_priority,
344          match.min_distance,
345          crash_utils.FindMinStackFrameNumber(match.stack_frame_indices,
346                                              match.priorities),
347          -len(match.changed_files))
348
349
350def SortAndFilterMatches(matches, num_important_frames=5):
351  """Filters the list of potential culprit CLs to remove noise.
352
353  Args:
354    matches: A list containing match results.
355    num_important_frames: A number of frames on the top of the frame to Check
356                          for when filtering the results. A match with a file
357                          that is in top num_important_frames of the stacktrace
358                          is regarded more probable then others.
359
360  Returns:
361    Filtered match results.
362  """
363  new_matches = []
364  line_changed = False
365  is_important_frame = False
366  highest_priority_stack = crash_utils.INFINITY
367  matches.sort(key=SortMatchesFunction)
368  # Iterate through the matches to find out what results are significant.
369  for stack_priority, cl, match in matches:
370    # Check if the current match changes crashed line.
371    is_line_change = (min(match.priorities) == LINE_CHANGE_PRIORITY)
372
373    # Check which stack this match is from, and finds the highest priority
374    # callstack up to this point.
375    current_stack = stack_priority
376    if current_stack < highest_priority_stack:
377      highest_priority_stack = current_stack
378
379    # Check if current match changes a file that occurs in crash state.
380    flattened_stack_frame_indices = [frame for frame_indices in
381                                     match.stack_frame_indices
382                                     for frame in frame_indices]
383    current_is_important = (
384        min(flattened_stack_frame_indices) < num_important_frames)
385
386    # This match and anything lower than this should be ignored if:
387    # - Current match does not change crashed lines but there are matches
388    # that do so.
389    # - Current match is not in crash state but there are matches in it.
390    # - There are other matches that came from higher priority stack.
391    if (line_changed and not is_line_change) or (
392        is_important_frame and not current_is_important) or (
393            current_stack > highest_priority_stack):
394      break
395
396    # Update the variables.
397    if is_line_change:
398      line_changed = True
399    if current_is_important:
400      is_important_frame = True
401
402    # Add current match to the filtered result.
403    new_matches.append((stack_priority, cl, match))
404
405  return new_matches
406
407
408def GenerateReasonForMatches(matches):
409  """Generates a reason that a match (CL) is a culprit cl.
410
411  Args:
412    matches: A list of match objects.
413  """
414  # Iterate through the matches in the list.
415  for i, _, match in matches:
416    reason = []
417
418    # Zip the files in the match by the reason they are suspected
419    # (how the file is modified).
420    match_by_priority = zip(
421        match.priorities, match.crashed_line_numbers, match.changed_files,
422        match.stack_frame_indices, match.function_list)
423
424    # Sort the zipped changed files in the match by their priority so that the
425    # changed lines comes first in the reason.
426    match_by_priority.sort(
427        key=lambda (priority, crashed_line_numbers, file_name,
428                    stack_frame_indices, function_list): priority)
429
430    # Iterate through the sorted match.
431    for i in range(len(match_by_priority)):
432      (priority, crashed_line_numbers, file_name, stack_frame_indices,
433       function_list) = match_by_priority[i]
434
435      # If the file in the match is a line change, append a explanation.
436      if priority == LINE_CHANGE_PRIORITY:
437        crashed_line_numbers = [crashed_line_number
438                                for lines in crashed_line_numbers
439                                for crashed_line_number in lines]
440        reason.append(
441            'Lines %s of file %s which potentially caused crash '
442            'are changed in this cl (%s).\n' %
443            (utils.JoinLineNumbers(crashed_line_numbers, accepted_gap=4),
444             file_name,
445             crash_utils.PrettifyFrameInfo(stack_frame_indices, function_list)))
446
447      else:
448        # Get all the files that are not line change.
449        rest_of_the_files = match_by_priority[i:]
450
451        if len(rest_of_the_files) == 1:
452          file_string = 'File %s is changed in this cl '
453        else:
454          file_string = 'Files %s are changed in this cl '
455
456        # Create a list of file names, and prettify the list.
457        file_names = [
458            file_name for (_, _, file_name, _, _) in rest_of_the_files]
459        pretty_file_names = crash_utils.PrettifyList(file_names)
460
461        # Add the reason, break because we took care of the rest of the files.
462        file_string += ('(and is part of stack %s)' %
463            crash_utils.PrettifyFrameInfo(stack_frame_indices, function_list))
464        reason.append(file_string % pretty_file_names)
465        break
466
467    # Set the reason as string.
468    match.reason = '\n'.join(reason)
469
470
471def CombineMatches(matches):
472  """Combine possible duplicates in matches.
473
474  Args:
475    matches: A list of matches object, along with its callstack priority and
476             CL it is from.
477  Returns:
478    A combined list of matches.
479  """
480  combined_matches = []
481
482  for stack_index, cl, match in matches:
483    found_match = None
484
485    # Iterate through the list of combined matches.
486    for _, cl_combined, match_combined in combined_matches:
487      # Check for if current CL is already higher up in the result.
488      if cl == cl_combined:
489        found_match = match_combined
490        break
491
492    # If current match is not already in, add it to the list of matches.
493    if not found_match:
494      combined_matches.append((stack_index, cl, match))
495      continue
496
497    # Combine the reason if the current match is already in there.
498    found_match.reason += match.reason
499    if match.min_distance < found_match.min_distance:
500      found_match.min_distance = match.min_distance
501      found_match.min_distance_info = match.min_distance_info
502
503  for stack_index, cl, match in combined_matches:
504    if match.min_distance_info:
505      file_name, min_crashed_line, min_changed_line = match.min_distance_info
506      match.reason += \
507          ('\nMinimum distance from crash line to modified line: %d. '
508           '(file: %s, crashed on: %d, modified: %d).\n' %
509           (match.min_distance, file_name, min_crashed_line, min_changed_line))
510
511  return combined_matches
512
513
514def FilterAndGenerateReasonForMatches(result):
515  """A wrapper function.
516
517  It generates reasons for the matches and returns string representation
518  of filtered results.
519
520  Args:
521    result: A list of match objects.
522
523  Returns:
524    A string representation of filtered results.
525  """
526  new_result = SortAndFilterMatches(result)
527  GenerateReasonForMatches(new_result)
528  combined_matches = CombineMatches(new_result)
529  return crash_utils.MatchListToResultList(combined_matches)
530
531
532def ParseCrashComponents(main_stack):
533  """Parses the crashing component.
534
535  Crashing components is a component that top_n_frames of the stacktrace is
536  from.
537
538  Args:
539    main_stack: Main stack from the stacktrace.
540
541  Returns:
542    A set of components.
543  """
544  components = set()
545
546  for frame in main_stack.frame_list:
547    components.add(frame.component_path)
548
549  return components
550
551
552def GenerateAndFilterBlameList(callstack, component_to_crash_revision_dict,
553                               component_to_regression_dict):
554  """A wrapper function.
555
556  Finds blame information for stack and returns string representation.
557
558  Args:
559    callstack: A callstack to find the blame information.
560    component_to_crash_revision_dict: A dictionary mapping component to its
561                                      crash revision.
562    component_to_regression_dict: A dictionary mapping component to its
563                                  regression.
564
565  Returns:
566    A list of blame results.
567  """
568  if component_to_regression_dict:
569    parsed_deps = component_to_regression_dict
570  else:
571    parsed_deps = component_to_crash_revision_dict
572
573  # Setup parser objects to use for parsing blame information.
574  svn_parser = svn_repository_parser.SVNParser(CONFIG['svn'])
575  git_parser = git_repository_parser.GitParser(parsed_deps, CONFIG['git'])
576  parsers = {}
577  parsers['svn'] = svn_parser
578  parsers['git'] = git_parser
579
580  # Create and generate the blame objects from the callstack.
581  blame_list = blame.BlameList()
582  blame_list.FindBlame(callstack, component_to_crash_revision_dict,
583                       component_to_regression_dict,
584                       parsers)
585
586  blame_list.FilterAndSortBlameList()
587  return crash_utils.BlameListToResultList(blame_list)
588
589
590def FindItForCrash(stacktrace_list,
591                   callstack,
592                   component_to_regression_dict,
593                   component_to_crash_revision_dict):
594  """Finds the culprit CL from the list of stacktrace.
595
596  Args:
597    stacktrace_list: A list of stacktraces to look for, in the order of
598                     decreasing significance.
599    callstack: A callstack object to show blame information for, if there are
600               no results for all stacktraces in the stacktrace_list.
601    component_to_regression_dict: A parsed regression information as a
602                                  result of parsing DEPS file.
603    component_to_crash_revision_dict: A parsed crash revision information.
604
605  Returns:
606    A list of result objects, with the message how the result is created.
607  """
608  # If regression information is not available, return blame information.
609  if not component_to_regression_dict:
610    result = GenerateAndFilterBlameList(callstack,
611                                        component_to_crash_revision_dict,
612                                        component_to_regression_dict)
613    if result:
614      return_message = (
615          'Regression information is not available. The result is '
616          'the blame information.')
617    else:
618      return_message = ('Findit could not find any suspected CLs.')
619
620    return (return_message, result)
621
622  for stacktrace in stacktrace_list:
623    # Check the next stacktrace if current one is empty.
624    if not stacktrace.stack_list:
625      continue
626
627    # Get the crash stack for this stacktrace, and extract crashing components
628    # from it.
629    main_stack = stacktrace.GetCrashStack()
630    components = ParseCrashComponents(main_stack)
631
632    result_for_stacktrace = FindMatchForStacktrace(
633        stacktrace, components, component_to_regression_dict)
634    filtered_result = FilterAndGenerateReasonForMatches(result_for_stacktrace)
635
636    # If the result is empty, check the next stacktrace. Else, return the
637    # filtered result.
638    if not filtered_result:
639      continue
640
641    return_message = (
642        'The result is a list of CLs that change the crashed files.')
643    return (return_message, filtered_result)
644
645  # If no match is found, return the blame information for the input
646  # callstack.
647  result = GenerateAndFilterBlameList(
648      callstack, component_to_crash_revision_dict,
649      component_to_regression_dict)
650
651  if result:
652    return_message = (
653        'No CL in the regression range changes the crashed files. '
654        'The result is the blame information.')
655
656  # When findit could not find any CL that changes file in stacktrace or if
657  # if cannot get any blame information, return a message saying that no
658  # results are available.
659  else:
660    return_message = ('Findit could not find any suspected CLs.')
661
662  return (return_message, result)
663
664