1# Copyright (c) 2014 The Chromium Authors. All rights reserved. 2# Use of this source code is governed by a BSD-style license that can be 3# found in the LICENSE file. 4 5import os 6from threading import Lock 7 8import blame 9from common import utils 10import component_dictionary 11import crash_utils 12import git_repository_parser 13import match_set 14import svn_repository_parser 15 16 17LINE_CHANGE_PRIORITY = 1 18FILE_CHANGE_PRIORITY = 2 19_THIS_DIR = os.path.abspath(os.path.dirname(__file__)) 20CONFIG = crash_utils.ParseURLsFromConfig(os.path.join(_THIS_DIR, 21 'config.ini')) 22 23 24def GenerateMatchEntry( 25 matches, revision_info, revision_number, file_path, function, 26 component_path, component_name, crashed_line_numbers, stack_frame_indices, 27 file_change_type, repository_parser): 28 """Generates a match object and adds it to the match set. 29 30 Args: 31 matches: A matchset object, a map from CL to a match object. 32 revision_info: The revision information, a map from fields (message, 33 changed files, etc) to its values. 34 revision_number: The SVN revision number or git hash. 35 file_path: The path of the file. 36 function: The function that caused an crash. 37 component_path: The path of the component this file is from. 38 component_name: The name of the component the file is from. 39 crashed_line_numbers: The list of the lines in the file that caused 40 the crash. 41 stack_frame_indices: The list of positions of this file within a stack. 42 file_change_type: Whether file is modified, added or deleted. 43 repository_parser: The parser object to parse line diff. 44 """ 45 # Check if this CL should be ignored. 46 with matches.matches_lock: 47 if revision_number in matches.cls_to_ignore: 48 return 49 50 # If this CL is not already identified as suspected, create a new entry. 51 if revision_number not in matches.matches: 52 match = match_set.Match(revision_info, component_name) 53 message = revision_info['message'] 54 # TODO(jeun): Don't hold lock while issuing http request. 55 match.ParseMessage(message, matches.codereview_api_url) 56 57 # If this match is a revert, add to the set of CLs to be ignored. 58 if match.is_revert: 59 matches.cls_to_ignore.add(revision_number) 60 61 # If this match has info on what CL it is reverted from, add that CL. 62 if match.revert_of: 63 matches.cls_to_ignore.add(match.revert_of) 64 65 return 66 67 matches.matches[revision_number] = match 68 69 else: 70 match = matches.matches[revision_number] 71 72 (diff_url, changed_line_numbers, changed_line_contents) = ( 73 repository_parser.ParseLineDiff( 74 file_path, component_path, file_change_type, revision_number)) 75 76 # Ignore this match if the component is not supported for svn. 77 if not diff_url: 78 return 79 80 # Find the intersection between the lines that this file crashed on and 81 # the changed lines. 82 (line_number_intersection, stack_frame_index_intersection, functions) = ( 83 crash_utils.Intersection( 84 crashed_line_numbers, stack_frame_indices, changed_line_numbers, 85 function)) 86 87 # Find the minimum distance between the changed lines and crashed lines. 88 (min_distance, min_crashed_line, min_changed_line) = \ 89 crash_utils.FindMinLineDistance(crashed_line_numbers, 90 changed_line_numbers) 91 92 # Check whether this CL changes the crashed lines or not. 93 if line_number_intersection: 94 priority = LINE_CHANGE_PRIORITY 95 else: 96 priority = FILE_CHANGE_PRIORITY 97 98 # Add the parsed information to the object. 99 with matches.matches_lock: 100 match.crashed_line_numbers.append(line_number_intersection) 101 102 file_name = file_path.split('/')[-1] 103 match.changed_files.append(file_name) 104 105 # Update the min distance only if it is less than the current one. 106 if min_distance < match.min_distance: 107 match.min_distance = min_distance 108 match.min_distance_info = (file_name, min_crashed_line, min_changed_line) 109 110 # If this CL does not change the crashed line, all occurrence of this 111 # file in the stack has the same priority. 112 if not stack_frame_index_intersection: 113 stack_frame_index_intersection = stack_frame_indices 114 functions = function 115 match.stack_frame_indices.append(stack_frame_index_intersection) 116 match.changed_file_urls.append(diff_url) 117 match.priorities.append(priority) 118 match.function_list.append(functions) 119 120 121def FindMatch(revisions_info_map, file_to_revision_info, file_to_crash_info, 122 component_path, component_name, repository_parser, 123 codereview_api_url): 124 """Finds a CL that modifies file in the stacktrace. 125 126 Args: 127 revisions_info_map: A dictionary mapping revision number to the CL 128 information. 129 file_to_revision_info: A dictionary mapping file to the revision that 130 modifies it. 131 file_to_crash_info: A dictionary mapping file to its occurrence in 132 stacktrace. 133 component_path: The path of the component to search for. 134 component_name: The name of the component to search for. 135 repository_parser: The parser object to parse the line diff. 136 codereview_api_url: A code review url to retrieve data from. 137 138 Returns: 139 Matches, a set of match objects. 140 """ 141 matches = match_set.MatchSet(codereview_api_url) 142 143 tasks = [] 144 # Iterate through the crashed files in the stacktrace. 145 for crashed_file_path in file_to_crash_info: 146 # Ignore header file. 147 if crashed_file_path.endswith('.h'): 148 continue 149 150 # If the file in the stacktrace is not changed in any commits, continue. 151 for changed_file_path in file_to_revision_info: 152 changed_file_name = changed_file_path.split('/')[-1].lower() 153 crashed_file_name = crashed_file_path.split('/')[-1].lower() 154 if changed_file_name != crashed_file_name: 155 continue 156 157 if not crash_utils.GuessIfSameSubPath( 158 changed_file_path.lower(), crashed_file_path.lower()): 159 continue 160 161 crashed_line_numbers = file_to_crash_info.GetCrashedLineNumbers( 162 crashed_file_path) 163 stack_frame_nums = file_to_crash_info.GetCrashStackFrameIndices( 164 crashed_file_path) 165 functions = file_to_crash_info.GetCrashFunctions(crashed_file_path) 166 167 # Iterate through the CLs that this file path is changed. 168 for (cl, file_change_type) in file_to_revision_info[changed_file_path]: 169 # If the file change is delete, ignore this CL. 170 if file_change_type == 'D': 171 continue 172 173 revision = revisions_info_map[cl] 174 175 tasks.append({ 176 'function': GenerateMatchEntry, 177 'args':[matches, revision, cl, changed_file_path, functions, 178 component_path, component_name, crashed_line_numbers, 179 stack_frame_nums, file_change_type, 180 repository_parser]}) 181 182 # Run all the tasks. 183 crash_utils.RunTasks(tasks) 184 185 matches.RemoveRevertedCLs() 186 187 return matches 188 189 190def FindMatchForComponent(component_path, file_to_crash_info, changelog, 191 callstack_priority, results, results_lock): 192 """Parses changelog and finds suspected CLs for a given component. 193 194 Args: 195 component_path: The path of component to look for the culprit CL. 196 file_to_crash_info: A dictionary mapping file to its occurrence in 197 stackframe. 198 changelog: The parsed changelog for this component. 199 callstack_priority: The priority of this call stack, 0 if from crash stack, 200 1 if from freed, 2 if from previously allocated. 201 results: A dictionary to store the result. 202 results_lock: A lock that guards results. 203 """ 204 (repository_parser, component_name, revisions, file_to_revision_map) = \ 205 changelog 206 207 # Find match for this component. 208 codereview_api_url = CONFIG['codereview']['review_url'] 209 component_result = FindMatch( 210 revisions, file_to_revision_map, file_to_crash_info, component_path, 211 component_name, repository_parser, codereview_api_url) 212 matches = component_result.matches 213 214 # For all the match results in a dictionary, add to the list so that it 215 # can be sorted. 216 with results_lock: 217 for cl in matches: 218 match = matches[cl] 219 results.append((callstack_priority, cl, match)) 220 221 222def FindMatchForCallstack( 223 callstack, components, component_to_changelog_map, results, 224 results_lock): 225 """Finds culprit cl for a stack within a stacktrace. 226 227 For each components to look for, create new thread that computes the matches 228 and join the results at the end. 229 230 Args: 231 callstack: A callstack in a stacktrace to find the result for. 232 components: A set of components to look for. 233 component_to_changelog_map: A map from component to its parsed changelog. 234 results: A list to aggregrate results from all stacktraces. 235 results_lock: A lock that guards results. 236 """ 237 # Create component dictionary from the component and call stack. 238 component_dict = component_dictionary.ComponentDictionary(callstack, 239 components) 240 callstack_priority = callstack.priority 241 242 # Iterate through all components. 243 for component_path in component_dict: 244 # If the component to consider in this callstack is not in the parsed list 245 # of components, ignore this one. 246 if component_path not in component_to_changelog_map: 247 continue 248 249 changelog = component_to_changelog_map[component_path] 250 file_to_crash_info = component_dict.GetFileDict(component_path) 251 FindMatchForComponent(component_path, file_to_crash_info, changelog, 252 callstack_priority, results, results_lock) 253 254 255def FindMatchForStacktrace(stacktrace, components, 256 component_to_regression_dict): 257 """Finds the culprit CL for stacktrace. 258 259 The passed stacktrace is either from release build stacktrace 260 or debug build stacktrace. 261 262 Args: 263 stacktrace: A list of parsed stacks within a stacktrace. 264 components: A set of components to look for. 265 component_to_regression_dict: A dictionary mapping component path to 266 its regression. 267 268 Returns: 269 A list of match results from all stacks. 270 """ 271 # A list to aggregate results from all the callstacks in the stacktrace. 272 results = [] 273 results_lock = Lock() 274 275 # Setup parsers. 276 svn_parser = svn_repository_parser.SVNParser(CONFIG['svn']) 277 git_parser = git_repository_parser.GitParser(component_to_regression_dict, 278 CONFIG['git']) 279 280 # Create a cache of parsed revisions. 281 component_to_changelog_map = {} 282 for component_path in components: 283 component_object = component_to_regression_dict[component_path] 284 range_start = component_object['old_revision'] 285 range_end = component_object['new_revision'] 286 287 # If range start is 0, the range is too large and the crash has been 288 # introduced the archived build, so ignore this case. 289 if range_start == '0': 290 continue 291 292 component_name = component_to_regression_dict[component_path]['name'] 293 294 is_git = utils.IsGitHash(range_start) 295 if is_git: 296 repository_parser = git_parser 297 else: 298 repository_parser = svn_parser 299 300 (revisions, file_to_revision_map) = repository_parser.ParseChangelog( 301 component_path, range_start, range_end) 302 303 # If the returned map from ParseChangeLog is empty, we don't need to look 304 # further because either the parsing failed or the changelog is empty. 305 if not (revisions and file_to_revision_map): 306 continue 307 308 component_to_changelog_map[component_path] = (repository_parser, 309 component_name, 310 revisions, 311 file_to_revision_map) 312 313 # Analyze each of the call stacks in the stacktrace. 314 for callstack in stacktrace.stack_list: 315 FindMatchForCallstack(callstack, components, component_to_changelog_map, 316 results, results_lock) 317 318 return results 319 320 321def SortMatchesFunction(match_with_stack_priority): 322 """A function to sort the match triple. 323 324 Currently, it sorts the list by: 325 1) The highest priority file change in the CL (changing crashed line is 326 higher priority than just changing the file). 327 2) The callstack this match is computed (crash stack, freed, allocation). 328 3) The minimum stack frame number of the changed file in the match. 329 4) The number of files this CL changes (higher the better). 330 5) The minimum distance between the lines that the CL changes and crashed 331 lines. 332 333 Args: 334 match_with_stack_priority: A match object, with the CL it is from and what 335 callstack it is from. 336 337 Returns: 338 A sort key. 339 """ 340 (stack_priority, _, match) = match_with_stack_priority 341 342 return (min(match.priorities), 343 stack_priority, 344 match.min_distance, 345 crash_utils.FindMinStackFrameNumber(match.stack_frame_indices, 346 match.priorities), 347 -len(match.changed_files)) 348 349 350def SortAndFilterMatches(matches, num_important_frames=5): 351 """Filters the list of potential culprit CLs to remove noise. 352 353 Args: 354 matches: A list containing match results. 355 num_important_frames: A number of frames on the top of the frame to Check 356 for when filtering the results. A match with a file 357 that is in top num_important_frames of the stacktrace 358 is regarded more probable then others. 359 360 Returns: 361 Filtered match results. 362 """ 363 new_matches = [] 364 line_changed = False 365 is_important_frame = False 366 highest_priority_stack = crash_utils.INFINITY 367 matches.sort(key=SortMatchesFunction) 368 # Iterate through the matches to find out what results are significant. 369 for stack_priority, cl, match in matches: 370 # Check if the current match changes crashed line. 371 is_line_change = (min(match.priorities) == LINE_CHANGE_PRIORITY) 372 373 # Check which stack this match is from, and finds the highest priority 374 # callstack up to this point. 375 current_stack = stack_priority 376 if current_stack < highest_priority_stack: 377 highest_priority_stack = current_stack 378 379 # Check if current match changes a file that occurs in crash state. 380 flattened_stack_frame_indices = [frame for frame_indices in 381 match.stack_frame_indices 382 for frame in frame_indices] 383 current_is_important = ( 384 min(flattened_stack_frame_indices) < num_important_frames) 385 386 # This match and anything lower than this should be ignored if: 387 # - Current match does not change crashed lines but there are matches 388 # that do so. 389 # - Current match is not in crash state but there are matches in it. 390 # - There are other matches that came from higher priority stack. 391 if (line_changed and not is_line_change) or ( 392 is_important_frame and not current_is_important) or ( 393 current_stack > highest_priority_stack): 394 break 395 396 # Update the variables. 397 if is_line_change: 398 line_changed = True 399 if current_is_important: 400 is_important_frame = True 401 402 # Add current match to the filtered result. 403 new_matches.append((stack_priority, cl, match)) 404 405 return new_matches 406 407 408def GenerateReasonForMatches(matches): 409 """Generates a reason that a match (CL) is a culprit cl. 410 411 Args: 412 matches: A list of match objects. 413 """ 414 # Iterate through the matches in the list. 415 for i, _, match in matches: 416 reason = [] 417 418 # Zip the files in the match by the reason they are suspected 419 # (how the file is modified). 420 match_by_priority = zip( 421 match.priorities, match.crashed_line_numbers, match.changed_files, 422 match.stack_frame_indices, match.function_list) 423 424 # Sort the zipped changed files in the match by their priority so that the 425 # changed lines comes first in the reason. 426 match_by_priority.sort( 427 key=lambda (priority, crashed_line_numbers, file_name, 428 stack_frame_indices, function_list): priority) 429 430 # Iterate through the sorted match. 431 for i in range(len(match_by_priority)): 432 (priority, crashed_line_numbers, file_name, stack_frame_indices, 433 function_list) = match_by_priority[i] 434 435 # If the file in the match is a line change, append a explanation. 436 if priority == LINE_CHANGE_PRIORITY: 437 crashed_line_numbers = [crashed_line_number 438 for lines in crashed_line_numbers 439 for crashed_line_number in lines] 440 reason.append( 441 'Lines %s of file %s which potentially caused crash ' 442 'are changed in this cl (%s).\n' % 443 (utils.JoinLineNumbers(crashed_line_numbers, accepted_gap=4), 444 file_name, 445 crash_utils.PrettifyFrameInfo(stack_frame_indices, function_list))) 446 447 else: 448 # Get all the files that are not line change. 449 rest_of_the_files = match_by_priority[i:] 450 451 if len(rest_of_the_files) == 1: 452 file_string = 'File %s is changed in this cl ' 453 else: 454 file_string = 'Files %s are changed in this cl ' 455 456 # Create a list of file names, and prettify the list. 457 file_names = [ 458 file_name for (_, _, file_name, _, _) in rest_of_the_files] 459 pretty_file_names = crash_utils.PrettifyList(file_names) 460 461 # Add the reason, break because we took care of the rest of the files. 462 file_string += ('(and is part of stack %s)' % 463 crash_utils.PrettifyFrameInfo(stack_frame_indices, function_list)) 464 reason.append(file_string % pretty_file_names) 465 break 466 467 # Set the reason as string. 468 match.reason = '\n'.join(reason) 469 470 471def CombineMatches(matches): 472 """Combine possible duplicates in matches. 473 474 Args: 475 matches: A list of matches object, along with its callstack priority and 476 CL it is from. 477 Returns: 478 A combined list of matches. 479 """ 480 combined_matches = [] 481 482 for stack_index, cl, match in matches: 483 found_match = None 484 485 # Iterate through the list of combined matches. 486 for _, cl_combined, match_combined in combined_matches: 487 # Check for if current CL is already higher up in the result. 488 if cl == cl_combined: 489 found_match = match_combined 490 break 491 492 # If current match is not already in, add it to the list of matches. 493 if not found_match: 494 combined_matches.append((stack_index, cl, match)) 495 continue 496 497 # Combine the reason if the current match is already in there. 498 found_match.reason += match.reason 499 if match.min_distance < found_match.min_distance: 500 found_match.min_distance = match.min_distance 501 found_match.min_distance_info = match.min_distance_info 502 503 for stack_index, cl, match in combined_matches: 504 if match.min_distance_info: 505 file_name, min_crashed_line, min_changed_line = match.min_distance_info 506 match.reason += \ 507 ('\nMinimum distance from crash line to modified line: %d. ' 508 '(file: %s, crashed on: %d, modified: %d).\n' % 509 (match.min_distance, file_name, min_crashed_line, min_changed_line)) 510 511 return combined_matches 512 513 514def FilterAndGenerateReasonForMatches(result): 515 """A wrapper function. 516 517 It generates reasons for the matches and returns string representation 518 of filtered results. 519 520 Args: 521 result: A list of match objects. 522 523 Returns: 524 A string representation of filtered results. 525 """ 526 new_result = SortAndFilterMatches(result) 527 GenerateReasonForMatches(new_result) 528 combined_matches = CombineMatches(new_result) 529 return crash_utils.MatchListToResultList(combined_matches) 530 531 532def ParseCrashComponents(main_stack): 533 """Parses the crashing component. 534 535 Crashing components is a component that top_n_frames of the stacktrace is 536 from. 537 538 Args: 539 main_stack: Main stack from the stacktrace. 540 541 Returns: 542 A set of components. 543 """ 544 components = set() 545 546 for frame in main_stack.frame_list: 547 components.add(frame.component_path) 548 549 return components 550 551 552def GenerateAndFilterBlameList(callstack, component_to_crash_revision_dict, 553 component_to_regression_dict): 554 """A wrapper function. 555 556 Finds blame information for stack and returns string representation. 557 558 Args: 559 callstack: A callstack to find the blame information. 560 component_to_crash_revision_dict: A dictionary mapping component to its 561 crash revision. 562 component_to_regression_dict: A dictionary mapping component to its 563 regression. 564 565 Returns: 566 A list of blame results. 567 """ 568 if component_to_regression_dict: 569 parsed_deps = component_to_regression_dict 570 else: 571 parsed_deps = component_to_crash_revision_dict 572 573 # Setup parser objects to use for parsing blame information. 574 svn_parser = svn_repository_parser.SVNParser(CONFIG['svn']) 575 git_parser = git_repository_parser.GitParser(parsed_deps, CONFIG['git']) 576 parsers = {} 577 parsers['svn'] = svn_parser 578 parsers['git'] = git_parser 579 580 # Create and generate the blame objects from the callstack. 581 blame_list = blame.BlameList() 582 blame_list.FindBlame(callstack, component_to_crash_revision_dict, 583 component_to_regression_dict, 584 parsers) 585 586 blame_list.FilterAndSortBlameList() 587 return crash_utils.BlameListToResultList(blame_list) 588 589 590def FindItForCrash(stacktrace_list, 591 callstack, 592 component_to_regression_dict, 593 component_to_crash_revision_dict): 594 """Finds the culprit CL from the list of stacktrace. 595 596 Args: 597 stacktrace_list: A list of stacktraces to look for, in the order of 598 decreasing significance. 599 callstack: A callstack object to show blame information for, if there are 600 no results for all stacktraces in the stacktrace_list. 601 component_to_regression_dict: A parsed regression information as a 602 result of parsing DEPS file. 603 component_to_crash_revision_dict: A parsed crash revision information. 604 605 Returns: 606 A list of result objects, with the message how the result is created. 607 """ 608 # If regression information is not available, return blame information. 609 if not component_to_regression_dict: 610 result = GenerateAndFilterBlameList(callstack, 611 component_to_crash_revision_dict, 612 component_to_regression_dict) 613 if result: 614 return_message = ( 615 'Regression information is not available. The result is ' 616 'the blame information.') 617 else: 618 return_message = ('Findit could not find any suspected CLs.') 619 620 return (return_message, result) 621 622 for stacktrace in stacktrace_list: 623 # Check the next stacktrace if current one is empty. 624 if not stacktrace.stack_list: 625 continue 626 627 # Get the crash stack for this stacktrace, and extract crashing components 628 # from it. 629 main_stack = stacktrace.GetCrashStack() 630 components = ParseCrashComponents(main_stack) 631 632 result_for_stacktrace = FindMatchForStacktrace( 633 stacktrace, components, component_to_regression_dict) 634 filtered_result = FilterAndGenerateReasonForMatches(result_for_stacktrace) 635 636 # If the result is empty, check the next stacktrace. Else, return the 637 # filtered result. 638 if not filtered_result: 639 continue 640 641 return_message = ( 642 'The result is a list of CLs that change the crashed files.') 643 return (return_message, filtered_result) 644 645 # If no match is found, return the blame information for the input 646 # callstack. 647 result = GenerateAndFilterBlameList( 648 callstack, component_to_crash_revision_dict, 649 component_to_regression_dict) 650 651 if result: 652 return_message = ( 653 'No CL in the regression range changes the crashed files. ' 654 'The result is the blame information.') 655 656 # When findit could not find any CL that changes file in stacktrace or if 657 # if cannot get any blame information, return a message saying that no 658 # results are available. 659 else: 660 return_message = ('Findit could not find any suspected CLs.') 661 662 return (return_message, result) 663 664