1# Copyright (c) 2014 The Chromium Authors. All rights reserved. 2# Use of this source code is governed by a BSD-style license that can be 3# found in the LICENSE file. 4 5import atexit 6import cgi 7import ConfigParser 8import json 9import os 10import Queue 11import threading 12import time 13 14from common import utils 15from result import Result 16 17 18INFINITY = float('inf') 19 20MAX_THREAD_NUMBER = 10 21TASK_QUEUE = None 22 23 24def SignalWorkerThreads(): 25 global TASK_QUEUE 26 if not TASK_QUEUE: 27 return 28 29 for i in range(MAX_THREAD_NUMBER): 30 TASK_QUEUE.put(None) 31 32 # Give worker threads a chance to exit. 33 # Workaround the harmless bug in python 2.7 below. 34 time.sleep(1) 35 36 37atexit.register(SignalWorkerThreads) 38 39 40def Worker(): 41 global TASK_QUEUE 42 while True: 43 try: 44 task = TASK_QUEUE.get() 45 if not task: 46 return 47 except TypeError: 48 # According to http://bugs.python.org/issue14623, this is a harmless bug 49 # in python 2.7 which won't be fixed. 50 # The exception is raised on daemon threads when python interpreter is 51 # shutting down. 52 return 53 54 function, args, kwargs, result_semaphore = task 55 try: 56 function(*args, **kwargs) 57 except: 58 pass 59 finally: 60 # Signal one task is done in case of exception. 61 result_semaphore.release() 62 63 64def RunTasks(tasks): 65 """Run given tasks. Not thread-safe: no concurrent calls of this function. 66 67 Return after all tasks were completed. A task is a dict as below: 68 { 69 'function': the function to call, 70 'args': the positional argument to pass to the function, 71 'kwargs': the key-value arguments to pass to the function, 72 } 73 """ 74 if not tasks: 75 return 76 77 global TASK_QUEUE 78 if not TASK_QUEUE: 79 TASK_QUEUE = Queue.Queue() 80 for index in range(MAX_THREAD_NUMBER): 81 thread = threading.Thread(target=Worker, name='worker_%s' % index) 82 # Set as daemon, so no join is needed. 83 thread.daemon = True 84 thread.start() 85 86 result_semaphore = threading.Semaphore(0) 87 # Push task to task queue for execution. 88 for task in tasks: 89 TASK_QUEUE.put( 90 (task['function'], task.get('args', []), 91 task.get('kwargs', {}), result_semaphore)) 92 93 # Wait until all tasks to be executed. 94 for _ in tasks: 95 result_semaphore.acquire() 96 97 98def GetRepositoryType(revision_number): 99 """Returns the repository type of this revision number. 100 101 Args: 102 revision_number: A revision number or git hash. 103 104 Returns: 105 'git' or 'svn', depending on the revision_number. 106 """ 107 if utils.IsGitHash(revision_number): 108 return 'git' 109 else: 110 return 'svn' 111 112 113def ParseURLsFromConfig(file_name): 114 """Parses URLS from the config file. 115 116 The file should be in python config format, where svn section is in the 117 format "svn:component_path". 118 Each of the section for svn should contain changelog_url, revision_url, 119 diff_url and blame_url. 120 121 Args: 122 file_name: The name of the file that contains URL information. 123 124 Returns: 125 A dictionary that maps repository type to list of URLs. For svn, it maps 126 key 'svn' to another dictionary, which maps component path to the URLs 127 as explained above. For git, it maps to the URLs as explained above. 128 """ 129 config = ConfigParser.ConfigParser() 130 131 # Get the absolute path of the config file, and read the file. If it fails, 132 # return none. 133 config_file_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 134 file_name) 135 config.read(config_file_path) 136 if not config: 137 return None 138 139 # Iterate through the config file, check for sections. 140 config_dict = {} 141 for section in config.sections(): 142 # These two do not need another layer of dictionary, so add it and go 143 # to next section. 144 if ':' not in section: 145 for option in config.options(section): 146 if section not in config_dict: 147 config_dict[section] = {} 148 149 url = config.get(section, option) 150 config_dict[section][option] = url 151 152 continue 153 154 # Get repository type and component name from the section name. 155 repository_type_and_component = section.split(':') 156 repository_type = repository_type_and_component[0] 157 component_path = repository_type_and_component[1] 158 159 # Add 'svn' as the key, if it is not already there. 160 if repository_type not in config_dict: 161 config_dict[repository_type] = {} 162 url_map_for_repository = config_dict[repository_type] 163 164 # Add the path to the 'svn', if it is not already there. 165 if component_path not in url_map_for_repository: 166 url_map_for_repository[component_path] = {} 167 type_to_url = url_map_for_repository[component_path] 168 169 # Add all URLs to this map. 170 for option in config.options(section): 171 url = config.get(section, option) 172 type_to_url[option] = url 173 174 return config_dict 175 176 177def NormalizePath(path, parsed_deps): 178 """Normalizes the path. 179 180 Args: 181 path: A string representing a path. 182 parsed_deps: A map from component path to its component name, repository, 183 etc. 184 185 Returns: 186 A tuple containing a component this path is in (e.g blink, skia, etc) 187 and a path in that component's repository. Returns None if the component 188 repository is not supported, i.e from googlecode. 189 """ 190 # First normalize the path by retreiving the normalized path. 191 normalized_path = os.path.normpath(path).replace('\\', '/') 192 193 # Iterate through all component paths in the parsed DEPS, in the decreasing 194 # order of the length of the file path. 195 for component_path in sorted(parsed_deps, 196 key=(lambda path: -len(path))): 197 # new_component_path is the component path with 'src/' removed. 198 new_component_path = component_path 199 if new_component_path.startswith('src/') and new_component_path != 'src/': 200 new_component_path = new_component_path[len('src/'):] 201 202 # We need to consider when the lowercased component path is in the path, 203 # because syzyasan build returns lowercased file path. 204 lower_component_path = new_component_path.lower() 205 206 # If this path is the part of file path, this file must be from this 207 # component. 208 if new_component_path in normalized_path or \ 209 lower_component_path in normalized_path: 210 211 # Case when the retreived path is in lowercase. 212 if lower_component_path in normalized_path: 213 current_component_path = lower_component_path 214 else: 215 current_component_path = new_component_path 216 217 # Normalize the path by stripping everything off the component's relative 218 # path. 219 normalized_path = normalized_path.split(current_component_path, 1)[1] 220 lower_normalized_path = normalized_path.lower() 221 222 # Add 'src/' or 'Source/' at the front of the normalized path, depending 223 # on what prefix the component path uses. For example, blink uses 224 # 'Source' but chromium uses 'src/', and blink component path is 225 # 'src/third_party/WebKit/Source', so add 'Source/' in front of the 226 # normalized path. 227 if not (lower_normalized_path.startswith('src/') or 228 lower_normalized_path.startswith('source/')): 229 230 if (lower_component_path.endswith('src/') or 231 lower_component_path.endswith('source/')): 232 normalized_path = (current_component_path.split('/')[-2] + '/' + 233 normalized_path) 234 235 else: 236 normalized_path = 'src/' + normalized_path 237 238 component_name = parsed_deps[component_path]['name'] 239 240 return (component_path, component_name, normalized_path) 241 242 # If the path does not match any component, default to chromium. 243 return ('src/', 'chromium', normalized_path) 244 245 246def SplitRange(regression): 247 """Splits a range as retrieved from clusterfuzz. 248 249 Args: 250 regression: A string in format 'r1234:r5678'. 251 252 Returns: 253 A list containing two numbers represented in string, for example 254 ['1234','5678']. 255 """ 256 if not regression: 257 return None 258 259 revisions = regression.split(':') 260 261 # If regression information is not available, return none. 262 if len(revisions) != 2: 263 return None 264 265 range_start = revisions[0] 266 range_end = revisions[1] 267 268 # Strip 'r' off the range start/end. Not using lstrip to avoid the case when 269 # the range is in git hash and it starts with 'r'. 270 if range_start.startswith('r'): 271 range_start = range_start[1:] 272 273 if range_end.startswith('r'): 274 range_end = range_end[1:] 275 276 return [range_start, range_end] 277 278 279def LoadJSON(json_string): 280 """Loads json object from string, or None. 281 282 Args: 283 json_string: A string to get object from. 284 285 Returns: 286 JSON object if the string represents a JSON object, None otherwise. 287 """ 288 try: 289 data = json.loads(json_string) 290 except ValueError: 291 data = None 292 293 return data 294 295 296def GetDataFromURL(url): 297 """Retrieves raw data from URL, tries 10 times. 298 299 Args: 300 url: URL to get data from. 301 retries: Number of times to retry connection. 302 303 Returns: 304 None if the data retrieval fails, or the raw data. 305 """ 306 status_code, data = utils.GetHttpClient().Get(url, retries=10) 307 if status_code == 200: 308 return data 309 else: 310 # Return None if it fails to read data. 311 return None 312 313 314def FindMinLineDistance(crashed_line_list, changed_line_numbers, 315 line_range=3): 316 """Calculates how far the changed line is from one of the crashes. 317 318 Finds the minimum distance between the lines that the file crashed on 319 and the lines that the file changed. For example, if the file crashed on 320 line 200 and the CL changes line 203,204 and 205, the function returns 3. 321 322 Args: 323 crashed_line_list: A list of lines that the file crashed on. 324 changed_line_numbers: A list of lines that the file changed. 325 line_range: Number of lines to look back for. 326 327 Returns: 328 The minimum distance. If either of the input lists is empty, 329 it returns inf. 330 331 """ 332 min_distance = INFINITY 333 crashed_line = -1 334 changed_line = -1 335 336 crashed_line_numbers = set() 337 for crashed_line_range in crashed_line_list: 338 for crashed_line in crashed_line_range: 339 for line in range(crashed_line - line_range, crashed_line + 1): 340 crashed_line_numbers.add(line) 341 342 for line in crashed_line_numbers: 343 for distance in changed_line_numbers: 344 # Find the current distance and update the min if current distance is 345 # less than current min. 346 current_distance = abs(line - distance) 347 if current_distance < min_distance: 348 min_distance = current_distance 349 crashed_line = line 350 changed_line = distance 351 352 return (min_distance, crashed_line, changed_line) 353 354 355def GuessIfSameSubPath(path1, path2): 356 """Guesses if two paths represent same path. 357 358 Compares the name of the folders in the path (by split('/')), and checks 359 if they match either more than 3 or min of path lengths. 360 361 Args: 362 path1: First path. 363 path2: Second path to compare. 364 365 Returns: 366 True if it they are thought to be a same path, False otherwise. 367 """ 368 path1 = path1.split('/') 369 path2 = path2.split('/') 370 371 intersection = set(path1).intersection(set(path2)) 372 return len(intersection) >= (min(3, min(len(path1), len(path2)))) 373 374 375def FindMinStackFrameNumber(stack_frame_indices, priorities): 376 """Finds the minimum stack number, from the list of stack numbers. 377 378 Args: 379 stack_frame_indices: A list of lists containing stack position. 380 priorities: A list of of priority for each file. 381 382 Returns: 383 Inf if stack_frame_indices is empty, minimum stack number otherwise. 384 """ 385 # Get the indexes of the highest priority (or low priority number). 386 highest_priority = min(priorities) 387 highest_priority_indices = [] 388 for i in range(len(priorities)): 389 if priorities[i] == highest_priority: 390 highest_priority_indices.append(i) 391 392 # Gather the list of stack frame numbers for the files that change the 393 # crash lines. 394 flattened = [] 395 for i in highest_priority_indices: 396 flattened += stack_frame_indices[i] 397 398 # If no stack frame information is available, return inf. Else, return min. 399 if not flattened: 400 return INFINITY 401 else: 402 return min(flattened) 403 404 405def AddHyperlink(text, link): 406 """Returns a string with HTML link tag. 407 408 Args: 409 text: A string to add link. 410 link: A link to add to the string. 411 412 Returns: 413 A string with hyperlink added. 414 """ 415 sanitized_link = cgi.escape(link, quote=True) 416 sanitized_text = cgi.escape(str(text)) 417 return '<a href="%s">%s</a>' % (sanitized_link, sanitized_text) 418 419 420def PrettifyList(items): 421 """Returns a string representation of a list. 422 423 It adds comma in between the elements and removes the brackets. 424 Args: 425 items: A list to prettify. 426 Returns: 427 A string representation of the list. 428 """ 429 return ', '.join(map(str, items)) 430 431 432def PrettifyFrameInfo(frame_indices, functions): 433 """Return a string to represent the frames with functions.""" 434 frames = [] 435 for frame_index, function in zip(frame_indices, functions): 436 frames.append('frame #%s, "%s"' % (frame_index, function.split('(')[0])) 437 return '; '.join(frames) 438 439 440def PrettifyFiles(file_list): 441 """Returns a string representation of a list of file names. 442 443 Args: 444 file_list: A list of tuple, (file_name, file_url). 445 Returns: 446 A string representation of file names with their urls. 447 """ 448 ret = ['\n'] 449 for file_name, file_url in file_list: 450 ret.append(' %s\n' % AddHyperlink(file_name, file_url)) 451 return ''.join(ret) 452 453 454def Intersection(crashed_line_list, stack_frame_index, changed_line_numbers, 455 function, line_range=3): 456 """Finds the overlap betwee changed lines and crashed lines. 457 458 Finds the intersection of the lines that caused the crash and 459 lines that the file changes. The intersection looks within 3 lines 460 of the line that caused the crash. 461 462 Args: 463 crashed_line_list: A list of lines that the file crashed on. 464 stack_frame_index: A list of positions in stack for each of the lines. 465 changed_line_numbers: A list of lines that the file changed. 466 function: A list of functions that the file crashed on. 467 line_range: Number of lines to look backwards from crashed lines. 468 469 Returns: 470 line_number_intersection: Intersection between crashed_line_list and 471 changed_line_numbers. 472 stack_frame_index_intersection: Stack number for each of the intersections. 473 """ 474 line_number_intersection = [] 475 stack_frame_index_intersection = [] 476 function_intersection = [] 477 478 # Iterate through the crashed lines, and its occurence in stack. 479 for (lines, stack_frame_index, function_name) in zip( 480 crashed_line_list, stack_frame_index, function): 481 # Also check previous 'line_range' lines. Create a set of all changed lines 482 # and lines within 3 lines range before the crashed line. 483 line_minus_n = set() 484 for line in lines: 485 for line_in_range in range(line - line_range, line + 1): 486 line_minus_n.add(line_in_range) 487 488 for changed_line in changed_line_numbers: 489 # If a CL does not change crahsed line, check next line. 490 if changed_line not in line_minus_n: 491 continue 492 493 intersected_line = set() 494 # If the changed line is exactly the crashed line, add that line. 495 for line in lines: 496 if line in changed_line_numbers: 497 intersected_line.add(line) 498 499 # If the changed line is in 3 lines of the crashed line, add the line. 500 else: 501 intersected_line.add(changed_line) 502 503 # Avoid adding the same line twice. 504 if intersected_line not in line_number_intersection: 505 line_number_intersection.append(list(intersected_line)) 506 stack_frame_index_intersection.append(stack_frame_index) 507 function_intersection.append(function_name) 508 break 509 510 return (line_number_intersection, stack_frame_index_intersection, 511 function_intersection) 512 513 514def MatchListToResultList(matches): 515 """Convert list of matches to the list of result objects. 516 517 Args: 518 matches: A list of match objects along with its stack priority and revision 519 number/git hash 520 Returns: 521 A list of result object. 522 523 """ 524 result_list = [] 525 526 for _, cl, match in matches: 527 suspected_cl = cl 528 revision_url = match.revision_url 529 component_name = match.component_name 530 author = match.author 531 reason = match.reason 532 review_url = match.review_url 533 reviewers = match.reviewers 534 # For matches, line content do not exist. 535 line_content = None 536 message = match.message 537 538 result = Result(suspected_cl, revision_url, component_name, author, reason, 539 review_url, reviewers, line_content, message) 540 result_list.append(result) 541 542 return result_list 543 544 545def BlameListToResultList(blame_list): 546 """Convert blame list to the list of result objects. 547 548 Args: 549 blame_list: A list of blame objects. 550 551 Returns: 552 A list of result objects. 553 """ 554 result_list = [] 555 556 for blame in blame_list: 557 suspected_cl = blame.revision 558 revision_url = blame.url 559 component_name = blame.component_name 560 author = blame.author 561 reason = ( 562 'The CL last changed line %s of file %s, which is stack frame %d.' % 563 (blame.line_number, blame.file, blame.stack_frame_index)) 564 # Blame object does not have review url and reviewers. 565 review_url = None 566 reviewers = None 567 line_content = blame.line_content 568 message = blame.message 569 570 result = Result(suspected_cl, revision_url, component_name, author, reason, 571 review_url, reviewers, line_content, message) 572 result_list.append(result) 573 574 return result_list 575