• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (c) 2014 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import atexit
6import cgi
7import ConfigParser
8import json
9import os
10import Queue
11import threading
12import time
13
14from common import utils
15from result import Result
16
17
18INFINITY = float('inf')
19
20MAX_THREAD_NUMBER = 10
21TASK_QUEUE = None
22
23
24def SignalWorkerThreads():
25  global TASK_QUEUE
26  if not TASK_QUEUE:
27    return
28
29  for i in range(MAX_THREAD_NUMBER):
30    TASK_QUEUE.put(None)
31
32  # Give worker threads a chance to exit.
33  # Workaround the harmless bug in python 2.7 below.
34  time.sleep(1)
35
36
37atexit.register(SignalWorkerThreads)
38
39
40def Worker():
41  global TASK_QUEUE
42  while True:
43    try:
44      task = TASK_QUEUE.get()
45      if not task:
46        return
47    except TypeError:
48      # According to http://bugs.python.org/issue14623, this is a harmless bug
49      # in python 2.7 which won't be fixed.
50      # The exception is raised on daemon threads when python interpreter is
51      # shutting down.
52      return
53
54    function, args, kwargs, result_semaphore = task
55    try:
56      function(*args, **kwargs)
57    except:
58      pass
59    finally:
60      # Signal one task is done in case of exception.
61      result_semaphore.release()
62
63
64def RunTasks(tasks):
65  """Run given tasks. Not thread-safe: no concurrent calls of this function.
66
67  Return after all tasks were completed. A task is a dict as below:
68    {
69      'function': the function to call,
70      'args': the positional argument to pass to the function,
71      'kwargs': the key-value arguments to pass to the function,
72    }
73  """
74  if not tasks:
75    return
76
77  global TASK_QUEUE
78  if not TASK_QUEUE:
79    TASK_QUEUE = Queue.Queue()
80    for index in range(MAX_THREAD_NUMBER):
81      thread = threading.Thread(target=Worker, name='worker_%s' % index)
82      # Set as daemon, so no join is needed.
83      thread.daemon = True
84      thread.start()
85
86  result_semaphore = threading.Semaphore(0)
87  # Push task to task queue for execution.
88  for task in tasks:
89    TASK_QUEUE.put(
90        (task['function'], task.get('args', []),
91         task.get('kwargs', {}), result_semaphore))
92
93  # Wait until all tasks to be executed.
94  for _ in tasks:
95    result_semaphore.acquire()
96
97
98def GetRepositoryType(revision_number):
99  """Returns the repository type of this revision number.
100
101  Args:
102    revision_number: A revision number or git hash.
103
104  Returns:
105    'git' or 'svn', depending on the revision_number.
106  """
107  if utils.IsGitHash(revision_number):
108    return 'git'
109  else:
110    return 'svn'
111
112
113def ParseURLsFromConfig(file_name):
114  """Parses URLS from the config file.
115
116  The file should be in python config format, where svn section is in the
117  format "svn:component_path".
118  Each of the section for svn should contain changelog_url, revision_url,
119  diff_url and blame_url.
120
121  Args:
122    file_name: The name of the file that contains URL information.
123
124  Returns:
125    A dictionary that maps repository type to list of URLs. For svn, it maps
126    key 'svn' to another dictionary, which maps component path to the URLs
127    as explained above. For git, it maps to the URLs as explained above.
128  """
129  config = ConfigParser.ConfigParser()
130
131  # Get the absolute path of the config file, and read the file. If it fails,
132  # return none.
133  config_file_path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
134                                  file_name)
135  config.read(config_file_path)
136  if not config:
137    return None
138
139  # Iterate through the config file, check for sections.
140  config_dict = {}
141  for section in config.sections():
142    # These two do not need another layer of dictionary, so add it and go
143    # to next section.
144    if ':' not in section:
145      for option in config.options(section):
146        if section not in config_dict:
147          config_dict[section] = {}
148
149        url = config.get(section, option)
150        config_dict[section][option] = url
151
152      continue
153
154    # Get repository type and component name from the section name.
155    repository_type_and_component = section.split(':')
156    repository_type = repository_type_and_component[0]
157    component_path = repository_type_and_component[1]
158
159    # Add 'svn' as the key, if it is not already there.
160    if repository_type not in config_dict:
161      config_dict[repository_type] = {}
162    url_map_for_repository = config_dict[repository_type]
163
164    # Add the path to the 'svn', if it is not already there.
165    if component_path not in url_map_for_repository:
166      url_map_for_repository[component_path] = {}
167    type_to_url = url_map_for_repository[component_path]
168
169    # Add all URLs to this map.
170    for option in config.options(section):
171      url = config.get(section, option)
172      type_to_url[option] = url
173
174  return config_dict
175
176
177def NormalizePath(path, parsed_deps):
178  """Normalizes the path.
179
180  Args:
181    path: A string representing a path.
182    parsed_deps: A map from component path to its component name, repository,
183                 etc.
184
185  Returns:
186    A tuple containing a component this path is in (e.g blink, skia, etc)
187    and a path in that component's repository. Returns None if the component
188    repository is not supported, i.e from googlecode.
189  """
190  # First normalize the path by retreiving the normalized path.
191  normalized_path = os.path.normpath(path).replace('\\', '/')
192
193  # Iterate through all component paths in the parsed DEPS, in the decreasing
194  # order of the length of the file path.
195  for component_path in sorted(parsed_deps,
196                               key=(lambda path: -len(path))):
197    # new_component_path is the component path with 'src/' removed.
198    new_component_path = component_path
199    if new_component_path.startswith('src/') and new_component_path != 'src/':
200      new_component_path = new_component_path[len('src/'):]
201
202    # We need to consider when the lowercased component path is in the path,
203    # because syzyasan build returns lowercased file path.
204    lower_component_path = new_component_path.lower()
205
206    # If this path is the part of file path, this file must be from this
207    # component.
208    if new_component_path in normalized_path or \
209        lower_component_path in normalized_path:
210
211      # Case when the retreived path is in lowercase.
212      if lower_component_path in normalized_path:
213        current_component_path = lower_component_path
214      else:
215        current_component_path = new_component_path
216
217      # Normalize the path by stripping everything off the component's relative
218      # path.
219      normalized_path = normalized_path.split(current_component_path, 1)[1]
220      lower_normalized_path = normalized_path.lower()
221
222      # Add 'src/' or 'Source/' at the front of the normalized path, depending
223      # on what prefix the component path uses. For example, blink uses
224      # 'Source' but chromium uses 'src/', and blink component path is
225      # 'src/third_party/WebKit/Source', so add 'Source/' in front of the
226      # normalized path.
227      if not (lower_normalized_path.startswith('src/') or
228              lower_normalized_path.startswith('source/')):
229
230        if (lower_component_path.endswith('src/') or
231            lower_component_path.endswith('source/')):
232          normalized_path = (current_component_path.split('/')[-2] + '/' +
233                             normalized_path)
234
235        else:
236          normalized_path = 'src/' + normalized_path
237
238      component_name = parsed_deps[component_path]['name']
239
240      return (component_path, component_name, normalized_path)
241
242  # If the path does not match any component, default to chromium.
243  return ('src/', 'chromium', normalized_path)
244
245
246def SplitRange(regression):
247  """Splits a range as retrieved from clusterfuzz.
248
249  Args:
250    regression: A string in format 'r1234:r5678'.
251
252  Returns:
253    A list containing two numbers represented in string, for example
254    ['1234','5678'].
255  """
256  if not regression:
257    return None
258
259  revisions = regression.split(':')
260
261  # If regression information is not available, return none.
262  if len(revisions) != 2:
263    return None
264
265  range_start = revisions[0]
266  range_end = revisions[1]
267
268  # Strip 'r' off the range start/end. Not using lstrip to avoid the case when
269  # the range is in git hash and it starts with 'r'.
270  if range_start.startswith('r'):
271    range_start = range_start[1:]
272
273  if range_end.startswith('r'):
274    range_end = range_end[1:]
275
276  return [range_start, range_end]
277
278
279def LoadJSON(json_string):
280  """Loads json object from string, or None.
281
282  Args:
283    json_string: A string to get object from.
284
285  Returns:
286    JSON object if the string represents a JSON object, None otherwise.
287  """
288  try:
289    data = json.loads(json_string)
290  except ValueError:
291    data = None
292
293  return data
294
295
296def GetDataFromURL(url):
297  """Retrieves raw data from URL, tries 10 times.
298
299  Args:
300    url: URL to get data from.
301    retries: Number of times to retry connection.
302
303  Returns:
304    None if the data retrieval fails, or the raw data.
305  """
306  status_code, data = utils.GetHttpClient().Get(url, retries=10)
307  if status_code == 200:
308    return data
309  else:
310    # Return None if it fails to read data.
311    return None
312
313
314def FindMinLineDistance(crashed_line_list, changed_line_numbers,
315                        line_range=3):
316  """Calculates how far the changed line is from one of the crashes.
317
318  Finds the minimum distance between the lines that the file crashed on
319  and the lines that the file changed. For example, if the file crashed on
320  line 200 and the CL changes line 203,204 and 205, the function returns 3.
321
322  Args:
323    crashed_line_list: A list of lines that the file crashed on.
324    changed_line_numbers: A list of lines that the file changed.
325    line_range: Number of lines to look back for.
326
327  Returns:
328    The minimum distance. If either of the input lists is empty,
329    it returns inf.
330
331  """
332  min_distance = INFINITY
333  crashed_line = -1
334  changed_line = -1
335
336  crashed_line_numbers = set()
337  for crashed_line_range in crashed_line_list:
338    for crashed_line in crashed_line_range:
339      for line in range(crashed_line - line_range, crashed_line + 1):
340        crashed_line_numbers.add(line)
341
342  for line in crashed_line_numbers:
343    for distance in changed_line_numbers:
344      # Find the current distance and update the min if current distance is
345      # less than current min.
346      current_distance = abs(line - distance)
347      if current_distance < min_distance:
348        min_distance = current_distance
349        crashed_line = line
350        changed_line = distance
351
352  return (min_distance, crashed_line, changed_line)
353
354
355def GuessIfSameSubPath(path1, path2):
356  """Guesses if two paths represent same path.
357
358  Compares the name of the folders in the path (by split('/')), and checks
359  if they match either more than 3 or min of path lengths.
360
361  Args:
362    path1: First path.
363    path2: Second path to compare.
364
365  Returns:
366    True if it they are thought to be a same path, False otherwise.
367  """
368  path1 = path1.split('/')
369  path2 = path2.split('/')
370
371  intersection = set(path1).intersection(set(path2))
372  return len(intersection) >= (min(3, min(len(path1), len(path2))))
373
374
375def FindMinStackFrameNumber(stack_frame_indices, priorities):
376  """Finds the minimum stack number, from the list of stack numbers.
377
378  Args:
379    stack_frame_indices: A list of lists containing stack position.
380    priorities: A list of of priority for each file.
381
382  Returns:
383    Inf if stack_frame_indices is empty, minimum stack number otherwise.
384  """
385  # Get the indexes of the highest priority (or low priority number).
386  highest_priority = min(priorities)
387  highest_priority_indices = []
388  for i in range(len(priorities)):
389    if priorities[i] == highest_priority:
390      highest_priority_indices.append(i)
391
392  # Gather the list of stack frame numbers for the files that change the
393  # crash lines.
394  flattened = []
395  for i in highest_priority_indices:
396    flattened += stack_frame_indices[i]
397
398  # If no stack frame information is available, return inf. Else, return min.
399  if not flattened:
400    return INFINITY
401  else:
402    return min(flattened)
403
404
405def AddHyperlink(text, link):
406  """Returns a string with HTML link tag.
407
408  Args:
409    text: A string to add link.
410    link: A link to add to the string.
411
412  Returns:
413    A string with hyperlink added.
414  """
415  sanitized_link = cgi.escape(link, quote=True)
416  sanitized_text = cgi.escape(str(text))
417  return '<a href="%s">%s</a>' % (sanitized_link, sanitized_text)
418
419
420def PrettifyList(items):
421  """Returns a string representation of a list.
422
423  It adds comma in between the elements and removes the brackets.
424  Args:
425    items: A list to prettify.
426  Returns:
427    A string representation of the list.
428  """
429  return ', '.join(map(str, items))
430
431
432def PrettifyFrameInfo(frame_indices, functions):
433  """Return a string to represent the frames with functions."""
434  frames = []
435  for frame_index, function in zip(frame_indices, functions):
436    frames.append('frame #%s, "%s"' % (frame_index, function.split('(')[0]))
437  return '; '.join(frames)
438
439
440def PrettifyFiles(file_list):
441  """Returns a string representation of a list of file names.
442
443  Args:
444    file_list: A list of tuple, (file_name, file_url).
445  Returns:
446    A string representation of file names with their urls.
447  """
448  ret = ['\n']
449  for file_name, file_url in file_list:
450    ret.append('      %s\n' % AddHyperlink(file_name, file_url))
451  return ''.join(ret)
452
453
454def Intersection(crashed_line_list, stack_frame_index, changed_line_numbers,
455                 function, line_range=3):
456  """Finds the overlap betwee changed lines and crashed lines.
457
458  Finds the intersection of the lines that caused the crash and
459  lines that the file changes. The intersection looks within 3 lines
460  of the line that caused the crash.
461
462  Args:
463    crashed_line_list: A list of lines that the file crashed on.
464    stack_frame_index: A list of positions in stack for each of the lines.
465    changed_line_numbers: A list of lines that the file changed.
466    function: A list of functions that the file crashed on.
467    line_range: Number of lines to look backwards from crashed lines.
468
469  Returns:
470    line_number_intersection: Intersection between crashed_line_list and
471                       changed_line_numbers.
472    stack_frame_index_intersection: Stack number for each of the intersections.
473  """
474  line_number_intersection = []
475  stack_frame_index_intersection = []
476  function_intersection = []
477
478  # Iterate through the crashed lines, and its occurence in stack.
479  for (lines, stack_frame_index, function_name) in zip(
480      crashed_line_list, stack_frame_index, function):
481    # Also check previous 'line_range' lines. Create a set of all changed lines
482    # and lines within 3 lines range before the crashed line.
483    line_minus_n = set()
484    for line in lines:
485      for line_in_range in range(line - line_range, line + 1):
486        line_minus_n.add(line_in_range)
487
488    for changed_line in changed_line_numbers:
489      # If a CL does not change crahsed line, check next line.
490      if changed_line not in line_minus_n:
491        continue
492
493      intersected_line = set()
494      # If the changed line is exactly the crashed line, add that line.
495      for line in lines:
496        if line in changed_line_numbers:
497          intersected_line.add(line)
498
499        # If the changed line is in 3 lines of the crashed line, add the line.
500        else:
501          intersected_line.add(changed_line)
502
503      # Avoid adding the same line twice.
504      if intersected_line not in line_number_intersection:
505        line_number_intersection.append(list(intersected_line))
506        stack_frame_index_intersection.append(stack_frame_index)
507        function_intersection.append(function_name)
508      break
509
510  return (line_number_intersection, stack_frame_index_intersection,
511          function_intersection)
512
513
514def MatchListToResultList(matches):
515  """Convert list of matches to the list of result objects.
516
517  Args:
518    matches: A list of match objects along with its stack priority and revision
519             number/git hash
520  Returns:
521    A list of result object.
522
523  """
524  result_list = []
525
526  for _, cl, match in matches:
527    suspected_cl = cl
528    revision_url = match.revision_url
529    component_name = match.component_name
530    author = match.author
531    reason = match.reason
532    review_url = match.review_url
533    reviewers = match.reviewers
534    # For matches, line content do not exist.
535    line_content = None
536    message = match.message
537
538    result = Result(suspected_cl, revision_url, component_name, author, reason,
539                    review_url, reviewers, line_content, message)
540    result_list.append(result)
541
542  return result_list
543
544
545def BlameListToResultList(blame_list):
546  """Convert blame list to the list of result objects.
547
548  Args:
549    blame_list: A list of blame objects.
550
551  Returns:
552    A list of result objects.
553  """
554  result_list = []
555
556  for blame in blame_list:
557    suspected_cl = blame.revision
558    revision_url = blame.url
559    component_name = blame.component_name
560    author = blame.author
561    reason = (
562        'The CL last changed line %s of file %s, which is stack frame %d.' %
563        (blame.line_number, blame.file, blame.stack_frame_index))
564    # Blame object does not have review url and reviewers.
565    review_url = None
566    reviewers = None
567    line_content = blame.line_content
568    message = blame.message
569
570    result = Result(suspected_cl, revision_url, component_name, author, reason,
571                    review_url, reviewers, line_content, message)
572    result_list.append(result)
573
574  return result_list
575