• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2020 The Chromium Authors
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4"""Methods related to test expectations/expectation files."""
5
6import collections
7import copy
8import datetime
9import logging
10import os
11import re
12import subprocess
13from typing import Dict, FrozenSet, Iterable, List, Optional, Set, Tuple, Union
14
15import six
16
17from typ import expectations_parser
18from unexpected_passes_common import data_types
19
# Annotation bases for turning the unexpected pass finder off and back on for
# a span of expectations. A type-specific suffix (below) is appended to form
# the complete annotation, e.g. 'finder:disable-stale'.
FINDER_DISABLE_COMMENT_BASE = 'finder:disable'
FINDER_ENABLE_COMMENT_BASE = 'finder:enable'
# Suffixes specifying which finder behavior a disable/enable annotation
# applies to.
FINDER_COMMENT_SUFFIX_GENERAL = '-general'
FINDER_COMMENT_SUFFIX_STALE = '-stale'
FINDER_COMMENT_SUFFIX_UNUSED = '-unused'
FINDER_COMMENT_SUFFIX_NARROWING = '-narrowing'

# Annotations marking a named group of expectations that should be treated as
# a unit.
FINDER_GROUP_COMMENT_START = 'finder:group-start'
FINDER_GROUP_COMMENT_END = 'finder:group-end'

# Annotation bases that open a block.
ALL_FINDER_START_ANNOTATION_BASES = frozenset([
    FINDER_DISABLE_COMMENT_BASE,
    FINDER_GROUP_COMMENT_START,
])

# Annotation bases that close a previously opened block.
ALL_FINDER_END_ANNOTATION_BASES = frozenset([
    FINDER_ENABLE_COMMENT_BASE,
    FINDER_GROUP_COMMENT_END,
])

# All recognized disable annotation suffixes.
ALL_FINDER_DISABLE_SUFFIXES = frozenset([
    FINDER_COMMENT_SUFFIX_GENERAL,
    FINDER_COMMENT_SUFFIX_STALE,
    FINDER_COMMENT_SUFFIX_UNUSED,
    FINDER_COMMENT_SUFFIX_NARROWING,
])

# Fully formed disable/enable annotations (base + suffix).
FINDER_DISABLE_COMMENT_GENERAL = (FINDER_DISABLE_COMMENT_BASE +
                                  FINDER_COMMENT_SUFFIX_GENERAL)
FINDER_DISABLE_COMMENT_STALE = (FINDER_DISABLE_COMMENT_BASE +
                                FINDER_COMMENT_SUFFIX_STALE)
FINDER_DISABLE_COMMENT_UNUSED = (FINDER_DISABLE_COMMENT_BASE +
                                 FINDER_COMMENT_SUFFIX_UNUSED)
FINDER_DISABLE_COMMENT_NARROWING = (FINDER_DISABLE_COMMENT_BASE +
                                    FINDER_COMMENT_SUFFIX_NARROWING)
FINDER_ENABLE_COMMENT_GENERAL = (FINDER_ENABLE_COMMENT_BASE +
                                 FINDER_COMMENT_SUFFIX_GENERAL)
FINDER_ENABLE_COMMENT_STALE = (FINDER_ENABLE_COMMENT_BASE +
                               FINDER_COMMENT_SUFFIX_STALE)
FINDER_ENABLE_COMMENT_UNUSED = (FINDER_ENABLE_COMMENT_BASE +
                                FINDER_COMMENT_SUFFIX_UNUSED)
FINDER_ENABLE_COMMENT_NARROWING = (FINDER_ENABLE_COMMENT_BASE +
                                   FINDER_COMMENT_SUFFIX_NARROWING)

FINDER_DISABLE_COMMENTS = frozenset([
    FINDER_DISABLE_COMMENT_GENERAL,
    FINDER_DISABLE_COMMENT_STALE,
    FINDER_DISABLE_COMMENT_UNUSED,
    FINDER_DISABLE_COMMENT_NARROWING,
])

FINDER_ENABLE_COMMENTS = frozenset([
    FINDER_ENABLE_COMMENT_GENERAL,
    FINDER_ENABLE_COMMENT_STALE,
    FINDER_ENABLE_COMMENT_UNUSED,
    FINDER_ENABLE_COMMENT_NARROWING,
])

# (disable, enable) annotation pairs - each disable annotation is closed by
# its matching enable annotation.
FINDER_ENABLE_DISABLE_PAIRS = frozenset([
    (FINDER_DISABLE_COMMENT_GENERAL, FINDER_ENABLE_COMMENT_GENERAL),
    (FINDER_DISABLE_COMMENT_STALE, FINDER_ENABLE_COMMENT_STALE),
    (FINDER_DISABLE_COMMENT_UNUSED, FINDER_ENABLE_COMMENT_UNUSED),
    (FINDER_DISABLE_COMMENT_NARROWING, FINDER_ENABLE_COMMENT_NARROWING),
])

FINDER_GROUP_COMMENTS = frozenset([
    FINDER_GROUP_COMMENT_START,
    FINDER_GROUP_COMMENT_END,
])

ALL_FINDER_COMMENTS = frozenset(FINDER_DISABLE_COMMENTS
                                | FINDER_ENABLE_COMMENTS
                                | FINDER_GROUP_COMMENTS)

# Matches a single line of `git blame -c` output, capturing the author date
# and the line's content.
GIT_BLAME_REGEX = re.compile(
    r'^[\w\s]+\(.+(?P<date>\d\d\d\d-\d\d-\d\d)[^\)]+\)(?P<content>.*)$',
    re.DOTALL)
# Matches a '# tags: [ ... ]' tag group declaration in an expectation file
# header.
TAG_GROUP_REGEX = re.compile(r'# tags: \[([^\]]*)\]', re.MULTILINE | re.DOTALL)

# Annotation comment start (with optional leading whitespace) pattern.
ANNOTATION_COMMENT_START_PATTERN = r' *# '
# Pattern for matching optional description text after an annotation.
ANNOTATION_OPTIONAL_TRAILING_TEXT_PATTERN = r'[^\n]*\n'
# Pattern for matching required description text after an annotation.
ANNOTATION_REQUIRED_TRAILING_TEXT_PATTERN = r'[^\n]+\n'
# Pattern for matching blank or comment lines.
BLANK_OR_COMMENT_LINES_PATTERN = r'(?:\s*| *#[^\n]*\n)*'
# Looks for cases of the group start and end comments with nothing but optional
# whitespace between them.
ALL_STALE_COMMENT_REGEXES = set()
for start_comment, end_comment in FINDER_ENABLE_DISABLE_PAIRS:
  ALL_STALE_COMMENT_REGEXES.add(
      re.compile(
          ANNOTATION_COMMENT_START_PATTERN + start_comment +
          ANNOTATION_OPTIONAL_TRAILING_TEXT_PATTERN +
          BLANK_OR_COMMENT_LINES_PATTERN + ANNOTATION_COMMENT_START_PATTERN +
          end_comment + r'\n', re.MULTILINE | re.DOTALL))
ALL_STALE_COMMENT_REGEXES.add(
    re.compile(
        ANNOTATION_COMMENT_START_PATTERN + FINDER_GROUP_COMMENT_START +
        ANNOTATION_REQUIRED_TRAILING_TEXT_PATTERN +
        BLANK_OR_COMMENT_LINES_PATTERN + ANNOTATION_COMMENT_START_PATTERN +
        FINDER_GROUP_COMMENT_END + r'\n', re.MULTILINE | re.DOTALL))
# Freeze now that construction is complete.
ALL_STALE_COMMENT_REGEXES = frozenset(ALL_STALE_COMMENT_REGEXES)
124
# pylint: disable=useless-object-inheritance

# TODO(crbug.com/358591565): Refactor this to remove the need for global
# statements.
# Process-wide Expectations singleton, managed via RegisterInstance() and
# ClearInstance().
_registered_instance = None


def GetInstance() -> Optional['Expectations']:
  """Returns the registered Expectations instance, or None if none is set."""
  return _registered_instance
134
135
def RegisterInstance(instance: 'Expectations') -> None:
  """Registers |instance| as the process-wide Expectations singleton.

  Asserts that no instance is currently registered and that |instance| is an
  Expectations (sub)class instance.
  """
  global _registered_instance  # pylint: disable=global-statement
  assert _registered_instance is None
  assert isinstance(instance, Expectations)
  _registered_instance = instance
141
142
def ClearInstance() -> None:
  """Clears the registered Expectations singleton, if any."""
  global _registered_instance  # pylint: disable=global-statement
  _registered_instance = None
146
147
class RemovalType(object):
  """Enum-like holder mapping removal reasons to finder comment suffixes.

  Values intentionally alias the FINDER_COMMENT_SUFFIX_* constants so a
  removal type can be compared directly against a disable annotation suffix.
  """
  STALE = FINDER_COMMENT_SUFFIX_STALE
  UNUSED = FINDER_COMMENT_SUFFIX_UNUSED
  NARROWING = FINDER_COMMENT_SUFFIX_NARROWING
152
153
154class Expectations(object):
  def __init__(self):
    # Maps expectation file paths to the list of tag groups parsed from each
    # file's header. Filled lazily by _FilterToMostSpecificTypTags().
    self._cached_tag_groups = {}
157
158  def CreateTestExpectationMap(
159      self, expectation_files: Optional[Union[str, List[str]]],
160      tests: Optional[Iterable[str]],
161      grace_period: datetime.timedelta) -> data_types.TestExpectationMap:
162    """Creates an expectation map based off a file or list of tests.
163
164    Args:
165      expectation_files: A filepath or list of filepaths to expectation files to
166          read from, or None. If a filepath is specified, |tests| must be None.
167      tests: An iterable of strings containing test names to check. If
168          specified, |expectation_file| must be None.
169      grace_period: A datetime.timedelta specifying how many days old an
170          expectation must be in order to be parsed, i.e. how many days old an
171          expectation must be before it is a candidate for removal/modification.
172
173    Returns:
174      A data_types.TestExpectationMap, although all its BuilderStepMap contents
175      will be empty.
176    """
177
178    def AddContentToMap(content: str, ex_map: data_types.TestExpectationMap,
179                        expectation_file_name: str) -> None:
180      list_parser = expectations_parser.TaggedTestListParser(content)
181      expectations_for_file = ex_map.setdefault(
182          expectation_file_name, data_types.ExpectationBuilderMap())
183      logging.debug('Parsed %d expectations', len(list_parser.expectations))
184      for e in list_parser.expectations:
185        if 'Skip' in e.raw_results:
186          continue
187        # Expectations that only have a Pass expectation (usually used to
188        # override a broader, failing expectation) are not handled by the
189        # unexpected pass finder, so ignore those.
190        if e.raw_results == ['Pass']:
191          continue
192        expectation = data_types.Expectation(e.test, e.tags, e.raw_results,
193                                             e.reason)
194        assert expectation not in expectations_for_file
195        expectations_for_file[expectation] = data_types.BuilderStepMap()
196
197    logging.info('Creating test expectation map')
198    assert expectation_files or tests
199    assert not (expectation_files and tests)
200
201    expectation_map = data_types.TestExpectationMap()
202
203    if expectation_files:
204      if not isinstance(expectation_files, list):
205        expectation_files = [expectation_files]
206      for ef in expectation_files:
207        # Normalize to '/' as the path separator.
208        expectation_file_name = os.path.normpath(ef).replace(os.path.sep, '/')
209        content = self._GetNonRecentExpectationContent(expectation_file_name,
210                                                       grace_period)
211        AddContentToMap(content, expectation_map, expectation_file_name)
212    else:
213      expectation_file_name = ''
214      content = '# results: [ RetryOnFailure ]\n'
215      for t in tests:
216        content += '%s [ RetryOnFailure ]\n' % t
217      AddContentToMap(content, expectation_map, expectation_file_name)
218
219    return expectation_map
220
221  def _GetNonRecentExpectationContent(self, expectation_file_path: str,
222                                      num_days: datetime.timedelta) -> str:
223    """Gets content from |expectation_file_path| older than |num_days| days.
224
225    Args:
226      expectation_file_path: A string containing a filepath pointing to an
227          expectation file.
228      num_days: A datetime.timedelta containing how old an expectation in the
229          given expectation file must be to be included.
230
231    Returns:
232      The contents of the expectation file located at |expectation_file_path|
233      as a string with any recent expectations removed.
234    """
235    content = ''
236    # `git blame` output is normally in the format:
237    # revision optional_filename (author date time timezone lineno) line_content
238    # The --porcelain option is meant to be more machine readable, but is much
239    # more difficult to parse for what we need to do here. In order to
240    # guarantee that the filename won't be included in the output (by default,
241    # it will be shown if there is content from a renamed file), pass -c to
242    # use the same format as `git annotate`, which is:
243    # revision (author date time timezone lineno)line_content
244    # (Note the lack of space between the ) and the content).
245    cmd = ['git', 'blame', '-c', expectation_file_path]
246    with open(os.devnull, 'w', newline='', encoding='utf-8') as devnull:
247      blame_output = subprocess.check_output(cmd,
248                                             stderr=devnull).decode('utf-8')
249    for line in blame_output.splitlines(True):
250      match = GIT_BLAME_REGEX.match(line)
251      assert match
252      date = match.groupdict()['date']
253      line_content = match.groupdict()['content']
254      stripped_line_content = line_content.strip()
255      # Auto-add comments and blank space, otherwise only add if the grace
256      # period has expired.
257      if not stripped_line_content or stripped_line_content.startswith('#'):
258        content += line_content
259      else:
260        if six.PY2:
261          date_parts = date.split('-')
262          date = datetime.date(year=int(date_parts[0]),
263                               month=int(date_parts[1]),
264                               day=int(date_parts[2]))
265        else:
266          date = datetime.date.fromisoformat(date)
267        date_diff = datetime.date.today() - date
268        if date_diff > num_days:
269          content += line_content
270        else:
271          logging.debug('Omitting expectation %s because it is too new',
272                        line_content.rstrip())
273    return content
274
  def RemoveExpectationsFromFile(self,
                                 expectations: List[data_types.Expectation],
                                 expectation_file: str,
                                 removal_type: str) -> Set[str]:
    """Removes lines corresponding to |expectations| from |expectation_file|.

    Ignores any lines that match but are within a disable block or have an
    inline disable comment.

    Args:
      expectations: A list of data_types.Expectations to remove.
      expectation_file: A filepath pointing to an expectation file to remove
          lines from.
      removal_type: A RemovalType enum corresponding to the type of expectations
          being removed.

    Returns:
      A set of strings containing URLs of bugs associated with the removed
      expectations.
    """

    with open(expectation_file, encoding='utf-8') as f:
      input_contents = f.read()

    # Precompute group membership and disable annotations up front so the
    # per-line loop below only needs cheap dict lookups.
    group_to_expectations, expectation_to_group = (
        self._GetExpectationGroupsFromFileContent(expectation_file,
                                                  input_contents))
    disable_annotated_expectations = (
        self._GetDisableAnnotatedExpectationsFromFile(expectation_file,
                                                      input_contents))

    output_contents = ''
    removed_urls = set()
    removed_lines = set()
    num_removed_lines = 0
    for line_number, line in enumerate(input_contents.splitlines(True)):
      # Auto-add any comments or empty lines
      stripped_line = line.strip()
      if _IsCommentOrBlankLine(stripped_line):
        output_contents += line
        continue

      current_expectation = self._CreateExpectationFromExpectationFileLine(
          line, expectation_file)

      # Add any lines containing expectations that don't match any of the given
      # expectations to remove.
      if any(e for e in expectations if e == current_expectation):
        # Skip any expectations that match if we're in a disable block or there
        # is an inline disable comment.
        disable_block_suffix, disable_block_reason = (
            disable_annotated_expectations.get(current_expectation,
                                               (None, None)))
        if disable_block_suffix and _DisableSuffixIsRelevant(
            disable_block_suffix, removal_type):
          output_contents += line
          logging.info(
              'Would have removed expectation %s, but it is inside a disable '
              'block or has an inline disable with reason %s', stripped_line,
              disable_block_reason)
        elif _ExpectationPartOfNonRemovableGroup(current_expectation,
                                                 group_to_expectations,
                                                 expectation_to_group,
                                                 expectations):
          output_contents += line
          logging.info(
              'Would have removed expectation %s, but it is part of group "%s" '
              'whose members are not all removable.', stripped_line,
              expectation_to_group[current_expectation])
        else:
          bug = current_expectation.bug
          if bug:
            # It's possible to have multiple whitespace-separated bugs per
            # expectation, so treat each one separately.
            removed_urls |= set(bug.split())
          # Record that we've removed this line. By subtracting the number of
          # lines we've already removed, we keep the line numbers relative to
          # the content we're outputting rather than relative to the input
          # content. This also has the effect of automatically compressing
          # contiguous blocks of removal into a single line number.
          removed_lines.add(line_number - num_removed_lines)
          num_removed_lines += 1
      else:
        output_contents += line

    # Clean up any finder annotation comments whose annotated expectations
    # were all removed above.
    header_length = len(
        self._GetExpectationFileTagHeader(expectation_file).splitlines(True))
    output_contents = _RemoveStaleComments(output_contents, removed_lines,
                                           header_length)

    with open(expectation_file, 'w', newline='', encoding='utf-8') as f:
      f.write(output_contents)

    return removed_urls
369
  def _GetDisableAnnotatedExpectationsFromFile(
      self, expectation_file: str,
      content: str) -> Dict[data_types.Expectation, Tuple[str, str]]:
    """Extracts expectations which are affected by disable annotations.

    Args:
      expectation_file: A filepath pointing to an expectation file.
      content: A string containing the contents of |expectation_file|.

    Returns:
      A dict mapping data_types.Expectation to (disable_suffix, disable_reason).
      If an expectation is present in this dict, it is affected by a disable
      annotation of some sort. |disable_suffix| is a string specifying which
      type of annotation is applicable, while |disable_reason| is a string
      containing the comment/reason why the disable annotation is present.

    Raises:
      RuntimeError: If disable/enable annotations in |content| are not
          properly paired.
    """
    in_disable_block = False
    disable_block_reason = ''
    disable_block_suffix = ''
    disable_annotated_expectations = {}
    for line in content.splitlines(True):
      stripped_line = line.strip()
      # Look for cases of disable/enable blocks.
      if _IsCommentOrBlankLine(stripped_line):
        # Only allow one enable/disable per line.
        assert len([c for c in ALL_FINDER_COMMENTS if c in line]) <= 1
        if _LineContainsDisableComment(line):
          # Nested disable blocks are not supported.
          if in_disable_block:
            raise RuntimeError(
                'Invalid expectation file %s - contains a disable comment "%s" '
                'that is in another disable block.' %
                (expectation_file, stripped_line))
          in_disable_block = True
          disable_block_reason = _GetDisableReasonFromComment(line)
          disable_block_suffix = _GetFinderCommentSuffix(line)
        elif _LineContainsEnableComment(line):
          if not in_disable_block:
            raise RuntimeError(
                'Invalid expectation file %s - contains an enable comment "%s" '
                'that is outside of a disable block.' %
                (expectation_file, stripped_line))
          in_disable_block = False
        continue

      current_expectation = self._CreateExpectationFromExpectationFileLine(
          line, expectation_file)

      if in_disable_block:
        # Every expectation within a disable block inherits the block's
        # suffix and reason.
        disable_annotated_expectations[current_expectation] = (
            disable_block_suffix, disable_block_reason)
      elif _LineContainsDisableComment(line):
        # Inline disable comment on the expectation line itself.
        disable_block_reason = _GetDisableReasonFromComment(line)
        disable_block_suffix = _GetFinderCommentSuffix(line)
        disable_annotated_expectations[current_expectation] = (
            disable_block_suffix, disable_block_reason)
    return disable_annotated_expectations
426
427  def _GetExpectationGroupsFromFileContent(
428      self, expectation_file: str, content: str
429  ) -> Tuple[Dict[str, Set[data_types.Expectation]], Dict[data_types.
430                                                          Expectation, str]]:
431    """Extracts all groups of expectations from an expectationfile.
432
433    Args:
434      expectation_file: A filepath pointing to an expectation file.
435      content: A string containing the contents of |expectation_file|.
436
437    Returns:
438      A tuple (group_to_expectations, expectation_to_group).
439      |group_to_expectations| is a dict of group names to sets of
440      data_type.Expectations that belong to that group. |expectation_to_group|
441      is the same, but mapped the other way from data_type.Expectations to group
442      names.
443    """
444    group_to_expectations = collections.defaultdict(set)
445    expectation_to_group = {}
446    group_name = None
447
448    for line in content.splitlines():
449      stripped_line = line.strip()
450      # Possibly starting/ending a group.
451      if _IsCommentOrBlankLine(stripped_line):
452        if _LineContainsGroupStartComment(stripped_line):
453          # Start of a new group.
454          if group_name:
455            raise RuntimeError(
456                'Invalid expectation file %s - contains a group comment "%s" '
457                'that is inside another group block.' %
458                (expectation_file, stripped_line))
459          group_name = _GetGroupNameFromCommentLine(stripped_line)
460        elif _LineContainsGroupEndComment(stripped_line):
461          # End of current group.
462          if not group_name:
463            raise RuntimeError(
464                'Invalid expectation file %s - contains a group comment "%s" '
465                'without a group start comment.' %
466                (expectation_file, stripped_line))
467          group_name = None
468      elif group_name:
469        # Currently in a group.
470        e = self._CreateExpectationFromExpectationFileLine(
471            stripped_line, expectation_file)
472        group_to_expectations[group_name].add(e)
473        expectation_to_group[e] = group_name
474      # If we aren't in a group, do nothing.
475    return group_to_expectations, expectation_to_group
476
477  def _CreateExpectationFromExpectationFileLine(self, line: str,
478                                                expectation_file: str
479                                                ) -> data_types.Expectation:
480    """Creates a data_types.Expectation from |line|.
481
482    Args:
483      line: A string containing a single line from an expectation file.
484      expectation_file: A filepath pointing to an expectation file |line| came
485          from.
486
487    Returns:
488      A data_types.Expectation containing the same information as |line|.
489    """
490    header = self._GetExpectationFileTagHeader(expectation_file)
491    single_line_content = header + line
492    list_parser = expectations_parser.TaggedTestListParser(single_line_content)
493    assert len(list_parser.expectations) == 1
494    typ_expectation = list_parser.expectations[0]
495    return data_types.Expectation(typ_expectation.test, typ_expectation.tags,
496                                  typ_expectation.raw_results,
497                                  typ_expectation.reason)
498
  def _GetExpectationFileTagHeader(self, expectation_file: str) -> str:
    """Gets the tag header used for expectation files.

    Args:
      expectation_file: A filepath pointing to an expectation file to get the
          tag header from.

    Returns:
      A string containing an expectation file header, i.e. the comment block at
      the top of the file defining possible tags and expected results.

    Raises:
      NotImplementedError: Always - subclasses must provide an implementation.
    """
    raise NotImplementedError()
511
  def ParseTaggedTestListContent(self, content: str
                                 ) -> expectations_parser.TaggedTestListParser:
    """Helper to parse typ expectation files.

    This allows subclasses to avoid adding typ to PYTHONPATH.

    Args:
      content: A string containing the contents of an expectation file.

    Returns:
      An expectations_parser.TaggedTestListParser for |content|.
    """
    return expectations_parser.TaggedTestListParser(content)
519
520  def FilterToKnownTags(self, tags: Iterable[str]) -> Set[str]:
521    """Filters |tags| to only include tags known to expectation files.
522
523    Args:
524      tags: An iterable of strings containing tags.
525
526    Returns:
527      A set containing the elements of |tags| with any tags that are not defined
528      in any expectation files removed.
529    """
530    return self._GetKnownTags() & set(tags)
531
  def _GetKnownTags(self) -> Set[str]:
    """Gets all known/defined tags from expectation files.

    Returns:
      A set of strings containing all known/defined tags from expectation files.

    Raises:
      NotImplementedError: Always - subclasses must provide an implementation.
    """
    raise NotImplementedError()
539
540  def _FilterToMostSpecificTypTags(self, typ_tags: FrozenSet[str],
541                                   expectation_file: str) -> FrozenSet[str]:
542    """Filters |typ_tags| to the most specific set.
543
544    Assumes that the tags in |expectation_file| are ordered from least specific
545    to most specific within each tag group.
546
547    Args:
548      typ_tags: A frozenset of strings containing the typ tags to filter.
549      expectations_file: A string containing a filepath pointing to the
550          expectation file to filter tags with.
551
552    Returns:
553      A frozenset containing the contents of |typ_tags| with only the most
554      specific tag from each group remaining.
555    """
556    # The logic for this function was lifted from the GPU/Blink flake finders,
557    # so there may be room to share code between the two.
558
559    if expectation_file not in self._cached_tag_groups:
560      with open(expectation_file, encoding='utf-8') as infile:
561        contents = infile.read()
562      tag_groups = []
563      for match in TAG_GROUP_REGEX.findall(contents):
564        tag_groups.append(match.lower().strip().replace('#', '').split())
565      self._cached_tag_groups[expectation_file] = tag_groups
566    tag_groups = self._cached_tag_groups[expectation_file]
567
568    num_matches = 0
569    tags_in_same_group = collections.defaultdict(list)
570    for tag in typ_tags:
571      for index, tag_group in enumerate(tag_groups):
572        if tag in tag_group:
573          tags_in_same_group[index].append(tag)
574          num_matches += 1
575          break
576    if num_matches != len(typ_tags):
577      all_tags = set()
578      for group in tag_groups:
579        all_tags |= set(group)
580      raise RuntimeError('Found tags not in expectation file %s: %s' %
581                         (expectation_file, ' '.join(set(typ_tags) - all_tags)))
582
583    filtered_tags = set()
584    for index, tags in tags_in_same_group.items():
585      if len(tags) == 1:
586        filtered_tags.add(tags[0])
587      else:
588        tag_group = tag_groups[index]
589        best_index = -1
590        for t in tags:
591          i = tag_group.index(t)
592          if i > best_index:
593            best_index = i
594        filtered_tags.add(tag_group[best_index])
595    return frozenset(filtered_tags)
596
  def _ConsolidateKnownOverlappingTags(self, typ_tags: FrozenSet[str]
                                       ) -> FrozenSet[str]:
    """Consolidates tags that are known to overlap/cause issues.

    One known example of this would be dual GPU machines that report tags for
    both GPUs.

    Args:
      typ_tags: A frozenset of strings containing typ tags.

    Returns:
      |typ_tags| unchanged - this base implementation is a no-op hook for
      subclasses to override.
    """
    return typ_tags
605
606  def NarrowSemiStaleExpectationScope(
607      self, stale_expectation_map: data_types.TestExpectationMap) -> Set[str]:
608    """Narrows the scope of expectations in |stale_expectation_map|.
609
610    Expectations are modified such that they only apply to configurations that
611    need them, to the best extent possible. If scope narrowing is not possible,
612    e.g. the same hardware/software combination reports fully passing on one bot
613    but reports some failures on another bot, the expectation will not be
614    modified.
615
616    Args:
617      stale_expectation_map: A data_types.TestExpectationMap containing
618          semi-stale expectations.
619
620    Returns:
621      A set of strings containing URLs of bugs associated with the modified
622      expectations.
623    """
624    modified_urls = set()
625    cached_disable_annotated_expectations = {}
626    for expectation_file, e, builder_map in (
627        stale_expectation_map.IterBuilderStepMaps()):
628      # Check if the current annotation has scope narrowing disabled.
629      if expectation_file not in cached_disable_annotated_expectations:
630        with open(expectation_file, encoding='utf-8') as infile:
631          disable_annotated_expectations = (
632              self._GetDisableAnnotatedExpectationsFromFile(
633                  expectation_file, infile.read()))
634          cached_disable_annotated_expectations[
635              expectation_file] = disable_annotated_expectations
636      disable_block_suffix, disable_block_reason = (
637          cached_disable_annotated_expectations[expectation_file].get(
638              e, ('', '')))
639      if _DisableSuffixIsRelevant(disable_block_suffix, RemovalType.NARROWING):
640        logging.info(
641            'Skipping semi-stale narrowing check for expectation %s since it '
642            'has a narrowing disable annotation with reason %s',
643            e.AsExpectationFileString(), disable_block_reason)
644        continue
645
646      skip_to_next_expectation = False
647
648      pass_tag_sets = set()
649      fail_tag_sets = set()
650      # Determine which tags sets failures can occur on vs. tag sets that
651      # don't have any failures.
652      for builder, step, build_stats in builder_map.IterBuildStats():
653        if len(build_stats.tag_sets) > 1:
654          # This shouldn't really be happening during normal operation, but is
655          # expected to happen if a configuration changes, e.g. an OS was
656          # upgraded. In these cases, the old data will eventually age out and
657          # we will stop getting multiple tag sets.
658          logging.warning(
659              'Step %s on builder %s produced multiple tag sets: %s. Not '
660              'narrowing expectation scope for expectation %s.', step, builder,
661              build_stats.tag_sets, e.AsExpectationFileString())
662          skip_to_next_expectation = True
663          break
664        if build_stats.NeverNeededExpectation(e):
665          pass_tag_sets |= build_stats.tag_sets
666        else:
667          fail_tag_sets |= build_stats.tag_sets
668      if skip_to_next_expectation:
669        continue
670
671      # Remove all instances of tags that are shared between all sets other than
672      # the tags that were used by the expectation, as they are redundant.
673      common_tags = set()
674      for ts in pass_tag_sets:
675        common_tags |= ts
676        # We only need one initial tag set, but sets do not have a way of
677        # retrieving a single element other than pop(), which removes the
678        # element, which we don't want.
679        break
680      for ts in pass_tag_sets | fail_tag_sets:
681        common_tags &= ts
682      common_tags -= e.tags
683      pass_tag_sets = {ts - common_tags for ts in pass_tag_sets}
684      fail_tag_sets = {ts - common_tags for ts in fail_tag_sets}
685
686      # Calculate new tag sets that should be functionally equivalent to the
687      # single, more broad tag set that we are replacing. This is done by
688      # checking if the intersection between any pairs of fail tag sets are
689      # still distinct from any pass tag sets, i.e. if the intersection between
690      # fail tag sets is still a valid fail tag set. If so, the original sets
691      # are replaced by the intersection.
692      new_tag_sets = set()
693      covered_fail_tag_sets = set()
694      for fail_tags in fail_tag_sets:
695        if any(fail_tags <= pt for pt in pass_tag_sets):
696          logging.warning(
697              'Unable to determine what makes failing configs unique for %s, '
698              'not narrowing expectation scope.', e.AsExpectationFileString())
699          skip_to_next_expectation = True
700          break
701        if fail_tags in covered_fail_tag_sets:
702          continue
703        tag_set_to_add = fail_tags
704        for ft in fail_tag_sets:
705          if ft in covered_fail_tag_sets:
706            continue
707          intersection = tag_set_to_add & ft
708          if any(intersection <= pt for pt in pass_tag_sets):
709            # Intersection is too small, as it also covers a passing tag set.
710            continue
711          if any(intersection <= cft for cft in covered_fail_tag_sets):
712            # Both the intersection and some tag set from new_tag_sets
713            # apply to the same original failing tag set,
714            # which means if we add the intersection to new_tag_sets,
715            # they will conflict on the bot from the original failing tag set.
716            # The above check works because new_tag_sets and
717            # covered_fail_tag_sets are updated together below.
718            continue
719          tag_set_to_add = intersection
720        new_tag_sets.add(tag_set_to_add)
721        covered_fail_tag_sets.update(cft for cft in fail_tag_sets
722                                     if tag_set_to_add <= cft)
723      if skip_to_next_expectation:
724        continue
725
726      # Remove anything we know could be problematic, e.g. causing expectation
727      # file parsing errors.
728      new_tag_sets = {
729          self._ConsolidateKnownOverlappingTags(nts)
730          for nts in new_tag_sets
731      }
732      new_tag_sets = {
733          self._FilterToMostSpecificTypTags(nts, expectation_file)
734          for nts in new_tag_sets
735      }
736
737      # Replace the existing expectation with our new ones.
738      with open(expectation_file, encoding='utf-8') as infile:
739        file_contents = infile.read()
740      line, _ = self._GetExpectationLine(e, file_contents, expectation_file)
741      modified_urls |= set(e.bug.split())
742      expectation_strs = []
743      for new_tags in new_tag_sets:
744        expectation_copy = copy.copy(e)
745        expectation_copy.tags = new_tags
746        expectation_strs.append(expectation_copy.AsExpectationFileString())
747      expectation_strs.sort()
748      replacement_lines = '\n'.join(expectation_strs)
749      file_contents = file_contents.replace(line, replacement_lines)
750      with open(expectation_file, 'w', newline='', encoding='utf-8') as outfile:
751        outfile.write(file_contents)
752
753    return modified_urls
754
755  def _GetExpectationLine(self, expectation: data_types.Expectation,
756                          file_contents: str, expectation_file: str
757                          ) -> Union[Tuple[None, None], Tuple[str, int]]:
758    """Gets the line and line number of |expectation| in |file_contents|.
759
760    Args:
761      expectation: A data_types.Expectation.
762      file_contents: A string containing the contents read from an expectation
763          file.
764      expectation_file: A string containing the path to the expectation file
765          that |file_contents| came from.
766
767    Returns:
768      A tuple (line, line_number). |line| is a string containing the exact line
769      in |file_contents| corresponding to |expectation|. |line_number| is an int
770      corresponding to where |line| is in |file_contents|. |line_number| may be
771      off if the file on disk has changed since |file_contents| was read. If a
772      corresponding line cannot be found, both |line| and |line_number| are
773      None.
774    """
775    # We have all the information necessary to recreate the expectation line and
776    # line number can be pulled during the initial expectation parsing. However,
777    # the information we have is not necessarily in the same order as the
778    # text file (e.g. tag ordering), and line numbers can change pretty
779    # dramatically between the initial parse and now due to stale expectations
780    # being removed. So, parse this way in order to improve the user experience.
781    file_lines = file_contents.splitlines()
782    for line_number, line in enumerate(file_lines):
783      if _IsCommentOrBlankLine(line.strip()):
784        continue
785      current_expectation = self._CreateExpectationFromExpectationFileLine(
786          line, expectation_file)
787      if expectation == current_expectation:
788        return line, line_number + 1
789    return None, None
790
791  def FindOrphanedBugs(self, affected_urls: Iterable[str]) -> Set[str]:
792    """Finds cases where expectations for bugs no longer exist.
793
794    Args:
795      affected_urls: An iterable of affected bug URLs, as returned by functions
796          such as RemoveExpectationsFromFile.
797
798    Returns:
799      A set containing a subset of |affected_urls| who no longer have any
800      associated expectations in any expectation files.
801    """
802    seen_bugs = set()
803
804    expectation_files = self.GetExpectationFilepaths()
805
806    for ef in expectation_files:
807      with open(ef, encoding='utf-8') as infile:
808        contents = infile.read()
809      for url in affected_urls:
810        if url in seen_bugs:
811          continue
812        if url in contents:
813          seen_bugs.add(url)
814    return set(affected_urls) - seen_bugs
815
  def GetExpectationFilepaths(self) -> List[str]:
    """Gets all the filepaths to expectation files of interest.

    Must be implemented by subclasses; this base implementation always raises.

    Returns:
      A list of strings, each element being a filepath pointing towards an
      expectation file.

    Raises:
      NotImplementedError: Always, unless overridden by a subclass.
    """
    raise NotImplementedError()
824
825
def _LineContainsGroupStartComment(line: str) -> bool:
  """Returns True iff |line| contains a finder group-start annotation."""
  return line.find(FINDER_GROUP_COMMENT_START) != -1
828
829
def _LineContainsGroupEndComment(line: str) -> bool:
  """Returns True iff |line| contains a finder group-end annotation."""
  return line.find(FINDER_GROUP_COMMENT_END) != -1
832
833
def _LineContainsDisableComment(line: str) -> bool:
  """Returns True iff |line| contains a finder:disable annotation."""
  return line.find(FINDER_DISABLE_COMMENT_BASE) != -1
836
837
def _LineContainsEnableComment(line: str) -> bool:
  """Returns True iff |line| contains a finder:enable annotation."""
  return line.find(FINDER_ENABLE_COMMENT_BASE) != -1
840
841
def _GetGroupNameFromCommentLine(line: str) -> str:
  """Extracts the group name from a finder:group-start comment line.

  Args:
    line: A string containing a line with a group-start annotation.

  Returns:
    The group name that follows the annotation on |line|.

  Raises:
    RuntimeError: If no group name follows the annotation.
  """
  assert FINDER_GROUP_COMMENT_START in line
  # Strip the comment marker(s) and whitespace, then split off the annotation
  # token; everything after it is the group name.
  pieces = line.lstrip('#').strip().split(maxsplit=1)
  if len(pieces) == 2:
    return pieces[1]
  raise RuntimeError('Given line %s did not have a group name.' % line)
850
851
def _GetFinderCommentSuffix(line: str) -> str:
  """Gets the suffix of the finder comment on the given line.

  Examples:
    'foo  # finder:disable-stale some_reason' -> '-stale'
    'foo  # finder:enable-general' -> '-general'

  NOTE(review): a suffix-less comment such as '# finder:disable' would yield
  '' here, which the assert below rejects since ALL_FINDER_DISABLE_SUFFIXES
  only contains the dashed suffixes — confirm bare annotations are no longer
  legal before relying on that example.

  Args:
    line: A string containing a line with a finder:disable/enable comment.

  Returns:
    The suffix string (one of ALL_FINDER_DISABLE_SUFFIXES).

  Raises:
    RuntimeError: If |line| does not contain a finder comment.
  """
  target_str = None
  if _LineContainsDisableComment(line):
    target_str = FINDER_DISABLE_COMMENT_BASE
  elif _LineContainsEnableComment(line):
    target_str = FINDER_ENABLE_COMMENT_BASE
  else:
    raise RuntimeError('Given line %s did not have a finder comment.' % line)
  # Isolate the annotation token, e.g. 'finder:disable-stale'.
  line = line[line.find(target_str):]
  line = line.split()[0]
  # Whatever trails the base annotation string is the suffix.
  suffix = line.replace(target_str, '')
  assert suffix in ALL_FINDER_DISABLE_SUFFIXES
  return suffix
871
872
def _LineContainsRelevantDisableComment(line: str, removal_type: str) -> bool:
  """Returns whether the given line contains a relevant disable comment.

  Args:
    line: A string containing the line to check.
    removal_type: A RemovalType enum corresponding to the type of expectations
        being removed.

  Returns:
    A bool denoting whether |line| contains a relevant disable comment given
    |removal_type|.
  """
  # Both the general disable comment and the removal-type-specific one are
  # considered relevant.
  relevant_annotations = (FINDER_DISABLE_COMMENT_GENERAL,
                          FINDER_DISABLE_COMMENT_BASE + removal_type)
  return any(annotation in line for annotation in relevant_annotations)
890
891
def _DisableSuffixIsRelevant(suffix: str, removal_type: str) -> bool:
  """Returns whether the given suffix is relevant given the removal type.

  Args:
    suffix: A string containing a disable comment suffix.
    removal_type: A RemovalType enum corresponding to the type of expectations
        being removed.

  Returns:
    True if suffix is relevant and its disable request should be honored.
  """
  # The general suffix always applies; otherwise the suffix must match the
  # kind of removal being performed.
  return suffix in (FINDER_COMMENT_SUFFIX_GENERAL, removal_type)
908
909
def _GetDisableReasonFromComment(line: str) -> str:
  """Returns the free-form reason text that follows a finder:disable comment.

  Args:
    line: A string containing a line with a finder:disable comment.

  Returns:
    The whitespace-stripped text after the full disable annotation on |line|.
  """
  full_annotation = FINDER_DISABLE_COMMENT_BASE + _GetFinderCommentSuffix(line)
  return line.split(full_annotation, 1)[1].strip()
913
914
915def _IsCommentOrBlankLine(line: str) -> bool:
916  return not line or line.startswith('#')
917
918
def _ExpectationPartOfNonRemovableGroup(
    current_expectation: data_types.Expectation,
    group_to_expectations: Dict[str, Set[data_types.Expectation]],
    expectation_to_group: Dict[data_types.Expectation, str],
    removable_expectations: List[data_types.Expectation]) -> bool:
  """Determines if the given expectation is part of a non-removable group.

  This is the case if the expectation is part of a group, but not all
  expectations in that group are marked as removable.

  Args:
    current_expectation: A data_types.Expectation that is being checked.
    group_to_expectations: A dict mapping group names to sets of expectations
        contained within that group.
    expectation_to_group: A dict mapping an expectation to the group name it
        belongs to.
    removable_expectations: A list of all expectations that are removable.

  Returns:
    True if |current_expectation| belongs to a group that cannot be removed
    as a unit, False otherwise (including when it is not in any group).
  """
  # Since we'll only ever be using this to check for inclusion, use a set
  # for efficiency.
  removable_expectations = set(removable_expectations)

  group_name = expectation_to_group.get(current_expectation)
  if not group_name:
    # Not part of any group, so no group-based restriction applies.
    return False

  # A group is removable only if every one of its expectations is removable.
  all_expectations_in_group = group_to_expectations[group_name]
  group_removable = all_expectations_in_group <= removable_expectations
  return not group_removable
948
949
def _RemoveStaleComments(content: str, removed_lines: Set[int],
                         header_length: int) -> str:
  """Attempts to remove stale contents from the given expectation file content.

  Args:
    content: A string containing the contents of an expectation file.
    removed_lines: A set of ints denoting which line numbers were removed in
        the process of creating |content|.
    header_length: An int denoting how many lines long the tag header is.

  Returns:
    A copy of |content| with various stale comments removed, e.g. group blocks
    if the group has been removed.
  """
  # Look for the case where we've removed an entire block of expectations that
  # were preceded by a comment, which we should remove.
  comment_line_numbers_to_remove = []
  # splitlines(True) keeps the line endings, so lines can later be deleted by
  # replacing them with '' and re-joining without touching newlines.
  split_content = content.splitlines(True)
  for rl in removed_lines:
    found_trailing_annotation = False
    found_starting_annotation = False
    # Check for the end of the file, a blank line, or a comment after the block
    # we've removed.
    if rl < len(split_content):
      stripped_line = split_content[rl].strip()
      if stripped_line and not stripped_line.startswith('#'):
        # We found an expectation, so the entire expectation block wasn't
        # removed.
        continue
      if any(annotation in stripped_line
             for annotation in ALL_FINDER_END_ANNOTATION_BASES):
        found_trailing_annotation = True
    # Look for a comment block immediately preceding the block we removed,
    # scanning upwards line by line until we hit the tag header.
    # NOTE(review): this assumes every entry in |removed_lines| sits below the
    # tag header; if rl - 1 were already above header_length - 1 the loop
    # would walk into negative indices — confirm callers guarantee this.
    comment_line_number = rl - 1
    while comment_line_number != header_length - 1:
      stripped_line = split_content[comment_line_number].strip()
      if stripped_line.startswith('#'):
        # If we find what should be a trailing annotation, stop immediately so
        # we don't accidentally remove it and create an orphan earlier in the
        # file.
        if any(annotation in stripped_line
               for annotation in ALL_FINDER_END_ANNOTATION_BASES):
          break
        if any(annotation in stripped_line
               for annotation in ALL_FINDER_START_ANNOTATION_BASES):
          # If we've already found a starting annotation, skip past this line.
          # This is to handle the case of nested annotations, e.g. a
          # disable-narrowing block inside of a group block. We'll find the
          # inner-most block here and remove it. Any outer blocks will be
          # removed as part of the lingering stale annotation removal later on.
          # If we don't skip past these outer annotations, then we get left with
          # orphaned trailing annotations.
          if found_starting_annotation:
            comment_line_number -= 1
            continue
          found_starting_annotation = True
          # If we found a starting annotation but not a trailing annotation, we
          # shouldn't remove the starting one, as that would cause the trailing
          # one that is later in the file to be orphaned. We also don't want to
          # continue and remove comments above that since it is assumedly still
          # valid.
          if found_starting_annotation and not found_trailing_annotation:
            break
        comment_line_numbers_to_remove.append(comment_line_number)
        comment_line_number -= 1
      else:
        # A non-comment line ends the preceding comment block.
        break
    # In the event that we found both a start and trailing annotation, we need
    # to also remove the trailing one.
    if found_trailing_annotation and found_starting_annotation:
      comment_line_numbers_to_remove.append(rl)

  # Actually remove the comments we found above.
  for i in comment_line_numbers_to_remove:
    split_content[i] = ''
  if comment_line_numbers_to_remove:
    content = ''.join(split_content)

  # Remove any lingering cases of stale annotations that we can easily detect.
  for regex in ALL_STALE_COMMENT_REGEXES:
    for match in regex.findall(content):
      content = content.replace(match, '')

  return content
1034