• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2020 The Chromium Authors
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4"""Methods related to test expectations/expectation files."""
5
6from __future__ import print_function
7
8import collections
9import copy
10import datetime
11import logging
12import os
13import re
14import subprocess
15import sys
16from typing import Dict, FrozenSet, Iterable, List, Optional, Set, Tuple, Union
17
18import six
19
20from typ import expectations_parser
21from unexpected_passes_common import data_types
22from unexpected_passes_common import result_output
23
# Base annotation comments that the unexpected pass finder looks for in
# expectation file comments. A disable annotation suppresses finder
# modifications until a matching enable annotation is reached.
FINDER_DISABLE_COMMENT_BASE = 'finder:disable'
FINDER_ENABLE_COMMENT_BASE = 'finder:enable'
# Suffixes that scope a disable/enable annotation to one type of finder
# modification.
FINDER_COMMENT_SUFFIX_GENERAL = '-general'
FINDER_COMMENT_SUFFIX_STALE = '-stale'
FINDER_COMMENT_SUFFIX_UNUSED = '-unused'
FINDER_COMMENT_SUFFIX_NARROWING = '-narrowing'

# Annotations that delimit a named group of expectations.
FINDER_GROUP_COMMENT_START = 'finder:group-start'
FINDER_GROUP_COMMENT_END = 'finder:group-end'

# Annotation bases that open a block.
ALL_FINDER_START_ANNOTATION_BASES = frozenset([
    FINDER_DISABLE_COMMENT_BASE,
    FINDER_GROUP_COMMENT_START,
])

# Annotation bases that close a block.
ALL_FINDER_END_ANNOTATION_BASES = frozenset([
    FINDER_ENABLE_COMMENT_BASE,
    FINDER_GROUP_COMMENT_END,
])

ALL_FINDER_DISABLE_SUFFIXES = frozenset([
    FINDER_COMMENT_SUFFIX_GENERAL,
    FINDER_COMMENT_SUFFIX_STALE,
    FINDER_COMMENT_SUFFIX_UNUSED,
    FINDER_COMMENT_SUFFIX_NARROWING,
])

# Fully-formed disable/enable annotation comments (base + suffix).
FINDER_DISABLE_COMMENT_GENERAL = (FINDER_DISABLE_COMMENT_BASE +
                                  FINDER_COMMENT_SUFFIX_GENERAL)
FINDER_DISABLE_COMMENT_STALE = (FINDER_DISABLE_COMMENT_BASE +
                                FINDER_COMMENT_SUFFIX_STALE)
FINDER_DISABLE_COMMENT_UNUSED = (FINDER_DISABLE_COMMENT_BASE +
                                 FINDER_COMMENT_SUFFIX_UNUSED)
FINDER_DISABLE_COMMENT_NARROWING = (FINDER_DISABLE_COMMENT_BASE +
                                    FINDER_COMMENT_SUFFIX_NARROWING)
FINDER_ENABLE_COMMENT_GENERAL = (FINDER_ENABLE_COMMENT_BASE +
                                 FINDER_COMMENT_SUFFIX_GENERAL)
FINDER_ENABLE_COMMENT_STALE = (FINDER_ENABLE_COMMENT_BASE +
                               FINDER_COMMENT_SUFFIX_STALE)
FINDER_ENABLE_COMMENT_UNUSED = (FINDER_ENABLE_COMMENT_BASE +
                                FINDER_COMMENT_SUFFIX_UNUSED)
FINDER_ENABLE_COMMENT_NARROWING = (FINDER_ENABLE_COMMENT_BASE +
                                   FINDER_COMMENT_SUFFIX_NARROWING)

FINDER_DISABLE_COMMENTS = frozenset([
    FINDER_DISABLE_COMMENT_GENERAL,
    FINDER_DISABLE_COMMENT_STALE,
    FINDER_DISABLE_COMMENT_UNUSED,
    FINDER_DISABLE_COMMENT_NARROWING,
])

FINDER_ENABLE_COMMENTS = frozenset([
    FINDER_ENABLE_COMMENT_GENERAL,
    FINDER_ENABLE_COMMENT_STALE,
    FINDER_ENABLE_COMMENT_UNUSED,
    FINDER_ENABLE_COMMENT_NARROWING,
])

# Each disable comment paired with its matching enable comment.
FINDER_ENABLE_DISABLE_PAIRS = frozenset([
    (FINDER_DISABLE_COMMENT_GENERAL, FINDER_ENABLE_COMMENT_GENERAL),
    (FINDER_DISABLE_COMMENT_STALE, FINDER_ENABLE_COMMENT_STALE),
    (FINDER_DISABLE_COMMENT_UNUSED, FINDER_ENABLE_COMMENT_UNUSED),
    (FINDER_DISABLE_COMMENT_NARROWING, FINDER_ENABLE_COMMENT_NARROWING),
])

FINDER_GROUP_COMMENTS = frozenset([
    FINDER_GROUP_COMMENT_START,
    FINDER_GROUP_COMMENT_END,
])

ALL_FINDER_COMMENTS = frozenset(FINDER_DISABLE_COMMENTS
                                | FINDER_ENABLE_COMMENTS
                                | FINDER_GROUP_COMMENTS)
97
# Parses a line of `git blame -c` output into its date and line content.
GIT_BLAME_REGEX = re.compile(
    r'^[\w\s]+\(.+(?P<date>\d\d\d\d-\d\d-\d\d)[^\)]+\)(?P<content>.*)$',
    re.DOTALL)
# Matches a '# tags: [...]' tag group definition in an expectation file header.
TAG_GROUP_REGEX = re.compile(r'# tags: \[([^\]]*)\]', re.MULTILINE | re.DOTALL)

# Annotation comment start (with optional leading whitespace) pattern.
ANNOTATION_COMMENT_START_PATTERN = r' *# '
# Pattern for matching optional description text after an annotation.
ANNOTATION_OPTIONAL_TRAILING_TEXT_PATTERN = r'[^\n]*\n'
# Pattern for matching required description text after an annotation.
ANNOTATION_REQUIRED_TRAILING_TEXT_PATTERN = r'[^\n]+\n'
# Pattern for matching blank or comment lines.
BLANK_OR_COMMENT_LINES_PATTERN = r'(?:\s*| *#[^\n]*\n)*'


def _CompileStaleCommentRegexes() -> FrozenSet:
  """Compiles regexes matching annotation blocks left empty by removals.

  Returns:
    A frozenset of compiled regexes. Each matches a disable/enable comment
    pair, or a group start/end comment pair, with nothing but blank lines or
    comments between them.
  """
  # Using a helper avoids leaking loop variables into the module namespace
  # and lets the public constant be a frozenset from the start instead of
  # being rebound from a mutable set.
  stale_comment_regexes = set()
  for disable_comment, enable_comment in FINDER_ENABLE_DISABLE_PAIRS:
    stale_comment_regexes.add(
        re.compile(
            ANNOTATION_COMMENT_START_PATTERN + disable_comment +
            ANNOTATION_OPTIONAL_TRAILING_TEXT_PATTERN +
            BLANK_OR_COMMENT_LINES_PATTERN + ANNOTATION_COMMENT_START_PATTERN +
            enable_comment + r'\n', re.MULTILINE | re.DOTALL))
  # Group start comments require trailing text (the group name), hence the
  # required rather than optional trailing text pattern.
  stale_comment_regexes.add(
      re.compile(
          ANNOTATION_COMMENT_START_PATTERN + FINDER_GROUP_COMMENT_START +
          ANNOTATION_REQUIRED_TRAILING_TEXT_PATTERN +
          BLANK_OR_COMMENT_LINES_PATTERN + ANNOTATION_COMMENT_START_PATTERN +
          FINDER_GROUP_COMMENT_END + r'\n', re.MULTILINE | re.DOTALL))
  return frozenset(stale_comment_regexes)


# Looks for cases of annotation start and end comments with nothing but
# optional whitespace/comments between them.
ALL_STALE_COMMENT_REGEXES = _CompileStaleCommentRegexes()
128
# pylint: disable=useless-object-inheritance

# Module-level singleton, managed via RegisterInstance()/ClearInstance().
_registered_instance = None
132
133
def GetInstance() -> Optional['Expectations']:
  """Returns the registered Expectations instance, or None if none is set."""
  return _registered_instance
136
137
def RegisterInstance(instance: 'Expectations') -> None:
  """Registers |instance| as the singleton Expectations instance.

  Args:
    instance: An Expectations instance to register. Asserts that no instance
        is currently registered.
  """
  global _registered_instance
  assert _registered_instance is None
  assert isinstance(instance, Expectations)
  _registered_instance = instance
143
144
def ClearInstance() -> None:
  """Clears the registered Expectations instance, if any."""
  global _registered_instance
  _registered_instance = None
148
149
class RemovalType(object):
  """Enum-like holder for the reason an expectation is being removed.

  Values intentionally match the finder disable comment suffixes so that a
  removal type can be compared directly against an annotation's suffix.
  """
  STALE = FINDER_COMMENT_SUFFIX_STALE
  UNUSED = FINDER_COMMENT_SUFFIX_UNUSED
  NARROWING = FINDER_COMMENT_SUFFIX_NARROWING
154
155
156class Expectations(object):
  def __init__(self):
    # Maps expectation file paths to the list of tag groups parsed from each
    # file's '# tags: [...]' header, cached to avoid repeated file reads.
    self._cached_tag_groups = {}
159
  def CreateTestExpectationMap(
      self, expectation_files: Optional[Union[str, List[str]]],
      tests: Optional[Iterable[str]],
      grace_period: int) -> data_types.TestExpectationMap:
    """Creates an expectation map based off a file or list of tests.

    Args:
      expectation_files: A filepath or list of filepaths to expectation files to
          read from, or None. If a filepath is specified, |tests| must be None.
      tests: An iterable of strings containing test names to check. If
          specified, |expectation_file| must be None.
      grace_period: An int specifying how many days old an expectation must
          be in order to be parsed, i.e. how many days old an expectation must
          be before it is a candidate for removal/modification.

    Returns:
      A data_types.TestExpectationMap, although all its BuilderStepMap contents
      will be empty.
    """

    def AddContentToMap(content: str, ex_map: data_types.TestExpectationMap,
                        expectation_file_name: str) -> None:
      # Parses |content| and adds each relevant expectation to |ex_map| under
      # |expectation_file_name|, each mapped to an empty BuilderStepMap.
      list_parser = expectations_parser.TaggedTestListParser(content)
      expectations_for_file = ex_map.setdefault(
          expectation_file_name, data_types.ExpectationBuilderMap())
      logging.debug('Parsed %d expectations', len(list_parser.expectations))
      for e in list_parser.expectations:
        # Expectations containing a Skip result are ignored entirely.
        if 'Skip' in e.raw_results:
          continue
        # Expectations that only have a Pass expectation (usually used to
        # override a broader, failing expectation) are not handled by the
        # unexpected pass finder, so ignore those.
        if e.raw_results == ['Pass']:
          continue
        expectation = data_types.Expectation(e.test, e.tags, e.raw_results,
                                             e.reason)
        assert expectation not in expectations_for_file
        expectations_for_file[expectation] = data_types.BuilderStepMap()

    logging.info('Creating test expectation map')
    # Exactly one of |expectation_files| or |tests| must be provided.
    assert expectation_files or tests
    assert not (expectation_files and tests)

    expectation_map = data_types.TestExpectationMap()

    if expectation_files:
      if not isinstance(expectation_files, list):
        expectation_files = [expectation_files]
      for ef in expectation_files:
        # Normalize to '/' as the path separator.
        expectation_file_name = os.path.normpath(ef).replace(os.path.sep, '/')
        content = self._GetNonRecentExpectationContent(expectation_file_name,
                                                       grace_period)
        AddContentToMap(content, expectation_map, expectation_file_name)
    else:
      # Synthesize expectation file content covering |tests| so both input
      # modes share the same parsing code path.
      expectation_file_name = ''
      content = '# results: [ RetryOnFailure ]\n'
      for t in tests:
        content += '%s [ RetryOnFailure ]\n' % t
      AddContentToMap(content, expectation_map, expectation_file_name)

    return expectation_map
222
223  def _GetNonRecentExpectationContent(self, expectation_file_path: str,
224                                      num_days: int) -> str:
225    """Gets content from |expectation_file_path| older than |num_days| days.
226
227    Args:
228      expectation_file_path: A string containing a filepath pointing to an
229          expectation file.
230      num_days: An int containing how old an expectation in the given
231          expectation file must be to be included.
232
233    Returns:
234      The contents of the expectation file located at |expectation_file_path|
235      as a string with any recent expectations removed.
236    """
237    num_days = datetime.timedelta(days=num_days)
238    content = ''
239    # `git blame` output is normally in the format:
240    # revision optional_filename (author date time timezone lineno) line_content
241    # The --porcelain option is meant to be more machine readable, but is much
242    # more difficult to parse for what we need to do here. In order to
243    # guarantee that the filename won't be included in the output (by default,
244    # it will be shown if there is content from a renamed file), pass -c to
245    # use the same format as `git annotate`, which is:
246    # revision (author date time timezone lineno)line_content
247    # (Note the lack of space between the ) and the content).
248    cmd = ['git', 'blame', '-c', expectation_file_path]
249    with open(os.devnull, 'w') as devnull:
250      blame_output = subprocess.check_output(cmd,
251                                             stderr=devnull).decode('utf-8')
252    for line in blame_output.splitlines(True):
253      match = GIT_BLAME_REGEX.match(line)
254      assert match
255      date = match.groupdict()['date']
256      line_content = match.groupdict()['content']
257      stripped_line_content = line_content.strip()
258      # Auto-add comments and blank space, otherwise only add if the grace
259      # period has expired.
260      if not stripped_line_content or stripped_line_content.startswith('#'):
261        content += line_content
262      else:
263        if six.PY2:
264          date_parts = date.split('-')
265          date = datetime.date(year=int(date_parts[0]),
266                               month=int(date_parts[1]),
267                               day=int(date_parts[2]))
268        else:
269          date = datetime.date.fromisoformat(date)
270        date_diff = datetime.date.today() - date
271        if date_diff > num_days:
272          content += line_content
273        else:
274          logging.debug('Omitting expectation %s because it is too new',
275                        line_content.rstrip())
276    return content
277
  def RemoveExpectationsFromFile(self,
                                 expectations: List[data_types.Expectation],
                                 expectation_file: str,
                                 removal_type: str) -> Set[str]:
    """Removes lines corresponding to |expectations| from |expectation_file|.

    Ignores any lines that match but are within a disable block or have an
    inline disable comment.

    Args:
      expectations: A list of data_types.Expectations to remove.
      expectation_file: A filepath pointing to an expectation file to remove
          lines from.
      removal_type: A RemovalType enum corresponding to the type of expectations
          being removed.

    Returns:
      A set of strings containing URLs of bugs associated with the removed
      expectations.
    """

    with open(expectation_file) as f:
      input_contents = f.read()

    # Pre-compute group membership and disable annotations so the line loop
    # below can consult them cheaply.
    group_to_expectations, expectation_to_group = (
        self._GetExpectationGroupsFromFileContent(expectation_file,
                                                  input_contents))
    disable_annotated_expectations = (
        self._GetDisableAnnotatedExpectationsFromFile(expectation_file,
                                                      input_contents))

    output_contents = ''
    removed_urls = set()
    removed_lines = set()
    num_removed_lines = 0
    for line_number, line in enumerate(input_contents.splitlines(True)):
      # Auto-add any comments or empty lines
      stripped_line = line.strip()
      if _IsCommentOrBlankLine(stripped_line):
        output_contents += line
        continue

      current_expectation = self._CreateExpectationFromExpectationFileLine(
          line, expectation_file)

      # Add any lines containing expectations that don't match any of the given
      # expectations to remove.
      if any(e for e in expectations if e == current_expectation):
        # Skip any expectations that match if we're in a disable block or there
        # is an inline disable comment.
        disable_block_suffix, disable_block_reason = (
            disable_annotated_expectations.get(current_expectation,
                                               (None, None)))
        if disable_block_suffix and _DisableSuffixIsRelevant(
            disable_block_suffix, removal_type):
          output_contents += line
          logging.info(
              'Would have removed expectation %s, but it is inside a disable '
              'block or has an inline disable with reason %s', stripped_line,
              disable_block_reason)
        elif _ExpectationPartOfNonRemovableGroup(current_expectation,
                                                 group_to_expectations,
                                                 expectation_to_group,
                                                 expectations):
          output_contents += line
          logging.info(
              'Would have removed expectation %s, but it is part of group "%s" '
              'whose members are not all removable.', stripped_line,
              expectation_to_group[current_expectation])
        else:
          bug = current_expectation.bug
          if bug:
            # It's possible to have multiple whitespace-separated bugs per
            # expectation, so treat each one separately.
            removed_urls |= set(bug.split())
          # Record that we've removed this line. By subtracting the number of
          # lines we've already removed, we keep the line numbers relative to
          # the content we're outputting rather than relative to the input
          # content. This also has the effect of automatically compressing
          # contiguous blocks of removal into a single line number.
          removed_lines.add(line_number - num_removed_lines)
          num_removed_lines += 1
      else:
        output_contents += line

    # Clean up any finder annotation comments made stale by the removals
    # above. The header length is passed so the tag header at the top of the
    # file is left alone.
    header_length = len(
        self._GetExpectationFileTagHeader(expectation_file).splitlines(True))
    output_contents = _RemoveStaleComments(output_contents, removed_lines,
                                           header_length)

    with open(expectation_file, 'w') as f:
      f.write(output_contents)

    return removed_urls
372
  def _GetDisableAnnotatedExpectationsFromFile(
      self, expectation_file: str,
      content: str) -> Dict[data_types.Expectation, Tuple[str, str]]:
    """Extracts expectations which are affected by disable annotations.

    Args:
      expectation_file: A filepath pointing to an expectation file.
      content: A string containing the contents of |expectation_file|.

    Returns:
      A dict mapping data_types.Expectation to (disable_suffix, disable_reason).
      If an expectation is present in this dict, it is affected by a disable
      annotation of some sort. |disable_suffix| is a string specifying which
      type of annotation is applicable, while |disable_reason| is a string
      containing the comment/reason why the disable annotation is present.

    Raises:
      RuntimeError: If disable/enable comments in |content| are improperly
          paired (nested disable or unmatched enable).
    """
    in_disable_block = False
    disable_block_reason = ''
    disable_block_suffix = ''
    disable_annotated_expectations = {}
    for line in content.splitlines(True):
      stripped_line = line.strip()
      # Look for cases of disable/enable blocks.
      if _IsCommentOrBlankLine(stripped_line):
        # Only allow one enable/disable per line.
        assert len([c for c in ALL_FINDER_COMMENTS if c in line]) <= 1
        if _LineContainsDisableComment(line):
          if in_disable_block:
            raise RuntimeError(
                'Invalid expectation file %s - contains a disable comment "%s" '
                'that is in another disable block.' %
                (expectation_file, stripped_line))
          in_disable_block = True
          disable_block_reason = _GetDisableReasonFromComment(line)
          disable_block_suffix = _GetFinderCommentSuffix(line)
        elif _LineContainsEnableComment(line):
          if not in_disable_block:
            raise RuntimeError(
                'Invalid expectation file %s - contains an enable comment "%s" '
                'that is outside of a disable block.' %
                (expectation_file, stripped_line))
          in_disable_block = False
        continue

      current_expectation = self._CreateExpectationFromExpectationFileLine(
          line, expectation_file)

      if in_disable_block:
        # Expectation line inside a disable/enable block.
        disable_annotated_expectations[current_expectation] = (
            disable_block_suffix, disable_block_reason)
      elif _LineContainsDisableComment(line):
        # Expectation line with a disable comment on the line itself.
        disable_block_reason = _GetDisableReasonFromComment(line)
        disable_block_suffix = _GetFinderCommentSuffix(line)
        disable_annotated_expectations[current_expectation] = (
            disable_block_suffix, disable_block_reason)
    return disable_annotated_expectations
429
  def _GetExpectationGroupsFromFileContent(
      self, expectation_file: str, content: str
  ) -> Tuple[Dict[str, Set[data_types.Expectation]], Dict[data_types.
                                                          Expectation, str]]:
    """Extracts all groups of expectations from an expectation file.

    Args:
      expectation_file: A filepath pointing to an expectation file.
      content: A string containing the contents of |expectation_file|.

    Returns:
      A tuple (group_to_expectations, expectation_to_group).
      |group_to_expectations| is a dict of group names to sets of
      data_type.Expectations that belong to that group. |expectation_to_group|
      is the same, but mapped the other way from data_type.Expectations to group
      names.

    Raises:
      RuntimeError: If group start/end comments in |content| are improperly
          paired (nested start or unmatched end).
    """
    group_to_expectations = collections.defaultdict(set)
    expectation_to_group = {}
    # The name of the group currently being parsed, or None when outside of
    # any group block.
    group_name = None

    for line in content.splitlines():
      stripped_line = line.strip()
      # Possibly starting/ending a group.
      if _IsCommentOrBlankLine(stripped_line):
        if _LineContainsGroupStartComment(stripped_line):
          # Start of a new group.
          if group_name:
            raise RuntimeError(
                'Invalid expectation file %s - contains a group comment "%s" '
                'that is inside another group block.' %
                (expectation_file, stripped_line))
          group_name = _GetGroupNameFromCommentLine(stripped_line)
        elif _LineContainsGroupEndComment(stripped_line):
          # End of current group.
          if not group_name:
            raise RuntimeError(
                'Invalid expectation file %s - contains a group comment "%s" '
                'without a group start comment.' %
                (expectation_file, stripped_line))
          group_name = None
      elif group_name:
        # Currently in a group.
        e = self._CreateExpectationFromExpectationFileLine(
            stripped_line, expectation_file)
        group_to_expectations[group_name].add(e)
        expectation_to_group[e] = group_name
      # If we aren't in a group, do nothing.
    return group_to_expectations, expectation_to_group
479
480  def _CreateExpectationFromExpectationFileLine(self, line: str,
481                                                expectation_file: str
482                                                ) -> data_types.Expectation:
483    """Creates a data_types.Expectation from |line|.
484
485    Args:
486      line: A string containing a single line from an expectation file.
487      expectation_file: A filepath pointing to an expectation file |line| came
488          from.
489
490    Returns:
491      A data_types.Expectation containing the same information as |line|.
492    """
493    header = self._GetExpectationFileTagHeader(expectation_file)
494    single_line_content = header + line
495    list_parser = expectations_parser.TaggedTestListParser(single_line_content)
496    assert len(list_parser.expectations) == 1
497    typ_expectation = list_parser.expectations[0]
498    return data_types.Expectation(typ_expectation.test, typ_expectation.tags,
499                                  typ_expectation.raw_results,
500                                  typ_expectation.reason)
501
  def _GetExpectationFileTagHeader(self, expectation_file: str) -> str:
    """Gets the tag header used for expectation files.

    Args:
      expectation_file: A filepath pointing to an expectation file to get the
          tag header from.

    Returns:
      A string containing an expectation file header, i.e. the comment block at
      the top of the file defining possible tags and expected results.

    Raises:
      NotImplementedError: Always. Subclasses must override this.
    """
    raise NotImplementedError()
514
515  def ParseTaggedTestListContent(self, content: str
516                                 ) -> expectations_parser.TaggedTestListParser:
517    """Helper to parse typ expectation files.
518
519    This allows subclasses to avoid adding typ to PYTHONPATH.
520    """
521    return expectations_parser.TaggedTestListParser(content)
522
523  def FilterToKnownTags(self, tags: Iterable[str]) -> Set[str]:
524    """Filters |tags| to only include tags known to expectation files.
525
526    Args:
527      tags: An iterable of strings containing tags.
528
529    Returns:
530      A set containing the elements of |tags| with any tags that are not defined
531      in any expectation files removed.
532    """
533    return self._GetKnownTags() & set(tags)
534
  def _GetKnownTags(self) -> Set[str]:
    """Gets all known/defined tags from expectation files.

    Returns:
      A set of strings containing all known/defined tags from expectation files.

    Raises:
      NotImplementedError: Always. Subclasses must override this.
    """
    raise NotImplementedError()
542
543  def _FilterToMostSpecificTypTags(self, typ_tags: FrozenSet[str],
544                                   expectation_file: str) -> FrozenSet[str]:
545    """Filters |typ_tags| to the most specific set.
546
547    Assumes that the tags in |expectation_file| are ordered from least specific
548    to most specific within each tag group.
549
550    Args:
551      typ_tags: A frozenset of strings containing the typ tags to filter.
552      expectations_file: A string containing a filepath pointing to the
553          expectation file to filter tags with.
554
555    Returns:
556      A frozenset containing the contents of |typ_tags| with only the most
557      specific tag from each group remaining.
558    """
559    # The logic for this function was lifted from the GPU/Blink flake finders,
560    # so there may be room to share code between the two.
561
562    if expectation_file not in self._cached_tag_groups:
563      with open(expectation_file) as infile:
564        contents = infile.read()
565      tag_groups = []
566      for match in TAG_GROUP_REGEX.findall(contents):
567        tag_groups.append(match.lower().strip().replace('#', '').split())
568      self._cached_tag_groups[expectation_file] = tag_groups
569    tag_groups = self._cached_tag_groups[expectation_file]
570
571    num_matches = 0
572    tags_in_same_group = collections.defaultdict(list)
573    for tag in typ_tags:
574      for index, tag_group in enumerate(tag_groups):
575        if tag in tag_group:
576          tags_in_same_group[index].append(tag)
577          num_matches += 1
578          break
579    if num_matches != len(typ_tags):
580      all_tags = set()
581      for group in tag_groups:
582        all_tags |= set(group)
583      raise RuntimeError('Found tags not in expectation file %s: %s' %
584                         (expectation_file, ' '.join(set(typ_tags) - all_tags)))
585
586    filtered_tags = set()
587    for index, tags in tags_in_same_group.items():
588      if len(tags) == 1:
589        filtered_tags.add(tags[0])
590      else:
591        tag_group = tag_groups[index]
592        best_index = -1
593        for t in tags:
594          i = tag_group.index(t)
595          if i > best_index:
596            best_index = i
597        filtered_tags.add(tag_group[best_index])
598    return frozenset(filtered_tags)
599
  def _ConsolidateKnownOverlappingTags(self, typ_tags: FrozenSet[str]
                                       ) -> FrozenSet[str]:
    """Consolidates tags that are known to overlap/cause issues.

    One known example of this would be dual GPU machines that report tags for
    both GPUs.

    Args:
      typ_tags: A frozenset of strings containing typ tags.

    Returns:
      A frozenset with known-overlapping tags consolidated. This base
      implementation returns |typ_tags| unchanged; it exists as a hook for
      subclasses with platform-specific knowledge.
    """
    return typ_tags
608
  def NarrowSemiStaleExpectationScope(
      self, stale_expectation_map: data_types.TestExpectationMap) -> Set[str]:
    """Narrows the scope of expectations in |stale_expectation_map|.

    Expectations are modified such that they only apply to configurations that
    need them, to the best extent possible. If scope narrowing is not possible,
    e.g. the same hardware/software combination reports fully passing on one bot
    but reports some failures on another bot, the expectation will not be
    modified.

    Args:
      stale_expectation_map: A data_types.TestExpectationMap containing
          semi-stale expectations.

    Returns:
      A set of strings containing URLs of bugs associated with the modified
      expectations.
    """
    modified_urls = set()
    # Disable annotations are cached per file since multiple expectations can
    # come from the same expectation file.
    cached_disable_annotated_expectations = {}
    for expectation_file, e, builder_map in (
        stale_expectation_map.IterBuilderStepMaps()):
      # Check if the current annotation has scope narrowing disabled.
      if expectation_file not in cached_disable_annotated_expectations:
        with open(expectation_file) as infile:
          disable_annotated_expectations = (
              self._GetDisableAnnotatedExpectationsFromFile(
                  expectation_file, infile.read()))
          cached_disable_annotated_expectations[
              expectation_file] = disable_annotated_expectations
      disable_block_suffix, disable_block_reason = (
          cached_disable_annotated_expectations[expectation_file].get(
              e, ('', '')))
      if _DisableSuffixIsRelevant(disable_block_suffix, RemovalType.NARROWING):
        logging.info(
            'Skipping semi-stale narrowing check for expectation %s since it '
            'has a narrowing disable annotation with reason %s',
            e.AsExpectationFileString(), disable_block_reason)
        continue

      skip_to_next_expectation = False

      pass_tag_sets = set()
      fail_tag_sets = set()
      # Determine which tags sets failures can occur on vs. tag sets that
      # don't have any failures.
      for builder, step, build_stats in builder_map.IterBuildStats():
        if len(build_stats.tag_sets) > 1:
          # This shouldn't really be happening during normal operation, but is
          # expected to happen if a configuration changes, e.g. an OS was
          # upgraded. In these cases, the old data will eventually age out and
          # we will stop getting multiple tag sets.
          logging.warning(
              'Step %s on builder %s produced multiple tag sets: %s. Not '
              'narrowing expectation scope for expectation %s.', step, builder,
              build_stats.tag_sets, e.AsExpectationFileString())
          skip_to_next_expectation = True
          break
        if build_stats.NeverNeededExpectation(e):
          pass_tag_sets |= build_stats.tag_sets
        else:
          fail_tag_sets |= build_stats.tag_sets
      if skip_to_next_expectation:
        continue

      # Remove all instances of tags that are shared between all sets other than
      # the tags that were used by the expectation, as they are redundant.
      common_tags = set()
      for ts in pass_tag_sets:
        common_tags |= ts
        # We only need one initial tag set, but sets do not have a way of
        # retrieving a single element other than pop(), which removes the
        # element, which we don't want.
        break
      for ts in pass_tag_sets | fail_tag_sets:
        common_tags &= ts
      common_tags -= e.tags
      pass_tag_sets = {ts - common_tags for ts in pass_tag_sets}
      fail_tag_sets = {ts - common_tags for ts in fail_tag_sets}

      # Calculate new tag sets that should be functionally equivalent to the
      # single, more broad tag set that we are replacing. This is done by
      # checking if the intersection between any pairs of fail tag sets are
      # still distinct from any pass tag sets, i.e. if the intersection between
      # fail tag sets is still a valid fail tag set. If so, the original sets
      # are replaced by the intersection.
      new_tag_sets = set()
      covered_fail_tag_sets = set()
      for fail_tags in fail_tag_sets:
        if any(fail_tags <= pt for pt in pass_tag_sets):
          # A failing tag set that is a subset of a passing one means we
          # cannot distinguish failing configs from passing ones via tags.
          logging.warning(
              'Unable to determine what makes failing configs unique for %s, '
              'not narrowing expectation scope.', e.AsExpectationFileString())
          skip_to_next_expectation = True
          break
        if fail_tags in covered_fail_tag_sets:
          continue
        tag_set_to_add = fail_tags
        for ft in fail_tag_sets:
          if ft in covered_fail_tag_sets:
            continue
          intersection = tag_set_to_add & ft
          if any(intersection <= pt for pt in pass_tag_sets):
            # Intersection is too small, as it also covers a passing tag set.
            continue
          if any(intersection <= cft for cft in covered_fail_tag_sets):
            # Both the intersection and some tag set from new_tag_sets
            # apply to the same original failing tag set,
            # which means if we add the intersection to new_tag_sets,
            # they will conflict on the bot from the original failing tag set.
            # The above check works because new_tag_sets and
            # covered_fail_tag_sets are updated together below.
            continue
          tag_set_to_add = intersection
        new_tag_sets.add(tag_set_to_add)
        covered_fail_tag_sets.update(cft for cft in fail_tag_sets
                                     if tag_set_to_add <= cft)
      if skip_to_next_expectation:
        continue

      # Remove anything we know could be problematic, e.g. causing expectation
      # file parsing errors.
      new_tag_sets = {
          self._ConsolidateKnownOverlappingTags(nts)
          for nts in new_tag_sets
      }
      new_tag_sets = {
          self._FilterToMostSpecificTypTags(nts, expectation_file)
          for nts in new_tag_sets
      }

      # Replace the existing expectation with our new ones.
      with open(expectation_file) as infile:
        file_contents = infile.read()
      line, _ = self._GetExpectationLine(e, file_contents, expectation_file)
      modified_urls |= set(e.bug.split())
      expectation_strs = []
      for new_tags in new_tag_sets:
        expectation_copy = copy.copy(e)
        expectation_copy.tags = new_tags
        expectation_strs.append(expectation_copy.AsExpectationFileString())
      # Sort so the replacement output is deterministic regardless of set
      # iteration order.
      expectation_strs.sort()
      replacement_lines = '\n'.join(expectation_strs)
      file_contents = file_contents.replace(line, replacement_lines)
      with open(expectation_file, 'w') as outfile:
        outfile.write(file_contents)

    return modified_urls
757
758  def _GetExpectationLine(self, expectation: data_types.Expectation,
759                          file_contents: str, expectation_file: str
760                          ) -> Union[Tuple[None, None], Tuple[str, int]]:
761    """Gets the line and line number of |expectation| in |file_contents|.
762
763    Args:
764      expectation: A data_types.Expectation.
765      file_contents: A string containing the contents read from an expectation
766          file.
767      expectation_file: A string containing the path to the expectation file
768          that |file_contents| came from.
769
770    Returns:
771      A tuple (line, line_number). |line| is a string containing the exact line
772      in |file_contents| corresponding to |expectation|. |line_number| is an int
773      corresponding to where |line| is in |file_contents|. |line_number| may be
774      off if the file on disk has changed since |file_contents| was read. If a
775      corresponding line cannot be found, both |line| and |line_number| are
776      None.
777    """
778    # We have all the information necessary to recreate the expectation line and
779    # line number can be pulled during the initial expectation parsing. However,
780    # the information we have is not necessarily in the same order as the
781    # text file (e.g. tag ordering), and line numbers can change pretty
782    # dramatically between the initial parse and now due to stale expectations
783    # being removed. So, parse this way in order to improve the user experience.
784    file_lines = file_contents.splitlines()
785    for line_number, line in enumerate(file_lines):
786      if _IsCommentOrBlankLine(line.strip()):
787        continue
788      current_expectation = self._CreateExpectationFromExpectationFileLine(
789          line, expectation_file)
790      if expectation == current_expectation:
791        return line, line_number + 1
792    return None, None
793
794  def FindOrphanedBugs(self, affected_urls: Iterable[str]) -> Set[str]:
795    """Finds cases where expectations for bugs no longer exist.
796
797    Args:
798      affected_urls: An iterable of affected bug URLs, as returned by functions
799          such as RemoveExpectationsFromFile.
800
801    Returns:
802      A set containing a subset of |affected_urls| who no longer have any
803      associated expectations in any expectation files.
804    """
805    seen_bugs = set()
806
807    expectation_files = self.GetExpectationFilepaths()
808
809    for ef in expectation_files:
810      with open(ef) as infile:
811        contents = infile.read()
812      for url in affected_urls:
813        if url in seen_bugs:
814          continue
815        if url in contents:
816          seen_bugs.add(url)
817    return set(affected_urls) - seen_bugs
818
819  def GetExpectationFilepaths(self) -> List[str]:
820    """Gets all the filepaths to expectation files of interest.
821
822    Returns:
823      A list of strings, each element being a filepath pointing towards an
824      expectation file.
825    """
826    raise NotImplementedError()
827
828
def _LineContainsGroupStartComment(line: str) -> bool:
  """Returns whether |line| contains a finder group-start annotation."""
  return line.find(FINDER_GROUP_COMMENT_START) != -1
831
832
def _LineContainsGroupEndComment(line: str) -> bool:
  """Returns whether |line| contains a finder group-end annotation."""
  return line.find(FINDER_GROUP_COMMENT_END) != -1
835
836
def _LineContainsDisableComment(line: str) -> bool:
  """Returns whether |line| contains a finder disable annotation."""
  return line.find(FINDER_DISABLE_COMMENT_BASE) != -1
839
840
def _LineContainsEnableComment(line: str) -> bool:
  """Returns whether |line| contains a finder enable annotation."""
  return line.find(FINDER_ENABLE_COMMENT_BASE) != -1
843
844
def _GetGroupNameFromCommentLine(line: str) -> str:
  """Extracts the group name from the finder group-start comment on |line|.

  Raises:
    RuntimeError: If no group name follows the group-start annotation.
  """
  assert FINDER_GROUP_COMMENT_START in line
  # Drop the leading comment marker(s), then take everything after the
  # annotation token itself as the group name.
  comment_body = line.lstrip('#').strip()
  parts = comment_body.split(maxsplit=1)
  if len(parts) < 2:
    raise RuntimeError('Given line %s did not have a group name.' % line)
  return parts[1]
853
854
def _GetFinderCommentSuffix(line: str) -> str:
  """Gets the suffix of the finder comment on the given line.

  Examples:
    'foo  # finder:disable-stale some_reason' -> '-stale'
    'foo  # finder:disable-unused' -> '-unused'

  NOTE(review): a bare, suffix-less comment such as 'foo  # finder:disable'
  would produce an empty suffix, which is NOT in ALL_FINDER_DISABLE_SUFFIXES
  and therefore trips the assert below. The previous docstring example
  claiming '' is returned for that case appears stale — confirm whether
  suffix-less disable comments are meant to be supported.

  Raises:
    RuntimeError: If |line| contains neither a disable nor an enable comment.
  """
  target_str = None
  if _LineContainsDisableComment(line):
    target_str = FINDER_DISABLE_COMMENT_BASE
  elif _LineContainsEnableComment(line):
    target_str = FINDER_ENABLE_COMMENT_BASE
  else:
    raise RuntimeError('Given line %s did not have a finder comment.' % line)
  # Isolate the annotation token (base + suffix) from the rest of the line,
  # then strip the base to leave just the suffix.
  line = line[line.find(target_str):]
  line = line.split()[0]
  suffix = line.replace(target_str, '')
  # Enable comments share the same suffix set as disable comments.
  assert suffix in ALL_FINDER_DISABLE_SUFFIXES
  return suffix
874
875
def _LineContainsRelevantDisableComment(line: str, removal_type: str) -> bool:
  """Returns whether the given line contains a relevant disable comment.

  Args:
    line: A string containing the line to check.
    removal_type: A RemovalType enum corresponding to the type of expectations
        being removed.

  Returns:
    A bool denoting whether |line| contains a relevant disable comment given
    |removal_type|.
  """
  # A general disable always applies; otherwise the disable's suffix must
  # match the type of removal being performed.
  relevant_comments = (FINDER_DISABLE_COMMENT_GENERAL,
                       FINDER_DISABLE_COMMENT_BASE + removal_type)
  return any(comment in line for comment in relevant_comments)
893
894
def _DisableSuffixIsRelevant(suffix: str, removal_type: str) -> bool:
  """Returns whether the given suffix is relevant given the removal type.

  Args:
    suffix: A string containing a disable comment suffix.
    removal_type: A RemovalType enum corresponding to the type of expectations
        being removed.

  Returns:
    True if suffix is relevant and its disable request should be honored.
  """
  # The general suffix disables everything; otherwise honor only an exact
  # match with the current removal type.
  return suffix in (FINDER_COMMENT_SUFFIX_GENERAL, removal_type)
911
912
def _GetDisableReasonFromComment(line: str) -> str:
  """Returns the human-provided reason following the disable comment on |line|."""
  # Reconstruct the full annotation token (base + suffix), then everything
  # after it on the line is the reason text.
  full_comment = FINDER_DISABLE_COMMENT_BASE + _GetFinderCommentSuffix(line)
  reason = line.split(full_comment, 1)[1]
  return reason.strip()
916
917
918def _IsCommentOrBlankLine(line: str) -> bool:
919  return (not line or line.startswith('#'))
920
921
def _ExpectationPartOfNonRemovableGroup(
    current_expectation: data_types.Expectation,
    group_to_expectations: Dict[str, Set[data_types.Expectation]],
    expectation_to_group: Dict[data_types.Expectation, str],
    removable_expectations: List[data_types.Expectation]) -> bool:
  """Determines if the given expectation is part of a non-removable group.

  This is the case if the expectation is part of a group, but not all
  expectations in that group are marked as removable.

  Args:
    current_expectation: A data_types.Expectation that is being checked.
    group_to_expectations: A dict mapping group names to sets of expectations
        contained within that group.
    expectation_to_group: A dict mapping an expectation to the group name it
        belongs to.
    removable_expectations: A list of all expectations that are removable.

  Returns:
    True if |current_expectation| belongs to a group whose members are not
    all removable. False if it is not in any group, or if every member of
    its group is removable.
  """
  # Since we'll only ever be using this to check for inclusion, use a set
  # for efficiency.
  removable_expectations = set(removable_expectations)

  # Expectations that are not part of any group are never blocked by this
  # check.
  group_name = expectation_to_group.get(current_expectation)
  if not group_name:
    return False

  # The group is removable only if it is a subset of the removable
  # expectations, i.e. every member is removable.
  all_expectations_in_group = group_to_expectations[group_name]
  return not (all_expectations_in_group <= removable_expectations)
950
951
def _RemoveStaleComments(content: str, removed_lines: Set[int],
                         header_length: int) -> str:
  """Attempts to remove stale contents from the given expectation file content.

  Args:
    content: A string containing the contents of an expectation file.
    removed_lines: A set of ints denoting which line numbers were removed in
        the process of creating |content|. NOTE(review): the indexing below
        treats each element |rl| so that split_content[rl] is the line that
        now follows the removed block and split_content[rl - 1] is the line
        preceding it — confirm against the caller that produces this set.
    header_length: An int denoting how many lines long the tag header is.
        Used as the upper bound when scanning backwards so header comments
        are never removed.

  Returns:
    A copy of |content| with various stale comments removed, e.g. group blocks
    if the group has been removed.
  """
  # Look for the case where we've removed an entire block of expectations that
  # were preceded by a comment, which we should remove.
  comment_line_numbers_to_remove = []
  # splitlines(True) keeps line endings so the file can be reassembled with a
  # plain join after lines are blanked out.
  split_content = content.splitlines(True)
  for rl in removed_lines:
    found_trailing_annotation = False
    found_starting_annotation = False
    # Check for the end of the file, a blank line, or a comment after the block
    # we've removed.
    if rl < len(split_content):
      stripped_line = split_content[rl].strip()
      if stripped_line and not stripped_line.startswith('#'):
        # We found an expectation, so the entire expectation block wasn't
        # removed.
        continue
      if any(annotation in stripped_line
             for annotation in ALL_FINDER_END_ANNOTATION_BASES):
        found_trailing_annotation = True
    # Look for a comment block immediately preceding the block we removed.
    # Scan backwards one line at a time, stopping before the tag header.
    comment_line_number = rl - 1
    while comment_line_number != header_length - 1:
      stripped_line = split_content[comment_line_number].strip()
      if stripped_line.startswith('#'):
        # If we find what should be a trailing annotation, stop immediately so
        # we don't accidentally remove it and create an orphan earlier in the
        # file.
        if any(annotation in stripped_line
               for annotation in ALL_FINDER_END_ANNOTATION_BASES):
          break
        if any(annotation in stripped_line
               for annotation in ALL_FINDER_START_ANNOTATION_BASES):
          # If we've already found a starting annotation, skip past this line.
          # This is to handle the case of nested annotations, e.g. a
          # disable-narrowing block inside of a group block. We'll find the
          # inner-most block here and remove it. Any outer blocks will be
          # removed as part of the lingering stale annotation removal later on.
          # If we don't skip past these outer annotations, then we get left with
          # orphaned trailing annotations.
          if found_starting_annotation:
            comment_line_number -= 1
            continue
          found_starting_annotation = True
          # If we found a starting annotation but not a trailing annotation, we
          # shouldn't remove the starting one, as that would cause the trailing
          # one that is later in the file to be orphaned. We also don't want to
          # continue and remove comments above that since it is assumedly still
          # valid.
          if found_starting_annotation and not found_trailing_annotation:
            break
        comment_line_numbers_to_remove.append(comment_line_number)
        comment_line_number -= 1
      else:
        # Hit a non-comment line (an expectation or blank line), so the
        # preceding comment block has ended.
        break
    # In the event that we found both a start and trailing annotation, we need
    # to also remove the trailing one.
    if found_trailing_annotation and found_starting_annotation:
      comment_line_numbers_to_remove.append(rl)

  # Actually remove the comments we found above. Lines are blanked rather
  # than deleted so the indices gathered above stay valid throughout.
  for i in comment_line_numbers_to_remove:
    split_content[i] = ''
  if comment_line_numbers_to_remove:
    content = ''.join(split_content)

  # Remove any lingering cases of stale annotations that we can easily detect.
  # ALL_STALE_COMMENT_REGEXES is defined elsewhere in this module.
  for regex in ALL_STALE_COMMENT_REGEXES:
    for match in regex.findall(content):
      content = content.replace(match, '')

  return content
1036