# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""The datastore models for graph data.

The Chromium project uses Buildbot to run its performance tests, and the
structure of the data for the Performance Dashboard reflects this. Metadata
about tests are structured in Master, Bot, and TestMetadata entities. Master and
Bot entities represent Buildbot masters and builders respectively, and
TestMetadata entities represent groups of results, or individual data series,
keyed by a full path to the test separated by '/' characters.

For example, entities might be structured as follows:

  Master: ChromiumPerf
    Bot: win7
      TestMetadata: ChromiumPerf/win7/page_cycler.moz
      TestMetadata: ChromiumPerf/win7/page_cycler.moz/times
      TestMetadata: ChromiumPerf/win7/page_cycler.moz/times/page_load_time
      TestMetadata: ChromiumPerf/win7/page_cycler.moz/times/page_load_time_ref
      TestMetadata: ChromiumPerf/win7/page_cycler.moz/times/www.amazon.com
      TestMetadata: ChromiumPerf/win7/page_cycler.moz/times/www.bing.com
      TestMetadata: ChromiumPerf/win7/page_cycler.moz/commit_charge
      TestMetadata: ChromiumPerf/win7/page_cycler.moz/commit_charge/ref
      TestMetadata: ChromiumPerf/win7/page_cycler.moz/commit_charge/www.amazon.com
      TestMetadata: ChromiumPerf/win7/page_cycler.moz/commit_charge/www.bing.com

The graph data points are represented by Row entities. Each Row entity contains
a revision and value, which are its X and Y values on a graph, and any other
metadata associated with an individual performance test result.

The keys of the Row entities for a particular data series start with a
TestContainer key, instead of a TestMetadata key. This way, the Row entities for
each data series are in a different "entity group". This allows a faster rate of
putting data in the datastore for many series at once.

For example, Row entities are organized like this:

  TestContainer: ChromiumPerf/win7/page_cycler.moz/times/page_load_time
    Row: revision 12345, value 2.5
    Row: revision 12346, value 2.0
    Row: revision 12347, value 2.1
  TestContainer: ChromiumPerf/win7/page_cycler.moz/times/page_load_time_ref
    Row: revision 12345, value 2.4
    Row: revision 12346, value 2.0
    Row: revision 12347, value 2.2
  TestContainer: ChromiumPerf/win7/page_cycler.moz/commit_charge
    Row: revision 12345, value 10
    Row: revision 12346, value 12
    Row: revision 12347, value 11


IMPORTANT: If you add new kinds to this file, you must also add them to the
Daily Backup url in cron.yaml in order for them to be properly backed up.
See: https://developers.google.com/appengine/articles/scheduled_backups
"""

import logging

from google.appengine.ext import ndb

from dashboard import datastore_hooks
from dashboard import layered_cache
from dashboard import utils
from dashboard.models import anomaly
from dashboard.models import anomaly_config
from dashboard.models import internal_only_model
from dashboard.models import sheriff as sheriff_module
from dashboard.models import stoppage_alert as stoppage_alert_module

# Maximum level of nested tests.
MAX_TEST_ANCESTORS = 10

# Keys to the datastore-based cache. See stored_object.
# The three placeholders are filled with the (master, bot, suite) tuple
# returned by TestMetadata._GetMasterBotSuite.
LIST_TESTS_SUBTEST_CACHE_KEY = 'list_tests_get_tests_new_%s_%s_%s'

# Maximum number of characters kept in TestMetadata.description.
_MAX_STRING_LENGTH = 500


class Master(internal_only_model.InternalOnlyModel):
  """Information about the Buildbot master.

  Masters are keyed by name, e.g. 'ChromiumGPU' or 'ChromiumPerf'.
  All Bot entities that are Buildbot slaves of one master are children of one
  Master entity in the datastore.
  """
  # Master has no properties; the name of the master is the ID.


class Bot(internal_only_model.InternalOnlyModel):
  """Information about a Buildbot slave that runs perf tests.

  Bots are keyed by name, e.g. 'xp-release-dual-core'. A Bot entity contains
  information about whether the tests are only viewable to internal users, and
  each bot has a parent that is a Master entity. To query the tests that run on
  a Bot, check the bot_name and master_name properties of the TestMetadata.
  """
  internal_only = ndb.BooleanProperty(default=False, indexed=True)


class TestMetadata(internal_only_model.CreateHookInternalOnlyModel):
  """A TestMetadata entity is a node in a hierarchy of tests.

  A TestMetadata entity can represent a specific series of results which will be
  plotted on a graph, or it can represent a group of such series of results, or
  both. A TestMetadata entity that has the property has_rows set to True
  corresponds to a timeseries on a graph, and the TestMetadata for a group of
  such tests has a path one level less deep, which corresponds to a graph with
  several timeseries. A TestMetadata one level less deep for that test would
  correspond to a group of related graphs. Top-level TestMetadata (also known as
  test suites) are keyed master/bot/test.

  TestMetadata are keyed by the full path to the test (for example
  master/bot/test/metric/page), and they also contain other metadata such as
  description and units.

  NOTE: If you remove any properties from TestMetadata, they should be added to
  the TEST_EXCLUDE_PROPERTIES list in migrate_test_names.py.
  """
  internal_only = ndb.BooleanProperty(default=False, indexed=True)

  # Sheriff rotation for this test. Rotations are specified by regular
  # expressions that can be edited at /edit_sheriffs.
  sheriff = ndb.KeyProperty(kind=sheriff_module.Sheriff, indexed=True)

  # There is a default anomaly threshold config (in anomaly.py), and it can
  # be overridden for a group of tests by using /edit_sheriffs.
  overridden_anomaly_config = ndb.KeyProperty(
      kind=anomaly_config.AnomalyConfig, indexed=True)

  # Keep track of what direction is an improvement for this graph so we can
  # filter out alerts on regressions.
  improvement_direction = ndb.IntegerProperty(
      default=anomaly.UNKNOWN,
      choices=[
          anomaly.UP,
          anomaly.DOWN,
          anomaly.UNKNOWN
      ],
      indexed=False
  )

  # Units of the child Rows of this test, or None if there are no child Rows.
  units = ndb.StringProperty(indexed=False)

  # The last alerted revision is used to avoid duplicate alerts.
  last_alerted_revision = ndb.IntegerProperty(indexed=False)

  # Whether or not the test has child rows. Set by hook on Row class put.
  has_rows = ndb.BooleanProperty(default=False, indexed=True)

  # If there is currently a StoppageAlert that indicates that data hasn't
  # been received for some time, then this will be set. Otherwise, it is None.
  stoppage_alert = ndb.KeyProperty(
      kind=stoppage_alert_module.StoppageAlert, indexed=True)

  # A test is marked "deprecated" if no new points have been received for
  # a long time; these tests should usually not be listed.
  deprecated = ndb.BooleanProperty(default=False, indexed=True)

  # For top-level test entities, this is a list of sub-tests that are checked
  # for alerts (i.e. they have a sheriff). For other tests, this is empty.
  monitored = ndb.KeyProperty(repeated=True, indexed=True)

  # Description of what the test measures. Truncated to _MAX_STRING_LENGTH
  # characters in __init__.
  description = ndb.TextProperty(indexed=True)

  # Source code location of the test. Optional.
  code = ndb.StringProperty(indexed=False, repeated=True)

  # Command to run the test. Optional.
  command_line = ndb.StringProperty(indexed=False)

  # Computed properties are treated like member variables, so they have
  # lowercase names, even though they look like methods to pylint.
  # pylint: disable=invalid-name

  @ndb.ComputedProperty
  def bot(self):  # pylint: disable=invalid-name
    """Immediate parent Bot entity, or None if this is not a test suite."""
    parts = self.key.id().split('/')
    if len(parts) != 3:
      # This is not a test suite.
      return None
    return ndb.Key('Master', parts[0], 'Bot', parts[1])

  @ndb.ComputedProperty
  def parent_test(self):  # pylint: disable=invalid-name
    """Immediate parent TestMetadata entity, or None if this is a test suite."""
    parts = self.key.id().split('/')
    if len(parts) < 4:
      # This is a test suite
      return None
    return ndb.Key('TestMetadata', '/'.join(parts[:-1]))

  @property
  def test_name(self):
    """The name of this specific test, without the test_path preceding."""
    return self.key.id().split('/')[-1]

  @property
  def test_path(self):
    """Slash-separated list of key parts, 'master/bot/suite/chart/...'."""
    return utils.TestPath(self.key)

  @ndb.ComputedProperty
  def master_name(self):
    """First '/'-separated part of the key ID (the master)."""
    return self.key.id().split('/')[0]

  @ndb.ComputedProperty
  def bot_name(self):
    """Second '/'-separated part of the key ID (the bot)."""
    return self.key.id().split('/')[1]

  @ndb.ComputedProperty
  def suite_name(self):
    """Third '/'-separated part of the key ID (the test suite)."""
    return self.key.id().split('/')[2]

  @ndb.ComputedProperty
  def test_part1_name(self):
    """Fourth part of the key ID, or '' if the path has fewer than 4 parts."""
    parts = self.key.id().split('/')
    if len(parts) < 4:
      return ''
    return parts[3]

  @ndb.ComputedProperty
  def test_part2_name(self):
    """Fifth part of the key ID, or '' if the path has fewer than 5 parts."""
    parts = self.key.id().split('/')
    if len(parts) < 5:
      return ''
    return parts[4]

  @ndb.ComputedProperty
  def test_part3_name(self):
    """Sixth part of the key ID, or '' if the path has fewer than 6 parts."""
    parts = self.key.id().split('/')
    if len(parts) < 6:
      return ''
    return parts[5]

  @ndb.ComputedProperty
  def test_part4_name(self):
    """Seventh part of the key ID, or '' if the path has fewer than 7 parts."""
    parts = self.key.id().split('/')
    if len(parts) < 7:
      return ''
    return parts[6]

  @classmethod
  def _GetMasterBotSuite(cls, key):
    """Returns the first three path parts of |key|'s ID as a tuple.

    Args:
      key: A key whose ID is a '/'-separated test path, or a falsy value.

    Returns:
      A tuple of up to three strings (master, bot, suite), or None if |key|
      is falsy.
    """
    if not key:
      return None
    return tuple(key.id().split('/')[:3])

  def __init__(self, *args, **kwargs):
    """Initializes the entity, truncating "description" if it is too long.

    Note that "description" is always passed on to the parent constructor; a
    missing or falsy value becomes the empty string.
    """
    # Indexed StringProperty has a maximum length. If this length is exceeded,
    # then an error will be thrown in ndb.Model.__init__.
    # Truncate the "description" property if necessary.
    description = kwargs.get('description') or ''
    kwargs['description'] = description[:_MAX_STRING_LENGTH]
    super(TestMetadata, self).__init__(*args, **kwargs)

  def _pre_put_hook(self):
    """This method is called before a TestMetadata is put into the datastore.

    Here, we check the key to make sure it is valid and check the sheriffs and
    anomaly configs to make sure they are current. We also update the monitored
    list of the test suite.
    """
    # Check to make sure the key is valid.
    # TestMetadata should not be an ancestor, so key.pairs() should have length
    # of 1. The id should have at least 3 parts to represent master/bot/suite.
    assert len(self.key.pairs()) == 1
    path_parts = self.key.id().split('/')
    assert len(path_parts) >= 3

    # Set the sheriff to the first sheriff (in the order returned by the
    # query) that has a test pattern that matches this test. any() stops at
    # the first matching pattern instead of evaluating the remaining ones.
    self.sheriff = None
    for sheriff_entity in sheriff_module.Sheriff.query().fetch():
      if any(utils.TestMatchesPattern(self, pattern)
             for pattern in sheriff_entity.patterns):
        self.sheriff = sheriff_entity.key
        break

    # If this test is monitored, add it to the monitored list of its test suite.
    # A test is monitored iff it has a sheriff, and monitored tests are
    # tracked in the monitored list of a test suite TestMetadata entity.
    test_suite = ndb.Key('TestMetadata', '/'.join(path_parts[:3])).get()
    if self.sheriff:
      if test_suite and self.key not in test_suite.monitored:
        test_suite.monitored.append(self.key)
        test_suite.put()
    elif test_suite and self.key in test_suite.monitored:
      test_suite.monitored.remove(self.key)
      test_suite.put()

    # Set the anomaly threshold config to the first one that has a test pattern
    # that matches this test, if there is one. Anomaly configs are sorted by
    # name, so that a config with a name that comes earlier lexicographically
    # is considered higher-priority.
    self.overridden_anomaly_config = None
    anomaly_configs = anomaly_config.AnomalyConfig.query().fetch()
    anomaly_configs.sort(key=lambda config: config.key.string_id())
    for anomaly_config_entity in anomaly_configs:
      if any(utils.TestMatchesPattern(self, pattern)
             for pattern in anomaly_config_entity.patterns):
        self.overridden_anomaly_config = anomaly_config_entity.key
        break

  def CreateCallback(self):
    """Called when the entity is first saved."""
    if len(self.key.id().split('/')) > 3:
      # Since this is not a test suite, the menu cache for the suite must
      # be updated.
      layered_cache.Delete(
          LIST_TESTS_SUBTEST_CACHE_KEY % self._GetMasterBotSuite(self.key))

  @classmethod
  # pylint: disable=unused-argument
  def _pre_delete_hook(cls, key):
    """Deletes the suite's cached sub-test list before a non-suite test is
    deleted."""
    if len(key.id().split('/')) > 3:
      # Since this is not a test suite, the menu cache for the suite must
      # be updated.
      layered_cache.Delete(
          LIST_TESTS_SUBTEST_CACHE_KEY % TestMetadata._GetMasterBotSuite(key))


class LastAddedRevision(ndb.Model):
  """Represents the last added revision for a test path.

  The reason this property is separated from the TestMetadata entity is to
  avoid contention issues (frequent updates of entities within the same group).
  This property is updated very frequently in /add_point.
  """
  revision = ndb.IntegerProperty(indexed=False)


class Row(internal_only_model.InternalOnlyModel, ndb.Expando):
  """A Row represents one data point.

  A Row has a revision and a value, which are the X and Y values, respectively.
  Each Row belongs to one TestMetadata, along with all of the other Row entities
  that it is plotted with. Rows are keyed by revision.

  In addition to the properties defined below, Row entities may also have other
  properties which specify additional supplemental data. These are called
  "supplemental columns", and should have the following prefixes:
    d_: A data point, such as d_1st_run or d_50th_percentile. FloatProperty.
    r_: Revision such as r_webkit or r_v8. StringProperty, limited to 25
        characters, '0-9' and '.'.
    a_: Annotation such as a_chrome_bugid or a_gasp_anomaly. StringProperty.
  """
  # Don't index by default (only explicitly indexed properties are indexed).
  _default_indexed = False
  internal_only = ndb.BooleanProperty(default=False, indexed=True)

  # The parent_test is the key of the TestMetadata entity that this Row belongs
  # to.
  @ndb.ComputedProperty
  def parent_test(self):  # pylint: disable=invalid-name
    # The Test entity that a Row belongs to isn't actually its parent in
    # the datastore. Rather, the parent key of each Row contains a test path,
    # which contains the information necessary to get the actual Test
    # key. The Test key will need to be converted back to a new style
    # TestMetadata key to get information back out. This is because we have
    # over 3 trillion Rows in the datastore and cannot convert them all :(
    return utils.OldStyleTestKey(utils.TestKey(self.key.parent().string_id()))

  # Points in each graph are sorted by "revision". This is usually a Chromium
  # SVN version number, but it might also be any other integer, as long as
  # newer points have higher numbers.
  @ndb.ComputedProperty
  def revision(self):  # pylint: disable=invalid-name
    return self.key.integer_id()

  # The time the revision was added to the dashboard is tracked in order
  # to prevent too many points from being added in a short period of time,
  # which would indicate an error or malicious code.
  timestamp = ndb.DateTimeProperty(auto_now_add=True, indexed=True)

  # The Y-value at this point.
  value = ndb.FloatProperty(indexed=True)

  # The standard deviation at this point. Optional.
  error = ndb.FloatProperty(indexed=False)

  def _pre_put_hook(self):
    """Sets the has_rows property of the parent test before putting this Row.

    This isn't atomic because the parent_test put() and Row put() don't happen
    in the same transaction. But in practice it shouldn't be an issue because
    the parent test will get more points as the test runs.
    """
    parent_key = self.key.parent()
    parent_test = utils.TestMetadataKey(parent_key.id()).get()

    # If the TestMetadata pointed to by parent_test is not valid, that indicates
    # that a TestMetadata entity was not properly created in add_point.
    if not parent_test:
      logging.warning(
          'Row put without valid TestMetadata. Parent key: %s', parent_key)
      return

    # Only write the parent when the flag actually changes.
    if not parent_test.has_rows:
      parent_test.has_rows = True
      parent_test.put()


def GetRowsForTestInRange(test_key, start_rev, end_rev, privileged=False):
  """Gets all the Row entities for a test between a given start and end.

  Args:
    test_key: Key of the test; it is passed through utils.OldStyleTestKey
        before being used in the query.
    start_rev: Lowest revision to include (inclusive).
    end_rev: Highest revision to include (inclusive).
    privileged: If True, datastore_hooks.SetSinglePrivilegedRequest() is
        called before the query is built.

  Returns:
    A list of Row entities.
  """
  test_key = utils.OldStyleTestKey(test_key)
  if privileged:
    datastore_hooks.SetSinglePrivilegedRequest()
  query = Row.query(
      Row.parent_test == test_key,
      Row.revision >= start_rev,
      Row.revision <= end_rev)
  return query.fetch(batch_size=100)


def GetRowsForTestAroundRev(test_key, rev, num_points, privileged=False):
  """Gets Row entities for a test centered on a revision.

  Up to int(num_points / 2) + 1 rows at or before |rev| and up to
  int(num_points / 2) rows after |rev| are requested, so for an even
  |num_points| as many as num_points + 1 rows may be returned.

  Args:
    test_key: Key of the test.
    rev: The revision to center on.
    num_points: Approximate number of rows wanted.
    privileged: Passed through to GetRowsForTestBeforeAfterRev.

  Returns:
    A list of Row entities in ascending revision order.
  """
  test_key = utils.OldStyleTestKey(test_key)
  num_rows_before = int(num_points / 2) + 1
  num_rows_after = int(num_points / 2)
  return GetRowsForTestBeforeAfterRev(
      test_key, rev, num_rows_before, num_rows_after, privileged)


def GetRowsForTestBeforeAfterRev(
    test_key, rev, num_rows_before, num_rows_after, privileged=False):
  """Gets Row entities for a test on either side of a revision.

  Args:
    test_key: Key of the test; it is passed through utils.OldStyleTestKey
        before being used in the queries.
    rev: The pivot revision.
    num_rows_before: Maximum number of rows with revision <= |rev| to fetch;
        a row at |rev| itself counts towards this limit.
    num_rows_after: Maximum number of rows with revision > |rev| to fetch.
    privileged: If True, datastore_hooks.SetSinglePrivilegedRequest() is
        called before each of the two queries.

  Returns:
    A list of Row entities in ascending revision order.
  """
  test_key = utils.OldStyleTestKey(test_key)

  if privileged:
    datastore_hooks.SetSinglePrivilegedRequest()
  query_up_to_rev = Row.query(Row.parent_test == test_key, Row.revision <= rev)
  query_up_to_rev = query_up_to_rev.order(-Row.revision)
  # Fetched newest-first so the limit keeps the rows closest to |rev|; reversed
  # so the result is in ascending order.
  rows_up_to_rev = list(reversed(
      query_up_to_rev.fetch(limit=num_rows_before, batch_size=100)))

  if privileged:
    datastore_hooks.SetSinglePrivilegedRequest()
  query_after_rev = Row.query(Row.parent_test == test_key, Row.revision > rev)
  query_after_rev = query_after_rev.order(Row.revision)
  rows_after_rev = query_after_rev.fetch(limit=num_rows_after, batch_size=100)

  return rows_up_to_rev + rows_after_rev


def GetLatestRowsForTest(
    test_key, num_points, keys_only=False, privileged=False):
  """Gets the latest num_points Row entities for a test.

  Args:
    test_key: Key of the test; it is passed through utils.OldStyleTestKey
        before being used in the query.
    num_points: Maximum number of rows to fetch.
    keys_only: Passed to fetch(); if True, keys are returned instead of
        entities.
    privileged: If True, datastore_hooks.SetSinglePrivilegedRequest() is
        called before the query is built.

  Returns:
    A list of up to |num_points| Row entities (or keys), in descending
    revision order.
  """
  test_key = utils.OldStyleTestKey(test_key)
  if privileged:
    datastore_hooks.SetSinglePrivilegedRequest()
  query = Row.query(Row.parent_test == test_key)
  query = query.order(-Row.revision)
  return query.fetch(limit=num_points, batch_size=100, keys_only=keys_only)