#!/usr/bin/env python
# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""A simple service to monitor DUT statuses from master db/afe."""

import collections
import logging
import sys
import time

import common
from autotest_lib.server import constants
from autotest_lib.server import frontend
from chromite.lib import metrics
from chromite.lib import ts_mon_config
from infra_libs import ts_mon


# One bucket per distinct combination of these fields; the field names match
# the names declared in the gauge's field_spec in main().
DutCountBucket = collections.namedtuple(
        'DutCountBucket',
        ['board', 'model', 'pool', 'is_locked', 'status'])


def _get_bucket_for_host(host):
    """Determine the counter bucket for |host|.

    Args:
        host: A Host object as returned by afe. Its |labels|, |status| and
              |locked| attributes are read.

    Returns:
        A DutCountBucket instance describing the bucket for this host.
    """
    board = _get_unique_label(host.labels, constants.Labels.BOARD_PREFIX)
    model = _get_unique_label(host.labels, constants.Labels.MODEL_PREFIX)
    pool = _get_unique_label(host.labels, constants.Labels.POOL_PREFIX)
    if pool in constants.Pools.MANAGED_POOLS:
        pool = 'managed:' + pool
    # A falsy status (None or empty) is reported with a placeholder value.
    status = host.status or '[None]'
    is_locked = host.locked
    return DutCountBucket(board, model, pool, is_locked, status)


def _get_unique_label(labels, prefix):
    """Return the single label with the given prefix, with prefix stripped.

    If prefixed label does not occur, return '[None]'
    If prefixed label occurs multiply, return '[Multiple]'

    _get_unique_label(['foo:1', 'foo:2', 'bar1'], 'foo:') -> '[Multiple]'

    _get_unique_label(['foo:1', 'bar2', 'baz3'], 'foo:') -> '1'

    _get_unique_label(['bar1', 'baz1'], 'foo:') -> '[None]'

    Args:
        labels: An iterable of label strings.
        prefix: The prefix string to look for.

    Returns:
        The stripped label, '[None]', or '[Multiple]' as described above.
    """
    matches = [label[len(prefix):] for label in labels
               if label.startswith(prefix)]
    if not matches:
        return '[None]'
    if len(matches) == 1:
        return matches[0]
    return '[Multiple]'


def main(argv):
    """Entry point for dut_mon.

    Loops forever: fetches all hosts from the AFE, counts them per
    DutCountBucket, reports each count through the dut_count gauge, bumps
    the tick counter, and sleeps for two minutes.

    Args:
        argv: Command line arguments. Currently unused.
    """
    logging.getLogger().setLevel(logging.INFO)

    with ts_mon_config.SetupTsMonGlobalState('dut_mon', indirect=True):
        afe = frontend.AFE()
        counters = collections.defaultdict(int)

        field_spec = [ts_mon.StringField('board'),
                      ts_mon.StringField('model'),
                      ts_mon.StringField('pool'),
                      ts_mon.BooleanField('is_locked'),
                      ts_mon.StringField('status'),
                      ]
        dut_count = metrics.Gauge('chromeos/autotest/dut_mon/dut_count',
                                  description='The number of duts in a given '
                                              'state and bucket.',
                                  field_spec=field_spec)
        tick_count = metrics.Counter('chromeos/autotest/dut_mon/tick',
                                     description='Tick counter of dut_mon.')

        while True:
            # Note: We reset all counters to zero in each loop rather than
            # creating a new defaultdict, because we want to ensure that any
            # gauges that were previously set to a nonzero value by this
            # process get set back to zero if necessary.
            for bucket in counters:
                counters[bucket] = 0

            logging.info('Fetching all hosts.')
            hosts = afe.get_hosts()
            logging.info('Fetched %s hosts.', len(hosts))
            for host in hosts:
                counters[_get_bucket_for_host(host)] += 1

            for bucket, count in counters.items():
                logging.info('%s %s', bucket, count)
                # _asdict() is the documented way to turn a namedtuple into a
                # field-name -> value mapping; instance __dict__ is not
                # available on all Python versions.
                dut_count.set(count, fields=bucket._asdict())
            tick_count.increment()
            logging.info('Sleeping for 2 minutes.')
            time.sleep(120)


if __name__ == '__main__':
    main(sys.argv)