#!/usr/bin/env python
# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""A simple service to monitor DUT statuses from master db/afe."""
import collections
import logging
import sys
import time

import common
from autotest_lib.server import constants
from autotest_lib.server import frontend
from chromite.lib import metrics
from chromite.lib import ts_mon_config

from infra_libs import ts_mon


# Key under which hosts are counted. The field names match, one for one, the
# names in the metric field spec declared in main(), so a bucket can be turned
# directly into the metric's `fields` mapping.
DutCountBucket = collections.namedtuple('DutCountBucket',
                                        ['board',
                                         'model',
                                         'pool',
                                         'is_locked',
                                         'status']
                                        )


def _get_bucket_for_host(host):
    """Determine the counter bucket for |host|.

    Args:
        host: A Host object as returned by afe.

    Returns:
        A DutCountBucket instance describing the bucket for this host.
    """
    board = _get_unique_label(host.labels, constants.Labels.BOARD_PREFIX)
    model = _get_unique_label(host.labels, constants.Labels.MODEL_PREFIX)
    pool = _get_unique_label(host.labels, constants.Labels.POOL_PREFIX)
    # Pools listed in MANAGED_POOLS are reported with a 'managed:' prefix.
    if pool in constants.Pools.MANAGED_POOLS:
        pool = 'managed:' + pool
    # A falsy status (None or empty) is reported as the literal '[None]'.
    status = host.status or '[None]'
    is_locked = host.locked
    return DutCountBucket(board, model, pool, is_locked, status)


def _get_unique_label(labels, prefix):
    """Return the single label for a given prefix, with prefix stripped.

    If prefixed label does not occur, return '[None]'
    If prefixed label occurs more than once, return '[Multiple]'

    _get_unique_label(['foo:1', 'foo:2', 'bar1'], 'foo:') -> '[Multiple]'

    _get_unique_label(['foo:1', 'bar2', 'baz3'], 'foo:') -> '1'

    _get_unique_label(['bar1', 'baz1'], 'foo:') -> '[None]'

    Args:
        labels: Iterable of label strings.
        prefix: Prefix string to match at the start of each label.

    Returns:
        The stripped label value, '[None]', or '[Multiple]'.
    """
    ls = [l[len(prefix):] for l in labels if l.startswith(prefix)]
    if not ls:
        return '[None]'
    elif len(ls) == 1:
        return ls[0]
    else:
        return '[Multiple]'


def main(argv):
    """Entry point for dut_mon.

    Loops forever: fetches all hosts from the AFE, counts them per
    DutCountBucket, publishes each count as a gauge, bumps a tick counter and
    then sleeps for two minutes.

    Args:
        argv: Command line arguments; currently unused.
    """
    logging.getLogger().setLevel(logging.INFO)

    with ts_mon_config.SetupTsMonGlobalState('dut_mon', indirect=True):
        afe = frontend.AFE()
        counters = collections.defaultdict(int)

        field_spec = [ts_mon.StringField('board'),
                      ts_mon.StringField('model'),
                      ts_mon.StringField('pool'),
                      ts_mon.BooleanField('is_locked'),
                      ts_mon.StringField('status'),
                      ]
        dut_count = metrics.Gauge('chromeos/autotest/dut_mon/dut_count',
                                  description='The number of duts in a given '
                                              'state and bucket.',
                                  field_spec=field_spec)
        tick_count = metrics.Counter('chromeos/autotest/dut_mon/tick',
                                     description='Tick counter of dut_mon.')

        while True:
            # Note: We reset all counters to zero in each loop rather than
            # creating a new defaultdict, because we want to ensure that any
            # gauges that were previously set to a nonzero value by this
            # process get set back to zero if necessary.
            for k in counters:
                counters[k] = 0

            logging.info('Fetching all hosts.')
            hosts = afe.get_hosts()
            logging.info('Fetched %s hosts.', len(hosts))
            for host in hosts:
                fields = _get_bucket_for_host(host)
                counters[fields] += 1

            # items() rather than iteritems() so the loop is not tied to
            # Python 2.
            for field, value in counters.items():
                logging.info('%s %s', field, value)
                # _asdict() is the documented way to turn a namedtuple into a
                # field-name -> value mapping; namedtuple.__dict__ is not
                # available on newer Python versions.
                dut_count.set(value, fields=field._asdict())

            tick_count.increment()
            logging.info('Sleeping for 2 minutes.')
            time.sleep(120)


if __name__ == '__main__':
    main(sys.argv)