• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2# Copyright 2018 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""A simple service to monitor DUT statuses from master db/afe."""
7import collections
8import logging
9import sys
10import time
11
12import common
13from autotest_lib.server import constants
14from autotest_lib.server import frontend
15from chromite.lib import metrics
16from chromite.lib import ts_mon_config
17
18from infra_libs import ts_mon
19
20
# Immutable, hashable key used to group hosts when counting them. The field
# names are the same as the metric field names declared in main().
DutCountBucket = collections.namedtuple(
    'DutCountBucket',
    ('board', 'model', 'pool', 'is_locked', 'status'),
)
28
29
def _get_bucket_for_host(host):
    """Work out which counter bucket |host| should be tallied under.

    The board, model and pool are taken from the host's labels; a pool that
    is listed in constants.Pools.MANAGED_POOLS is reported with a 'managed:'
    prefix. A falsy host status is reported as '[None]'.

    Args:
        host: A Host object as returned by afe.

    Returns:
        A DutCountBucket instance describing the bucket for this host.
    """
    def label_for(prefix):
        # Resolve the one label value on this host that carries |prefix|.
        return _get_unique_label(host.labels, prefix)

    board_name = label_for(constants.Labels.BOARD_PREFIX)
    model_name = label_for(constants.Labels.MODEL_PREFIX)
    pool_name = label_for(constants.Labels.POOL_PREFIX)
    if pool_name in constants.Pools.MANAGED_POOLS:
        pool_name = 'managed:' + pool_name

    dut_status = host.status or '[None]'
    locked = host.locked
    return DutCountBucket(board=board_name,
                          model=model_name,
                          pool=pool_name,
                          is_locked=locked,
                          status=dut_status)
47
48
49def _get_unique_label(labels, prefix):
50    """Return the labels for a given prefix, with prefix stripped.
51
52    If prefixed label does not occur, return '[None]'
53    If prefixed label occurs multiply, return '[Multiple]'
54
55    _get_unique_label(['foo:1', 'foo:2', 'bar1'], 'foo:') -> '[Multiple]'
56
57    _get_unique_label(['foo:1', 'bar2', 'baz3'], 'foo:') -> '1'
58
59    _get_prefixed_labels(['bar1', 'baz1'], 'foo:') -> '[None]'
60    """
61    ls = [l[len(prefix):] for l in labels if l.startswith(prefix)]
62    if not ls:
63        return '[None]'
64    elif len(ls) == 1:
65        return ls[0]
66    else:
67        return '[Multiple]'
68
69
def main(argv):
    """Entry point for dut_mon.

    Sets up ts_mon, then loops forever: fetches all hosts from the AFE,
    counts them per DutCountBucket, reports each count through the
    'chromeos/autotest/dut_mon/dut_count' gauge, increments the tick counter,
    and sleeps for two minutes. The loop has no exit condition, so this
    function only ends if an exception propagates.

    Args:
        argv: Command line arguments. Currently unused.
    """
    logging.getLogger().setLevel(logging.INFO)

    with ts_mon_config.SetupTsMonGlobalState('dut_mon', indirect=True):
        afe = frontend.AFE()
        counters = collections.defaultdict(int)

        # These names must match the DutCountBucket field names, since each
        # bucket is converted to a dict and passed as the metric fields.
        field_spec = [ts_mon.StringField('board'),
                      ts_mon.StringField('model'),
                      ts_mon.StringField('pool'),
                      ts_mon.BooleanField('is_locked'),
                      ts_mon.StringField('status'),
                      ]
        dut_count = metrics.Gauge('chromeos/autotest/dut_mon/dut_count',
                                  description='The number of duts in a given '
                                              'state and bucket.',
                                  field_spec=field_spec)
        tick_count = metrics.Counter('chromeos/autotest/dut_mon/tick',
                                     description='Tick counter of dut_mon.')

        while True:
            # Note: We reset all counters to zero in each loop rather than
            # creating a new defaultdict, because we want to ensure that any
            # gauges that were previously set to a nonzero value by this process
            # get set back to zero if necessary.
            for k in counters:
                counters[k] = 0

            logging.info('Fetching all hosts.')
            hosts = afe.get_hosts()
            logging.info('Fetched %s hosts.', len(hosts))
            for host in hosts:
                counters[_get_bucket_for_host(host)] += 1

            # items() is available on both Python 2 and 3 (iteritems() is
            # Python 2 only).
            for bucket, count in counters.items():
                logging.info('%s %s', bucket, count)
                # _asdict() is the documented namedtuple-to-dict conversion;
                # namedtuple instances have no __dict__ on Python 3.
                dut_count.set(count, fields=bucket._asdict())

            tick_count.increment()
            logging.info('Sleeping for 2 minutes.')
            time.sleep(120)
113
114
if __name__ == '__main__':
    # Only start the monitoring loop when run as a script, not on import.
    main(sys.argv)
117