• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 package com.android.tradefed.device.metric;
17 
18 import static com.android.tradefed.util.proto.TfMetricProtoUtil.stringToMetric;
19 
20 import com.android.os.AtomsProto.Atom;
21 import com.android.os.StatsLog.EventMetricData;
22 import com.android.os.StatsLog.StatsdStatsReport;
23 import com.android.tradefed.config.OptionClass;
24 import com.android.tradefed.device.DeviceNotAvailableException;
25 import com.android.tradefed.device.ITestDevice;
26 import com.android.tradefed.device.TestDeviceState;
27 import com.android.tradefed.log.LogUtil.CLog;
28 import com.android.tradefed.metrics.proto.MetricMeasurement.Metric;
29 import com.android.tradefed.util.statsd.ConfigUtil;
30 import com.android.tradefed.util.statsd.MetricUtil;
31 
32 import com.google.common.annotations.VisibleForTesting;
33 import com.google.common.collect.Iterables;
34 import com.google.protobuf.InvalidProtocolBufferException;
35 
36 import java.io.IOException;
37 import java.text.SimpleDateFormat;
38 import java.util.Arrays;
39 import java.util.ArrayList;
40 import java.util.Date;
41 import java.util.HashMap;
42 import java.util.List;
43 import java.util.Map;
44 import java.util.NoSuchElementException;
45 import java.util.concurrent.TimeUnit;
46 import java.util.stream.Collectors;
47 
48 /**
49  * Collector that collects timestamps of runtime restarts (system server crashes) during the test
50  * run, if any.
51  *
52  * <p>Outputs results in counts, wall clock time in seconds and in HH:mm:ss format, and system
53  * uptime in nanoseconds and HH:mm:ss format.
54  *
55  * <p>This collector uses two sources for system server crashes:
56  *
57  * <p>
58  *
59  * <ol>
60  *   <li>The system_restart_sec list from StatsdStatsReport, which is a rolling list of 20
61  *       timestamps when the system server crashes, in seconds, with newer crashes appended to the
62  *       end (when the list fills up, older timestamps fall off the beginning).
63  *   <li>The AppCrashOccurred statsd atom, where a system server crash shows up as a system_server
64  *       process crash (this behavior is documented in the statsd atoms.proto definition). The event
65  *       metric gives the device uptime when the crash occurs.
66  * </ol>
67  *
68  * <p>Both can be useful information, as the former makes it easy to correlate timestamps in logs,
69  * while the latter serves as a longevity metric.
70  */
71 @OptionClass(alias = "runtime-restart-collector")
72 public class RuntimeRestartCollector extends BaseDeviceMetricCollector {
73     private static final String METRIC_SEP = "-";
74     public static final String METRIC_PREFIX = "runtime-restart";
75     public static final String METRIC_SUFFIX_COUNT = "count";
76     public static final String METRIC_SUFFIX_SYSTEM_TIMESTAMP_SECS = "timestamps_secs";
77     public static final String METRIC_SUFFIX_SYSTEM_TIMESTAMP_FORMATTED = "timestamps_str";
78     public static final String METRIC_SUFFIX_UPTIME_NANOS = "uptime_nanos";
79     public static final String METRIC_SUFFIX_UPTIME_FORMATTED = "uptime_str";
80     public static final SimpleDateFormat TIME_FORMATTER = new SimpleDateFormat("HH:mm:ss");
81     public static final String SYSTEM_SERVER_KEYWORD = "system_server";
82 
83     private List<ITestDevice> mTestDevices;
84     // Map to store the runtime restart timestamps for each device, keyed by device serial numbers.
85     private Map<String, List<Integer>> mExistingTimestamps = new HashMap<>();
86     // Map to store the statsd config IDs for each device, keyed by device serial numbers.
87     private Map<String, Long> mDeviceConfigIds = new HashMap<>();
88     // Flag variable to indicate whether including the device serial in the metrics is needed.
89     // Device serial is included when there are more than one device under test.
90     private boolean mIncludeDeviceSerial = false;
91 
92     /**
93      * Store the existing timestamps of system server restarts prior to the test run as statsd keeps
94      * a running log of them, and push the config to collect app crashes.
95      */
96     @Override
onTestRunStart(DeviceMetricData runData)97     public void onTestRunStart(DeviceMetricData runData) throws DeviceNotAvailableException {
98         mTestDevices = getDevices();
99         mIncludeDeviceSerial = (mTestDevices.size() > 1);
100         for (ITestDevice device : mTestDevices) {
101             // Pull statsd metadata (StatsdStatsReport) from the device and keep track of existing
102             // runtime restart timestamps.
103             try {
104                 List<Integer> existingTimestamps =
105                         getStatsdMetadata(device).getSystemRestartSecList();
106                 mExistingTimestamps.put(device.getSerialNumber(), existingTimestamps);
107             } catch (InvalidProtocolBufferException e) {
108                 // Error is not thrown as we still want to process the other devices.
109                 CLog.e(
110                         "Failed to get statsd metadata from device %s. Exception: %s.",
111                         device.getSerialNumber(), e);
112                 continue;
113             }
114 
115             // Register statsd config to collect app crashes.
116             try {
117                 mDeviceConfigIds.put(
118                         device.getSerialNumber(),
119                         pushStatsConfig(
120                                 device, Arrays.asList(Atom.APP_CRASH_OCCURRED_FIELD_NUMBER)));
121             } catch (IOException e) {
122                 // Error is not thrown as we still want to push the config to other devices.
123                 CLog.e(
124                         "Failed to push statsd config to device %s. Exception: %s.",
125                         device.getSerialNumber(), e);
126                 continue;
127             }
128         }
129     }
130 
131     /**
132      * Pull the timestamps at the end of the test run and report the difference with existing ones,
133      * if any.
134      */
135     @Override
onTestRunEnd(DeviceMetricData runData, final Map<String, Metric> currentRunMetrics)136     public void onTestRunEnd(DeviceMetricData runData, final Map<String, Metric> currentRunMetrics)
137             throws DeviceNotAvailableException {
138         for (ITestDevice device : mTestDevices) {
139             if (!TestDeviceState.ONLINE.equals(device.getDeviceState())) {
140                 CLog.d(
141                         "Device '%s' is in state '%s' skipping RuntimeRestartCollector",
142                         device.getSerialNumber(), device.getDeviceState());
143                 continue;
144             }
145             // Pull statsd metadata again to look at the changes of system server crash timestamps
146             // during the test run, and report them.
147             // A copy of the list is created as the list returned by the proto is not modifiable.
148             List<Integer> updatedTimestamps = new ArrayList<>();
149             try {
150                 updatedTimestamps.addAll(getStatsdMetadata(device).getSystemRestartSecList());
151             } catch (InvalidProtocolBufferException e) {
152                 // Error is not thrown as we still want to process the other devices.
153                 CLog.e(
154                         "Failed to get statsd metadata from device %s. Exception: %s.",
155                         device.getSerialNumber(), e);
156                 continue;
157             }
158             if (!mExistingTimestamps.containsKey(device.getSerialNumber())) {
159                 CLog.e("No prior state recorded for device %s.", device.getSerialNumber());
160                 addStatsdStatsBasedMetrics(
161                         currentRunMetrics, updatedTimestamps, device.getSerialNumber());
162             } else {
163                 try {
164                     int lastTimestampBeforeTestRun =
165                             Iterables.getLast(mExistingTimestamps.get(device.getSerialNumber()));
166                     // If the last timestamp is not found, lastIndexOf(...) + 1 returns 0 which is
167                     // what is needed in that situation.
168                     addStatsdStatsBasedMetrics(
169                             currentRunMetrics,
170                             getAllValuesAfter(lastTimestampBeforeTestRun, updatedTimestamps),
171                             device.getSerialNumber());
172                 } catch (NoSuchElementException e) {
173                     // The exception occurs when no prior runtime restarts had been recorded. It is
174                     // thrown from Iterables.getLast().
175                     addStatsdStatsBasedMetrics(
176                             currentRunMetrics, updatedTimestamps, device.getSerialNumber());
177                 }
178             }
179         }
180 
181         // Pull the AppCrashOccurred event metric reports and report the system server crashes
182         // within.
183         for (ITestDevice device : mTestDevices) {
184             if (!mDeviceConfigIds.containsKey(device.getSerialNumber())) {
185                 CLog.e("No config ID is associated with device %s.", device.getSerialNumber());
186                 continue;
187             }
188             long configId = mDeviceConfigIds.get(device.getSerialNumber());
189             List<Long> uptimeListNanos = new ArrayList<>();
190             List<EventMetricData> metricData = getEventMetricData(device, configId);
191             uptimeListNanos.addAll(
192                     metricData.stream()
193                             .filter(d -> d.hasElapsedTimestampNanos())
194                             .filter(d -> d.hasAtom())
195                             .filter(d -> d.getAtom().hasAppCrashOccurred())
196                             .filter(d -> d.getAtom().getAppCrashOccurred().hasProcessName())
197                             .filter(
198                                     d ->
199                                             SYSTEM_SERVER_KEYWORD.equals(
200                                                     d.getAtom()
201                                                             .getAppCrashOccurred()
202                                                             .getProcessName()))
203                             .map(d -> d.getElapsedTimestampNanos())
204                             .collect(Collectors.toList()));
205             addAtomBasedMetrics(currentRunMetrics, uptimeListNanos, device.getSerialNumber());
206             removeConfig(device, configId);
207         }
208     }
209 
210     /** Helper method to add metrics from StatsdStatsReport according to timestamps. */
addStatsdStatsBasedMetrics( final Map<String, Metric> metrics, List<Integer> timestampsSecs, String serial)211     private void addStatsdStatsBasedMetrics(
212             final Map<String, Metric> metrics, List<Integer> timestampsSecs, String serial) {
213         // Always add a count of system server crashes, regardless of whether there are any.
214         // The statsd metadata-based count is used as the atom-based data can miss runtime restart
215         // instances.
216         // TODO(b/135770315): Re-assess this after the root cause for missing instances in the atom
217         // -based results is found.
218         String countMetricKey = createMetricKey(METRIC_SUFFIX_COUNT, serial);
219         metrics.put(countMetricKey, stringToMetric(String.valueOf(timestampsSecs.size())));
220         // If there are runtime restarts, add a comma-separated list of timestamps.
221         if (!timestampsSecs.isEmpty()) {
222             // Store both the raw timestamp and the formatted, more readable version.
223 
224             String timestampMetricKey =
225                     createMetricKey(METRIC_SUFFIX_SYSTEM_TIMESTAMP_SECS, serial);
226             metrics.put(
227                     timestampMetricKey,
228                     stringToMetric(
229                             timestampsSecs
230                                     .stream()
231                                     .map(t -> String.valueOf(t))
232                                     .collect(Collectors.joining(","))));
233 
234             String formattedTimestampMetricKey =
235                     createMetricKey(METRIC_SUFFIX_SYSTEM_TIMESTAMP_FORMATTED, serial);
236             metrics.put(
237                     formattedTimestampMetricKey,
238                     stringToMetric(
239                             timestampsSecs
240                                     .stream()
241                                     .map(t -> timestampToHoursMinutesSeconds(t))
242                                     .collect(Collectors.joining(","))));
243         }
244     }
245 
246     /** Helper method to add metrics from the AppCrashOccurred atoms according to timestamps. */
addAtomBasedMetrics( final Map<String, Metric> metrics, List<Long> timestampsNanos, String serial)247     private void addAtomBasedMetrics(
248             final Map<String, Metric> metrics, List<Long> timestampsNanos, String serial) {
249         // If there are runtime restarts, add a comma-separated list of device uptime timestamps.
250         if (!timestampsNanos.isEmpty()) {
251             // Store both the raw timestamp and the formatted, more readable version.
252 
253             String uptimeNanosMetricKey = createMetricKey(METRIC_SUFFIX_UPTIME_NANOS, serial);
254             metrics.put(
255                     uptimeNanosMetricKey,
256                     stringToMetric(
257                             timestampsNanos
258                                     .stream()
259                                     .map(t -> String.valueOf(t))
260                                     .collect(Collectors.joining(","))));
261 
262             String formattedUptimeMetricKey =
263                     createMetricKey(METRIC_SUFFIX_UPTIME_FORMATTED, serial);
264             metrics.put(
265                     formattedUptimeMetricKey,
266                     stringToMetric(
267                             timestampsNanos
268                                     .stream()
269                                     .map(t -> nanosToHoursMinutesSeconds(t))
270                                     .collect(Collectors.joining(","))));
271         }
272     }
273 
createMetricKey(String suffix, String serial)274     private String createMetricKey(String suffix, String serial) {
275         return mIncludeDeviceSerial
276                 ? String.join(METRIC_SEP, METRIC_PREFIX, serial, suffix)
277                 : String.join(METRIC_SEP, METRIC_PREFIX, suffix);
278     }
279 
280     /**
281      * Forwarding logic to {@link ConfigUtil} for testing as it is a static utility class.
282      *
283      * @hide
284      */
285     @VisibleForTesting
pushStatsConfig(ITestDevice device, List<Integer> eventAtomIds)286     protected long pushStatsConfig(ITestDevice device, List<Integer> eventAtomIds)
287             throws IOException, DeviceNotAvailableException {
288         return ConfigUtil.pushStatsConfig(device, eventAtomIds);
289     }
290 
291     /**
292      * Forwarding logic to {@link ConfigUtil} for testing as it is a static utility class.
293      *
294      * @hide
295      */
296     @VisibleForTesting
removeConfig(ITestDevice device, long configId)297     protected void removeConfig(ITestDevice device, long configId)
298             throws DeviceNotAvailableException {
299         ConfigUtil.removeConfig(device, configId);
300     }
301 
302     /**
303      * Forwarding logic to {@link MetricUtil} for testing as it is a static utility class.
304      *
305      * @hide
306      */
307     @VisibleForTesting
getStatsdMetadata(ITestDevice device)308     protected StatsdStatsReport getStatsdMetadata(ITestDevice device)
309             throws DeviceNotAvailableException, InvalidProtocolBufferException {
310         return MetricUtil.getStatsdMetadata(device);
311     }
312 
313     /**
314      * Forwarding logic to {@link MetricUtil} for testing as it is a static utility class.
315      *
316      * @hide
317      */
318     @VisibleForTesting
getEventMetricData(ITestDevice device, long configId)319     protected List<EventMetricData> getEventMetricData(ITestDevice device, long configId)
320             throws DeviceNotAvailableException {
321         return MetricUtil.getEventMetricData(device, configId);
322     }
323 
324     /**
325      * Helper method to convert nanoseconds to HH:mm:ss. {@link SimpleDateFormat} is not used here
326      * as the input is not a real epoch-based timestamp.
327      */
nanosToHoursMinutesSeconds(long nanos)328     private String nanosToHoursMinutesSeconds(long nanos) {
329         long hours = TimeUnit.NANOSECONDS.toHours(nanos);
330         nanos -= TimeUnit.HOURS.toNanos(hours);
331         long minutes = TimeUnit.NANOSECONDS.toMinutes(nanos);
332         nanos -= TimeUnit.MINUTES.toNanos(minutes);
333         long seconds = TimeUnit.NANOSECONDS.toSeconds(nanos);
334         return String.format("%02d:%02d:%02d", hours, minutes, seconds);
335     }
336 
337     /** Helper method to format an epoch-based timestamp in seconds to HH:mm:ss. */
timestampToHoursMinutesSeconds(int seconds)338     private String timestampToHoursMinutesSeconds(int seconds) {
339         return TIME_FORMATTER.format(new Date(TimeUnit.SECONDS.toMillis(seconds)));
340     }
341 
342     /** Helper method to get all values in a list that appear after all occurrences of target. */
getAllValuesAfter(int target, List<Integer> l)343     private List<Integer> getAllValuesAfter(int target, List<Integer> l) {
344         return l.subList(l.lastIndexOf(target) + 1, l.size());
345     }
346 }
347