1 /*
2 * Copyright (C) 2022 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "thermal_stats_helper.h"
18
19 #include <android-base/logging.h>
20 #include <android/binder_manager.h>
21
22 #include <algorithm>
23 #include <numeric>
24 #include <string_view>
25
26 #include "../thermal-helper.h"
27
28 namespace aidl {
29 namespace android {
30 namespace hardware {
31 namespace thermal {
32 namespace implementation {
33
34 constexpr std::string_view kCustomThresholdSetSuffix("-TH-");
35 constexpr std::string_view kCompressedThresholdSuffix("-CMBN-TH");
36
37 using aidl::android::frameworks::stats::VendorAtom;
38 namespace PixelAtoms = ::android::hardware::google::pixel::PixelAtoms;
39
40 namespace {
41 static std::shared_ptr<IStats> stats_client = nullptr;
getStatsService()42 std::shared_ptr<IStats> getStatsService() {
43 static std::once_flag statsServiceFlag;
44 std::call_once(statsServiceFlag, []() {
45 const std::string instance = std::string() + IStats::descriptor + "/default";
46 bool isStatsDeclared = AServiceManager_isDeclared(instance.c_str());
47 if (!isStatsDeclared) {
48 LOG(ERROR) << "Stats service is not registered.";
49 return;
50 }
51 stats_client = IStats::fromBinder(
52 ndk::SpAIBinder(AServiceManager_waitForService(instance.c_str())));
53 });
54 return stats_client;
55 }
56
// Tells whether `name` should be recorded using its default thresholds.
//
// The variant holds either a bool that applies to every name, or an explicit set of the
// names that are enabled.
//
// Returns the bool when that alternative is active, otherwise whether `name` is in the set.
bool isRecordByDefaultThreshold(const std::variant<bool, std::unordered_set<std::string>>
                                        &record_by_default_threshold_all_or_name_set_,
                                std::string_view name) {
    const auto &config = record_by_default_threshold_all_or_name_set_;
    if (const bool *record_all = std::get_if<bool>(&config)) {
        return *record_all;
    }
    const auto &enabled_names = std::get<std::unordered_set<std::string>>(config);
    // Build the key from the whole view: string_view::data() is not guaranteed to be
    // NUL-terminated, so using it as a C string could look up a longer, different name.
    return enabled_names.find(std::string(name)) != enabled_names.end();
}
66
67 template <typename T>
calculateThresholdBucket(const std::vector<T> & thresholds,T value)68 int calculateThresholdBucket(const std::vector<T> &thresholds, T value) {
69 if (thresholds.empty()) {
70 LOG(VERBOSE) << "No threshold present, so bucket is " << value << " as int.";
71 return static_cast<int>(value);
72 }
73 auto threshold_idx = std::upper_bound(thresholds.begin(), thresholds.end(), value);
74 int bucket = (threshold_idx - thresholds.begin());
75 LOG(VERBOSE) << "For value: " << value << " bucket is: " << bucket;
76 return bucket;
77 }
78
resetCurrentTempStatus(CurrTempStatus * curr_temp_status,float new_temp)79 void resetCurrentTempStatus(CurrTempStatus *curr_temp_status, float new_temp) {
80 curr_temp_status->temp = new_temp;
81 curr_temp_status->start_time = boot_clock::now();
82 curr_temp_status->repeat_count = 1;
83 }
84
85 } // namespace
86
initializeStats(const Json::Value & config,const std::unordered_map<std::string,SensorInfo> & sensor_info_map_,const std::unordered_map<std::string,CdevInfo> & cooling_device_info_map_,ThermalHelper * const thermal_helper_handle)87 bool ThermalStatsHelper::initializeStats(
88 const Json::Value &config,
89 const std::unordered_map<std::string, SensorInfo> &sensor_info_map_,
90 const std::unordered_map<std::string, CdevInfo> &cooling_device_info_map_,
91 ThermalHelper *const thermal_helper_handle) {
92 StatsInfo<float> sensor_stats_info;
93 AbnormalStatsInfo abnormal_stats_info;
94 if (!ParseSensorStatsConfig(config, sensor_info_map_, &sensor_stats_info,
95 &abnormal_stats_info)) {
96 LOG(ERROR) << "Failed to parse sensor stats config";
97 return false;
98 }
99 StatsInfo<int> cooling_device_request_info;
100 if (!ParseCoolingDeviceStatsConfig(config, cooling_device_info_map_,
101 &cooling_device_request_info)) {
102 LOG(ERROR) << "Failed to parse cooling device stats config";
103 return false;
104 }
105 if (!initializeSensorTempStats(sensor_stats_info, sensor_info_map_)) {
106 LOG(ERROR) << "Failed to initialize sensor temp stats";
107 return false;
108 }
109 if (!initializeSensorCdevRequestStats(cooling_device_request_info, sensor_info_map_,
110 cooling_device_info_map_)) {
111 LOG(ERROR) << "Failed to initialize sensor cooling device request stats";
112 return false;
113 }
114 if (!initializeSensorAbnormalityStats(abnormal_stats_info, sensor_info_map_)) {
115 LOG(ERROR) << "Failed to initialize sensor abnormal stats";
116 return false;
117 }
118
119 thermal_helper_handle_ = thermal_helper_handle;
120 last_total_stats_report_time = boot_clock::now();
121 abnormal_stats_reported_per_update_interval = 0;
122 LOG(INFO) << "Thermal Stats Initialized Successfully";
123 return true;
124 }
125
initializeSensorCdevRequestStats(const StatsInfo<int> & request_stats_info,const std::unordered_map<std::string,SensorInfo> & sensor_info_map_,const std::unordered_map<std::string,CdevInfo> & cooling_device_info_map_)126 bool ThermalStatsHelper::initializeSensorCdevRequestStats(
127 const StatsInfo<int> &request_stats_info,
128 const std::unordered_map<std::string, SensorInfo> &sensor_info_map_,
129 const std::unordered_map<std::string, CdevInfo> &cooling_device_info_map_) {
130 std::unique_lock<std::shared_mutex> _lock(sensor_cdev_request_stats_map_mutex_);
131 for (const auto &[sensor, sensor_info] : sensor_info_map_) {
132 for (const auto &binded_cdev_info_pair :
133 sensor_info.throttling_info->binded_cdev_info_map) {
134 const auto &cdev = binded_cdev_info_pair.first;
135 const auto &max_state =
136 cooling_device_info_map_.at(binded_cdev_info_pair.first).max_state;
137 // Record by all state
138 if (isRecordByDefaultThreshold(
139 request_stats_info.record_by_default_threshold_all_or_name_set_, cdev)) {
140 // if the number of states is greater / equal(as state starts from 0) than
141 // residency_buckets in atom combine the initial states
142 if (max_state >= kMaxStatsResidencyCount) {
143 // buckets = [max_state -kMaxStatsResidencyCount + 1, ...max_state]
144 // idx = [1, .. max_state - (max_state - kMaxStatsResidencyCount + 1) + 1]
145 // idx = [1, .. kMaxStatsResidencyCount]
146 const auto starting_state = max_state - kMaxStatsResidencyCount + 1;
147 std::vector<int> thresholds(kMaxStatsResidencyCount);
148 std::iota(thresholds.begin(), thresholds.end(), starting_state);
149 const auto logging_name = cdev + kCompressedThresholdSuffix.data();
150 ThresholdList<int> threshold_list(logging_name, thresholds);
151 sensor_cdev_request_stats_map_[sensor][cdev]
152 .stats_by_custom_threshold.emplace_back(threshold_list);
153 } else {
154 // buckets = [0, 1, 2, 3, ...max_state]
155 const auto default_threshold_time_in_state_size = max_state + 1;
156 sensor_cdev_request_stats_map_[sensor][cdev].stats_by_default_threshold =
157 StatsRecord(default_threshold_time_in_state_size);
158 }
159 LOG(INFO) << "Sensor Cdev user vote stats on basis of all state initialized for ["
160 << sensor << "-" << cdev << "]";
161 }
162
163 // Record by custom threshold
164 if (request_stats_info.record_by_threshold.count(cdev)) {
165 for (const auto &threshold_list : request_stats_info.record_by_threshold.at(cdev)) {
166 // check last threshold value(which is >= number of buckets as numbers in
167 // threshold are strictly increasing from 0) is less than max_state
168 if (threshold_list.thresholds.back() >= max_state) {
169 LOG(ERROR) << "For sensor " << sensor << " bindedCdev: " << cdev
170 << "Invalid bindedCdev stats threshold: "
171 << threshold_list.thresholds.back() << " >= " << max_state;
172 sensor_cdev_request_stats_map_.clear();
173 return false;
174 }
175 sensor_cdev_request_stats_map_[sensor][cdev]
176 .stats_by_custom_threshold.emplace_back(threshold_list);
177 LOG(INFO)
178 << "Sensor Cdev user vote stats on basis of threshold initialized for ["
179 << sensor << "-" << cdev << "]";
180 }
181 }
182 }
183 }
184 return true;
185 }
186
initializeSensorTempStats(const StatsInfo<float> & sensor_stats_info,const std::unordered_map<std::string,SensorInfo> & sensor_info_map_)187 bool ThermalStatsHelper::initializeSensorTempStats(
188 const StatsInfo<float> &sensor_stats_info,
189 const std::unordered_map<std::string, SensorInfo> &sensor_info_map_) {
190 std::unique_lock<std::shared_mutex> _lock(sensor_stats_mutex_);
191 auto &temp_stats_map_ = sensor_stats.temp_stats_map_;
192 const int severity_time_in_state_size = kThrottlingSeverityCount;
193 for (const auto &[sensor, sensor_info] : sensor_info_map_) {
194 // Record by severity
195 if (sensor_info.is_watch &&
196 isRecordByDefaultThreshold(
197 sensor_stats_info.record_by_default_threshold_all_or_name_set_, sensor)) {
198 // number of buckets = number of severity
199 temp_stats_map_[sensor].stats_by_default_threshold =
200 StatsRecord(severity_time_in_state_size);
201 LOG(INFO) << "Sensor temp stats on basis of severity initialized for [" << sensor
202 << "]";
203 }
204
205 // Record by custom threshold
206 if (sensor_stats_info.record_by_threshold.count(sensor)) {
207 for (const auto &threshold_list : sensor_stats_info.record_by_threshold.at(sensor)) {
208 temp_stats_map_[sensor].stats_by_custom_threshold.emplace_back(threshold_list);
209 LOG(INFO) << "Sensor temp stats on basis of threshold initialized for [" << sensor
210 << "]";
211 }
212 }
213 }
214 return true;
215 }
216
initializeSensorAbnormalityStats(const AbnormalStatsInfo & abnormal_stats_info,const std::unordered_map<std::string,SensorInfo> & sensor_info_map_)217 bool ThermalStatsHelper::initializeSensorAbnormalityStats(
218 const AbnormalStatsInfo &abnormal_stats_info,
219 const std::unordered_map<std::string, SensorInfo> &sensor_info_map_) {
220 std::unique_lock<std::shared_mutex> _lock(sensor_stats_mutex_);
221 auto &temp_range_info_map_ = sensor_stats.temp_range_info_map_;
222 for (const auto &sensors_temp_range_info : abnormal_stats_info.sensors_temp_range_infos) {
223 const auto &temp_range_info_ptr =
224 std::make_shared<TempRangeInfo>(sensors_temp_range_info.temp_range_info);
225 for (const auto &sensor : sensors_temp_range_info.sensors) {
226 temp_range_info_map_[sensor] = temp_range_info_ptr;
227 }
228 }
229 auto &temp_stuck_info_map_ = sensor_stats.temp_stuck_info_map_;
230 for (const auto &sensors_temp_stuck_info : abnormal_stats_info.sensors_temp_stuck_infos) {
231 const auto &temp_stuck_info_ptr =
232 std::make_shared<TempStuckInfo>(sensors_temp_stuck_info.temp_stuck_info);
233 for (const auto &sensor : sensors_temp_stuck_info.sensors) {
234 temp_stuck_info_map_[sensor] = temp_stuck_info_ptr;
235 }
236 }
237 const auto &default_temp_range_info_ptr =
238 abnormal_stats_info.default_temp_range_info
239 ? std::make_shared<TempRangeInfo>(
240 abnormal_stats_info.default_temp_range_info.value())
241 : nullptr;
242 const auto &default_temp_stuck_info_ptr =
243 abnormal_stats_info.default_temp_stuck_info
244 ? std::make_shared<TempStuckInfo>(
245 abnormal_stats_info.default_temp_stuck_info.value())
246 : nullptr;
247 for (const auto &sensor_info : sensor_info_map_) {
248 const auto &sensor = sensor_info.first;
249 if (default_temp_range_info_ptr && !temp_range_info_map_.count(sensor))
250 temp_range_info_map_[sensor] = default_temp_range_info_ptr;
251 if (default_temp_stuck_info_ptr && !temp_stuck_info_map_.count(sensor))
252 temp_stuck_info_map_[sensor] = default_temp_stuck_info_ptr;
253 }
254
255 for (const auto &sensor_temp_stuck_info : temp_stuck_info_map_) {
256 sensor_stats.curr_temp_status_map_[sensor_temp_stuck_info.first] = {
257 .temp = std::numeric_limits<float>::min(),
258 .start_time = boot_clock::time_point::min(),
259 .repeat_count = 0,
260 };
261 }
262 return true;
263 }
264
updateStatsRecord(StatsRecord * stats_record,int new_state)265 void ThermalStatsHelper::updateStatsRecord(StatsRecord *stats_record, int new_state) {
266 const auto now = boot_clock::now();
267 const auto cur_state_duration = std::chrono::duration_cast<std::chrono::milliseconds>(
268 now - stats_record->cur_state_start_time);
269 LOG(VERBOSE) << "Adding duration " << cur_state_duration.count()
270 << " for cur_state: " << stats_record->cur_state << " with value: "
271 << stats_record->time_in_state_ms[stats_record->cur_state].count();
272 // Update last record end time
273 stats_record->time_in_state_ms[stats_record->cur_state] += cur_state_duration;
274 stats_record->cur_state_start_time = now;
275 stats_record->cur_state = new_state;
276 }
277
updateSensorCdevRequestStats(std::string_view sensor,std::string_view cdev,int new_value)278 void ThermalStatsHelper::updateSensorCdevRequestStats(std::string_view sensor,
279 std::string_view cdev, int new_value) {
280 std::unique_lock<std::shared_mutex> _lock(sensor_cdev_request_stats_map_mutex_);
281 if (!sensor_cdev_request_stats_map_.count(sensor.data()) ||
282 !sensor_cdev_request_stats_map_[sensor.data()].count(cdev.data())) {
283 return;
284 }
285 auto &request_stats = sensor_cdev_request_stats_map_[sensor.data()][cdev.data()];
286 for (auto &stats_by_threshold : request_stats.stats_by_custom_threshold) {
287 int value = calculateThresholdBucket(stats_by_threshold.thresholds, new_value);
288 if (value != stats_by_threshold.stats_record.cur_state) {
289 LOG(VERBOSE) << "Updating bindedCdev stats for sensor: " << sensor.data()
290 << " , cooling_device: " << cdev.data() << " with new value: " << value;
291 updateStatsRecord(&stats_by_threshold.stats_record, value);
292 }
293 }
294
295 if (request_stats.stats_by_default_threshold.has_value()) {
296 auto &stats_record = request_stats.stats_by_default_threshold.value();
297 if (new_value != stats_record.cur_state) {
298 LOG(VERBOSE) << "Updating bindedCdev stats for sensor: " << sensor.data()
299 << " , cooling_device: " << cdev.data()
300 << " with new value: " << new_value;
301 updateStatsRecord(&stats_record, new_value);
302 }
303 }
304 }
305
updateSensorTempStatsByThreshold(std::string_view sensor,float temperature)306 void ThermalStatsHelper::updateSensorTempStatsByThreshold(std::string_view sensor,
307 float temperature) {
308 std::unique_lock<std::shared_mutex> _lock(sensor_stats_mutex_);
309 verifySensorAbnormality(sensor, temperature);
310 auto &temp_stats_map_ = sensor_stats.temp_stats_map_;
311 if (!temp_stats_map_.count(sensor.data())) {
312 return;
313 }
314 auto &sensor_temp_stats = temp_stats_map_[sensor.data()];
315 for (auto &stats_by_threshold : sensor_temp_stats.stats_by_custom_threshold) {
316 int value = calculateThresholdBucket(stats_by_threshold.thresholds, temperature);
317 if (value != stats_by_threshold.stats_record.cur_state) {
318 LOG(VERBOSE) << "Updating sensor stats for sensor: " << sensor.data()
319 << " with value: " << value;
320 updateStatsRecord(&stats_by_threshold.stats_record, value);
321 }
322 }
323 if (temperature > sensor_temp_stats.max_temp) {
324 sensor_temp_stats.max_temp = temperature;
325 sensor_temp_stats.max_temp_timestamp = system_clock::now();
326 }
327 if (temperature < sensor_temp_stats.min_temp) {
328 sensor_temp_stats.min_temp = temperature;
329 sensor_temp_stats.min_temp_timestamp = system_clock::now();
330 }
331 }
332
updateSensorTempStatsBySeverity(std::string_view sensor,const ThrottlingSeverity & severity)333 void ThermalStatsHelper::updateSensorTempStatsBySeverity(std::string_view sensor,
334 const ThrottlingSeverity &severity) {
335 std::unique_lock<std::shared_mutex> _lock(sensor_stats_mutex_);
336 auto &temp_stats_map_ = sensor_stats.temp_stats_map_;
337 if (temp_stats_map_.count(sensor.data()) &&
338 temp_stats_map_[sensor.data()].stats_by_default_threshold.has_value()) {
339 auto &stats_record = temp_stats_map_[sensor.data()].stats_by_default_threshold.value();
340 int value = static_cast<int>(severity);
341 if (value != stats_record.cur_state) {
342 LOG(VERBOSE) << "Updating sensor stats for sensor: " << sensor.data()
343 << " with value: " << value;
344 updateStatsRecord(&stats_record, value);
345 }
346 }
347 }
348
verifySensorAbnormality(std::string_view sensor,float temp)349 void ThermalStatsHelper::verifySensorAbnormality(std::string_view sensor, float temp) {
350 LOG(VERBOSE) << "Verify sensor abnormality for " << sensor << " with temp " << temp;
351 if (sensor_stats.temp_range_info_map_.count(sensor.data())) {
352 const auto &temp_range_info = sensor_stats.temp_range_info_map_[sensor.data()];
353 if (temp < temp_range_info->min_temp_threshold) {
354 LOG(ERROR) << "Outlier Temperature Detected, sensor: " << sensor.data()
355 << " temp: " << temp << " < " << temp_range_info->min_temp_threshold;
356 reportThermalAbnormality(ThermalSensorAbnormalityDetected::EXTREME_LOW_TEMP, sensor,
357 std::round(temp));
358 } else if (temp > temp_range_info->max_temp_threshold) {
359 LOG(ERROR) << "Outlier Temperature Detected, sensor: " << sensor.data()
360 << " temp: " << temp << " > " << temp_range_info->max_temp_threshold;
361 reportThermalAbnormality(ThermalSensorAbnormalityDetected::EXTREME_HIGH_TEMP, sensor,
362 std::round(temp));
363 }
364 }
365 if (sensor_stats.temp_stuck_info_map_.count(sensor.data())) {
366 const auto &temp_stuck_info = sensor_stats.temp_stuck_info_map_[sensor.data()];
367 auto &curr_temp_status = sensor_stats.curr_temp_status_map_[sensor.data()];
368 LOG(VERBOSE) << "Current Temp Status: temp=" << curr_temp_status.temp
369 << " repeat_count=" << curr_temp_status.repeat_count
370 << " start_time=" << curr_temp_status.start_time.time_since_epoch().count();
371 if (std::fabs(curr_temp_status.temp - temp) <= kPrecisionThreshold) {
372 curr_temp_status.repeat_count++;
373 if (temp_stuck_info->min_polling_count <= curr_temp_status.repeat_count) {
374 auto time_elapsed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
375 boot_clock::now() - curr_temp_status.start_time);
376 if (temp_stuck_info->min_stuck_duration <= time_elapsed_ms) {
377 LOG(ERROR) << "Stuck Temperature Detected, sensor: " << sensor.data()
378 << " temp: " << temp << " repeated "
379 << temp_stuck_info->min_polling_count << " times for "
380 << time_elapsed_ms.count() << "ms";
381 if (reportThermalAbnormality(ThermalSensorAbnormalityDetected::SENSOR_STUCK,
382 sensor, std::round(temp))) {
383 // reset current status to verify for sensor stuck with start time as
384 // current polling
385 resetCurrentTempStatus(&curr_temp_status, temp);
386 }
387 }
388 }
389 } else {
390 resetCurrentTempStatus(&curr_temp_status, temp);
391 }
392 }
393 }
394
reportStats()395 int ThermalStatsHelper::reportStats() {
396 const auto curTime = boot_clock::now();
397 const auto since_last_total_stats_update_ms =
398 std::chrono::duration_cast<std::chrono::milliseconds>(curTime -
399 last_total_stats_report_time);
400 LOG(VERBOSE) << "Duration from last total stats update is: "
401 << since_last_total_stats_update_ms.count();
402 if (since_last_total_stats_update_ms < kUpdateIntervalMs) {
403 LOG(VERBOSE) << "Time elapsed since last update less than " << kUpdateIntervalMs.count();
404 return 0;
405 }
406
407 const std::shared_ptr<IStats> stats_client = getStatsService();
408 if (!stats_client) {
409 LOG(ERROR) << "Unable to get AIDL Stats service";
410 return -1;
411 }
412 int count_failed_reporting =
413 reportAllSensorTempStats(stats_client) + reportAllSensorCdevRequestStats(stats_client);
414 last_total_stats_report_time = curTime;
415 abnormal_stats_reported_per_update_interval = 0;
416 return count_failed_reporting;
417 }
418
reportAllSensorTempStats(const std::shared_ptr<IStats> & stats_client)419 int ThermalStatsHelper::reportAllSensorTempStats(const std::shared_ptr<IStats> &stats_client) {
420 int count_failed_reporting = 0;
421 std::unique_lock<std::shared_mutex> _lock(sensor_stats_mutex_);
422 for (auto &[sensor, temp_stats] : sensor_stats.temp_stats_map_) {
423 for (size_t threshold_set_idx = 0;
424 threshold_set_idx < temp_stats.stats_by_custom_threshold.size(); threshold_set_idx++) {
425 auto &stats_by_threshold = temp_stats.stats_by_custom_threshold[threshold_set_idx];
426 std::string sensor_name = stats_by_threshold.logging_name.value_or(
427 sensor + kCustomThresholdSetSuffix.data() + std::to_string(threshold_set_idx));
428 if (!reportSensorTempStats(stats_client, sensor_name, temp_stats,
429 &stats_by_threshold.stats_record)) {
430 count_failed_reporting++;
431 }
432 }
433 if (temp_stats.stats_by_default_threshold.has_value()) {
434 if (!reportSensorTempStats(stats_client, sensor, temp_stats,
435 &temp_stats.stats_by_default_threshold.value())) {
436 count_failed_reporting++;
437 }
438 }
439 // Reset temp stats after reporting
440 temp_stats.max_temp = std::numeric_limits<float>::min();
441 temp_stats.min_temp = std::numeric_limits<float>::max();
442 }
443 return count_failed_reporting;
444 }
445
reportSensorTempStats(const std::shared_ptr<IStats> & stats_client,std::string_view sensor,const SensorTempStats & sensor_temp_stats,StatsRecord * stats_record)446 bool ThermalStatsHelper::reportSensorTempStats(const std::shared_ptr<IStats> &stats_client,
447 std::string_view sensor,
448 const SensorTempStats &sensor_temp_stats,
449 StatsRecord *stats_record) {
450 LOG(VERBOSE) << "Reporting sensor stats for " << sensor;
451 // maintain a copy in case reporting fails
452 StatsRecord thermal_stats_before_reporting = *stats_record;
453 std::vector<VendorAtomValue> values(2);
454 values[0].set<VendorAtomValue::stringValue>(sensor);
455 std::vector<int64_t> time_in_state_ms = processStatsRecordForReporting(stats_record);
456 const auto since_last_update_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
457 stats_record->cur_state_start_time - stats_record->last_stats_report_time);
458 values[1].set<VendorAtomValue::longValue>(since_last_update_ms.count());
459 VendorAtomValue tmp;
460 for (auto &time_in_state : time_in_state_ms) {
461 tmp.set<VendorAtomValue::longValue>(time_in_state);
462 values.push_back(tmp);
463 }
464 auto remaining_residency_buckets_count = kMaxStatsResidencyCount - time_in_state_ms.size();
465 if (remaining_residency_buckets_count > 0) {
466 tmp.set<VendorAtomValue::longValue>(0);
467 values.insert(values.end(), remaining_residency_buckets_count, tmp);
468 }
469 tmp.set<VendorAtomValue::floatValue>(sensor_temp_stats.max_temp);
470 values.push_back(tmp);
471 tmp.set<VendorAtomValue::longValue>(
472 system_clock::to_time_t(sensor_temp_stats.max_temp_timestamp));
473 values.push_back(tmp);
474 tmp.set<VendorAtomValue::floatValue>(sensor_temp_stats.min_temp);
475 values.push_back(tmp);
476 tmp.set<VendorAtomValue::longValue>(
477 system_clock::to_time_t(sensor_temp_stats.min_temp_timestamp));
478 values.push_back(tmp);
479
480 if (!reportAtom(stats_client, PixelAtoms::Atom::kVendorTempResidencyStats, std::move(values))) {
481 LOG(ERROR) << "Unable to report VendorTempResidencyStats to Stats service for "
482 "sensor: "
483 << sensor;
484 *stats_record = restoreStatsRecordOnFailure(std::move(thermal_stats_before_reporting));
485 return false;
486 }
487 // Update last time of stats reporting
488 stats_record->last_stats_report_time = boot_clock::now();
489 return true;
490 }
491
reportAllSensorCdevRequestStats(const std::shared_ptr<IStats> & stats_client)492 int ThermalStatsHelper::reportAllSensorCdevRequestStats(
493 const std::shared_ptr<IStats> &stats_client) {
494 int count_failed_reporting = 0;
495 std::unique_lock<std::shared_mutex> _lock(sensor_cdev_request_stats_map_mutex_);
496 for (auto &[sensor, cdev_request_stats_map] : sensor_cdev_request_stats_map_) {
497 for (auto &[cdev, request_stats] : cdev_request_stats_map) {
498 for (size_t threshold_set_idx = 0;
499 threshold_set_idx < request_stats.stats_by_custom_threshold.size();
500 threshold_set_idx++) {
501 auto &stats_by_threshold =
502 request_stats.stats_by_custom_threshold[threshold_set_idx];
503 std::string cdev_name = stats_by_threshold.logging_name.value_or(
504 cdev + kCustomThresholdSetSuffix.data() +
505 std::to_string(threshold_set_idx));
506 if (!reportSensorCdevRequestStats(stats_client, sensor, cdev_name,
507 &stats_by_threshold.stats_record)) {
508 count_failed_reporting++;
509 }
510 }
511
512 if (request_stats.stats_by_default_threshold.has_value()) {
513 if (!reportSensorCdevRequestStats(
514 stats_client, sensor, cdev,
515 &request_stats.stats_by_default_threshold.value())) {
516 count_failed_reporting++;
517 }
518 }
519 }
520 }
521 return count_failed_reporting;
522 }
523
reportSensorCdevRequestStats(const std::shared_ptr<IStats> & stats_client,std::string_view sensor,std::string_view cdev,StatsRecord * stats_record)524 bool ThermalStatsHelper::reportSensorCdevRequestStats(const std::shared_ptr<IStats> &stats_client,
525 std::string_view sensor,
526 std::string_view cdev,
527 StatsRecord *stats_record) {
528 LOG(VERBOSE) << "Reporting bindedCdev stats for sensor: " << sensor
529 << " cooling_device: " << cdev;
530 // maintain a copy in case reporting fails
531 StatsRecord thermal_stats_before_reporting = *stats_record;
532 std::vector<VendorAtomValue> values(3);
533 values[0].set<VendorAtomValue::stringValue>(sensor);
534 values[1].set<VendorAtomValue::stringValue>(cdev);
535 std::vector<int64_t> time_in_state_ms = processStatsRecordForReporting(stats_record);
536 const auto since_last_update_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
537 stats_record->cur_state_start_time - stats_record->last_stats_report_time);
538 values[2].set<VendorAtomValue::longValue>(since_last_update_ms.count());
539 VendorAtomValue tmp;
540 for (auto &time_in_state : time_in_state_ms) {
541 tmp.set<VendorAtomValue::longValue>(time_in_state);
542 values.push_back(tmp);
543 }
544
545 if (!reportAtom(stats_client, PixelAtoms::Atom::kVendorSensorCoolingDeviceStats,
546 std::move(values))) {
547 LOG(ERROR) << "Unable to report VendorSensorCoolingDeviceStats to Stats "
548 "service for sensor: "
549 << sensor << " cooling_device: " << cdev;
550 *stats_record = restoreStatsRecordOnFailure(std::move(thermal_stats_before_reporting));
551 return false;
552 }
553 // Update last time of stats reporting
554 stats_record->last_stats_report_time = boot_clock::now();
555 return true;
556 }
557
processStatsRecordForReporting(StatsRecord * stats_record)558 std::vector<int64_t> ThermalStatsHelper::processStatsRecordForReporting(StatsRecord *stats_record) {
559 // update the last unclosed entry and start new record with same state
560 updateStatsRecord(stats_record, stats_record->cur_state);
561 std::vector<std::chrono::milliseconds> &time_in_state_ms = stats_record->time_in_state_ms;
562 // convert std::chrono::milliseconds time_in_state to int64_t vector for reporting
563 std::vector<int64_t> stats_residency(time_in_state_ms.size());
564 std::transform(time_in_state_ms.begin(), time_in_state_ms.end(), stats_residency.begin(),
565 [](std::chrono::milliseconds time_ms) { return time_ms.count(); });
566 // clear previous stats
567 std::fill(time_in_state_ms.begin(), time_in_state_ms.end(), std::chrono::milliseconds::zero());
568 return stats_residency;
569 }
570
reportThermalAbnormality(const ThermalSensorAbnormalityDetected::AbnormalityType & type,std::string_view name,std::optional<int> reading)571 bool ThermalStatsHelper::reportThermalAbnormality(
572 const ThermalSensorAbnormalityDetected::AbnormalityType &type, std::string_view name,
573 std::optional<int> reading) {
574 const auto value_str = reading.has_value() ? std::to_string(reading.value()) : "undefined";
575 if (abnormal_stats_reported_per_update_interval >= kMaxAbnormalLoggingPerUpdateInterval) {
576 LOG(ERROR) << "Thermal abnormal atom logging rate limited for " << name.data()
577 << " with value " << value_str;
578 return true;
579 }
580 const std::shared_ptr<IStats> stats_client = getStatsService();
581 if (!stats_client) {
582 LOG(ERROR) << "Unable to get AIDL Stats service";
583 return false;
584 }
585 std::vector<VendorAtomValue> values(3);
586 values[ThermalSensorAbnormalityDetected::kTypeFieldNumber - kVendorAtomOffset] =
587 VendorAtomValue::make<VendorAtomValue::intValue>(type);
588 values[ThermalSensorAbnormalityDetected::kSensorFieldNumber - kVendorAtomOffset] =
589 VendorAtomValue::make<VendorAtomValue::stringValue>(name);
590 if (reading.has_value()) {
591 values[ThermalSensorAbnormalityDetected::kTempFieldNumber - kVendorAtomOffset] =
592 VendorAtomValue::make<VendorAtomValue::intValue>(reading.value());
593 }
594
595 // Dump additional traces before reporting abnormal event
596 if (thermal_helper_handle_) {
597 thermal_helper_handle_->dumpTraces(name);
598 }
599
600 if (!reportAtom(stats_client, PixelAtoms::Atom::kThermalSensorAbnormalityDetected,
601 std::move(values))) {
602 LOG(ERROR) << "Failed to log thermal abnormal atom for " << name.data() << " with value "
603 << value_str;
604 return false;
605 }
606 LOG(INFO) << "Thermal abnormality reported for " << name.data() << " with value " << value_str;
607 abnormal_stats_reported_per_update_interval++;
608 return true;
609 }
610
reportAtom(const std::shared_ptr<IStats> & stats_client,const int32_t & atom_id,std::vector<VendorAtomValue> && values)611 bool ThermalStatsHelper::reportAtom(const std::shared_ptr<IStats> &stats_client,
612 const int32_t &atom_id, std::vector<VendorAtomValue> &&values) {
613 LOG(VERBOSE) << "Reporting thermal stats for atom_id " << atom_id;
614 // Send vendor atom to IStats HAL
615 VendorAtom event = {.reverseDomainName = "", .atomId = atom_id, .values = std::move(values)};
616 const ndk::ScopedAStatus ret = stats_client->reportVendorAtom(event);
617 return ret.isOk();
618 }
619
restoreStatsRecordOnFailure(StatsRecord && stats_record_before_failure)620 StatsRecord ThermalStatsHelper::restoreStatsRecordOnFailure(
621 StatsRecord &&stats_record_before_failure) {
622 stats_record_before_failure.report_fail_count += 1;
623 // If consecutive count of failure is high, reset stat to avoid overflow
624 if (stats_record_before_failure.report_fail_count >= kMaxStatsReportingFailCount) {
625 return StatsRecord(stats_record_before_failure.time_in_state_ms.size(),
626 stats_record_before_failure.cur_state);
627 } else {
628 return stats_record_before_failure;
629 }
630 }
631
GetSensorTempStatsSnapshot()632 std::unordered_map<std::string, SensorTempStats> ThermalStatsHelper::GetSensorTempStatsSnapshot() {
633 auto sensor_temp_stats_snapshot = sensor_stats.temp_stats_map_;
634 for (auto &sensor_temp_stats_pair : sensor_temp_stats_snapshot) {
635 for (auto &temp_stats : sensor_temp_stats_pair.second.stats_by_custom_threshold) {
636 // update the last unclosed entry and start new record with same state
637 updateStatsRecord(&temp_stats.stats_record, temp_stats.stats_record.cur_state);
638 }
639 if (sensor_temp_stats_pair.second.stats_by_default_threshold.has_value()) {
640 auto &stats_by_default_threshold =
641 sensor_temp_stats_pair.second.stats_by_default_threshold.value();
642 // update the last unclosed entry and start new record with same state
643 updateStatsRecord(&stats_by_default_threshold, stats_by_default_threshold.cur_state);
644 }
645 }
646 return sensor_temp_stats_snapshot;
647 }
648
649 std::unordered_map<std::string, std::unordered_map<std::string, ThermalStats<int>>>
GetSensorCoolingDeviceRequestStatsSnapshot()650 ThermalStatsHelper::GetSensorCoolingDeviceRequestStatsSnapshot() {
651 auto sensor_cdev_request_stats_snapshot = sensor_cdev_request_stats_map_;
652 for (auto &sensor_cdev_request_stats_pair : sensor_cdev_request_stats_snapshot) {
653 for (auto &cdev_request_stats_pair : sensor_cdev_request_stats_pair.second) {
654 for (auto &request_stats : cdev_request_stats_pair.second.stats_by_custom_threshold) {
655 // update the last unclosed entry and start new record with same state
656 updateStatsRecord(&request_stats.stats_record,
657 request_stats.stats_record.cur_state);
658 }
659 if (cdev_request_stats_pair.second.stats_by_default_threshold.has_value()) {
660 auto &stats_by_default_threshold =
661 cdev_request_stats_pair.second.stats_by_default_threshold.value();
662 // update the last unclosed entry and start new record with same state
663 updateStatsRecord(&stats_by_default_threshold,
664 stats_by_default_threshold.cur_state);
665 }
666 }
667 }
668 return sensor_cdev_request_stats_snapshot;
669 }
670
671 } // namespace implementation
672 } // namespace thermal
673 } // namespace hardware
674 } // namespace android
675 } // namespace aidl
676