• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2022 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "thermal_stats_helper.h"
18 
19 #include <android-base/logging.h>
20 #include <android/binder_manager.h>
21 
22 #include <algorithm>
23 #include <numeric>
24 #include <string_view>
25 
26 #include "../thermal-helper.h"
27 
28 namespace aidl {
29 namespace android {
30 namespace hardware {
31 namespace thermal {
32 namespace implementation {
33 
34 constexpr std::string_view kCustomThresholdSetSuffix("-TH-");
35 constexpr std::string_view kCompressedThresholdSuffix("-CMBN-TH");
36 
37 using aidl::android::frameworks::stats::VendorAtom;
38 namespace PixelAtoms = ::android::hardware::google::pixel::PixelAtoms;
39 
40 namespace {
41 static std::shared_ptr<IStats> stats_client = nullptr;
getStatsService()42 std::shared_ptr<IStats> getStatsService() {
43     static std::once_flag statsServiceFlag;
44     std::call_once(statsServiceFlag, []() {
45         const std::string instance = std::string() + IStats::descriptor + "/default";
46         bool isStatsDeclared = AServiceManager_isDeclared(instance.c_str());
47         if (!isStatsDeclared) {
48             LOG(ERROR) << "Stats service is not registered.";
49             return;
50         }
51         stats_client = IStats::fromBinder(
52                 ndk::SpAIBinder(AServiceManager_waitForService(instance.c_str())));
53     });
54     return stats_client;
55 }
56 
// Tells whether `name` should be recorded against its default thresholds.
// The variant holds either a single bool that applies to every name, or the set of names for
// which default-threshold recording is enabled.
bool isRecordByDefaultThreshold(const std::variant<bool, std::unordered_set<std::string>>
                                        &record_by_default_threshold_all_or_name_set_,
                                std::string_view name) {
    if (const bool *record_all =
                std::get_if<bool>(&record_by_default_threshold_all_or_name_set_)) {
        return *record_all;
    }
    const auto &name_set =
            std::get<std::unordered_set<std::string>>(record_by_default_threshold_all_or_name_set_);
    // Build the key from the view's full extent. A string_view is not required to be
    // null-terminated, so converting name.data() could read past the view or yield a longer key.
    return name_set.count(std::string(name)) > 0;
}
66 
67 template <typename T>
calculateThresholdBucket(const std::vector<T> & thresholds,T value)68 int calculateThresholdBucket(const std::vector<T> &thresholds, T value) {
69     if (thresholds.empty()) {
70         LOG(VERBOSE) << "No threshold present, so bucket is " << value << " as int.";
71         return static_cast<int>(value);
72     }
73     auto threshold_idx = std::upper_bound(thresholds.begin(), thresholds.end(), value);
74     int bucket = (threshold_idx - thresholds.begin());
75     LOG(VERBOSE) << "For value: " << value << " bucket is: " << bucket;
76     return bucket;
77 }
78 
resetCurrentTempStatus(CurrTempStatus * curr_temp_status,float new_temp)79 void resetCurrentTempStatus(CurrTempStatus *curr_temp_status, float new_temp) {
80     curr_temp_status->temp = new_temp;
81     curr_temp_status->start_time = boot_clock::now();
82     curr_temp_status->repeat_count = 1;
83 }
84 
85 }  // namespace
86 
initializeStats(const Json::Value & config,const std::unordered_map<std::string,SensorInfo> & sensor_info_map_,const std::unordered_map<std::string,CdevInfo> & cooling_device_info_map_,ThermalHelper * const thermal_helper_handle)87 bool ThermalStatsHelper::initializeStats(
88         const Json::Value &config,
89         const std::unordered_map<std::string, SensorInfo> &sensor_info_map_,
90         const std::unordered_map<std::string, CdevInfo> &cooling_device_info_map_,
91         ThermalHelper *const thermal_helper_handle) {
92     StatsInfo<float> sensor_stats_info;
93     AbnormalStatsInfo abnormal_stats_info;
94     if (!ParseSensorStatsConfig(config, sensor_info_map_, &sensor_stats_info,
95                                 &abnormal_stats_info)) {
96         LOG(ERROR) << "Failed to parse sensor stats config";
97         return false;
98     }
99     StatsInfo<int> cooling_device_request_info;
100     if (!ParseCoolingDeviceStatsConfig(config, cooling_device_info_map_,
101                                        &cooling_device_request_info)) {
102         LOG(ERROR) << "Failed to parse cooling device stats config";
103         return false;
104     }
105     if (!initializeSensorTempStats(sensor_stats_info, sensor_info_map_)) {
106         LOG(ERROR) << "Failed to initialize sensor temp stats";
107         return false;
108     }
109     if (!initializeSensorCdevRequestStats(cooling_device_request_info, sensor_info_map_,
110                                           cooling_device_info_map_)) {
111         LOG(ERROR) << "Failed to initialize sensor cooling device request stats";
112         return false;
113     }
114     if (!initializeSensorAbnormalityStats(abnormal_stats_info, sensor_info_map_)) {
115         LOG(ERROR) << "Failed to initialize sensor abnormal stats";
116         return false;
117     }
118 
119     thermal_helper_handle_ = thermal_helper_handle;
120     last_total_stats_report_time = boot_clock::now();
121     abnormal_stats_reported_per_update_interval = 0;
122     LOG(INFO) << "Thermal Stats Initialized Successfully";
123     return true;
124 }
125 
initializeSensorCdevRequestStats(const StatsInfo<int> & request_stats_info,const std::unordered_map<std::string,SensorInfo> & sensor_info_map_,const std::unordered_map<std::string,CdevInfo> & cooling_device_info_map_)126 bool ThermalStatsHelper::initializeSensorCdevRequestStats(
127         const StatsInfo<int> &request_stats_info,
128         const std::unordered_map<std::string, SensorInfo> &sensor_info_map_,
129         const std::unordered_map<std::string, CdevInfo> &cooling_device_info_map_) {
130     std::unique_lock<std::shared_mutex> _lock(sensor_cdev_request_stats_map_mutex_);
131     for (const auto &[sensor, sensor_info] : sensor_info_map_) {
132         for (const auto &binded_cdev_info_pair :
133              sensor_info.throttling_info->binded_cdev_info_map) {
134             const auto &cdev = binded_cdev_info_pair.first;
135             const auto &max_state =
136                     cooling_device_info_map_.at(binded_cdev_info_pair.first).max_state;
137             // Record by all state
138             if (isRecordByDefaultThreshold(
139                         request_stats_info.record_by_default_threshold_all_or_name_set_, cdev)) {
140                 // if the number of states is greater / equal(as state starts from 0) than
141                 // residency_buckets in atom combine the initial states
142                 if (max_state >= kMaxStatsResidencyCount) {
143                     // buckets = [max_state -kMaxStatsResidencyCount + 1, ...max_state]
144                     //     idx = [1, .. max_state - (max_state - kMaxStatsResidencyCount + 1) + 1]
145                     //     idx = [1, .. kMaxStatsResidencyCount]
146                     const auto starting_state = max_state - kMaxStatsResidencyCount + 1;
147                     std::vector<int> thresholds(kMaxStatsResidencyCount);
148                     std::iota(thresholds.begin(), thresholds.end(), starting_state);
149                     const auto logging_name = cdev + kCompressedThresholdSuffix.data();
150                     ThresholdList<int> threshold_list(logging_name, thresholds);
151                     sensor_cdev_request_stats_map_[sensor][cdev]
152                             .stats_by_custom_threshold.emplace_back(threshold_list);
153                 } else {
154                     // buckets = [0, 1, 2, 3, ...max_state]
155                     const auto default_threshold_time_in_state_size = max_state + 1;
156                     sensor_cdev_request_stats_map_[sensor][cdev].stats_by_default_threshold =
157                             StatsRecord(default_threshold_time_in_state_size);
158                 }
159                 LOG(INFO) << "Sensor Cdev user vote stats on basis of all state initialized for ["
160                           << sensor << "-" << cdev << "]";
161             }
162 
163             // Record by custom threshold
164             if (request_stats_info.record_by_threshold.count(cdev)) {
165                 for (const auto &threshold_list : request_stats_info.record_by_threshold.at(cdev)) {
166                     // check last threshold value(which is >= number of buckets as numbers in
167                     // threshold are strictly increasing from 0) is less than max_state
168                     if (threshold_list.thresholds.back() >= max_state) {
169                         LOG(ERROR) << "For sensor " << sensor << " bindedCdev: " << cdev
170                                    << "Invalid bindedCdev stats threshold: "
171                                    << threshold_list.thresholds.back() << " >= " << max_state;
172                         sensor_cdev_request_stats_map_.clear();
173                         return false;
174                     }
175                     sensor_cdev_request_stats_map_[sensor][cdev]
176                             .stats_by_custom_threshold.emplace_back(threshold_list);
177                     LOG(INFO)
178                             << "Sensor Cdev user vote stats on basis of threshold initialized for ["
179                             << sensor << "-" << cdev << "]";
180                 }
181             }
182         }
183     }
184     return true;
185 }
186 
initializeSensorTempStats(const StatsInfo<float> & sensor_stats_info,const std::unordered_map<std::string,SensorInfo> & sensor_info_map_)187 bool ThermalStatsHelper::initializeSensorTempStats(
188         const StatsInfo<float> &sensor_stats_info,
189         const std::unordered_map<std::string, SensorInfo> &sensor_info_map_) {
190     std::unique_lock<std::shared_mutex> _lock(sensor_stats_mutex_);
191     auto &temp_stats_map_ = sensor_stats.temp_stats_map_;
192     const int severity_time_in_state_size = kThrottlingSeverityCount;
193     for (const auto &[sensor, sensor_info] : sensor_info_map_) {
194         // Record by severity
195         if (sensor_info.is_watch &&
196             isRecordByDefaultThreshold(
197                     sensor_stats_info.record_by_default_threshold_all_or_name_set_, sensor)) {
198             // number of buckets = number of severity
199             temp_stats_map_[sensor].stats_by_default_threshold =
200                     StatsRecord(severity_time_in_state_size);
201             LOG(INFO) << "Sensor temp stats on basis of severity initialized for [" << sensor
202                       << "]";
203         }
204 
205         // Record by custom threshold
206         if (sensor_stats_info.record_by_threshold.count(sensor)) {
207             for (const auto &threshold_list : sensor_stats_info.record_by_threshold.at(sensor)) {
208                 temp_stats_map_[sensor].stats_by_custom_threshold.emplace_back(threshold_list);
209                 LOG(INFO) << "Sensor temp stats on basis of threshold initialized for [" << sensor
210                           << "]";
211             }
212         }
213     }
214     return true;
215 }
216 
initializeSensorAbnormalityStats(const AbnormalStatsInfo & abnormal_stats_info,const std::unordered_map<std::string,SensorInfo> & sensor_info_map_)217 bool ThermalStatsHelper::initializeSensorAbnormalityStats(
218         const AbnormalStatsInfo &abnormal_stats_info,
219         const std::unordered_map<std::string, SensorInfo> &sensor_info_map_) {
220     std::unique_lock<std::shared_mutex> _lock(sensor_stats_mutex_);
221     auto &temp_range_info_map_ = sensor_stats.temp_range_info_map_;
222     for (const auto &sensors_temp_range_info : abnormal_stats_info.sensors_temp_range_infos) {
223         const auto &temp_range_info_ptr =
224                 std::make_shared<TempRangeInfo>(sensors_temp_range_info.temp_range_info);
225         for (const auto &sensor : sensors_temp_range_info.sensors) {
226             temp_range_info_map_[sensor] = temp_range_info_ptr;
227         }
228     }
229     auto &temp_stuck_info_map_ = sensor_stats.temp_stuck_info_map_;
230     for (const auto &sensors_temp_stuck_info : abnormal_stats_info.sensors_temp_stuck_infos) {
231         const auto &temp_stuck_info_ptr =
232                 std::make_shared<TempStuckInfo>(sensors_temp_stuck_info.temp_stuck_info);
233         for (const auto &sensor : sensors_temp_stuck_info.sensors) {
234             temp_stuck_info_map_[sensor] = temp_stuck_info_ptr;
235         }
236     }
237     const auto &default_temp_range_info_ptr =
238             abnormal_stats_info.default_temp_range_info
239                     ? std::make_shared<TempRangeInfo>(
240                               abnormal_stats_info.default_temp_range_info.value())
241                     : nullptr;
242     const auto &default_temp_stuck_info_ptr =
243             abnormal_stats_info.default_temp_stuck_info
244                     ? std::make_shared<TempStuckInfo>(
245                               abnormal_stats_info.default_temp_stuck_info.value())
246                     : nullptr;
247     for (const auto &sensor_info : sensor_info_map_) {
248         const auto &sensor = sensor_info.first;
249         if (default_temp_range_info_ptr && !temp_range_info_map_.count(sensor))
250             temp_range_info_map_[sensor] = default_temp_range_info_ptr;
251         if (default_temp_stuck_info_ptr && !temp_stuck_info_map_.count(sensor))
252             temp_stuck_info_map_[sensor] = default_temp_stuck_info_ptr;
253     }
254 
255     for (const auto &sensor_temp_stuck_info : temp_stuck_info_map_) {
256         sensor_stats.curr_temp_status_map_[sensor_temp_stuck_info.first] = {
257                 .temp = std::numeric_limits<float>::min(),
258                 .start_time = boot_clock::time_point::min(),
259                 .repeat_count = 0,
260         };
261     }
262     return true;
263 }
264 
updateStatsRecord(StatsRecord * stats_record,int new_state)265 void ThermalStatsHelper::updateStatsRecord(StatsRecord *stats_record, int new_state) {
266     const auto now = boot_clock::now();
267     const auto cur_state_duration = std::chrono::duration_cast<std::chrono::milliseconds>(
268             now - stats_record->cur_state_start_time);
269     LOG(VERBOSE) << "Adding duration " << cur_state_duration.count()
270                  << " for cur_state: " << stats_record->cur_state << " with value: "
271                  << stats_record->time_in_state_ms[stats_record->cur_state].count();
272     // Update last record end time
273     stats_record->time_in_state_ms[stats_record->cur_state] += cur_state_duration;
274     stats_record->cur_state_start_time = now;
275     stats_record->cur_state = new_state;
276 }
277 
updateSensorCdevRequestStats(std::string_view sensor,std::string_view cdev,int new_value)278 void ThermalStatsHelper::updateSensorCdevRequestStats(std::string_view sensor,
279                                                       std::string_view cdev, int new_value) {
280     std::unique_lock<std::shared_mutex> _lock(sensor_cdev_request_stats_map_mutex_);
281     if (!sensor_cdev_request_stats_map_.count(sensor.data()) ||
282         !sensor_cdev_request_stats_map_[sensor.data()].count(cdev.data())) {
283         return;
284     }
285     auto &request_stats = sensor_cdev_request_stats_map_[sensor.data()][cdev.data()];
286     for (auto &stats_by_threshold : request_stats.stats_by_custom_threshold) {
287         int value = calculateThresholdBucket(stats_by_threshold.thresholds, new_value);
288         if (value != stats_by_threshold.stats_record.cur_state) {
289             LOG(VERBOSE) << "Updating bindedCdev stats for sensor: " << sensor.data()
290                          << " , cooling_device: " << cdev.data() << " with new value: " << value;
291             updateStatsRecord(&stats_by_threshold.stats_record, value);
292         }
293     }
294 
295     if (request_stats.stats_by_default_threshold.has_value()) {
296         auto &stats_record = request_stats.stats_by_default_threshold.value();
297         if (new_value != stats_record.cur_state) {
298             LOG(VERBOSE) << "Updating bindedCdev stats for sensor: " << sensor.data()
299                          << " , cooling_device: " << cdev.data()
300                          << " with new value: " << new_value;
301             updateStatsRecord(&stats_record, new_value);
302         }
303     }
304 }
305 
updateSensorTempStatsByThreshold(std::string_view sensor,float temperature)306 void ThermalStatsHelper::updateSensorTempStatsByThreshold(std::string_view sensor,
307                                                           float temperature) {
308     std::unique_lock<std::shared_mutex> _lock(sensor_stats_mutex_);
309     verifySensorAbnormality(sensor, temperature);
310     auto &temp_stats_map_ = sensor_stats.temp_stats_map_;
311     if (!temp_stats_map_.count(sensor.data())) {
312         return;
313     }
314     auto &sensor_temp_stats = temp_stats_map_[sensor.data()];
315     for (auto &stats_by_threshold : sensor_temp_stats.stats_by_custom_threshold) {
316         int value = calculateThresholdBucket(stats_by_threshold.thresholds, temperature);
317         if (value != stats_by_threshold.stats_record.cur_state) {
318             LOG(VERBOSE) << "Updating sensor stats for sensor: " << sensor.data()
319                          << " with value: " << value;
320             updateStatsRecord(&stats_by_threshold.stats_record, value);
321         }
322     }
323     if (temperature > sensor_temp_stats.max_temp) {
324         sensor_temp_stats.max_temp = temperature;
325         sensor_temp_stats.max_temp_timestamp = system_clock::now();
326     }
327     if (temperature < sensor_temp_stats.min_temp) {
328         sensor_temp_stats.min_temp = temperature;
329         sensor_temp_stats.min_temp_timestamp = system_clock::now();
330     }
331 }
332 
updateSensorTempStatsBySeverity(std::string_view sensor,const ThrottlingSeverity & severity)333 void ThermalStatsHelper::updateSensorTempStatsBySeverity(std::string_view sensor,
334                                                          const ThrottlingSeverity &severity) {
335     std::unique_lock<std::shared_mutex> _lock(sensor_stats_mutex_);
336     auto &temp_stats_map_ = sensor_stats.temp_stats_map_;
337     if (temp_stats_map_.count(sensor.data()) &&
338         temp_stats_map_[sensor.data()].stats_by_default_threshold.has_value()) {
339         auto &stats_record = temp_stats_map_[sensor.data()].stats_by_default_threshold.value();
340         int value = static_cast<int>(severity);
341         if (value != stats_record.cur_state) {
342             LOG(VERBOSE) << "Updating sensor stats for sensor: " << sensor.data()
343                          << " with value: " << value;
344             updateStatsRecord(&stats_record, value);
345         }
346     }
347 }
348 
verifySensorAbnormality(std::string_view sensor,float temp)349 void ThermalStatsHelper::verifySensorAbnormality(std::string_view sensor, float temp) {
350     LOG(VERBOSE) << "Verify sensor abnormality for " << sensor << " with temp " << temp;
351     if (sensor_stats.temp_range_info_map_.count(sensor.data())) {
352         const auto &temp_range_info = sensor_stats.temp_range_info_map_[sensor.data()];
353         if (temp < temp_range_info->min_temp_threshold) {
354             LOG(ERROR) << "Outlier Temperature Detected, sensor: " << sensor.data()
355                        << " temp: " << temp << " < " << temp_range_info->min_temp_threshold;
356             reportThermalAbnormality(ThermalSensorAbnormalityDetected::EXTREME_LOW_TEMP, sensor,
357                                      std::round(temp));
358         } else if (temp > temp_range_info->max_temp_threshold) {
359             LOG(ERROR) << "Outlier Temperature Detected, sensor: " << sensor.data()
360                        << " temp: " << temp << " > " << temp_range_info->max_temp_threshold;
361             reportThermalAbnormality(ThermalSensorAbnormalityDetected::EXTREME_HIGH_TEMP, sensor,
362                                      std::round(temp));
363         }
364     }
365     if (sensor_stats.temp_stuck_info_map_.count(sensor.data())) {
366         const auto &temp_stuck_info = sensor_stats.temp_stuck_info_map_[sensor.data()];
367         auto &curr_temp_status = sensor_stats.curr_temp_status_map_[sensor.data()];
368         LOG(VERBOSE) << "Current Temp Status: temp=" << curr_temp_status.temp
369                      << " repeat_count=" << curr_temp_status.repeat_count
370                      << " start_time=" << curr_temp_status.start_time.time_since_epoch().count();
371         if (std::fabs(curr_temp_status.temp - temp) <= kPrecisionThreshold) {
372             curr_temp_status.repeat_count++;
373             if (temp_stuck_info->min_polling_count <= curr_temp_status.repeat_count) {
374                 auto time_elapsed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
375                         boot_clock::now() - curr_temp_status.start_time);
376                 if (temp_stuck_info->min_stuck_duration <= time_elapsed_ms) {
377                     LOG(ERROR) << "Stuck Temperature Detected, sensor: " << sensor.data()
378                                << " temp: " << temp << " repeated "
379                                << temp_stuck_info->min_polling_count << " times for "
380                                << time_elapsed_ms.count() << "ms";
381                     if (reportThermalAbnormality(ThermalSensorAbnormalityDetected::SENSOR_STUCK,
382                                                  sensor, std::round(temp))) {
383                         // reset current status to verify for sensor stuck with start time as
384                         // current polling
385                         resetCurrentTempStatus(&curr_temp_status, temp);
386                     }
387                 }
388             }
389         } else {
390             resetCurrentTempStatus(&curr_temp_status, temp);
391         }
392     }
393 }
394 
reportStats()395 int ThermalStatsHelper::reportStats() {
396     const auto curTime = boot_clock::now();
397     const auto since_last_total_stats_update_ms =
398             std::chrono::duration_cast<std::chrono::milliseconds>(curTime -
399                                                                   last_total_stats_report_time);
400     LOG(VERBOSE) << "Duration from last total stats update is: "
401                  << since_last_total_stats_update_ms.count();
402     if (since_last_total_stats_update_ms < kUpdateIntervalMs) {
403         LOG(VERBOSE) << "Time elapsed since last update less than " << kUpdateIntervalMs.count();
404         return 0;
405     }
406 
407     const std::shared_ptr<IStats> stats_client = getStatsService();
408     if (!stats_client) {
409         LOG(ERROR) << "Unable to get AIDL Stats service";
410         return -1;
411     }
412     int count_failed_reporting =
413             reportAllSensorTempStats(stats_client) + reportAllSensorCdevRequestStats(stats_client);
414     last_total_stats_report_time = curTime;
415     abnormal_stats_reported_per_update_interval = 0;
416     return count_failed_reporting;
417 }
418 
reportAllSensorTempStats(const std::shared_ptr<IStats> & stats_client)419 int ThermalStatsHelper::reportAllSensorTempStats(const std::shared_ptr<IStats> &stats_client) {
420     int count_failed_reporting = 0;
421     std::unique_lock<std::shared_mutex> _lock(sensor_stats_mutex_);
422     for (auto &[sensor, temp_stats] : sensor_stats.temp_stats_map_) {
423         for (size_t threshold_set_idx = 0;
424              threshold_set_idx < temp_stats.stats_by_custom_threshold.size(); threshold_set_idx++) {
425             auto &stats_by_threshold = temp_stats.stats_by_custom_threshold[threshold_set_idx];
426             std::string sensor_name = stats_by_threshold.logging_name.value_or(
427                     sensor + kCustomThresholdSetSuffix.data() + std::to_string(threshold_set_idx));
428             if (!reportSensorTempStats(stats_client, sensor_name, temp_stats,
429                                        &stats_by_threshold.stats_record)) {
430                 count_failed_reporting++;
431             }
432         }
433         if (temp_stats.stats_by_default_threshold.has_value()) {
434             if (!reportSensorTempStats(stats_client, sensor, temp_stats,
435                                        &temp_stats.stats_by_default_threshold.value())) {
436                 count_failed_reporting++;
437             }
438         }
439         // Reset temp stats after reporting
440         temp_stats.max_temp = std::numeric_limits<float>::min();
441         temp_stats.min_temp = std::numeric_limits<float>::max();
442     }
443     return count_failed_reporting;
444 }
445 
reportSensorTempStats(const std::shared_ptr<IStats> & stats_client,std::string_view sensor,const SensorTempStats & sensor_temp_stats,StatsRecord * stats_record)446 bool ThermalStatsHelper::reportSensorTempStats(const std::shared_ptr<IStats> &stats_client,
447                                                std::string_view sensor,
448                                                const SensorTempStats &sensor_temp_stats,
449                                                StatsRecord *stats_record) {
450     LOG(VERBOSE) << "Reporting sensor stats for " << sensor;
451     // maintain a copy in case reporting fails
452     StatsRecord thermal_stats_before_reporting = *stats_record;
453     std::vector<VendorAtomValue> values(2);
454     values[0].set<VendorAtomValue::stringValue>(sensor);
455     std::vector<int64_t> time_in_state_ms = processStatsRecordForReporting(stats_record);
456     const auto since_last_update_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
457             stats_record->cur_state_start_time - stats_record->last_stats_report_time);
458     values[1].set<VendorAtomValue::longValue>(since_last_update_ms.count());
459     VendorAtomValue tmp;
460     for (auto &time_in_state : time_in_state_ms) {
461         tmp.set<VendorAtomValue::longValue>(time_in_state);
462         values.push_back(tmp);
463     }
464     auto remaining_residency_buckets_count = kMaxStatsResidencyCount - time_in_state_ms.size();
465     if (remaining_residency_buckets_count > 0) {
466         tmp.set<VendorAtomValue::longValue>(0);
467         values.insert(values.end(), remaining_residency_buckets_count, tmp);
468     }
469     tmp.set<VendorAtomValue::floatValue>(sensor_temp_stats.max_temp);
470     values.push_back(tmp);
471     tmp.set<VendorAtomValue::longValue>(
472             system_clock::to_time_t(sensor_temp_stats.max_temp_timestamp));
473     values.push_back(tmp);
474     tmp.set<VendorAtomValue::floatValue>(sensor_temp_stats.min_temp);
475     values.push_back(tmp);
476     tmp.set<VendorAtomValue::longValue>(
477             system_clock::to_time_t(sensor_temp_stats.min_temp_timestamp));
478     values.push_back(tmp);
479 
480     if (!reportAtom(stats_client, PixelAtoms::Atom::kVendorTempResidencyStats, std::move(values))) {
481         LOG(ERROR) << "Unable to report VendorTempResidencyStats to Stats service for "
482                       "sensor: "
483                    << sensor;
484         *stats_record = restoreStatsRecordOnFailure(std::move(thermal_stats_before_reporting));
485         return false;
486     }
487     // Update last time of stats reporting
488     stats_record->last_stats_report_time = boot_clock::now();
489     return true;
490 }
491 
reportAllSensorCdevRequestStats(const std::shared_ptr<IStats> & stats_client)492 int ThermalStatsHelper::reportAllSensorCdevRequestStats(
493         const std::shared_ptr<IStats> &stats_client) {
494     int count_failed_reporting = 0;
495     std::unique_lock<std::shared_mutex> _lock(sensor_cdev_request_stats_map_mutex_);
496     for (auto &[sensor, cdev_request_stats_map] : sensor_cdev_request_stats_map_) {
497         for (auto &[cdev, request_stats] : cdev_request_stats_map) {
498             for (size_t threshold_set_idx = 0;
499                  threshold_set_idx < request_stats.stats_by_custom_threshold.size();
500                  threshold_set_idx++) {
501                 auto &stats_by_threshold =
502                         request_stats.stats_by_custom_threshold[threshold_set_idx];
503                 std::string cdev_name = stats_by_threshold.logging_name.value_or(
504                         cdev + kCustomThresholdSetSuffix.data() +
505                         std::to_string(threshold_set_idx));
506                 if (!reportSensorCdevRequestStats(stats_client, sensor, cdev_name,
507                                                   &stats_by_threshold.stats_record)) {
508                     count_failed_reporting++;
509                 }
510             }
511 
512             if (request_stats.stats_by_default_threshold.has_value()) {
513                 if (!reportSensorCdevRequestStats(
514                             stats_client, sensor, cdev,
515                             &request_stats.stats_by_default_threshold.value())) {
516                     count_failed_reporting++;
517                 }
518             }
519         }
520     }
521     return count_failed_reporting;
522 }
523 
reportSensorCdevRequestStats(const std::shared_ptr<IStats> & stats_client,std::string_view sensor,std::string_view cdev,StatsRecord * stats_record)524 bool ThermalStatsHelper::reportSensorCdevRequestStats(const std::shared_ptr<IStats> &stats_client,
525                                                       std::string_view sensor,
526                                                       std::string_view cdev,
527                                                       StatsRecord *stats_record) {
528     LOG(VERBOSE) << "Reporting bindedCdev stats for sensor: " << sensor
529                  << " cooling_device: " << cdev;
530     // maintain a copy in case reporting fails
531     StatsRecord thermal_stats_before_reporting = *stats_record;
532     std::vector<VendorAtomValue> values(3);
533     values[0].set<VendorAtomValue::stringValue>(sensor);
534     values[1].set<VendorAtomValue::stringValue>(cdev);
535     std::vector<int64_t> time_in_state_ms = processStatsRecordForReporting(stats_record);
536     const auto since_last_update_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
537             stats_record->cur_state_start_time - stats_record->last_stats_report_time);
538     values[2].set<VendorAtomValue::longValue>(since_last_update_ms.count());
539     VendorAtomValue tmp;
540     for (auto &time_in_state : time_in_state_ms) {
541         tmp.set<VendorAtomValue::longValue>(time_in_state);
542         values.push_back(tmp);
543     }
544 
545     if (!reportAtom(stats_client, PixelAtoms::Atom::kVendorSensorCoolingDeviceStats,
546                     std::move(values))) {
547         LOG(ERROR) << "Unable to report VendorSensorCoolingDeviceStats to Stats "
548                       "service for sensor: "
549                    << sensor << " cooling_device: " << cdev;
550         *stats_record = restoreStatsRecordOnFailure(std::move(thermal_stats_before_reporting));
551         return false;
552     }
553     // Update last time of stats reporting
554     stats_record->last_stats_report_time = boot_clock::now();
555     return true;
556 }
557 
processStatsRecordForReporting(StatsRecord * stats_record)558 std::vector<int64_t> ThermalStatsHelper::processStatsRecordForReporting(StatsRecord *stats_record) {
559     // update the last unclosed entry and start new record with same state
560     updateStatsRecord(stats_record, stats_record->cur_state);
561     std::vector<std::chrono::milliseconds> &time_in_state_ms = stats_record->time_in_state_ms;
562     // convert std::chrono::milliseconds time_in_state to int64_t vector for reporting
563     std::vector<int64_t> stats_residency(time_in_state_ms.size());
564     std::transform(time_in_state_ms.begin(), time_in_state_ms.end(), stats_residency.begin(),
565                    [](std::chrono::milliseconds time_ms) { return time_ms.count(); });
566     // clear previous stats
567     std::fill(time_in_state_ms.begin(), time_in_state_ms.end(), std::chrono::milliseconds::zero());
568     return stats_residency;
569 }
570 
reportThermalAbnormality(const ThermalSensorAbnormalityDetected::AbnormalityType & type,std::string_view name,std::optional<int> reading)571 bool ThermalStatsHelper::reportThermalAbnormality(
572         const ThermalSensorAbnormalityDetected::AbnormalityType &type, std::string_view name,
573         std::optional<int> reading) {
574     const auto value_str = reading.has_value() ? std::to_string(reading.value()) : "undefined";
575     if (abnormal_stats_reported_per_update_interval >= kMaxAbnormalLoggingPerUpdateInterval) {
576         LOG(ERROR) << "Thermal abnormal atom logging rate limited for " << name.data()
577                    << " with value " << value_str;
578         return true;
579     }
580     const std::shared_ptr<IStats> stats_client = getStatsService();
581     if (!stats_client) {
582         LOG(ERROR) << "Unable to get AIDL Stats service";
583         return false;
584     }
585     std::vector<VendorAtomValue> values(3);
586     values[ThermalSensorAbnormalityDetected::kTypeFieldNumber - kVendorAtomOffset] =
587             VendorAtomValue::make<VendorAtomValue::intValue>(type);
588     values[ThermalSensorAbnormalityDetected::kSensorFieldNumber - kVendorAtomOffset] =
589             VendorAtomValue::make<VendorAtomValue::stringValue>(name);
590     if (reading.has_value()) {
591         values[ThermalSensorAbnormalityDetected::kTempFieldNumber - kVendorAtomOffset] =
592                 VendorAtomValue::make<VendorAtomValue::intValue>(reading.value());
593     }
594 
595     // Dump additional traces before reporting abnormal event
596     if (thermal_helper_handle_) {
597         thermal_helper_handle_->dumpTraces(name);
598     }
599 
600     if (!reportAtom(stats_client, PixelAtoms::Atom::kThermalSensorAbnormalityDetected,
601                     std::move(values))) {
602         LOG(ERROR) << "Failed to log thermal abnormal atom for " << name.data() << " with value "
603                    << value_str;
604         return false;
605     }
606     LOG(INFO) << "Thermal abnormality reported for " << name.data() << " with value " << value_str;
607     abnormal_stats_reported_per_update_interval++;
608     return true;
609 }
610 
reportAtom(const std::shared_ptr<IStats> & stats_client,const int32_t & atom_id,std::vector<VendorAtomValue> && values)611 bool ThermalStatsHelper::reportAtom(const std::shared_ptr<IStats> &stats_client,
612                                     const int32_t &atom_id, std::vector<VendorAtomValue> &&values) {
613     LOG(VERBOSE) << "Reporting thermal stats for atom_id " << atom_id;
614     // Send vendor atom to IStats HAL
615     VendorAtom event = {.reverseDomainName = "", .atomId = atom_id, .values = std::move(values)};
616     const ndk::ScopedAStatus ret = stats_client->reportVendorAtom(event);
617     return ret.isOk();
618 }
619 
restoreStatsRecordOnFailure(StatsRecord && stats_record_before_failure)620 StatsRecord ThermalStatsHelper::restoreStatsRecordOnFailure(
621         StatsRecord &&stats_record_before_failure) {
622     stats_record_before_failure.report_fail_count += 1;
623     // If consecutive count of failure is high, reset stat to avoid overflow
624     if (stats_record_before_failure.report_fail_count >= kMaxStatsReportingFailCount) {
625         return StatsRecord(stats_record_before_failure.time_in_state_ms.size(),
626                            stats_record_before_failure.cur_state);
627     } else {
628         return stats_record_before_failure;
629     }
630 }
631 
GetSensorTempStatsSnapshot()632 std::unordered_map<std::string, SensorTempStats> ThermalStatsHelper::GetSensorTempStatsSnapshot() {
633     auto sensor_temp_stats_snapshot = sensor_stats.temp_stats_map_;
634     for (auto &sensor_temp_stats_pair : sensor_temp_stats_snapshot) {
635         for (auto &temp_stats : sensor_temp_stats_pair.second.stats_by_custom_threshold) {
636             // update the last unclosed entry and start new record with same state
637             updateStatsRecord(&temp_stats.stats_record, temp_stats.stats_record.cur_state);
638         }
639         if (sensor_temp_stats_pair.second.stats_by_default_threshold.has_value()) {
640             auto &stats_by_default_threshold =
641                     sensor_temp_stats_pair.second.stats_by_default_threshold.value();
642             // update the last unclosed entry and start new record with same state
643             updateStatsRecord(&stats_by_default_threshold, stats_by_default_threshold.cur_state);
644         }
645     }
646     return sensor_temp_stats_snapshot;
647 }
648 
649 std::unordered_map<std::string, std::unordered_map<std::string, ThermalStats<int>>>
GetSensorCoolingDeviceRequestStatsSnapshot()650 ThermalStatsHelper::GetSensorCoolingDeviceRequestStatsSnapshot() {
651     auto sensor_cdev_request_stats_snapshot = sensor_cdev_request_stats_map_;
652     for (auto &sensor_cdev_request_stats_pair : sensor_cdev_request_stats_snapshot) {
653         for (auto &cdev_request_stats_pair : sensor_cdev_request_stats_pair.second) {
654             for (auto &request_stats : cdev_request_stats_pair.second.stats_by_custom_threshold) {
655                 // update the last unclosed entry and start new record with same state
656                 updateStatsRecord(&request_stats.stats_record,
657                                   request_stats.stats_record.cur_state);
658             }
659             if (cdev_request_stats_pair.second.stats_by_default_threshold.has_value()) {
660                 auto &stats_by_default_threshold =
661                         cdev_request_stats_pair.second.stats_by_default_threshold.value();
662                 // update the last unclosed entry and start new record with same state
663                 updateStatsRecord(&stats_by_default_threshold,
664                                   stats_by_default_threshold.cur_state);
665             }
666         }
667     }
668     return sensor_cdev_request_stats_snapshot;
669 }
670 
671 }  // namespace implementation
672 }  // namespace thermal
673 }  // namespace hardware
674 }  // namespace android
675 }  // namespace aidl
676