• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_CPU_SAMPLING_H
17 #define MINDSPORE_CCSRC_MINDDATA_DATASET_CPU_SAMPLING_H
18 
19 #include <memory>
20 #include <string>
21 #include <unordered_map>
22 #include <vector>
23 #include <nlohmann/json.hpp>
24 #include "minddata/dataset/engine/perf/profiling.h"
25 #include "minddata/dataset/engine/datasetops/dataset_op.h"
26 
27 namespace mindspore {
28 namespace dataset {
29 class ExecutionTree;
30 
// Raw CPU counters taken from the /proc/stat or /proc/<pid>/stat file.
// NOTE(review): the unit of each counter is not visible in this header
// (presumably kernel clock ticks) -- confirm against the parsing code.
struct CpuStat_s {
  uint64_t user_stat_;   // counter for the "user" state
  uint64_t sys_stat_;    // counter for the "sys" state
  uint64_t io_stat_;     // counter for the "io" state
  uint64_t idle_stat_;   // counter for the "idle" state
  uint64_t total_stat_;  // overall counter; presumably the sum of all states -- TODO confirm
};
using CpuStat = CpuStat_s;
39 
// CPU utilization split into its user / sys / io / idle shares.
// NOTE(review): each share is stored in a uint8_t, so it is presumably a
// whole-number percentage -- confirm against the code that fills it in.
struct CpuInfo_s {
  uint8_t user_utilization_;  // share attributed to the "user" state
  uint8_t sys_utilization_;   // share attributed to the "sys" state
  uint8_t io_utilization_;    // share attributed to the "io" state
  uint8_t idle_utilization_;  // share attributed to the "idle" state
};
using CpuUtil = CpuInfo_s;
47 
// CPU utilization attributed to a single operator.
struct CpuOpInfo_s {
  float user_utilization_;  // user share for this operator
  float sys_utilization_;   // sys share for this operator
  int32_t op_id_;           // id of the operator the two values belong to
};
using CpuOpUtil = CpuOpInfo_s;
54 
// CPU utilization attributed to a process.
struct CpuProcessInfo_s {
  float user_utilization_;  // user share for the process
  float sys_utilization_;   // sys share for the process
};
using CpuProcessUtil = CpuProcessInfo_s;
60 
// Raw CPU counters attributed to an operator.
// NOTE(review): ProcessCpu also stores this type per process id
// (pre_process_stat_), so it is not used for operators only.
struct CpuOpStat_s {
  uint64_t user_stat_;  // counter for the "user" state
  uint64_t sys_stat_;   // counter for the "sys" state
};
using CpuOpStat = CpuOpStat_s;
66 
67 class BaseCpu {
68  public:
69   BaseCpu();
70   ~BaseCpu() = default;
71   // Collect CPU information
72   virtual Status Collect(const ExecutionTree *tree) = 0;
73   virtual Status SaveToFile(const std::string &file_path) = 0;
74   virtual Status Analyze(std::string *name, double *utilization, std::string *extra_message) = 0;
75   // Get the total CPU time of device
76   Status GetTotalCpuTime(uint64_t *total_stat);
77 
78  protected:
79   std::vector<CpuUtil> cpu_util_;
80   CpuStat pre_cpu_stat_;
81   static bool fetched_all_process_shared_;
82   static std::unordered_map<int32_t, std::vector<pid_t>> op_process_shared_;
83   bool fetched_all_process_;
84   bool pre_fetched_state_;
85   std::unordered_map<int32_t, std::vector<pid_t>> op_process_;
86   int32_t cpu_processor_num_;
87 };
88 
89 // Collect device CPU information
90 class DeviceCpu : public BaseCpu {
91  public:
DeviceCpu()92   DeviceCpu() : pre_running_process_(0), pre_context_switch_count_(0), first_collect_(true) {}
93   ~DeviceCpu() = default;
94   Status Collect(const ExecutionTree *tree) override;
95   Status SaveToFile(const std::string &file_path) override;
96   Status Analyze(std::string *name, double *utilization, std::string *extra_message) override;
97 
98  private:
99   // Get CPU information, include use/sys/idle/io utilization
100   Status ParseCpuInfo(const std::string &str);
101 
102   // Get context switch count
103   Status ParseCtxt(const std::string &str);
104 
105   // Get running process count
106   Status ParseRunningProcess(const std::string &str);
107 
108   std::vector<uint32_t> running_process_;
109   std::vector<uint64_t> context_switch_count_;
110   uint32_t pre_running_process_;
111   uint64_t pre_context_switch_count_;
112   bool first_collect_;
113 };
114 
115 // Collect operator CPU information
116 class OperatorCpu : public BaseCpu {
117  public:
OperatorCpu()118   OperatorCpu() : first_collect_(true), pre_total_stat_(0), id_count_(0) {}
119   ~OperatorCpu() = default;
120   Status Collect(const ExecutionTree *tree) override;
121   Status SaveToFile(const std::string &file_path) override;
122   // Analyze will output the name of the metric, the avg utiliization of highest
123   // object within the class and any extra message that would be useful for the user.
124   // The Higher level CPUSampling class will combine information from different classes
125   // to decide if warning should be output.
126   Status Analyze(std::string *name, double *utilization, std::string *extra_message) override;
127 
128  private:
129   // Get cpu information, include use/sys/idle/io utilization
130   Status ParseCpuInfo(int32_t op_id, int64_t thread_id,
131                       std::unordered_map<int32_t, std::unordered_map<int64_t, CpuOpStat>> *op_stat);
132 
133   // Store the CPU utilization of each operator
134   std::vector<std::vector<CpuOpUtil>> cpu_op_util_;
135 
136   bool first_collect_;
137 
138   // Store the id and its corresponding threads.
139   std::unordered_map<int32_t, std::vector<pid_t>> op_thread_;
140   std::unordered_map<int32_t, std::string> op_name_;
141   std::unordered_map<int32_t, int32_t> op_parallel_workers_;
142   std::unordered_map<int32_t, std::unordered_map<int64_t, CpuOpStat>> pre_op_stat_;
143   uint64_t pre_total_stat_;
144   int32_t id_count_;
145 };
146 
147 // Collect operator CPU information
148 class ProcessCpu : public BaseCpu {
149  public:
ProcessCpu()150   ProcessCpu() : first_collect_(true), pre_total_stat_(0) {}
151   ~ProcessCpu() = default;
152   Status Collect(const ExecutionTree *tree) override;
153   Status SaveToFile(const std::string &file_path) override;
154   Status Analyze(std::string *name, double *utilization, std::string *extra_message) override;
155 
156  private:
157   // Get CPU information, include use/sys/idle/io utilization
158   Status ParseCpuInfo();
159 
160   bool first_collect_;
161   std::vector<CpuProcessUtil> process_util_;
162   uint64_t pre_total_stat_;
163   std::unordered_map<int64_t, CpuOpStat> pre_process_stat_;
164   std::vector<pid_t> process_id_;
165 };
166 
167 // Sampling CPU information
168 // It support JSON serialization for external usage.
169 class CpuSampling : public Sampling {
170   using TimeStamp = std::vector<uint32_t>;
171 
172  public:
CpuSampling(ExecutionTree * tree)173   explicit CpuSampling(ExecutionTree *tree) : tree_(tree) {}
174 
175   ~CpuSampling() = default;
176 
177   // Driver function for CPU sampling.
178   // This function samples the CPU information of device/process/op
179   Status Sample() override;
180 
Name()181   std::string Name() const override { return kCpuSamplingName; }
182 
183   // Save sampling data to file
184   // @return Status - The error code return
185   Status SaveToFile() override;
186 
187   Status Init(const std::string &dir_path, const std::string &device_id) override;
188 
189   // Change file mode after save CPU data
190   Status ChangeFileMode() override;
191 
192   // Analyze sampling data and print message to log
193   Status Analyze() override;
194 
195  private:
196   Status CollectTimeStamp();
197 
198   Status SaveTimeStampToFile();
199 
200   Status SaveSamplingItervalToFile();
201 
202   ExecutionTree *tree_ = nullptr;              // ExecutionTree pointer
203   std::vector<std::shared_ptr<BaseCpu>> cpu_;  // CPU information of device/process/op
204   TimeStamp time_stamp_;                       // Time stamp
205 };
206 
207 }  // namespace dataset
208 }  // namespace mindspore
209 
210 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_CPU_SAMPLING_H
211