• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2021-2022 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_PERF_CPU_SAMPLER_H_
17 #define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_PERF_CPU_SAMPLER_H_
18 
19 #include <memory>
20 #include <string>
21 #include <unordered_map>
22 #include <vector>
23 #include <nlohmann/json.hpp>
24 #include "minddata/dataset/engine/perf/profiling.h"
25 #include "minddata/dataset/engine/datasetops/dataset_op.h"
26 
27 namespace mindspore {
28 namespace dataset {
29 
30 class ExecutionTree;
31 
// Plain aggregate of system-wide CPU counters. SystemInfo keeps one of these as the
// last data read from the /proc/stat file.
// NOTE(review): the unit of the counters is not visible in this header - confirm in the implementation.
struct SystemStat_s {
  uint64_t user_stat;   // counter for the "user" category
  uint64_t sys_stat;    // counter for the "sys" category
  uint64_t io_stat;     // counter for the "io" category
  uint64_t idle_stat;   // counter for the "idle" category
  uint64_t total_stat;  // presumably the overall total across categories - TODO confirm
};
using SystemStat = SystemStat_s;
39 
// One sample of system CPU utilization, split into four categories.
// NOTE(review): values are stored in 8 bits each; presumably percentages - confirm in the implementation.
struct SystemUtil_s {
  uint8_t user_utilization;  // utilization attributed to the "user" category
  uint8_t sys_utilization;   // utilization attributed to the "sys" category
  uint8_t io_utilization;    // utilization attributed to the "io" category
  uint8_t idle_utilization;  // utilization attributed to the "idle" category
};
using SystemUtil = SystemUtil_s;
46 
// Pair of CPU counters recorded for a single task; TaskCpuInfo stores the previously
// read value in a member of this type.
struct TaskStat_s {
  uint64_t user_stat;  // counter for the "user" category
  uint64_t sys_stat;   // counter for the "sys" category
};
using TaskStat = TaskStat_s;
51 
// One utilization sample with a "user" part and a "sys" part, stored as floats.
// The same layout is reused for per-task and per-operator samples through the
// TaskUtil and OpUtil aliases declared further down in this header.
struct TaskUtil_s {
  float user_utilization;  // utilization attributed to the "user" category
  float sys_utilization;   // utilization attributed to the "sys" category
};
56 
// Memory figures sampled for one process (see ProcessInfo).
// NOTE(review): vss/rss/pss presumably stand for virtual, resident and proportional set size,
// and the unit is not visible here - confirm in the implementation.
struct MemoryInfo_s {
  float vss;
  float rss;
  float pss;
};
using MemoryInfo = MemoryInfo_s;
62 
// Memory figures sampled for the whole system (see SystemInfo).
// NOTE(review): the unit of these values is not visible in this header - confirm in the implementation.
struct SystemMemInfo_s {
  float total_mem;      // total memory
  float available_mem;  // available memory
  float used_mem;       // used memory
};
using SystemMemInfo = SystemMemInfo_s;
68 
// Two names for the same utilization record: TaskUtil is used for per-task samples,
// OpUtil for per-operator samples.
using TaskUtil = struct TaskUtil_s;
using OpUtil = struct TaskUtil_s;
71 
72 class SystemInfo {
73  public:
SystemInfo()74   SystemInfo()
75       : first_sample_(true),
76         prev_context_switch_count_(0),
77         last_mem_sampling_failed_(false),
78         prev_system_memory_info_({0, 0, 0}) {}
79   // Read in current stats and return previous and currently read stats
80   Status SampleAndGetCurrPrevStat(SystemStat *current_stat, SystemStat *previous_stat);
81   static int32_t num_cpu_;
GetRunningProcess()82   const std::vector<uint32_t> &GetRunningProcess() const { return running_process_; }
GetContextSwitchCount()83   const std::vector<uint64_t> &GetContextSwitchCount() const { return context_switch_count_; }
84   Status GetUserCpuUtil(uint64_t start_index, uint64_t end_index, std::vector<uint8_t> *result) const;
85   Status GetSysCpuUtil(uint64_t start_index, uint64_t end_index, std::vector<uint8_t> *result) const;
86   std::vector<uint8_t> GetIOCpuUtil() const;
87   std::vector<uint8_t> GetIdleCpuUtil() const;
88   Status GetSystemMemInfo(SystemMemoryMetric metric, uint64_t start_index, uint64_t end_index,
89                           std::vector<float> *result) const;
90 
91  private:
92   Status ParseCpuInfo(const std::string &str);
93   Status ParseCtxt(const std::string &str);
94   Status ParseRunningProcess(const std::string &str);
95   Status SampleSystemMemInfo();
96   SystemStat prev_sys_stat_{};                  // last read data /proc/stat file
97   std::vector<SystemUtil> sys_cpu_util_;        // vector of system cpu utilization
98   std::vector<uint32_t> running_process_;       // vector of running processes in system
99   std::vector<uint64_t> context_switch_count_;  // vector of number of context switches between two sampling points
100   bool first_sample_;                           // flag to indicate first time sampling
101   uint64_t prev_context_switch_count_;          // last read context switch count from /proc/stat file
102   std::vector<SystemMemInfo> system_memory_info_;
103   SystemMemInfo prev_system_memory_info_;
104   bool last_mem_sampling_failed_;
105 };
106 
107 class TaskCpuInfo {
108  public:
TaskCpuInfo(pid_t pid)109   explicit TaskCpuInfo(pid_t pid)
110       : pid_(pid), prev_task_stat_(TaskStat{0, 0}), first_sample_(true), last_sampling_failed_(false) {}
111   virtual ~TaskCpuInfo() = default;
112   virtual Status Sample(uint64_t total_time_elapsed) = 0;
113   virtual pid_t GetId() = 0;
114   TaskUtil GetLatestCpuUtil() const;
115   std::vector<uint16_t> GetSysCpuUtil() const;
116   std::vector<uint16_t> GetUserCpuUtil() const;
117 
118  protected:
119   pid_t pid_;
120   TaskStat prev_task_stat_;
121   std::vector<TaskUtil> task_cpu_util_;
122   bool first_sample_;
123   bool last_sampling_failed_;
124 };
125 
126 class ProcessInfo : public TaskCpuInfo {
127  public:
128   explicit ProcessInfo(pid_t pid, bool track_history = false)
TaskCpuInfo(pid)129       : TaskCpuInfo(pid),
130         prev_memory_info_(MemoryInfo{0.0, 0.0, 0.0}),
131         last_mem_sampling_failed_(false),
132         track_sampled_history_(track_history) {}
133   ~ProcessInfo() override = default;
134   Status Sample(uint64_t total_time_elapsed) override;
GetId()135   pid_t GetId() override { return pid_; }
136   Status GetMemoryInfo(ProcessMemoryMetric metric, uint64_t start_index, uint64_t end_index,
137                        std::vector<float> *result) const;
138   void AddChildProcess(const std::shared_ptr<ProcessInfo> &child_ptr);
139 
140  private:
141   Status SampleMemInfo();
142   MemoryInfo GetLatestMemoryInfo() const;
143   MemoryInfo prev_memory_info_;
144   std::vector<MemoryInfo> process_memory_info_;
145   std::vector<std::shared_ptr<ProcessInfo>> child_processes_;
146   bool IsParent();
147   bool last_mem_sampling_failed_;
148   bool track_sampled_history_;
149 };
150 
151 class ThreadCpuInfo : public TaskCpuInfo {
152  public:
ThreadCpuInfo(pid_t pid,pid_t tid)153   explicit ThreadCpuInfo(pid_t pid, pid_t tid) : TaskCpuInfo(pid), tid_(tid) {}
154   ~ThreadCpuInfo() override = default;
155   Status Sample(uint64_t total_time_elapsed) override;
GetId()156   pid_t GetId() override { return tid_; }
157 
158  private:
159   pid_t tid_;
160 };
161 
162 class MDOperatorCpuInfo {
163  public:
164   void AddTask(const std::shared_ptr<TaskCpuInfo> &task_ptr);
165   bool TaskExists(pid_t id) const;
MDOperatorCpuInfo(const int32_t op_id)166   explicit MDOperatorCpuInfo(const int32_t op_id) : id_(op_id) {}
167   void CalculateOperatorUtilization();
168   Status GetUserCpuUtil(uint64_t start_index, uint64_t end_index, std::vector<uint16_t> *result) const;
169   Status GetSysCpuUtil(uint64_t start_index, uint64_t end_index, std::vector<uint16_t> *result) const;
170 
171  private:
172   int32_t id_;
173   // tid is key for threadinfo, pid is key for processinfo
174   std::unordered_map<pid_t, std::shared_ptr<TaskCpuInfo>> task_by_id_;
175   std::vector<OpUtil> op_cpu_util_;
176 };
177 
178 class CpuSampler : public Sampling {
179   using Timestamps = std::vector<uint64_t>;
180 
181  public:
CpuSampler(ExecutionTree * tree)182   explicit CpuSampler(ExecutionTree *tree) : fetched_all_python_multiprocesses_(false), tree(tree) {}
183   ~CpuSampler() = default;
184   Status Sample() override;
185   Status Init() override;
186   Status ChangeFileMode(const std::string &dir_path, const std::string &rank_id) override;
187   Status SaveToFile(const std::string &dir_path, const std::string &rank_id) override;
Name()188   std::string Name() const override { return kCpuSamplerName; }
189   Status GetSystemUserCpuUtil(uint64_t start_ts, uint64_t end_ts, std::vector<uint8_t> *result);
190   Status GetSystemSysCpuUtil(uint64_t start_ts, uint64_t end_ts, std::vector<uint8_t> *result);
191   Status GetOpUserCpuUtil(int32_t op_id, uint64_t start_ts, uint64_t end_ts, std::vector<uint16_t> *result);
192   Status GetOpSysCpuUtil(int32_t op_id, uint64_t start_ts, uint64_t end_ts, std::vector<uint16_t> *result);
193   Status GetProcessMemoryInfo(ProcessMemoryMetric metric, uint64_t start_index, uint64_t end_index,
194                               std::vector<float> *result);
195   Status GetSystemMemoryInfo(SystemMemoryMetric metric, uint64_t start_index, uint64_t end_index,
196                              std::vector<float> *result);
197 
198   // Clear all collected data
199   void Clear() override;
200 
201  private:
202   Status UpdateTaskList();
203   bool fetched_all_python_multiprocesses_{};
204   ExecutionTree *tree = nullptr;
205   pid_t main_pid_{};
206   Timestamps ts_;
207   SystemInfo sys_info_;                              // stores the system cpu utilization
208   std::vector<std::shared_ptr<TaskCpuInfo>> tasks_;  // vector of all process and thread tasks
209   std::shared_ptr<ThreadCpuInfo> main_thread_cpu_info_;
210   std::shared_ptr<ProcessInfo> main_process_info_;
211   std::unordered_map<int32_t, MDOperatorCpuInfo> op_info_by_id_;
212   Path GetFileName(const std::string &dir_path, const std::string &rank_id) override;
213 };
214 }  // namespace dataset
215 }  // namespace mindspore
216 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_PERF_CPU_SAMPLER_H_
217