• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 #ifndef DEBUG_DBG_SERVICES_H_
17 #define DEBUG_DBG_SERVICES_H_
18 
19 #include <vector>
20 #include <string>
21 #include <map>
22 #include <memory>
23 #include <tuple>
24 #include <iostream>
25 #include <variant>
26 #include "pybind11/pybind11.h"
27 #include "pybind11/stl.h"
28 #include "pybind11/stl_bind.h"
29 
30 #include "utils/ms_utils.h"
31 #include "debug/debug_services.h"
32 namespace py = pybind11;
33 namespace common = mindspore::common;
34 
// One named parameter attached to a watchpoint. Instances are passed to
// DbgServices::AddWatchpoint() as `parameter_list` and are carried back inside
// watchpoint_hit_t::parameters.
//
// Plain value type: every field is public, and each getter returns a copy of
// the corresponding field.
struct parameter_t {
  // Stores the five arguments verbatim in the members of the same name.
  parameter_t(const std::string &name, bool disabled, double value, bool hit, double actual_value)
      : name(name), disabled(disabled), value(value), hit(hit), actual_value(actual_value) {}

  // Getters return by value WITHOUT a top-level `const`: on scalars the
  // qualifier is ignored, and on std::string it only prevents the caller from
  // moving out of the returned temporary.
  std::string get_name() const { return name; }
  bool get_disabled() const { return disabled; }
  double get_value() const { return value; }
  bool get_hit() const { return hit; }
  double get_actual_value() const { return actual_value; }

  std::string name;
  bool disabled;
  double value;
  bool hit;             // NOTE(review): presumably filled in when a watchpoint check reports a hit - confirm.
  double actual_value;  // NOTE(review): presumably the observed value compared against `value` - confirm.
};
49 
50 struct watchpoint_hit_t {
watchpoint_hit_twatchpoint_hit_t51   watchpoint_hit_t(const std::string &name, uint32_t slot, int condition, uint32_t watchpoint_id,
52                    const std::vector<parameter_t> &parameters, int32_t error_code, uint32_t rank_id,
53                    uint32_t root_graph_id)
54       : name(name),
55         slot(slot),
56         condition(condition),
57         watchpoint_id(watchpoint_id),
58         parameters(parameters),
59         error_code(error_code),
60         rank_id(rank_id),
61         root_graph_id(root_graph_id) {}
get_namewatchpoint_hit_t62   const std::string get_name() const { return name; }
get_slotwatchpoint_hit_t63   const uint32_t get_slot() const { return slot; }
get_conditionwatchpoint_hit_t64   const int get_condition() const { return condition; }
get_watchpoint_idwatchpoint_hit_t65   const uint32_t get_watchpoint_id() const { return watchpoint_id; }
get_parameterswatchpoint_hit_t66   const std::vector<parameter_t> get_parameters() const { return parameters; }
get_error_codewatchpoint_hit_t67   const int32_t get_error_code() const { return error_code; }
get_rank_idwatchpoint_hit_t68   const uint32_t get_rank_id() const { return rank_id; }
get_root_graph_idwatchpoint_hit_t69   const uint32_t get_root_graph_id() const { return root_graph_id; }
70   std::string name;
71   uint32_t slot;
72   int condition;
73   uint32_t watchpoint_id;
74   std::vector<parameter_t> parameters;
75   int32_t error_code;
76   uint32_t rank_id;
77   uint32_t root_graph_id;
78 };
79 
// Describes one tensor to read. Lists of these are the input of
// DbgServices::ReadTensors(), ReadTensorsBase(), ReadTensorsStat() and
// ReadTensorsUtil().
//
// Plain value type: every field is public, and each getter returns a copy of
// the corresponding field.
struct tensor_info_t {
  // Stores the six arguments verbatim in the members of the same name.
  tensor_info_t(const std::string &node_name, uint32_t slot, uint32_t iteration, uint32_t rank_id,
                uint32_t root_graph_id, bool is_output)
      : node_name(node_name),
        slot(slot),
        iteration(iteration),
        rank_id(rank_id),
        root_graph_id(root_graph_id),
        is_output(is_output) {}

  // Getters return by value WITHOUT a top-level `const`: on scalars the
  // qualifier is ignored, and on std::string it only prevents the caller from
  // moving out of the returned temporary.
  std::string get_node_name() const { return node_name; }
  uint32_t get_slot() const { return slot; }
  uint32_t get_iteration() const { return iteration; }
  uint32_t get_rank_id() const { return rank_id; }
  uint32_t get_root_graph_id() const { return root_graph_id; }
  bool get_is_output() const { return is_output; }

  std::string node_name;
  uint32_t slot;
  uint32_t iteration;
  uint32_t rank_id;
  uint32_t root_graph_id;
  bool is_output;  // NOTE(review): presumably selects a node's output tensor rather than its input - confirm.
};
102 
103 struct tensor_data_t {
tensor_data_ttensor_data_t104   tensor_data_t(const char *data_ptr, uint64_t data_size, int dtype, const std::vector<int64_t> &shape)
105       : data_size(data_size), dtype(dtype), shape(shape) {
106     if (data_ptr != nullptr) {
107       this->data_ptr = py::bytes(data_ptr, data_size);
108     } else {
109       this->data_ptr = py::bytes();
110     }
111   }
get_data_ptrtensor_data_t112   const py::bytes get_data_ptr() const { return data_ptr; }
get_data_sizetensor_data_t113   const uint64_t get_data_size() const { return data_size; }
get_dtypetensor_data_t114   const int get_dtype() const { return dtype; }
get_shapetensor_data_t115   const std::vector<int64_t> &get_shape() const { return shape; }
116   py::bytes data_ptr;
117   uint64_t data_size;
118   int dtype;
119   std::vector<int64_t> shape;
120 };
121 
// Result element of DbgServices::ReadTensorsBase(): a tensor's size, dtype
// code and shape, without the tensor contents.
struct TensorBaseData {
  // Stores the three arguments verbatim in the matching `*_` members.
  TensorBaseData(uint64_t data_size, int dtype, const std::vector<int64_t> &shape)
      : data_size_(data_size), dtype_(dtype), shape_(shape) {}

  // Scalar getters return plain values; a top-level `const` on a by-value
  // scalar return is ignored by the language, so it is not spelled out.
  uint64_t data_size() const { return data_size_; }
  int dtype() const { return dtype_; }
  // Returns a reference to the stored shape; valid only while *this is alive.
  const std::vector<int64_t> &shape() const { return shape_; }

  uint64_t data_size_;
  int dtype_;
  std::vector<int64_t> shape_;
};
133 
// Result element of DbgServices::ReadTensorsStat(): a tensor's size, dtype
// code and shape plus a set of summary statistics (max/min/avg and several
// element counters).
//
// The struct only stores and returns what the constructor is given; it does
// not compute any statistic itself.
struct TensorStatData {
  // Stores the fourteen arguments verbatim in the matching `*_` members.
  TensorStatData(uint64_t data_size, int dtype, const std::vector<int64_t> &shape, bool is_bool, double max_value,
                 double min_value, double avg_value, int count, int neg_zero_count, int pos_zero_count, int nan_count,
                 int neg_inf_count, int pos_inf_count, int zero_count)
      : data_size_(data_size),
        dtype_(dtype),
        shape_(shape),
        is_bool_(is_bool),
        max_value_(max_value),
        min_value_(min_value),
        avg_value_(avg_value),
        count_(count),
        neg_zero_count_(neg_zero_count),
        pos_zero_count_(pos_zero_count),
        nan_count_(nan_count),
        neg_inf_count_(neg_inf_count),
        pos_inf_count_(pos_inf_count),
        zero_count_(zero_count) {}

  // Scalar getters return plain values; a top-level `const` on a by-value
  // scalar return is ignored by the language, so it is not spelled out.
  uint64_t data_size() const { return data_size_; }
  int dtype() const { return dtype_; }
  // Returns a reference to the stored shape; valid only while *this is alive.
  const std::vector<int64_t> &shape() const { return shape_; }
  bool is_bool() const { return is_bool_; }
  double max_value() const { return max_value_; }
  double min_value() const { return min_value_; }
  double avg_value() const { return avg_value_; }
  int count() const { return count_; }
  int neg_zero_count() const { return neg_zero_count_; }
  int pos_zero_count() const { return pos_zero_count_; }
  int nan_count() const { return nan_count_; }
  int neg_inf_count() const { return neg_inf_count_; }
  int pos_inf_count() const { return pos_inf_count_; }
  int zero_count() const { return zero_count_; }

  uint64_t data_size_;
  int dtype_;
  std::vector<int64_t> shape_;
  bool is_bool_;
  double max_value_;
  double min_value_;
  double avg_value_;
  int count_;
  // NOTE(review): the neg_zero/pos_zero counters presumably count elements
  // below/above zero rather than IEEE signed zeros - confirm with the producer.
  int neg_zero_count_;
  int pos_zero_count_;
  int nan_count_;
  int neg_inf_count_;
  int pos_inf_count_;
  int zero_count_;
};
183 
184 class DbgServices {
185  public:
186   DbgServices();
187 
188   DbgServices(const DbgServices &other);
189 
190   DbgServices &operator=(const DbgServices &other);
191 
192   ~DbgServices();
193 
194   int32_t Initialize(const std::string net_name, const std::string dump_folder_path, bool is_sync_mode,
195                      uint64_t max_mem_usage);
196 
197   int32_t AddWatchpoint(
198     unsigned int id, unsigned int watch_condition,
199     std::map<std::string, std::map<std::string, std::variant<bool, std::vector<std::string>>>> check_nodes,
200     std::vector<parameter_t> parameter_list);
201 
202   int32_t RemoveWatchpoint(unsigned int id);
203 
204   std::vector<watchpoint_hit_t> CheckWatchpoints(unsigned int iteration);
205 
206   std::vector<std::shared_ptr<TensorData>> ReadTensorsUtil(std::vector<tensor_info_t> info);
207 
208   std::vector<tensor_data_t> ReadTensors(const std::vector<tensor_info_t> info);
209 
210   std::vector<TensorBaseData> ReadTensorsBase(const std::vector<tensor_info_t> info);
211 
212   std::vector<TensorStatData> ReadTensorsStat(const std::vector<tensor_info_t> info);
213 
214   std::string GetVersion() const;
215 
216  private:
217   std::shared_ptr<DebugServices> debug_services_ = nullptr;
218 };
219 
220 #endif  // DEBUG_DBG_SERVICES_H_
221