/**
 * Copyright 2019-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
// This schema is written in proto3 syntax.
syntax = "proto3";

// Every definition in this file belongs to the `debugger` package.
package debugger;

// NOTE(review): presumably the source of TensorProto, which this file uses
// but does not define -- confirm.
import "debug_graph.proto";
22
// RPC service of the `debugger` package. Every method returns an EventReply.
// Methods whose request is marked `stream` take a client-side stream of
// messages; the others take a single request message.
service EventListener {
  // Single Metadata in, single EventReply out. The reply can carry a command
  // in its `cmd` oneof.
  rpc WaitCMD(Metadata) returns (EventReply);

  // Single Metadata in, single EventReply out.
  rpc SendMetadata(Metadata) returns (EventReply);

  // Stream of Chunk messages in, single EventReply out.
  rpc SendGraph(stream Chunk) returns (EventReply);

  // Stream of TensorProto messages in, single EventReply out.
  rpc SendTensors(stream TensorProto) returns (EventReply);

  // Stream of TensorBase messages in, single EventReply out.
  rpc SendTensorBase(stream TensorBase) returns (EventReply);

  // Stream of TensorSummary messages in, single EventReply out.
  rpc SendTensorStats(stream TensorSummary) returns (EventReply);

  // Stream of WatchpointHit messages in, single EventReply out.
  rpc SendWatchpointHits(stream WatchpointHit) returns (EventReply);

  // Stream of Chunk messages in, single EventReply out.
  rpc SendMultiGraphs(stream Chunk) returns (EventReply);

  // Single Heartbeat in, single EventReply out.
  rpc SendHeartbeat(Heartbeat) returns (EventReply);
}
34
// Request message of the WaitCMD and SendMetadata RPCs.
message Metadata {
  // Name of the device.
  string device_name = 1;

  // Current step.
  int32 cur_step = 2;

  // Backend identifier: 'GPU' or "Ascend".
  string backend = 3;

  // Full name of the current node.
  string cur_node = 4;

  // Whether training is done.
  bool training_done = 5;

  // Total number of graphs.
  int32 graph_num = 6;

  // MindSpore version.
  string ms_version = 7;
}
49
// Element type of the request streams of SendGraph and SendMultiGraphs.
message Chunk {
  // Raw bytes carried by this chunk.
  bytes buffer = 1;

  // NOTE(review): presumably true on the last chunk of a payload -- confirm
  // against the sender.
  bool finished = 2;
}
54
// Response message returned by every RPC of EventListener.
message EventReply {
  // Outcome reported in `status`. OK is the zero value and therefore what a
  // reader sees when `status` was never set.
  enum Status {
    OK = 0;
    FAILED = 1;
    PENDING = 2;
  }

  // Status of the reply.
  Status status = 1;

  // Optional payload; at most one member is set at a time.
  oneof cmd {
    bool exit = 2;
    RunCMD run_cmd = 3;
    SetCMD set_cmd = 4;
    ViewCMD view_cmd = 5;
    bool version_matched = 6;
  }
}
72
// Payload of EventReply.run_cmd.
message RunCMD {
  // Granularity of the run: "step" (step level) or "node" (node level).
  string run_level = 1;

  // At most one of the following is set.
  oneof cmd {
    // NOTE(review): presumably the number of steps to run -- confirm.
    int32 run_steps = 2;

    // Full name of the next node.
    string node_name = 3;
  }
}
82
// Payload of EventReply.set_cmd.
message SetCMD {
  // Nodes this command refers to.
  repeated WatchNode watch_nodes = 1;

  // Condition this command refers to.
  WatchCondition watch_condition = 2;

  // NOTE(review): presumably selects removal instead of creation of the
  // watchpoint identified by `id` -- confirm. `delete` is a keyword in some
  // target languages, so generated accessors may be spelled differently; the
  // name is kept for compatibility.
  bool delete = 3;

  // Identifier carried by this command.
  int32 id = 4;
}
89
// Payload of EventReply.view_cmd.
message ViewCMD {
  // Selector stored in `level`. `value` is the zero value and therefore the
  // default when `level` is not set.
  // NOTE(review): the three names line up with the SendTensors,
  // SendTensorStats and SendTensorBase RPCs -- presumably they pick which one
  // answers the request; confirm.
  enum Level {
    value = 0;
    statistics = 1;
    base = 2;
  }

  // Tensors this command refers to.
  repeated TensorProto tensors = 1;

  // Requested level.
  Level level = 2;
}
99
// Condition description used as the `watch_condition` field of SetCMD and
// WatchpointHit.
message WatchCondition {
  // Kind of condition. `nan` is the zero value and therefore the default when
  // `condition` is not set. Names and numbers are part of the published
  // contract and must not be changed.
  enum Condition {
    nan = 0;
    inf = 1;
    overflow = 2;
    max_gt = 3;
    max_lt = 4;
    min_gt = 5;
    min_lt = 6;
    max_min_gt = 7;
    max_min_lt = 8;
    mean_gt = 9;
    mean_lt = 10;
    sd_gt = 11;
    sd_lt = 12;
    tensor_general_overflow = 13;
    tensor_initialization = 14;
    tensor_too_large = 15;
    tensor_too_small = 16;
    tensor_all_zero = 17;
    tensor_change_too_large = 18;
    tensor_change_too_small = 19;
    tensor_not_changed = 20;
    tensor_range = 21;
  }

  // One named parameter of the condition.
  message Parameter {
    // Parameter name.
    string name = 1;

    // Whether this parameter is disabled.
    bool disabled = 2;

    // Configured value of the parameter.
    double value = 3;

    // Whether this parameter is hit when checking tensor.
    bool hit = 4;

    // NOTE(review): presumably the value observed during the check, as
    // opposed to the configured `value` -- confirm.
    double actual_value = 5;
  }

  // Field number 3 is skipped by the fields below. It is reserved here so it
  // cannot be reassigned by accident.
  // NOTE(review): presumably a removed field -- confirm, and also reserve its
  // old name if one existed.
  reserved 3;

  // Which condition applies.
  Condition condition = 1;

  // Single float value attached to the condition.
  float value = 2;

  // Named parameters of the condition.
  repeated Parameter params = 4;
}
136
// Element type of SetCMD.watch_nodes: a node given by name and type.
message WatchNode {
  // Name of the node.
  string node_name = 1;

  // Type of the node.
  string node_type = 2;
}
141
// Element type of the SendWatchpointHits request stream.
message WatchpointHit {
  // Tensor associated with the hit.
  TensorProto tensor = 1;

  // Condition associated with the hit.
  WatchCondition watch_condition = 2;

  // NOTE(review): presumably the same identifier space as SetCMD.id --
  // confirm.
  int32 id = 3;

  // Error code attached to the hit; the meaning of individual codes is not
  // defined in this file.
  int32 error_code = 4;
}
148
// Request message of the SendHeartbeat RPC.
message Heartbeat {
  // Free-form text carried with the heartbeat.
  string message = 1;

  // Heartbeat period. The unit is not stated in this file -- TODO confirm.
  int32 period = 2;
}
153
// Element type of the SendTensorStats request stream: a TensorBase paired
// with its Statistics.
message TensorSummary {
  // Basic description of the tensor.
  TensorBase tensor_base = 1;

  // Statistics of the tensor.
  Statistics statistics = 2;
}
158
// Statistics part of a TensorSummary.
message Statistics {
  // NOTE(review): presumably true when the tensor holds boolean data --
  // confirm.
  bool is_bool = 1;

  // Maximum, minimum and average value.
  float max_value = 2;
  float min_value = 3;
  float avg_value = 4;

  // NOTE(review): presumably the total number of elements -- confirm.
  int32 count = 5;

  // Per-category counters, as named. How the categories are delimited is not
  // defined in this file.
  int32 neg_zero_count = 6;
  int32 pos_zero_count = 7;
  int32 nan_count = 8;
  int32 neg_inf_count = 9;
  int32 pos_inf_count = 10;
  int32 zero_count = 11;
}
172
// Basic tensor description. Element type of the SendTensorBase request stream
// and the `tensor_base` field of TensorSummary.
message TensorBase {
  // Numeric data-type code; the mapping from code to type is not defined in
  // this file.
  int32 data_type = 1;

  // Shape of the tensor.
  repeated int64 shape = 2;

  // Size of the tensor data. The unit is not stated here (presumably bytes)
  // -- TODO confirm.
  int64 data_size = 3;
}