/**
 * Copyright 2019-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

syntax = "proto3";

package debugger;

import "debug_graph.proto";

// gRPC service of the debugger. Every RPC answers with an EventReply.
// RPCs whose request is marked `stream` are client-streaming: the caller
// sends a sequence of messages and receives a single reply.
service EventListener {
  // Sends a Metadata message and receives an EventReply.
  // NOTE(review): the name suggests the caller polls here for its next
  // command - confirm against the server implementation.
  rpc WaitCMD (Metadata) returns (EventReply);
  // Sends a Metadata message and receives an EventReply.
  rpc SendMetadata (Metadata) returns (EventReply);
  // Streams Chunk messages.
  rpc SendGraph (stream Chunk) returns (EventReply);
  // Streams TensorProto messages. TensorProto is not declared in this file;
  // presumably it comes from debug_graph.proto.
  rpc SendTensors (stream TensorProto) returns (EventReply);
  // Streams TensorBase messages.
  rpc SendTensorBase (stream TensorBase) returns (EventReply);
  // Streams TensorSummary messages.
  rpc SendTensorStats (stream TensorSummary) returns (EventReply);
  // Streams WatchpointHit messages.
  rpc SendWatchpointHits (stream WatchpointHit) returns (EventReply);
  // Streams Chunk messages; same request type as SendGraph.
  rpc SendMultiGraphs (stream Chunk) returns (EventReply);
  // Sends a single Heartbeat message.
  rpc SendHeartbeat (Heartbeat) returns (EventReply);
}

// Request message of WaitCMD and SendMetadata.
message Metadata {
  // Name of the device.
  string device_name = 1;
  // Current step.
  int32 cur_step = 2;
  // The backend in use: "GPU" or "Ascend".
  string backend = 3;
  // The full name of the current node.
  string cur_node = 4;
  // Whether training is done.
  bool training_done = 5;
  // The total number of graphs.
  int32 graph_num = 6;
  // MindSpore version.
  string ms_version = 7;
}

// Element type of the SendGraph and SendMultiGraphs request streams.
message Chunk {
  // Raw bytes carried by this chunk.
  bytes buffer = 1;
  // NOTE(review): presumably true on the last chunk of a payload - confirm
  // against the sender.
  bool finished = 2;
}

// Reply returned by every RPC of EventListener.
message EventReply {
  enum Status {
    OK = 0;
    FAILED = 1;
    PENDING = 2;
  }

  Status status = 1;

  // At most one of the following fields is set.
  oneof cmd {
    bool exit = 2;
    RunCMD run_cmd = 3;
    SetCMD set_cmd = 4;
    ViewCMD view_cmd = 5;
    bool version_matched = 6;
  }
}

// Payload of EventReply.run_cmd.
message RunCMD {
  // Run level: "step" (step level) or "node" (node level).
  string run_level = 1;
  // At most one of the following fields is set.
  oneof cmd {
    int32 run_steps = 2;
    // The full name of the next node.
    string node_name = 3;
  }
}

// Payload of EventReply.set_cmd.
message SetCMD {
  repeated WatchNode watch_nodes = 1;
  WatchCondition watch_condition = 2;
  // NOTE(review): presumably requests removal of the watchpoint identified
  // by `id` - confirm against the handler.
  bool delete = 3;
  int32 id = 4;
}

// Payload of EventReply.view_cmd.
message ViewCMD {
  repeated TensorProto tensors = 1;
  // NOTE(review): the value names mirror the SendTensors / SendTensorStats /
  // SendTensorBase RPCs; the exact mapping is not visible here - confirm.
  enum Level {
    value = 0;
    statistics = 1;
    base = 2;
  }
  Level level = 2;
}

// A watch condition together with its parameters.
// Field number 3 is not assigned in this message.
// NOTE(review): if 3 was used by an earlier revision, add `reserved 3;`.
message WatchCondition {
  enum Condition {
    nan = 0;
    inf = 1;
    overflow = 2;
    max_gt = 3;
    max_lt = 4;
    min_gt = 5;
    min_lt = 6;
    max_min_gt = 7;
    max_min_lt = 8;
    mean_gt = 9;
    mean_lt = 10;
    sd_gt = 11;
    sd_lt = 12;
    tensor_general_overflow = 13;
    tensor_initialization = 14;
    tensor_too_large = 15;
    tensor_too_small = 16;
    tensor_all_zero = 17;
    tensor_change_too_large = 18;
    tensor_change_too_small = 19;
    tensor_not_changed = 20;
    tensor_range = 21;
  }
  Condition condition = 1;
  float value = 2;
  // A single named parameter of a watch condition.
  message Parameter {
    string name = 1;
    bool disabled = 2;
    double value = 3;
    // Whether this parameter is hit when checking the tensor.
    bool hit = 4;
    double actual_value = 5;
  }
  repeated Parameter params = 4;
}

// A node referenced by SetCMD.watch_nodes.
message WatchNode {
  string node_name = 1;
  string node_type = 2;
}

// Element type of the SendWatchpointHits request stream.
message WatchpointHit {
  TensorProto tensor = 1;
  WatchCondition watch_condition = 2;
  int32 id = 3;
  int32 error_code = 4;
}

// Request message of SendHeartbeat.
message Heartbeat {
  string message = 1;
  // NOTE(review): the unit of `period` is not specified in this file -
  // confirm with the sender.
  int32 period = 2;
}

// Element type of the SendTensorStats request stream: a TensorBase paired
// with its Statistics.
message TensorSummary {
  TensorBase tensor_base = 1;
  Statistics statistics = 2;
}

// Statistics part of a TensorSummary.
message Statistics {
  bool is_bool = 1;
  float max_value = 2;
  float min_value = 3;
  float avg_value = 4;
  int32 count = 5;
  int32 neg_zero_count = 6;
  int32 pos_zero_count = 7;
  int32 nan_count = 8;
  int32 neg_inf_count = 9;
  int32 pos_inf_count = 10;
  int32 zero_count = 11;
}

// Element type of the SendTensorBase request stream; also embedded in
// TensorSummary.
message TensorBase {
  // NOTE(review): integer code for the data type; the enum it maps to is not
  // visible in this file.
  int32 data_type = 1;
  repeated int64 shape = 2;
  // NOTE(review): the unit (bytes vs. elements) is not specified here.
  int64 data_size = 3;
}