/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

syntax = "proto3";
package mindspore.ps.core;
option optimize_for = LITE_RUNTIME;

// Command identifier carried in MessageMeta.cmd.
// Numeric values are part of the wire contract and must not be renumbered.
enum NodeCommand {
  TERMINATE = 0;
  REGISTER = 1;
  HEARTBEAT = 2;
  SEND_DATA = 3;
  // The worker or server asks the scheduler for metadata.
  FETCH_METADATA = 4;
  FINISH = 5;
  COLLECTIVE_SEND_DATA = 6;
  // The scheduler actively sends metadata to the worker and server.
  SEND_METADATA = 7;
  // This command is used to start scale out.
  SCALE_OUT = 8;
  // This command is used to start scale in.
  SCALE_IN = 9;
  // This command is used to synchronize the scale out status of the cluster.
  SCALE_OUT_DONE = 10;
  // This command is used to synchronize the scale in status of the cluster.
  SCALE_IN_DONE = 11;
  // This command is used to send a user defined event.
  SEND_EVENT = 12;
  // This command is used to send the scheduler recovery event.
  SCHEDULER_RECOVERY = 13;
  // This command is used to send the prepare-building-network message.
  PREPARE_BUILDING_NETWORK = 14;
  // Register address for actor's route table.
  REGISTER_ACTOR_ROUTE = 15;
  // Delete address of actor.
  DELETE_ACTOR_ROUTE = 16;
  // Lookup address of the actor.
  LOOKUP_ACTOR_ROUTE = 17;
  // Send host name to scheduler.
  SEND_HOST_NAME = 18;
  // Query all worker nodes' host names.
  QUERY_HOST_NAMES = 19;
  // Send unique id used to initialize collective communication.
  SEND_UNIQUE_ID = 20;
  // Query unique id used to initialize collective communication.
  QUERY_UNIQUE_ID = 21;
  // Send the ready status to finish transform graph of computed node,
  // used in disaster recovery mode to prevent timeout of waiting for graph
  // transformation.
  SEND_FINISH_TRANSFORM = 22;
  // Query the ready status to finish transform graph of computed node,
  // used in disaster recovery mode to prevent timeout of waiting for graph
  // transformation.
  QUERY_FINISH_TRANSFORM = 23;
  // This command is used to start scale out rollback.
  SCALE_OUT_ROLLBACK = 24;
  // Record the failure information, such as node restart.
  FAILURE_EVENT_INFO = 25;
}

// Role of a node. SERVER is the zero value and therefore the default when the
// field is not set.
enum NodeRole {
  SERVER = 0;
  WORKER = 1;
  SCHEDULER = 2;
}

// Persistence command carried in HeartbeatRespMessage.persistent_cmd.
enum PersistentCommand {
  DEFAULT = 0;
  PREPARE_PERSIST = 1;
  BEGIN_PERSIST = 2;
}

// Persistence state reported in HeartbeatMessage.persistent_state and
// ServersMeta.persistent_state.
enum PersistentState {
  NOT_ENABLE_PERSIST = 0;
  PREPARING_PERSIST = 1;
  READY_PERSIST = 2;
  PERSISTING = 3;
  FINISH_PERSIST = 4;
}

// Collective-communication metadata embedded in MessageMeta.collective_meta.
message CollectiveMessageMeta {
  bool enable_flag = 1;
  uint32 send_rank_id = 2;
  uint32 recv_rank_id = 3;
  uint32 iteration = 4;
  bytes weight_name = 5;
  // ring, gather, reduce, broadcast
  bytes phase = 6;
  uint32 chunk_index = 7;
  uint32 for_index = 8;
}

// Metadata header of a message; embedded as CommMessage.pb_meta.
message MessageMeta {
  // the command of this message, for example: register, heartbeat, data
  NodeCommand cmd = 1;
  // the request id of this message
  uint64 request_id = 2;
  // the role of the current node: worker, server, scheduler
  NodeRole role = 3;
  // the current Node rank id, the worker node range is: [0, numOfWorker-1],
  // the server node range is: [0, numOfServer-1]
  uint32 rank_id = 4;
  // User-defined commands
  int32 user_cmd = 5;

  CollectiveMessageMeta collective_meta = 6;
}

// Registration information of a node.
// NOTE(review): presumably the payload of the REGISTER command - confirm
// against the sender.
message RegisterMessage {
  // ip
  string ip = 1;
  // the port of this node
  uint32 port = 2;
  // the current Node unique id:0,1,2...
  string node_id = 3;
  // the role of the node: worker, server, scheduler
  NodeRole role = 4;
  // the number of the fl job iteration
  uint64 fl_iteration_num = 5;
  // if node start with recovery
  bool is_recover = 6;
}

// NOTE(review): presumably the reply to RegisterMessage - confirm against the
// caller.
message RegisterRespMessage {
  string node_id = 1;
  uint32 rank_id = 2;
}

// NOTE(review): presumably the payload of the HEARTBEAT command - confirm
// against the sender.
message HeartbeatMessage {
  // the current Node unique id:0,1,2...
  string node_id = 1;
  PersistentState persistent_state = 2;
  bool has_address = 3;
  string ip = 4;
  uint32 port = 5;
}

// NOTE(review): presumably the payload of the FAILURE_EVENT_INFO command
// ("record the failure information, such as node restart") - confirm.
message FailureEventMessage {
  string node_role = 1;
  string ip = 2;
  uint32 port = 3;
  string time = 4;
  string event = 5;
}

// State of a single node.
enum NodeState {
  NODE_STARTING = 0;
  NODE_FINISH = 1;
  NODE_READY = 2;
}

// State of the cluster; carried in HeartbeatRespMessage.cluster_state and
// SendMetadataMessage.cluster_state.
// NOTE(review): NODE_TIMEOUT lacks the CLUSTER_ prefix used by the other
// values; the name is kept unchanged for compatibility with generated code.
enum ClusterState {
  CLUSTER_STARTING = 0;
  CLUSTER_READY = 1;
  CLUSTER_EXIT = 2;
  NODE_TIMEOUT = 3;
  CLUSTER_SCALE_OUT = 4;
  CLUSTER_SCALE_IN = 5;
  CLUSTER_NEW_INSTANCE = 6;
  CLUSTER_ENABLE_FLS = 7;
  CLUSTER_DISABLE_FLS = 8;
  CLUSTER_SCHEDULER_RECOVERY = 9;
  CLUSTER_SCALE_OUT_ROLLBACK = 10;
}

// NOTE(review): presumably the reply to HeartbeatMessage - confirm against
// the caller.
message HeartbeatRespMessage {
  // Field number 4 is not used by this definition. It is reserved so that it
  // cannot be reused by accident (presumably it belonged to a removed field -
  // TODO confirm).
  reserved 4;

  ClusterState cluster_state = 1;
  repeated ServersMeta servers_meta = 2;
  bool is_worker = 3;
  PersistentCommand persistent_cmd = 5;
}

message FetchServersMessage {
  string node_id = 1;
}

message FetchServersRespMessage {
  repeated ServersMeta servers_meta = 1;
}

// Description of one node; used as a repeated element in
// HeartbeatRespMessage, FetchServersRespMessage and SendMetadataMessage.
message ServersMeta {
  uint32 rank_id = 1;
  string ip = 2;
  uint32 port = 3;
  bool is_alive = 4;
  NodeRole role = 5;
  string node_id = 6;
  PersistentState persistent_state = 7;
}

// NOTE(review): presumably the payload of the SEND_METADATA command - confirm
// against the sender.
message SendMetadataMessage {
  repeated ServersMeta servers_meta = 1;
  // the current worker number.
  uint32 worker_num = 2;
  // the current server number.
  uint32 server_num = 3;
  // the current cluster state.
  ClusterState cluster_state = 4;
  // The rank id of the node that received this message.
  uint32 rank_id = 5;
}

message FinishMessage {
  // the current Node unique id:0,1,2...
  string node_id = 1;
}

// A metadata header (pb_meta) together with an opaque data payload.
message CommMessage {
  MessageMeta pb_meta = 1;
  bytes data = 2;
}

// The scheduler will broadcast the worker/server numbers after scale out to
// all nodes.
message ScaleOutMessage {
  // the worker number after scale out
  uint32 worker_num = 1;
  // the server number after scale out
  uint32 server_num = 2;
}

// The scheduler will broadcast the worker/server numbers after scale in to
// all nodes.
message ScaleInMessage {
  // the worker number after scale in.
  uint32 worker_num = 1;
  // the server number after scale in.
  uint32 server_num = 2;
  // Determine whether the current node is a scale in node.
  bool is_node_scale_in = 3;
}

// This message is sent to the scheduler to notify the completion of scale out.
message ScaleOutDoneMessage {
  string node_id = 1;
}

// This message is sent to the scheduler to notify the completion of scale in.
message ScaleInDoneMessage {
  string node_id = 1;
}

// This message is sent by the worker/server to the scheduler, and the
// scheduler broadcasts the event to all other nodes.
message EventMessage {
  uint32 event = 1;
  string node_id = 2;
}

// The scheduler broadcasts the event to all other nodes through this message.
message EventRespMessage {
  uint32 event = 1;
}

message ScaleInFinishMessage {
  bool is_all_nodes_registered = 1;
}

// Generic response: a success flag plus an error string.
message GeneralResponseMsg {
  bool is_success = 1;
  string error = 2;
}

// Address (ip and port) associated with an actor id.
// NOTE(review): presumably used by the *_ACTOR_ROUTE commands - confirm.
message ActorAddress {
  string actor_id = 1;
  string ip = 2;
  uint32 port = 3;
}

message GeneralQueryMessage {
  // The unique node id.
  string node_id = 1;
  // The rank id of the node in the cluster.
  uint32 rank_id = 2;
}

message SendHostHashNameMessage {
  // The unique node id.
  string node_id = 1;
  // The rank id of the node in the cluster.
  uint32 rank_id = 2;
  // The host hash name of the node.
  uint64 host_hash_name = 3;
}

message QueryHostHashNameRespMessage {
  bool is_success = 1;
  // The host hash names of all worker nodes.
  repeated uint64 host_hash_names = 2;
}

message SendUniqueIDMessage {
  // The unique node id.
  string node_id = 1;
  // The rank id of the node in the cluster.
  uint32 rank_id = 2;
  // The name of the group which needs to initialize collective communication.
  string group_name = 3;
  // The unique id used to initialize collective communication.
  bytes unique_id = 4;
}

message QueryUniqueIDMessage {
  // The unique node id.
  string node_id = 1;
  // The name of the group which needs to initialize collective communication.
  string group_name = 2;
}

message QueryUniqueIDRespMessage {
  bool is_success = 1;
  // The unique id used to initialize collective communication.
  bytes unique_id = 2;
}

message SendFinishTransformMessage {
  // The unique node id.
  string node_id = 1;
  // The rank id of the node in the cluster.
  uint32 rank_id = 2;
  // Whether finish transform graph.
  bool is_ready = 3;
  // The actor set name of actor set which finish transform.
  string actor_set_name = 4;
}

message QueryFinishTransformMessage {
  // The unique node id.
  string node_id = 1;
  // The rank id of the node in the cluster.
  uint32 rank_id = 2;
  // The actor set name of actor set which queries finish transform status.
  string actor_set_name = 3;
}

message QueryFinishTransformRespMessage {
  // Whether all computed nodes are ready to run dag.
  bool is_ready = 1;
  // Whether there is any worker timeout.
  bool is_worker_timeout = 2;
}