1 /** 2 * Copyright 2022 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINDSPORE_CCSRC_DISTRIBUTED_CLUSTER_ACTOR_ROUTE_TABLE_PROXY_H_ 18 #define MINDSPORE_CCSRC_DISTRIBUTED_CLUSTER_ACTOR_ROUTE_TABLE_PROXY_H_ 19 20 #include <string> 21 #include <memory> 22 #include <chrono> 23 #include "proto/topology.pb.h" 24 #include "include/backend/distributed/constants.h" 25 #include "include/backend/distributed/cluster/topology/compute_graph_node.h" 26 27 namespace mindspore { 28 namespace distributed { 29 namespace cluster { 30 // The timeout in milliseconds for one lookup. 31 constexpr uint32_t kDefaultLookupTimeout = 300 * 1000; 32 33 // The time in milliseconds between two lookup operations. 34 constexpr uint32_t kLookupInterval = 3000; 35 36 // Actor route table proxy for nodes like workers and server. This class helps update actor route table in scheduler 37 // across the network. 38 class BACKEND_EXPORT ActorRouteTableProxy { 39 public: 40 explicit ActorRouteTableProxy(const std::shared_ptr<topology::ComputeGraphNode> &cgn, 41 uint32_t lookup_timeout = kDefaultLookupTimeout) cgn_(cgn)42 : cgn_(cgn), lookup_timeout_(std::chrono::milliseconds(lookup_timeout)) {} 43 ~ActorRouteTableProxy() = default; 44 45 // Register actor address to the route table stored in scheduler. 46 bool RegisterRoute(const std::string &actor_id, const topology::ActorAddress &actor_addr); 47 48 // Get the actor address for the specified actor_id from the route table stored in scheduler. 49 topology::ActorAddress LookupRoute(const std::string &actor_id) const; 50 51 private: 52 // The cgn variable helps proxy to communicate with meta server. 53 std::shared_ptr<topology::ComputeGraphNode> cgn_; 54 55 // The timeout window for lookup route operation because time of route lookup_timeout of each process is different. 56 std::chrono::milliseconds lookup_timeout_; 57 }; 58 59 using ActorRouteTableProxyPtr = std::shared_ptr<ActorRouteTableProxy>; 60 } // namespace cluster 61 } // namespace distributed 62 } // namespace mindspore 63 #endif // MINDSPORE_CCSRC_DISTRIBUTED_CLUSTER_ACTOR_ROUTE_TABLE_PROXY_H_ 64