• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2022 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_CCSRC_DISTRIBUTED_CLUSTER_ACTOR_ROUTE_TABLE_PROXY_H_
18 #define MINDSPORE_CCSRC_DISTRIBUTED_CLUSTER_ACTOR_ROUTE_TABLE_PROXY_H_
19 
20 #include <string>
21 #include <memory>
22 #include <chrono>
23 #include "proto/topology.pb.h"
24 #include "include/backend/distributed/constants.h"
25 #include "include/backend/distributed/cluster/topology/compute_graph_node.h"
26 
27 namespace mindspore {
28 namespace distributed {
29 namespace cluster {
// Default timeout, in milliseconds, applied to a single lookup (300 seconds).
constexpr uint32_t kDefaultLookupTimeout{300 * 1000};
32 
// Interval, in milliseconds, separating two successive lookup operations.
constexpr uint32_t kLookupInterval{3000};
35 
36 // Actor route table proxy for nodes like workers and server. This class helps update actor route table in scheduler
37 // across the network.
38 class BACKEND_EXPORT ActorRouteTableProxy {
39  public:
40   explicit ActorRouteTableProxy(const std::shared_ptr<topology::ComputeGraphNode> &cgn,
41                                 uint32_t lookup_timeout = kDefaultLookupTimeout)
cgn_(cgn)42       : cgn_(cgn), lookup_timeout_(std::chrono::milliseconds(lookup_timeout)) {}
43   ~ActorRouteTableProxy() = default;
44 
45   // Register actor address to the route table stored in scheduler.
46   bool RegisterRoute(const std::string &actor_id, const topology::ActorAddress &actor_addr);
47 
48   // Get the actor address for the specified actor_id from the route table stored in scheduler.
49   topology::ActorAddress LookupRoute(const std::string &actor_id) const;
50 
51  private:
52   // The cgn variable helps proxy to communicate with meta server.
53   std::shared_ptr<topology::ComputeGraphNode> cgn_;
54 
55   // The timeout window for lookup route operation because time of route lookup_timeout of each process is different.
56   std::chrono::milliseconds lookup_timeout_;
57 };
58 
59 using ActorRouteTableProxyPtr = std::shared_ptr<ActorRouteTableProxy>;
60 }  // namespace cluster
61 }  // namespace distributed
62 }  // namespace mindspore
63 #endif  // MINDSPORE_CCSRC_DISTRIBUTED_CLUSTER_ACTOR_ROUTE_TABLE_PROXY_H_
64