/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 #ifndef MINDSPORE_CCSRC_PS_CORE_COMM_UTIL_H_
18 #define MINDSPORE_CCSRC_PS_CORE_COMM_UTIL_H_
19 
20 #include <unistd.h>
21 #ifdef _MSC_VER
22 #include <iphlpapi.h>
23 #include <tchar.h>
24 #include <windows.h>
25 #include <winsock2.h>
26 #else
27 #include <arpa/inet.h>
28 #include <ifaddrs.h>
29 #include <net/if.h>
30 #include <netinet/in.h>
31 #endif
32 
33 #include <assert.h>
34 #include <event2/buffer.h>
35 #include <event2/event.h>
36 #include <event2/http.h>
37 #include <event2/keyvalq_struct.h>
38 #include <event2/listener.h>
39 #include <event2/util.h>
40 #include <openssl/bio.h>
41 #include <openssl/err.h>
42 #include <openssl/evp.h>
43 #include <openssl/pkcs12.h>
44 #include <openssl/rand.h>
45 #include <openssl/ssl.h>
46 #include <openssl/x509v3.h>
47 #include <algorithm>
48 #include <cstdio>
49 #include <cstdlib>
50 #include <cstring>
51 #include <fstream>
52 #include <functional>
53 #include <iostream>
54 #include <map>
55 #include <random>
56 #include <sstream>
57 #include <string>
58 #include <thread>
59 #include <utility>
60 #include <vector>
61 #include "proto/comm.pb.h"
62 #include "proto/ps.pb.h"
63 #include "ps/core/cluster_metadata.h"
64 #include "ps/core/cluster_config.h"
65 #include "utils/log_adapter.h"
66 #include "include/backend/distributed/ps/ps_context.h"
67 #include "utils/convert_utils_base.h"
68 #include "ps/core/configuration.h"
69 
70 namespace mindspore {
71 namespace ps {
72 namespace core {
// Lengths of five "random groups".
// NOTE(review): 8-4-4-4-12 matches the canonical textual UUID layout, so these are presumably the per-group
// hex-digit counts used by CommUtil::GenerateUUID; confirm against the implementation file.
constexpr int kGroup1RandomLength = 8;
constexpr int kGroup2RandomLength = 4;
constexpr int kGroup3RandomLength = 4;
constexpr int kGroup4RandomLength = 4;
constexpr int kGroup5RandomLength = 12;
// NOTE(review): presumably the number of millisecond digits kept in Time::time_str_mill; confirm.
constexpr int kMillSecondLength = 3;

// The size of the buffer for sending and receiving data is 4096 bytes.
constexpr int kMessageChunkLength = 4096;
// The timeout period for the http client to connect to the http server is 120 seconds.
constexpr int kConnectionTimeout = 120;
// Prefix text for libevent log output.
// NOTE(review): presumably prepended by CommUtil::LogCallback; confirm.
constexpr char kLibeventLogPrefix[] = "[libevent log]:";
// Name of the failure event. NOTE(review): the consumer of this key is not visible in this header.
constexpr char kFailureEvent[] = "failureEvent";
86 
// String names of the cluster states, intended to be indexed by the numeric value of a ClusterState enumerator.
// NOTE(review): the order here must therefore stay in step with the declaration order of enum ClusterState,
// which is defined outside this header; confirm whenever either side changes.
const std::vector<std::string> kClusterState = {
  "CLUSTER_STARTING",            // Initialization state when the cluster is just started.
  "CLUSTER_READY",               // The state after all nodes are successfully registered.
  "CLUSTER_EXIT",                // The state after the cluster exits successfully.
  "NODE_TIMEOUT",                // When a node has a heartbeat timeout.
  "CLUSTER_SCALE_OUT",           // When the cluster is scaling out.
  "CLUSTER_SCALE_IN",            // When the cluster is scaling in.
  "CLUSTER_NEW_INSTANCE",        // When the cluster is doing NEW_INSTANCE.
  "CLUSTER_ENABLE_FLS",          // When the cluster is doing ENABLE_FLS.
  "CLUSTER_DISABLE_FLS",         // When the cluster is doing DISABLE_FLS.
  "CLUSTER_SCHEDULER_RECOVERY",  // When the cluster is doing SCHEDULER_RECOVERY.
  "CLUSTER_SCALE_OUT_ROLLBACK",  // When the cluster is rolling back a scale out.
};
101 
102 const std::map<std::string, ClusterState> kClusterStateMap = {
103   {"CLUSTER_STARTING", ClusterState::CLUSTER_STARTING},
104   {"CLUSTER_READY", ClusterState::CLUSTER_READY},
105   {"CLUSTER_EXIT", ClusterState::CLUSTER_EXIT},
106   {"NODE_TIMEOUT", ClusterState::NODE_TIMEOUT},
107   {"CLUSTER_SCALE_OUT", ClusterState::CLUSTER_SCALE_OUT},
108   {"CLUSTER_SCALE_IN", ClusterState::CLUSTER_SCALE_IN},
109   {"CLUSTER_NEW_INSTANCE", ClusterState::CLUSTER_NEW_INSTANCE},
110   {"CLUSTER_ENABLE_FLS", ClusterState::CLUSTER_ENABLE_FLS},
111   {"CLUSTER_DISABLE_FLS", ClusterState::CLUSTER_DISABLE_FLS},
112   {"CLUSTER_SCHEDULER_RECOVERY", ClusterState::CLUSTER_SCHEDULER_RECOVERY},
113   {"CLUSTER_SCALE_OUT_ROLLBACK", ClusterState::CLUSTER_SCALE_OUT_ROLLBACK}};
114 
// A point in time carried both as a number and as text; this is the return type of CommUtil::GetNowTime().
// NOTE(review): the unit/epoch of time_stamp and the exact text format of time_str_mill are set by the
// implementation file, not by this header; confirm there before relying on them.
struct Time {
  // Zero-initialized so that a default-constructed Time never exposes an indeterminate value.
  uint64_t time_stamp = 0;
  std::string time_str_mill;
};
119 
// Storage settings filled in through the out-parameter of CommUtil::ParseAndCheckConfigJson().
// NOTE(review): the meaning of the individual storage_type values is not visible in this header.
struct FileConfig {
  // Zero-initialized so that a default-constructed FileConfig never exposes an indeterminate value.
  uint32_t storage_type = 0;
  std::string storage_file_path;
};
124 
125 class CommUtil {
126  public:
127   static bool CheckIpWithRegex(const std::string &ip);
128   static bool CheckIp(const std::string &ip);
129   static bool CheckPort(const uint16_t &port);
130   static void GetAvailableInterfaceAndIP(std::string *interface, std::string *ip);
131   static std::string GetLoopBackInterfaceName();
132   static std::string GenerateUUID();
133   static std::string NodeRoleToString(const NodeRole &role);
134   static NodeRole StringToNodeRole(const std::string &roleStr);
135   static std::string BoolToString(bool alive);
136   static bool StringToBool(const std::string &alive);
137   static bool ValidateRankId(const enum NodeRole &node_role, const uint32_t &rank_id, const int32_t &total_worker_num,
138                              const int32_t &total_server_num);
139   static bool Retry(const std::function<bool()> &func, size_t max_attempts, size_t interval_milliseconds);
140   static void LogCallback(int severity, const char *msg);
141 
142   // Check if the file exists.
143   static bool IsFileExists(const std::string &file);
144   // Check whether the file is empty or not.
145   static bool IsFileEmpty(const std::string &file);
146   // Convert cluster state to string when response the http request.
147   static std::string ClusterStateToString(const ClusterState &state);
148 
149   // Parse the configuration file according to the key.
150   static std::string ParseConfig(const Configuration &config, const std::string &key);
151 
152   // Init openssl lib
153   static void InitOpensslLib();
154 
155   // verify valid of certificate time
156   static bool VerifyCertTime(const X509 *cert, int64_t time = 0);
157   static bool verifyCertTimeStamp(const X509 *cert);
158   // verify valid of equip certificate with CRL
159   static bool VerifyCRL(const X509 *cert, const std::string &crl_path, X509_CRL **crl);
160   static bool VerifyCommonName(const X509 *caCert, const X509 *subCert);
161   static std::vector<std::string> Split(const std::string &s, char delim);
162   static bool VerifyCipherList(const std::vector<std::string> &list);
163   static bool verifyCertKeyID(const X509 *caCert, const X509 *subCert);
164   static bool verifySingature(const X509 *caCert, const X509 *subCert);
165   static bool verifyExtendedAttributes(const X509 *caCert);
166   static void verifyCertPipeline(const X509 *caCert, const X509 *subCert);
167   static bool checkCRLTime(const std::string &crlPath);
168   static bool CreateDirectory(const std::string &directoryPath);
169   static bool CheckHttpUrl(const std::string &http_url);
170   static bool IsFileReadable(const std::string &file);
171   template <typename T>
JsonGetKeyWithException(const nlohmann::json & json,const std::string & key)172   static T JsonGetKeyWithException(const nlohmann::json &json, const std::string &key) {
173     if (!json.contains(key)) {
174       MS_LOG(EXCEPTION) << "The key " << key << "does not exist in json " << json.dump();
175     }
176     return json[key].get<T>();
177   }
178   static Time GetNowTime();
179   static bool ParseAndCheckConfigJson(Configuration *file_configuration, const std::string &key,
180                                       FileConfig *file_config);
181 
182  private:
183   static std::random_device rd;
184   static std::mt19937_64 gen;
185   static std::uniform_int_distribution<> dis;
186   static std::uniform_int_distribution<> dis2;
187 };
188 }  // namespace core
189 }  // namespace ps
190 }  // namespace mindspore
191 #endif  // MINDSPORE_CCSRC_PS_CORE_COMM_UTIL_H_
192