1 /** 2 * Copyright 2020 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINDSPORE_CCSRC_PS_CORE_COMM_UTIL_H_ 18 #define MINDSPORE_CCSRC_PS_CORE_COMM_UTIL_H_ 19 20 #include <unistd.h> 21 #ifdef _MSC_VER 22 #include <iphlpapi.h> 23 #include <tchar.h> 24 #include <windows.h> 25 #include <winsock2.h> 26 #else 27 #include <arpa/inet.h> 28 #include <ifaddrs.h> 29 #include <net/if.h> 30 #include <netinet/in.h> 31 #endif 32 33 #include <assert.h> 34 #include <event2/buffer.h> 35 #include <event2/event.h> 36 #include <event2/http.h> 37 #include <event2/keyvalq_struct.h> 38 #include <event2/listener.h> 39 #include <event2/util.h> 40 #include <openssl/bio.h> 41 #include <openssl/err.h> 42 #include <openssl/evp.h> 43 #include <openssl/pkcs12.h> 44 #include <openssl/rand.h> 45 #include <openssl/ssl.h> 46 #include <openssl/x509v3.h> 47 #include <algorithm> 48 #include <cstdio> 49 #include <cstdlib> 50 #include <cstring> 51 #include <fstream> 52 #include <functional> 53 #include <iostream> 54 #include <map> 55 #include <random> 56 #include <sstream> 57 #include <string> 58 #include <thread> 59 #include <utility> 60 #include <vector> 61 #include "proto/comm.pb.h" 62 #include "proto/ps.pb.h" 63 #include "ps/core/cluster_metadata.h" 64 #include "ps/core/cluster_config.h" 65 #include "utils/log_adapter.h" 66 #include "include/backend/distributed/ps/ps_context.h" 67 #include "utils/convert_utils_base.h" 68 #include "ps/core/configuration.h" 69 70 namespace mindspore { 71 namespace ps { 72 namespace core { 73 constexpr int kGroup1RandomLength = 8; 74 constexpr int kGroup2RandomLength = 4; 75 constexpr int kGroup3RandomLength = 4; 76 constexpr int kGroup4RandomLength = 4; 77 constexpr int kGroup5RandomLength = 12; 78 constexpr int kMillSecondLength = 3; 79 80 // The size of the buffer for sending and receiving data is 4096 bytes. 81 constexpr int kMessageChunkLength = 4096; 82 // The timeout period for the http client to connect to the http server is 120 seconds. 83 constexpr int kConnectionTimeout = 120; 84 constexpr char kLibeventLogPrefix[] = "[libevent log]:"; 85 constexpr char kFailureEvent[] = "failureEvent"; 86 87 // Find the corresponding string style of cluster state through the subscript of the enum:ClusterState 88 const std::vector<std::string> kClusterState = { 89 "CLUSTER_STARTING", // Initialization state when the cluster is just started. 90 "CLUSTER_READY", // The state after all nodes are successfully registered. 91 "CLUSTER_EXIT", // The state after the cluster exits successfully. 92 "NODE_TIMEOUT", // When a node has a heartbeat timeout 93 "CLUSTER_SCALE_OUT", // When the cluster is scale out. 94 "CLUSTER_SCALE_IN", // When the cluster is scale in. 95 "CLUSTER_NEW_INSTANCE", // When the cluster is doing NEW_INSTANCE. 96 "CLUSTER_ENABLE_FLS", // When the cluster is doing ENABLE_FLS. 97 "CLUSTER_DISABLE_FLS", // When the cluster is doing DISABLE_FLS. 98 "CLUSTER_SCHEDULER_RECOVERY", // When the cluster is doing SCHEDULER_RECOVERY. 99 "CLUSTER_SCALE_OUT_ROLLBACK", // When the cluster is scale out rollback. 100 }; 101 102 const std::map<std::string, ClusterState> kClusterStateMap = { 103 {"CLUSTER_STARTING", ClusterState::CLUSTER_STARTING}, 104 {"CLUSTER_READY", ClusterState::CLUSTER_READY}, 105 {"CLUSTER_EXIT", ClusterState::CLUSTER_EXIT}, 106 {"NODE_TIMEOUT", ClusterState::NODE_TIMEOUT}, 107 {"CLUSTER_SCALE_OUT", ClusterState::CLUSTER_SCALE_OUT}, 108 {"CLUSTER_SCALE_IN", ClusterState::CLUSTER_SCALE_IN}, 109 {"CLUSTER_NEW_INSTANCE", ClusterState::CLUSTER_NEW_INSTANCE}, 110 {"CLUSTER_ENABLE_FLS", ClusterState::CLUSTER_ENABLE_FLS}, 111 {"CLUSTER_DISABLE_FLS", ClusterState::CLUSTER_DISABLE_FLS}, 112 {"CLUSTER_SCHEDULER_RECOVERY", ClusterState::CLUSTER_SCHEDULER_RECOVERY}, 113 {"CLUSTER_SCALE_OUT_ROLLBACK", ClusterState::CLUSTER_SCALE_OUT_ROLLBACK}}; 114 115 struct Time { 116 uint64_t time_stamp; 117 std::string time_str_mill; 118 }; 119 120 struct FileConfig { 121 uint32_t storage_type; 122 std::string storage_file_path; 123 }; 124 125 class CommUtil { 126 public: 127 static bool CheckIpWithRegex(const std::string &ip); 128 static bool CheckIp(const std::string &ip); 129 static bool CheckPort(const uint16_t &port); 130 static void GetAvailableInterfaceAndIP(std::string *interface, std::string *ip); 131 static std::string GetLoopBackInterfaceName(); 132 static std::string GenerateUUID(); 133 static std::string NodeRoleToString(const NodeRole &role); 134 static NodeRole StringToNodeRole(const std::string &roleStr); 135 static std::string BoolToString(bool alive); 136 static bool StringToBool(const std::string &alive); 137 static bool ValidateRankId(const enum NodeRole &node_role, const uint32_t &rank_id, const int32_t &total_worker_num, 138 const int32_t &total_server_num); 139 static bool Retry(const std::function<bool()> &func, size_t max_attempts, size_t interval_milliseconds); 140 static void LogCallback(int severity, const char *msg); 141 142 // Check if the file exists. 143 static bool IsFileExists(const std::string &file); 144 // Check whether the file is empty or not. 145 static bool IsFileEmpty(const std::string &file); 146 // Convert cluster state to string when response the http request. 147 static std::string ClusterStateToString(const ClusterState &state); 148 149 // Parse the configuration file according to the key. 150 static std::string ParseConfig(const Configuration &config, const std::string &key); 151 152 // Init openssl lib 153 static void InitOpensslLib(); 154 155 // verify valid of certificate time 156 static bool VerifyCertTime(const X509 *cert, int64_t time = 0); 157 static bool verifyCertTimeStamp(const X509 *cert); 158 // verify valid of equip certificate with CRL 159 static bool VerifyCRL(const X509 *cert, const std::string &crl_path, X509_CRL **crl); 160 static bool VerifyCommonName(const X509 *caCert, const X509 *subCert); 161 static std::vector<std::string> Split(const std::string &s, char delim); 162 static bool VerifyCipherList(const std::vector<std::string> &list); 163 static bool verifyCertKeyID(const X509 *caCert, const X509 *subCert); 164 static bool verifySingature(const X509 *caCert, const X509 *subCert); 165 static bool verifyExtendedAttributes(const X509 *caCert); 166 static void verifyCertPipeline(const X509 *caCert, const X509 *subCert); 167 static bool checkCRLTime(const std::string &crlPath); 168 static bool CreateDirectory(const std::string &directoryPath); 169 static bool CheckHttpUrl(const std::string &http_url); 170 static bool IsFileReadable(const std::string &file); 171 template <typename T> JsonGetKeyWithException(const nlohmann::json & json,const std::string & key)172 static T JsonGetKeyWithException(const nlohmann::json &json, const std::string &key) { 173 if (!json.contains(key)) { 174 MS_LOG(EXCEPTION) << "The key " << key << "does not exist in json " << json.dump(); 175 } 176 return json[key].get<T>(); 177 } 178 static Time GetNowTime(); 179 static bool ParseAndCheckConfigJson(Configuration *file_configuration, const std::string &key, 180 FileConfig *file_config); 181 182 private: 183 static std::random_device rd; 184 static std::mt19937_64 gen; 185 static std::uniform_int_distribution<> dis; 186 static std::uniform_int_distribution<> dis2; 187 }; 188 } // namespace core 189 } // namespace ps 190 } // namespace mindspore 191 #endif // MINDSPORE_CCSRC_PS_CORE_COMM_UTIL_H_ 192