• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2022 Huawei Technologies Co., Ltd
3 
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7 
8  * http://www.apache.org/licenses/LICENSE-2.0
9 
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15 */
16 #include "minddata/dataset/engine/cache/cache_hw.h"
17 
18 #ifdef NUMA_ENABLED
19 #include <numa.h>
20 #endif
21 #include <sched.h>
22 #include <cstdlib>
23 #include <cstring>
24 #include <cctype>
25 #include <fstream>
26 #include <regex>
27 #include <thread>
28 
29 #include "utils/file_utils.h"
30 #include "minddata/dataset/util/log_adapter.h"
31 
32 namespace mindspore {
33 namespace dataset {
CacheServerHW()34 CacheServerHW::CacheServerHW() {
35   num_cpus_ = std::thread::hardware_concurrency();
36   MS_LOG(DEBUG) << "Number of cpu(s) : " << num_cpus_;
37 #ifdef NUMA_ENABLED
38   if (numa_enabled()) {
39     MS_LOG(INFO) << "Numa support enabled";
40     for (auto i = 0; i <= numa_max_node(); ++i) {
41       long long free_avail = 0;  // NOLINT
42       int64_t mem_avail = numa_node_size64(i, &free_avail);
43       MS_LOG(INFO) << "Total physical/free RAM in bytes at node " << i << " : " << mem_avail << "/" << free_avail;
44     }
45   }
46 #endif
47 }
48 
GetTotalSystemMemory()49 int64_t CacheServerHW::GetTotalSystemMemory() {
50   auto pages = sysconf(_SC_PHYS_PAGES);
51   auto page_size = sysconf(_SC_PAGE_SIZE);
52   auto total = static_cast<int64_t>(pages) * static_cast<int64_t>(page_size);
53   MS_LOG(INFO) << "Total physical RAM in bytes: " << total;
54   return total;
55 }
56 
SetDefaultMemoryPolicy(CachePoolPolicy policy)57 Status CacheServerHW::SetDefaultMemoryPolicy(CachePoolPolicy policy) {
58 #ifdef NUMA_ENABLED
59   if (numa_enabled()) {
60     // Set our default memory policy.
61     switch (policy) {
62       case kLocal:
63         numa_set_localalloc();
64         MS_LOG(DEBUG) << "Setting memory default policy to local node. Low level code may override the setting";
65         break;
66       case kInterleave:
67         numa_set_interleave_mask(numa_all_nodes_ptr);
68         MS_LOG(DEBUG) << "Numa affinity is turned off. Use interleave memory policy as default.";
69         break;
70       case kOnNode:
71       case kPreferred:
72         RETURN_STATUS_UNEXPECTED("Unsupported memory policy");
73         break;
74       case kNone:
75       default:
76         // No action taken.
77         break;
78     }
79   }
80 #endif
81   return Status::OK();
82 }
83 
GetNumaNodeInfo()84 Status CacheServerHW::GetNumaNodeInfo() {
85   std::set<Path> numa_nodes_;
86   Path node(kSysNodePath);
87   auto it = Path::DirIterator::OpenDirectory(&node);
88   if (it == nullptr) {
89     MS_LOG(WARNING) << "Unable to open directory " << kSysNodePath << ". Skip scanning hardware info";
90     return Status::OK();
91   }
92   auto isdigit_string = [](const char *str) -> bool {
93     bool r = true;
94     for (size_t i = 0; i < strlen(str); ++i) {
95       if (!std::isdigit(str[i])) {
96         r = false;
97         break;
98       }
99     }
100     return r;
101   };
102   // Look for name starts with 'node' and followed by digits.
103   const char kNodeName[] = "node";
104   while (it->HasNext()) {
105     auto p = it->Next();
106     const std::string entry = p.Basename();
107     const char *name = entry.data();
108     if (strncmp(name, kNodeName, strlen(kNodeName)) == 0 && isdigit_string(name + strlen(kNodeName))) {
109       numa_nodes_.insert(p);
110     }
111   }
112   // There should be at least one. But if not found in any case, just move on the
113   // rest of the server start up.
114   if (numa_nodes_.empty()) {
115     MS_LOG(WARNING) << "No numa nodes ? Skip scanning hardware info";
116     return Status::OK();
117   }
118   // For each numa node, get a list of CPU that is associated with it.
119   const char kCpuList[] = "cpulist";
120   auto r = std::regex("[0-9]*-[0-9]*");
121   for (Path p : numa_nodes_) {
122     auto node_dir = p.Basename();
123     numa_id_t numa_node = static_cast<numa_id_t>(strtol(node_dir.data() + strlen(kNodeName), nullptr, kDecimal));
124     Path f = p / kCpuList;
125 
126     auto realpath = FileUtils::GetRealPath(f.ToString().c_str());
127     if (!realpath.has_value()) {
128       MS_LOG(ERROR) << "Get real path failed, path=" << f.ToString();
129       RETURN_STATUS_UNEXPECTED("Get real path failed, path=" + f.ToString());
130     }
131 
132     std::ifstream fs(realpath.value(), std::ifstream::in);
133     CHECK_FAIL_RETURN_UNEXPECTED(!fs.fail(), "Fail to open file: " + f.ToString());
134     std::string cpu_string;
135     cpu_set_t cpuset;
136     CPU_ZERO(&cpuset);
137     int32_t cpu_cnt = 0;
138     while (getline(fs, cpu_string)) {
139       // Now we parse the content of cpu_string.
140       std::sregex_iterator iter(cpu_string.begin(), cpu_string.end(), r);
141       std::sregex_iterator end;
142       while (iter != end) {
143         auto match = iter->str();
144         auto pos = match.find_first_of('-');
145         if (pos == std::string::npos) {
146           fs.close();
147           RETURN_STATUS_UNEXPECTED("Failed to parse numa node file");
148         }
149         std::string min = match.substr(0, pos);
150         std::string max = match.substr(pos + 1);
151         cpu_id_t cpu_min = static_cast<cpu_id_t>(strtol(min.data(), nullptr, kDecimal));
152         cpu_id_t cpu_max = static_cast<cpu_id_t>(strtol(max.data(), nullptr, kDecimal));
153         MS_LOG(DEBUG) << "Numa node " << numa_node << " CPU(s) : " << cpu_min << "-" << cpu_max;
154         for (int i = cpu_min; i <= cpu_max; ++i) {
155           CPU_SET(i, &cpuset);
156           ++cpu_cnt;
157         }
158         ++iter;
159       }
160     }
161     CHECK_FAIL_RETURN_UNEXPECTED(!fs.bad(), "Fail to read file: " + f.ToString());
162     fs.close();
163     // Remember which cpu is attached to this numa node.
164     numa_cpuset_.emplace(numa_node, cpuset);
165     numa_cpu_cnt_.emplace(numa_node, cpu_cnt);
166   }
167   MS_LOG(DEBUG) << "Number of numa nodes : " << numa_cpuset_.size();
168   return Status::OK();
169 }
170 
SetAffinity(const Task & tk,numa_id_t numa_node)171 Status CacheServerHW::SetAffinity(const Task &tk, numa_id_t numa_node) {
172 #if defined(__APPLE__)
173   return Status::OK();
174 #else
175   auto r = numa_cpuset_.find(numa_node);
176   if (r != numa_cpuset_.end()) {
177     auto err = pthread_setaffinity_np(tk.GetNativeHandle(), sizeof(r->second), &r->second);
178     if (err) {
179       std::string errMsg = "Unable to set affiity. Errno = " + std::to_string(errno);
180       RETURN_STATUS_UNEXPECTED(errMsg);
181     }
182   } else {
183     RETURN_STATUS_UNEXPECTED("Numa node " + std::to_string(numa_node) + " not found");
184   }
185   return Status::OK();
186 #endif
187 }
188 
GetCpuList(numa_id_t numa_id)189 std::vector<cpu_id_t> CacheServerHW::GetCpuList(numa_id_t numa_id) {
190   std::vector<cpu_id_t> v;
191   auto it = numa_cpuset_.find(numa_id);
192   if (it != numa_cpuset_.end()) {
193     auto &cpu_set = it->second;
194     for (auto i = 0; i < num_cpus_; ++i) {
195       if (CPU_ISSET(i, &cpu_set)) {
196         v.push_back(i);
197       }
198     }
199   }
200   return v;
201 }
202 
GetMyNode() const203 numa_id_t CacheServerHW::GetMyNode() const {
204 #if defined(__APPLE__)
205   numa_id_t node_id = -1;
206 #else
207   numa_id_t node_id = 0;
208   auto cpu = sched_getcpu();
209 #ifdef NUMA_ENABLED
210   node_id = numa_node_of_cpu(cpu);
211 #else
212   bool found = false;
213   for (auto it : numa_cpuset_) {
214     cpu_set_t &cpu_set = it.second;
215     if (CPU_ISSET(cpu, &cpu_set)) {
216       node_id = it.first;
217       found = true;
218       break;
219     }
220   }
221   MS_LOG(DEBUG) << "cpu id " << cpu << " found : " << std::boolalpha << found;
222 #endif  // end NUMA_ENABLED
223 #endif  // end __APPLE__
224   return node_id;
225 }
226 
InterleaveMemory(void * ptr,size_t sz)227 void CacheServerHW::InterleaveMemory(void *ptr, size_t sz) {
228 #ifdef NUMA_ENABLED
229   if (numa_enabled()) {
230     numa_interleave_memory(ptr, sz, numa_all_nodes_ptr);
231   }
232 #endif
233 }
234 
AssignToNode(numa_id_t numa_id,void * ptr,size_t sz) const235 void CacheServerHW::AssignToNode(numa_id_t numa_id, void *ptr, size_t sz) const {
236 #ifdef NUMA_ENABLED
237   if (numa_enabled()) {
238     numa_tonode_memory(ptr, sz, numa_id);
239   }
240 #endif
241 }
242 
numa_enabled()243 bool CacheServerHW::numa_enabled() {
244 #ifdef NUMA_ENABLED
245   return (numa_available() != -1);
246 #else
247   return false;
248 #endif
249 }
250 
GetAvailableMemory()251 uint64_t CacheServerHW::GetAvailableMemory() {
252   auto realpath = FileUtils::GetRealPath(kMemInfoFileName);
253   if (!realpath.has_value()) {
254     MS_LOG(ERROR) << "Get real path failed, path=" << kMemInfoFileName;
255     return 0;
256   }
257 
258   std::ifstream mem_file(realpath.value(), std::ifstream::in);
259   if (mem_file.fail()) {
260     MS_LOG(WARNING) << "Fail to open file: " << kMemInfoFileName;
261     return 0;
262   }
263 
264   std::string line;
265   uint64_t mem_available_in_kb = 0L;
266   while (std::getline(mem_file, line)) {
267     // get title
268     std::string::size_type position = line.find(":");
269     std::string title = line.substr(0, position);
270     // get the value of MemAvailable
271     if (title == "MemAvailable") {
272       std::string::size_type pos1 = line.find_last_of(" ");
273       std::string::size_type pos2 = line.find_last_of(" ", pos1 - 1);
274       if (pos1 != std::string::npos && pos2 != std::string::npos && line.size() > pos1) {
275         mem_available_in_kb = static_cast<uint64_t>(std::stol(line.substr(pos2, pos1 - pos2)));
276       }
277       break;
278     }
279   }
280   mem_file.close();
281 
282   return mem_available_in_kb * 1024;
283 }
284 }  // namespace dataset
285 }  // namespace mindspore
286