• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3 
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7 
8  * http://www.apache.org/licenses/LICENSE-2.0
9 
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15 */
16 #include "minddata/dataset/engine/cache/cache_hw.h"
17 
18 #ifdef NUMA_ENABLED
19 #include <numa.h>
20 #endif
21 #include <sched.h>
22 #include <cstdlib>
23 #include <cstring>
24 #include <cctype>
25 #include <fstream>
26 #include <regex>
27 #include <thread>
28 
29 #include "utils/file_utils.h"
30 #include "utils/log_adapter.h"
31 
32 namespace mindspore {
33 namespace dataset {
CacheServerHW()34 CacheServerHW::CacheServerHW() {
35   num_cpus_ = std::thread::hardware_concurrency();
36   MS_LOG(DEBUG) << "Number of cpu(s) : " << num_cpus_;
37 #ifdef NUMA_ENABLED
38   if (numa_enabled()) {
39     MS_LOG(INFO) << "Numa support enabled";
40     for (auto i = 0; i <= numa_max_node(); ++i) {
41       long long free_avail = 0;  // NOLINT
42       int64_t mem_avail = numa_node_size64(i, &free_avail);
43       MS_LOG(INFO) << "Total physical/free RAM in bytes at node " << i << " : " << mem_avail << "/" << free_avail;
44     }
45   }
46 #endif
47 }
48 
GetTotalSystemMemory()49 int64_t CacheServerHW::GetTotalSystemMemory() {
50   auto pages = sysconf(_SC_PHYS_PAGES);
51   auto page_size = sysconf(_SC_PAGE_SIZE);
52   auto total = static_cast<int64_t>(pages) * static_cast<int64_t>(page_size);
53   MS_LOG(INFO) << "Total physical RAM in bytes: " << total;
54   return total;
55 }
56 
SetDefaultMemoryPolicy(CachePoolPolicy policy)57 Status CacheServerHW::SetDefaultMemoryPolicy(CachePoolPolicy policy) {
58 #ifdef NUMA_ENABLED
59   if (numa_enabled()) {
60     // Set our default memory policy.
61     switch (policy) {
62       case kLocal:
63         numa_set_localalloc();
64         MS_LOG(DEBUG) << "Setting memory default policy to local node. Low level code may override the setting";
65         break;
66       case kInterleave:
67         numa_set_interleave_mask(numa_all_nodes_ptr);
68         MS_LOG(DEBUG) << "Numa affinity is turned off. Use interleave memory policy as default.";
69         break;
70       case kOnNode:
71       case kPreferred:
72         RETURN_STATUS_UNEXPECTED("Unsupported memory policy");
73         break;
74       case kNone:
75       default:
76         // No action taken.
77         break;
78     }
79   }
80 #endif
81   return Status::OK();
82 }
83 
GetNumaNodeInfo()84 Status CacheServerHW::GetNumaNodeInfo() {
85   std::set<Path> numa_nodes_;
86   Path node(kSysNodePath);
87   auto it = Path::DirIterator::OpenDirectory(&node);
88   if (it == nullptr) {
89     MS_LOG(WARNING) << "Unable to open directory " << kSysNodePath << ". Skip scanning hardware info";
90     return Status::OK();
91   }
92   auto isdigit_string = [](const char *str) -> bool {
93     bool r = true;
94     for (size_t i = 0; i < strlen(str); ++i) {
95       if (!std::isdigit(str[i])) {
96         r = false;
97         break;
98       }
99     }
100     return r;
101   };
102   // Look for name starts with 'node' and followed by digits.
103   const char kNodeName[] = "node";
104   while (it->HasNext()) {
105     auto p = it->Next();
106     const std::string entry = p.Basename();
107     const char *name = entry.data();
108     if (strncmp(name, kNodeName, strlen(kNodeName)) == 0 && isdigit_string(name + strlen(kNodeName))) {
109       numa_nodes_.insert(p);
110     }
111   }
112   // There should be at least one. But if not found in any case, just move on the
113   // rest of the server start up.
114   if (numa_nodes_.empty()) {
115     MS_LOG(WARNING) << "No numa nodes ? Skip scanning hardware info";
116     return Status::OK();
117   }
118   // For each numa node, get a list of CPU that is associated with it.
119   const char kCpuList[] = "cpulist";
120   auto r = std::regex("[0-9]*-[0-9]*");
121   for (Path p : numa_nodes_) {
122     auto node_dir = p.Basename();
123     numa_id_t numa_node = static_cast<numa_id_t>(strtol(node_dir.data() + strlen(kNodeName), nullptr, kDecimal));
124     Path f = p / kCpuList;
125 
126     auto realpath = FileUtils::GetRealPath(f.ToString().data());
127     if (!realpath.has_value()) {
128       MS_LOG(ERROR) << "Get real path failed, path=" << f.ToString();
129       RETURN_STATUS_UNEXPECTED("Get real path failed, path=" + f.ToString());
130     }
131 
132     std::ifstream fs(realpath.value());
133     CHECK_FAIL_RETURN_UNEXPECTED(!fs.fail(), "Fail to open file: " + f.ToString());
134     std::string cpu_string;
135     cpu_set_t cpuset;
136     CPU_ZERO(&cpuset);
137     int32_t cpu_cnt = 0;
138     while (getline(fs, cpu_string)) {
139       // Now we parse the content of cpu_string.
140       std::sregex_iterator iter(cpu_string.begin(), cpu_string.end(), r);
141       std::sregex_iterator end;
142       while (iter != end) {
143         auto match = iter->str();
144         auto pos = match.find_first_of('-');
145         CHECK_FAIL_RETURN_UNEXPECTED(pos != std::string::npos, "Failed to parse numa node file");
146         std::string min = match.substr(0, pos);
147         std::string max = match.substr(pos + 1);
148         cpu_id_t cpu_min = static_cast<cpu_id_t>(strtol(min.data(), nullptr, kDecimal));
149         cpu_id_t cpu_max = static_cast<cpu_id_t>(strtol(max.data(), nullptr, kDecimal));
150         MS_LOG(DEBUG) << "Numa node " << numa_node << " CPU(s) : " << cpu_min << "-" << cpu_max;
151         for (int i = cpu_min; i <= cpu_max; ++i) {
152           CPU_SET(i, &cpuset);
153           ++cpu_cnt;
154         }
155         ++iter;
156       }
157     }
158     CHECK_FAIL_RETURN_UNEXPECTED(!fs.bad(), "Fail to read file: " + f.ToString());
159     fs.close();
160     // Remember which cpu is attached to this numa node.
161     numa_cpuset_.emplace(numa_node, cpuset);
162     numa_cpu_cnt_.emplace(numa_node, cpu_cnt);
163   }
164   MS_LOG(DEBUG) << "Number of numa nodes : " << numa_cpuset_.size();
165   return Status::OK();
166 }
167 
SetAffinity(const Task & tk,numa_id_t numa_node)168 Status CacheServerHW::SetAffinity(const Task &tk, numa_id_t numa_node) {
169 #if defined(__APPLE__)
170   return Status::OK();
171 #else
172   auto r = numa_cpuset_.find(numa_node);
173   if (r != numa_cpuset_.end()) {
174     auto err = pthread_setaffinity_np(tk.GetNativeHandle(), sizeof(r->second), &r->second);
175     if (err) {
176       std::string errMsg = "Unable to set affiity. Errno = " + std::to_string(errno);
177       RETURN_STATUS_UNEXPECTED(errMsg);
178     }
179   } else {
180     RETURN_STATUS_UNEXPECTED("Numa node " + std::to_string(numa_node) + " not found");
181   }
182   return Status::OK();
183 #endif
184 }
185 
GetCpuList(numa_id_t numa_id)186 std::vector<cpu_id_t> CacheServerHW::GetCpuList(numa_id_t numa_id) {
187   std::vector<cpu_id_t> v;
188   auto it = numa_cpuset_.find(numa_id);
189   if (it != numa_cpuset_.end()) {
190     auto &cpu_set = it->second;
191     for (auto i = 0; i < num_cpus_; ++i) {
192       if (CPU_ISSET(i, &cpu_set)) {
193         v.push_back(i);
194       }
195     }
196   }
197   return v;
198 }
199 
GetMyNode() const200 numa_id_t CacheServerHW::GetMyNode() const {
201 #if defined(__APPLE__)
202   numa_id_t node_id = -1;
203 #else
204   numa_id_t node_id = 0;
205   auto cpu = sched_getcpu();
206 #ifdef NUMA_ENABLED
207   node_id = numa_node_of_cpu(cpu);
208 #else
209   bool found = false;
210   for (auto it : numa_cpuset_) {
211     cpu_set_t &cpu_set = it.second;
212     if (CPU_ISSET(cpu, &cpu_set)) {
213       node_id = it.first;
214       found = true;
215       break;
216     }
217   }
218   MS_LOG(DEBUG) << "cpu id " << cpu << " found : " << std::boolalpha << found;
219 #endif  // end NUMA_ENABLED
220 #endif  // end __APPLE__
221   return node_id;
222 }
223 
InterleaveMemory(void * ptr,size_t sz)224 void CacheServerHW::InterleaveMemory(void *ptr, size_t sz) {
225 #ifdef NUMA_ENABLED
226   if (numa_enabled()) {
227     numa_interleave_memory(ptr, sz, numa_all_nodes_ptr);
228   }
229 #endif
230 }
231 
AssignToNode(numa_id_t numa_id,void * ptr,size_t sz) const232 void CacheServerHW::AssignToNode(numa_id_t numa_id, void *ptr, size_t sz) const {
233 #ifdef NUMA_ENABLED
234   if (numa_enabled()) {
235     numa_tonode_memory(ptr, sz, numa_id);
236   }
237 #endif
238 }
239 
numa_enabled()240 bool CacheServerHW::numa_enabled() {
241 #ifdef NUMA_ENABLED
242   return (numa_available() != -1);
243 #else
244   return false;
245 #endif
246 }
247 
GetAvailableMemory()248 uint64_t CacheServerHW::GetAvailableMemory() {
249   auto realpath = FileUtils::GetRealPath(kMemInfoFileName);
250   if (!realpath.has_value()) {
251     MS_LOG(ERROR) << "Get real path failed, path=" << kMemInfoFileName;
252     return 0;
253   }
254 
255   std::ifstream mem_file(realpath.value());
256   if (mem_file.fail()) {
257     MS_LOG(WARNING) << "Fail to open file: " << kMemInfoFileName;
258     return 0;
259   }
260 
261   std::string line;
262   uint64_t mem_available_in_kb = 0L;
263   while (std::getline(mem_file, line)) {
264     // get title
265     std::string::size_type position = line.find(":");
266     std::string title = line.substr(0, position);
267     // get the value of MemAvailable
268     if (title == "MemAvailable") {
269       std::string::size_type pos1 = line.find_last_of(" ");
270       std::string::size_type pos2 = line.find_last_of(" ", pos1 - 1);
271       if (pos1 != std::string::npos && pos2 != std::string::npos && line.size() > pos1) {
272         mem_available_in_kb = std::stol(line.substr(pos2, pos1 - pos2));
273       }
274       break;
275     }
276   }
277   mem_file.close();
278 
279   return mem_available_in_kb * 1024;
280 }
281 }  // namespace dataset
282 }  // namespace mindspore
283