1 /**
2 * Copyright 2020 Huawei Technologies Co., Ltd
3
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7
8 * http://www.apache.org/licenses/LICENSE-2.0
9
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "minddata/dataset/engine/cache/cache_hw.h"
17
18 #ifdef NUMA_ENABLED
19 #include <numa.h>
20 #endif
21 #include <sched.h>
22 #include <cstdlib>
23 #include <cstring>
24 #include <cctype>
25 #include <fstream>
26 #include <regex>
27 #include <thread>
28
29 #include "utils/file_utils.h"
30 #include "utils/log_adapter.h"
31
32 namespace mindspore {
33 namespace dataset {
CacheServerHW()34 CacheServerHW::CacheServerHW() {
35 num_cpus_ = std::thread::hardware_concurrency();
36 MS_LOG(DEBUG) << "Number of cpu(s) : " << num_cpus_;
37 #ifdef NUMA_ENABLED
38 if (numa_enabled()) {
39 MS_LOG(INFO) << "Numa support enabled";
40 for (auto i = 0; i <= numa_max_node(); ++i) {
41 long long free_avail = 0; // NOLINT
42 int64_t mem_avail = numa_node_size64(i, &free_avail);
43 MS_LOG(INFO) << "Total physical/free RAM in bytes at node " << i << " : " << mem_avail << "/" << free_avail;
44 }
45 }
46 #endif
47 }
48
GetTotalSystemMemory()49 int64_t CacheServerHW::GetTotalSystemMemory() {
50 auto pages = sysconf(_SC_PHYS_PAGES);
51 auto page_size = sysconf(_SC_PAGE_SIZE);
52 auto total = static_cast<int64_t>(pages) * static_cast<int64_t>(page_size);
53 MS_LOG(INFO) << "Total physical RAM in bytes: " << total;
54 return total;
55 }
56
SetDefaultMemoryPolicy(CachePoolPolicy policy)57 Status CacheServerHW::SetDefaultMemoryPolicy(CachePoolPolicy policy) {
58 #ifdef NUMA_ENABLED
59 if (numa_enabled()) {
60 // Set our default memory policy.
61 switch (policy) {
62 case kLocal:
63 numa_set_localalloc();
64 MS_LOG(DEBUG) << "Setting memory default policy to local node. Low level code may override the setting";
65 break;
66 case kInterleave:
67 numa_set_interleave_mask(numa_all_nodes_ptr);
68 MS_LOG(DEBUG) << "Numa affinity is turned off. Use interleave memory policy as default.";
69 break;
70 case kOnNode:
71 case kPreferred:
72 RETURN_STATUS_UNEXPECTED("Unsupported memory policy");
73 break;
74 case kNone:
75 default:
76 // No action taken.
77 break;
78 }
79 }
80 #endif
81 return Status::OK();
82 }
83
GetNumaNodeInfo()84 Status CacheServerHW::GetNumaNodeInfo() {
85 std::set<Path> numa_nodes_;
86 Path node(kSysNodePath);
87 auto it = Path::DirIterator::OpenDirectory(&node);
88 if (it == nullptr) {
89 MS_LOG(WARNING) << "Unable to open directory " << kSysNodePath << ". Skip scanning hardware info";
90 return Status::OK();
91 }
92 auto isdigit_string = [](const char *str) -> bool {
93 bool r = true;
94 for (size_t i = 0; i < strlen(str); ++i) {
95 if (!std::isdigit(str[i])) {
96 r = false;
97 break;
98 }
99 }
100 return r;
101 };
102 // Look for name starts with 'node' and followed by digits.
103 const char kNodeName[] = "node";
104 while (it->HasNext()) {
105 auto p = it->Next();
106 const std::string entry = p.Basename();
107 const char *name = entry.data();
108 if (strncmp(name, kNodeName, strlen(kNodeName)) == 0 && isdigit_string(name + strlen(kNodeName))) {
109 numa_nodes_.insert(p);
110 }
111 }
112 // There should be at least one. But if not found in any case, just move on the
113 // rest of the server start up.
114 if (numa_nodes_.empty()) {
115 MS_LOG(WARNING) << "No numa nodes ? Skip scanning hardware info";
116 return Status::OK();
117 }
118 // For each numa node, get a list of CPU that is associated with it.
119 const char kCpuList[] = "cpulist";
120 auto r = std::regex("[0-9]*-[0-9]*");
121 for (Path p : numa_nodes_) {
122 auto node_dir = p.Basename();
123 numa_id_t numa_node = static_cast<numa_id_t>(strtol(node_dir.data() + strlen(kNodeName), nullptr, kDecimal));
124 Path f = p / kCpuList;
125
126 auto realpath = FileUtils::GetRealPath(f.ToString().data());
127 if (!realpath.has_value()) {
128 MS_LOG(ERROR) << "Get real path failed, path=" << f.ToString();
129 RETURN_STATUS_UNEXPECTED("Get real path failed, path=" + f.ToString());
130 }
131
132 std::ifstream fs(realpath.value());
133 CHECK_FAIL_RETURN_UNEXPECTED(!fs.fail(), "Fail to open file: " + f.ToString());
134 std::string cpu_string;
135 cpu_set_t cpuset;
136 CPU_ZERO(&cpuset);
137 int32_t cpu_cnt = 0;
138 while (getline(fs, cpu_string)) {
139 // Now we parse the content of cpu_string.
140 std::sregex_iterator iter(cpu_string.begin(), cpu_string.end(), r);
141 std::sregex_iterator end;
142 while (iter != end) {
143 auto match = iter->str();
144 auto pos = match.find_first_of('-');
145 CHECK_FAIL_RETURN_UNEXPECTED(pos != std::string::npos, "Failed to parse numa node file");
146 std::string min = match.substr(0, pos);
147 std::string max = match.substr(pos + 1);
148 cpu_id_t cpu_min = static_cast<cpu_id_t>(strtol(min.data(), nullptr, kDecimal));
149 cpu_id_t cpu_max = static_cast<cpu_id_t>(strtol(max.data(), nullptr, kDecimal));
150 MS_LOG(DEBUG) << "Numa node " << numa_node << " CPU(s) : " << cpu_min << "-" << cpu_max;
151 for (int i = cpu_min; i <= cpu_max; ++i) {
152 CPU_SET(i, &cpuset);
153 ++cpu_cnt;
154 }
155 ++iter;
156 }
157 }
158 CHECK_FAIL_RETURN_UNEXPECTED(!fs.bad(), "Fail to read file: " + f.ToString());
159 fs.close();
160 // Remember which cpu is attached to this numa node.
161 numa_cpuset_.emplace(numa_node, cpuset);
162 numa_cpu_cnt_.emplace(numa_node, cpu_cnt);
163 }
164 MS_LOG(DEBUG) << "Number of numa nodes : " << numa_cpuset_.size();
165 return Status::OK();
166 }
167
SetAffinity(const Task & tk,numa_id_t numa_node)168 Status CacheServerHW::SetAffinity(const Task &tk, numa_id_t numa_node) {
169 #if defined(__APPLE__)
170 return Status::OK();
171 #else
172 auto r = numa_cpuset_.find(numa_node);
173 if (r != numa_cpuset_.end()) {
174 auto err = pthread_setaffinity_np(tk.GetNativeHandle(), sizeof(r->second), &r->second);
175 if (err) {
176 std::string errMsg = "Unable to set affiity. Errno = " + std::to_string(errno);
177 RETURN_STATUS_UNEXPECTED(errMsg);
178 }
179 } else {
180 RETURN_STATUS_UNEXPECTED("Numa node " + std::to_string(numa_node) + " not found");
181 }
182 return Status::OK();
183 #endif
184 }
185
GetCpuList(numa_id_t numa_id)186 std::vector<cpu_id_t> CacheServerHW::GetCpuList(numa_id_t numa_id) {
187 std::vector<cpu_id_t> v;
188 auto it = numa_cpuset_.find(numa_id);
189 if (it != numa_cpuset_.end()) {
190 auto &cpu_set = it->second;
191 for (auto i = 0; i < num_cpus_; ++i) {
192 if (CPU_ISSET(i, &cpu_set)) {
193 v.push_back(i);
194 }
195 }
196 }
197 return v;
198 }
199
GetMyNode() const200 numa_id_t CacheServerHW::GetMyNode() const {
201 #if defined(__APPLE__)
202 numa_id_t node_id = -1;
203 #else
204 numa_id_t node_id = 0;
205 auto cpu = sched_getcpu();
206 #ifdef NUMA_ENABLED
207 node_id = numa_node_of_cpu(cpu);
208 #else
209 bool found = false;
210 for (auto it : numa_cpuset_) {
211 cpu_set_t &cpu_set = it.second;
212 if (CPU_ISSET(cpu, &cpu_set)) {
213 node_id = it.first;
214 found = true;
215 break;
216 }
217 }
218 MS_LOG(DEBUG) << "cpu id " << cpu << " found : " << std::boolalpha << found;
219 #endif // end NUMA_ENABLED
220 #endif // end __APPLE__
221 return node_id;
222 }
223
InterleaveMemory(void * ptr,size_t sz)224 void CacheServerHW::InterleaveMemory(void *ptr, size_t sz) {
225 #ifdef NUMA_ENABLED
226 if (numa_enabled()) {
227 numa_interleave_memory(ptr, sz, numa_all_nodes_ptr);
228 }
229 #endif
230 }
231
AssignToNode(numa_id_t numa_id,void * ptr,size_t sz) const232 void CacheServerHW::AssignToNode(numa_id_t numa_id, void *ptr, size_t sz) const {
233 #ifdef NUMA_ENABLED
234 if (numa_enabled()) {
235 numa_tonode_memory(ptr, sz, numa_id);
236 }
237 #endif
238 }
239
numa_enabled()240 bool CacheServerHW::numa_enabled() {
241 #ifdef NUMA_ENABLED
242 return (numa_available() != -1);
243 #else
244 return false;
245 #endif
246 }
247
GetAvailableMemory()248 uint64_t CacheServerHW::GetAvailableMemory() {
249 auto realpath = FileUtils::GetRealPath(kMemInfoFileName);
250 if (!realpath.has_value()) {
251 MS_LOG(ERROR) << "Get real path failed, path=" << kMemInfoFileName;
252 return 0;
253 }
254
255 std::ifstream mem_file(realpath.value());
256 if (mem_file.fail()) {
257 MS_LOG(WARNING) << "Fail to open file: " << kMemInfoFileName;
258 return 0;
259 }
260
261 std::string line;
262 uint64_t mem_available_in_kb = 0L;
263 while (std::getline(mem_file, line)) {
264 // get title
265 std::string::size_type position = line.find(":");
266 std::string title = line.substr(0, position);
267 // get the value of MemAvailable
268 if (title == "MemAvailable") {
269 std::string::size_type pos1 = line.find_last_of(" ");
270 std::string::size_type pos2 = line.find_last_of(" ", pos1 - 1);
271 if (pos1 != std::string::npos && pos2 != std::string::npos && line.size() > pos1) {
272 mem_available_in_kb = std::stol(line.substr(pos2, pos1 - pos2));
273 }
274 break;
275 }
276 }
277 mem_file.close();
278
279 return mem_available_in_kb * 1024;
280 }
281 } // namespace dataset
282 } // namespace mindspore
283