1 /**
2 * Copyright 2020-2022 Huawei Technologies Co., Ltd
3
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7
8 * http://www.apache.org/licenses/LICENSE-2.0
9
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "minddata/dataset/engine/cache/cache_hw.h"
17
18 #ifdef NUMA_ENABLED
19 #include <numa.h>
20 #endif
21 #include <sched.h>
22 #include <cstdlib>
23 #include <cstring>
24 #include <cctype>
25 #include <fstream>
26 #include <regex>
27 #include <thread>
28
29 #include "utils/file_utils.h"
30 #include "minddata/dataset/util/log_adapter.h"
31
32 namespace mindspore {
33 namespace dataset {
CacheServerHW()34 CacheServerHW::CacheServerHW() {
35 num_cpus_ = std::thread::hardware_concurrency();
36 MS_LOG(DEBUG) << "Number of cpu(s) : " << num_cpus_;
37 #ifdef NUMA_ENABLED
38 if (numa_enabled()) {
39 MS_LOG(INFO) << "Numa support enabled";
40 for (auto i = 0; i <= numa_max_node(); ++i) {
41 long long free_avail = 0; // NOLINT
42 int64_t mem_avail = numa_node_size64(i, &free_avail);
43 MS_LOG(INFO) << "Total physical/free RAM in bytes at node " << i << " : " << mem_avail << "/" << free_avail;
44 }
45 }
46 #endif
47 }
48
GetTotalSystemMemory()49 int64_t CacheServerHW::GetTotalSystemMemory() {
50 auto pages = sysconf(_SC_PHYS_PAGES);
51 auto page_size = sysconf(_SC_PAGE_SIZE);
52 auto total = static_cast<int64_t>(pages) * static_cast<int64_t>(page_size);
53 MS_LOG(INFO) << "Total physical RAM in bytes: " << total;
54 return total;
55 }
56
SetDefaultMemoryPolicy(CachePoolPolicy policy)57 Status CacheServerHW::SetDefaultMemoryPolicy(CachePoolPolicy policy) {
58 #ifdef NUMA_ENABLED
59 if (numa_enabled()) {
60 // Set our default memory policy.
61 switch (policy) {
62 case kLocal:
63 numa_set_localalloc();
64 MS_LOG(DEBUG) << "Setting memory default policy to local node. Low level code may override the setting";
65 break;
66 case kInterleave:
67 numa_set_interleave_mask(numa_all_nodes_ptr);
68 MS_LOG(DEBUG) << "Numa affinity is turned off. Use interleave memory policy as default.";
69 break;
70 case kOnNode:
71 case kPreferred:
72 RETURN_STATUS_UNEXPECTED("Unsupported memory policy");
73 break;
74 case kNone:
75 default:
76 // No action taken.
77 break;
78 }
79 }
80 #endif
81 return Status::OK();
82 }
83
GetNumaNodeInfo()84 Status CacheServerHW::GetNumaNodeInfo() {
85 std::set<Path> numa_nodes_;
86 Path node(kSysNodePath);
87 auto it = Path::DirIterator::OpenDirectory(&node);
88 if (it == nullptr) {
89 MS_LOG(WARNING) << "Unable to open directory " << kSysNodePath << ". Skip scanning hardware info";
90 return Status::OK();
91 }
92 auto isdigit_string = [](const char *str) -> bool {
93 bool r = true;
94 for (size_t i = 0; i < strlen(str); ++i) {
95 if (!std::isdigit(str[i])) {
96 r = false;
97 break;
98 }
99 }
100 return r;
101 };
102 // Look for name starts with 'node' and followed by digits.
103 const char kNodeName[] = "node";
104 while (it->HasNext()) {
105 auto p = it->Next();
106 const std::string entry = p.Basename();
107 const char *name = entry.data();
108 if (strncmp(name, kNodeName, strlen(kNodeName)) == 0 && isdigit_string(name + strlen(kNodeName))) {
109 numa_nodes_.insert(p);
110 }
111 }
112 // There should be at least one. But if not found in any case, just move on the
113 // rest of the server start up.
114 if (numa_nodes_.empty()) {
115 MS_LOG(WARNING) << "No numa nodes ? Skip scanning hardware info";
116 return Status::OK();
117 }
118 // For each numa node, get a list of CPU that is associated with it.
119 const char kCpuList[] = "cpulist";
120 auto r = std::regex("[0-9]*-[0-9]*");
121 for (Path p : numa_nodes_) {
122 auto node_dir = p.Basename();
123 numa_id_t numa_node = static_cast<numa_id_t>(strtol(node_dir.data() + strlen(kNodeName), nullptr, kDecimal));
124 Path f = p / kCpuList;
125
126 auto realpath = FileUtils::GetRealPath(f.ToString().c_str());
127 if (!realpath.has_value()) {
128 MS_LOG(ERROR) << "Get real path failed, path=" << f.ToString();
129 RETURN_STATUS_UNEXPECTED("Get real path failed, path=" + f.ToString());
130 }
131
132 std::ifstream fs(realpath.value(), std::ifstream::in);
133 CHECK_FAIL_RETURN_UNEXPECTED(!fs.fail(), "Fail to open file: " + f.ToString());
134 std::string cpu_string;
135 cpu_set_t cpuset;
136 CPU_ZERO(&cpuset);
137 int32_t cpu_cnt = 0;
138 while (getline(fs, cpu_string)) {
139 // Now we parse the content of cpu_string.
140 std::sregex_iterator iter(cpu_string.begin(), cpu_string.end(), r);
141 std::sregex_iterator end;
142 while (iter != end) {
143 auto match = iter->str();
144 auto pos = match.find_first_of('-');
145 if (pos == std::string::npos) {
146 fs.close();
147 RETURN_STATUS_UNEXPECTED("Failed to parse numa node file");
148 }
149 std::string min = match.substr(0, pos);
150 std::string max = match.substr(pos + 1);
151 cpu_id_t cpu_min = static_cast<cpu_id_t>(strtol(min.data(), nullptr, kDecimal));
152 cpu_id_t cpu_max = static_cast<cpu_id_t>(strtol(max.data(), nullptr, kDecimal));
153 MS_LOG(DEBUG) << "Numa node " << numa_node << " CPU(s) : " << cpu_min << "-" << cpu_max;
154 for (int i = cpu_min; i <= cpu_max; ++i) {
155 CPU_SET(i, &cpuset);
156 ++cpu_cnt;
157 }
158 ++iter;
159 }
160 }
161 CHECK_FAIL_RETURN_UNEXPECTED(!fs.bad(), "Fail to read file: " + f.ToString());
162 fs.close();
163 // Remember which cpu is attached to this numa node.
164 numa_cpuset_.emplace(numa_node, cpuset);
165 numa_cpu_cnt_.emplace(numa_node, cpu_cnt);
166 }
167 MS_LOG(DEBUG) << "Number of numa nodes : " << numa_cpuset_.size();
168 return Status::OK();
169 }
170
SetAffinity(const Task & tk,numa_id_t numa_node)171 Status CacheServerHW::SetAffinity(const Task &tk, numa_id_t numa_node) {
172 #if defined(__APPLE__)
173 return Status::OK();
174 #else
175 auto r = numa_cpuset_.find(numa_node);
176 if (r != numa_cpuset_.end()) {
177 auto err = pthread_setaffinity_np(tk.GetNativeHandle(), sizeof(r->second), &r->second);
178 if (err) {
179 std::string errMsg = "Unable to set affiity. Errno = " + std::to_string(errno);
180 RETURN_STATUS_UNEXPECTED(errMsg);
181 }
182 } else {
183 RETURN_STATUS_UNEXPECTED("Numa node " + std::to_string(numa_node) + " not found");
184 }
185 return Status::OK();
186 #endif
187 }
188
GetCpuList(numa_id_t numa_id)189 std::vector<cpu_id_t> CacheServerHW::GetCpuList(numa_id_t numa_id) {
190 std::vector<cpu_id_t> v;
191 auto it = numa_cpuset_.find(numa_id);
192 if (it != numa_cpuset_.end()) {
193 auto &cpu_set = it->second;
194 for (auto i = 0; i < num_cpus_; ++i) {
195 if (CPU_ISSET(i, &cpu_set)) {
196 v.push_back(i);
197 }
198 }
199 }
200 return v;
201 }
202
GetMyNode() const203 numa_id_t CacheServerHW::GetMyNode() const {
204 #if defined(__APPLE__)
205 numa_id_t node_id = -1;
206 #else
207 numa_id_t node_id = 0;
208 auto cpu = sched_getcpu();
209 #ifdef NUMA_ENABLED
210 node_id = numa_node_of_cpu(cpu);
211 #else
212 bool found = false;
213 for (auto it : numa_cpuset_) {
214 cpu_set_t &cpu_set = it.second;
215 if (CPU_ISSET(cpu, &cpu_set)) {
216 node_id = it.first;
217 found = true;
218 break;
219 }
220 }
221 MS_LOG(DEBUG) << "cpu id " << cpu << " found : " << std::boolalpha << found;
222 #endif // end NUMA_ENABLED
223 #endif // end __APPLE__
224 return node_id;
225 }
226
InterleaveMemory(void * ptr,size_t sz)227 void CacheServerHW::InterleaveMemory(void *ptr, size_t sz) {
228 #ifdef NUMA_ENABLED
229 if (numa_enabled()) {
230 numa_interleave_memory(ptr, sz, numa_all_nodes_ptr);
231 }
232 #endif
233 }
234
AssignToNode(numa_id_t numa_id,void * ptr,size_t sz) const235 void CacheServerHW::AssignToNode(numa_id_t numa_id, void *ptr, size_t sz) const {
236 #ifdef NUMA_ENABLED
237 if (numa_enabled()) {
238 numa_tonode_memory(ptr, sz, numa_id);
239 }
240 #endif
241 }
242
numa_enabled()243 bool CacheServerHW::numa_enabled() {
244 #ifdef NUMA_ENABLED
245 return (numa_available() != -1);
246 #else
247 return false;
248 #endif
249 }
250
GetAvailableMemory()251 uint64_t CacheServerHW::GetAvailableMemory() {
252 auto realpath = FileUtils::GetRealPath(kMemInfoFileName);
253 if (!realpath.has_value()) {
254 MS_LOG(ERROR) << "Get real path failed, path=" << kMemInfoFileName;
255 return 0;
256 }
257
258 std::ifstream mem_file(realpath.value(), std::ifstream::in);
259 if (mem_file.fail()) {
260 MS_LOG(WARNING) << "Fail to open file: " << kMemInfoFileName;
261 return 0;
262 }
263
264 std::string line;
265 uint64_t mem_available_in_kb = 0L;
266 while (std::getline(mem_file, line)) {
267 // get title
268 std::string::size_type position = line.find(":");
269 std::string title = line.substr(0, position);
270 // get the value of MemAvailable
271 if (title == "MemAvailable") {
272 std::string::size_type pos1 = line.find_last_of(" ");
273 std::string::size_type pos2 = line.find_last_of(" ", pos1 - 1);
274 if (pos1 != std::string::npos && pos2 != std::string::npos && line.size() > pos1) {
275 mem_available_in_kb = static_cast<uint64_t>(std::stol(line.substr(pos2, pos1 - pos2)));
276 }
277 break;
278 }
279 }
280 mem_file.close();
281
282 return mem_available_in_kb * 1024;
283 }
284 } // namespace dataset
285 } // namespace mindspore
286