1 /*
2 * Copyright (C) 2012 Linux Test Project, Inc.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 #include "config.h"
20 #include <errno.h>
21 #if HAVE_NUMA_H
22 #include <numa.h>
23 #endif
24 #if HAVE_NUMAIF_H
25 #include <numaif.h>
26 #endif
27 #include <stdarg.h>
28 #include <stdio.h>
29 #include <string.h>
30 #include <stdlib.h>
31 #include <unistd.h>
32 #include <errno.h>
33
34 #include "test.h"
35 #include "safe_macros.h"
36 #include "numa_helper.h"
37 #include "linux_syscall_numbers.h"
38
/*
 * get_max_node - upper bound on the number of nodes a nodemask must cover
 *
 * RETURNS:
 *     0 when built without <numa.h>, otherwise the number of node bits
 *     that callers should size their nodemask for.
 */
unsigned long get_max_node(void)
{
#if HAVE_NUMA_H
#if defined(LIBNUMA_API_VERSION) && LIBNUMA_API_VERSION >= 2
	return numa_max_possible_node() + 1;
#else
	unsigned long node_limit = NUMA_NUM_NODES;

	/*
	 * NUMA_NUM_NODES is not reliable, libnuma >=2 is looking
	 * at /proc/self/status to figure out correct number.
	 * If buffer is not large enough get_mempolicy will fail with EINVAL,
	 * so never report less than 1024.
	 */
	return node_limit < 1024 ? 1024 : node_limit;
#endif
#else
	return 0;
#endif /* HAVE_NUMA_H */
}
58
59 #if HAVE_NUMA_H
/*
 * get_nodemask_allnodes - mark every node that has a sysfs directory
 * @nodemask: mask to fill; its first max_node / 8 bytes are cleared first
 * @max_node: number of node ids to probe (0 .. max_node - 1)
 *
 * A node is considered present when stat() on
 * /sys/devices/system/node/node<N> succeeds.
 */
static void get_nodemask_allnodes(nodemask_t * nodemask, unsigned long max_node)
{
	struct stat sb;
	char path[64];
	int node = 0;

	memset(nodemask, 0, max_node / 8);

	while (node < max_node) {
		sprintf(path, "/sys/devices/system/node/node%d", node);
		if (!stat(path, &sb))
			nodemask_set(nodemask, node);
		node++;
	}
}
74
/*
 * filter_nodemask_mem - restrict the mask to nodes usable for memory
 * @nodemask: mask to update
 * @max_node: number of node bits covered by the mask
 *
 * RETURNS:
 *     0 on success (also when get_mempolicy reports ENOSYS)
 *    -2 when get_mempolicy fails for any other reason
 */
static int filter_nodemask_mem(nodemask_t * nodemask, unsigned long max_node)
{
#if MPOL_F_MEMS_ALLOWED
	long rc;

	/* the kernel fills the mask, so start from an empty one */
	memset(nodemask, 0, max_node / 8);

	/*
	 * avoid numa_get_mems_allowed(), because of bug in getpol()
	 * utility function in older versions:
	 * http://www.spinics.net/lists/linux-numa/msg00849.html
	 *
	 * At the moment numa_available() implementation also uses
	 * get_mempolicy, but let's make explicit check for ENOSYS
	 * here as well in case it changes in future. Silent ignore
	 * of ENOSYS is OK, because without NUMA caller gets empty
	 * set of nodes anyway.
	 */
	rc = syscall(__NR_get_mempolicy, NULL, nodemask->n,
		     max_node, 0, MPOL_F_MEMS_ALLOWED);
	if (rc >= 0)
		return 0;

	return (errno == ENOSYS) ? 0 : -2;
#else
	int node;

	/*
	 * old libnuma/kernel don't have MPOL_F_MEMS_ALLOWED, so let's assume
	 * that we can use any node with memory > 0
	 */
	for (node = 0; node < max_node; node++) {
		if (nodemask_isset(nodemask, node) &&
		    numa_node_size64(node, NULL) <= 0)
			nodemask_clr(nodemask, node);
	}
	return 0;
#endif /* MPOL_F_MEMS_ALLOWED */
}
112
/*
 * cpumask_has_cpus - check whether a textual cpumask has any bit set
 * @cpumask: buffer holding the mask text
 * @len: maximum number of bytes to inspect
 *
 * Scans up to len bytes, stopping at the first NUL byte. Any character
 * in '1'..'9' or 'a'..'f' counts as a set bit; everything else
 * (including '0', separators and upper-case letters) is skipped.
 *
 * RETURNS:
 *     1 if such a character was found
 *     0 otherwise
 */
static int cpumask_has_cpus(char *cpumask, size_t len)
{
	size_t pos = 0;

	while (pos < len && cpumask[pos] != '\0') {
		char c = cpumask[pos++];

		if ((c >= '1' && c <= '9') || (c >= 'a' && c <= 'f'))
			return 1;
	}

	return 0;
}
125
/*
 * filter_nodemask_cpu - clear nodes whose sysfs cpumap shows no CPUs
 * @nodemask: mask to update; only bits already set are examined
 * @max_node: number of node bits covered by the mask
 *
 * For each set node the first line of
 * /sys/devices/system/node/node<N>/cpumap is read. The node is cleared
 * only when a line was read and it contains no set bit; nodes whose
 * cpumap cannot be opened or read are left untouched (best effort).
 */
static void filter_nodemask_cpu(nodemask_t * nodemask, unsigned long max_node)
{
	char *cpumask = NULL;
	/* buffer size owned by getdelim(); must start at 0 with a NULL buffer */
	size_t len = 0;
	char fn[64];
	FILE *f;
	/* getdelim() returns ssize_t, keep the full width */
	ssize_t ret;
	int i;

	for (i = 0; i < max_node; i++) {
		if (!nodemask_isset(nodemask, i))
			continue;

		snprintf(fn, sizeof(fn),
			 "/sys/devices/system/node/node%d/cpumap", i);
		f = fopen(fn, "r");
		if (!f)
			continue;

		/* the buffer is reused (and grown if needed) across nodes */
		ret = getdelim(&cpumask, &len, '\n', f);
		if ((ret > 0) && (!cpumask_has_cpus(cpumask, len)))
			nodemask_clr(nodemask, i);
		fclose(f);
	}
	free(cpumask);
}
148 #endif /* HAVE_NUMA_H */
149
/*
 * get_allowed_nodes_arr - get number and array of available nodes
 * @flag: NH_MEMS and/or NH_CPUS, selects which filters are applied
 *                 to the set of existing nodes
 * @num_nodes: pointer where number of available nodes will be stored
 * @nodes: array of available node ids, this is MPOL_F_MEMS_ALLOWED
 *                 node bitmask compacted (without holes), so that each field
 *                 contains node number. If NULL only num_nodes is
 *                 returned, otherwise it contains a newly allocated array,
 *                 which the caller is responsible to free.
 * RETURNS:
 *     0 on success
 *    -1 on allocation failure
 *    -2 on get_mempolicy failure
 */
int get_allowed_nodes_arr(int flag, int *num_nodes, int **nodes)
{
	int ret = 0;
#if HAVE_NUMA_H
	unsigned long i, max_node, nodemask_size;
	nodemask_t *nodemask = NULL;
	int saved_errno;
#endif

	/* start from a well-defined "no nodes" result */
	*num_nodes = 0;
	if (nodes)
		*nodes = NULL;

#if HAVE_NUMA_H
	/* without NUMA support report success with zero nodes */
	if (numa_available() == -1)
		return 0;

	/*
	 * align the node count to the bit width of unsigned long
	 * (LTP_ALIGN is defined elsewhere), then convert bits to bytes
	 */
	max_node = LTP_ALIGN(get_max_node(), sizeof(unsigned long) * 8);
	nodemask_size = max_node / 8;

	nodemask = malloc(nodemask_size);
	if (nodes)
		*nodes = malloc(sizeof(int) * max_node);

	if (nodemask == NULL || (nodes && *nodes == NULL)) {
		ret = -1;
		goto out;
	}

	/* allow all nodes at start, then filter based on flags */
	get_nodemask_allnodes(nodemask, max_node);
	if ((flag & NH_MEMS) == NH_MEMS) {
		ret = filter_nodemask_mem(nodemask, max_node);
		if (ret < 0)
			goto out;
	}
	if ((flag & NH_CPUS) == NH_CPUS)
		filter_nodemask_cpu(nodemask, max_node);

	/* compact the bitmask into a list of node ids */
	for (i = 0; i < max_node; i++) {
		if (!nodemask_isset(nodemask, (int)i))
			continue;
		if (nodes)
			(*nodes)[*num_nodes] = (int)i;
		(*num_nodes)++;
	}

out:
	/* keep errno of the failing call intact across the cleanup */
	saved_errno = errno;
	free(nodemask);
	if (ret < 0 && nodes) {
		/*
		 * do not hand a half-initialized array back on failure,
		 * callers that bail out early would leak it
		 */
		free(*nodes);
		*nodes = NULL;
	}
	errno = saved_errno;
#endif
	return ret;
}
215
/*
 * get_allowed_nodes - convenience function to get fixed number of nodes
 * @flag: NH_MEMS and/or NH_CPUS, passed on to get_allowed_nodes_arr()
 * @count: how many nodes to get
 * @...: int pointers, where node ids will be stored
 * RETURNS:
 *     0 on success
 *    -1 on allocation failure
 *    -2 on get_mempolicy failure
 *    -3 on not enough allowed nodes (errno is set to EINVAL)
 */
int get_allowed_nodes(int flag, int count, ...)
{
	int ret;
	int i, *nodep;
	va_list ap;
	int num_nodes = 0;
	int *nodes = NULL;
	int saved_errno;

	ret = get_allowed_nodes_arr(flag, &num_nodes, &nodes);
	if (ret < 0) {
		/*
		 * release whatever array (if any) was handed back, while
		 * keeping errno of the failed call for the caller
		 */
		saved_errno = errno;
		free(nodes);
		errno = saved_errno;
		return ret;
	}

	/* copy the first 'count' node ids into the caller's int pointers */
	va_start(ap, count);
	for (i = 0; i < count; i++) {
		nodep = va_arg(ap, int *);
		if (i >= num_nodes) {
			/* fewer nodes available than requested */
			ret = -3;
			break;
		}
		*nodep = nodes[i];
	}
	va_end(ap);

	free(nodes);
	/* set errno last so that free() cannot overwrite it */
	if (ret == -3)
		errno = EINVAL;

	return ret;
}
254
/*
 * print_node_info - print the node ids selected by 'flag' to stdout
 * @flag: filter flags handed to get_allowed_nodes_arr()
 *
 * Prints "nodes (flag=<flag>): " followed by either the space separated
 * node ids or "error(<ret>)" when the lookup did not return 0.
 */
static void print_node_info(int flag)
{
	int *node_ids = NULL;
	int count;
	int idx;
	int rc = get_allowed_nodes_arr(flag, &count, &node_ids);

	printf("nodes (flag=%d): ", flag);
	if (rc != 0) {
		printf("error(%d)\n", rc);
	} else {
		for (idx = 0; idx < count; idx++)
			printf("%d ", node_ids[idx]);
		printf("\n");
	}

	free(node_ids);
}
270
271 /*
272 * nh_dump_nodes - dump info about nodes to stdout
273 */
nh_dump_nodes(void)274 void nh_dump_nodes(void)
275 {
276 print_node_info(0);
277 print_node_info(NH_MEMS);
278 print_node_info(NH_CPUS);
279 print_node_info(NH_MEMS | NH_CPUS);
280 }
281
/*
 * is_numa - check whether the system is a NUMA system
 * @cleanup_fn: cleanup function passed to tst_brkm() on failure
 * @flag: NH_MEMS and/or NH_CPUS
 * @min_nodes: find at least 'min_nodes' nodes with memory
 * NOTE: the function is designed to try to find at least 'min_nodes'
 * available nodes, where each node contains memory.
 * WARN: Don't use this func in child, as it calls tst_brkm()
 * RETURNS:
 *     0 - it's not a NUMA system
 *     1 - it's a NUMA system
 */
is_numa(void (* cleanup_fn)(void),int flag,int min_nodes)293 int is_numa(void (*cleanup_fn)(void), int flag, int min_nodes)
294 {
295 int ret;
296 int numa_nodes = 0;
297
298 ret = get_allowed_nodes_arr(flag, &numa_nodes, NULL);
299 if (ret < 0)
300 tst_brkm(TBROK | TERRNO, cleanup_fn, "get_allowed_nodes_arr");
301
302 if (numa_nodes >= min_nodes)
303 return 1;
304 else
305 return 0;
306 }
307