1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (c) 2023 Oracle and/or its affiliates.
4 */
5
6 /*\
7 * [Description]
8 *
9 * Stress a possible race condition between memory pages allocation
10 * and soft-offline of unrelated pages as explained in the commit:
11 * d4ae9916ea29 (mm: soft-offline: close the race against page allocation)
12 *
13 * Control that soft-offlined pages get correctly replaced: with the
14 * same content and without SIGBUS generation when accessed.
15 */
16
17 #include <errno.h>
18 #include <mntent.h>
19 #include <pthread.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <time.h>
23 #include <unistd.h>
24 #include <sys/types.h>
25 #include <sys/klog.h>
26
27 #include "tst_test.h"
28 #include "tst_safe_pthread.h"
29 #include "tst_safe_stdio.h"
30 #include "lapi/mmap.h"
31
/* Number of allocate / soft-offline / verify rounds run by each thread */
#define NUM_LOOPS 5
/* Number of pages mapped and soft-offlined in each round */
#define NUM_PAGES 32
/*
 * Shift applied to the thread number when building a page sentinel.
 * The page index (always < NUM_PAGES == 1 << NUM_PAGES_OFFSET) is OR-ed
 * into the low bits, so every (thread, page) pair gets a distinct value.
 */
#define NUM_PAGES_OFFSET 5

/* Module needed to bring soft-offlined memory pages back online */
#define HW_MODULE "hwpoison_inject"

/* Worker thread handles, allocated in setup() */
static pthread_t *thread_ids;
/* Number of worker threads spawned per test run */
static int number_threads;
/* Number of times the test function has been entered */
static int run_iterations;
/* Upper bound of pages soft-offlined by a single test run */
static int maximum_pfns;

/*
 * SIGBUS notification shared between the signal handler and the monitor
 * thread. The counter is only modified with the mutex held.
 */
static volatile int sigbus_received;
/*
 * Statically initialized: the condition variable is used without any
 * pthread_cond_init() call.
 */
static pthread_cond_t sigbus_received_cv = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t sigbus_received_mtx = PTHREAD_MUTEX_INITIALIZER;

/* System page size, read in setup() */
static long pagesize;
/* Marker written to the kernel log at startup, searched for on cleanup */
static char beginning_tag[BUFSIZ];
/* Set to 1 when this test had to modprobe HW_MODULE itself */
static int hwpoison_probe;
51
/*
 * Issue a zero-length nanosleep(). Called between two rounds of
 * allocate_offline() as a yield point.
 */
static void my_yield(void)
{
	const struct timespec no_delay = { .tv_sec = 0, .tv_nsec = 0 };

	nanosleep(&no_delay, NULL);
}
58
59 /* a SIGBUS received is a confirmation of test failure */
sigbus_handler(int signum LTP_ATTRIBUTE_UNUSED)60 static void sigbus_handler(int signum LTP_ATTRIBUTE_UNUSED)
61 {
62 pthread_mutex_lock(&sigbus_received_mtx);
63 sigbus_received++;
64 pthread_cond_signal(&sigbus_received_cv);
65 pthread_mutex_unlock(&sigbus_received_mtx);
66 pause();
67 }
68
/*
 * Monitor thread body: sleep on the condition variable until the SIGBUS
 * counter becomes non-zero, then report TFAIL and terminate the process
 * with exit status 1. The argument is unused.
 */
static void *sigbus_monitor(void *arg LTP_ATTRIBUTE_UNUSED)
{
	pthread_mutex_lock(&sigbus_received_mtx);
	for (;;) {
		if (sigbus_received)
			break;
		pthread_cond_wait(&sigbus_received_cv, &sigbus_received_mtx);
	}
	pthread_mutex_unlock(&sigbus_received_mtx);

	tst_res(TFAIL, "SIGBUS Received");
	exit(1);
}
78
79 /*
80 * Allocate a page and write a sentinel value into it.
81 */
allocate_write(int sentinel)82 static void *allocate_write(int sentinel)
83 {
84 void *p;
85 int *s;
86
87 p = SAFE_MMAP(NULL, pagesize, PROT_READ|PROT_WRITE,
88 MAP_SHARED|MAP_ANONYMOUS, -1, 0);
89 s = (int *)p;
90 *s = sentinel;
91 return p;
92 }
93
94 /*
95 * Verify and unmap the given page.
96 */
verif_unmap(void * page,int sentinel)97 static int verif_unmap(void *page, int sentinel)
98 {
99 int *s = (int *)page;
100
101 if (*s != sentinel) {
102 tst_res(TFAIL, "pid[%d]: fail: bad sentinel value seen: %d expected: %d\n", getpid(), *s, sentinel);
103 return 1;
104 }
105
106 return SAFE_MUNMAP(page, pagesize);
107 }
108
109 /*
110 * allocate_offline() - Allocate and offline test called per-thread
111 *
112 * This function does the allocation and offline by mmapping an
113 * anonymous page and offlining it.
114 */
allocate_offline(int tnum)115 static int allocate_offline(int tnum)
116 {
117 int loop;
118
119 for (loop = 0; loop < NUM_LOOPS; loop++) {
120 long *ptrs[NUM_PAGES];
121 int num_alloc;
122 int i;
123
124 for (num_alloc = 0; num_alloc < NUM_PAGES; num_alloc++) {
125
126 ptrs[num_alloc] = allocate_write((tnum << NUM_PAGES_OFFSET) | num_alloc);
127 if (ptrs[num_alloc] == NULL)
128 return -1;
129
130 if (madvise(ptrs[num_alloc], pagesize, MADV_SOFT_OFFLINE) == -1) {
131 if (errno != EINVAL)
132 tst_res(TFAIL | TERRNO, "madvise failed");
133 if (errno == EINVAL)
134 tst_res(TCONF, "madvise() didn't support MADV_SOFT_OFFLINE");
135 return errno;
136 }
137 }
138
139 for (i = 0; i < num_alloc; i++) {
140 if (verif_unmap(ptrs[i], (tnum << NUM_PAGES_OFFSET) | i) != 0)
141 return 1;
142 }
143
144 my_yield();
145 if (!tst_remaining_runtime()) {
146 tst_res(TINFO, "Thread [%d]: Test runtime is over, exiting", tnum);
147 break;
148 }
149 }
150
151 return 0;
152 }
153
alloc_mem(void * threadnum)154 static void *alloc_mem(void *threadnum)
155 {
156 int err;
157 int tnum = (int)(uintptr_t)threadnum;
158
159 /* waiting for other threads starting */
160 TST_CHECKPOINT_WAIT(0);
161
162 err = allocate_offline(tnum);
163 tst_res(TINFO,
164 "Thread [%d] returned %d, %s.", tnum, err, (err ? "failed" : "succeeded"));
165 return (void *)(uintptr_t) (err ? -1 : 0);
166 }
167
stress_alloc_offl(void)168 static void stress_alloc_offl(void)
169 {
170 int thread_index;
171 int thread_failure = 0;
172 pthread_t sigbus_monitor_t;
173
174 run_iterations++;
175
176 SAFE_PTHREAD_CREATE(&sigbus_monitor_t, NULL, sigbus_monitor, NULL);
177 pthread_detach(sigbus_monitor_t);
178
179 for (thread_index = 0; thread_index < number_threads; thread_index++) {
180 SAFE_PTHREAD_CREATE(&thread_ids[thread_index], NULL, alloc_mem,
181 (void *)(uintptr_t)thread_index);
182 }
183
184 TST_CHECKPOINT_WAKE2(0, number_threads);
185
186 for (thread_index = 0; thread_index < number_threads; thread_index++) {
187 void *status;
188
189 SAFE_PTHREAD_JOIN(thread_ids[thread_index], &status);
190 if ((intptr_t)status != 0) {
191 tst_res(TFAIL, "thread [%d] - exited with errors",
192 thread_index);
193 thread_failure++;
194 }
195 }
196
197 if (thread_failure == 0)
198 tst_res(TPASS, "soft-offline / mmap race still clean");
199 }
200
/*
 * ------------
 * Cleanup code:
 * The idea is to retrieve all the pfn numbers that have been soft-offlined
 * by the current test, i.e. those reported by a "Soft offlining pfn 0x..."
 * message in the kernel ring buffer after the "beginning_tag" message
 * written when the test started, and to put these pages back online by
 * writing each pfn number to the <debugfs>/hwpoison/unpoison-pfn special
 * file.
 * ------------
 */
#define OFFLINE_PATTERN "Soft offlining pfn 0x"
#define OFFLINE_PATTERN_LEN sizeof(OFFLINE_PATTERN)

/*
 * Return the pfn if the kmsg message is a soft-offline indication.
 *
 * line: NUL-terminated kernel log record
 * len:  number of valid bytes in line
 *
 * Returns the pfn parsed as hexadecimal right after OFFLINE_PATTERN, or 0
 * when the pattern is absent, lies beyond len, or the number overflows
 * an unsigned long.
 */
static unsigned long parse_kmsg_soft_offlined_pfn(char *line, ssize_t len)
{
	char *pos;
	unsigned long addr;

	pos = strstr(line, OFFLINE_PATTERN);
	if (pos == NULL)
		return 0UL;

	/* sizeof() counts the terminating NUL of the pattern, hence the -1 */
	pos += OFFLINE_PATTERN_LEN - 1;
	if (pos > (line + len))
		return 0UL;

	/*
	 * strtoul() only sets errno on failure: clear it first so a stale
	 * ERANGE cannot turn a legitimate ULONG_MAX into an error.
	 */
	errno = 0;
	addr = strtoul(pos, NULL, 16);
	if ((addr == ULONG_MAX) && (errno == ERANGE))
		return 0UL;

	return addr;
}
234
/*
 * Collect the pfns seen in the kernel message log.
 *
 * /dev/kmsg is read record by record without blocking. Records are
 * ignored until one containing begin_tag is found; from then on every
 * soft-offline message contributes its pfn to pfns[], up to max entries.
 *
 * Returns the number of pfns stored.
 */
static int populate_from_klog(char *begin_tag, unsigned long *pfns, int max)
{
	int found = 0, fd, beginning_tag_found = 0;
	ssize_t sz;
	unsigned long pfn;
	/* one spare byte so that a full-size record can be NUL-terminated */
	char buf[BUFSIZ + 1];

	fd = SAFE_OPEN("/dev/kmsg", O_RDONLY|O_NONBLOCK);

	while (found < max) {
		sz = read(fd, buf, sizeof(buf) - 1);
		/* kmsg returns EPIPE if record was modified while reading */
		if (sz < 0 && errno == EPIPE)
			continue;
		if (sz <= 0)
			break;

		/*
		 * read() does not terminate the data: without this the
		 * string searches below would also scan the leftovers of a
		 * previous, longer record.
		 */
		buf[sz] = '\0';

		if (!beginning_tag_found) {
			if (strstr(buf, begin_tag))
				beginning_tag_found = 1;
			continue;
		}
		pfn = parse_kmsg_soft_offlined_pfn(buf, sz);
		if (pfn)
			pfns[found++] = pfn;
	}
	SAFE_CLOSE(fd);
	return found;
}
264
/*
 * Scan the file at path line by line, looking for key as a substring.
 * Reading stops at the first matching line.
 *
 * Returns 1 if the key is found, 0 otherwise.
 */
static int find_in_file(char *path, char *key)
{
	char buf[4096];
	FILE *fp = SAFE_FOPEN(path, "r");
	int hit = 0;

	while (!hit && fgets(buf, sizeof(buf), fp) != NULL)
		hit = (strstr(buf, key) != NULL);

	SAFE_FCLOSE(fp);
	return hit;
}
284
/*
 * Format pfn as "0x<hex>" and write that text to fd (the caller passes
 * the descriptor returned by open_unpoison_pfn()).
 */
static void unpoison_this_pfn(unsigned long pfn, int fd)
{
	/* room for "0x", 16 hex digits and the terminating NUL */
	char text[19];
	size_t text_len;

	snprintf(text, sizeof(text), "0x%lx", pfn);
	text_len = strlen(text);
	SAFE_WRITE(0, fd, text, text_len);
}
292
293 /* Find and open the <debugfs>/hwpoison/unpoison-pfn special file */
open_unpoison_pfn(void)294 static int open_unpoison_pfn(void)
295 {
296 char *added_file_path = "/hwpoison/unpoison-pfn";
297 const char *const cmd_modprobe[] = {"modprobe", HW_MODULE, NULL};
298 char debugfs_fp[4096];
299 struct mntent *mnt;
300 FILE *mntf;
301
302 if (!find_in_file("/proc/modules", HW_MODULE) && tst_check_builtin_driver(HW_MODULE))
303 hwpoison_probe = 1;
304
305 /* probe hwpoison only if it isn't already there */
306 if (hwpoison_probe)
307 SAFE_CMD(cmd_modprobe, NULL, NULL);
308
309 /* debugfs mount point */
310 mntf = setmntent("/etc/mtab", "r");
311 if (!mntf) {
312 tst_brk(TBROK | TERRNO, "Can't open /etc/mtab");
313 return -1;
314 }
315 while ((mnt = getmntent(mntf)) != NULL) {
316 if (strcmp(mnt->mnt_type, "debugfs") == 0) {
317 strcpy(debugfs_fp, mnt->mnt_dir);
318 strcat(debugfs_fp, added_file_path);
319 break;
320 }
321 }
322 endmntent(mntf);
323 if (!mnt)
324 return -1;
325
326 TEST(open(debugfs_fp, O_WRONLY));
327
328 if (TST_RET == -1 && TST_ERR == EPERM && tst_lockdown_enabled() > 0) {
329 tst_res(TINFO,
330 "Cannot restore soft-offlined memory due to lockdown");
331 return TST_RET;
332 }
333
334 if (TST_RET == -1) {
335 tst_brk(TBROK | TTERRNO, "open(%s) failed", debugfs_fp);
336 } else if (TST_RET < 0) {
337 tst_brk(TBROK | TTERRNO, "Invalid open() return value %ld",
338 TST_RET);
339 }
340
341 return TST_RET;
342 }
343
344 /*
345 * Get all the Offlined PFNs indicated in the dmesg output
346 * starting after the given beginning tag, and request a debugfs
347 * hwpoison/unpoison-pfn for each of them.
348 */
unpoison_pfn(char * begin_tag)349 static void unpoison_pfn(char *begin_tag)
350 {
351 unsigned long *pfns;
352 const char *const cmd_rmmod[] = {"rmmod", HW_MODULE, NULL};
353 int found_pfns, fd;
354
355 pfns = SAFE_MALLOC(sizeof(pfns) * maximum_pfns * run_iterations);
356
357 fd = open_unpoison_pfn();
358 if (fd >= 0) {
359 found_pfns = populate_from_klog(begin_tag, pfns, maximum_pfns * run_iterations);
360
361 tst_res(TINFO, "Restore %d Soft-offlined pages", found_pfns);
362 /* unpoison in reverse order */
363 while (found_pfns-- > 0)
364 unpoison_this_pfn(pfns[found_pfns], fd);
365
366 SAFE_CLOSE(fd);
367 }
368 /* remove hwpoison only if we probed it */
369 if (hwpoison_probe)
370 SAFE_CMD(cmd_rmmod, NULL, NULL);
371 }
372
373 /*
374 * Create and write a beginning tag to the kernel buffer to be used on cleanup
375 * when trying to restore the soft-offlined pages of our test run.
376 */
write_beginning_tag_to_kmsg(void)377 static void write_beginning_tag_to_kmsg(void)
378 {
379 int fd;
380
381 fd = SAFE_OPEN("/dev/kmsg", O_WRONLY);
382 snprintf(beginning_tag, sizeof(beginning_tag),
383 "Soft-offlining pages test starting (pid: %ld)",
384 (long)getpid());
385 SAFE_WRITE(1, fd, beginning_tag, strlen(beginning_tag));
386 SAFE_CLOSE(fd);
387 }
388
setup(void)389 static void setup(void)
390 {
391 struct sigaction my_sigaction;
392
393 number_threads = (int)sysconf(_SC_NPROCESSORS_ONLN) * 2;
394 if (number_threads <= 1)
395 number_threads = 2;
396 else if (number_threads > 5)
397 number_threads = 5;
398
399 maximum_pfns = number_threads * NUM_LOOPS * NUM_PAGES;
400 thread_ids = SAFE_MALLOC(sizeof(pthread_t) * number_threads);
401 pagesize = sysconf(_SC_PAGESIZE);
402
403 /* SIGBUS is the main failure criteria */
404 my_sigaction.sa_handler = sigbus_handler;
405 if (sigaction(SIGBUS, &my_sigaction, NULL) == -1)
406 tst_res(TFAIL | TERRNO, "Signal handler attach failed");
407
408 write_beginning_tag_to_kmsg();
409 tst_res(TINFO, "Spawning %d threads, with a total of %d memory pages",
410 number_threads, maximum_pfns);
411 }
412
cleanup(void)413 static void cleanup(void)
414 {
415 unpoison_pfn(beginning_tag);
416 }
417
418 static struct tst_test test = {
419 .needs_root = 1,
420 .needs_drivers = (const char *const []) {
421 HW_MODULE,
422 NULL
423 },
424 .needs_cmds = (const char *[]) {
425 "modprobe",
426 "rmmod",
427 NULL
428 },
429 .max_runtime = 30,
430 .needs_checkpoints = 1,
431 .setup = setup,
432 .cleanup = cleanup,
433 .test_all = stress_alloc_offl,
434 .tags = (const struct tst_tag[]) {
435 {"linux-git", "d4ae9916ea29"},
436 {}
437 }
438 };
439