// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2021 Richard Palethorpe <rpalethorpe@suse.com>
 */
/*
 * [Description]
 *
 * This verifies Fuzzy Sync's ability to reproduce a particular
 * outcome to a data race when multiple races are present.
 *
 * We make the simplifying assumptions that:
 * - There is one data race we want to hit and one to avoid.
 * - Each thread contains two contiguous critical sections, one for each race.
 * - The threads only interact through two variables (H: Hit, D: Avoid), one for each race.
 * - If we hit the race we want to avoid, it causes thread A to exit early.
 *
 * We don't consider more complicated dynamic interactions between the
 * two threads. Fuzzy Sync will eventually trigger a race so long as
 * the delay range is large enough, assuming the race can be reproduced
 * at all without further tampering to increase the race window (a
 * technique specific to each race). So I conjecture that beyond a lower
 * threshold of complexity, increasing the complexity of the race is
 * no different from adding random noise.
 *
 * Empirically this appears to be true. So far we have seen in
 * reproducers that there are no more than two significant data
 * races. One we wish to reproduce and one we wish to avoid. It is
 * possible that the code contains multiple data races, but that they
 * appear only as two to us.
 *
 * Indeed it is also only possible to add a delay to A or B. So
 * regardless of the underlying complexity we really only have two
 * options.
 *
 * Here we only test a bias to delay B. A delay of A would be
 * identical except that the necessary delay bias would be negative.
 *
 */

#include "tst_test.h"
#include "tst_fuzzy_sync.h"

/* The time signature of a code path containing a critical section. */
struct window {
	/* The delay until the start of the critical section */
	const int critical_s;
	/* The length of the critical section */
	const int critical_t;
	/* The remaining delay until the method returns */
	const int return_t;
};

/* The time signatures of threads A and B. We interlace the two
 * windows for each thread. bd.return_t is ignored, but ad.return_t is
 * used instead of a.return_t if the ad and bd critical sections
 * overlap. This may result in the critical section of a never being
 * reached.
 */
struct race {
	const struct window ad;
	const struct window a;
	const struct window bd;
	const struct window b;
};

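/* Shared counters for the race we want to hit (H) and the one we want to avoid (D). */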
static int H, D;
static struct tst_fzsync_pair pair;

/**
 * Race 1:
 *  Thread A:   |---(1)--|[1]|---(1)---|
 *  Thread B:   |---(1)--|[1]|---(1)---|
 *  ad (A):     |---(0)|[1]|(0)---|
 *  bd (B):     |---(0)|[1]|(0)---|
 *
 * Race 2:
 *  Thread A:   |------------------(30)------------------|[1]|---(1)---|
 *  Thread B:   |---(1)--|[1]|---(1)---|
 *  ad (A):     |---(0)|[1]|--(0)---|
 *  bd (B):     |---(0)|[20]|--(0)---|
 *
 * Race 3:
 *  Thread A:   |--------------------(40)--------------------|[1]|---(0)---|
 *  Thread B:   |---(1)--|[1]|------------------(20)------------------|
 *  ad (A):     |---(1)--|[10]|--(0)---|
 *  bd (B):     |---(1)--|[10]|--(0)---|
 */
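/* Each window below is { critical_s, critical_t, return_t }, i.e. the
 * relative intervals from struct window matching the diagrams above:
 * |---(delay)---|[critical]|---(remaining)---|.
 */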
static const struct race races[] = {
	{ .a =  { 1, 1, 1 }, .b =  { 1, 1, 1 },
	  .ad = { 0, 1, 0 }, .bd = { 0, 1, 0 } },

	{ .a =  { 30, 1, 1 }, .b =  { 1, 1,  1 },
	  .ad = { 0,  1, 0 }, .bd = { 0, 20, 0 } },

	{ .a =  { 40, 1,  0 }, .b =  { 1, 1,  20 },
	  .ad = { 1,  10, 0 }, .bd = { 1, 10, 0 } },
};

static void cleanup(void)
{
	tst_fzsync_pair_cleanup(&pair);
}

static void setup(void)
{
	pair.min_samples = 10000;

	tst_fzsync_pair_init(&pair);
}

/**
 * to_abs() - Convert relative time intervals to absolute time points
 * @w: The input window structure containing relative time intervals
 *
 * This function converts relative time intervals (represented in the
 * struct window) into absolute time points, where:
 *
 * - The start of the critical section is `critical_s`.
 * - The end of the critical section is calculated as `critical_s + critical_t`.
 * - The end of execution is calculated as `critical_s + critical_t + return_t`.
 *
 * Return:
 * A new window structure (`wc`) with absolute time points.
 */
static struct window to_abs(const struct window w)
{
	const struct window wc = {
		w.critical_s,
		w.critical_s + w.critical_t,
		w.critical_s + w.critical_t + w.return_t,
	};

	return wc;
}

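/*
 * Thread B: walk an abstract clock from 0 to the end of B's windows,
 * bumping H at the boundaries of the critical section we want to hit
 * and D at the boundaries of the one we want to avoid.
 */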
static void *worker(void *v)
{
	unsigned int i = *(unsigned int *)v;
	const struct window b = to_abs(races[i].b);
	const struct window bd = to_abs(races[i].bd);
	int now, fin = MAX(b.return_t, bd.return_t);

	while (tst_fzsync_run_b(&pair)) {
		tst_fzsync_start_race_b(&pair);
		for (now = 0; now <= fin; now++) {
			if (now == b.critical_s || now == b.critical_t)
				tst_atomic_add_return(1, &H);
			if (now == bd.critical_s || now == bd.critical_t)
				tst_atomic_add_return(1, &D);

			sched_yield();
		}
		tst_fzsync_end_race_b(&pair);
	}

	return NULL;
}

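/*
 * Thread A: if D becomes visible inside the ad window, the race we want
 * to avoid happened and A returns early (fin is clipped to ad.return_t).
 * If H == 1 inside the a window, B is currently in its critical section,
 * i.e. we hit the desired race; H is bumped again so the hit is only
 * counted once and `critical` records it.
 */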
static void run(unsigned int i)
{
	const struct window a = to_abs(races[i].a);
	const struct window ad = to_abs(races[i].ad);
	int critical = 0;
	int now, fin;

	tst_fzsync_pair_reset(&pair, NULL);
	SAFE_PTHREAD_CREATE(&pair.thread_b, 0, worker, &i);

	while (tst_fzsync_run_a(&pair)) {
		H = 0;
		D = 0;
		fin = a.return_t;

		tst_fzsync_start_race_a(&pair);
		for (now = 0; now <= fin; now++) {
			if (now >= ad.critical_s &&
			    now <= ad.critical_t && tst_atomic_load(&D) > 0)
				fin = ad.return_t;

			if (now >= a.critical_s &&
			    now <= a.critical_t && tst_atomic_load(&H) == 1) {
				tst_atomic_add_return(1, &H);
				critical++;
			}

			sched_yield();
		}
		tst_fzsync_end_race_a(&pair);

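		/*
		 * A returned early, i.e. the avoided race fired; add a
		 * positive bias so that B is delayed a little more on
		 * subsequent runs (this test only biases towards delaying B,
		 * see the description above).
		 */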
		if (fin == ad.return_t)
			tst_fzsync_pair_add_bias(&pair, 1);

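		/*
		 * We have seen the desired race often enough; stop early and
		 * clear pair.exit so the check below does not report TCONF
		 * for a run that finished successfully.
		 */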
		if (critical > 100) {
			tst_fzsync_pair_cleanup(&pair);
			tst_atomic_store(0, &pair.exit);
			break;
		}
	}

	/*
	 * If `pair->exit` is true, the test may fail to produce the expected
	 * result due to resource constraints in shared CI environments
	 * (e.g. GitHub Actions). Limited control over CPU allocation can
	 * cause delays or interruptions in CPU time slices because of
	 * contention with other jobs.
	 *
	 * Binding the test to a single CPU core (e.g. via `taskset -c 0`)
	 * can make this worse by increasing contention, leading to
	 * performance degradation and premature loop termination.
	 *
	 * To keep results valid and reliable across environments (bare
	 * metal, VMs, CI), ignore the test result when the loop terminates
	 * early, avoiding unnecessary false positives.
	 */
	if (pair.exit) {
		tst_res(TCONF, "Test may not be able to generate a valid result");
		return;
	}

	tst_res(critical > 50 ? TPASS : TFAIL, "%d| =:%-4d", i, critical);
}

static struct tst_test test = {
	.tcnt = ARRAY_SIZE(races),
	.test = run,
	.setup = setup,
	.cleanup = cleanup,
	.runtime = 150,
};