1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (c) 2021 Richard Palethorpe <rpalethorpe@suse.com>
4 */
5 /*
6 * [Description]
7 *
8 * This verifies Fuzzy Sync's ability to reproduce a particular
9 * outcome to a data race when multiple races are present.
10 *
11 * We make the simplifying assumptions that:
12 * - There is one data race we want to hit and one to avoid.
13 * - Each thread contains two contiguous critical sections. One for each race.
14 * - The threads only interact through two variables(H: Hit, D: Avoid), one for each race.
15 * - If we hit the race we want to avoid then it causes thread A to exit early.
16 *
17 * We don't consider more complicated dynamic interactions between the
18 * two threads. Fuzzy Sync will eventually trigger a race so long as
19 * the delay range is large enough. Assuming the race is possible to
20 * reproduce without further tampering to increase the race window (a
21 * technique specific to each race). So I conject that beyond a lower
22 * threshold of complexity, increasing the complexity of the race is
23 * no different from adding random noise.
24 *
25 * Empirically this appears to be true. So far we have seen in
26 * reproducers that there are no more than two significant data
27 * races. One we wish to reproduce and one we wish to avoid. It is
28 * possible that the code contains multiple data races, but that they
29 * appear only as two to us.
30 *
31 * Indeed it is also only possible to add a delay to A or B. So
32 * regardless of the underlying complexity we really only have two
33 * options.
34 *
35 * Here we only test a bias to delay B. A delay of A would be
36 * identical except that the necessary delay bias would be negative.
37 *
38 */
39
40 #include "tst_test.h"
41 #include "tst_fuzzy_sync.h"
42
43 /* The time signature of a code path containing a critical section. */
44 struct window {
45 /* The delay until the start of the critical section */
46 const int critical_s;
47 /* The length of the critical section */
48 const int critical_t;
49 /* The remaining delay until the method returns */
50 const int return_t;
51 };
52
53 /* The time signatures of threads A and B. We interlace the two
54 * windows for each thread. bd.return_t is ignored, but ad.return_t is
55 * used instead of a.return_t if the ad and bd critical sections
56 * overlap. This may result in the critical section of a never being
57 * reached.
58 */
59 struct race {
60 const struct window ad;
61 const struct window a;
62 const struct window bd;
63 const struct window b;
64 };
65
66 static int H, D;
67 static struct tst_fzsync_pair pair;
68
69 /**
70 * Race 1:
71 * Thread A: |---(1)--|[1]|---(1)---|
72 * Thread B: |---(1)--|[1]|---(1)---|
73 * ad (A): |---(0)|[1]|(0)---|
74 * bd (B): |---(0)|[1]|(0)---|
75 *
76 * Race 2:
77 * Thread A: |------------------(30)------------------|[1]|---(1)---|
78 * Thread B: |---(1)--|[1]|---(1)---|
79 * ad (A): |---(0)|[1]|--(0)---|
80 * bd (B): |---(0)|[20]|--(0)---|
81 *
82 * Race 3:
83 * Thread A: |--------------------(40)--------------------|[1]|---(0)---|
84 * Thread B: |---(1)--|[1]|------------------(20)------------------|
85 * ad (A): |---(1)--|[10]|--(0)---|
86 * bd (B): |---(1)--|[10]|--(0)---|
87 */
88 static const struct race races[] = {
89 { .a = { 1, 1, 1 }, .b = { 1, 1, 1 },
90 .ad = { 0, 1, 0 }, .bd = { 0, 1, 0 } },
91
92 { .a = { 30, 1, 1 }, .b = { 1, 1, 1 },
93 .ad = { 0, 1, 0 }, .bd = { 0, 20, 0 } },
94
95 { .a = { 40, 1, 0 }, .b = { 1, 1, 20 },
96 .ad = { 1, 10, 0 }, .bd = { 1, 10, 0 } },
97 };
98
cleanup(void)99 static void cleanup(void)
100 {
101 tst_fzsync_pair_cleanup(&pair);
102 }
103
setup(void)104 static void setup(void)
105 {
106 pair.min_samples = 10000;
107
108 tst_fzsync_pair_init(&pair);
109 }
110
111 /**
112 * to_abs() - Convert relative time intervals to absolute time points
113 * @w: The input window structure containing relative time intervals
114 *
115 * This function converts relative time intervals (represented in the
116 * struct window) into absolute time points, where:
117 *
118 * - The start of the critical section is `critical_s`.
119 * - The end of the critical section is calculated as `critical_s + critical_t`.
120 * - The end of execution is calculated as `critical_s + critical_t + return_t`.
121 *
122 * Return:
123 * A new window structure (`wc`) with absolute time points.
124 */
to_abs(const struct window w)125 static struct window to_abs(const struct window w)
126 {
127 const struct window wc = {
128 w.critical_s,
129 w.critical_s + w.critical_t,
130 w.critical_s + w.critical_t + w.return_t,
131 };
132
133 return wc;
134 }
135
worker(void * v)136 static void *worker(void *v)
137 {
138 unsigned int i = *(unsigned int *)v;
139 const struct window b = to_abs(races[i].b);
140 const struct window bd = to_abs(races[i].bd);
141 int now, fin = MAX(b.return_t, bd.return_t);
142
143 while (tst_fzsync_run_b(&pair)) {
144 tst_fzsync_start_race_b(&pair);
145 for (now = 0; now <= fin; now++) {
146 if (now == b.critical_s || now == b.critical_t)
147 tst_atomic_add_return(1, &H);
148 if (now == bd.critical_s || now == bd.critical_t)
149 tst_atomic_add_return(1, &D);
150
151 sched_yield();
152 }
153 tst_fzsync_end_race_b(&pair);
154 }
155
156 return NULL;
157 }
158
run(unsigned int i)159 static void run(unsigned int i)
160 {
161 const struct window a = to_abs(races[i].a);
162 const struct window ad = to_abs(races[i].ad);
163 int critical = 0;
164 int now, fin;
165
166 tst_fzsync_pair_reset(&pair, NULL);
167 SAFE_PTHREAD_CREATE(&pair.thread_b, 0, worker, &i);
168
169 while (tst_fzsync_run_a(&pair)) {
170 H = 0;
171 D = 0;
172 fin = a.return_t;
173
174 tst_fzsync_start_race_a(&pair);
175 for (now = 0; now <= fin; now++) {
176 if (now >= ad.critical_s &&
177 now <= ad.critical_t && tst_atomic_load(&D) > 0)
178 fin = ad.return_t;
179
180 if (now >= a.critical_s &&
181 now <= a.critical_t && tst_atomic_load(&H) == 1) {
182 tst_atomic_add_return(1, &H);
183 critical++;
184 }
185
186 sched_yield();
187 }
188 tst_fzsync_end_race_a(&pair);
189
190 if (fin == ad.return_t)
191 tst_fzsync_pair_add_bias(&pair, 1);
192
193 if (critical > 100) {
194 tst_fzsync_pair_cleanup(&pair);
195 tst_atomic_store(0, &pair.exit);
196 break;
197 }
198 }
199
200 /*
201 * If `pair->exit` is true, the test may fail to meet expected
202 * results due to resource constraints in shared CI environments
203 * (e.g., GitHub Actions). Limited control over CPU allocation
204 * can cause delays or interruptions in CPU time slices due to
205 * contention with other jobs.
206 *
207 * Binding the test to a single CPU core (e.g., via `taskset -c 0`)
208 * can worsen this by increasing contention, leading to performance
209 * degradation and premature loop termination.
210 *
211 * To ensure valid and reliable results in scenarios (e.g., HW, VM, CI),
212 * it is best to ignore test result when loop termination occurs,
213 * avoiding unnecessary false positive.
214 */
215 if (pair.exit) {
216 tst_res(TCONF, "Test may not be able to generate a valid result");
217 return;
218 }
219
220 tst_res(critical > 50 ? TPASS : TFAIL, "%d| =:%-4d", i, critical);
221 }
222
223 static struct tst_test test = {
224 .tcnt = ARRAY_SIZE(races),
225 .test = run,
226 .setup = setup,
227 .cleanup = cleanup,
228 .runtime = 150,
229 };
230