1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (c) 2020 Cyril Hrubis <chrubis@suse.cz>
4 */
5 /*
6 * Boolean expression is evaluated in three steps.
7 *
8 * First of all the string containing the expression is tokenized. The
9 * tokenizer runs twice and we only count number of tokens in the first pass in
10 * order to simplify the memory allocation.
11 *
12 * Secondly the the expression is transformed to a postfix (RPN) notation by
13 * the shunting yard algorithm and the correctness of the expression is checked
14 * during the transformation as well. The fact that parenthesis are matched is
15 * asserted by the shunting yard algorithm itself while the rest is checked
16 * simply by checking if the preceding token is in a set of allowed tokens.
17 * This could be thought of as a simple open-coded state machine.
18 *
19 * An expression in the RPN form can be evaluated given a variable mapping
20 * function. The evaluation ignores most of errors because invalid expression
21 * will be rejected in the second step.
22 */
23
24 #include <string.h>
25 #include <stdlib.h>
26 #include <stdio.h>
27 #include "tst_bool_expr.h"
28
char_to_op(char c)29 static int char_to_op(char c)
30 {
31 switch (c) {
32 case '(':
33 return TST_OP_LPAR;
34 case ')':
35 return TST_OP_RPAR;
36 case '&':
37 return TST_OP_AND;
38 case '|':
39 return TST_OP_OR;
40 case '!':
41 return TST_OP_NOT;
42 default:
43 return TST_OP_VAR;
44 }
45 }
46
new_tok(struct tst_expr_tok ** last,const char * tok,size_t tok_len)47 static int new_tok(struct tst_expr_tok **last, const char *tok, size_t tok_len)
48 {
49 if (!tok_len)
50 return 0;
51
52 if (!(*last))
53 return 1;
54
55 (*last)->tok = tok;
56 (*last)->tok_len = tok_len;
57 (*last)->op = char_to_op(tok[0]);
58 (*last)->priv = NULL;
59 (*last)++;
60
61 return 1;
62 }
63
tokenize(const char * expr,struct tst_expr_tok * last)64 static unsigned int tokenize(const char *expr, struct tst_expr_tok *last)
65 {
66 size_t i, j;
67 unsigned int token_cnt = 0;
68
69 for (j = i = 0; expr[i]; i++) {
70 switch (expr[i]) {
71 case '(':
72 case ')':
73 case '!':
74 case '&':
75 case '|':
76 token_cnt += new_tok(&last, &expr[j], i - j);
77 token_cnt += new_tok(&last, &expr[i], 1);
78 j = i+1;
79 break;
80 case '\t':
81 case ' ':
82 token_cnt += new_tok(&last, &expr[j], i - j);
83 j = i+1;
84 break;
85 case '"':
86 while (expr[i+1] != '"' && expr[i+1])
87 i++;
88
89 if (expr[i+1] == '"')
90 i++;
91 break;
92 default:
93 break;
94 }
95 }
96
97 token_cnt += new_tok(&last, &expr[j], i - j);
98
99 return token_cnt;
100 }
101
tst_bool_expr_print(FILE * f,struct tst_expr * expr)102 void tst_bool_expr_print(FILE *f, struct tst_expr *expr)
103 {
104 struct tst_expr_tok *i;
105 size_t j;
106
107 for (i = expr->rpn; i; i = i->next) {
108 for (j = 0; j < i->tok_len; j++)
109 putc(i->tok[j], f);
110
111 if (i->next)
112 putc(' ', f);
113 }
114 }
115
print_spaces(FILE * f,unsigned int spaces)116 static void print_spaces(FILE *f, unsigned int spaces)
117 {
118 while (spaces--)
119 putc(' ', f);
120 }
121
tst_bool_expr_err(FILE * f,struct tst_expr * expr,unsigned int cnt)122 static void tst_bool_expr_err(FILE *f, struct tst_expr *expr, unsigned int cnt)
123 {
124 unsigned int i, spaces, err_len;
125 const char *err;
126
127 fprintf(f, "%s", expr->buf->tok);
128 fprintf(f, "\n");
129
130 for (i = 0; i < cnt; i++) {
131 if (expr->buf[i].priv)
132 break;
133 }
134
135 spaces = expr->buf[i].tok - expr->buf[0].tok;
136 err = expr->buf[i].priv;
137 err_len = strlen(err);
138
139 print_spaces(f, spaces);
140
141 fprintf(f, "^\n");
142
143 if (err_len < spaces)
144 print_spaces(f, spaces - err_len + 1);
145
146 fprintf(f, "%s\n", err);
147 }
148
stack_push(struct tst_expr_tok * stack[],unsigned int * op_stack_pos,struct tst_expr_tok * op)149 static inline void stack_push(struct tst_expr_tok *stack[], unsigned int *op_stack_pos,
150 struct tst_expr_tok *op)
151 {
152 stack[(*op_stack_pos)++] = op;
153 }
154
stack_empty(unsigned int op_stack_pos)155 static inline int stack_empty(unsigned int op_stack_pos)
156 {
157 return op_stack_pos == 0;
158 }
159
stack_pop(struct tst_expr_tok * stack[],unsigned int * op_stack_pos)160 static inline struct tst_expr_tok *stack_pop(struct tst_expr_tok *stack[],
161 unsigned int *op_stack_pos)
162 {
163 if (stack_empty(*op_stack_pos))
164 return NULL;
165
166 return stack[--(*op_stack_pos)];
167 }
168
169 #define TST_OP_NONE -1
170
stack_peek_op(struct tst_expr_tok * stack[],unsigned int op_stack_pos)171 static inline int stack_peek_op(struct tst_expr_tok *stack[],
172 unsigned int op_stack_pos)
173 {
174 if (stack_empty(op_stack_pos))
175 return TST_OP_NONE;
176
177 return stack[op_stack_pos - 1]->op;
178 }
179
180 /*
181 * This is a complete list of which tokens can happen before any of:
182 * - variable
183 * - left parentesis
184 * - negation
185 *
186 * The -1 represents start of the expression.
187 */
check_one(int op)188 static inline int check_one(int op)
189 {
190 switch (op) {
191 case TST_OP_AND:
192 case TST_OP_OR:
193 case TST_OP_NOT:
194 case TST_OP_NONE:
195 case TST_OP_LPAR:
196 return 0;
197 default:
198 return 1;
199 }
200 }
201
202 /*
203 * And this checks for tokens that can happen before any of:
204 * - right parentesis
205 * - and
206 * - or
207 *
208 * This is also used to check that the last token in expression is correct one.
209 */
check_two(int op)210 static inline int check_two(int op)
211 {
212 switch (op) {
213 case TST_OP_VAR:
214 case TST_OP_RPAR:
215 return 1;
216 default:
217 return 0;
218 }
219 }
220
shunting_yard(struct tst_expr * expr,unsigned int cnt)221 static int shunting_yard(struct tst_expr *expr, unsigned int cnt)
222 {
223 struct tst_expr_tok *op_stack[cnt];
224 unsigned int op_stack_pos = 0;
225 struct tst_expr_tok *out[cnt + 1];
226 unsigned int out_pos = 0;
227 struct tst_expr_tok *i;
228 unsigned int j;
229 int prev_op = TST_OP_NONE;
230
231 for (i = expr->buf; i < &(expr->buf[cnt]); i++) {
232 switch (i->op) {
233 case TST_OP_VAR:
234 if (check_one(prev_op)) {
235 i->priv = "Expected operation";
236 goto err;
237 }
238
239 stack_push(out, &out_pos, i);
240
241 while (stack_peek_op(op_stack, op_stack_pos) == TST_OP_NOT)
242 stack_push(out, &out_pos, stack_pop(op_stack, &op_stack_pos));
243 break;
244 case TST_OP_LPAR:
245 if (check_one(prev_op)) {
246 i->priv = "Expected operation";
247 goto err;
248 }
249
250 stack_push(op_stack, &op_stack_pos, i);
251 break;
252 case TST_OP_RPAR:
253 if (!check_two(prev_op)) {
254 i->priv = "Expected variable or )";
255 goto err;
256 }
257
258 /* pop everything till ( */
259 for (;;) {
260 struct tst_expr_tok *op = stack_pop(op_stack, &op_stack_pos);
261
262 if (!op) {
263 i->priv = "Missing (";
264 goto err;
265 }
266
267 if (op->op == TST_OP_LPAR)
268 break;
269
270 stack_push(out, &out_pos, op);
271 }
272
273 while (stack_peek_op(op_stack, op_stack_pos) == TST_OP_NOT)
274 stack_push(out, &out_pos, stack_pop(op_stack, &op_stack_pos));
275 break;
276 case TST_OP_NOT:
277 if (check_one(prev_op)) {
278 i->priv = "Expected operation";
279 goto err;
280 }
281 stack_push(op_stack, &op_stack_pos, i);
282 break;
283 case TST_OP_AND:
284 case TST_OP_OR:
285 if (!check_two(prev_op)) {
286 i->priv = "Expected variable or (";
287 goto err;
288 }
289
290 /*
291 * There can be at most one binary op on the stack
292 * since we pop the one present on the stack before we
293 * attempt to push new one they so never accumulate.
294 */
295 switch (stack_peek_op(op_stack, op_stack_pos)) {
296 case TST_OP_AND:
297 case TST_OP_OR:
298 stack_push(out, &out_pos, stack_pop(op_stack, &op_stack_pos));
299 break;
300 }
301
302 stack_push(op_stack, &op_stack_pos, i);
303 break;
304 }
305
306 prev_op = i->op;
307 }
308
309 if (!check_two(expr->buf[cnt-1].op)) {
310 expr->buf[cnt-1].priv = "Unfinished expression";
311 goto err;
312 }
313
314 /* pop remaining operations */
315 while ((i = stack_pop(op_stack, &op_stack_pos))) {
316 if (i->op == TST_OP_LPAR) {
317 i->priv = "Missing )";
318 goto err;
319 }
320
321 stack_push(out, &out_pos, i);
322 }
323
324 /* construct the list */
325 out[out_pos] = NULL;
326
327 for (j = 0; j < out_pos; j++)
328 out[j]->next = out[j + 1];
329
330 expr->rpn = out[0];
331
332 return 0;
333 err:
334 return 1;
335 }
336
tst_bool_expr_parse(const char * expr)337 struct tst_expr *tst_bool_expr_parse(const char *expr)
338 {
339 struct tst_expr *ret;
340 unsigned int tok_cnt = tokenize(expr, NULL);
341
342 if (!tok_cnt)
343 return NULL;
344
345 ret = malloc(sizeof(struct tst_expr) + sizeof(struct tst_expr_tok) * tok_cnt);
346 if (!ret)
347 return NULL;
348
349 tokenize(expr, ret->buf);
350
351 if (shunting_yard(ret, tok_cnt)) {
352 tst_bool_expr_err(stderr, ret, tok_cnt);
353 tst_bool_expr_free(ret);
354 return NULL;
355 }
356
357 return ret;
358 }
359
360 #define MAX_STACK 16
361
tst_bool_expr_eval(struct tst_expr * expr,int (* map)(struct tst_expr_tok * var))362 int tst_bool_expr_eval(struct tst_expr *expr,
363 int (*map)(struct tst_expr_tok *var))
364 {
365 struct tst_expr_tok *i;
366 int stack[MAX_STACK];
367 int pos = -1;
368
369 for (i = expr->rpn; i; i = i->next) {
370 switch (i->op) {
371 case TST_OP_NOT:
372 stack[pos] = !stack[pos];
373 break;
374 case TST_OP_OR:
375 stack[pos-1] = stack[pos] || stack[pos-1];
376 pos--;
377 break;
378 case TST_OP_AND:
379 stack[pos-1] = stack[pos] && stack[pos-1];
380 pos--;
381 break;
382 case TST_OP_VAR:
383 if (pos + 1 >= MAX_STACK) {
384 fprintf(stderr, "Eval out of stack!\n");
385 return -1;
386 }
387
388 stack[++pos] = map(i);
389
390 /* map reported undefined variable -> abort */
391 if (stack[pos] == -1)
392 return -1;
393 break;
394 /* does not happen */
395 default:
396 break;
397 }
398 }
399
400 return stack[0];
401 }
402
tst_bool_expr_free(struct tst_expr * expr)403 void tst_bool_expr_free(struct tst_expr *expr)
404 {
405 free(expr);
406 }
407