1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (c) 2020 Cyril Hrubis <chrubis@suse.cz>
4 */
5 /*
6 * Boolean expression is evaluated in three steps.
7 *
8 * First of all the string containing the expression is tokenized. The
9 * tokenizer runs twice and we only count number of tokens in the first pass in
10 * order to simplify the memory allocation.
11 *
12 * Secondly the the expression is transformed to a postfix (RPN) notation by
13 * the shunting yard algorithm and the correctness of the expression is checked
14 * during the transformation as well. The fact that parenthesis are matched is
15 * asserted by the shunting yard algorithm itself while the rest is checked
16 * simply by checking if the preceding token is in a set of allowed tokens.
17 * This could be thought of as a simple open-coded state machine.
18 *
19 * An expression in the RPN form can be evaluated given a variable mapping
20 * function. The evaluation ignores most of errors because invalid expression
21 * will be rejected in the second step.
22 */
23
24 #include <string.h>
25 #include <stdlib.h>
26 #include <stdio.h>
27 #include "tst_bool_expr.h"
28
char_to_op(char c)29 static int char_to_op(char c)
30 {
31 switch (c) {
32 case '(':
33 return TST_OP_LPAR;
34 case ')':
35 return TST_OP_RPAR;
36 case '&':
37 return TST_OP_AND;
38 case '|':
39 return TST_OP_OR;
40 case '!':
41 return TST_OP_NOT;
42 default:
43 return TST_OP_VAR;
44 }
45 }
46
new_tok(struct tst_expr_tok ** last,const char * tok,size_t tok_len)47 static int new_tok(struct tst_expr_tok **last, const char *tok, size_t tok_len)
48 {
49 if (!tok_len)
50 return 0;
51
52 if (!(*last))
53 return 1;
54
55 (*last)->tok = tok;
56 (*last)->tok_len = tok_len;
57 (*last)->op = char_to_op(tok[0]);
58 (*last)++;
59
60 return 1;
61 }
62
tokenize(const char * expr,struct tst_expr_tok * last)63 static unsigned int tokenize(const char *expr, struct tst_expr_tok *last)
64 {
65 size_t i, j;
66 unsigned int token_cnt = 0;
67
68 for (j = i = 0; expr[i]; i++) {
69 switch (expr[i]) {
70 case '(':
71 case ')':
72 case '!':
73 case '&':
74 case '|':
75 token_cnt += new_tok(&last, &expr[j], i - j);
76 token_cnt += new_tok(&last, &expr[i], 1);
77 j = i+1;
78 break;
79 case '\t':
80 case ' ':
81 token_cnt += new_tok(&last, &expr[j], i - j);
82 j = i+1;
83 break;
84 case '"':
85 while (expr[i+1] != '"' && expr[i+1])
86 i++;
87
88 if (expr[i+1] == '"')
89 i++;
90 break;
91 default:
92 break;
93 }
94 }
95
96 token_cnt += new_tok(&last, &expr[j], i - j);
97
98 return token_cnt;
99 }
100
tst_bool_expr_print(FILE * f,struct tst_expr * expr)101 void tst_bool_expr_print(FILE *f, struct tst_expr *expr)
102 {
103 struct tst_expr_tok *i;
104 size_t j;
105
106 for (i = expr->rpn; i; i = i->next) {
107 for (j = 0; j < i->tok_len; j++)
108 putc(i->tok[j], f);
109
110 if (i->next)
111 putc(' ', f);
112 }
113 }
114
print_spaces(FILE * f,unsigned int spaces)115 static void print_spaces(FILE *f, unsigned int spaces)
116 {
117 while (spaces--)
118 putc(' ', f);
119 }
120
tst_bool_expr_err(FILE * f,struct tst_expr * expr,unsigned int cnt)121 static void tst_bool_expr_err(FILE *f, struct tst_expr *expr, unsigned int cnt)
122 {
123 unsigned int i, spaces, err_len;
124 const char *err;
125
126 fprintf(f, "%s", expr->buf->tok);
127 fprintf(f, "\n");
128
129 for (i = 0; i < cnt; i++) {
130 if (expr->buf[i].priv)
131 break;
132 }
133
134 spaces = expr->buf[i].tok - expr->buf[0].tok;
135 err = expr->buf[i].priv;
136 err_len = strlen(err);
137
138 print_spaces(f, spaces);
139
140 fprintf(f, "^\n");
141
142 if (err_len < spaces)
143 print_spaces(f, spaces - err_len + 1);
144
145 fprintf(f, "%s\n", err);
146 }
147
stack_push(struct tst_expr_tok * stack[],unsigned int * op_stack_pos,struct tst_expr_tok * op)148 static inline void stack_push(struct tst_expr_tok *stack[], unsigned int *op_stack_pos,
149 struct tst_expr_tok *op)
150 {
151 stack[(*op_stack_pos)++] = op;
152 }
153
stack_empty(unsigned int op_stack_pos)154 static inline int stack_empty(unsigned int op_stack_pos)
155 {
156 return op_stack_pos == 0;
157 }
158
stack_pop(struct tst_expr_tok * stack[],unsigned int * op_stack_pos)159 static inline struct tst_expr_tok *stack_pop(struct tst_expr_tok *stack[],
160 unsigned int *op_stack_pos)
161 {
162 if (stack_empty(*op_stack_pos))
163 return NULL;
164
165 return stack[--(*op_stack_pos)];
166 }
167
168 #define TST_OP_NONE -1
169
stack_peek_op(struct tst_expr_tok * stack[],unsigned int op_stack_pos)170 static inline int stack_peek_op(struct tst_expr_tok *stack[],
171 unsigned int op_stack_pos)
172 {
173 if (stack_empty(op_stack_pos))
174 return TST_OP_NONE;
175
176 return stack[op_stack_pos - 1]->op;
177 }
178
179 /*
180 * This is a complete list of which tokens can happen before any of:
181 * - variable
182 * - left parentesis
183 * - negation
184 *
185 * The -1 represents start of the expression.
186 */
check_one(int op)187 static inline int check_one(int op)
188 {
189 switch (op) {
190 case TST_OP_AND:
191 case TST_OP_OR:
192 case TST_OP_NOT:
193 case TST_OP_NONE:
194 case TST_OP_LPAR:
195 return 0;
196 default:
197 return 1;
198 }
199 }
200
201 /*
202 * And this checks for tokens that can happen before any of:
203 * - right parentesis
204 * - and
205 * - or
206 *
207 * This is also used to check that the last token in expression is correct one.
208 */
check_two(int op)209 static inline int check_two(int op)
210 {
211 switch (op) {
212 case TST_OP_VAR:
213 case TST_OP_RPAR:
214 return 1;
215 default:
216 return 0;
217 }
218 }
219
shunting_yard(struct tst_expr * expr,unsigned int cnt)220 static int shunting_yard(struct tst_expr *expr, unsigned int cnt)
221 {
222 struct tst_expr_tok *op_stack[cnt];
223 unsigned int op_stack_pos = 0;
224 struct tst_expr_tok *out[cnt + 1];
225 unsigned int out_pos = 0;
226 struct tst_expr_tok *i;
227 unsigned int j;
228 int prev_op = TST_OP_NONE;
229
230 for (i = expr->buf; i < &(expr->buf[cnt]); i++) {
231 switch (i->op) {
232 case TST_OP_VAR:
233 if (check_one(prev_op)) {
234 i->priv = "Expected operation";
235 goto err;
236 }
237
238 stack_push(out, &out_pos, i);
239
240 while (stack_peek_op(op_stack, op_stack_pos) == TST_OP_NOT)
241 stack_push(out, &out_pos, stack_pop(op_stack, &op_stack_pos));
242 break;
243 case TST_OP_LPAR:
244 if (check_one(prev_op)) {
245 i->priv = "Expected operation";
246 goto err;
247 }
248
249 stack_push(op_stack, &op_stack_pos, i);
250 break;
251 case TST_OP_RPAR:
252 if (!check_two(prev_op)) {
253 i->priv = "Expected variable or )";
254 goto err;
255 }
256
257 /* pop everything till ( */
258 for (;;) {
259 struct tst_expr_tok *op = stack_pop(op_stack, &op_stack_pos);
260
261 if (!op) {
262 i->priv = "Missing (";
263 goto err;
264 }
265
266 if (op->op == TST_OP_LPAR)
267 break;
268
269 stack_push(out, &out_pos, op);
270 }
271
272 while (stack_peek_op(op_stack, op_stack_pos) == TST_OP_NOT)
273 stack_push(out, &out_pos, stack_pop(op_stack, &op_stack_pos));
274 break;
275 case TST_OP_NOT:
276 if (check_one(prev_op)) {
277 i->priv = "Expected operation";
278 goto err;
279 }
280 stack_push(op_stack, &op_stack_pos, i);
281 break;
282 case TST_OP_AND:
283 case TST_OP_OR:
284 if (!check_two(prev_op)) {
285 i->priv = "Expected variable or (";
286 goto err;
287 }
288
289 /*
290 * There can be at most one binary op on the stack
291 * since we pop the one present on the stack before we
292 * attempt to push new one they so never accumulate.
293 */
294 switch (stack_peek_op(op_stack, op_stack_pos)) {
295 case TST_OP_AND:
296 case TST_OP_OR:
297 stack_push(out, &out_pos, stack_pop(op_stack, &op_stack_pos));
298 break;
299 }
300
301 stack_push(op_stack, &op_stack_pos, i);
302 break;
303 }
304
305 prev_op = i->op;
306 }
307
308 if (!check_two(expr->buf[cnt-1].op)) {
309 expr->buf[cnt-1].priv = "Unfinished expression";
310 goto err;
311 }
312
313 /* pop remaining operations */
314 while ((i = stack_pop(op_stack, &op_stack_pos))) {
315 if (i->op == TST_OP_LPAR) {
316 i->priv = "Missing )";
317 goto err;
318 }
319
320 stack_push(out, &out_pos, i);
321 }
322
323 /* construct the list */
324 out[out_pos] = NULL;
325
326 for (j = 0; j < out_pos; j++)
327 out[j]->next = out[j + 1];
328
329 expr->rpn = out[0];
330
331 return 0;
332 err:
333 return 1;
334 }
335
tst_bool_expr_parse(const char * expr)336 struct tst_expr *tst_bool_expr_parse(const char *expr)
337 {
338 struct tst_expr *ret;
339 unsigned int tok_cnt = tokenize(expr, NULL);
340
341 if (!tok_cnt)
342 return NULL;
343
344 ret = malloc(sizeof(struct tst_expr) + sizeof(struct tst_expr_tok) * tok_cnt);
345 if (!ret)
346 return NULL;
347
348 tokenize(expr, ret->buf);
349
350 if (shunting_yard(ret, tok_cnt)) {
351 tst_bool_expr_err(stderr, ret, tok_cnt);
352 tst_bool_expr_free(ret);
353 return NULL;
354 }
355
356 return ret;
357 }
358
359 #define MAX_STACK 16
360
tst_bool_expr_eval(struct tst_expr * expr,int (* map)(struct tst_expr_tok * var))361 int tst_bool_expr_eval(struct tst_expr *expr,
362 int (*map)(struct tst_expr_tok *var))
363 {
364 struct tst_expr_tok *i;
365 int stack[MAX_STACK];
366 int pos = -1;
367
368 for (i = expr->rpn; i; i = i->next) {
369 switch (i->op) {
370 case TST_OP_NOT:
371 stack[pos] = !stack[pos];
372 break;
373 case TST_OP_OR:
374 stack[pos-1] = stack[pos] || stack[pos-1];
375 pos--;
376 break;
377 case TST_OP_AND:
378 stack[pos-1] = stack[pos] && stack[pos-1];
379 pos--;
380 break;
381 case TST_OP_VAR:
382 if (pos + 1 >= MAX_STACK) {
383 fprintf(stderr, "Eval out of stack!\n");
384 return -1;
385 }
386
387 stack[++pos] = map(i);
388
389 /* map reported undefined variable -> abort */
390 if (stack[pos] == -1)
391 return -1;
392 break;
393 /* does not happen */
394 default:
395 break;
396 }
397 }
398
399 return stack[0];
400 }
401
tst_bool_expr_free(struct tst_expr * expr)402 void tst_bool_expr_free(struct tst_expr *expr)
403 {
404 free(expr);
405 }
406