1 /* expr.c - evaluate expression
2 *
3 * Copyright 2016 Google Inc.
4 * Copyright 2013 Daniel Verkamp <daniel@drv.nu>
5 *
6 * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/expr.html
7 *
8 * The web standard is incomplete (precedence grouping missing), see:
9 * http://permalink.gmane.org/gmane.comp.standards.posix.austin.general/10141
10 *
11 * eval_expr() uses the recursive "Precedence Climbing" algorithm:
12 *
13 * Clarke, Keith. "The top-down parsing of expressions." University of London.
14 * Queen Mary College. Department of Computer Science and Statistics, 1986.
15 *
16 * http://www.antlr.org/papers/Clarke-expr-parsing-1986.pdf
17 *
18 * Nice explanation and Python implementation:
19 * http://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing
20
21 USE_EXPR(NEWTOY(expr, NULL, TOYFLAG_USR|TOYFLAG_BIN))
22
23 config EXPR
24 bool "expr"
25 default n
26 help
27 usage: expr ARG1 OPERATOR ARG2...
28
29 Evaluate expression and print result. For example, "expr 1 + 2".
30
31 The supported operators are (grouped from highest to lowest priority):
32
33 ( ) : * / % + - != <= < >= > = & |
34
35 Each constant and operator must be a separate command line argument.
36 All operators are infix, meaning they expect a constant (or expression
37 that resolves to a constant) on each side of the operator. Operators of
38 the same priority (within each group above) are evaluated left to right.
39 Parentheses may be used (as separate arguments) to elevate the priority
40 of expressions.
41
42 Calling expr from a command shell requires a lot of \( or '*' escaping
43 to avoid interpreting shell control characters.
44
45 The & and | operators are logical (not bitwise) and may operate on
46 strings (a blank string is "false"). Comparison operators may also
47 operate on strings (alphabetical sort).
48
49 Constants may be strings or integers. Comparison, logical, and regex
50 operators may operate on strings (a blank string is "false"), other
51 operators require integers.
52 */
53
54 // TODO: int overflow checking
55
56 #define FOR_expr
57 #include "toys.h"
58
59 GLOBALS(
60 char **tok; // current token, not on the stack since recursive calls mutate it
61
62 char *refree;
63 )
64
65 // Scalar value. If s != NULL, it's a string, otherwise it's an int.
66 struct value {
67 char *s;
68 long long i;
69 };
70
71 // Get the value as a string.
get_str(struct value * v)72 char *get_str(struct value *v)
73 {
74 if (v->s) return v->s;
75 else return xmprintf("%lld", v->i);
76 }
77
78 // Get the value as an integer and return 1, or return 0 on error.
get_int(struct value * v,long long * ret)79 int get_int(struct value *v, long long *ret)
80 {
81 if (v->s) {
82 char *endp;
83
84 *ret = strtoll(v->s, &endp, 10);
85
86 if (*endp) return 0; // If endp points to NUL, all chars were converted
87 } else *ret = v->i;
88
89 return 1;
90 }
91
92 // Preserve the invariant that v.s is NULL when the value is an integer.
assign_int(struct value * v,long long i)93 void assign_int(struct value *v, long long i)
94 {
95 v->i = i;
96 v->s = NULL;
97 }
98
99 // Check if v is 0 or the empty string.
is_false(struct value * v)100 static int is_false(struct value *v)
101 {
102 return get_int(v, &v->i) && !v->i;
103 }
104
105 // 'ret' is filled with a string capture or int match position.
re(char * target,char * pattern,struct value * ret)106 static void re(char *target, char *pattern, struct value *ret)
107 {
108 regex_t pat;
109 regmatch_t m[2];
110
111 xregcomp(&pat, pattern, 0);
112 // must match at pos 0
113 if (!regexec(&pat, target, 2, m, 0) && !m[0].rm_so) {
114 // Return first parenthesized subexpression as string, or length of match
115 if (pat.re_nsub>0) {
116 ret->s = xmprintf("%.*s", (int)(m[1].rm_eo-m[1].rm_so),
117 target+m[1].rm_so);
118 if (TT.refree) free(TT.refree);
119 TT.refree = ret->s;
120 } else assign_int(ret, m[0].rm_eo);
121 } else {
122 if (pat.re_nsub>0) ret->s = "";
123 else assign_int(ret, 0);
124 }
125 regfree(&pat);
126 }
127
128 // 4 different signatures of operators. S = string, I = int, SI = string or
129 // int.
130 enum { SI_TO_SI = 1, SI_TO_I, I_TO_I, S_TO_SI };
131
132 enum { OR = 1, AND, EQ, NE, GT, GTE, LT, LTE, ADD, SUB, MUL, DIVI, MOD, RE };
133
134 // operators grouped by precedence
135 static struct op_def {
136 char *tok;
137 char prec, sig, op; // precedence, signature for type coercion, operator ID
138 } OPS[] = {
139 // logical ops, precedence 1 and 2, signature SI_TO_SI
140 {"|", 1, SI_TO_SI, OR },
141 {"&", 2, SI_TO_SI, AND },
142 // comparison ops, precedence 3, signature SI_TO_I
143 {"=", 3, SI_TO_I, EQ }, {"==", 3, SI_TO_I, EQ }, {"!=", 3, SI_TO_I, NE },
144 {">", 3, SI_TO_I, GT }, {">=", 3, SI_TO_I, GTE },
145 {"<", 3, SI_TO_I, LT }, {"<=", 3, SI_TO_I, LTE },
146 // arithmetic ops, precedence 4 and 5, signature I_TO_I
147 {"+", 4, I_TO_I, ADD }, {"-", 4, I_TO_I, SUB },
148 {"*", 5, I_TO_I, MUL }, {"/", 5, I_TO_I, DIVI }, {"%", 5, I_TO_I, MOD },
149 // regex match, precedence 6, signature S_TO_SI
150 {":", 6, S_TO_SI, RE },
151 {NULL, 0, 0, 0}, // sentinel
152 };
153
eval_op(struct op_def * o,struct value * ret,struct value * rhs)154 void eval_op(struct op_def *o, struct value *ret, struct value *rhs)
155 {
156 long long a, b, x = 0; // x = a OP b for ints.
157 char *s, *t; // string operands
158 int cmp;
159
160 switch (o->sig) {
161
162 case SI_TO_SI:
163 switch (o->op) {
164 case OR: if (is_false(ret)) *ret = *rhs; break;
165 case AND: if (is_false(ret) || is_false(rhs)) assign_int(ret, 0); break;
166 }
167 break;
168
169 case SI_TO_I:
170 if (get_int(ret, &a) && get_int(rhs, &b)) { // both are ints
171 cmp = a - b;
172 } else { // otherwise compare both as strings
173 cmp = strcmp(s = get_str(ret), t = get_str(rhs));
174 if (ret->s != s) free(s);
175 if (rhs->s != t) free(t);
176 }
177 switch (o->op) {
178 case EQ: x = cmp == 0; break;
179 case NE: x = cmp != 0; break;
180 case GT: x = cmp > 0; break;
181 case GTE: x = cmp >= 0; break;
182 case LT: x = cmp < 0; break;
183 case LTE: x = cmp <= 0; break;
184 }
185 assign_int(ret, x);
186 break;
187
188 case I_TO_I:
189 if (!get_int(ret, &a) || !get_int(rhs, &b))
190 error_exit("non-integer argument");
191 switch (o->op) {
192 case ADD: x = a + b; break;
193 case SUB: x = a - b; break;
194 case MUL: x = a * b; break;
195 case DIVI: if (b == 0) error_exit("division by zero"); x = a / b; break;
196 case MOD: if (b == 0) error_exit("division by zero"); x = a % b; break;
197 }
198 assign_int(ret, x);
199 break;
200
201 case S_TO_SI: // op == RE
202 s = get_str(ret);
203 cmp = ret->s!=s; // ret overwritten by re so check now
204 re(s, t = get_str(rhs), ret);
205 if (cmp) free(s);
206 if (rhs->s!=t) free(t);
207 break;
208 }
209 }
210
211 // Evalute a compound expression using recursive "Precedence Climbing"
212 // algorithm, setting 'ret'.
eval_expr(struct value * ret,int min_prec)213 static void eval_expr(struct value *ret, int min_prec)
214 {
215 if (!*TT.tok) error_exit("Unexpected end of input");
216
217 // Evaluate LHS atom, setting 'ret'.
218 if (!strcmp(*TT.tok, "(")) { // parenthesized expression
219 TT.tok++; // consume (
220
221 eval_expr(ret, 1); // We're inside ( ), so min_prec = 1
222 if (ret->s && !strcmp(ret->s, ")")) error_exit("empty ( )");
223 if (!*TT.tok) error_exit("Expected )");
224 if (strcmp(*TT.tok, ")")) error_exit("Expected ) but got %s", *TT.tok);
225 } else ret->s = *TT.tok; // simple literal, all values start as strings
226 TT.tok++;
227
228 // Evaluate RHS and apply operator until precedence is too low.
229 struct value rhs;
230 while (*TT.tok) {
231 struct op_def *o = OPS;
232
233 while (o->tok) { // Look up operator
234 if (!strcmp(*TT.tok, o->tok)) break;
235 o++;
236 }
237 if (!o->tok) break; // Not an operator (extra input will fail later)
238 if (o->prec < min_prec) break; // Precedence too low, pop a stack frame
239 TT.tok++;
240
241 eval_expr(&rhs, o->prec + 1); // Evaluate RHS, with higher min precedence
242 eval_op(o, ret, &rhs); // Apply operator, setting 'ret'
243 }
244 }
245
expr_main(void)246 void expr_main(void)
247 {
248 struct value ret = {0};
249
250 toys.exitval = 2; // if exiting early, indicate error
251 TT.tok = toys.optargs; // initialize global token
252 eval_expr(&ret, 1);
253 if (*TT.tok) error_exit("Unexpected extra input '%s'\n", *TT.tok);
254
255 if (ret.s) printf("%s\n", ret.s);
256 else printf("%lld\n", ret.i);
257
258 toys.exitval = is_false(&ret);
259
260 if (TT.refree) free(TT.refree);
261 }
262