1 /* args.c - Command line argument parsing.
2 *
3 * Copyright 2006 Rob Landley <rob@landley.net>
4 */
5
6 // NOTE: If option parsing segfaults, switch on TOYBOX_DEBUG in menuconfig to
7 // add syntax checks to option string parsing which aren't needed in the final
8 // code (since get_opt string is hardwired and should be correct when you ship)
9
10 #include "toys.h"
11
12 // Design goals:
13 // Don't use getopt() out of libc.
14 // Don't permute original arguments (screwing up ps/top output).
15 // Integrated --long options "(noshort)a(along)b(blong1)(blong2)"
16
17 /* This uses a getopt-like option string, but not getopt() itself. We call
18 * it the get_opt string.
19 *
 * Each option in the get_opt string corresponds to a bit position in the
 * return value. The rightmost option is (1<<0), the next to last is (1<<1)
 * and so on. If the option isn't seen in argv[], its bit remains 0.
23 *
24 * Options which have an argument fill in the corresponding slot in the global
25 * union "this" (see generated/globals.h), which it treats as an array of longs
26 * (note that sizeof(long)==sizeof(pointer) is guaranteed by LP64).
27 *
28 * You don't have to free the option strings, which point into the environment
29 * space. List objects should be freed by main() when command_main() returns.
30 *
31 * Example:
32 * Calling get_optflags() when toys.which->options="ab:c:d" and
33 * argv = ["command", "-b", "fruit", "-d", "walrus"] results in:
34 *
35 * Changes to struct toys:
36 * toys.optflags = 5 (I.E. 0101 so -b = 4 | -d = 1)
37 * toys.optargs[0] = "walrus" (leftover argument)
38 * toys.optargs[1] = NULL (end of list)
39 * toys.optc = 1 (there was 1 leftover argument)
40 *
41 * Changes to union this:
42 * this[0]=NULL (because -c didn't get an argument this time)
43 * this[1]="fruit" (argument to -b)
44 */
45
46 // What you can put in a get_opt string:
47 // Any otherwise unused character (all letters, unprefixed numbers) specify
48 // an option that sets a flag. The bit value is the same as the binary digit
49 // if you string the option characters together in order.
50 // So in "abcdefgh" a = 128, h = 1
51 //
52 // Suffixes specify that this option takes an argument (stored in GLOBALS):
53 // Note that pointer and long are always the same size, even on 64 bit.
54 // : string argument, keep most recent if more than one
55 // * string argument, appended to a struct arg_list linked list.
56 // # signed long argument
57 // <LOW - die if less than LOW
58 // >HIGH - die if greater than HIGH
59 // =DEFAULT - value if not specified
60 // - signed long argument defaulting to negative (say + for positive)
61 // . double precision floating point argument (with CFG_TOYBOX_FLOAT)
62 // Chop this option out with USE_TOYBOX_FLOAT() in option string
63 // Same <LOW>HIGH=DEFAULT as #
64 // @ occurrence counter (which is a long)
65 // % time offset in milliseconds with optional s/m/h/d suffix
66 // (longopt)
67 // | this is required. If more than one marked, only one required.
68 // ; Option's argument is optional, and must be collated: -aARG or --a=ARG
69 // ^ Stop parsing after encountering this argument
70 // " " (space char) the "plus an argument" must be separate
71 // I.E. "-j 3" not "-j3". So "kill -stop" != "kill -s top"
72 //
73 // At the beginning of the get_opt string (before any options):
74 // <0 die if less than # leftover arguments (default 0)
75 // >9 die if > # leftover arguments (default MAX_INT)
76 // 0 Include argv[0] in optargs
77 // ^ stop at first nonoption argument
78 // ? Pass unknown arguments through to command (implied when no flags).
79 // & first arg has imaginary dash (ala tar/ps/ar) which sets FLAGS_NODASH
80 // ~ Collate following bare longopts (as if under short opt, repeatable)
81 //
82 // At the end: [groups] of previously seen options
83 // - Only one in group (switch off) [-abc] means -ab=-b, -ba=-a, -abc=-c
84 // + Synonyms (switch on all) [+abc] means -ab=-abc, -c=-abc
85 // ! More than one in group is error [!abc] means -ab calls error_exit()
86 // primarily useful if you can switch things back off again.
87 //
88 // You may use octal escapes with the high bit (128) set to use a control
89 // character as an option flag. For example, \300 would be the option -@
90
91 // Notes from getopt man page
92 // - and -- cannot be arguments.
93 // -- force end of arguments
94 // - is a synonym for stdin in file arguments
95 // -abcd means -a -b -c -d (but if -b takes an argument, then it's -a -b cd)
96
97 // Linked list of all known options (option string parsed into this).
98 // Hangs off getoptflagstate, freed at end of option parsing.
99 struct opts {
100 struct opts *next;
101 long *arg; // Pointer into union "this" to store arguments at.
102 int c; // Argument character to match
103 int flags; // |=1, ^=2, " "=4, ;=8
104 unsigned long long dex[3]; // bits to disable/enable/exclude in toys.optflags
105 char type; // Type of arguments to store union "this"
106 union {
107 long l;
108 FLOAT f;
109 } val[3]; // low, high, default - range of allowed values
110 };
111
// Linked list of long options. Hangs off getoptflagstate and is freed at the
// end of option parsing. Details about the flag to set and the global slot to
// fill out are stored in the related short option struct, but if opt->c == -1
// the long option is "bare" (has no corresponding short option).
struct longopts {
  // Next long option in the list.
  struct longopts *next;
  // Option entry holding the flag bits and argument slot for this name.
  struct opts *opt;
  // Start of the name; not NUL terminated, use len.
  char *str;
  // Number of characters in the name.
  int len;
};
122
123 // State during argument parsing.
struct getoptflagstate
{
  // Index of the argv[] entry being parsed, and the allowed range for the
  // number of leftover (non-option) arguments.
  int argc, minargs, maxargs;
  // Current position within the argv[] entry being parsed.
  char *arg;
  // Known short options and known long options.
  struct opts *opts;
  struct longopts *longopts;
  // noerror: pass unknown options through instead of erroring out.
  // nodash_now: the current argument is treated as options without a dash.
  // stopearly: nonzero means stop at first nonoption; >1 means stopped.
  int noerror, nodash_now, stopearly;
  // Accumulated masks of toys.optflags bits. These are combined with the
  // unsigned long long dex[] masks, so they must be just as wide or option
  // bits 32 and up are silently dropped.
  unsigned long long excludes, requires;
};
133
forget_arg(struct opts * opt)134 static void forget_arg(struct opts *opt)
135 {
136 if (opt->arg) {
137 if (opt->type=='*') llist_traverse((void *)*opt->arg, free);
138 *opt->arg = 0;
139 }
140 }
141
142 // Use getoptflagstate to parse one command line option from argv
143 // Sets flags, saves/clears opt->arg, advances gof->arg/gof->argc as necessary
gotflag(struct getoptflagstate * gof,struct opts * opt,int longopt)144 static void gotflag(struct getoptflagstate *gof, struct opts *opt, int longopt)
145 {
146 unsigned long long i;
147 struct opts *and;
148 char *arg;
149 int type;
150
151 // Did we recognize this option?
152 if (!opt) help_exit("Unknown option '%s'", gof->arg);
153
154 // Might enabling this switch off something else?
155 if (toys.optflags & opt->dex[0]) {
156 // Forget saved argument for flag we switch back off
157 for (and = gof->opts, i = 1; and; and = and->next, i<<=1)
158 if (i & toys.optflags & opt->dex[0]) forget_arg(and);
159 toys.optflags &= ~opt->dex[0];
160 }
161
162 // Set flags
163 toys.optflags |= opt->dex[1];
164 gof->excludes |= opt->dex[2];
165 if (opt->flags&2) gof->stopearly=2;
166
167 if (toys.optflags & gof->excludes) {
168 for (and = gof->opts, i = 1; and; and = and->next, i<<=1) {
169 if (opt == and || !(i & toys.optflags)) continue;
170 if (toys.optflags & and->dex[2]) break;
171 }
172 if (and) help_exit("No '%c' with '%c'", opt->c, and->c);
173 }
174
175 // Are we NOT saving an argument? (Type 0, '@', unattached ';', short ' ')
176 if (*(arg = gof->arg)) gof->arg++;
177 if ((type = opt->type) == '@') {
178 ++*opt->arg;
179 return;
180 }
181 if (!longopt && *gof->arg && (opt->flags & 4)) return forget_arg(opt);
182 if (!type || (!arg[!longopt] && (opt->flags & 8))) return forget_arg(opt);
183
184 // Handle "-xblah" and "-x blah", but also a third case: "abxc blah"
185 // to make "tar xCjfv blah1 blah2 thingy" work like
186 // "tar -x -C blah1 -j -f blah2 -v thingy"
187
188 if (longopt && *arg) arg++;
189 else arg = (gof->nodash_now||!*gof->arg) ? toys.argv[++gof->argc] : gof->arg;
190 if (!gof->nodash_now) gof->arg = "";
191 if (!arg) {
192 struct longopts *lo;
193
194 arg = "Missing argument to ";
195 if (opt->c != -1) help_exit("%s-%c", arg, opt->c);
196 for (lo = gof->longopts; lo->opt != opt; lo = lo->next);
197 help_exit("%s--%.*s", arg, lo->len, lo->str);
198 }
199
200 // Parse argument by type
201 if (type == ':') *(opt->arg) = (long)arg;
202 else if (type == '*') {
203 struct arg_list **list;
204
205 list = (struct arg_list **)opt->arg;
206 while (*list) list=&((*list)->next);
207 *list = xzalloc(sizeof(struct arg_list));
208 (*list)->arg = arg;
209 } else if (type == '#' || type == '-') {
210 long l = atolx(arg);
211 if (type == '-' && !ispunct(*arg)) l*=-1;
212 if (l < opt->val[0].l) help_exit("-%c < %ld", opt->c, opt->val[0].l);
213 if (l > opt->val[1].l) help_exit("-%c > %ld", opt->c, opt->val[1].l);
214
215 *(opt->arg) = l;
216 } else if (CFG_TOYBOX_FLOAT && type == '.') {
217 FLOAT *f = (FLOAT *)(opt->arg);
218
219 *f = strtod(arg, &arg);
220 if (opt->val[0].l != LONG_MIN && *f < opt->val[0].f)
221 help_exit("-%c < %lf", opt->c, (double)opt->val[0].f);
222 if (opt->val[1].l != LONG_MAX && *f > opt->val[1].f)
223 help_exit("-%c > %lf", opt->c, (double)opt->val[1].f);
224 } else if (type=='%') *(opt->arg) = xparsemillitime(arg);
225 }
226
227 // Parse this command's options string into struct getoptflagstate, which
228 // includes a struct opts linked list in reverse order (I.E. right-to-left)
parse_optflaglist(struct getoptflagstate * gof)229 static int parse_optflaglist(struct getoptflagstate *gof)
230 {
231 char *options = toys.which->options;
232 long *nextarg = (long *)&this;
233 struct opts *new = 0;
234 int idx, rc = 0;
235
236 // Parse option format string
237 memset(gof, 0, sizeof(struct getoptflagstate));
238 gof->maxargs = INT_MAX;
239 if (!options) return 0;
240
241 // Parse leading special behavior indicators
242 for (;;) {
243 if (*options == '^') gof->stopearly++;
244 else if (*options == '<') gof->minargs=*(++options)-'0';
245 else if (*options == '>') gof->maxargs=*(++options)-'0';
246 else if (*options == '?') gof->noerror++;
247 else if (*options == '&') gof->nodash_now = 1;
248 else if (*options == '0') rc = 1;
249 else break;
250 options++;
251 }
252
253 // Parse option string into a linked list of options with attributes.
254
255 if (!*options) gof->noerror++;
256 while (*options) {
257 char *temp;
258
259 // Option groups come after all options are defined
260 if (*options == '[') break;
261
262 // Allocate a new list entry when necessary
263 if (!new) {
264 new = xzalloc(sizeof(struct opts));
265 new->next = gof->opts;
266 gof->opts = new;
267 new->val[0].l = LONG_MIN;
268 new->val[1].l = LONG_MAX;
269 }
270 // Each option must start with "(" or an option character. (Bare
271 // longopts only come at the start of the string.)
272 if (*options == '(' && new->c != -1) {
273 char *end;
274 struct longopts *lo;
275
276 // Find the end of the longopt
277 for (end = ++options; *end && *end != ')'; end++);
278 if (CFG_TOYBOX_DEBUG && !*end) error_exit("(longopt) didn't end");
279
280 // init a new struct longopts
281 lo = xmalloc(sizeof(struct longopts));
282 lo->next = gof->longopts;
283 lo->opt = new;
284 lo->str = options;
285 lo->len = end-options;
286 gof->longopts = lo;
287 options = ++end;
288
289 // Mark this struct opt as used, even when no short opt.
290 if (!new->c) new->c = -1;
291
292 continue;
293
294 // If this is the start of a new option that wasn't a longopt,
295
296 } else if (strchr(":*#@.-%", *options)) {
297 if (CFG_TOYBOX_DEBUG && new->type)
298 error_exit("multiple types %c:%c%c", new->c, new->type, *options);
299 new->type = *options;
300 } else if (-1 != (idx = stridx("|^ ;", *options))) new->flags |= 1<<idx;
301 // bounds checking
302 else if (-1 != (idx = stridx("<>=", *options))) {
303 if (new->type == '#' || new->type == '%') {
304 long l = strtol(++options, &temp, 10);
305 if (temp != options) new->val[idx].l = l;
306 } else if (CFG_TOYBOX_FLOAT && new->type == '.') {
307 FLOAT f = strtod(++options, &temp);
308 if (temp != options) new->val[idx].f = f;
309 } else error_exit("<>= only after .#%%");
310 options = --temp;
311
312 // At this point, we've hit the end of the previous option. The
313 // current character is the start of a new option. If we've already
314 // assigned an option to this struct, loop to allocate a new one.
315 // (It'll get back here afterwards and fall through to next else.)
316 } else if (new->c) {
317 new = 0;
318 continue;
319
320 // Claim this option, loop to see what's after it.
321 } else new->c = *options;
322
323 options++;
324 }
325
326 // Initialize enable/disable/exclude masks and pointers to store arguments.
327 // (This goes right to left so we need the whole list before we can start.)
328 idx = 0;
329 for (new = gof->opts; new; new = new->next) {
330 unsigned long long u = 1LL<<idx++;
331
332 if (new->c == 1 || new->c=='~') new->c = 0;
333 else new->c &= 127;
334 new->dex[1] = u;
335 if (new->flags & 1) gof->requires |= u;
336 if (new->type) {
337 new->arg = (void *)nextarg;
338 *(nextarg++) = new->val[2].l;
339 }
340 }
341
342 // Parse trailing group indicators
343 while (*options) {
344 unsigned long long bits = 0;
345
346 if (CFG_TOYBOX_DEBUG && *options != '[') error_exit("trailing %s", options);
347
348 idx = stridx("-+!", *++options);
349 if (CFG_TOYBOX_DEBUG && idx == -1) error_exit("[ needs +-!");
350 if (CFG_TOYBOX_DEBUG && (options[1] == ']' || !options[1]))
351 error_exit("empty []");
352
353 // Don't advance past ] but do process it once in loop.
354 while (*options++ != ']') {
355 struct opts *opt;
356 long long ll;
357
358 if (CFG_TOYBOX_DEBUG && !*options) error_exit("[ without ]");
359 // Find this option flag (in previously parsed struct opt)
360 for (ll = 1, opt = gof->opts; ; ll <<= 1, opt = opt->next) {
361 if (*options == ']') {
362 if (!opt) break;
363 if (bits&ll) opt->dex[idx] |= bits&~ll;
364 } else {
365 if (*options==1) break;
366 if (CFG_TOYBOX_DEBUG && !opt)
367 error_exit("[] unknown target %c", *options);
368 if (opt->c == *options) {
369 bits |= ll;
370 break;
371 }
372 }
373 }
374 }
375 }
376
377 return rc;
378 }
379
380 // Fill out toys.optflags, toys.optargs, and this[] from toys.argv
381
get_optflags(void)382 void get_optflags(void)
383 {
384 struct getoptflagstate gof;
385 struct opts *catch;
386 unsigned long long saveflags;
387 char *letters[]={"s",""}, *ss;
388
389 // Option parsing is a two stage process: parse the option string into
390 // a struct opts list, then use that list to process argv[];
391
392 toys.exitval = toys.which->flags >> 24;
393
394 // Allocate memory for optargs
395 saveflags = toys.optc = parse_optflaglist(&gof);
396 while (toys.argv[saveflags++]);
397 toys.optargs = xzalloc(sizeof(char *)*saveflags);
398 if (toys.optc) *toys.optargs = *toys.argv;
399
400 if (toys.argv[1] && toys.argv[1][0] == '-') gof.nodash_now = 0;
401
402 // Iterate through command line arguments, skipping argv[0]
403 for (gof.argc=1; toys.argv[gof.argc]; gof.argc++) {
404 gof.arg = toys.argv[gof.argc];
405 catch = 0;
406
407 // Parse this argument
408 if (gof.stopearly>1) goto notflag;
409
410 if (gof.argc>1 || *gof.arg=='-') gof.nodash_now = 0;
411
412 // Various things with dashes
413 if (*gof.arg == '-') {
414
415 // Handle -
416 if (!gof.arg[1]) goto notflag;
417 gof.arg++;
418 if (*gof.arg=='-') {
419 struct longopts *lo;
420 struct arg_list *al = 0, *al2;
421 int ii;
422
423 gof.arg++;
424 // Handle --
425 if (!*gof.arg) {
426 gof.stopearly += 2;
427 continue;
428 }
429
430 // unambiguously match the start of a known --longopt?
431 check_help(toys.argv+gof.argc);
432 for (lo = gof.longopts; lo; lo = lo->next) {
433 for (ii = 0; ii<lo->len; ii++) if (gof.arg[ii] != lo->str[ii]) break;
434
435 // = only terminates when we can take an argument, not type 0 or '@'
436 if (!gof.arg[ii] || (gof.arg[ii]=='=' && !strchr("@", lo->opt->type)))
437 {
438 al2 = xmalloc(sizeof(struct arg_list));
439 al2->next = al;
440 al2->arg = (void *)lo;
441 al = al2;
442
443 // Exact match is unambigous even when longer options available
444 if (ii==lo->len) {
445 llist_traverse(al, free);
446 al = 0;
447
448 break;
449 }
450 }
451 }
452 // How many matches?
453 if (al) {
454 *libbuf = 0;
455 if (al->next) for (ss = libbuf, al2 = al; al2; al2 = al2->next) {
456 lo = (void *)al2->arg;
457 ss += sprintf(ss, " %.*s"+(al2==al), lo->len, lo->str);
458 } else lo = (void *)al->arg;
459 llist_traverse(al, free);
460 if (*libbuf) error_exit("bad --%s (%s)", gof.arg, libbuf);
461 }
462
463 // One unambiguous match?
464 if (lo) {
465 catch = lo->opt;
466 while (!strchr("=", *gof.arg)) gof.arg++;
467 // Should we handle this --longopt as a non-option argument?
468 } else if (gof.noerror) {
469 gof.arg -= 2;
470 goto notflag;
471 }
472
473 // Long option parsed, handle option.
474 gotflag(&gof, catch, 1);
475 continue;
476 }
477
478 // Handle things that don't start with a dash.
479 } else {
480 if (gof.nodash_now) toys.optflags |= FLAGS_NODASH;
481 else goto notflag;
482 }
483
484 // At this point, we have the args part of -args. Loop through
485 // each entry (could be -abc meaning -a -b -c)
486 saveflags = toys.optflags;
487 while (gof.arg && *gof.arg) {
488
489 // Identify next option char.
490 for (catch = gof.opts; catch; catch = catch->next)
491 if (*gof.arg == catch->c)
492 if (!gof.arg[1] || (catch->flags&(4|8))!=4) break;
493
494 if (!catch && gof.noerror) {
495 toys.optflags = saveflags;
496 gof.arg = toys.argv[gof.argc];
497 goto notflag;
498 }
499
500 // Handle option char (advancing past what was used)
501 gotflag(&gof, catch, 0);
502 }
503 continue;
504
505 // Not a flag, save value in toys.optargs[]
506 notflag:
507 if (gof.stopearly) gof.stopearly++;
508 toys.optargs[toys.optc++] = toys.argv[gof.argc];
509 }
510
511 // Sanity check
512 if (toys.optc<gof.minargs)
513 help_exit("Need%s %d argument%s", letters[!!(gof.minargs-1)],
514 gof.minargs, letters[!(gof.minargs-1)]);
515 if (toys.optc>gof.maxargs)
516 help_exit("Max %d argument%s", gof.maxargs, letters[!(gof.maxargs-1)]);
517 if (gof.requires && !(gof.requires & toys.optflags)) {
518 struct opts *req;
519 char needs[32], *s = needs;
520
521 for (req = gof.opts; req; req = req->next)
522 if (req->flags & 1) *(s++) = req->c;
523 *s = 0;
524
525 help_exit("Needs %s-%s", s[1] ? "one of " : "", needs);
526 }
527
528 toys.exitval = 0;
529
530 if (CFG_TOYBOX_FREE) {
531 llist_traverse(gof.opts, free);
532 llist_traverse(gof.longopts, free);
533 }
534 }
535