1 /* sort.c - put input lines into order
2 *
3 * Copyright 2004, 2008 Rob Landley <rob@landley.net>
4 *
5 * See http://opengroup.org/onlinepubs/007904975/utilities/sort.html
6 *
7 * Deviations from POSIX: Lots.
8 * We invented -x
9
10 USE_SORT(NEWTOY(sort, USE_SORT_FLOAT("g")"S:T:m" "o:k*t:" "xVbMcszdfirun", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_ARGFAIL(2)))
11
12 config SORT
13 bool "sort"
14 default y
15 help
16 usage: sort [-runbcdfiMsz] [FILE...] [-k#[,#[x]] [-t X]] [-o FILE]
17
18 Sort all lines of text from input files (or stdin) to stdout.
19
20 -r Reverse
21 -u Unique lines only
22 -n Numeric order (instead of alphabetical)
23 -b Ignore leading blanks (or trailing blanks in second part of key)
24 -c Check whether input is sorted
25 -d Dictionary order (use alphanumeric and whitespace chars only)
26 -f Force uppercase (case insensitive sort)
27 -i Ignore nonprinting characters
28 -M Month sort (jan, feb, etc)
29 -x Hexadecimal numerical sort
30 -s Skip fallback sort (only sort with keys)
31 -z Zero (null) terminated lines
32 -k Sort by "key" (see below)
33 -t Use a key separator other than whitespace
34 -o Output to FILE instead of stdout
35 -V Version numbers (name-1.234-rc6.5b.tgz)
36
37 Sorting by key looks at a subset of the words on each line. -k2 uses the
38 second word to the end of the line, -k2,2 looks at only the second word,
39 -k2,4 looks from the start of the second to the end of the fourth word.
40 -k2.4,5 starts from the fourth character of the second word, to the end
41 of the fifth word. Specifying multiple keys uses the later keys as tie
breakers, in order. A type specifier appended to a sort key (such as -k2,2n)
43 applies only to sorting that key.
44
45 config SORT_FLOAT
46 bool
47 default y
48 depends on TOYBOX_FLOAT
49 help
50 usage: sort [-g]
51
52 -g General numeric sort (double precision with nan and inf)
53 */
54
55 #define FOR_sort
56 #include "toys.h"
57
58 GLOBALS(
59 char *t;
60 struct arg_list *k;
61 char *o, *T, S;
62
63 void *key_list;
64 int linecount;
65 char **lines;
66 char *name;
67 )
68
69 // The sort types are n, g, and M.
70 // u, c, s, and z apply to top level only, not to keys.
71 // b at top level implies bb.
72 // The remaining options can be applied to search keys.
73
// Unsigned so the shift into bit 31 is well defined and the value does not
// sign-extend when OR-ed into a wider flag word.
#define FLAG_bb (1U<<31) // Ignore trailing blanks
75
// One sort key: a -k argument, or the implicit whole-line key.
struct sort_key {
  // Following key in the list, NULL at the end.
  struct sort_key *next_key;

  // [0] start word, [1] start char, [2] end word, [3] end char.
  unsigned range[4];

  // Flags attached to this key; when zero the global flags are used instead.
  int flags;
};
82
83 // Copy of the part of this string corresponding to a key/flags.
84
get_key_data(char * str,struct sort_key * key,int flags)85 static char *get_key_data(char *str, struct sort_key *key, int flags)
86 {
87 int start=0, end, len, i, j;
88
89 // Special case whole string, so we don't have to make a copy
90
91 if(key->range[0]==1 && !key->range[1] && !key->range[2] && !key->range[3]
92 && !(flags&(FLAG_b|FLAG_d|FLAG_i|FLAG_bb))) return str;
93
94 // Find start of key on first pass, end on second pass
95
96 len = strlen(str);
97 for (j=0; j<2; j++) {
98 if (!key->range[2*j]) end=len;
99
100 // Loop through fields
101 else {
102 end=0;
103 for (i=1; i < key->range[2*j]+j; i++) {
104
105 // Skip leading blanks
106 if (str[end] && !TT.t) while (isspace(str[end])) end++;
107
108 // Skip body of key
109 for (; str[end]; end++) {
110 if (TT.t) {
111 if (str[end]==*TT.t) {
112 end++;
113 break;
114 }
115 } else if (isspace(str[end])) break;
116 }
117 }
118 }
119 if (!j) start=end;
120 }
121
122 // Key with explicit separator starts after the separator
123 if (TT.t && str[start]==*TT.t) start++;
124
125 // Strip leading and trailing whitespace if necessary
126 if ((flags&FLAG_b) || (!TT.t && !key->range[3]))
127 while (isspace(str[start])) start++;
128 if (flags&FLAG_bb) while (end>start && isspace(str[end-1])) end--;
129
130 // Handle offsets on start and end
131 if (key->range[3]) {
132 end += key->range[3]-1;
133 if (end>len) end=len;
134 }
135 if (key->range[1]) {
136 start += key->range[1]-1;
137 if (start>len) start=len;
138 }
139
140 // Make the copy
141 if (end<start) end=start;
142 str = xstrndup(str+start, end-start);
143
144 // Handle -d
145 if (flags&FLAG_d) {
146 for (start = end = 0; str[end]; end++)
147 if (isspace(str[end]) || isalnum(str[end])) str[start++] = str[end];
148 str[start] = 0;
149 }
150
151 // Handle -i
152 if (flags&FLAG_i) {
153 for (start = end = 0; str[end]; end++)
154 if (isprint(str[end])) str[start++] = str[end];
155 str[start] = 0;
156 }
157
158 return str;
159 }
160
161 // append a sort_key to key_list.
162
add_key(void)163 static struct sort_key *add_key(void)
164 {
165 void **stupid_compiler = &TT.key_list;
166 struct sort_key **pkey = (struct sort_key **)stupid_compiler;
167
168 while (*pkey) pkey = &((*pkey)->next_key);
169 return *pkey = xzalloc(sizeof(struct sort_key));
170 }
171
172 // Perform actual comparison
compare_values(int flags,char * x,char * y)173 static int compare_values(int flags, char *x, char *y)
174 {
175 if (CFG_SORT_FLOAT && (flags & FLAG_g)) {
176 char *xx,*yy;
177 double dx = strtod(x,&xx), dy = strtod(y,&yy);
178 int xinf, yinf;
179
180 // not numbers < NaN < -infinity < numbers < +infinity
181
182 if (x==xx) return y==yy ? 0 : -1;
183 if (y==yy) return 1;
184
185 // Check for isnan
186 if (dx!=dx) return (dy!=dy) ? 0 : -1;
187 if (dy!=dy) return 1;
188
189 // Check for infinity. (Could underflow, but avoids needing libm.)
190 xinf = (1.0/dx == 0.0);
191 yinf = (1.0/dy == 0.0);
192 if (xinf) {
193 if(dx<0) return (yinf && dy<0) ? 0 : -1;
194 return (yinf && dy>0) ? 0 : 1;
195 }
196 if (yinf) return dy<0 ? 1 : -1;
197
198 return dx>dy ? 1 : (dx<dy ? -1 : 0);
199 } else if (flags & FLAG_M) {
200 struct tm thyme;
201 int dx;
202 char *xx,*yy;
203
204 xx = strptime(x,"%b",&thyme);
205 dx = thyme.tm_mon;
206 yy = strptime(y,"%b",&thyme);
207 if (!xx) return !yy ? 0 : -1;
208 else if (!yy) return 1;
209 else return dx==thyme.tm_mon ? 0 : dx-thyme.tm_mon;
210
211 } else if (flags & FLAG_x) return strtol(x, NULL, 16)-strtol(y, NULL, 16);
212 else if (flags & FLAG_V) {
213 while (*x && *y) {
214 while (*x && *x == *y) x++, y++;
215 if (isdigit(*x) && isdigit(*y)) {
216 long long xx = strtoll(x, &x, 10), yy = strtoll(y, &y, 10);
217
218 if (xx<yy) return -1;
219 if (xx>yy) return 1;
220 } else {
221 char xx = *x ? *x : x[-1], yy = *y ? *y : y[-1];
222
223 // -rc/-pre hack so abc-123 > abc-123-rc1 (other way already - < 0-9)
224 if (xx != yy) {
225 if (xx<yy && !strstart(&y, "-rc") && !strstart(&y, "-pre")) return -1;
226 else return 1;
227 }
228 }
229 }
230 return *x ? !!*y : -1;
231 } else if (flags & FLAG_n) {
232 // Full floating point version of -n
233 if (CFG_SORT_FLOAT) {
234 double dx = atof(x), dy = atof(y);
235
236 return dx>dy ? 1 : (dx<dy ? -1 : 0);
237 // Integer version of -n for tiny systems
238 } else return atoi(x)-atoi(y);
239
240 // Ascii sort
241 } else return ((flags&FLAG_f) ? strcasecmp : strcmp)(x, y);
242 }
243
244 // Callback from qsort(): Iterate through key_list and perform comparisons.
compare_keys(const void * xarg,const void * yarg)245 static int compare_keys(const void *xarg, const void *yarg)
246 {
247 int flags = toys.optflags, retval = 0;
248 char *x, *y, *xx = *(char **)xarg, *yy = *(char **)yarg;
249 struct sort_key *key;
250
251 for (key=(struct sort_key *)TT.key_list; !retval && key; key = key->next_key){
252 flags = key->flags ? key->flags : toys.optflags;
253
254 // Chop out and modify key chunks, handling -dfib
255
256 x = get_key_data(xx, key, flags);
257 y = get_key_data(yy, key, flags);
258
259 retval = compare_values(flags, x, y);
260
261 // Free the copies get_key_data() made.
262
263 if (x != xx) free(x);
264 if (y != yy) free(y);
265
266 if (retval) break;
267 }
268
269 // Perform fallback sort if necessary (always case insensitive, no -f,
270 // the point is to get a stable order even for -f sorts)
271 if (!retval && !FLAG(s)) {
272 flags = toys.optflags;
273 retval = strcmp(xx, yy);
274 }
275
276 return retval * ((flags&FLAG_r) ? -1 : 1);
277 }
278
279 // Read each line from file, appending to a big array.
sort_lines(char ** pline,long len)280 static void sort_lines(char **pline, long len)
281 {
282 char * line;
283
284 if (!pline) return;
285 line = *pline;
286 if (!FLAG(z) && len && line[len-1]=='\n') line[--len] = 0;
287 *pline = NULL;
288
289 // handle -c here so we don't allocate more memory than necessary.
290 if (FLAG(c)) {
291 int j = FLAG(u) ? -1 : 0;
292
293 if (TT.lines && compare_keys((void *)&TT.lines, &line)>j)
294 error_exit("%s: Check line %d\n", TT.name, TT.linecount);
295 free(TT.lines);
296 TT.lines = (char **)line;
297 } else {
298 if (!(TT.linecount&63))
299 TT.lines = xrealloc(TT.lines, sizeof(char *)*(TT.linecount+64));
300 TT.lines[TT.linecount] = line;
301 }
302 TT.linecount++;
303 }
304
305 // Callback from loopfiles to handle input files.
sort_read(int fd,char * name)306 static void sort_read(int fd, char *name)
307 {
308 TT.name = name;
309 do_lines(fd, FLAG(z) ? '\0' : '\n', sort_lines);
310 }
311
sort_main(void)312 void sort_main(void)
313 {
314 int idx, fd = 1;
315
316 // Parse -k sort keys.
317 if (TT.k) {
318 struct arg_list *arg;
319
320 for (arg = TT.k; arg; arg = arg->next) {
321 struct sort_key *key = add_key();
322 char *temp;
323 int flag;
324
325 idx = 0;
326 temp = arg->arg;
327 while (*temp) {
328 // Start of range
329 key->range[2*idx] = (unsigned)strtol(temp, &temp, 10);
330 if (*temp=='.')
331 key->range[(2*idx)+1] = (unsigned)strtol(temp+1, &temp, 10);
332
333 // Handle flags appended to a key type.
334 for (;*temp;temp++) {
335 char *temp2, *optlist;
336
337 // Note that a second comma becomes an "Unknown key" error.
338
339 if (*temp==',' && !idx++) {
340 temp++;
341 break;
342 }
343
344 // Which flag is this?
345
346 optlist = toys.which->options;
347 temp2 = strchr(optlist, *temp);
348 flag = (1<<(optlist-temp2+strlen(optlist)-1));
349
350 // Was it a flag that can apply to a key?
351
352 if (!temp2 || flag>FLAG_x
353 || (flag&(FLAG_u|FLAG_c|FLAG_s|FLAG_z)))
354 {
355 toys.exitval = 2;
356 error_exit("Unknown key option.");
357 }
358 // b after , means strip _trailing_ space, not leading.
359 if (idx && flag==FLAG_b) flag = FLAG_bb;
360 key->flags |= flag;
361 }
362 }
363 }
364 }
365
366 // global b flag strips both leading and trailing spaces
367 if (FLAG(b)) toys.optflags |= FLAG_bb;
368
369 // If no keys, perform alphabetic sort over the whole line.
370 if (!TT.key_list) add_key()->range[0] = 1;
371
372 // Open input files and read data, populating TT.lines[TT.linecount]
373 loopfiles(toys.optargs, sort_read);
374
375 // The compare (-c) logic was handled in sort_read(),
376 // so if we got here, we're done.
377 if (FLAG(c)) goto exit_now;
378
379 // Perform the actual sort
380 qsort(TT.lines, TT.linecount, sizeof(char *), compare_keys);
381
382 // handle unique (-u)
383 if (FLAG(u)) {
384 int jdx;
385
386 for (jdx=0, idx=1; idx<TT.linecount; idx++) {
387 if (!compare_keys(&TT.lines[jdx], &TT.lines[idx]))
388 free(TT.lines[idx]);
389 else TT.lines[++jdx] = TT.lines[idx];
390 }
391 if (TT.linecount) TT.linecount = jdx+1;
392 }
393
394 // Open output file if necessary. We can't do this until we've finished
395 // reading in case the output file is one of the input files.
396 if (TT.o) fd = xcreate(TT.o, O_CREAT|O_TRUNC|O_WRONLY, 0666);
397
398 // Output result
399 for (idx = 0; idx<TT.linecount; idx++) {
400 char *s = TT.lines[idx];
401 unsigned i = strlen(s);
402
403 if (!FLAG(z)) s[i] = '\n';
404 xwrite(fd, s, i+1);
405 if (CFG_TOYBOX_FREE) free(s);
406 }
407
408 exit_now:
409 if (CFG_TOYBOX_FREE) {
410 if (fd != 1) close(fd);
411 free(TT.lines);
412 }
413 }
414