• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/* cut.c - print selected ranges from a file
 *
 * Copyright 2016 Rob Landley <rob@landley.net>
 *
 * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/cut.html
 *
 * Deviations from posix: added -DF. We can only accept 512 selections, and
 * "-" counts as start to end. Using spaces to separate a comma-separated list
 * is silly and inconsistent with dd, ps, cp, and mount.
 *
 * TODO: -s with -c

USE_CUT(NEWTOY(cut, "b*|c*|f*|F(regex-fields)*|C*|O(output-delimiter):d:sD(allow-duplicates)n[!cbfF]", TOYFLAG_USR|TOYFLAG_BIN))

config CUT
  bool "cut"
  default y
  help
    usage: cut [-Ds] [-bcCfF LIST] [-dO DELIM] [FILE...]

    Print selected parts of lines from each FILE to standard output.

    Each selection LIST is comma separated, either numbers (counting from 1)
    or dash separated ranges (inclusive, with X- meaning to end of line and -X
    from start). By default selection ranges are sorted and collated, use -D
    to prevent that.

    -b	Select bytes (with -n round start/end down to start of utf8 char)
    -c	Select UTF-8 characters
    -C	Select unicode columns
    -d	Use DELIM (default is TAB for -f, run of whitespace for -F)
    -D	Don't sort/collate selections or match -fF lines without delimiter
    -f	Select fields (words) separated by single DELIM character
    -F	Select fields separated by DELIM regex
    -O	Output delimiter (default one space for -F, input delim for -f)
    -s	Skip lines without delimiters
*/
38 #define FOR_cut
39 #include "toys.h"
40 
41 GLOBALS(
42   char *d, *O;
43   struct arg_list *select[5]; // we treat them the same, so loop through
44 
45   unsigned line;
46   int pairs;
47   regex_t reg;
48 )
49 
50 // Apply selections to an input line, producing output
cut_line(char ** pline,long len)51 static void cut_line(char **pline, long len)
52 {
53   unsigned *pairs = (void *)toybuf, wc;
54   char *line;
55   int i, j, k;
56 
57   if (!pline) return;
58   line = *pline;
59   if (len && line[len-1]=='\n') line[--len] = 0;
60   TT.line++;
61 
62   // Loop through selections
63   for (i=0; i<TT.pairs; i++) {
64     unsigned start = pairs[2*i], end = pairs[(2*i)+1], count;
65     char *s = line, *ss, *sss;
66 
67     // when the delimiter is \n output lines.
68     if (*TT.d == '\n') {
69       if (TT.line<start || TT.line>end) {
70         if (i+1 == TT.pairs) return;
71         continue;
72       }
73       goto write_line;
74     }
75 
76     // input: start/end position, count=difference between them
77     // output: s = start of string, len = bytes to output
78 
79     if (start) start--;
80     if (start>=len) continue;
81     if (!end || end>len) end = len;
82     count = end-start;
83 
84     // Find start and end of output string for the relevant selection type
85     if (FLAG(b)) {
86       if (!FLAG(n)) s += start;
87       else {
88         if (end>len) end = len;
89         for (sss = ss = s; (k = (ss-line))<end;) {
90           if (0>(j = utf8towc(&wc, ss, len))) ss++;
91           else {
92             if (((ss += j)-line)<=end) sss = ss;
93             if ((ss-line)<=start) s = ss;
94           }
95         }
96         if (!(count = sss-s)) continue;
97       }
98     } else if (FLAG(C)) {
99       // crunch_str() currently assumes that combining characters get
100       // escaped, to provide an unambiguous visual representation.
101       // This assumes the input string is null terminated.
102       if (start) crunch_str(&s, start, 0, 0, 0);
103       if (!*s) continue;
104       start = s-line;
105       ss = s;
106       crunch_str(&ss, count, 0, 0, 0);
107       count = ss-s;
108 
109     } else if (FLAG(c)) {
110 
111       // Find start
112       ss = line+len;
113       while (start && s<ss) {
114         if (0<=(j = utf8towc(&wc, s, len))) start--;
115         s += (j<1) ? 1 : j;
116       }
117       if (s == ss) continue;
118 
119       // Find end
120       end = count;
121       sss = s;
122       while (end && sss<ss) {
123         if (0<=(j = utf8towc(&wc, sss, len))) end--;
124         sss += (j<1) ? 1 : j;
125       }
126       count = sss-s;
127     } else {
128       regmatch_t match;
129 
130       // Loop through skipping appropriate number of fields
131       for (j = 0; j<2; j++) {
132         ss = s;
133         if (j) start = count;
134         else end = start;
135         while (*ss && start) {
136           if (FLAG(f)) {
137             if (!strchr(TT.d, *ss++)) continue;
138             if (!--start && j) ss--;
139           } else {
140             if (regexec(&TT.reg, ss, 1, &match, REG_NOTBOL|REG_NOTEOL)) {
141               ss = line+len;
142               continue;
143             }
144             if (!match.rm_eo) break; // zero length match == no delimiter
145             ss += (!--start && j) ? match.rm_so : match.rm_eo;
146           }
147         }
148         if (!j && !*(s = ss)) break;
149       }
150 
151       // If we never encountered even one separator, print whole line (posix!)
152       if (!j && end == start) {
153         if (FLAG(D)) break;
154         if (FLAG(s)) return;
155 write_line:
156         fwrite(line, len, 1, stdout);
157         break;
158       } else if (!*s) continue;
159       count = ss-s;
160     }
161     if (i && TT.O) fputs(TT.O, stdout);
162 #ifdef TOYBOX_OH_ADAPT
163     /* fix "cut -s -d, -f-3 A.txt" not filter problem*/
164     if (!FLAG(s) || strchr(s, *TT.d)) {
165       fwrite(s, count, 1, stdout);
166     }
167 #else
168     fwrite(s, count, 1, stdout);
169 #endif
170   }
171   xputc('\n');
172 }
173 
// qsort() comparison callback for selections. Each element is a pair of
// unsigned values {start, end}; pairs are ordered by start, then by end.
//
// The parameters are const void * so the function has exactly the type
// qsort() calls it through (calling through a mismatched function pointer
// type is undefined behaviour).
//
// Returns -1 if a sorts before b, 1 if after, 0 if both pairs are equal.
static int compar(const void *a, const void *b)
{
  const unsigned *x = a, *y = b;

  if (x[0] != y[0]) return (x[0] < y[0]) ? -1 : 1;
  if (x[1] != y[1]) return (x[1] < y[1]) ? -1 : 1;

  return 0;
}
183 
184 // parse A or A-B or A- or -B
get_range(void * data,char * str,int len)185 static char *get_range(void *data, char *str, int len)
186 {
187   char *end = str;
188   unsigned *pairs = (void *)toybuf, i;
189 
190   // Using toybuf[] to store ranges means we can have 512 selections max.
191   if (TT.pairs == sizeof(toybuf)/sizeof(int)) perror_exit("select limit");
192   pairs += 2*TT.pairs++;
193 
194   pairs[1] = UINT_MAX;
195   for (i = 0; ;i++) {
196     if (i==2) return end;
197     if (isdigit(*end)) {
198       long long ll = estrtol(end, &end, 10);
199 
200       if (ll<1 || ll>UINT_MAX || errno) return end;
201       pairs[i] = ll;
202     }
203     if (*end++ != '-') break;
204   }
205   if (!i) pairs[1] = pairs[0];
206   if ((end-str)<len) return end;
207   if (pairs[0]>pairs[1]) return str;
208 
209   // No error
210   return 0;
211 }
212 
cut_main(void)213 void cut_main(void)
214 {
215   int i;
216   char buf[8];
217 
218   // Parse command line arguments
219   if ((toys.optflags&(FLAG_s|FLAG_f|FLAG_F))==FLAG_s)
220     error_exit("-s needs -Ff");
221   if ((toys.optflags&(FLAG_d|FLAG_f|FLAG_F))==FLAG_d)
222     error_exit("-d needs -Ff");
223   if (!TT.d) TT.d = (FLAG(F)) ? "[[:space:]][[:space:]]*" : "\t";
224   if (FLAG(F)) xregcomp(&TT.reg, TT.d, REG_EXTENDED);
225   if (!TT.O) {
226     if (FLAG(F)) TT.O = " ";
227     else if (FLAG(f)) TT.O = TT.d;
228   }
229 
230   // Parse ranges, which are attached to a selection type (only one can be set)
231   for (i = 0; i<ARRAY_LEN(TT.select); i++) {
232     sprintf(buf, "bad -%c", "CFfcb"[i]); // reverse order from newtoy optstr
233     if (TT.select[i]) comma_args(TT.select[i], 0, buf, get_range);
234   }
235   if (!TT.pairs) error_exit("no selections");
236 
237   // Sort and collate selections
238   if (!FLAG(D)) {
239     int from, to;
240     unsigned *pairs = (void *)toybuf;
241 
242     qsort(toybuf, TT.pairs, 8, (void *)compar);
243     for (to = 0, from = 2; from/2 < TT.pairs; from += 2) {
244       if (pairs[from] > pairs[to+1]) {
245         to += 2;
246         memcpy(pairs+to, pairs+from, 2*sizeof(unsigned));
247       } else if (pairs[from+1] > pairs[to+1]) pairs[to+1] = pairs[from+1];
248     }
249     TT.pairs = (to/2)+1;
250   }
251 
252   // For each argument, loop through lines of file and call cut_line() on each
253   loopfiles_lines(toys.optargs, cut_line);
254 }
255