1 /* tr.c - translate or delete characters
2 *
3 * Copyright 2014 Sandeep Sharma <sandeep.jack2756@gmail.com>
4 *
5 * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/tr.html
6
7 USE_TR(NEWTOY(tr, "^>2<1Ccsd[+cC]", TOYFLAG_USR|TOYFLAG_BIN))
8
9 config TR
10 bool "tr"
11 default n
12 help
13 usage: tr [-cds] SET1 [SET2]
14
15 Translate, squeeze, or delete characters from stdin, writing to stdout
16
17 -c/-C Take complement of SET1
18 -d Delete input characters in SET1
19 -s Squeeze multiple output characters of SET2 into one character
20 */
21
22 #define FOR_tr
23 #include "toys.h"
24
25 GLOBALS(
26 short map[256]; //map of chars
27 int len1, len2;
28 )
29
// Character class identifiers. The order must stay in step with the table of
// "[:name:]" strings in find_class(), which returns these values;
// class_invalid is the "no such class" result.
enum {
  class_alpha,
  class_alnum,
  class_digit,
  class_lower,
  class_upper,
  class_space,
  class_blank,
  class_punct,
  class_cntrl,
  class_xdigit,
  class_invalid
};
35
map_translation(char * set1,char * set2)36 static void map_translation(char *set1 , char *set2)
37 {
38 int i = TT.len1, k = 0;
39
40 if (toys.optflags & FLAG_d)
41 for (; i; i--, k++) TT.map[set1[k]] = set1[k]|0x100; //set delete bit
42
43 if (toys.optflags & FLAG_s) {
44 for (i = TT.len1, k = 0; i; i--, k++)
45 TT.map[set1[k]] = TT.map[set1[k]]|0x200;
46 for (i = TT.len2, k = 0; i; i--, k++)
47 TT.map[set2[k]] = TT.map[set2[k]]|0x200;
48 }
49 i = k = 0;
50 while (!(toys.optflags & FLAG_d) && set2 && TT.len1--) { //ignore set2 if -d present
51 TT.map[set1[i]] = ((TT.map[set1[i]] & 0xFF00) | set2[k]);
52 if (set2[k + 1]) k++;
53 i++;
54 }
55 }
56
// Decode one backslash escape.
//
// On entry *esc_val points at the character following the backslash. On
// return *esc_val points at the last character consumed, so the caller
// advances by one to reach the next unread character. The decoded byte is
// returned.
//
// Recognized forms: \xH or \xHH (hex), \N, \NN or \NNN (octal, first digit
// 0-7), and \n \t \e \b \a \f \v \r \\. Anything else (including \x with no
// hex digit after it) yields a literal backslash and leaves *esc_val on the
// backslash itself, so the following character is then read as ordinary text.
static int handle_escape_char(char **esc_val) //taken from printf
{
  char *ptr = *esc_val;
  unsigned base = 0, max_digits = 0, digits = 0, value = 0;
  int result;

  if (*ptr == 'x') {
    base = 16;
    max_digits = 2;
    ptr++;
  } else if (*ptr >= '0' && *ptr <= '7') {
    // Only 0-7 can start an octal escape; \8 and \9 are unknown escapes.
    base = 8;
    max_digits = 3;
  }

  if (base) {
    while (digits < max_digits) {
      int c = (unsigned char)*ptr;
      unsigned num;

      // Decode by explicit range so that punctuation lying between '9' and
      // 'a' (such as ':') is never mistaken for a hex digit.
      if (c >= '0' && c <= '9') num = c - '0';
      else if (c >= 'a' && c <= 'f') num = c - 'a' + 10;
      else if (c >= 'A' && c <= 'F') num = c - 'A' + 10;
      else break;
      if (num >= base) break;

      value = value * base + num;
      digits++;
      ptr++;
    }

    if (!digits) {
      // Only possible for "\x" without a hex digit: ptr is one past the 'x',
      // so step back to the backslash and emit it literally.
      *esc_val = ptr - 2;
      return '\\';
    }

    *esc_val = ptr - 1;
    return (char)value;
  }

  switch (*ptr) {
    case 'n': result = '\n'; break;
    case 't': result = '\t'; break;
    case 'e': result = 27; break;
    case 'b': result = '\b'; break;
    case 'a': result = '\a'; break;
    case 'f': result = '\f'; break;
    case 'v': result = '\v'; break;
    case 'r': result = '\r'; break;
    case '\\': result = '\\'; break;
    default:
      result = '\\';
      ptr--; // back onto the backslash; the caller steps past it
      break;
  }

  *esc_val = ptr;
  return (char)result;
}
110
// Identify the "[:name:]" character class that class_name starts with.
//
// Returns the index of the matching entry in the table below, whose order
// mirrors the class_* enum (class_alpha first, class_xdigit last). When
// class_name does not begin with a complete class name the result is the
// number of table entries, which is the value of class_invalid.
static int find_class(char *class_name)
{
  static char *names[] = {
    "[:alpha:]","[:alnum:]","[:digit:]",
    "[:lower:]","[:upper:]","[:space:]",
    "[:blank:]","[:punct:]","[:cntrl:]",
    "[:xdigit:]"
  };
  int i, count = sizeof(names)/sizeof(*names);

  // Compare against each name's full length (closing "]" included) and use
  // strncmp so a short argument is never read past its terminator.
  for (i = 0; i < count; i++)
    if (!strncmp(class_name, names[i], strlen(names[i]))) break;

  return i;
}
126
expand_set(char * arg,int * len)127 static char *expand_set(char *arg, int *len)
128 {
129 int i = 0, j, k, size = 256;
130 char *set = xzalloc(size*sizeof(char));
131
132 while (*arg) {
133
134 if (i >= size) {
135 size += 256;
136 set = xrealloc(set, size);
137 }
138 if (*arg == '\\') {
139 arg++;
140 set[i++] = (int)handle_escape_char(&arg);
141 arg++;
142 continue;
143 }
144 if (arg[1] == '-') {
145 if (arg[2] == '\0') goto save;
146 j = arg[0];
147 k = arg[2];
148 if (j > k) perror_exit("reverse colating order");
149 while (j <= k) set[i++] = j++;
150 arg += 3;
151 continue;
152 }
153 if (arg[0] == '[' && arg[1] == ':') {
154
155 if ((j = find_class(arg)) == class_invalid) goto save;
156
157 if ((j == class_alpha) || (j == class_upper) || (j == class_alnum)) {
158 for (k = 'A'; k <= 'Z'; k++) set[i++] = k;
159 }
160 if ((j == class_alpha) || (j == class_lower) || (j == class_alnum)) {
161 for (k = 'a'; k <= 'z'; k++) set[i++] = k;
162 }
163 if ((j == class_alnum) || (j == class_digit) || (j == class_xdigit)) {
164 for (k = '0'; k <= '9'; k++) set[i++] = k;
165 }
166 if (j == class_space || j == class_blank) {
167 set[i++] = '\t';
168 if (j == class_space) {
169 set[i++] = '\n';
170 set[i++] = '\f';
171 set[i++] = '\r';
172 set[i++] = '\v';
173 }
174 set[i++] = ' ';
175 }
176 if (j == class_punct) {
177 for (k = 0; k <= 255; k++)
178 if (ispunct(k)) set[i++] = k;
179 }
180 if (j == class_cntrl) {
181 for (k = 0; k <= 255; k++)
182 if (iscntrl(k)) set[i++] = k;
183 }
184 if (j == class_xdigit) {
185 for (k = 'A'; k <= 'F'; k++) {
186 set[i + 6] = k | 0x20;
187 set[i++] = k;
188 }
189 i += 6;
190 arg += 10;
191 continue;
192 }
193
194 arg += 9; //never here for class_xdigit.
195 continue;
196 }
197 if (arg[0] == '[' && arg[1] == '=') { //[=char=] only
198 arg += 2;
199 if (*arg) set[i++] = *arg;
200 if (!arg[1] || arg[1] != '=' || arg[2] != ']')
201 error_exit("bad equiv class");
202 continue;
203 }
204 save:
205 set[i++] = *arg++;
206 }
207 *len = i;
208 return set;
209 }
210
print_map(char * set1,char * set2)211 static void print_map(char *set1, char *set2)
212 {
213 int n, src, dst, prev = -1;
214
215 while ((n = read(0, toybuf, sizeof(toybuf)))) {
216 if (!FLAG(d) && !FLAG(s)) {
217 for (dst = 0; dst < n; dst++) toybuf[dst] = TT.map[toybuf[dst]];
218 } else {
219 for (src = dst = 0; src < n; src++) {
220 int ch = TT.map[toybuf[src]];
221
222 if (FLAG(d) && (ch & 0x100)) continue;
223 if (FLAG(s) && ((ch & 0x200) && prev == ch)) continue;
224 toybuf[dst++] = prev = ch;
225 }
226 }
227 xwrite(1, toybuf, dst);
228 }
229 }
230
do_complement(char ** set)231 static void do_complement(char **set)
232 {
233 int i, j;
234 char *comp = xmalloc(256);
235
236 for (i = 0, j = 0;i < 256; i++) {
237 if (memchr(*set, i, TT.len1)) continue;
238 else comp[j++] = (char)i;
239 }
240 free(*set);
241 TT.len1 = j;
242 *set = comp;
243 }
244
tr_main(void)245 void tr_main(void)
246 {
247 char *set1, *set2 = NULL;
248 int i;
249
250 for (i = 0; i < 256; i++) TT.map[i] = i; //init map
251
252 set1 = expand_set(toys.optargs[0], &TT.len1);
253 if (toys.optflags & FLAG_c) do_complement(&set1);
254 if (toys.optargs[1]) {
255 if (toys.optargs[1][0] == '\0') error_exit("set2 can't be empty string");
256 set2 = expand_set(toys.optargs[1], &TT.len2);
257 }
258 map_translation(set1, set2);
259
260 print_map(set1, set2);
261 free(set1);
262 free(set2);
263 }
264