• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* diff.c - compare files line by line
2  *
3  * Copyright 2014 Sandeep Sharma <sandeep.jack2756@gmail.com>
4  * Copyright 2014 Ashwini Kumar <ak.ashwini1981@gmail.com>
5  *
6  * See https://pubs.opengroup.org/onlinepubs/9699919799/utilities/diff.html
7  * and https://www.cs.dartmouth.edu/~doug/diff.pdf
8  *
9  * Deviations from posix: always does -u
10 
11 USE_DIFF(NEWTOY(diff, "<2>2(unchanged-line-format):;(old-line-format):;(new-line-format):;(color)(strip-trailing-cr)B(ignore-blank-lines)d(minimal)b(ignore-space-change)ut(expand-tabs)w(ignore-all-space)i(ignore-case)T(initial-tab)s(report-identical-files)q(brief)a(text)S(starting-file):F(show-function-line):;L(label)*N(new-file)r(recursive)U(unified)#<0=3", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_ARGFAIL(2)))
12 
13 config DIFF
14   bool "diff"
15   default n
16   help
17   usage: diff [-abBdiNqrTstw] [-L LABEL] [-S FILE] [-U LINES] [-F REGEX ] FILE1 FILE2
18 
19   -a	Treat all files as text
20   -b	Ignore changes in the amount of whitespace
21   -B	Ignore changes whose lines are all blank
22   -d	Try hard to find a smaller set of changes
23   -F 	Show the most recent line matching the regex
24   -i	Ignore case differences
25   -L	Use LABEL instead of the filename in the unified header
26   -N	Treat absent files as empty
27   -q	Output only whether files differ
28   -r	Recurse
29   -S	Start with FILE when comparing directories
30   -s	Report when two files are the same
31   -T	Make tabs line up by prefixing a tab when necessary
32   -t	Expand tabs to spaces in output
33   -u	Unified diff
34   -U	Output LINES lines of context
35   -w	Ignore all whitespace
36 
37   --color     Color output   --strip-trailing-cr   Strip '\r' from input lines
38   --TYPE-line-format=FORMAT  Display TYPE (unchanged/old/new) lines using FORMAT
39     FORMAT uses printf integer escapes (ala %-2.4x) followed by LETTER: FELMNn
40   Supported format specifiers are:
41   * %l, the contents of the line, without the trailing newline
42   * %L, the contents of the line, including the trailing newline
43   * %%, the character '%'
44 */
45 
46 #define FOR_diff
47 #include "toys.h"
48 
49 GLOBALS(
50   long U;
51   struct arg_list *L;
52   char *F, *S, *new_line_format, *old_line_format, *unchanged_line_format;
53 
54   int dir_num, size, is_binary, differ, change, len[2], *offset[2];
55   struct stat st[2];
56   struct {
57     char **list;
58     int nr_elm;
59   } dir[2];
60   struct {
61     FILE *fp;
62     int len;
63   } file[2];
64 )
65 
66 #define IS_STDIN(s)     (*(s)=='-' && !(s)[1])
67 
68 struct v_vector {
69   unsigned serial:31;
70   unsigned last:1;
71   union {
72     unsigned hash;
73     unsigned p;
74   };
75 };
76 
77 struct diff {
78   long a, b, c, d, prev, suff;
79 };
80 
81 struct candidate {
82   struct candidate *next, *prev;
83   int a, b;
84 };
85 
86 enum {
87   empty = 1 << 9,
88   eol = 1 << 10,
89   eof = 1 << 11,
90   space = 1 << 12
91 };
92 
comp(void * a,void * b)93 static int comp(void *a, void *b)
94 {
95   int i = ((struct v_vector *)a)->hash - ((struct v_vector *)b)->hash;
96 
97   return i ? : ((struct v_vector *)a)->serial - ((struct v_vector *)b)->serial;
98 }
99 
search(struct candidate ** K,int r,int k,int j)100 static int search(struct candidate **K, int r, int k, int j)
101 {
102   int low = r, upper = k, mid;
103 
104   while (low<=(mid = (low+upper)/2)) {
105     if (K[mid]->b < j && K[mid + 1]->b > j) return mid;
106     if (K[mid]->b < j) low = mid + 1;
107     else if (K[mid]->b > j) upper = mid - 1;
108     else return -1;
109   }
110   return -1;
111 }
112 
new_candidate(int i,int j,struct candidate * prev)113 static struct candidate *new_candidate(int i, int j, struct candidate *prev)
114 {
115   struct candidate *c = xzalloc(sizeof(struct candidate));
116 
117   c->a = i;
118   c->b = j;
119   c->prev = prev;
120   return c;
121 }
122 
123 /* 1. Search K[r: k] for an element K[s] such that K[s]-> b < j and K[s + 1]->b > j
124  * 2. if found do
125  *  2.a. If K[s + 1]->b > j do K[r] = c; r = s+1 and c = candidate(i, j, K[s]) //we have a candidate
126  *  2.b. if s = k (fence reached move it further) do K[k + 2] = K[k + 1], k++
127  * 3. if E[p].last true break i.e we have reached at the end of an equiv class
128  *    else p = p + 1 //keep traversing the equiv class.
129  * 4. K[r] = c //Save the sucessfully filled k-candidate.
130  */
do_merge(struct candidate ** K,int * k,int i,struct v_vector * E,int p)131 static void do_merge(struct candidate **K, int *k, int i,
132     struct v_vector *E, int p)
133 {
134   int r = 0, s, j;
135   struct candidate *pr = 0, *c = K[0];
136 
137   for (;;) {
138     j = E[p].serial;
139     s = search(K, r, *k, j);
140     if (s>=0 && K[s]->b<j && K[s+1]->b>j) {
141       if (K[s+1]->b>j) {
142         pr = K[s];
143         if (r && K[r]) c->next = K[r];
144         K[r] = c;
145         r = s+1;
146         c = new_candidate(i , j, pr);
147       }
148       if (s == *k) {
149         ++*k;
150         K[*k+1] = K[*k];
151         break;
152       }
153     }
154     if (E[p].last) break;
155     else p++;
156   }
157   K[r] = c;
158 }
159 
read_tok(FILE * fp,off_t * off,int tok)160 static int read_tok(FILE *fp, off_t *off, int tok)
161 {
162   int t = 0, is_space;
163 
164   tok |= empty;
165   while (!(tok & eol)) {
166     t = fgetc(fp);
167 
168     if (FLAG(strip_trailing_cr) && t == '\r') {
169       int t2 = fgetc(fp);
170       if (t2 == '\n') {
171         t = t2;
172         if (off) (*off)++;
173       } else {
174         ungetc(t2, fp);
175       }
176     }
177 
178     if (off && t != EOF) *off += 1;
179     is_space = isspace(t) || (t == EOF);
180     tok |= (t & (eof + eol)); //set tok eof+eol when t is eof
181 
182     if (t == '\n') tok |= eol;
183     if (FLAG(i)) if (t >= 'A' && t <= 'Z') t = tolower(t);
184 
185     if (FLAG(w) && is_space) continue;
186 
187     if (FLAG(b)) {
188       if (tok & space) {
189         if (is_space) continue;
190         tok &= ~space;
191       } else if (is_space) t = space + ' ';
192     }
193     tok &= ~(empty + 0xff);  //remove empty and char too.
194     tok |= t; //add most recent char
195     break;
196   }
197 
198   return tok;
199 }
200 
bcomp(void * a,void * b)201 int bcomp(void *a, void *b)
202 {
203   struct v_vector *l = (struct v_vector *)a, *r = (struct v_vector *)b;
204 
205   return (l->hash-r->hash) ? : r[-1].last ? 0 : -1;
206 }
207 
208 /*  file[0] corresponds file 1 and file[1] correspond file 2.
209  * 1. calc hashes for both the files and store them in vector(v[0], v[1])
210  * 2. sort file[1] with hash as primary and serial as sec. key
211  * 3. Form the equivalance class of file[1] stored in e vector. It lists all the equivalence
212  *    classes of lines in file[1], with e.last = true on the last element of each class.
213  *    The elements are ordered by serial within classes.
214  * 4. Form the p vector stored in  p_vector. p_vector[i], if non-zero, now points in e vector
215  *    to the beginning of the equiv class of lines in file[1] equivalent to line
216  *    i in file[0].
217  * 5. Form the k-candidates as discribed in do_merge.
218  * 6. Create a vector J[i] = j, such that i'th line in file[0] is j'th line of
219  *    file[1], i.e J comprises LCS
220  */
create_j_vector()221 static int *create_j_vector()
222 {
223   int tok, i, j, size = 100, k;
224   off_t off;
225   long hash;
226   int *p_vector, *J;
227   struct v_vector *v[2], *e;
228   struct candidate **kcand, *pr;
229 
230   for (i = 0; i < 2; i++) {
231     tok = off = 0;
232     hash = 5831;
233     v[i] = xzalloc(size * sizeof(struct v_vector));
234     TT.offset[i] = xzalloc(size * sizeof(int));
235     TT.file[i].len = 0;
236     if (fseek(TT.file[i].fp, 0, SEEK_SET)) perror_exit("fseek failed");
237 
238     while (1) {
239       tok  = read_tok(TT.file[i].fp, &off, tok);
240       if (!(tok & empty)) {
241         hash = ((hash << 5) + hash) + (tok & 0xff);
242         continue;
243       }
244 
245       if (size == ++TT.file[i].len) {
246         size = size * 11 / 10;
247         v[i] = xrealloc(v[i], size*sizeof(struct v_vector));
248         TT.offset[i] = xrealloc(TT.offset[i], size*sizeof(int));
249       }
250 
251       v[i][TT.file[i].len].hash = hash & INT_MAX;
252       TT.offset[i][TT.file[i].len] = off;
253       if ((tok & eof)) {
254         TT.offset[i][TT.file[i].len] = ++off;
255         break;
256       }
257       hash = 5831;  //next line
258       tok = 0;
259     }
260     if (TT.offset[i][TT.file[i].len]-TT.offset[i][TT.file[i].len-1] == 1)
261       TT.file[i].len--;
262   }
263 
264   for (i = 0; i<=TT.file[1].len; i++) v[1][i].serial = i;
265   qsort(v[1]+1, TT.file[1].len, sizeof(struct v_vector), (void *)comp);
266 
267   e = v[1];
268   e[0].serial = 0;
269   e[0].last = 1;
270   for (i = 1; i<=TT.file[1].len; i++)
271     e[i].last = i==TT.file[1].len || v[1][i].hash!=v[1][i+1].hash;
272 
273   p_vector = xzalloc((TT.file[0].len+2)*sizeof(int));
274   for (i = 1; i<=TT.file[0].len; i++) {
275     void *r = bsearch(&v[0][i], e+1, TT.file[1].len, sizeof(*e), (void *)bcomp);
276     if (r) p_vector[i] = (struct v_vector *)r - e;
277   }
278 
279   for (i = 1; i<=TT.file[0].len; i++) e[i].p = p_vector[i];
280   free(p_vector);
281 
282   size = 100;
283   kcand = xzalloc(size * sizeof(struct candidate *));
284 
285   kcand[0] = new_candidate(0 , 0, 0);
286   kcand[1] = new_candidate(TT.file[0].len+1, TT.file[1].len+1, 0); //the fence
287 
288   k = 0;  //last successfully filled k candidate.
289   for (i = 1; i<=TT.file[0].len; i++) {
290     if (!e[i].p) continue;
291     if ((size - 2) == k) {
292       size = size * 11 / 10;
293       kcand = xrealloc(kcand, (size*sizeof(struct candidate *)));
294     }
295     do_merge(kcand, &k, i, e, e[i].p);
296   }
297   free(v[0]); //no need for v_vector now.
298   free(v[1]);
299 
300   J = xzalloc((TT.file[0].len+2)*sizeof(int));
301 
302   for (pr = kcand[k]; pr; pr = pr->prev) J[pr->a] = pr->b;
303   J[TT.file[0].len+1] = TT.file[1].len+1; //mark boundary
304 
305   for (i = k+1; i>=0; i--) llist_traverse(kcand[i], free);
306   free(kcand);
307 
308   for (i = 1; i<=TT.file[0].len; i++) { // jackpot?
309     if (!J[i]) continue;
310 
311     if (fseek(TT.file[0].fp, TT.offset[0][i-1], SEEK_SET)
312      || fseek(TT.file[1].fp, TT.offset[1][J[i]-1], SEEK_SET))
313        perror_exit("fseek");
314 
315     for (j = J[i]; i<=TT.file[0].len && J[i]==j; i++, j++) {
316       int tok0 = 0, tok1 = 0;
317 
318       do {
319         tok0 = read_tok(TT.file[0].fp, NULL, tok0);
320         tok1 = read_tok(TT.file[1].fp, NULL, tok1);
321         if (((tok0 ^ tok1) & empty) || ((tok0 & 0xff) != (tok1 & 0xff)))
322           J[i] = 0;
323       } while (!(tok0 & tok1 & empty));
324     }
325   }
326   return J;
327 }
328 
diff(char ** files)329 static int *diff(char **files)
330 {
331   size_t i ,j;
332   int s, t;
333   char *bufi, *bufj;
334 
335   TT.is_binary = 0; //loop calls to diff
336   TT.differ = 0;
337 
338   for (i = 0; i < 2; i++) {
339     if ((j = !strcmp(files[i], "-")) || S_ISFIFO(TT.st[i].st_mode)) {
340       char *tmp_name;
341       int srcfd = j ? 0 : open(files[i], O_RDONLY),
342         tmpfd = xtempfile("fifo", &tmp_name);
343 
344       unlink(tmp_name);
345       free(tmp_name);
346 
347       xsendfile(srcfd, tmpfd);
348       if (!j) close(srcfd);
349       TT.file[i].fp = fdopen(tmpfd, "r");
350     } else TT.file[i].fp = fopen(files[i], "r");
351 
352     if (!TT.file[i].fp) {
353       perror_msg("%s", files[i]);
354       TT.differ = 2;
355       return 0; //return SAME
356     }
357   }
358 
359   s = sizeof(toybuf)/2;
360   bufi = toybuf;
361   bufj = toybuf+s;
362 
363   if (fseek(TT.file[0].fp, 0, SEEK_SET) || fseek(TT.file[1].fp, 0, SEEK_SET))
364     perror_exit("fseek");
365 
366   if (FLAG(a)) return create_j_vector();
367 
368   while (1) {
369     i = fread(bufi, 1, s, TT.file[0].fp);
370     j = fread(bufj, 1, s, TT.file[1].fp);
371 
372     if (i != j) TT.differ = 1;
373 
374     for (t = 0; t < i && !TT.is_binary; t++) if (!bufi[t]) TT.is_binary = 1;
375     for (t = 0; t < j && !TT.is_binary; t++) if (!bufj[t]) TT.is_binary = 1;
376 
377     i = minof(i, j);
378     for (t = 0; t < i; t++) if (bufi[t] != bufj[t]) TT.differ = 1;
379 
380     if (!i || !j) break;
381   }
382   if (TT.is_binary || !TT.differ) return 0;
383 
384   return create_j_vector();
385 }
386 
print_line_matching_regex(int a,regex_t * reg,int * off_set,FILE * fp)387 static void print_line_matching_regex(int a, regex_t *reg, int *off_set, FILE *fp) {
388   int i = 0, j = 0, line_buf_size = 100, cc = 0;
389   char* line = xzalloc(line_buf_size * sizeof(char));
390   for (i = a; a > 0; --i) {
391     int line_len = 0;
392     if (fseek(fp, off_set[i - 1], SEEK_SET)) perror_exit("fseek failed");
393     for (j = 0; j < (off_set[i] - off_set[i - 1]); j++) {
394       cc = fgetc(fp);
395       if (cc == EOF || cc == '\n') {
396         break;
397       }
398       ++line_len;
399       if (line_len >= line_buf_size) {
400         line_buf_size = line_buf_size * 11 / 10;
401         line = xrealloc(line, line_buf_size*sizeof(char));
402       }
403       line[j] = cc;
404     }
405     line[line_len] = '\0';
406     if (!regexec0(reg, line, line_len, 0, NULL, 0)) {
407       printf(" %s", line);
408       break;
409     }
410   }
411   free(line);
412 }
413 
print_diff(int a,int b,char c,int * off_set,FILE * fp)414 static void print_diff(int a, int b, char c, int *off_set, FILE *fp)
415 {
416   int i, j, cc, cl;
417   char *reset = 0, *fmt = 0;
418 
419   if (!TT.new_line_format && c!=' ' && FLAG(color)) {
420     printf("\e[%dm", 31+(c=='+'));
421     reset = "\e[0m";
422   }
423 
424   for (i = a; i <= b; i++) {
425     if (fseek(fp, off_set[i - 1], SEEK_SET)) perror_exit("fseek failed");
426     if (TT.new_line_format) {
427       if (c == '+') fmt = TT.new_line_format;
428       else if (c == '-') fmt = TT.old_line_format;
429       else fmt = TT.unchanged_line_format;
430       while (*fmt) {
431         if (*fmt == '%') {
432           fmt++;
433           char f = *fmt++;
434           if (f == '%') putchar('%');
435           else if (f == 'l' || f == 'L') {
436             for (j = 0; j <  (off_set[i] - off_set[i - 1]); j++) {
437               cc = fgetc(fp);
438               if (cc == EOF) break;
439               if (cc != '\n' || f == 'L') putchar(cc);
440             }
441           } else error_exit("Unrecognized format specifier %%%c", f);
442         } else putchar(*fmt++);
443       }
444       continue;
445     }
446     putchar(c);
447     if (FLAG(T)) putchar('\t');
448     for (j = 0, cl = 0; j <  (off_set[i] - off_set[i - 1]); j++) {
449       cc = fgetc(fp);
450       if (cc == EOF) {
451         printf("%s\n\\ No newline at end of file\n", reset ? : "");
452         return;
453       }
454       if ((cc == '\t') && FLAG(t)) do putchar(' '); while (++cl & 7);
455       else {
456         putchar(cc); //xputc has calls to fflush, it hurts performance badly.
457         cl++;
458       }
459     }
460   }
461   if (reset) xputsn(reset);
462 }
463 
concat_file_path(char * path,char * default_path)464 static char *concat_file_path(char *path, char *default_path)
465 {
466   char *final_path;
467 
468   if ('/' == path[strlen(path) - 1]) {
469     while (*default_path == '/') ++default_path;
470     final_path = xmprintf("%s%s", path, default_path);
471   }
472   else if (*default_path != '/')
473     final_path = xmprintf("%s/%s", path, default_path);
474   else final_path = xmprintf("%s%s", path, default_path);
475   return final_path;
476 }
477 
skip(struct dirtree * node)478 static int skip(struct dirtree *node)
479 {
480   int len = strlen(toys.optargs[TT.dir_num]), ret = 0;
481   char *tmp = NULL, *ptr, *f_path = dirtree_path(node, NULL);
482   struct stat st;
483 
484   ptr = f_path;
485   ptr += len;
486   if (ptr[0]) {
487     tmp = concat_file_path(toys.optargs[1 - TT.dir_num], ptr);
488     if (tmp && !stat(tmp, &st)) ret = 0; //it is there on other side
489     else ret = 1; //not present on other side.
490   }
491   free(f_path);
492   if (tmp) free(tmp);
493   return ret; //add otherwise
494 }
495 
add_to_list(struct dirtree * node)496 static void add_to_list(struct dirtree *node)
497 {
498   char *full_path;
499 
500   TT.dir[TT.dir_num].list = xrealloc(TT.dir[TT.dir_num].list,
501       (TT.size + 1)*sizeof(char*));
502   TT.size++;
503   full_path = dirtree_path(node, NULL);
504   TT.dir[TT.dir_num].list[TT.size - 1] = full_path;
505 }
506 
list_dir(struct dirtree * node)507 static int list_dir(struct dirtree *node)
508 {
509   int ret = 0;
510 
511   if (!dirtree_notdotdot(node)) return 0;
512 
513   if (S_ISDIR(node->st.st_mode) && !node->parent) { //add root dirs.
514     add_to_list(node);
515     return (DIRTREE_RECURSE|DIRTREE_SYMFOLLOW);
516   }
517 
518   if (S_ISDIR(node->st.st_mode) && FLAG(r)) {
519     if (!FLAG(N)) ret = skip(node);
520     if (!ret) return DIRTREE_RECURSE|DIRTREE_SYMFOLLOW;
521     else {
522       add_to_list(node); //only at one side.
523       return 0;
524     }
525   } else {
526     add_to_list(node);
527     return S_ISDIR(node->st.st_mode) ? 0 : (DIRTREE_RECURSE|DIRTREE_SYMFOLLOW);
528   }
529 }
530 
cmp(void * p1,void * p2)531 static int cmp(void *p1, void *p2)
532 {
533    return strcmp(*(char **)p1, *(char **)p2);
534 }
535 
536 // quote and escape filenames that have awkward characters
quote_filename(char * filename)537 char *quote_filename(char *filename)
538 {
539   char *to = "abfnrtv\"\\", *from = "\a\b\f\n\r\t\v\"\\", *s, *t=0, *u;
540   int len, quote = 0;
541 
542   for (;;) {
543     // measure escapes on first pass, write on second
544     len = 0;
545     for (s = filename; *s; s++) {
546       if ((u = strchr(from, *s))) {
547         if (t) t[len] = '\\', t[len+1] = to[u-from];
548         len += 2;
549       } else if (*s<0x20 || *s>=0x80)
550         len += snprintf(t+len, 5*!!t, "\\%.3o", *s);
551       else {
552         if (t) t[len] = *s;
553         len++;
554       }
555     }
556     if (t) {
557       if (quote) t[len++] = '"';
558       t[len] = 0;
559 
560       return t-quote;
561     }
562 
563     // construct the new string
564     quote = strlen(filename)!=len || strchr(filename, ' ');
565     t = xmalloc(len+1+2*quote);
566     if (quote) *t++ = '"';
567   }
568 }
569 
show_label(char * prefix,char * filename,struct stat * sb)570 static void show_label(char *prefix, char *filename, struct stat *sb)
571 {
572   char date[36];
573   char *quoted_file;
574 
575   quoted_file = quote_filename(filename);
576   printf("%s %s\t%s\n", prefix, quoted_file,
577     format_iso_time(date, sizeof(date), &sb->st_mtim));
578   free(quoted_file);
579 }
580 
do_diff(char ** files)581 static void do_diff(char **files)
582 {
583   long i = 1, size = 1, x = 0, change = 0, ignore_white,
584    start1, end1, start2, end2;
585   struct diff *d;
586   struct arg_list *llist = TT.L;
587   int *J;
588   regex_t reg;
589 
590   TT.offset[0] = TT.offset[1] = NULL;
591   J = diff(files);
592 
593   if (!J) return; //No need to compare, have to status only
594 
595   if (TT.F) {
596     xregcomp(&reg, TT.F, 0);
597   }
598 
599   d = xzalloc(size *sizeof(struct diff));
600   do {
601     ignore_white = 0;
602     for (d[x].a = i; d[x].a<=TT.file[0].len; d[x].a++) {
603       if (J[d[x].a] != (J[d[x].a - 1] + 1)) break;
604       else continue;
605     }
606     d[x].c = (J[d[x].a - 1] + 1);
607 
608     for (d[x].b = (d[x].a - 1); d[x].b<=TT.file[0].len; d[x].b++) {
609       if (J[d[x].b + 1]) break;
610       else continue;
611     }
612     d[x].d = (J[d[x].b + 1] - 1);
613 
614     if (FLAG(B)) {
615       if (d[x].a <= d[x].b) {
616         if ((TT.offset[0][d[x].b] - TT.offset[0][d[x].a - 1])
617             == (d[x].b - d[x].a + 1))
618           ignore_white = 1;
619       } else if (d[x].c <= d[x].d){
620         if ((TT.offset[1][d[x].d] - TT.offset[1][d[x].c - 1])
621             == (d[x].d - d[x].c + 1))
622           ignore_white = 1;
623       }
624     }
625 
626     //is we have diff ?   TODO: lolcat?
627     if ((d[x].a <= d[x].b || d[x].c <= d[x].d) && !ignore_white) change = 1;
628 
629     if (!ignore_white) d = xrealloc(d, (x + 2) *sizeof(struct diff));
630     i = d[x].b + 1;
631     if (i>TT.file[0].len) break;
632     J[d[x].b] = d[x].d;
633     if (!ignore_white) x++;
634   } while (i<=TT.file[0].len);
635 
636   i = x+1;
637   TT.differ = change; //update status, may change bcoz of -w etc.
638 
639   if (!FLAG(q) && change) {
640     if (!TT.new_line_format) {
641       if (FLAG(color)) printf("\e[1m");
642       if (FLAG(L)) printf("--- %s\n", llist->arg);
643       else show_label("---", files[0], &(TT).st[0]);
644       if (!FLAG(L) || !llist->next) show_label("+++", files[1], &(TT).st[1]);
645       else {
646         while (llist->next) llist = llist->next;
647         printf("+++ %s\n", llist->arg);
648       }
649       if (FLAG(color)) printf("\e[0m");
650     }
651 
652     struct diff *t, *ptr1 = d, *ptr2 = d;
653     while (i) {
654       long a,b;
655 
656       // trim context to file len.
657       if (TT.new_line_format || TT.U>TT.file[0].len) TT.U = TT.file[0].len;
658       if (ptr1->b < ptr1->a && ptr1->d < ptr1->c) {
659         i--;
660         continue;
661       }
662       //Handle the context stuff
663       a =  ptr1->a;
664       b = minof(TT.file[0].len, ptr1->b);
665       if (i == x + 1) ptr1->suff = maxof(1, a-TT.U);
666       else if (ptr1[-1].prev >= ptr1->a-TT.U) ptr1->suff = ptr1[-1].prev+1;
667       else ptr1->suff =  ptr1->a-TT.U;
668 calc_ct:
669       if (i > 1) {
670         if ((ptr2->b + TT.U) >= (ptr2  + 1)->a) {
671           ptr2++;
672           i--;
673           goto calc_ct;
674         } else ptr2->prev = ptr2->b + TT.U;
675       } else ptr2->prev = ptr2->b;
676       start1 = (ptr2->prev - ptr1->suff + 1);
677       end1 = (start1 == 1) ? -1 : start1;
678       start2 = maxof(1, ptr1->c - (ptr1->a - ptr1->suff));
679       end2 = ptr2->prev - ptr2->b + ptr2->d;
680 
681       if (!TT.new_line_format) {
682         if (FLAG(color)) printf("\e[36m");
683         printf("@@ -%ld", start1 ? ptr1->suff: (ptr1->suff -1));
684         if (end1 != -1) printf(",%ld ", ptr2->prev-ptr1->suff + 1);
685         else putchar(' ');
686 
687         printf("+%ld", (end2 - start2 + 1) ? start2: (start2 -1));
688         if ((end2 - start2 +1) != 1) printf(",%ld ", (end2 - start2 +1));
689         else putchar(' ');
690         printf("@@");
691         if (FLAG(color)) printf("\e[0m");
692         if (TT.F) {
693           print_line_matching_regex(ptr1->suff-1, &reg, TT.offset[0], TT.file[0].fp);
694         }
695         putchar('\n');
696       }
697 
698       for (t = ptr1; t <= ptr2; t++) {
699         if (t==ptr1) print_diff(t->suff, t->a-1, ' ', TT.offset[0], TT.file[0].fp);
700         print_diff(t->a, t->b, '-', TT.offset[0], TT.file[0].fp);
701         print_diff(t->c, t->d, '+', TT.offset[1], TT.file[1].fp);
702         if (t == ptr2)
703           print_diff(t->b+1, (t)->prev, ' ', TT.offset[0], TT.file[0].fp);
704         else print_diff(t->b+1, (t+1)->a-1, ' ', TT.offset[0], TT.file[0].fp);
705       }
706       ptr2++;
707       ptr1 = ptr2;
708       i--;
709     } //end of while
710   } //End of !FLAG_q
711   free(d);
712   free(J);
713   free(TT.offset[0]);
714   free(TT.offset[1]);
715 }
716 
show_status(char ** files)717 static void show_status(char **files)
718 {
719   if (TT.differ==2) return; // TODO: needed?
720   if (TT.differ ? FLAG(q) || TT.is_binary : FLAG(s))
721     printf("Files %s and %s %s\n", files[0], files[1],
722       TT.differ ? "differ" : "are identical");
723 }
724 
create_empty_entry(int l,int r,int j)725 static void create_empty_entry(int l , int r, int j)
726 {
727   struct stat st[2];
728   char *f[2], *path[2];
729   int i;
730 
731   for (i = 0; i < 2; i++) {
732     if (j) {
733       if (!FLAG(N) || i!=(j>0)) continue;
734       path[!i] = concat_file_path(TT.dir[!i].list[0],
735         TT.dir[i].list[i ? r : l]+TT.len[i]);
736       f[!i] = "/dev/null";
737     }
738     path[i] = f[i] = TT.dir[i].list[i ? r : l];
739     stat(f[i], st+i);
740     if (j) st[!i] = st[i];
741   }
742 
743   for (i = 0; i<2; i++) {
744     if (!S_ISREG(st[i].st_mode) && !S_ISDIR(st[i].st_mode)) {
745       printf("File %s is not a regular file or directory and was skipped\n",
746         path[i]);
747       break;
748     }
749   }
750 
751   if (i != 2);
752   else if (S_ISDIR(st[0].st_mode) && S_ISDIR(st[1].st_mode))
753     printf("Common subdirectories: %s and %s\n", path[0], path[1]);
754   else if ((i = S_ISDIR(st[0].st_mode)) != S_ISDIR(st[1].st_mode)) {
755     char *fidir[] = {"directory", "regular file"};
756     printf("File %s is a %s while file %s is a %s\n",
757       path[0], fidir[!i], path[1], fidir[i]);
758   } else {
759     do_diff(f);
760     show_status(path);
761     if (TT.file[0].fp) fclose(TT.file[0].fp);
762     if (TT.file[1].fp) fclose(TT.file[1].fp);
763   }
764 
765   if (FLAG(N) && j) free(path[j<=0]);
766 }
767 
diff_dir(int * start)768 static void diff_dir(int *start)
769 {
770   int l, r, j = 0;
771 
772   l = start[0]; //left side file start
773   r = start[1]; //right side file start
774   while (l < TT.dir[0].nr_elm && r < TT.dir[1].nr_elm) {
775     if ((j = strcmp (TT.dir[0].list[l]+TT.len[0],
776             (TT.dir[1].list[r]+TT.len[1]))) && !FLAG(N)) {
777       if (j > 0) {
778         printf("Only in %s: %s\n", TT.dir[1].list[0], TT.dir[1].list[r]+TT.len[1]);
779         free(TT.dir[1].list[r++]);
780       } else {
781         printf ("Only in %s: %s\n", TT.dir[0].list[0], TT.dir[0].list[l]+TT.len[0]);
782         free(TT.dir[0].list[l++]);
783       }
784       TT.differ = 1;
785     } else {
786       create_empty_entry(l, r, j); //create non empty dirs/files if -N.
787       if (j>=0) free(TT.dir[1].list[r++]);
788       if (j<=0) free(TT.dir[0].list[l++]);
789     }
790   }
791 
792   if (l == TT.dir[0].nr_elm) {
793     while (r<TT.dir[1].nr_elm) {
794       if (!FLAG(N)) {
795         printf ("Only in %s: %s\n", TT.dir[1].list[0], TT.dir[1].list[r]+TT.len[1]);
796         TT.differ = 1;
797       } else create_empty_entry(l, r, 1);
798       free(TT.dir[1].list[r++]);
799     }
800   } else if (r == TT.dir[1].nr_elm) {
801     while (l<TT.dir[0].nr_elm) {
802       if (!FLAG(N)) {
803         printf ("Only in %s: %s\n", TT.dir[0].list[0], TT.dir[0].list[l]+TT.len[0]);
804         TT.differ = 1;
805       } else create_empty_entry(l, r, -1);
806       free(TT.dir[0].list[l++]);
807     }
808   }
809   free(TT.dir[0].list[0]); //we are done, free root nodes too
810   free(TT.dir[0].list);
811   free(TT.dir[1].list[0]);
812   free(TT.dir[1].list);
813 }
814 
diff_main(void)815 void diff_main(void)
816 {
817   int j = 0, k = 1, start[2] = {1, 1};
818   char **files = toys.optargs;
819 
820   toys.exitval = 2;
821   if (FLAG(color) && !isatty(1)) toys.optflags ^= FLAG_color;
822 
823   for (j = 0; j < 2; j++) {
824     if (IS_STDIN(files[j])) fstat(0, &TT.st[j]);
825     else xstat(files[j], &TT.st[j]);
826   }
827 
828   if (S_ISDIR(TT.st[0].st_mode) != S_ISDIR(TT.st[1].st_mode))
829     error_exit("can't compare directory to non-directory");
830 
831   if (TT.unchanged_line_format || TT.old_line_format || TT.new_line_format) {
832     if (S_ISDIR(TT.st[0].st_mode) && S_ISDIR(TT.st[1].st_mode))
833       error_exit("can't use line format with directories");
834     if (!TT.unchanged_line_format) TT.unchanged_line_format = "%l\n";
835     if (!TT.old_line_format) TT.old_line_format = "%l\n";
836     if (!TT.new_line_format) TT.new_line_format = "%l\n";
837   }
838 
839   if (same_file(TT.st, TT.st+1)) {
840     toys.exitval = 0;
841     return show_status(files);
842   }
843 
844   if (S_ISDIR(TT.st[0].st_mode) && S_ISDIR(TT.st[1].st_mode)) {
845     for (j = 0; j < 2; j++) {
846       memset(TT.dir+j, 0, sizeof(*TT.dir));
847       dirtree_flagread(files[j], DIRTREE_SYMFOLLOW, list_dir);
848       TT.dir[j].nr_elm = TT.size; //size updated in list_dir
849       qsort(&TT.dir[j].list[1], TT.size-1, sizeof(char *), (void *)cmp);
850 
851       TT.len[j] = strlen(TT.dir[j].list[0]); //calc root node len
852       TT.len[j] += TT.dir[j].list[0][TT.len[j]-1] != '/';
853 
854       if (FLAG(S)) {
855         while (k<TT.size && strcmp(TT.dir[j].list[k]+TT.len[j], TT.S)<0) {
856           start[j]++;
857           k++;
858         }
859       }
860       TT.dir_num++;
861       TT.size = 0;
862       k = 1;
863     }
864     diff_dir(start);
865   } else {
866     if (S_ISDIR(TT.st[0].st_mode) || S_ISDIR(TT.st[1].st_mode)) {
867       int d = S_ISDIR(TT.st[0].st_mode);
868       char *slash = strrchr(files[d], '/');
869 
870       files[!d] = concat_file_path(files[!d], slash ? slash+1 : files[d]);
871       if (stat(files[!d], &TT.st[!d])) perror_exit("%s", files[!d]);
872     }
873     do_diff(files);
874     show_status(files);
875     if (TT.file[0].fp) fclose(TT.file[0].fp);
876     if (TT.file[1].fp) fclose(TT.file[1].fp);
877   }
878   toys.exitval = TT.differ; //exit status will be the status
879 }
880