1 /*
2 * Common text filter routines for CUPS.
3 *
4 * Copyright 2007-2011 by Apple Inc.
5 * Copyright 1997-2007 by Easy Software Products.
6 *
7 * These coded instructions, statements, and computer programs are the
8 * property of Apple Inc. and are protected by Federal copyright
9 * law. Distribution and use rights are outlined in the file "COPYING"
10 * which should have been included with this file.
11 *
12 * Contents:
13 *
14 * TextMain() - Standard main entry for text filters.
15 * compare_keywords() - Compare two C/C++ keywords.
16 * getutf8() - Get a UTF-8 encoded wide character...
17 */
18
19 /*
20 * Include necessary headers...
21 */
22
23 #include "textcommon.h"
24 #include <limits.h>
25
26
27 /*
28 * Globals...
29 */
30
31 int WrapLines = 1, /* Wrap text in lines */
32 SizeLines = 60, /* Number of lines on a page */
33 SizeColumns = 80, /* Number of columns on a line */
34 PageColumns = 1, /* Number of columns on a page */
35 ColumnGutter = 0, /* Number of characters between text columns */
36 ColumnWidth = 80, /* Width of each column */
37 PrettyPrint = 0, /* Do pretty code formatting */
38 Copies = 1; /* Number of copies */
39 lchar_t **Page = NULL; /* Page characters */
40 int NumPages = 0; /* Number of pages in document */
41 float CharsPerInch = 10; /* Number of character columns per inch */
42 float LinesPerInch = 6; /* Number of lines per inch */
43 int NumKeywords = 0; /* Number of known keywords */
44 char **Keywords = NULL; /* List of known keywords */
45
46
47 /*
48 * Local globals...
49 */
50
/*
 * Keyword tables used for pretty-printing.
 *
 * Every table MUST stay sorted in strcmp() order because TextMain() looks
 * words up with bsearch().  Every entry MUST be followed by a comma (except
 * optionally the last): two adjacent string literals are silently
 * concatenated by the compiler into a single bogus keyword.
 */

static char	*code_keywords[] =	/* List of known C/C++ keywords... */
		{
		  "and",
		  "and_eq",
		  "asm",
		  "auto",
		  "bitand",
		  "bitor",
		  "bool",
		  "break",
		  "case",
		  "catch",
		  "char",
		  "class",
		  "compl",
		  "const",
		  "const_cast",
		  "continue",
		  "default",
		  "delete",
		  "do",
		  "double",
		  "dynamic_cast",
		  "else",
		  "enum",
		  "explicit",
		  "extern",
		  "false",
		  "float",
		  "for",
		  "friend",
		  "goto",
		  "if",
		  "inline",
		  "int",
		  "long",
		  "mutable",
		  "namespace",
		  "new",
		  "not",
		  "not_eq",
		  "operator",
		  "or",
		  "or_eq",
		  "private",
		  "protected",
		  "public",
		  "register",
		  "reinterpret_cast",
		  "return",
		  "short",
		  "signed",
		  "sizeof",
		  "static",
		  "static_cast",
		  "struct",
		  "switch",
		  "template",
		  "this",
		  "throw",
		  "true",
		  "try",
		  "typedef",
		  "typename",
		  "union",
		  "unsigned",
		  "virtual",
		  "void",
		  "volatile",
		  "while",
		  "xor",
		  "xor_eq"
		},
		*sh_keywords[] =	/* List of known Bourne/Korn/zsh/bash keywords... */
		{
		  "alias",
		  "bg",
		  "break",
		  "case",
		  "cd",
		  "command",
		  "continue",
		  "do",
		  "done",
		  "echo",
		  "elif",
		  "else",
		  "esac",
		  "eval",
		  "exec",
		  "exit",
		  "export",
		  "fc",
		  "fg",
		  "fi",
		  "for",
		  "function",
		  "getopts",
		  "if",
		  "in",
		  "jobs",
		  "kill",
		  "let",
		  "limit",
		  "newgrp",
		  "print",
		  "pwd",
		  "read",
		  "readonly",
		  "return",
		  "select",
		  "set",
		  "shift",
		  "test",
		  "then",
		  "time",
		  "times",
		  "trap",
		  "typeset",
		  "ulimit",
		  "umask",
		  "unalias",
		  "unlimit",
		  "unset",
		  "until",
		  "wait",
		  "whence",		/* Comma was missing: produced "whencewhile" */
		  "while"
		},
		*csh_keywords[] =	/* List of known csh/tcsh keywords... */
		{
		  "alias",
		  "aliases",
		  "bg",
		  "bindkey",
		  "break",
		  "breaksw",
		  "builtins",
		  "case",
		  "cd",
		  "chdir",
		  "complete",
		  "continue",
		  "default",
		  "dirs",
		  "echo",
		  "echotc",
		  "else",
		  "end",
		  "endif",
		  "eval",
		  "exec",
		  "exit",
		  "fg",
		  "foreach",
		  "glob",
		  "goto",
		  "history",
		  "if",
		  "jobs",
		  "kill",
		  "limit",
		  "login",
		  "logout",
		  "ls",
		  "nice",
		  "nohup",
		  "notify",
		  "onintr",
		  "popd",
		  "pushd",
		  "pwd",
		  "rehash",
		  "repeat",
		  "set",
		  "setenv",
		  "settc",
		  "shift",
		  "source",
		  "stop",
		  "suspend",
		  "switch",
		  "telltc",
		  "then",
		  "time",
		  "umask",
		  "unalias",
		  "unbindkey",
		  "unhash",
		  "unlimit",
		  "unset",
		  "unsetenv",
		  "wait",
		  "where",
		  "which",
		  "while"
		},
		*perl_keywords[] =	/* List of known perl keywords... */
		{
		  "abs",
		  "accept",
		  "alarm",
		  "and",
		  "atan2",
		  "bind",
		  "binmode",
		  "bless",
		  "caller",
		  "chdir",
		  "chmod",
		  "chomp",
		  "chop",
		  "chown",
		  "chr",
		  "chroot",
		  "close",
		  "closedir",		/* Was misspelled "closdir"; sorts after "close" */
		  "connect",
		  "continue",
		  "cos",
		  "crypt",
		  "dbmclose",
		  "dbmopen",
		  "defined",
		  "delete",
		  "die",
		  "do",
		  "dump",
		  "each",
		  "else",
		  "elsif",
		  "endgrent",
		  "endhostent",
		  "endnetent",
		  "endprotoent",
		  "endpwent",
		  "endservent",
		  "eof",
		  "eval",
		  "exec",
		  "exists",
		  "exit",
		  "exp",
		  "fcntl",
		  "fileno",
		  "flock",
		  "for",
		  "foreach",
		  "fork",
		  "format",
		  "formline",
		  "getc",
		  "getgrent",
		  "getgrgid",
		  "getgrnam",
		  "gethostbyaddr",
		  "gethostbyname",
		  "gethostent",
		  "getlogin",
		  "getnetbyaddr",
		  "getnetbyname",
		  "getnetent",
		  "getpeername",
		  "getpgrp",
		  "getppid",
		  "getpriority",
		  "getprotobyname",
		  "getprotobynumber",
		  "getprotoent",
		  "getpwent",
		  "getpwnam",
		  "getpwuid",
		  "getservbyname",
		  "getservbyport",
		  "getservent",
		  "getsockname",
		  "getsockopt",
		  "glob",
		  "gmtime",
		  "goto",
		  "grep",
		  "hex",
		  "if",
		  "import",
		  "index",
		  "int",
		  "ioctl",
		  "join",
		  "keys",
		  "kill",
		  "last",
		  "lc",
		  "lcfirst",
		  "length",
		  "link",
		  "listen",
		  "local",
		  "localtime",
		  "log",
		  "lstat",
		  "map",
		  "mkdir",
		  "msgctl",
		  "msgget",
		  "msgrcv",
		  "msgsnd",		/* Was misspelled "msgsend" */
		  "my",
		  "next",
		  "no",
		  "not",
		  "oct",
		  "open",
		  "opendir",
		  "or",
		  "ord",
		  "pack",
		  "package",
		  "pipe",
		  "pop",
		  "pos",
		  "print",
		  "printf",
		  "push",
		  "quotemeta",
		  "rand",
		  "read",
		  "readdir",
		  "readlink",
		  "recv",
		  "redo",
		  "ref",
		  "rename",
		  "require",
		  "reset",
		  "return",
		  "reverse",
		  "rewinddir",
		  "rindex",
		  "rmdir",
		  "scalar",
		  "seek",
		  "seekdir",
		  "select",
		  "semctl",
		  "semget",
		  "semop",
		  "send",
		  "setgrent",
		  "sethostent",
		  "setnetent",
		  "setpgrp",
		  "setpriority",
		  "setprotoent",
		  "setpwent",
		  "setservent",
		  "setsockopt",
		  "shift",
		  "shmctl",
		  "shmget",
		  "shmread",
		  "shmwrite",
		  "shutdown",
		  "sin",
		  "sleep",
		  "socket",
		  "socketpair",
		  "sort",
		  "splice",
		  "split",
		  "sprintf",
		  "sqrt",
		  "srand",
		  "stat",
		  "study",
		  "sub",
		  "substr",
		  "symlink",
		  "syscall",
		  "sysread",
		  "sysseek",
		  "system",
		  "syswrite",
		  "tell",
		  "telldir",
		  "tie",
		  "tied",
		  "time",
		  "times",		/* Was duplicated without a comma ("timestimes") */
		  "truncate",
		  "uc",
		  "ucfirst",
		  "umask",
		  "undef",
		  "unless",
		  "unlink",
		  "unpack",
		  "unshift",
		  "untie",
		  "until",
		  "use",
		  "utime",
		  "values",
		  "vec",
		  "wait",
		  "waitpid",
		  "wantarray",
		  "warn",
		  "while",
		  "write"
		};
462
463
464 /*
465 * Local functions...
466 */
467
/* bsearch() comparator: both arguments point at a 'char *' keyword. */
static int	compare_keywords(const void *k1, const void *k2);
/* Reads one UTF-8 encoded character (at most 16 bits) from fp. */
static int	getutf8(FILE *fp);
470
471
472 /*
473 * 'TextMain()' - Standard main entry for text filters.
474 */
475
476 int /* O - Exit status */
TextMain(const char * name,int argc,char * argv[])477 TextMain(const char *name, /* I - Name of filter */
478 int argc, /* I - Number of command-line arguments */
479 char *argv[]) /* I - Command-line arguments */
480 {
481 FILE *fp; /* Print file */
482 ppd_file_t *ppd; /* PPD file */
483 int i, /* Looping var */
484 empty, /* Is the input empty? */
485 ch, /* Current char from file */
486 lastch, /* Previous char from file */
487 attr, /* Current attribute */
488 line, /* Current line */
489 column, /* Current column */
490 page_column; /* Current page column */
491 int num_options; /* Number of print options */
492 cups_option_t *options; /* Print options */
493 const char *val; /* Option value */
494 char keyword[64], /* Keyword string */
495 *keyptr; /* Pointer into string */
496 int keycol; /* Column where keyword starts */
497 enum {NLstyl=-1, NoCmnt, SNTXstyl}
498 cmntState; /* Inside a comment */
499 enum {StrBeg=-1, NoStr, StrEnd}
500 strState; /* Inside a dbl-quoted string */
501
502
503 /*
504 * Make sure status messages are not buffered...
505 */
506
507 setbuf(stderr, NULL);
508
509 /*
510 * Check command-line...
511 */
512
513 if (argc < 6 || argc > 7)
514 {
515 fprintf(stderr, "Usage: %s job-id user title copies options [file]\n",
516 name);
517 return (1);
518 }
519
520 /*
521 * If we have 7 arguments, print the file named on the command-line.
522 * Otherwise, send stdin instead...
523 */
524
525 if (argc == 6)
526 fp = stdin;
527 else
528 {
529 /*
530 * Try to open the print file...
531 */
532
533 if ((fp = fopen(argv[6], "rb")) == NULL)
534 {
535 perror("DEBUG: unable to open print file - ");
536 return (1);
537 }
538 }
539
540 /*
541 * Process command-line options and write the prolog...
542 */
543
544 options = NULL;
545 num_options = cupsParseOptions(argv[5], 0, &options);
546
547 if ((val = cupsGetOption("prettyprint", num_options, options)) != NULL &&
548 strcasecmp(val, "no") && strcasecmp(val, "off") &&
549 strcasecmp(val, "false"))
550 {
551 PageLeft = 72.0f;
552 PageRight = PageWidth - 36.0f;
553 PageBottom = PageBottom > 36.0f ? PageBottom : 36.0f;
554 PageTop = PageLength - 36.0f;
555 CharsPerInch = 12;
556 LinesPerInch = 8;
557
558 if ((val = getenv("CONTENT_TYPE")) == NULL)
559 {
560 PrettyPrint = PRETTY_PLAIN;
561 NumKeywords = 0;
562 Keywords = NULL;
563 }
564 else if (strcasecmp(val, "application/x-cshell") == 0)
565 {
566 PrettyPrint = PRETTY_SHELL;
567 NumKeywords = sizeof(csh_keywords) / sizeof(csh_keywords[0]);
568 Keywords = csh_keywords;
569 }
570 else if (strcasecmp(val, "application/x-csource") == 0)
571 {
572 PrettyPrint = PRETTY_CODE;
573 NumKeywords = sizeof(code_keywords) / sizeof(code_keywords[0]);
574 Keywords = code_keywords;
575 }
576 else if (strcasecmp(val, "application/x-perl") == 0)
577 {
578 PrettyPrint = PRETTY_PERL;
579 NumKeywords = sizeof(perl_keywords) / sizeof(perl_keywords[0]);
580 Keywords = perl_keywords;
581 }
582 else if (strcasecmp(val, "application/x-shell") == 0)
583 {
584 PrettyPrint = PRETTY_SHELL;
585 NumKeywords = sizeof(sh_keywords) / sizeof(sh_keywords[0]);
586 Keywords = sh_keywords;
587 }
588 else
589 {
590 PrettyPrint = PRETTY_PLAIN;
591 NumKeywords = 0;
592 Keywords = NULL;
593 }
594 }
595
596 ppd = SetCommonOptions(num_options, options, 1);
597
598 if ((val = cupsGetOption("wrap", num_options, options)) == NULL)
599 WrapLines = 1;
600 else
601 WrapLines = !strcasecmp(val, "true") || !strcasecmp(val, "on") ||
602 !strcasecmp(val, "yes");
603
604 if ((val = cupsGetOption("columns", num_options, options)) != NULL)
605 {
606 PageColumns = atoi(val);
607
608 if (PageColumns < 1)
609 {
610 if (fp != stdin)
611 fclose(fp);
612 fprintf(stderr, "ERROR: Bad columns value %d.\n", PageColumns);
613 return (1);
614 }
615 }
616
617 if ((val = cupsGetOption("cpi", num_options, options)) != NULL)
618 {
619 CharsPerInch = atof(val);
620
621 if (CharsPerInch <= 0.0)
622 {
623 if (fp != stdin)
624 fclose(fp);
625 fprintf(stderr, "ERROR: Bad cpi value %f.\n", CharsPerInch);
626 return (1);
627 }
628 }
629
630 if ((val = cupsGetOption("lpi", num_options, options)) != NULL)
631 {
632 LinesPerInch = atof(val);
633
634 if (LinesPerInch <= 0.0)
635 {
636 if (fp != stdin)
637 fclose(fp);
638 fprintf(stderr, "ERROR: Bad lpi value %f.", LinesPerInch);
639 return (1);
640 }
641 }
642
643 if (PrettyPrint)
644 PageTop -= 216.0f / LinesPerInch;
645
646 /*
647 * Allocate memory for the page...
648 */
649
650 SizeColumns = (PageRight - PageLeft) / 72.0 * CharsPerInch;
651 SizeLines = (PageTop - PageBottom) / 72.0 * LinesPerInch;
652
653 /*
654 * Enforce minimum size...
655 */
656 if (SizeColumns < 1)
657 SizeColumns = 1;
658 if (SizeLines < 1)
659 SizeLines = 1;
660
661 if (SizeLines >= INT_MAX / SizeColumns / sizeof(lchar_t))
662 {
663 fprintf(stderr, "ERROR: bad page size\n");
664 exit(1);
665 }
666
667 Page = calloc(sizeof(lchar_t *), SizeLines);
668 if (!Page)
669 {
670 fprintf(stderr, "ERROR: cannot allocate memory for page\n");
671 exit(1);
672 }
673
674 Page[0] = calloc(sizeof(lchar_t), SizeColumns * SizeLines);
675 if (!Page[0])
676 {
677 free(Page);
678 fprintf(stderr, "ERROR: cannot allocate memory for page\n");
679 exit(1);
680 }
681
682 for (i = 1; i < SizeLines; i ++)
683 Page[i] = Page[0] + i * SizeColumns;
684
685 Copies = atoi(argv[4]);
686
687 /*
688 * Read text from the specified source and print it...
689 */
690
691 empty = 1;
692 lastch = 0;
693 column = 0;
694 line = 0;
695 page_column = 0;
696 attr = 0;
697 keyptr = keyword;
698 keycol = 0;
699 cmntState = NoCmnt;
700 strState = NoStr;
701
702 while ((ch = getutf8(fp)) >= 0)
703 {
704 if (empty)
705 {
706 /* Found the first valid character, write file header */
707 empty = 0;
708 WriteProlog(argv[3], argv[2], getenv("CLASSIFICATION"),
709 cupsGetOption("page-label", num_options, options), ppd);
710 }
711
712 /*
713 * Control codes:
714 *
715 * BS Backspace (0x08)
716 * HT Horizontal tab; next 8th column (0x09)
717 * LF Line feed; forward full line (0x0a)
718 * VT Vertical tab; reverse full line (0x0b)
719 * FF Form feed (0x0c)
720 * CR Carriage return (0x0d)
721 * ESC 7 Reverse full line (0x1b 0x37)
722 * ESC 8 Reverse half line (0x1b 0x38)
723 * ESC 9 Forward half line (0x1b 0x39)
724 */
725
726 switch (ch)
727 {
728 case 0x08 : /* BS - backspace for boldface & underline */
729 if (column > 0)
730 column --;
731
732 keyptr = keyword;
733 keycol = column;
734 break;
735
736 case 0x09 : /* HT - tab to next 8th column */
737 if (PrettyPrint && keyptr > keyword)
738 {
739 *keyptr = '\0';
740 keyptr = keyword;
741
742 if (bsearch(&keyptr, Keywords, NumKeywords, sizeof(char *),
743 compare_keywords))
744 {
745 /*
746 * Put keywords in boldface...
747 */
748
749 i = page_column * (ColumnWidth + ColumnGutter);
750
751 while (keycol < column)
752 {
753 Page[line][keycol + i].attr |= ATTR_BOLD;
754 keycol ++;
755 }
756 }
757 }
758
759 column = (column + 8) & ~7;
760
761 if (column >= ColumnWidth && WrapLines)
762 { /* Wrap text to margins */
763 line ++;
764 column = 0;
765
766 if (line >= SizeLines)
767 {
768 page_column ++;
769 line = 0;
770
771 if (page_column >= PageColumns)
772 {
773 WritePage();
774 page_column = 0;
775 }
776 }
777 }
778
779 keycol = column;
780
781 attr &= ~ATTR_BOLD;
782 break;
783
784 case 0x0d : /* CR */
785 #ifndef __APPLE__
786 /*
787 * All but MacOS/Darwin treat CR as was intended by ANSI
788 * folks, namely to move to column 0/1. Some programs still
789 * use this to do boldfacing and underlining...
790 */
791
792 column = 0;
793 break;
794 #else
795 /*
796 * MacOS/Darwin still need to treat CR as a line ending.
797 */
798
799 {
800 int nextch;
801 if ((nextch = getc(fp)) != 0x0a)
802 ungetc(nextch, fp);
803 else
804 ch = nextch;
805 }
806 #endif /* !__APPLE__ */
807
808 case 0x0a : /* LF - output current line */
809 if (PrettyPrint && keyptr > keyword)
810 {
811 *keyptr = '\0';
812 keyptr = keyword;
813
814 if (bsearch(&keyptr, Keywords, NumKeywords, sizeof(char *),
815 compare_keywords))
816 {
817 /*
818 * Put keywords in boldface...
819 */
820
821 i = page_column * (ColumnWidth + ColumnGutter);
822
823 while (keycol < column)
824 {
825 Page[line][keycol + i].attr |= ATTR_BOLD;
826 keycol ++;
827 }
828 }
829 }
830
831 line ++;
832 column = 0;
833 keycol = 0;
834
835 if (cmntState == NLstyl)
836 cmntState = NoCmnt;
837
838 if (!cmntState && !strState)
839 attr &= ~(ATTR_ITALIC | ATTR_BOLD | ATTR_RED | ATTR_GREEN | ATTR_BLUE);
840
841 if (line >= SizeLines)
842 {
843 page_column ++;
844 line = 0;
845
846 if (page_column >= PageColumns)
847 {
848 WritePage();
849 page_column = 0;
850 }
851 }
852 break;
853
854 case 0x0b : /* VT - move up 1 line */
855 if (line > 0)
856 line --;
857
858 keyptr = keyword;
859 keycol = column;
860
861 if (cmntState == NLstyl)
862 cmntState = NoCmnt;
863
864 if (!cmntState && !strState)
865 attr &= ~(ATTR_ITALIC | ATTR_BOLD | ATTR_RED | ATTR_GREEN | ATTR_BLUE);
866 break;
867
868 case 0x0c : /* FF - eject current page... */
869 if (PrettyPrint && keyptr > keyword)
870 {
871 *keyptr = '\0';
872 keyptr = keyword;
873
874 if (bsearch(&keyptr, Keywords, NumKeywords, sizeof(char *),
875 compare_keywords))
876 {
877 /*
878 * Put keywords in boldface...
879 */
880
881 i = page_column * (ColumnWidth + ColumnGutter);
882
883 while (keycol < column)
884 {
885 Page[line][keycol + i].attr |= ATTR_BOLD;
886 keycol ++;
887 }
888 }
889 }
890
891 page_column ++;
892 column = 0;
893 keycol = 0;
894 line = 0;
895
896 if (cmntState == NLstyl)
897 cmntState = NoCmnt;
898
899 if (!cmntState && !strState)
900 attr &= ~(ATTR_ITALIC | ATTR_BOLD | ATTR_RED | ATTR_GREEN | ATTR_BLUE);
901
902 if (page_column >= PageColumns)
903 {
904 WritePage();
905 page_column = 0;
906 }
907 break;
908
909 case 0x1b : /* Escape sequence */
910 ch = getutf8(fp);
911 if (ch == '7')
912 {
913 /*
914 * ESC 7 Reverse full line (0x1b 0x37)
915 */
916
917 if (line > 0)
918 line --;
919 }
920 else if (ch == '8')
921 {
922 /*
923 * ESC 8 Reverse half line (0x1b 0x38)
924 */
925
926 if ((attr & ATTR_RAISED) && line > 0)
927 {
928 attr &= ~ATTR_RAISED;
929 line --;
930 }
931 else if (attr & ATTR_LOWERED)
932 attr &= ~ATTR_LOWERED;
933 else
934 attr |= ATTR_RAISED;
935 }
936 else if (ch == '9')
937 {
938 /*
939 * ESC 9 Forward half line (0x1b 0x39)
940 */
941
942 if ((attr & ATTR_LOWERED) && line < (SizeLines - 1))
943 {
944 attr &= ~ATTR_LOWERED;
945 line ++;
946 }
947 else if (attr & ATTR_RAISED)
948 attr &= ~ATTR_RAISED;
949 else
950 attr |= ATTR_LOWERED;
951 }
952 break;
953
954 default : /* All others... */
955 if (ch < ' ')
956 break; /* Ignore other control chars */
957
958 if (PrettyPrint > PRETTY_PLAIN)
959 {
960 /*
961 * Do highlighting of C/C++ keywords, preprocessor commands,
962 * and comments...
963 */
964
965 if (ch == ' ' && (attr & ATTR_BOLD))
966 {
967 /*
968 * Stop bolding preprocessor command...
969 */
970
971 attr &= ~ATTR_BOLD;
972 }
973 else if (!(isalnum(ch & 255) || ch == '_') && keyptr > keyword)
974 {
975 /*
976 * Look for a keyword...
977 */
978
979 *keyptr = '\0';
980 keyptr = keyword;
981
982 if (bsearch(&keyptr, Keywords, NumKeywords, sizeof(char *),
983 compare_keywords))
984 {
985 /*
986 * Put keywords in boldface...
987 */
988
989 i = page_column * (ColumnWidth + ColumnGutter);
990
991 while (keycol < column)
992 {
993 Page[line][keycol + i].attr |= ATTR_BOLD;
994 keycol ++;
995 }
996 }
997 }
998
999 /*
1000 * Look for Syntax-transition Starts...
1001 */
1002 if (!cmntState && !strState)
1003 {
1004 if ((isalnum(ch & 255) || ch == '_'))
1005 {
1006 /*
1007 * Add characters to the current keyword (if they'll fit).
1008 */
1009
1010 if (keyptr == keyword)
1011 keycol = column;
1012
1013 if (keyptr < (keyword + sizeof(keyword) - 1))
1014 *keyptr++ = ch;
1015 }
1016 else if (ch == '\"' && lastch != '\\')
1017 {
1018 /*
1019 * Start a dbl-quote string constant...
1020 */
1021
1022 strState = StrBeg;
1023 attr = ATTR_BLUE;
1024 }
1025 else if (ch == '*' && lastch == '/' &&
1026 PrettyPrint != PRETTY_SHELL)
1027 {
1028 /*
1029 * Start a C-style comment...
1030 */
1031
1032 cmntState = SNTXstyl;
1033 attr = ATTR_ITALIC | ATTR_GREEN;
1034 }
1035 else if (ch == '/' && lastch == '/' &&
1036 PrettyPrint == PRETTY_CODE)
1037 {
1038 /*
1039 * Start a C++-style comment...
1040 */
1041
1042 cmntState = NLstyl;
1043 attr = ATTR_ITALIC | ATTR_GREEN;
1044 }
1045 else if (ch == '#' && PrettyPrint != PRETTY_CODE)
1046 {
1047 /*
1048 * Start a shell-style comment...
1049 */
1050
1051 cmntState = NLstyl;
1052 attr = ATTR_ITALIC | ATTR_GREEN;
1053 }
1054 else if (ch == '#' && column == 0 &&
1055 PrettyPrint == PRETTY_CODE)
1056 {
1057 /*
1058 * Start a preprocessor command...
1059 */
1060
1061 attr = ATTR_BOLD | ATTR_RED;
1062 }
1063 }
1064 }
1065
1066 if (column >= ColumnWidth && WrapLines)
1067 { /* Wrap text to margins */
1068 column = 0;
1069 line ++;
1070
1071 if (line >= SizeLines)
1072 {
1073 page_column ++;
1074 line = 0;
1075
1076 if (page_column >= PageColumns)
1077 {
1078 WritePage();
1079 page_column = 0;
1080 }
1081 }
1082 }
1083
1084 /*
1085 * Add text to the current column & line...
1086 */
1087
1088 if (column < ColumnWidth)
1089 {
1090 i = column + page_column * (ColumnWidth + ColumnGutter);
1091
1092 if (PrettyPrint)
1093 Page[line][i].attr = attr;
1094
1095 if (ch == ' ' && Page[line][i].ch)
1096 ch = Page[line][i].ch;
1097 else if (ch == Page[line][i].ch)
1098 Page[line][i].attr |= ATTR_BOLD;
1099 else if (Page[line][i].ch == '_')
1100 Page[line][i].attr |= ATTR_UNDERLINE;
1101 else if (ch == '_')
1102 {
1103 Page[line][i].attr |= ATTR_UNDERLINE;
1104
1105 if (Page[line][i].ch)
1106 ch = Page[line][i].ch;
1107 }
1108 else
1109 Page[line][i].attr = attr;
1110
1111 Page[line][i].ch = ch;
1112 }
1113
1114 if (PrettyPrint)
1115 {
1116 if ((ch == '{' || ch == '}') && !cmntState && !strState &&
1117 column < ColumnWidth)
1118 {
1119 /*
1120 * Highlight curley braces...
1121 */
1122
1123 Page[line][column].attr |= ATTR_BOLD;
1124 }
1125 else if ((ch == '/' || ch == '*') && lastch == '/' &&
1126 column < ColumnWidth && PrettyPrint != PRETTY_SHELL)
1127 {
1128 /*
1129 * Highlight first comment character...
1130 */
1131
1132 Page[line][column - 1].attr = attr;
1133 }
1134 else if (ch == '\"' && lastch != '\\' && !cmntState && strState == StrEnd)
1135 {
1136 /*
1137 * End a dbl-quote string constant...
1138 */
1139
1140 strState = NoStr;
1141 attr &= ~ATTR_BLUE;
1142 }
1143 else if (ch == '/' && lastch == '*' && cmntState)
1144 {
1145 /*
1146 * End a C-style comment...
1147 */
1148
1149 cmntState = NoCmnt;
1150 attr &= ~(ATTR_ITALIC | ATTR_GREEN);
1151 }
1152
1153 if (strState == StrBeg)
1154 strState = StrEnd;
1155 }
1156
1157 column ++;
1158 break;
1159 }
1160
1161 /*
1162 * Save this character for the next cycle.
1163 */
1164
1165 lastch = ch;
1166 }
1167
1168 /* Do not write anything if the input file is empty */
1169 if (empty)
1170 {
1171 fprintf(stderr, "DEBUG: Input is empty, outputting empty file.\n");
1172 if (fp != stdin)
1173 fclose(fp);
1174 return 0;
1175 }
1176
1177 /*
1178 * Write any remaining page data...
1179 */
1180
1181 if (line > 0 || page_column > 0 || column > 0)
1182 WritePage();
1183
1184 /*
1185 * Write the epilog and return...
1186 */
1187
1188 WriteEpilogue();
1189
1190 if (fp != stdin)
1191 fclose(fp);
1192
1193 if (ppd != NULL)
1194 ppdClose(ppd);
1195
1196 free(Page[0]);
1197 free(Page);
1198 return (0);
1199 }
1200
1201
/*
 * 'compare_keywords()' - Compare two keywords for bsearch().
 *
 * Each argument points at a 'char *' (an element of a keyword table or the
 * address of the search key), not at the characters themselves.
 */

static int				/* O - Result of strcmp */
compare_keywords(const void *k1,	/* I - First keyword */
                 const void *k2)	/* I - Second keyword */
{
  const char * const	*first  = (const char * const *)k1;
  const char * const	*second = (const char * const *)k2;


  return (strcmp(*first, *second));
}
1212
1213
/*
 * 'getutf8()' - Get a UTF-8 encoded wide character...
 *
 * Decodes at most 16-bit characters:
 *
 *   0 to 127      = 0xxxxxxx
 *   128 to 2047   = 110xxxxx 10yyyyyy
 *   2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
 *
 * A stray continuation byte (10xxxxxx, 128 to 191) is returned unchanged.
 * Continuation bytes are masked with 0x3f but not validated.  Lead bytes
 * announcing more than three bytes are not supported and yield EOF, as
 * does end-of-file in the middle of a sequence.
 */

static int				/* O - Character or -1 on error */
getutf8(FILE *fp)			/* I - File to read from */
{
  int	lead;				/* First byte of the sequence */
  int	second;				/* Second byte, if any */
  int	third;				/* Third byte, if any */


  if ((lead = getc(fp)) == EOF)
    return (EOF);

 /*
  * ASCII, or a continuation byte outside of a sequence...
  */

  if (lead < 0xc0)
    return (lead);

  if ((lead & 0xe0) == 0xc0)
  {
   /*
    * Two byte character...
    */

    if ((second = getc(fp)) == EOF)
      return (EOF);

    return (((lead & 0x1f) << 6) | (second & 0x3f));
  }

  if ((lead & 0xf0) == 0xe0)
  {
   /*
    * Three byte character...
    */

    if ((second = getc(fp)) == EOF)
      return (EOF);

    if ((third = getc(fp)) == EOF)
      return (EOF);

    return (((lead & 0x0f) << 12) | ((second & 0x3f) << 6) | (third & 0x3f));
  }

 /*
  * More than three bytes...  We don't support that...
  */

  return (EOF);
}
1287
1288