• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /***************************************************
2 * A program for testing the Unicode property table *
3 ***************************************************/
4 
5 /* Copyright (c) University of Cambridge 2008-2020 */
6 
7 /* Compile thus:
8 
9    gcc -DHAVE_CONFIG_H -DPCRE2_CODE_UNIT_WIDTH=8 -o ucptest \
10      ucptest.c ../src/pcre2_ucd.c ../src/pcre2_tables.c
11 
12    Add -lreadline or -ledit if PCRE2 was configured with readline or libedit
13    support in pcre2test.
14 */
15 
16 /* This is a hacked-up program for testing the Unicode properties tables of
17 PCRE2. It can also be used for finding characters with certain properties.
18 I wrote it to help with debugging PCRE, and have added things that I found
19 useful, in a rather haphazard way. The code has never been seriously tidied or
20 checked for robustness, but it shouldn't now give compiler warnings.
21 
22 There is only one option: "-s". If given, it applies only to the "findprop"
23 command. It causes the UTF-8 sequence of bytes that encode the character to be
24 output between angle brackets at the end of the line. On a UTF-8 terminal, this
25 will show the appropriate graphic for the code point.
26 
27 If the command has arguments, they are concatenated into a buffer, separated by
28 spaces. If the first argument starts "U+" or consists entirely of hexadecimal
29 digits, "findprop" is inserted at the start. The buffer is then processed as a
30 single line file, after which the program exits. If there are no arguments, the
31 program reads commands line by line on stdin and writes output to stdout. The
32 return code is always zero.
33 
34 There are three commands:
35 
36 "findprop" must be followed by a space-separated list of Unicode code points as
37 hex numbers, either without any prefix or starting with "U+". The output is one
38 line per character, giving its Unicode properties followed by its other case or
39 cases if one or more exist, followed by its Script Extension list if it is not
40 just the same as the base script. This list is in square brackets. The
41 properties are:
42 
43 General type        e.g. Letter
44 Specific type       e.g. Upper case letter
45 Script              e.g. Medefaidrin
46 Grapheme break type e.g. Extend (most common is Other)
47 
48 "find" must be followed by a list of property names and their values. The
49 values are case-sensitive. This finds characters that have those properties. If
50 multiple properties are listed, they must all be matched. Currently supported:
51 
52   script <name>    The character must have this script property. Only one
53                      such script may be given.
54   scriptx <name>   This script must be in the character's Script Extension
55                      property list. If this is used many times, all the given
56                      scripts must be present.
57   type <abbrev>    The character's specific type (e.g. Lu or Nd) must match.
58   gbreak <name>    The grapheme break property must match.
59 
60 If a <name> or <abbrev> is preceded by !, the value must NOT be present. For
61 Script Extensions, there may be a mixture of positive and negative
62 requirements. All must be satisfied.
63 
64 Sequences of two or more characters are shown as ranges, for example
65 U+0041..U+004A. No more than 100 lines are output. If there are more
66 characters, the list ends with ...
67 
68 "list" must be followed by a property name (script, type, or gbreak). The
69 defined values for that property are listed. */
70 
71 
72 #ifdef HAVE_CONFIG_H
73 #include "../src/config.h"
74 #endif
75 
76 #ifndef SUPPORT_UNICODE
77 #define SUPPORT_UNICODE
78 #endif
79 
80 #include <ctype.h>
81 #include <stdio.h>
82 #include <stdlib.h>
83 #include <string.h>
84 #include "../src/pcre2_internal.h"
85 #include "../src/pcre2_ucp.h"
86 
87 #ifdef HAVE_UNISTD_H
88 #include <unistd.h>
89 #endif
90 
91 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
92 #if defined(SUPPORT_LIBREADLINE)
93 #include <readline/readline.h>
94 #include <readline/history.h>
95 #else
96 #if defined(HAVE_EDITLINE_READLINE_H)
97 #include <editline/readline.h>
98 #else
99 #include <readline/readline.h>
100 #endif
101 #endif
102 #endif
103 
104 
105 /* -------------------------------------------------------------------*/
106 
107 #define CS   (char *)
108 #define CCS  (const char *)
109 #define CSS  (char **)
110 #define US   (unsigned char *)
111 #define CUS  (const unsigned char *)
112 #define USS  (unsigned char **)
113 
114 /* -------------------------------------------------------------------*/
115 
116 static BOOL show_character = FALSE;
117 
/* Table of specific character types, stored as consecutive pairs: the
two-letter abbreviation followed by a description. The "list type" command
prints both strings of each pair. The "find" command looks a value up by its
abbreviation and uses the pair index (array index / 2) as the number that is
compared with UCD_CHARTYPE(), so the order of the pairs matters: it presumably
has to follow the ucp_xx enumeration in pcre2_ucp.h - confirm before
reordering or inserting entries. */

static const unsigned char *type_names[] = {
  US"Cc", US"Control",
  US"Cf", US"Format",
  US"Cn", US"Unassigned",
  US"Co", US"Private use",
  US"Cs", US"Surrogate",
  US"Ll", US"Lower case letter",
  US"Lm", US"Modifier letter",
  US"Lo", US"Other letter",
  US"Lt", US"Title case letter",
  US"Lu", US"Upper case letter",
  US"Mc", US"Spacing mark",
  US"Me", US"Enclosing mark",
  US"Mn", US"Non-spacing mark",
  US"Nd", US"Decimal number",
  US"Nl", US"Letter number",
  US"No", US"Other number",
  US"Pc", US"Connector punctuation",
  US"Pd", US"Dash punctuation",
  US"Pe", US"Close punctuation",
  US"Pf", US"Final punctuation",
  US"Pi", US"Initial punctuation",
  US"Po", US"Other punctuation",
  US"Ps", US"Open punctuation",
  US"Sc", US"Currency symbol",
  US"Sk", US"Modifier symbol",
  US"Sm", US"Mathematical symbol",
  US"So", US"Other symbol",
  US"Zl", US"Line separator",
  US"Zp", US"Paragraph separator",
  US"Zs", US"Space separator"
};
150 
/* Table of grapheme break property values, stored as consecutive pairs: the
name followed by an optional description. An empty description makes the
"list gbreak" command print the name on its own. The "find" command uses the
pair index (array index / 2) as the number that is compared with
UCD_GRAPHBREAK(), so the order of the pairs matters: it presumably has to
follow the ucp_gbXX enumeration in pcre2_ucp.h - confirm before reordering or
inserting entries. */

static const unsigned char *gb_names[] = {
  US"CR",                    US"carriage return",
  US"LF",                    US"linefeed",
  US"Control",               US"",
  US"Extend",                US"",
  US"Prepend",               US"",
  US"SpacingMark",           US"",
  US"L",                     US"Hangul syllable type L",
  US"V",                     US"Hangul syllable type V",
  US"T",                     US"Hangul syllable type T",
  US"LV",                    US"Hangul syllable type LV",
  US"LVT",                   US"Hangul syllable type LVT",
  US"RegionalIndicator",     US"",
  US"Other",                 US"",
  US"ZWJ",                   US"zero width joiner",
  US"Extended_Pictographic", US""
};
168 
169 
/* Largest value that can be encoded in a sequence of (index + 1) bytes. */

static const unsigned int utf8_table1[] = {
  0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};

/* Marker bits that are ORed into the first byte of a sequence of (index + 1)
bytes. */

static const int utf8_table2[] = {
  0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};


/*************************************************
*       Convert character value to UTF-8         *
*************************************************/

/* This function takes an unsigned integer value in the range 0 - 0x7fffffff
and encodes it as a UTF-8 character in 1 to 6 bytes. No check is made that the
value is a valid Unicode code point; anything that fits is encoded.

Arguments:
  cvalue     the character value
  buffer     pointer to buffer for result - at least 6 bytes long

Returns:     number of bytes placed in the buffer
             0 if input code point is too big (buffer is not written)
*/

static size_t
ord2utf8(unsigned int cvalue, unsigned char *buffer)
{
const size_t table_len = sizeof(utf8_table1)/sizeof(utf8_table1[0]);
size_t i, j;

/* Find the shortest encoding that can hold the value; i ends up as the number
of continuation bytes. */

for (i = 0; i < table_len; i++)
  if (cvalue <= utf8_table1[i]) break;
if (i >= table_len) return 0;

/* Fill in the continuation bytes from the last one backwards, six bits at a
time. When the loop finishes, buffer is back at the first byte. */

buffer += i;
for (j = i; j > 0; j--)
  {
  *buffer-- = (unsigned char)(0x80 | (cvalue & 0x3f));
  cvalue >>= 6;
  }

/* What is left of the value goes into the first byte with the length marker. */

*buffer = (unsigned char)((unsigned int)utf8_table2[i] | cvalue);
return i + 1;
}
208 
209 
210 
211 /*************************************************
212 *             Test for interaction               *
213 *************************************************/
214 
215 static BOOL
is_stdin_tty(void)216 is_stdin_tty(void)
217 {
218 #if defined WIN32
219 return _isatty(_fileno(stdin));
220 #else
221 return isatty(fileno(stdin));
222 #endif
223 }
224 
225 
226 /*************************************************
227 *      Get script name from ucp ident            *
228 *************************************************/
229 
230 static const char *
get_scriptname(int script)231 get_scriptname(int script)
232 {
233 size_t i;
234 const ucp_type_table *u;
235 
236 for (i = 0; i < PRIV(utt_size); i++)
237   {
238   u = PRIV(utt) + i;
239   if (u->type == PT_SC && u->value == script) break;
240   }
241 if (i < PRIV(utt_size))
242   return PRIV(utt_names) + u->name_offset;
243 
244 return "??";
245 }
246 
247 
248 /*************************************************
249 *      Print Unicode property info for a char    *
250 *************************************************/
251 
252 static void
print_prop(unsigned int c,BOOL is_just_one)253 print_prop(unsigned int c, BOOL is_just_one)
254 {
255 int type = UCD_CATEGORY(c);
256 int fulltype = UCD_CHARTYPE(c);
257 int script = UCD_SCRIPT(c);
258 int scriptx = UCD_SCRIPTX(c);
259 int gbprop = UCD_GRAPHBREAK(c);
260 unsigned int othercase = UCD_OTHERCASE(c);
261 int caseset = UCD_CASESET(c);
262 
263 const unsigned char *fulltypename = US"??";
264 const unsigned char *typename = US"??";
265 const unsigned char *graphbreak = US"??";
266 const unsigned char *scriptname = CUS get_scriptname(script);
267 
268 switch (type)
269   {
270   case ucp_C: typename = US"Control"; break;
271   case ucp_L: typename = US"Letter"; break;
272   case ucp_M: typename = US"Mark"; break;
273   case ucp_N: typename = US"Number"; break;
274   case ucp_P: typename = US"Punctuation"; break;
275   case ucp_S: typename = US"Symbol"; break;
276   case ucp_Z: typename = US"Separator"; break;
277   }
278 
279 switch (fulltype)
280   {
281   case ucp_Cc: fulltypename = US"Control"; break;
282   case ucp_Cf: fulltypename = US"Format"; break;
283   case ucp_Cn: fulltypename = US"Unassigned"; break;
284   case ucp_Co: fulltypename = US"Private use"; break;
285   case ucp_Cs: fulltypename = US"Surrogate"; break;
286   case ucp_Ll: fulltypename = US"Lower case letter"; break;
287   case ucp_Lm: fulltypename = US"Modifier letter"; break;
288   case ucp_Lo: fulltypename = US"Other letter"; break;
289   case ucp_Lt: fulltypename = US"Title case letter"; break;
290   case ucp_Lu: fulltypename = US"Upper case letter"; break;
291   case ucp_Mc: fulltypename = US"Spacing mark"; break;
292   case ucp_Me: fulltypename = US"Enclosing mark"; break;
293   case ucp_Mn: fulltypename = US"Non-spacing mark"; break;
294   case ucp_Nd: fulltypename = US"Decimal number"; break;
295   case ucp_Nl: fulltypename = US"Letter number"; break;
296   case ucp_No: fulltypename = US"Other number"; break;
297   case ucp_Pc: fulltypename = US"Connector punctuation"; break;
298   case ucp_Pd: fulltypename = US"Dash punctuation"; break;
299   case ucp_Pe: fulltypename = US"Close punctuation"; break;
300   case ucp_Pf: fulltypename = US"Final punctuation"; break;
301   case ucp_Pi: fulltypename = US"Initial punctuation"; break;
302   case ucp_Po: fulltypename = US"Other punctuation"; break;
303   case ucp_Ps: fulltypename = US"Open punctuation"; break;
304   case ucp_Sc: fulltypename = US"Currency symbol"; break;
305   case ucp_Sk: fulltypename = US"Modifier symbol"; break;
306   case ucp_Sm: fulltypename = US"Mathematical symbol"; break;
307   case ucp_So: fulltypename = US"Other symbol"; break;
308   case ucp_Zl: fulltypename = US"Line separator"; break;
309   case ucp_Zp: fulltypename = US"Paragraph separator"; break;
310   case ucp_Zs: fulltypename = US"Space separator"; break;
311   }
312 
313 switch(gbprop)
314   {
315   case ucp_gbCR:           graphbreak = US"CR"; break;
316   case ucp_gbLF:           graphbreak = US"LF"; break;
317   case ucp_gbControl:      graphbreak = US"Control"; break;
318   case ucp_gbExtend:       graphbreak = US"Extend"; break;
319   case ucp_gbPrepend:      graphbreak = US"Prepend"; break;
320   case ucp_gbSpacingMark:  graphbreak = US"SpacingMark"; break;
321   case ucp_gbL:            graphbreak = US"Hangul syllable type L"; break;
322   case ucp_gbV:            graphbreak = US"Hangul syllable type V"; break;
323   case ucp_gbT:            graphbreak = US"Hangul syllable type T"; break;
324   case ucp_gbLV:           graphbreak = US"Hangul syllable type LV"; break;
325   case ucp_gbLVT:          graphbreak = US"Hangul syllable type LVT"; break;
326   case ucp_gbRegionalIndicator:
327                            graphbreak = US"Regional Indicator"; break;
328   case ucp_gbOther:        graphbreak = US"Other"; break;
329   case ucp_gbZWJ:          graphbreak = US"Zero Width Joiner"; break;
330   case ucp_gbExtended_Pictographic:
331                            graphbreak = US"Extended Pictographic"; break;
332   default:                 graphbreak = US"Unknown"; break;
333   }
334 
335 printf("U+%04X %s: %s, %s, %s", c, typename, fulltypename, scriptname, graphbreak);
336 if (is_just_one && othercase != c)
337   {
338   printf(", U+%04X", othercase);
339   if (caseset != 0)
340     {
341     const uint32_t *p = PRIV(ucd_caseless_sets) + caseset - 1;
342     while (*(++p) < NOTACHAR)
343       {
344       unsigned int d = *p;
345       if (d != othercase && d != c) printf(", U+%04X", d);
346       }
347     }
348   }
349 
350 if (scriptx != script)
351   {
352   printf(", [");
353   if (scriptx >= 0)
354     printf("%s", get_scriptname(scriptx));
355   else
356     {
357     const char *sep = "";
358     const uint8_t *p = PRIV(ucd_script_sets) - scriptx;
359     while (*p != 0)
360       {
361       printf("%s%s", sep, get_scriptname(*p++));
362       sep = ", ";
363       }
364     }
365   printf("]");
366   }
367 
368 if (show_character && is_just_one)
369   {
370   unsigned char buffer[8];
371   size_t len = ord2utf8(c, buffer);
372   printf(", >%.*s<", (int)len, buffer);
373   }
374 
375 printf("\n");
376 }
377 
378 
379 
380 /*************************************************
381 *   Find character(s) with given property/ies    *
382 *************************************************/
383 
384 static void
find_chars(unsigned char * s)385 find_chars(unsigned char *s)
386 {
387 unsigned char name[24];
388 unsigned char value[24];
389 unsigned char *t;
390 unsigned int count= 0;
391 int scriptx_list[24];
392 unsigned int scriptx_count = 0;
393 uint32_t i, c;
394 int script = -1;
395 int type = -1;
396 int gbreak = -1;
397 BOOL script_not = FALSE;
398 BOOL type_not = FALSE;
399 BOOL gbreak_not = FALSE;
400 BOOL hadrange = FALSE;
401 const ucd_record *ucd, *next_ucd;
402 const char *pad = "        ";
403 
404 while (*s != 0)
405   {
406   unsigned int offset = 0;
407   BOOL scriptx_not = FALSE;
408 
409   for (t = name; *s != 0 && !isspace(*s); s++) *t++ = *s;
410   *t = 0;
411   while (isspace(*s)) s++;
412 
413   for (t = value; *s != 0 && !isspace(*s); s++) *t++ = *s;
414   *t = 0;
415   while (isspace(*s)) s++;
416 
417   if (strcmp(CS name, "script") == 0 ||
418       strcmp(CS name, "scriptx") == 0)
419     {
420     if (value[0] == '!')
421       {
422       if (name[6] == 'x') scriptx_not = TRUE;
423         else script_not = TRUE;
424       offset = 1;
425       }
426 
427     for (i = 0; i < PRIV(utt_size); i++)
428       {
429       const ucp_type_table *u = PRIV(utt) + i;
430       if (u->type == PT_SC && strcmp(CS(value + offset),
431             PRIV(utt_names) + u->name_offset) == 0)
432         {
433         c = u->value;
434         if (name[6] == 'x')
435           {
436           scriptx_list[scriptx_count++] = scriptx_not? (-c):c;
437           }
438         else
439           {
440           if (script < 0) script = c; else
441             {
442             printf("** Only 1 script value allowed\n");
443             return;
444             }
445           }
446         break;
447         }
448       }
449 
450     if (i >= PRIV(utt_size))
451       {
452       printf("** Unrecognized script name \"%s\"\n", value);
453       return;
454       }
455     }
456 
457   else if (strcmp(CS name, "type") == 0)
458     {
459     if (type >= 0)
460       {
461       printf("** Only 1 type value allowed\n");
462       return;
463       }
464     else
465       {
466       if (value[0] == '!')
467         {
468         type_not = TRUE;
469         offset = 1;
470         }
471 
472       for (i = 0; i < sizeof(type_names)/sizeof(char *); i += 2)
473         {
474         if (strcmp(CS (value + offset), CS type_names[i]) == 0)
475           {
476           type = i/2;
477           break;
478           }
479         }
480       if (i >= sizeof(type_names)/sizeof(char *))
481         {
482         printf("** Unrecognized type name \"%s\"\n", value);
483         return;
484         }
485       }
486     }
487 
488   else if (strcmp(CS name, "gbreak") == 0)
489     {
490     if (gbreak >= 0)
491       {
492       printf("** Only 1 grapheme break value allowed\n");
493       return;
494       }
495     else
496       {
497       if (value[0] == '!')
498         {
499         gbreak_not = TRUE;
500         offset = 1;
501         }
502 
503       for (i = 0; i < sizeof(gb_names)/sizeof(char *); i += 2)
504         {
505         if (strcmp(CS (value + offset), CS gb_names[i]) == 0)
506           {
507           gbreak = i/2;
508           break;
509           }
510         }
511       if (i >= sizeof(gb_names)/sizeof(char *))
512         {
513         printf("** Unrecognized gbreak name \"%s\"\n", value);
514         return;
515         }
516       }
517     }
518 
519   else
520     {
521     printf("** Unrecognized property name \"%s\"\n", name);
522     return;
523     }
524   }
525 
526 if (script < 0 && scriptx_count == 0 && type < 0 && gbreak < 0)
527   {
528   printf("** No properties specified\n");
529   return;
530   }
531 
532 for (c = 0; c <= 0x10ffff; c++)
533   {
534   if (script >= 0 && (script == UCD_SCRIPT(c)) == script_not) continue;
535 
536   if (scriptx_count > 0)
537     {
538     const uint8_t *char_scriptx = NULL;
539     unsigned int found = 0;
540     int scriptx = UCD_SCRIPTX(c);
541 
542     if (scriptx < 0) char_scriptx = PRIV(ucd_script_sets) - scriptx;
543 
544     for (i = 0; i < scriptx_count; i++)
545       {
546       /* Positive requirment */
547       if (scriptx_list[i] >= 0)
548         {
549         if (scriptx >= 0)
550           {
551           if (scriptx == scriptx_list[i]) found++;
552           }
553 
554         else
555           {
556           const uint8_t *p;
557           for (p = char_scriptx; *p != 0; p++)
558             {
559             if (scriptx_list[i] == *p)
560               {
561               found++;
562               break;
563               }
564             }
565           }
566         }
567       /* Negative requirement */
568       else
569         {
570         if (scriptx >= 0)
571           {
572           if (scriptx != -scriptx_list[i]) found++;
573           }
574         else
575           {
576           const uint8_t *p;
577           for (p = char_scriptx; *p != 0; p++)
578             if (-scriptx_list[i] == *p) break;
579           if (*p == 0) found++;
580           }
581         }
582       }
583 
584     if (found != scriptx_count) continue;
585     }
586 
587   if (type >= 0)
588     {
589     if (type_not)
590       {
591       if (type == UCD_CHARTYPE(c)) continue;
592       }
593     else
594       {
595       if (type != UCD_CHARTYPE(c)) continue;
596       }
597     }
598 
599   if (gbreak >= 0)
600     {
601     if (gbreak_not)
602       {
603       if (gbreak == UCD_GRAPHBREAK(c)) continue;
604       }
605     else
606       {
607       if (gbreak != UCD_GRAPHBREAK(c)) continue;
608       }
609     }
610 
611   /* All conditions are met. Look for runs. */
612 
613   ucd = GET_UCD(c);
614 
615   for (i = c + 1; i < 0x10ffff; i++)
616     {
617     next_ucd = GET_UCD(i);
618     if (memcmp(ucd, next_ucd, sizeof(ucd_record)) != 0) break;
619     }
620 
621   if (--i > c)
622     {
623     printf("U+%04X..", c);
624     c = i;
625     hadrange = TRUE;
626     }
627   else if (hadrange) printf("%s", pad);
628 
629   print_prop(c, FALSE);
630   if (c >= 0x100000) pad = "        ";
631     else if (c >= 0x10000) pad = "       ";
632   count++;
633   if (count >= 100)
634     {
635     printf("...\n");
636     break;
637     }
638   }
639 
640 if (count == 0) printf("No characters found\n");
641 }
642 
643 
644 /*************************************************
645 *        Process command line                    *
646 *************************************************/
647 
648 static void
process_command_line(unsigned char * buffer)649 process_command_line(unsigned char *buffer)
650 {
651 unsigned char *s, *t;
652 unsigned char name[24];
653 
654 s = buffer;
655 while (isspace(*s)) s++;
656 if (*s == 0) return;
657 
658 for (t = name; *s != 0 && !isspace(*s); s++) *t++ = *s;
659 *t = 0;
660 while (isspace(*s)) s++;
661 
662 if (strcmp(CS name, "findprop") == 0)
663   {
664   while (*s != 0)
665     {
666     unsigned int c;
667     unsigned char *endptr;
668     t = s;
669     if (strncmp(CS t, "U+", 2) == 0) t += 2;
670     c = strtoul(CS t, CSS(&endptr), 16);
671     if (*endptr != 0 && !isspace(*endptr))
672       {
673       while (*endptr != 0 && !isspace(*endptr)) endptr++;
674       printf("** Invalid hex number: ignored \"%.*s\"\n", (int)(endptr-s), s);
675       }
676     else
677       {
678       if (c > 0x10ffff)
679         printf("** U+%x is too big for a Unicode code point\n", c);
680       else
681         print_prop(c, TRUE);
682       }
683     s = endptr;
684     while (isspace(*s)) s++;
685     }
686   }
687 
688 else if (strcmp(CS name, "find") == 0)
689   {
690   find_chars(s);
691   }
692 
693 else if (strcmp(CS name, "list") == 0)
694   {
695   while (*s != 0)
696     {
697     size_t i;
698     for (t = name; *s != 0 && !isspace(*s); s++) *t++ = *s;
699     *t = 0;
700     while (isspace(*s)) s++;
701 
702     if (strcmp(CS name, "script") == 0 || strcmp(CS name, "scripts") == 0)
703       {
704       for (i = 0; i < PRIV(utt_size); i++)
705         if (PRIV(utt)[i].type == PT_SC)
706           printf("%s\n", PRIV(utt_names) + PRIV(utt)[i].name_offset);
707       }
708 
709     else if (strcmp(CS name, "type") == 0 || strcmp(CS name, "types") == 0)
710       {
711       for (i = 0; i < sizeof(type_names)/sizeof(char *); i += 2)
712         printf("%s %s\n", type_names[i], type_names[i+1]);
713       }
714 
715     else if (strcmp(CS name, "gbreak") == 0 || strcmp(CS name, "gbreaks") == 0)
716       {
717       for (i = 0; i < sizeof(gb_names)/sizeof(char *); i += 2)
718         {
719         if (gb_names[i+1][0] != 0)
720           printf("%-3s (%s)\n", gb_names[i], gb_names[i+1]);
721         else
722           printf("%s\n", gb_names[i]);
723         }
724       }
725 
726     else
727       {
728       printf("** Unknown property \"%s\"\n", name);
729       break;
730       }
731     }
732   }
733 
734 else printf("** Unknown test command \"%s\"\n", name);
735 }
736 
737 
738 
739 /*************************************************
740 *               Main program                     *
741 *************************************************/
742 
743 int
main(int argc,char ** argv)744 main(int argc, char **argv)
745 {
746 BOOL interactive;
747 int first_arg = 1;
748 unsigned char buffer[1024];
749 
750 if (argc > 1 && strcmp(argv[1], "-s") == 0)
751   {
752   show_character = TRUE;
753   first_arg++;
754   }
755 
756 if (argc > first_arg)
757   {
758   int i;
759   BOOL hexfirst = TRUE;
760   char *arg = argv[first_arg];
761   unsigned char *s = buffer;
762 
763   if (strncmp(arg, "U+", 2) != 0 && !isdigit(*arg))
764     {
765     while (*arg != 0)
766       {
767       if (!isxdigit(*arg++)) { hexfirst = FALSE; break; }
768       }
769     }
770 
771   if (hexfirst)
772     {
773     strcpy(CS s, "findprop ");
774     s += 9;
775     }
776 
777   for (i = first_arg; i < argc; i++)
778     {
779     s += sprintf(CS s, "%s ", argv[i]);
780     }
781 
782   process_command_line(buffer);
783   return 0;
784   }
785 
786 interactive = is_stdin_tty();
787 
788 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
789 if (interactive) using_history();
790 #endif
791 
792 for(;;)
793   {
794 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
795   if (interactive)
796     {
797     size_t len;
798     unsigned char *s = US readline("> ");
799     if (s == NULL) break;
800     len = strlen(CS s);
801     if (len > 0) add_history(CS s);
802     memcpy(buffer, s, len);
803     buffer[len] = '\n';
804     buffer[len+1] = 0;
805     free(s);
806     }
807   else
808 #endif
809 
810     {
811     if (interactive) printf("> ");
812     if (fgets(CS buffer, sizeof(buffer), stdin) == NULL) break;
813     if (!interactive) printf("%s", buffer);
814     }
815 
816   process_command_line(buffer);
817   }
818 
819 if (interactive) printf("\n");
820 
821 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
822 if (interactive) clear_history();
823 #endif
824 
825 return 0;
826 }
827 
828 /* End */
829