/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
         New API code Copyright (c) 2016 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/


/* This module contains a PCRE private debugging function for printing out the
internal form of a compiled regular expression, along with some supporting
local functions. This source file is #included in pcre2test.c at each supported
code unit width, with PCRE2_SUFFIX set appropriately, just like the functions
that comprise the library. It can also optionally be included in
pcre2_compile.c for detailed debugging in error situations. */


/* Tables of operator names. The same 8-bit table is used for all code unit
widths, so it must be defined only once. The list itself is defined in
pcre2_internal.h, which is #included by pcre2test before this file. */

54 #ifndef OP_LISTS_DEFINED
55 static const char *OP_names[] = { OP_NAME_LIST };
56 #define OP_LISTS_DEFINED
57 #endif

/* The functions and tables herein must all have mode-dependent names. */

/* Each local name is mapped through PCRE2_SUFFIX so that every inclusion of
this file (one per code unit width) gets its own distinct set of symbols. */

#define OP_lengths            PCRE2_SUFFIX(OP_lengths_)
#define get_ucpname           PCRE2_SUFFIX(get_ucpname_)
#define pcre2_printint        PCRE2_SUFFIX(pcre2_printint_)
#define print_char            PCRE2_SUFFIX(print_char_)
#define print_custring        PCRE2_SUFFIX(print_custring_)
#define print_custring_bylen  PCRE2_SUFFIX(print_custring_bylen_)
#define print_prop            PCRE2_SUFFIX(print_prop_)

/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
the definition is next to the definition of the opcodes in pcre2_internal.h.
The contents of the table are, however, mode-dependent. */

73 static const uint8_t OP_lengths[] = { OP_LENGTHS };



/*************************************************
*       Print one character from a string        *
*************************************************/

/* In UTF mode the character may occupy more than one code unit.

Arguments:
  f           file to write to
  ptr         pointer to first code unit of the character
  utf         TRUE if string is UTF (will be FALSE if UTF is not supported)

Returns:      number of additional code units used
*/

91 static unsigned int
print_char(FILE * f,PCRE2_SPTR ptr,BOOL utf)92 print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf)
93 {
94 uint32_t c = *ptr;
95 BOOL one_code_unit = !utf;
96
97 /* If UTF is supported and requested, check for a valid single code unit. */
98
99 #ifdef SUPPORT_UNICODE
100 if (utf)
101 {
102 #if PCRE2_CODE_UNIT_WIDTH == 8
103 one_code_unit = c < 0x80;
104 #elif PCRE2_CODE_UNIT_WIDTH == 16
105 one_code_unit = (c & 0xfc00) != 0xd800;
106 #else
107 one_code_unit = (c & 0xfffff800u) != 0xd800u;
108 #endif /* CODE_UNIT_WIDTH */
109 }
110 #endif /* SUPPORT_UNICODE */
111
112 /* Handle a valid one-code-unit character at any width. */
113
114 if (one_code_unit)
115 {
116 if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
117 else if (c < 0x80) fprintf(f, "\\x%02x", c);
118 else fprintf(f, "\\x{%02x}", c);
119 return 0;
120 }
121
122 /* Code for invalid UTF code units and multi-unit UTF characters is different
123 for each width. If UTF is not supported, control should never get here, but we
124 need a return statement to keep the compiler happy. */
125
126 #ifndef SUPPORT_UNICODE
127 return 0;
128 #else
129
130 /* Malformed UTF-8 should occur only if the sanity check has been turned off.
131 Rather than swallow random bytes, just stop if we hit a bad one. Print it with
132 \X instead of \x as an indication. */
133
134 #if PCRE2_CODE_UNIT_WIDTH == 8
135 if ((c & 0xc0) != 0xc0)
136 {
137 fprintf(f, "\\X{%x}", c); /* Invalid starting byte */
138 return 0;
139 }
140 else
141 {
142 int i;
143 int a = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */
144 int s = 6*a;
145 c = (c & PRIV(utf8_table3)[a]) << s;
146 for (i = 1; i <= a; i++)
147 {
148 if ((ptr[i] & 0xc0) != 0x80)
149 {
150 fprintf(f, "\\X{%x}", c); /* Invalid secondary byte */
151 return i - 1;
152 }
153 s -= 6;
154 c |= (ptr[i] & 0x3f) << s;
155 }
156 fprintf(f, "\\x{%x}", c);
157 return a;
158 }
159 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
160
161 /* UTF-16: rather than swallow a low surrogate, just stop if we hit a bad one.
162 Print it with \X instead of \x as an indication. */
163
164 #if PCRE2_CODE_UNIT_WIDTH == 16
165 if ((ptr[1] & 0xfc00) != 0xdc00)
166 {
167 fprintf(f, "\\X{%x}", c);
168 return 0;
169 }
170 c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000;
171 fprintf(f, "\\x{%x}", c);
172 return 1;
173 #endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
174
175 /* For UTF-32 we get here only for a malformed code unit, which should only
176 occur if the sanity check has been turned off. Print it with \X instead of \x
177 as an indication. */
178
179 #if PCRE2_CODE_UNIT_WIDTH == 32
180 fprintf(f, "\\X{%x}", c);
181 return 0;
182 #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
183 #endif /* SUPPORT_UNICODE */
184 }



/*************************************************
*     Print string as a list of code units       *
*************************************************/

/* These take no account of UTF as they always print each individual code unit.
The string is zero-terminated for print_custring(); the length is given for
print_custring_bylen().

Arguments:
  f          file to write to
  ptr        pointer to the string
  len        length for print_custring_bylen()

Returns:     nothing
*/

204 static void
print_custring(FILE * f,PCRE2_SPTR ptr)205 print_custring(FILE *f, PCRE2_SPTR ptr)
206 {
207 while (*ptr != '\0')
208 {
209 register uint32_t c = *ptr++;
210 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
211 }
212 }
213
214 static void
print_custring_bylen(FILE * f,PCRE2_SPTR ptr,PCRE2_UCHAR len)215 print_custring_bylen(FILE *f, PCRE2_SPTR ptr, PCRE2_UCHAR len)
216 {
217 for (; len > 0; len--)
218 {
219 register uint32_t c = *ptr++;
220 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
221 }
222 }



/*************************************************
*           Find Unicode property name           *
*************************************************/

/* When there is no UTF/UCP support, the table of names does not exist. This
function should not be called in such configurations, because a pattern that
tries to use Unicode properties won't compile. Rather than put lots of #ifdefs
into the main code, however, we just put one into this function. */

/* Look up the name of a Unicode property.

Arguments:
  ptype       property type
  pvalue      property value

Returns:      the name from the property name table whose entry matches both
              type and value, or "??" if there is no such entry or Unicode
              support is not compiled in
*/

static const char *
get_ucpname(unsigned int ptype, unsigned int pvalue)
{
#ifdef SUPPORT_UNICODE
int i;

/* Scan from the last entry to the first, so that when several entries match,
the one latest in the table is the one reported. */

for (i = (int)PRIV(utt_size) - 1; i >= 0; i--)
  {
  if (ptype == PRIV(utt)[i].type && pvalue == PRIV(utt)[i].value)
    return PRIV(utt_names) + PRIV(utt)[i].name_offset;
  }
return "??";
#else   /* No UTF support */
(void)ptype;
(void)pvalue;
return "??";
#endif  /* SUPPORT_UNICODE */
}



/*************************************************
*       Print Unicode property value             *
*************************************************/

/* "Normal" properties can be printed from tables. The PT_CLIST property is a
pseudo-property that contains a pointer to a list of case-equivalent
characters.

Arguments:
  f            file to write to
  code         pointer in the compiled code
  before       text to print before
  after        text to print after

Returns:       nothing
*/

271 static void
print_prop(FILE * f,PCRE2_SPTR code,const char * before,const char * after)272 print_prop(FILE *f, PCRE2_SPTR code, const char *before, const char *after)
273 {
274 if (code[1] != PT_CLIST)
275 {
276 fprintf(f, "%s%s %s%s", before, OP_names[*code], get_ucpname(code[1],
277 code[2]), after);
278 }
279 else
280 {
281 const char *not = (*code == OP_PROP)? "" : "not ";
282 const uint32_t *p = PRIV(ucd_caseless_sets) + code[2];
283 fprintf (f, "%s%sclist", before, not);
284 while (*p < NOTACHAR) fprintf(f, " %04x", *p++);
285 fprintf(f, "%s", after);
286 }
287 }



/*************************************************
*            Print compiled pattern              *
*************************************************/

/* The print_lengths flag controls whether offsets and lengths of items are
printed. Lengths can be turned off from pcre2test so that automatic tests on
bytecode can be written that do not depend on the value of LINK_SIZE.

Arguments:
  re              a compiled pattern
  f               the file to write to
  print_lengths   show various lengths

Returns:          nothing
*/

307 static void
pcre2_printint(pcre2_code * re,FILE * f,BOOL print_lengths)308 pcre2_printint(pcre2_code *re, FILE *f, BOOL print_lengths)
309 {
310 PCRE2_SPTR codestart, nametable, code;
311 uint32_t nesize = re->name_entry_size;
312 BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
313
314 nametable = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
315 code = codestart = nametable + re->name_count * re->name_entry_size;
316
317 for(;;)
318 {
319 PCRE2_SPTR ccode;
320 uint32_t c;
321 int i;
322 const char *flag = " ";
323 unsigned int extra = 0;
324
325 if (print_lengths)
326 fprintf(f, "%3d ", (int)(code - codestart));
327 else
328 fprintf(f, " ");
329
330 switch(*code)
331 {
332 /* ========================================================================== */
333 /* These cases are never obeyed. This is a fudge that causes a compile-
334 time error if the vectors OP_names or OP_lengths, which are indexed
335 by opcode, are not the correct length. It seems to be the only way to do
336 such a check at compile time, as the sizeof() operator does not work in
337 the C preprocessor. */
338
339 case OP_TABLE_LENGTH:
340 case OP_TABLE_LENGTH +
341 ((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
342 (sizeof(OP_lengths) == OP_TABLE_LENGTH)):
343 break;
344 /* ========================================================================== */
345
346 case OP_END:
347 fprintf(f, " %s\n", OP_names[*code]);
348 fprintf(f, "------------------------------------------------------------------\n");
349 return;
350
351 case OP_CHAR:
352 fprintf(f, " ");
353 do
354 {
355 code++;
356 code += 1 + print_char(f, code, utf);
357 }
358 while (*code == OP_CHAR);
359 fprintf(f, "\n");
360 continue;
361
362 case OP_CHARI:
363 fprintf(f, " /i ");
364 do
365 {
366 code++;
367 code += 1 + print_char(f, code, utf);
368 }
369 while (*code == OP_CHARI);
370 fprintf(f, "\n");
371 continue;
372
373 case OP_CBRA:
374 case OP_CBRAPOS:
375 case OP_SCBRA:
376 case OP_SCBRAPOS:
377 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
378 else fprintf(f, " ");
379 fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
380 break;
381
382 case OP_BRA:
383 case OP_BRAPOS:
384 case OP_SBRA:
385 case OP_SBRAPOS:
386 case OP_KETRMAX:
387 case OP_KETRMIN:
388 case OP_KETRPOS:
389 case OP_ALT:
390 case OP_KET:
391 case OP_ASSERT:
392 case OP_ASSERT_NOT:
393 case OP_ASSERTBACK:
394 case OP_ASSERTBACK_NOT:
395 case OP_ONCE:
396 case OP_ONCE_NC:
397 case OP_COND:
398 case OP_SCOND:
399 case OP_REVERSE:
400 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
401 else fprintf(f, " ");
402 fprintf(f, "%s", OP_names[*code]);
403 break;
404
405 case OP_CLOSE:
406 fprintf(f, " %s %d", OP_names[*code], GET2(code, 1));
407 break;
408
409 case OP_CREF:
410 fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
411 break;
412
413 case OP_DNCREF:
414 {
415 PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
416 fprintf(f, " %s Cond ref <", flag);
417 print_custring(f, entry);
418 fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
419 }
420 break;
421
422 case OP_RREF:
423 c = GET2(code, 1);
424 if (c == RREF_ANY)
425 fprintf(f, " Cond recurse any");
426 else
427 fprintf(f, " Cond recurse %d", c);
428 break;
429
430 case OP_DNRREF:
431 {
432 PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
433 fprintf(f, " %s Cond recurse <", flag);
434 print_custring(f, entry);
435 fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
436 }
437 break;
438
439 case OP_FALSE:
440 fprintf(f, " Cond false");
441 break;
442
443 case OP_TRUE:
444 fprintf(f, " Cond true");
445 break;
446
447 case OP_STARI:
448 case OP_MINSTARI:
449 case OP_POSSTARI:
450 case OP_PLUSI:
451 case OP_MINPLUSI:
452 case OP_POSPLUSI:
453 case OP_QUERYI:
454 case OP_MINQUERYI:
455 case OP_POSQUERYI:
456 flag = "/i";
457 /* Fall through */
458 case OP_STAR:
459 case OP_MINSTAR:
460 case OP_POSSTAR:
461 case OP_PLUS:
462 case OP_MINPLUS:
463 case OP_POSPLUS:
464 case OP_QUERY:
465 case OP_MINQUERY:
466 case OP_POSQUERY:
467 case OP_TYPESTAR:
468 case OP_TYPEMINSTAR:
469 case OP_TYPEPOSSTAR:
470 case OP_TYPEPLUS:
471 case OP_TYPEMINPLUS:
472 case OP_TYPEPOSPLUS:
473 case OP_TYPEQUERY:
474 case OP_TYPEMINQUERY:
475 case OP_TYPEPOSQUERY:
476 fprintf(f, " %s ", flag);
477
478 if (*code >= OP_TYPESTAR)
479 {
480 if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
481 {
482 print_prop(f, code + 1, "", " ");
483 extra = 2;
484 }
485 else fprintf(f, "%s", OP_names[code[1]]);
486 }
487 else extra = print_char(f, code+1, utf);
488 fprintf(f, "%s", OP_names[*code]);
489 break;
490
491 case OP_EXACTI:
492 case OP_UPTOI:
493 case OP_MINUPTOI:
494 case OP_POSUPTOI:
495 flag = "/i";
496 /* Fall through */
497 case OP_EXACT:
498 case OP_UPTO:
499 case OP_MINUPTO:
500 case OP_POSUPTO:
501 fprintf(f, " %s ", flag);
502 extra = print_char(f, code + 1 + IMM2_SIZE, utf);
503 fprintf(f, "{");
504 if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,");
505 fprintf(f, "%d}", GET2(code,1));
506 if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?");
507 else if (*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+");
508 break;
509
510 case OP_TYPEEXACT:
511 case OP_TYPEUPTO:
512 case OP_TYPEMINUPTO:
513 case OP_TYPEPOSUPTO:
514 if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
515 {
516 print_prop(f, code + IMM2_SIZE + 1, " ", " ");
517 extra = 2;
518 }
519 else fprintf(f, " %s", OP_names[code[1 + IMM2_SIZE]]);
520 fprintf(f, "{");
521 if (*code != OP_TYPEEXACT) fprintf(f, "0,");
522 fprintf(f, "%d}", GET2(code,1));
523 if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
524 else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
525 break;
526
527 case OP_NOTI:
528 flag = "/i";
529 /* Fall through */
530 case OP_NOT:
531 fprintf(f, " %s [^", flag);
532 extra = print_char(f, code + 1, utf);
533 fprintf(f, "]");
534 break;
535
536 case OP_NOTSTARI:
537 case OP_NOTMINSTARI:
538 case OP_NOTPOSSTARI:
539 case OP_NOTPLUSI:
540 case OP_NOTMINPLUSI:
541 case OP_NOTPOSPLUSI:
542 case OP_NOTQUERYI:
543 case OP_NOTMINQUERYI:
544 case OP_NOTPOSQUERYI:
545 flag = "/i";
546 /* Fall through */
547
548 case OP_NOTSTAR:
549 case OP_NOTMINSTAR:
550 case OP_NOTPOSSTAR:
551 case OP_NOTPLUS:
552 case OP_NOTMINPLUS:
553 case OP_NOTPOSPLUS:
554 case OP_NOTQUERY:
555 case OP_NOTMINQUERY:
556 case OP_NOTPOSQUERY:
557 fprintf(f, " %s [^", flag);
558 extra = print_char(f, code + 1, utf);
559 fprintf(f, "]%s", OP_names[*code]);
560 break;
561
562 case OP_NOTEXACTI:
563 case OP_NOTUPTOI:
564 case OP_NOTMINUPTOI:
565 case OP_NOTPOSUPTOI:
566 flag = "/i";
567 /* Fall through */
568
569 case OP_NOTEXACT:
570 case OP_NOTUPTO:
571 case OP_NOTMINUPTO:
572 case OP_NOTPOSUPTO:
573 fprintf(f, " %s [^", flag);
574 extra = print_char(f, code + 1 + IMM2_SIZE, utf);
575 fprintf(f, "]{");
576 if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,");
577 fprintf(f, "%d}", GET2(code,1));
578 if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?");
579 else
580 if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+");
581 break;
582
583 case OP_RECURSE:
584 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
585 else fprintf(f, " ");
586 fprintf(f, "%s", OP_names[*code]);
587 break;
588
589 case OP_REFI:
590 flag = "/i";
591 /* Fall through */
592 case OP_REF:
593 fprintf(f, " %s \\%d", flag, GET2(code,1));
594 ccode = code + OP_lengths[*code];
595 goto CLASS_REF_REPEAT;
596
597 case OP_DNREFI:
598 flag = "/i";
599 /* Fall through */
600 case OP_DNREF:
601 {
602 PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
603 fprintf(f, " %s \\k<", flag);
604 print_custring(f, entry);
605 fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
606 }
607 ccode = code + OP_lengths[*code];
608 goto CLASS_REF_REPEAT;
609
610 case OP_CALLOUT:
611 fprintf(f, " %s %d %d %d", OP_names[*code], code[1 + 2*LINK_SIZE],
612 GET(code, 1), GET(code, 1 + LINK_SIZE));
613 break;
614
615 case OP_CALLOUT_STR:
616 c = code[1 + 4*LINK_SIZE];
617 fprintf(f, " %s %c", OP_names[*code], c);
618 extra = GET(code, 1 + 2*LINK_SIZE);
619 print_custring_bylen(f, code + 2 + 4*LINK_SIZE, extra - 3 - 4*LINK_SIZE);
620 for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
621 if (c == PRIV(callout_start_delims)[i])
622 {
623 c = PRIV(callout_end_delims)[i];
624 break;
625 }
626 fprintf(f, "%c %d %d %d", c, GET(code, 1 + 3*LINK_SIZE), GET(code, 1),
627 GET(code, 1 + LINK_SIZE));
628 break;
629
630 case OP_PROP:
631 case OP_NOTPROP:
632 print_prop(f, code, " ", "");
633 break;
634
635 /* OP_XCLASS cannot occur in 8-bit, non-UTF mode. However, there's no harm
636 in having this code always here, and it makes it less messy without all
637 those #ifdefs. */
638
639 case OP_CLASS:
640 case OP_NCLASS:
641 case OP_XCLASS:
642 {
643 unsigned int min, max;
644 BOOL printmap;
645 BOOL invertmap = FALSE;
646 uint8_t *map;
647 uint8_t inverted_map[32];
648
649 fprintf(f, " [");
650
651 if (*code == OP_XCLASS)
652 {
653 extra = GET(code, 1);
654 ccode = code + LINK_SIZE + 1;
655 printmap = (*ccode & XCL_MAP) != 0;
656 if ((*ccode & XCL_NOT) != 0)
657 {
658 invertmap = (*ccode & XCL_HASPROP) == 0;
659 fprintf(f, "^");
660 }
661 ccode++;
662 }
663 else
664 {
665 printmap = TRUE;
666 ccode = code + 1;
667 }
668
669 /* Print a bit map */
670
671 if (printmap)
672 {
673 map = (uint8_t *)ccode;
674 if (invertmap)
675 {
676 for (i = 0; i < 32; i++) inverted_map[i] = ~map[i];
677 map = inverted_map;
678 }
679
680 for (i = 0; i < 256; i++)
681 {
682 if ((map[i/8] & (1 << (i&7))) != 0)
683 {
684 int j;
685 for (j = i+1; j < 256; j++)
686 if ((map[j/8] & (1 << (j&7))) == 0) break;
687 if (i == '-' || i == ']') fprintf(f, "\\");
688 if (PRINTABLE(i)) fprintf(f, "%c", i);
689 else fprintf(f, "\\x%02x", i);
690 if (--j > i)
691 {
692 if (j != i + 1) fprintf(f, "-");
693 if (j == '-' || j == ']') fprintf(f, "\\");
694 if (PRINTABLE(j)) fprintf(f, "%c", j);
695 else fprintf(f, "\\x%02x", j);
696 }
697 i = j;
698 }
699 }
700 ccode += 32 / sizeof(PCRE2_UCHAR);
701 }
702
703 /* For an XCLASS there is always some additional data */
704
705 if (*code == OP_XCLASS)
706 {
707 PCRE2_UCHAR ch;
708 while ((ch = *ccode++) != XCL_END)
709 {
710 BOOL not = FALSE;
711 const char *notch = "";
712
713 switch(ch)
714 {
715 case XCL_NOTPROP:
716 not = TRUE;
717 notch = "^";
718 /* Fall through */
719
720 case XCL_PROP:
721 {
722 unsigned int ptype = *ccode++;
723 unsigned int pvalue = *ccode++;
724
725 switch(ptype)
726 {
727 case PT_PXGRAPH:
728 fprintf(f, "[:%sgraph:]", notch);
729 break;
730
731 case PT_PXPRINT:
732 fprintf(f, "[:%sprint:]", notch);
733 break;
734
735 case PT_PXPUNCT:
736 fprintf(f, "[:%spunct:]", notch);
737 break;
738
739 default:
740 fprintf(f, "\\%c{%s}", (not? 'P':'p'),
741 get_ucpname(ptype, pvalue));
742 break;
743 }
744 }
745 break;
746
747 default:
748 ccode += 1 + print_char(f, ccode, utf);
749 if (ch == XCL_RANGE)
750 {
751 fprintf(f, "-");
752 ccode += 1 + print_char(f, ccode, utf);
753 }
754 break;
755 }
756 }
757 }
758
759 /* Indicate a non-UTF class which was created by negation */
760
761 fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
762
763 /* Handle repeats after a class or a back reference */
764
765 CLASS_REF_REPEAT:
766 switch(*ccode)
767 {
768 case OP_CRSTAR:
769 case OP_CRMINSTAR:
770 case OP_CRPLUS:
771 case OP_CRMINPLUS:
772 case OP_CRQUERY:
773 case OP_CRMINQUERY:
774 case OP_CRPOSSTAR:
775 case OP_CRPOSPLUS:
776 case OP_CRPOSQUERY:
777 fprintf(f, "%s", OP_names[*ccode]);
778 extra += OP_lengths[*ccode];
779 break;
780
781 case OP_CRRANGE:
782 case OP_CRMINRANGE:
783 case OP_CRPOSRANGE:
784 min = GET2(ccode,1);
785 max = GET2(ccode,1 + IMM2_SIZE);
786 if (max == 0) fprintf(f, "{%u,}", min);
787 else fprintf(f, "{%u,%u}", min, max);
788 if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
789 else if (*ccode == OP_CRPOSRANGE) fprintf(f, "+");
790 extra += OP_lengths[*ccode];
791 break;
792
793 /* Do nothing if it's not a repeat; this code stops picky compilers
794 warning about the lack of a default code path. */
795
796 default:
797 break;
798 }
799 }
800 break;
801
802 case OP_MARK:
803 case OP_PRUNE_ARG:
804 case OP_SKIP_ARG:
805 case OP_THEN_ARG:
806 fprintf(f, " %s ", OP_names[*code]);
807 print_custring_bylen(f, code + 2, code[1]);
808 extra += code[1];
809 break;
810
811 case OP_THEN:
812 fprintf(f, " %s", OP_names[*code]);
813 break;
814
815 case OP_CIRCM:
816 case OP_DOLLM:
817 flag = "/m";
818 /* Fall through */
819
820 /* Anything else is just an item with no data, but possibly a flag. */
821
822 default:
823 fprintf(f, " %s %s", flag, OP_names[*code]);
824 break;
825 }
826
827 code += OP_lengths[*code] + extra;
828 fprintf(f, "\n");
829 }
830 }

/* End of pcre2_printint.c */
