• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# This set of tests is for UTF-16 and UTF-32 support, including Unicode
2# properties. It is relevant only to the 16-bit and 32-bit libraries. The
3# output is different for each library, so there are separate output files.
4
5/���xxx/IB,utf,no_utf_check
6** Failed: invalid UTF-8 string cannot be converted to 32-bit string
7
8/abc/utf
9    �]
10** Failed: invalid UTF-8 string cannot be used as input in UTF mode
11
12# Check maximum character size
13
14/\x{ffff}/IB,utf
15------------------------------------------------------------------
16        Bra
17        \x{ffff}
18        Ket
19        End
20------------------------------------------------------------------
21Capturing subpattern count = 0
22Options: utf
23First code unit = \x{ffff}
24Subject length lower bound = 1
25
26/\x{10000}/IB,utf
27------------------------------------------------------------------
28        Bra
29        \x{10000}
30        Ket
31        End
32------------------------------------------------------------------
33Capturing subpattern count = 0
34Options: utf
35First code unit = \x{10000}
36Subject length lower bound = 1
37
38/\x{100}/IB,utf
39------------------------------------------------------------------
40        Bra
41        \x{100}
42        Ket
43        End
44------------------------------------------------------------------
45Capturing subpattern count = 0
46Options: utf
47First code unit = \x{100}
48Subject length lower bound = 1
49
50/\x{1000}/IB,utf
51------------------------------------------------------------------
52        Bra
53        \x{1000}
54        Ket
55        End
56------------------------------------------------------------------
57Capturing subpattern count = 0
58Options: utf
59First code unit = \x{1000}
60Subject length lower bound = 1
61
62/\x{10000}/IB,utf
63------------------------------------------------------------------
64        Bra
65        \x{10000}
66        Ket
67        End
68------------------------------------------------------------------
69Capturing subpattern count = 0
70Options: utf
71First code unit = \x{10000}
72Subject length lower bound = 1
73
74/\x{100000}/IB,utf
75------------------------------------------------------------------
76        Bra
77        \x{100000}
78        Ket
79        End
80------------------------------------------------------------------
81Capturing subpattern count = 0
82Options: utf
83First code unit = \x{100000}
84Subject length lower bound = 1
85
86/\x{10ffff}/IB,utf
87------------------------------------------------------------------
88        Bra
89        \x{10ffff}
90        Ket
91        End
92------------------------------------------------------------------
93Capturing subpattern count = 0
94Options: utf
95First code unit = \x{10ffff}
96Subject length lower bound = 1
97
98/[\x{ff}]/IB,utf
99------------------------------------------------------------------
100        Bra
101        \x{ff}
102        Ket
103        End
104------------------------------------------------------------------
105Capturing subpattern count = 0
106Options: utf
107First code unit = \xff
108Subject length lower bound = 1
109
110/[\x{100}]/IB,utf
111------------------------------------------------------------------
112        Bra
113        \x{100}
114        Ket
115        End
116------------------------------------------------------------------
117Capturing subpattern count = 0
118Options: utf
119First code unit = \x{100}
120Subject length lower bound = 1
121
122/\x80/IB,utf
123------------------------------------------------------------------
124        Bra
125        \x{80}
126        Ket
127        End
128------------------------------------------------------------------
129Capturing subpattern count = 0
130Options: utf
131First code unit = \x80
132Subject length lower bound = 1
133
134/\xff/IB,utf
135------------------------------------------------------------------
136        Bra
137        \x{ff}
138        Ket
139        End
140------------------------------------------------------------------
141Capturing subpattern count = 0
142Options: utf
143First code unit = \xff
144Subject length lower bound = 1
145
146/\x{D55c}\x{ad6d}\x{C5B4}/IB,utf
147------------------------------------------------------------------
148        Bra
149        \x{d55c}\x{ad6d}\x{c5b4}
150        Ket
151        End
152------------------------------------------------------------------
153Capturing subpattern count = 0
154Options: utf
155First code unit = \x{d55c}
156Last code unit = \x{c5b4}
157Subject length lower bound = 3
158    \x{D55c}\x{ad6d}\x{C5B4}
159 0: \x{d55c}\x{ad6d}\x{c5b4}
160
161/\x{65e5}\x{672c}\x{8a9e}/IB,utf
162------------------------------------------------------------------
163        Bra
164        \x{65e5}\x{672c}\x{8a9e}
165        Ket
166        End
167------------------------------------------------------------------
168Capturing subpattern count = 0
169Options: utf
170First code unit = \x{65e5}
171Last code unit = \x{8a9e}
172Subject length lower bound = 3
173    \x{65e5}\x{672c}\x{8a9e}
174 0: \x{65e5}\x{672c}\x{8a9e}
175
176/\x{80}/IB,utf
177------------------------------------------------------------------
178        Bra
179        \x{80}
180        Ket
181        End
182------------------------------------------------------------------
183Capturing subpattern count = 0
184Options: utf
185First code unit = \x80
186Subject length lower bound = 1
187
188/\x{084}/IB,utf
189------------------------------------------------------------------
190        Bra
191        \x{84}
192        Ket
193        End
194------------------------------------------------------------------
195Capturing subpattern count = 0
196Options: utf
197First code unit = \x84
198Subject length lower bound = 1
199
200/\x{104}/IB,utf
201------------------------------------------------------------------
202        Bra
203        \x{104}
204        Ket
205        End
206------------------------------------------------------------------
207Capturing subpattern count = 0
208Options: utf
209First code unit = \x{104}
210Subject length lower bound = 1
211
212/\x{861}/IB,utf
213------------------------------------------------------------------
214        Bra
215        \x{861}
216        Ket
217        End
218------------------------------------------------------------------
219Capturing subpattern count = 0
220Options: utf
221First code unit = \x{861}
222Subject length lower bound = 1
223
224/\x{212ab}/IB,utf
225------------------------------------------------------------------
226        Bra
227        \x{212ab}
228        Ket
229        End
230------------------------------------------------------------------
231Capturing subpattern count = 0
232Options: utf
233First code unit = \x{212ab}
234Subject length lower bound = 1
235
236/[^ab\xC0-\xF0]/IB,utf
237------------------------------------------------------------------
238        Bra
239        [\x00-`c-\xbf\xf1-\xff] (neg)
240        Ket
241        End
242------------------------------------------------------------------
243Capturing subpattern count = 0
244Options: utf
245Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
246  \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
247  \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
248  5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y
249  Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
250  \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e
251  \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d
252  \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac
253  \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb
254  \xbc \xbd \xbe \xbf \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb
255  \xfc \xfd \xfe \xff
256Subject length lower bound = 1
257    \x{f1}
258 0: \x{f1}
259    \x{bf}
260 0: \x{bf}
261    \x{100}
262 0: \x{100}
263    \x{1000}
264 0: \x{1000}
265\= Expect no match
266    \x{c0}
267No match
268    \x{f0}
269No match
270
271/Ā{3,4}/IB,utf
272------------------------------------------------------------------
273        Bra
274        \x{100}{3}
275        \x{100}?+
276        Ket
277        End
278------------------------------------------------------------------
279Capturing subpattern count = 0
280Options: utf
281First code unit = \x{100}
282Last code unit = \x{100}
283Subject length lower bound = 3
284  \x{100}\x{100}\x{100}\x{100\x{100}
285 0: \x{100}\x{100}\x{100}
286
287/(\x{100}+|x)/IB,utf
288------------------------------------------------------------------
289        Bra
290        CBra 1
291        \x{100}++
292        Alt
293        x
294        Ket
295        Ket
296        End
297------------------------------------------------------------------
298Capturing subpattern count = 1
299Options: utf
300Starting code units: x \xff
301Subject length lower bound = 1
302
303/(\x{100}*a|x)/IB,utf
304------------------------------------------------------------------
305        Bra
306        CBra 1
307        \x{100}*+
308        a
309        Alt
310        x
311        Ket
312        Ket
313        End
314------------------------------------------------------------------
315Capturing subpattern count = 1
316Options: utf
317Starting code units: a x \xff
318Subject length lower bound = 1
319
320/(\x{100}{0,2}a|x)/IB,utf
321------------------------------------------------------------------
322        Bra
323        CBra 1
324        \x{100}{0,2}+
325        a
326        Alt
327        x
328        Ket
329        Ket
330        End
331------------------------------------------------------------------
332Capturing subpattern count = 1
333Options: utf
334Starting code units: a x \xff
335Subject length lower bound = 1
336
337/(\x{100}{1,2}a|x)/IB,utf
338------------------------------------------------------------------
339        Bra
340        CBra 1
341        \x{100}
342        \x{100}{0,1}+
343        a
344        Alt
345        x
346        Ket
347        Ket
348        End
349------------------------------------------------------------------
350Capturing subpattern count = 1
351Options: utf
352Starting code units: x \xff
353Subject length lower bound = 1
354
355/\x{100}/IB,utf
356------------------------------------------------------------------
357        Bra
358        \x{100}
359        Ket
360        End
361------------------------------------------------------------------
362Capturing subpattern count = 0
363Options: utf
364First code unit = \x{100}
365Subject length lower bound = 1
366
367/a\x{100}\x{101}*/IB,utf
368------------------------------------------------------------------
369        Bra
370        a\x{100}
371        \x{101}*+
372        Ket
373        End
374------------------------------------------------------------------
375Capturing subpattern count = 0
376Options: utf
377First code unit = 'a'
378Last code unit = \x{100}
379Subject length lower bound = 2
380
381/a\x{100}\x{101}+/IB,utf
382------------------------------------------------------------------
383        Bra
384        a\x{100}
385        \x{101}++
386        Ket
387        End
388------------------------------------------------------------------
389Capturing subpattern count = 0
390Options: utf
391First code unit = 'a'
392Last code unit = \x{101}
393Subject length lower bound = 3
394
395/[^\x{c4}]/IB
396------------------------------------------------------------------
397        Bra
398        [^\x{c4}]
399        Ket
400        End
401------------------------------------------------------------------
402Capturing subpattern count = 0
403Subject length lower bound = 1
404
405/[\x{100}]/IB,utf
406------------------------------------------------------------------
407        Bra
408        \x{100}
409        Ket
410        End
411------------------------------------------------------------------
412Capturing subpattern count = 0
413Options: utf
414First code unit = \x{100}
415Subject length lower bound = 1
416    \x{100}
417 0: \x{100}
418    Z\x{100}
419 0: \x{100}
420    \x{100}Z
421 0: \x{100}
422
423/[\xff]/IB,utf
424------------------------------------------------------------------
425        Bra
426        \x{ff}
427        Ket
428        End
429------------------------------------------------------------------
430Capturing subpattern count = 0
431Options: utf
432First code unit = \xff
433Subject length lower bound = 1
434    >\x{ff}<
435 0: \x{ff}
436
437/[^\xff]/IB,utf
438------------------------------------------------------------------
439        Bra
440        [^\x{ff}]
441        Ket
442        End
443------------------------------------------------------------------
444Capturing subpattern count = 0
445Options: utf
446Subject length lower bound = 1
447
448/\x{100}abc(xyz(?1))/IB,utf
449------------------------------------------------------------------
450        Bra
451        \x{100}abc
452        CBra 1
453        xyz
454        Recurse
455        Ket
456        Ket
457        End
458------------------------------------------------------------------
459Capturing subpattern count = 1
460Options: utf
461First code unit = \x{100}
462Last code unit = 'z'
463Subject length lower bound = 7
464
465/\777/I,utf
466Capturing subpattern count = 0
467Options: utf
468First code unit = \x{1ff}
469Subject length lower bound = 1
470  \x{1ff}
471 0: \x{1ff}
472  \777
473 0: \x{1ff}
474
475/\x{100}+\x{200}/IB,utf
476------------------------------------------------------------------
477        Bra
478        \x{100}++
479        \x{200}
480        Ket
481        End
482------------------------------------------------------------------
483Capturing subpattern count = 0
484Options: utf
485First code unit = \x{100}
486Last code unit = \x{200}
487Subject length lower bound = 2
488
489/\x{100}+X/IB,utf
490------------------------------------------------------------------
491        Bra
492        \x{100}++
493        X
494        Ket
495        End
496------------------------------------------------------------------
497Capturing subpattern count = 0
498Options: utf
499First code unit = \x{100}
500Last code unit = 'X'
501Subject length lower bound = 2
502
503/^[\QĀ\E-\QŐ\E/B,utf
504Failed: error 106 at offset 13: missing terminating ] for character class
505
506/X/utf
507    XX\x{d800}\=no_utf_check
508 0: X
509    XX\x{da00}\=no_utf_check
510 0: X
511    XX\x{dc00}\=no_utf_check
512 0: X
513    XX\x{de00}\=no_utf_check
514 0: X
515    XX\x{dfff}\=no_utf_check
516 0: X
517\= Expect UTF error
518    XX\x{d800}
519Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
520    XX\x{da00}
521Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
522    XX\x{dc00}
523Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
524    XX\x{de00}
525Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
526    XX\x{dfff}
527Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
528    XX\x{110000}
529Failed: error -28: UTF-32 error: code points greater than 0x10ffff are not defined at offset 2
530    XX\x{d800}\x{1234}
531Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
532\= Expect no match
533    XX\x{d800}\=offset=3
534No match
535
536/(?<=.)X/utf
537    XX\x{d800}\=offset=3
538Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
539
540/(*UTF16)\x{11234}/
541Failed: error 160 at offset 5: (*VERB) not recognized or malformed
542  abcd\x{11234}pqr
543
544/(*UTF)\x{11234}/I
545Capturing subpattern count = 0
546Compile options: <none>
547Overall options: utf
548First code unit = \x{11234}
549Subject length lower bound = 1
550  abcd\x{11234}pqr
551 0: \x{11234}
552
553/(*UTF-32)\x{11234}/
554Failed: error 160 at offset 5: (*VERB) not recognized or malformed
555  abcd\x{11234}pqr
556
557/(*UTF-32)\x{112}/
558Failed: error 160 at offset 5: (*VERB) not recognized or malformed
559  abcd\x{11234}pqr
560
561/(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I
562Failed: error 160 at offset 12: (*VERB) not recognized or malformed
563
564/(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I
565Capturing subpattern count = 0
566Compile options: <none>
567Overall options: utf
568\R matches any Unicode newline
569Forced newline is CRLF
570First code unit = 'a'
571Last code unit = 'b'
572Subject length lower bound = 3
573
574/\h/I,utf
575Capturing subpattern count = 0
576Options: utf
577Starting code units: \x09 \x20 \xa0 \xff
578Subject length lower bound = 1
579    ABC\x{09}
580 0: \x{09}
581    ABC\x{20}
582 0:
583    ABC\x{a0}
584 0: \x{a0}
585    ABC\x{1680}
586 0: \x{1680}
587    ABC\x{180e}
588 0: \x{180e}
589    ABC\x{2000}
590 0: \x{2000}
591    ABC\x{202f}
592 0: \x{202f}
593    ABC\x{205f}
594 0: \x{205f}
595    ABC\x{3000}
596 0: \x{3000}
597
598/\v/I,utf
599Capturing subpattern count = 0
600Options: utf
601Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
602Subject length lower bound = 1
603    ABC\x{0a}
604 0: \x{0a}
605    ABC\x{0b}
606 0: \x{0b}
607    ABC\x{0c}
608 0: \x{0c}
609    ABC\x{0d}
610 0: \x{0d}
611    ABC\x{85}
612 0: \x{85}
613    ABC\x{2028}
614 0: \x{2028}
615
616/\h*A/I,utf
617Capturing subpattern count = 0
618Options: utf
619Starting code units: \x09 \x20 A \xa0 \xff
620Last code unit = 'A'
621Subject length lower bound = 1
622    CDBABC
623 0: A
624    \x{2000}ABC
625 0: \x{2000}A
626
627/\R*A/I,bsr=unicode,utf
628Capturing subpattern count = 0
629Options: utf
630\R matches any Unicode newline
631Starting code units: \x0a \x0b \x0c \x0d A \x85 \xff
632Last code unit = 'A'
633Subject length lower bound = 1
634    CDBABC
635 0: A
636    \x{2028}A
637 0: \x{2028}A
638
639/\v+A/I,utf
640Capturing subpattern count = 0
641Options: utf
642Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
643Last code unit = 'A'
644Subject length lower bound = 2
645
646/\s?xxx\s/I,utf
647Capturing subpattern count = 0
648Options: utf
649Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 x
650Last code unit = 'x'
651Subject length lower bound = 4
652
653/\sxxx\s/I,utf,tables=2
654Capturing subpattern count = 0
655Options: utf
656Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \x85 \xa0
657Last code unit = 'x'
658Subject length lower bound = 5
659    AB\x{85}xxx\x{a0}XYZ
660 0: \x{85}xxx\x{a0}
661    AB\x{a0}xxx\x{85}XYZ
662 0: \x{a0}xxx\x{85}
663
664/\S \S/I,utf,tables=2
665Capturing subpattern count = 0
666Options: utf
667Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
668  \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
669  \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C
670  D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h
671  i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84
672  \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94
673  \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa1 \xa2 \xa3 \xa4
674  \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3
675  \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2
676  \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1
677  \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0
678  \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef
679  \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe
680  \xff
681Last code unit = ' '
682Subject length lower bound = 3
683    \x{a2} \x{84}
684 0: \x{a2} \x{84}
685    A Z
686 0: A Z
687
688/a+/utf
689    a\x{123}aa\=offset=1
690 0: aa
691    a\x{123}aa\=offset=2
692 0: aa
693    a\x{123}aa\=offset=3
694 0: a
695\= Expect no match
696    a\x{123}aa\=offset=4
697No match
698\= Expect bad offset error
699    a\x{123}aa\=offset=5
700Failed: error -33: bad offset value
701    a\x{123}aa\=offset=6
702Failed: error -33: bad offset value
703
704/\x{1234}+/Ii,utf
705Capturing subpattern count = 0
706Options: caseless utf
707First code unit = \x{1234}
708Subject length lower bound = 1
709
710/\x{1234}+?/Ii,utf
711Capturing subpattern count = 0
712Options: caseless utf
713First code unit = \x{1234}
714Subject length lower bound = 1
715
716/\x{1234}++/Ii,utf
717Capturing subpattern count = 0
718Options: caseless utf
719First code unit = \x{1234}
720Subject length lower bound = 1
721
722/\x{1234}{2}/Ii,utf
723Capturing subpattern count = 0
724Options: caseless utf
725First code unit = \x{1234}
726Last code unit = \x{1234}
727Subject length lower bound = 2
728
729/[^\x{c4}]/IB,utf
730------------------------------------------------------------------
731        Bra
732        [^\x{c4}]
733        Ket
734        End
735------------------------------------------------------------------
736Capturing subpattern count = 0
737Options: utf
738Subject length lower bound = 1
739
740/X+\x{200}/IB,utf
741------------------------------------------------------------------
742        Bra
743        X++
744        \x{200}
745        Ket
746        End
747------------------------------------------------------------------
748Capturing subpattern count = 0
749Options: utf
750First code unit = 'X'
751Last code unit = \x{200}
752Subject length lower bound = 2
753
754/\R/I,utf
755Capturing subpattern count = 0
756Options: utf
757Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
758Subject length lower bound = 1
759
760# Check bad offset
761
762/a/utf
763\= Expect bad UTF-16 offset, or no match in 32-bit
764    \x{10000}\=offset=1
765No match
766    \x{10000}ab\=offset=1
767 0: a
768\= Expect 16-bit match, 32-bit no match
769    \x{10000}ab\=offset=2
770No match
771\= Expect no match
772    \x{10000}ab\=offset=3
773No match
774\= Expect no match in 16-bit, bad offset in 32-bit
775    \x{10000}ab\=offset=4
776Failed: error -33: bad offset value
777\= Expect bad offset
778    \x{10000}ab\=offset=5
779Failed: error -33: bad offset value
780
781/�/utf
782Failed: error -27 at offset 0: UTF-32 error: code points 0xd800-0xdfff are not defined
783
784/\w+\x{C4}/B,utf
785------------------------------------------------------------------
786        Bra
787        \w++
788        \x{c4}
789        Ket
790        End
791------------------------------------------------------------------
792    a\x{C4}\x{C4}
793 0: a\x{c4}
794
795/\w+\x{C4}/B,utf,tables=2
796------------------------------------------------------------------
797        Bra
798        \w+
799        \x{c4}
800        Ket
801        End
802------------------------------------------------------------------
803    a\x{C4}\x{C4}
804 0: a\x{c4}\x{c4}
805
806/\W+\x{C4}/B,utf
807------------------------------------------------------------------
808        Bra
809        \W+
810        \x{c4}
811        Ket
812        End
813------------------------------------------------------------------
814    !\x{C4}
815 0: !\x{c4}
816
817/\W+\x{C4}/B,utf,tables=2
818------------------------------------------------------------------
819        Bra
820        \W++
821        \x{c4}
822        Ket
823        End
824------------------------------------------------------------------
825    !\x{C4}
826 0: !\x{c4}
827
828/\W+\x{A1}/B,utf
829------------------------------------------------------------------
830        Bra
831        \W+
832        \x{a1}
833        Ket
834        End
835------------------------------------------------------------------
836    !\x{A1}
837 0: !\x{a1}
838
839/\W+\x{A1}/B,utf,tables=2
840------------------------------------------------------------------
841        Bra
842        \W+
843        \x{a1}
844        Ket
845        End
846------------------------------------------------------------------
847    !\x{A1}
848 0: !\x{a1}
849
850/X\s+\x{A0}/B,utf
851------------------------------------------------------------------
852        Bra
853        X
854        \s++
855        \x{a0}
856        Ket
857        End
858------------------------------------------------------------------
859    X\x20\x{A0}\x{A0}
860 0: X \x{a0}
861
862/X\s+\x{A0}/B,utf,tables=2
863------------------------------------------------------------------
864        Bra
865        X
866        \s+
867        \x{a0}
868        Ket
869        End
870------------------------------------------------------------------
871    X\x20\x{A0}\x{A0}
872 0: X \x{a0}\x{a0}
873
874/\S+\x{A0}/B,utf
875------------------------------------------------------------------
876        Bra
877        \S+
878        \x{a0}
879        Ket
880        End
881------------------------------------------------------------------
882    X\x{A0}\x{A0}
883 0: X\x{a0}\x{a0}
884
885/\S+\x{A0}/B,utf,tables=2
886------------------------------------------------------------------
887        Bra
888        \S++
889        \x{a0}
890        Ket
891        End
892------------------------------------------------------------------
893    X\x{A0}\x{A0}
894 0: X\x{a0}
895
896/\x{a0}+\s!/B,utf
897------------------------------------------------------------------
898        Bra
899        \x{a0}++
900        \s
901        !
902        Ket
903        End
904------------------------------------------------------------------
905    \x{a0}\x20!
906 0: \x{a0} !
907
908/\x{a0}+\s!/B,utf,tables=2
909------------------------------------------------------------------
910        Bra
911        \x{a0}+
912        \s
913        !
914        Ket
915        End
916------------------------------------------------------------------
917    \x{a0}\x20!
918 0: \x{a0} !
919
920/(*UTF)abc/never_utf
921Failed: error 174 at offset 6: using UTF is disabled by the application
922
923/abc/utf,never_utf
924Failed: error 174 at offset 0: using UTF is disabled by the application
925
926/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf
927------------------------------------------------------------------
928        Bra
929     /i A\x{391}\x{10427}\x{ff3a}\x{1fb0}
930        Ket
931        End
932------------------------------------------------------------------
933Capturing subpattern count = 0
934Options: caseless utf
935First code unit = 'A' (caseless)
936Last code unit = \x{1fb0} (caseless)
937Subject length lower bound = 5
938
939/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf
940------------------------------------------------------------------
941        Bra
942        A\x{391}\x{10427}\x{ff3a}\x{1fb0}
943        Ket
944        End
945------------------------------------------------------------------
946Capturing subpattern count = 0
947Options: utf
948First code unit = 'A'
949Last code unit = \x{1fb0}
950Subject length lower bound = 5
951
952/AB\x{1fb0}/IB,utf
953------------------------------------------------------------------
954        Bra
955        AB\x{1fb0}
956        Ket
957        End
958------------------------------------------------------------------
959Capturing subpattern count = 0
960Options: utf
961First code unit = 'A'
962Last code unit = \x{1fb0}
963Subject length lower bound = 3
964
965/AB\x{1fb0}/IBi,utf
966------------------------------------------------------------------
967        Bra
968     /i AB\x{1fb0}
969        Ket
970        End
971------------------------------------------------------------------
972Capturing subpattern count = 0
973Options: caseless utf
974First code unit = 'A' (caseless)
975Last code unit = \x{1fb0} (caseless)
976Subject length lower bound = 3
977
978/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf
979Capturing subpattern count = 0
980Options: caseless utf
981First code unit = \x{401} (caseless)
982Last code unit = \x{42f} (caseless)
983Subject length lower bound = 17
984    \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
985 0: \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
986    \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
987 0: \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
988
989/[ⱥ]/Bi,utf
990------------------------------------------------------------------
991        Bra
992     /i \x{2c65}
993        Ket
994        End
995------------------------------------------------------------------
996
997/[^ⱥ]/Bi,utf
998------------------------------------------------------------------
999        Bra
1000     /i [^\x{2c65}]
1001        Ket
1002        End
1003------------------------------------------------------------------
1004
1005/[[:blank:]]/B,ucp
1006------------------------------------------------------------------
1007        Bra
1008        [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
1009        Ket
1010        End
1011------------------------------------------------------------------
1012
1013/\x{212a}+/Ii,utf
1014Capturing subpattern count = 0
1015Options: caseless utf
1016Starting code units: K k \xff
1017Subject length lower bound = 1
1018    KKkk\x{212a}
1019 0: KKkk\x{212a}
1020
1021/s+/Ii,utf
1022Capturing subpattern count = 0
1023Options: caseless utf
1024Starting code units: S s \xff
1025Subject length lower bound = 1
1026    SSss\x{17f}
1027 0: SSss\x{17f}
1028
1029# Non-UTF characters should give errors in both 16-bit and 32-bit modes.
1030
1031/\x{110000}/utf
1032Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large
1033
1034/\o{4200000}/utf
1035Failed: error 134 at offset 10: character code point value in \x{} or \o{} is too large
1036
1037/\x{100}*A/IB,utf
1038------------------------------------------------------------------
1039        Bra
1040        \x{100}*+
1041        A
1042        Ket
1043        End
1044------------------------------------------------------------------
1045Capturing subpattern count = 0
1046Options: utf
1047Starting code units: A \xff
1048Last code unit = 'A'
1049Subject length lower bound = 1
1050    A
1051 0: A
1052
1053/\x{100}*\d(?R)/IB,utf
1054------------------------------------------------------------------
1055        Bra
1056        \x{100}*+
1057        \d
1058        Recurse
1059        Ket
1060        End
1061------------------------------------------------------------------
1062Capturing subpattern count = 0
1063Options: utf
1064Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff
1065Subject length lower bound = 1
1066
1067/[Z\x{100}]/IB,utf
1068------------------------------------------------------------------
1069        Bra
1070        [Z\x{100}]
1071        Ket
1072        End
1073------------------------------------------------------------------
1074Capturing subpattern count = 0
1075Options: utf
1076Starting code units: Z \xff
1077Subject length lower bound = 1
1078    Z\x{100}
1079 0: Z
1080    \x{100}
1081 0: \x{100}
1082    \x{100}Z
1083 0: \x{100}
1084
1085/[z-\x{100}]/IB,utf
1086------------------------------------------------------------------
1087        Bra
1088        [z-\xff\x{100}]
1089        Ket
1090        End
1091------------------------------------------------------------------
1092Capturing subpattern count = 0
1093Options: utf
1094Starting code units: z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87
1095  \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96
1096  \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5
1097  \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4
1098  \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3
1099  \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2
1100  \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1
1101  \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0
1102  \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
1103Subject length lower bound = 1
1104
1105/[z\Qa-d]Ā\E]/IB,utf
1106------------------------------------------------------------------
1107        Bra
1108        [\-\]adz\x{100}]
1109        Ket
1110        End
1111------------------------------------------------------------------
1112Capturing subpattern count = 0
1113Options: utf
1114Starting code units: - ] a d z \xff
1115Subject length lower bound = 1
1116    \x{100}
1117 0: \x{100}
1118    Ā
1119 0: \x{100}
1120
1121/[ab\x{100}]abc(xyz(?1))/IB,utf
1122------------------------------------------------------------------
1123        Bra
1124        [ab\x{100}]
1125        abc
1126        CBra 1
1127        xyz
1128        Recurse
1129        Ket
1130        Ket
1131        End
1132------------------------------------------------------------------
1133Capturing subpattern count = 1
1134Options: utf
1135Starting code units: a b \xff
1136Last code unit = 'z'
1137Subject length lower bound = 7
1138
1139/\x{100}*\s/IB,utf
1140------------------------------------------------------------------
1141        Bra
1142        \x{100}*+
1143        \s
1144        Ket
1145        End
1146------------------------------------------------------------------
1147Capturing subpattern count = 0
1148Options: utf
1149Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xff
1150Subject length lower bound = 1
1151
1152/\x{100}*\d/IB,utf
1153------------------------------------------------------------------
1154        Bra
1155        \x{100}*+
1156        \d
1157        Ket
1158        End
1159------------------------------------------------------------------
1160Capturing subpattern count = 0
1161Options: utf
1162Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff
1163Subject length lower bound = 1
1164
1165/\x{100}*\w/IB,utf
1166------------------------------------------------------------------
1167        Bra
1168        \x{100}*+
1169        \w
1170        Ket
1171        End
1172------------------------------------------------------------------
1173Capturing subpattern count = 0
1174Options: utf
1175Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
1176  Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
1177  \xff
1178Subject length lower bound = 1
1179
1180/\x{100}*\D/IB,utf
1181------------------------------------------------------------------
1182        Bra
1183        \x{100}*
1184        \D
1185        Ket
1186        End
1187------------------------------------------------------------------
1188Capturing subpattern count = 0
1189Options: utf
1190Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
1191  \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
1192  \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = >
1193  ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c
1194  d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82
1195  \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91
1196  \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0
1197  \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf
1198  \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe
1199  \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd
1200  \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc
1201  \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb
1202  \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa
1203  \xfb \xfc \xfd \xfe \xff
1204Subject length lower bound = 1
1205
1206/\x{100}*\S/IB,utf
1207------------------------------------------------------------------
1208        Bra
1209        \x{100}*
1210        \S
1211        Ket
1212        End
1213------------------------------------------------------------------
1214Capturing subpattern count = 0
1215Options: utf
1216Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
1217  \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
1218  \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C
1219  D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h
1220  i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84
1221  \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93
1222  \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2
1223  \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1
1224  \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0
1225  \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf
1226  \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde
1227  \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed
1228  \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc
1229  \xfd \xfe \xff
1230Subject length lower bound = 1
1231
1232/\x{100}*\W/IB,utf
1233------------------------------------------------------------------
1234        Bra
1235        \x{100}*
1236        \W
1237        Ket
1238        End
1239------------------------------------------------------------------
1240Capturing subpattern count = 0
1241Options: utf
1242Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
1243  \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
1244  \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = >
1245  ? @ [ \ ] ^ ` { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89
1246  \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98
1247  \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7
1248  \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6
1249  \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5
1250  \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4
1251  \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3
1252  \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2
1253  \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
1254Subject length lower bound = 1
1255
1256/[\x{105}-\x{109}]/IBi,utf
1257------------------------------------------------------------------
1258        Bra
1259        [\x{104}-\x{109}]
1260        Ket
1261        End
1262------------------------------------------------------------------
1263Capturing subpattern count = 0
1264Options: caseless utf
1265Starting code units: \xff
1266Subject length lower bound = 1
1267    \x{104}
1268 0: \x{104}
1269    \x{105}
1270 0: \x{105}
1271    \x{109}
1272 0: \x{109}
1273\= Expect no match
1274    \x{100}
1275No match
1276    \x{10a}
1277No match
1278
1279/[z-\x{100}]/IBi,utf
1280------------------------------------------------------------------
1281        Bra
1282        [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
1283        Ket
1284        End
1285------------------------------------------------------------------
1286Capturing subpattern count = 0
1287Options: caseless utf
1288Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86
1289  \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95
1290  \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4
1291  \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3
1292  \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2
1293  \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1
1294  \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0
1295  \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef
1296  \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe
1297  \xff
1298Subject length lower bound = 1
1299    Z
1300 0: Z
1301    z
1302 0: z
1303    \x{39c}
1304 0: \x{39c}
1305    \x{178}
1306 0: \x{178}
1307    |
1308 0: |
1309    \x{80}
1310 0: \x{80}
1311    \x{ff}
1312 0: \x{ff}
1313    \x{100}
1314 0: \x{100}
1315    \x{101}
1316 0: \x{101}
1317\= Expect no match
1318    \x{102}
1319No match
1320    Y
1321No match
1322    y
1323No match
1324
1325/[z-\x{100}]/IBi,utf
1326------------------------------------------------------------------
1327        Bra
1328        [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
1329        Ket
1330        End
1331------------------------------------------------------------------
1332Capturing subpattern count = 0
1333Options: caseless utf
1334Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86
1335  \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95
1336  \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4
1337  \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3
1338  \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2
1339  \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1
1340  \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0
1341  \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef
1342  \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe
1343  \xff
1344Subject length lower bound = 1
1345
1346/\x{3a3}B/IBi,utf
1347------------------------------------------------------------------
1348        Bra
1349        clist 03a3 03c2 03c3
1350     /i B
1351        Ket
1352        End
1353------------------------------------------------------------------
1354Capturing subpattern count = 0
1355Options: caseless utf
1356Starting code units: \xff
1357Last code unit = 'B' (caseless)
1358Subject length lower bound = 2
1359
1360/./utf
1361    \x{110000}
1362Failed: error -28: UTF-32 error: code points greater than 0x10ffff are not defined at offset 0
1363
1364# End of testinput12
1365