• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# This set of tests checks the API, internals, and non-Perl stuff for UTF
2# support, including Unicode properties. However, tests that give different
3# results in 8-bit, 16-bit, and 32-bit modes are excluded (see tests 10 and
4# 12).
5
6# PCRE2 and Perl disagree about the characteristics of certain Unicode
7# characters. For example, 061C is considered by Perl to be Arabic, though
8# it is not listed as such in the Unicode Scripts.txt file, and 2066-2069 are
9# graphic and printable according to Perl, though they are actually "isolate"
10# control characters. That is why the following tests are here rather than in
11# test 4.
12
13/^[\p{Arabic}]/utf
14\= Expect no match
15    \x{061c}
16
17/^[[:graph:]]+$/utf,ucp
18\= Expect no match
19    \x{61c}
20    \x{2066}
21    \x{2067}
22    \x{2068}
23    \x{2069}
24
25/^[[:print:]]+$/utf,ucp
26\= Expect no match
27    \x{61c}
28    \x{2066}
29    \x{2067}
30    \x{2068}
31    \x{2069}
32
33/^[[:^graph:]]+$/utf,ucp
34    \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}
35    \x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069}
36
37/^[[:^print:]]+$/utf,ucp
38    \x{09}\x{1D}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067}
39    \x{2068}\x{2069}
40
41# Perl does not consider U+180e to be a space character. It is true that it
42# does not appear in the Unicode PropList.txt file as such, but in many other
43# sources it is listed as a space, and has been treated as such in PCRE for
44# a long time.
45
46/^>[[:blank:]]*/utf,ucp
47    >\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028}
48
49/^A\s+Z/utf,ucp
50    A\x{85}\x{180e}\x{2005}Z
51
52/^A[\s]+Z/utf,ucp
53    A\x{2005}Z
54    A\x{85}\x{2005}Z
55
56/^[[:graph:]]+$/utf,ucp
57\= Expect no match
58    \x{180e}
59
60/^[[:print:]]+$/utf,ucp
61    \x{180e}
62
63/^[[:^graph:]]+$/utf,ucp
64    \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}\x{180e}
65
66/^[[:^print:]]+$/utf,ucp
67\= Expect no match
68    \x{180e}
69
70# End of U+180E tests.
71
72# ---------------------------------------------------------------------
73
74/\x{110000}/IB,utf
75
76/\o{4200000}/IB,utf
77
78/\x{ffffffff}/utf
79
80/\o{37777777777}/utf
81
82/\x{100000000}/utf
83
84/\o{77777777777}/utf
85
86/\x{d800}/utf
87
88/\o{154000}/utf
89
90/\x{dfff}/utf
91
92/\o{157777}/utf
93
94/\x{d7ff}/utf
95
96/\o{153777}/utf
97
98/\x{e000}/utf
99
100/\o{170000}/utf
101
102/^\x{100}a\x{1234}/utf
103    \x{100}a\x{1234}bcd
104
105/\x{0041}\x{2262}\x{0391}\x{002e}/IB,utf
106    \x{0041}\x{2262}\x{0391}\x{002e}
107
108/.{3,5}X/IB,utf
109    \x{212ab}\x{212ab}\x{212ab}\x{861}X
110
111/.{3,5}?/IB,utf
112    \x{212ab}\x{212ab}\x{212ab}\x{861}
113
114/^[ab]/IB,utf
115    bar
116\= Expect no match
117    c
118    \x{ff}
119    \x{100}
120
121/^[^ab]/IB,utf
122    c
123    \x{ff}
124    \x{100}
125\= Expect no match
126    aaa
127
128/\x{100}*(\d+|"(?1)")/utf
129    1234
130    "1234"
131    \x{100}1234
132    "\x{100}1234"
133    \x{100}\x{100}12ab
134    \x{100}\x{100}"12"
135\= Expect no match
136    \x{100}\x{100}abcd
137
138/\x{100}*/IB,utf
139
140/a\x{100}*/IB,utf
141
142/ab\x{100}*/IB,utf
143
144/[\x{200}-\x{100}]/utf
145
146/[Ā-Ą]/utf
147    \x{100}
148    \x{104}
149\= Expect no match
150    \x{105}
151    \x{ff}
152
153/[\xFF]/IB
154    >\xff<
155
156/[^\xFF]/IB
157
158/[Ä-Ü]/utf
159    Ö # Matches without Study
160    \x{d6}
161
162/[Ä-Ü]/utf
163    Ö <-- Same with Study
164    \x{d6}
165
166/[\x{c4}-\x{dc}]/utf
167    Ö # Matches without Study
168    \x{d6}
169
170/[\x{c4}-\x{dc}]/utf
171    Ö <-- Same with Study
172    \x{d6}
173
174/[^\x{100}]abc(xyz(?1))/IB,utf
175
176/(\x{100}(b(?2)c))?/IB,utf
177
178/(\x{100}(b(?2)c)){0,2}/IB,utf
179
180/(\x{100}(b(?1)c))?/IB,utf
181
182/(\x{100}(b(?1)c)){0,2}/IB,utf
183
184/\W/utf
185    A.B
186    A\x{100}B
187
188/\w/utf
189    \x{100}X
190
191/^\ሴ/IB,utf
192
193/()()()()()()()()()()
194 ()()()()()()()()()()
195 ()()()()()()()()()()
196 ()()()()()()()()()()
197 A (x) (?41) B/x,utf
198    AxxB
199
200/^[\x{100}\E-\Q\E\x{150}]/B,utf
201
202/^[\QĀ\E-\QŐ\E]/B,utf
203
204/^abc./gmx,newline=any,utf
205    abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x{0085}abc7 \x{2028}abc8 \x{2029}abc9 JUNK
206
207/abc.$/gmx,newline=any,utf
208    abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x{0085} abc7\x{2028} abc8\x{2029} abc9
209
210/^a\Rb/bsr=unicode,utf
211    a\nb
212    a\rb
213    a\r\nb
214    a\x0bb
215    a\x0cb
216    a\x{85}b
217    a\x{2028}b
218    a\x{2029}b
219\= Expect no match
220    a\n\rb
221
222/^a\R*b/bsr=unicode,utf
223    ab
224    a\nb
225    a\rb
226    a\r\nb
227    a\x0bb
228    a\x0c\x{2028}\x{2029}b
229    a\x{85}b
230    a\n\rb
231    a\n\r\x{85}\x0cb
232
233/^a\R+b/bsr=unicode,utf
234    a\nb
235    a\rb
236    a\r\nb
237    a\x0bb
238    a\x0c\x{2028}\x{2029}b
239    a\x{85}b
240    a\n\rb
241    a\n\r\x{85}\x0cb
242\= Expect no match
243    ab
244
245/^a\R{1,3}b/bsr=unicode,utf
246    a\nb
247    a\n\rb
248    a\n\r\x{85}b
249    a\r\n\r\nb
250    a\r\n\r\n\r\nb
251    a\n\r\n\rb
252    a\n\n\r\nb
253\= Expect no match
254    a\n\n\n\rb
255    a\r
256
257/\H\h\V\v/utf
258    X X\x0a
259    X\x09X\x0b
260\= Expect no match
261    \x{a0} X\x0a
262
263/\H*\h+\V?\v{3,4}/utf
264    \x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a
265    \x09\x20\x{a0}\x0a\x0b\x0c\x0d\x0a
266    \x09\x20\x{a0}\x0a\x0b\x0c
267\= Expect no match
268    \x09\x20\x{a0}\x0a\x0b
269
270/\H\h\V\v/utf
271    \x{3001}\x{3000}\x{2030}\x{2028}
272    X\x{180e}X\x{85}
273\= Expect no match
274    \x{2009} X\x0a
275
276/\H*\h+\V?\v{3,4}/utf
277    \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x0c\x0d\x0a
278    \x09\x{205f}\x{a0}\x0a\x{2029}\x0c\x{2028}\x0a
279    \x09\x20\x{202f}\x0a\x0b\x0c
280\= Expect no match
281    \x09\x{200a}\x{a0}\x{2028}\x0b
282
283/[\h]/B,utf
284    >\x{1680}
285
286/[\h]{3,}/B,utf
287    >\x{1680}\x{180e}\x{2000}\x{2003}\x{200a}\x{202f}\x{205f}\x{3000}<
288
289/[\v]/B,utf
290
291/[\H]/B,utf
292
293/[\V]/B,utf
294
295/.*$/newline=any,utf
296    \x{1ec5}
297
298/a\Rb/I,bsr=anycrlf,utf
299    a\rb
300    a\nb
301    a\r\nb
302\= Expect no match
303    a\x{85}b
304    a\x0bb
305
306/a\Rb/I,bsr=unicode,utf
307    a\rb
308    a\nb
309    a\r\nb
310    a\x{85}b
311    a\x0bb
312
313/a\R?b/I,bsr=anycrlf,utf
314    a\rb
315    a\nb
316    a\r\nb
317\= Expect no match
318    a\x{85}b
319    a\x0bb
320
321/a\R?b/I,bsr=unicode,utf
322    a\rb
323    a\nb
324    a\r\nb
325    a\x{85}b
326    a\x0bb
327
328/.*a.*=.b.*/utf,newline=any
329    QQQ\x{2029}ABCaXYZ=!bPQR
330\= Expect no match
331    a\x{2029}b
332    \x61\xe2\x80\xa9\x62
333
334/[[:a\x{100}b:]]/utf
335
336/a[^]b/utf,alt_bsux,allow_empty_class,match_unset_backref
337    a\x{1234}b
338    a\nb
339\= Expect no match
340    ab
341
342/a[^]+b/utf,alt_bsux,allow_empty_class,match_unset_backref
343    aXb
344    a\nX\nX\x{1234}b
345\= Expect no match
346    ab
347
348/(\x{de})\1/
349    \x{de}\x{de}
350
351/X/newline=any,utf,firstline
352    A\x{1ec5}ABCXYZ
353
354/Xa{2,4}b/utf
355    X\=ps
356    Xa\=ps
357    Xaa\=ps
358    Xaaa\=ps
359    Xaaaa\=ps
360
361/Xa{2,4}?b/utf
362    X\=ps
363    Xa\=ps
364    Xaa\=ps
365    Xaaa\=ps
366    Xaaaa\=ps
367
368/Xa{2,4}+b/utf
369    X\=ps
370    Xa\=ps
371    Xaa\=ps
372    Xaaa\=ps
373    Xaaaa\=ps
374
375/X\x{123}{2,4}b/utf
376    X\=ps
377    X\x{123}\=ps
378    X\x{123}\x{123}\=ps
379    X\x{123}\x{123}\x{123}\=ps
380    X\x{123}\x{123}\x{123}\x{123}\=ps
381
382/X\x{123}{2,4}?b/utf
383    X\=ps
384    X\x{123}\=ps
385    X\x{123}\x{123}\=ps
386    X\x{123}\x{123}\x{123}\=ps
387    X\x{123}\x{123}\x{123}\x{123}\=ps
388
389/X\x{123}{2,4}+b/utf
390    X\=ps
391    X\x{123}\=ps
392    X\x{123}\x{123}\=ps
393    X\x{123}\x{123}\x{123}\=ps
394    X\x{123}\x{123}\x{123}\x{123}\=ps
395
396/X\x{123}{2,4}b/utf
397\= Expect no match
398    Xx\=ps
399    X\x{123}x\=ps
400    X\x{123}\x{123}x\=ps
401    X\x{123}\x{123}\x{123}x\=ps
402    X\x{123}\x{123}\x{123}\x{123}x\=ps
403
404/X\x{123}{2,4}?b/utf
405\= Expect no match
406    Xx\=ps
407    X\x{123}x\=ps
408    X\x{123}\x{123}x\=ps
409    X\x{123}\x{123}\x{123}x\=ps
410    X\x{123}\x{123}\x{123}\x{123}x\=ps
411
412/X\x{123}{2,4}+b/utf
413\= Expect no match
414    Xx\=ps
415    X\x{123}x\=ps
416    X\x{123}\x{123}x\=ps
417    X\x{123}\x{123}\x{123}x\=ps
418    X\x{123}\x{123}\x{123}\x{123}x\=ps
419
420/X\d{2,4}b/utf
421    X\=ps
422    X3\=ps
423    X33\=ps
424    X333\=ps
425    X3333\=ps
426
427/X\d{2,4}?b/utf
428    X\=ps
429    X3\=ps
430    X33\=ps
431    X333\=ps
432    X3333\=ps
433
434/X\d{2,4}+b/utf
435    X\=ps
436    X3\=ps
437    X33\=ps
438    X333\=ps
439    X3333\=ps
440
441/X\D{2,4}b/utf
442    X\=ps
443    Xa\=ps
444    Xaa\=ps
445    Xaaa\=ps
446    Xaaaa\=ps
447
448/X\D{2,4}?b/utf
449    X\=ps
450    Xa\=ps
451    Xaa\=ps
452    Xaaa\=ps
453    Xaaaa\=ps
454
455/X\D{2,4}+b/utf
456    X\=ps
457    Xa\=ps
458    Xaa\=ps
459    Xaaa\=ps
460    Xaaaa\=ps
461
462/X\D{2,4}b/utf
463    X\=ps
464    X\x{123}\=ps
465    X\x{123}\x{123}\=ps
466    X\x{123}\x{123}\x{123}\=ps
467    X\x{123}\x{123}\x{123}\x{123}\=ps
468
469/X\D{2,4}?b/utf
470    X\=ps
471    X\x{123}\=ps
472    X\x{123}\x{123}\=ps
473    X\x{123}\x{123}\x{123}\=ps
474    X\x{123}\x{123}\x{123}\x{123}\=ps
475
476/X\D{2,4}+b/utf
477    X\=ps
478    X\x{123}\=ps
479    X\x{123}\x{123}\=ps
480    X\x{123}\x{123}\x{123}\=ps
481    X\x{123}\x{123}\x{123}\x{123}\=ps
482
483/X[abc]{2,4}b/utf
484    X\=ps
485    Xa\=ps
486    Xaa\=ps
487    Xaaa\=ps
488    Xaaaa\=ps
489
490/X[abc]{2,4}?b/utf
491    X\=ps
492    Xa\=ps
493    Xaa\=ps
494    Xaaa\=ps
495    Xaaaa\=ps
496
497/X[abc]{2,4}+b/utf
498    X\=ps
499    Xa\=ps
500    Xaa\=ps
501    Xaaa\=ps
502    Xaaaa\=ps
503
504/X[abc\x{123}]{2,4}b/utf
505    X\=ps
506    X\x{123}\=ps
507    X\x{123}\x{123}\=ps
508    X\x{123}\x{123}\x{123}\=ps
509    X\x{123}\x{123}\x{123}\x{123}\=ps
510
511/X[abc\x{123}]{2,4}?b/utf
512    X\=ps
513    X\x{123}\=ps
514    X\x{123}\x{123}\=ps
515    X\x{123}\x{123}\x{123}\=ps
516    X\x{123}\x{123}\x{123}\x{123}\=ps
517
518/X[abc\x{123}]{2,4}+b/utf
519    X\=ps
520    X\x{123}\=ps
521    X\x{123}\x{123}\=ps
522    X\x{123}\x{123}\x{123}\=ps
523    X\x{123}\x{123}\x{123}\x{123}\=ps
524
525/X[^a]{2,4}b/utf
526    X\=ps
527    Xz\=ps
528    Xzz\=ps
529    Xzzz\=ps
530    Xzzzz\=ps
531
532/X[^a]{2,4}?b/utf
533    X\=ps
534    Xz\=ps
535    Xzz\=ps
536    Xzzz\=ps
537    Xzzzz\=ps
538
539/X[^a]{2,4}+b/utf
540    X\=ps
541    Xz\=ps
542    Xzz\=ps
543    Xzzz\=ps
544    Xzzzz\=ps
545
546/X[^a]{2,4}b/utf
547    X\=ps
548    X\x{123}\=ps
549    X\x{123}\x{123}\=ps
550    X\x{123}\x{123}\x{123}\=ps
551    X\x{123}\x{123}\x{123}\x{123}\=ps
552
553/X[^a]{2,4}?b/utf
554    X\=ps
555    X\x{123}\=ps
556    X\x{123}\x{123}\=ps
557    X\x{123}\x{123}\x{123}\=ps
558    X\x{123}\x{123}\x{123}\x{123}\=ps
559
560/X[^a]{2,4}+b/utf
561    X\=ps
562    X\x{123}\=ps
563    X\x{123}\x{123}\=ps
564    X\x{123}\x{123}\x{123}\=ps
565    X\x{123}\x{123}\x{123}\x{123}\=ps
566
567/(Y)X\1{2,4}b/utf
568    YX\=ps
569    YXY\=ps
570    YXYY\=ps
571    YXYYY\=ps
572    YXYYYY\=ps
573
574/(Y)X\1{2,4}?b/utf
575    YX\=ps
576    YXY\=ps
577    YXYY\=ps
578    YXYYY\=ps
579    YXYYYY\=ps
580
581/(Y)X\1{2,4}+b/utf
582    YX\=ps
583    YXY\=ps
584    YXYY\=ps
585    YXYYY\=ps
586    YXYYYY\=ps
587
588/(\x{123})X\1{2,4}b/utf
589    \x{123}X\=ps
590    \x{123}X\x{123}\=ps
591    \x{123}X\x{123}\x{123}\=ps
592    \x{123}X\x{123}\x{123}\x{123}\=ps
593    \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps
594
595/(\x{123})X\1{2,4}?b/utf
596    \x{123}X\=ps
597    \x{123}X\x{123}\=ps
598    \x{123}X\x{123}\x{123}\=ps
599    \x{123}X\x{123}\x{123}\x{123}\=ps
600    \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps
601
602/(\x{123})X\1{2,4}+b/utf
603    \x{123}X\=ps
604    \x{123}X\x{123}\=ps
605    \x{123}X\x{123}\x{123}\=ps
606    \x{123}X\x{123}\x{123}\x{123}\=ps
607    \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps
608
609/\bthe cat\b/utf
610    the cat\=ps
611    the cat\=ph
612
613/abcd*/utf
614    xxxxabcd\=ps
615    xxxxabcd\=ph
616
617/abcd*/i,utf
618    xxxxabcd\=ps
619    xxxxabcd\=ph
620    XXXXABCD\=ps
621    XXXXABCD\=ph
622
623/abc\d*/utf
624    xxxxabc1\=ps
625    xxxxabc1\=ph
626
627/(a)bc\1*/utf
628    xxxxabca\=ps
629    xxxxabca\=ph
630
631/abc[de]*/utf
632    xxxxabcde\=ps
633    xxxxabcde\=ph
634
635/X\W{3}X/utf
636    X\=ps
637
638/\sxxx\s/utf,tables=2
639    AB\x{85}xxx\x{a0}XYZ
640    AB\x{a0}xxx\x{85}XYZ
641
642/\S \S/utf,tables=2
643    \x{a2} \x{84}
644
645'A#хц'Bx,newline=any,utf
646
647'A#хц
648  PQ'Bx,newline=any,utf
649
650/a+#хaa
651  z#XX?/Bx,newline=any,utf
652
653/a+#хaa
654  z#х?/Bx,newline=any,utf
655
656/\g{A}xxx#bXX(?'A'123)
657(?'A'456)/Bx,newline=any,utf
658
659/\g{A}xxx#bх(?'A'123)
660(?'A'456)/Bx,newline=any,utf
661
662/^\cģ/utf
663
664/(\R*)(.)/s,utf
665    \r\n
666    \r\r\n\n\r
667    \r\r\n\n\r\n
668
669/(\R)*(.)/s,utf
670    \r\n
671    \r\r\n\n\r
672    \r\r\n\n\r\n
673
674/[^\x{1234}]+/Ii,utf
675
676/[^\x{1234}]+?/Ii,utf
677
678/[^\x{1234}]++/Ii,utf
679
680/[^\x{1234}]{2}/Ii,utf
681
682/f.*/
683    for\=ph
684
685/f.*/s
686    for\=ph
687
688/f.*/utf
689    for\=ph
690
691/f.*/s,utf
692    for\=ph
693
694/\x{d7ff}\x{e000}/utf
695
696/\x{d800}/utf
697
698/\x{dfff}/utf
699
700/\h+/utf
701    \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
702    \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000}
703
704/[\h\x{e000}]+/B,utf
705    \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
706    \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000}
707
708/\H+/utf
709    \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
710    \x{2000}\x{200a}\x{1fff}\x{200b}
711    \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
712    \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001}
713
714/[\H\x{d7ff}]+/B,utf
715    \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
716    \x{2000}\x{200a}\x{1fff}\x{200b}
717    \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
718    \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001}
719
720/\v+/utf
721    \x{2027}\x{2030}\x{2028}\x{2029}
722    \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
723
724/[\v\x{e000}]+/B,utf
725    \x{2027}\x{2030}\x{2028}\x{2029}
726    \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
727
728/\V+/utf
729    \x{2028}\x{2029}\x{2027}\x{2030}
730    \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86}
731
732/[\V\x{d7ff}]+/B,utf
733    \x{2028}\x{2029}\x{2027}\x{2030}
734    \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86}
735
736/\R+/bsr=unicode,utf
737    \x{2027}\x{2030}\x{2028}\x{2029}
738    \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
739
740/(..)\1/utf
741    ab\=ps
742    aba\=ps
743    abab\=ps
744
745/(..)\1/i,utf
746    ab\=ps
747    abA\=ps
748    aBAb\=ps
749
750/(..)\1{2,}/utf
751    ab\=ps
752    aba\=ps
753    abab\=ps
754    ababa\=ps
755    ababab\=ps
756    ababab\=ph
757    abababa\=ps
758    abababa\=ph
759
760/(..)\1{2,}/i,utf
761    ab\=ps
762    aBa\=ps
763    aBAb\=ps
764    AbaBA\=ps
765    abABAb\=ps
766    aBAbaB\=ph
767    abABabA\=ps
768    abaBABa\=ph
769
770/(..)\1{2,}?x/i,utf
771    ab\=ps
772    abA\=ps
773    aBAb\=ps
774    abaBA\=ps
775    abAbaB\=ps
776    abaBabA\=ps
777    abAbABaBx\=ps
778
779/./utf,newline=crlf
780    \r\=ps
781    \r\=ph
782
783/.{2,3}/utf,newline=crlf
784    \r\=ps
785    \r\=ph
786    \r\r\=ps
787    \r\r\=ph
788    \r\r\r\=ps
789    \r\r\r\=ph
790
791/.{2,3}?/utf,newline=crlf
792    \r\=ps
793    \r\=ph
794    \r\r\=ps
795    \r\r\=ph
796    \r\r\r\=ps
797    \r\r\r\=ph
798
799/[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/B,utf
800
801/[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/Bi,utf
802
803/[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/B,utf
804
805/[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/Bi,utf
806
807/(?<=\x{1234}\x{1234})\bxy/I,utf
808
809/(?<!^)ETA/utf
810\= Expect no match
811    ETA
812
813/\u0100/B,utf,alt_bsux,allow_empty_class,match_unset_backref
814
815/[\u0100-\u0200]/B,utf,alt_bsux,allow_empty_class,match_unset_backref
816
817/\ud800/utf,alt_bsux,allow_empty_class,match_unset_backref
818
819/^a+[a\x{200}]/B,utf
820    aa
821
822/[b-d\x{200}-\x{250}]*[ae-h]?#[\x{200}-\x{250}]{0,8}[\x00-\xff]*#[\x{200}-\x{250}]+[a-z]/B,utf
823
824/[\p{L}]/IB
825
826/[\p{^L}]/IB
827
828/[\P{L}]/IB
829
830/[\P{^L}]/IB
831
832/[abc\p{L}\x{0660}]/IB,utf
833
834/[\p{Nd}]/IB,utf
835    1234
836
837/[\p{Nd}+-]+/IB,utf
838    1234
839    12-34
840    12+\x{661}-34
841\= Expect no match
842    abcd
843
844/(?:[\PPa*]*){8,}/
845
846/[\P{Any}]/B
847
848/[\P{Any}\E]/B
849
850/(\P{Yi}+\277)/
851
852/(\P{Yi}+\277)?/
853
854/(?<=\P{Yi}{3}A)X/
855
856/\p{Yi}+(\P{Yi}+)(?1)/
857
858/(\P{Yi}{2}\277)?/
859
860/[\P{Yi}A]/
861
862/[\P{Yi}\P{Yi}\P{Yi}A]/
863
864/[^\P{Yi}A]/
865
866/[^\P{Yi}\P{Yi}\P{Yi}A]/
867
868/(\P{Yi}*\277)*/
869
870/(\P{Yi}*?\277)*/
871
872/(\p{Yi}*+\277)*/
873
874/(\P{Yi}?\277)*/
875
876/(\P{Yi}??\277)*/
877
878/(\p{Yi}?+\277)*/
879
880/(\P{Yi}{0,3}\277)*/
881
882/(\P{Yi}{0,3}?\277)*/
883
884/(\p{Yi}{0,3}+\277)*/
885
886/\p{Zl}{2,3}+/B,utf
887    


888    \x{2028}\x{2028}\x{2028}
889
890/\p{Zl}/B,utf
891
892/\p{Lu}{3}+/B,utf
893
894/\pL{2}+/B,utf
895
896/\p{Cc}{2}+/B,utf
897
898/^\p{Cf}/utf
899    \x{180e}
900    \x{061c}
901    \x{2066}
902    \x{2067}
903    \x{2068}
904    \x{2069}
905
906/^\p{Cs}/utf
907    \x{dfff}\=no_utf_check
908\= Expect no match
909    \x{09f}
910
911/^\p{Mn}/utf
912    \x{1a1b}
913
914/^\p{Pe}/utf
915    \x{2309}
916    \x{230b}
917
918/^\p{Ps}/utf
919    \x{2308}
920    \x{230a}
921
922/^\p{Sc}+/utf
923    $\x{a2}\x{a3}\x{a4}\x{a5}\x{a6}
924    \x{9f2}
925\= Expect no match
926    X
927    \x{2c2}
928
929/^\p{Zs}/utf
930    \ \
931    \x{a0}
932    \x{1680}
933    \x{2000}
934    \x{2001}
935\= Expect no match
936    \x{2028}
937    \x{200d}
938
939# These are here because Perl has problems with the negative versions of the
940# properties and has changed how it behaves for caseless matching.
941
942/\p{^Lu}/i,utf
943    1234
944\= Expect no match
945    ABC
946
947/\P{Lu}/i,utf
948    1234
949\= Expect no match
950    ABC
951
952/\p{Ll}/i,utf
953    a
954    Az
955\= Expect no match
956    ABC
957
958/\p{Lu}/i,utf
959    A
960    a\x{10a0}B
961\= Expect no match
962    a
963    \x{1d00}
964
965/\p{Lu}/i,utf
966    A
967    aZ
968\= Expect no match
969    abc
970
971/[\x{c0}\x{391}]/i,utf
972    \x{c0}
973    \x{e0}
974
975# The next two are special cases where the lengths of the different cases of
976# the same character differ. The first went wrong with heap frame storage; the
977# second was broken in all cases.
978
979/^\x{023a}+?(\x{0130}+)/i,utf
980  \x{023a}\x{2c65}\x{0130}
981
982/^\x{023a}+([^X])/i,utf
983  \x{023a}\x{2c65}X
984
985/\x{c0}+\x{116}+/i,utf
986    \x{c0}\x{e0}\x{116}\x{117}
987
988/[\x{c0}\x{116}]+/i,utf
989    \x{c0}\x{e0}\x{116}\x{117}
990
991/(\x{de})\1/i,utf
992    \x{de}\x{de}
993    \x{de}\x{fe}
994    \x{fe}\x{fe}
995    \x{fe}\x{de}
996
997/^\x{c0}$/i,utf
998    \x{c0}
999    \x{e0}
1000
1001/^\x{e0}$/i,utf
1002    \x{c0}
1003    \x{e0}
1004
1005# The next two should be Perl-compatible, but it fails to match \x{e0}. PCRE
1006# will match it only with UCP support, because without that it has no notion
1007# of case for anything other than the ASCII letters.
1008
1009/((?i)[\x{c0}])/utf
1010    \x{c0}
1011    \x{e0}
1012
1013/(?i:[\x{c0}])/utf
1014    \x{c0}
1015    \x{e0}
1016
1017# These are PCRE's extra properties to help with Unicodizing \d etc.
1018
1019/^\p{Xan}/utf
1020    ABCD
1021    1234
1022    \x{6ca}
1023    \x{a6c}
1024    \x{10a7}
1025\= Expect no match
1026    _ABC
1027
1028/^\p{Xan}+/utf
1029    ABCD1234\x{6ca}\x{a6c}\x{10a7}_
1030\= Expect no match
1031    _ABC
1032
1033/^\p{Xan}+?/utf
1034    \x{6ca}\x{a6c}\x{10a7}_
1035
1036/^\p{Xan}*/utf
1037    ABCD1234\x{6ca}\x{a6c}\x{10a7}_
1038
1039/^\p{Xan}{2,9}/utf
1040    ABCD1234\x{6ca}\x{a6c}\x{10a7}_
1041
1042/^\p{Xan}{2,9}?/utf
1043    \x{6ca}\x{a6c}\x{10a7}_
1044
1045/^[\p{Xan}]/utf
1046    ABCD1234_
1047    1234abcd_
1048    \x{6ca}
1049    \x{a6c}
1050    \x{10a7}
1051\= Expect no match
1052    _ABC
1053
1054/^[\p{Xan}]+/utf
1055    ABCD1234\x{6ca}\x{a6c}\x{10a7}_
1056\= Expect no match
1057    _ABC
1058
1059/^>\p{Xsp}/utf
1060    >\x{1680}\x{2028}\x{0b}
1061    >\x{a0}
1062\= Expect no match
1063    \x{0b}
1064
1065/^>\p{Xsp}+/utf
1066    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
1067
1068/^>\p{Xsp}+?/utf
1069    >\x{1680}\x{2028}\x{0b}
1070
1071/^>\p{Xsp}*/utf
1072    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
1073
1074/^>\p{Xsp}{2,9}/utf
1075    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
1076
1077/^>\p{Xsp}{2,9}?/utf
1078    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
1079
1080/^>[\p{Xsp}]/utf
1081    >\x{2028}\x{0b}
1082
1083/^>[\p{Xsp}]+/utf
1084    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
1085
1086/^>\p{Xps}/utf
1087    >\x{1680}\x{2028}\x{0b}
1088    >\x{a0}
1089\= Expect no match
1090    \x{0b}
1091
1092/^>\p{Xps}+/utf
1093    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
1094
1095/^>\p{Xps}+?/utf
1096    >\x{1680}\x{2028}\x{0b}
1097
1098/^>\p{Xps}*/utf
1099    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
1100
1101/^>\p{Xps}{2,9}/utf
1102    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
1103
1104/^>\p{Xps}{2,9}?/utf
1105    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
1106
1107/^>[\p{Xps}]/utf
1108    >\x{2028}\x{0b}
1109
1110/^>[\p{Xps}]+/utf
1111    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
1112
1113/^\p{Xwd}/utf
1114    ABCD
1115    1234
1116    \x{6ca}
1117    \x{a6c}
1118    \x{10a7}
1119    _ABC
1120\= Expect no match
1121    []
1122
1123/^\p{Xwd}+/utf
1124    ABCD1234\x{6ca}\x{a6c}\x{10a7}_
1125
1126/^\p{Xwd}+?/utf
1127    \x{6ca}\x{a6c}\x{10a7}_
1128
1129/^\p{Xwd}*/utf
1130    ABCD1234\x{6ca}\x{a6c}\x{10a7}_
1131
1132/^\p{Xwd}{2,9}/utf
1133    A_B12\x{6ca}\x{a6c}\x{10a7}
1134
1135/^\p{Xwd}{2,9}?/utf
1136    \x{6ca}\x{a6c}\x{10a7}_
1137
1138/^[\p{Xwd}]/utf
1139    ABCD1234_
1140    1234abcd_
1141    \x{6ca}
1142    \x{a6c}
1143    \x{10a7}
1144    _ABC
1145\= Expect no match
1146    []
1147
1148/^[\p{Xwd}]+/utf
1149    ABCD1234\x{6ca}\x{a6c}\x{10a7}_
1150
1151# A check not in UTF-8 mode
1152
1153/^[\p{Xwd}]+/
1154    ABCD1234_
1155
1156# Some negative checks
1157
1158/^[\P{Xwd}]+/utf
1159    !.+\x{019}\x{35a}AB
1160
1161/^[\p{^Xwd}]+/utf
1162    !.+\x{019}\x{35a}AB
1163
1164/[\D]/B,utf,ucp
1165    1\x{3c8}2
1166
1167/[\d]/B,utf,ucp
1168    >\x{6f4}<
1169
1170/[\S]/B,utf,ucp
1171    \x{1680}\x{6f4}\x{1680}
1172
1173/[\s]/B,utf,ucp
1174    >\x{1680}<
1175
1176/[\W]/B,utf,ucp
1177    A\x{1712}B
1178
1179/[\w]/B,utf,ucp
1180    >\x{1723}<
1181
1182/\D/B,utf,ucp
1183    1\x{3c8}2
1184
1185/\d/B,utf,ucp
1186    >\x{6f4}<
1187
1188/\S/B,utf,ucp
1189    \x{1680}\x{6f4}\x{1680}
1190
1191/\s/B,utf,ucp
1192    >\x{1680}>
1193
1194/\W/B,utf,ucp
1195    A\x{1712}B
1196
1197/\w/B,utf,ucp
1198    >\x{1723}<
1199
1200/[[:alpha:]]/B,ucp
1201
1202/[[:lower:]]/B,ucp
1203
1204/[[:upper:]]/B,ucp
1205
1206/[[:alnum:]]/B,ucp
1207
1208/[[:ascii:]]/B,ucp
1209
1210/[[:cntrl:]]/B,ucp
1211
1212/[[:digit:]]/B,ucp
1213
1214/[[:graph:]]/B,ucp
1215
1216/[[:print:]]/B,ucp
1217
1218/[[:punct:]]/B,ucp
1219
1220/[[:space:]]/B,ucp
1221
1222/[[:word:]]/B,ucp
1223
1224/[[:xdigit:]]/B,ucp
1225
1226# Unicode properties for \b and \B
1227
1228/\b...\B/utf,ucp
1229    abc_
1230    \x{37e}abc\x{376}
1231    \x{37e}\x{376}\x{371}\x{393}\x{394}
1232    !\x{c0}++\x{c1}\x{c2}
1233    !\x{c0}+++++
1234
1235# Without PCRE_UCP, non-ASCII always fail, even if < 256
1236
1237/\b...\B/utf
1238    abc_
1239\= Expect no match
1240    \x{37e}abc\x{376}
1241    \x{37e}\x{376}\x{371}\x{393}\x{394}
1242    !\x{c0}++\x{c1}\x{c2}
1243    !\x{c0}+++++
1244
1245# With PCRE_UCP, non-UTF8 chars that are < 256 still check properties
1246
1247/\b...\B/ucp
1248    abc_
1249    !\x{c0}++\x{c1}\x{c2}
1250    !\x{c0}+++++
1251
1252# Some of these are silly, but they check various combinations
1253
1254/[[:^alpha:][:^cntrl:]]+/B,utf,ucp
1255    123
1256    abc
1257
1258/[[:^cntrl:][:^alpha:]]+/B,utf,ucp
1259    123
1260    abc
1261
1262/[[:alpha:]]+/B,utf,ucp
1263    abc
1264
1265/[[:^alpha:]\S]+/B,utf,ucp
1266    123
1267    abc
1268
1269/[^\d]+/B,utf,ucp
1270    abc123
1271    abc\x{123}
1272    \x{660}abc
1273
1274/\p{Lu}+9\p{Lu}+B\p{Lu}+b/B
1275
1276/\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/B
1277
1278/\P{Lu}+9\P{Lu}+B\P{Lu}+b/B
1279
1280/\p{Han}+X\p{Greek}+\x{370}/B,utf
1281
1282/\p{Xan}+!\p{Xan}+A/B
1283
1284/\p{Xsp}+!\p{Xsp}\t/B
1285
1286/\p{Xps}+!\p{Xps}\t/B
1287
1288/\p{Xwd}+!\p{Xwd}_/B
1289
1290/A+\p{N}A+\dB+\p{N}*B+\d*/B,ucp
1291
1292# These behaved oddly in Perl, so they are kept in this test
1293
1294/(\x{23a}\x{23a}\x{23a})?\1/i,utf
1295\= Expect no match
1296    \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}
1297
1298/(ȺȺȺ)?\1/i,utf
1299\= Expect no match
1300    ȺȺȺⱥⱥ
1301
1302/(\x{23a}\x{23a}\x{23a})?\1/i,utf
1303    \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
1304
1305/(ȺȺȺ)?\1/i,utf
1306    ȺȺȺⱥⱥⱥ
1307
1308/(\x{23a}\x{23a}\x{23a})\1/i,utf
1309\= Expect no match
1310    \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}
1311
1312/(ȺȺȺ)\1/i,utf
1313\= Expect no match
1314    ȺȺȺⱥⱥ
1315
1316/(\x{23a}\x{23a}\x{23a})\1/i,utf
1317    \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
1318
1319/(ȺȺȺ)\1/i,utf
1320    ȺȺȺⱥⱥⱥ
1321
1322/(\x{2c65}\x{2c65})\1/i,utf
1323    \x{2c65}\x{2c65}\x{23a}\x{23a}
1324
1325/(ⱥⱥ)\1/i,utf
1326    ⱥⱥȺȺ
1327
1328/(\x{23a}\x{23a}\x{23a})\1Y/i,utf
1329    X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ
1330
1331/(\x{2c65}\x{2c65})\1Y/i,utf
1332    X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ
1333
1334# These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE
1335
1336/^[\p{Batak}]/utf
1337    \x{1bc0}
1338    \x{1bff}
1339\= Expect no match
1340    \x{1bf4}
1341
1342/^[\p{Brahmi}]/utf
1343    \x{11000}
1344    \x{1106f}
1345\= Expect no match
1346    \x{1104e}
1347
1348/^[\p{Mandaic}]/utf
1349    \x{840}
1350    \x{85e}
1351\= Expect no match
1352    \x{85c}
1353    \x{85d}
1354
1355/(\X*)(.)/s,utf
1356    A\x{300}
1357
1358/^S(\X*)e(\X*)$/utf
1359    Stéréo
1360
1361/^\X/utf
1362    ́réo
1363
1364/^a\X41z/alt_bsux,allow_empty_class,match_unset_backref,dupnames
1365    aX41z
1366\= Expect no match
1367    aAz
1368
1369/\X/
1370    a\=ps
1371    a\=ph
1372
1373/\Xa/
1374    aa\=ps
1375    aa\=ph
1376
1377/\X{2}/
1378    aa\=ps
1379    aa\=ph
1380
1381/\X+a/
1382    a\=ps
1383    aa\=ps
1384    aa\=ph
1385
1386/\X+?a/
1387    a\=ps
1388    ab\=ps
1389    aa\=ps
1390    aa\=ph
1391    aba\=ps
1392
1393# These Unicode 6.1.0 scripts are not known to Perl.
1394
1395/\p{Chakma}\d/utf,ucp
1396    \x{11100}\x{1113c}
1397
1398/\p{Takri}\d/utf,ucp
1399    \x{11680}\x{116c0}
1400
1401/^\X/utf
1402    A\=ps
1403    A\=ph
1404    A\x{300}\x{301}\=ps
1405    A\x{300}\x{301}\=ph
1406    A\x{301}\=ps
1407    A\x{301}\=ph
1408
1409/^\X{2,3}/utf
1410    A\=ps
1411    A\=ph
1412    AA\=ps
1413    AA\=ph
1414    A\x{300}\x{301}\=ps
1415    A\x{300}\x{301}\=ph
1416    A\x{300}\x{301}A\x{300}\x{301}\=ps
1417    A\x{300}\x{301}A\x{300}\x{301}\=ph
1418
1419/^\X{2}/utf
1420    AA\=ps
1421    AA\=ph
1422    A\x{300}\x{301}A\x{300}\x{301}\=ps
1423    A\x{300}\x{301}A\x{300}\x{301}\=ph
1424
1425/^\X+/utf
1426    AA\=ps
1427    AA\=ph
1428
1429/^\X+?Z/utf
1430    AA\=ps
1431    AA\=ph
1432
1433/A\x{3a3}B/IBi,utf
1434
1435/[\x{3a3}]/Bi,utf
1436
1437/[^\x{3a3}]/Bi,utf
1438
1439/[\x{3a3}]+/Bi,utf
1440
1441/[^\x{3a3}]+/Bi,utf
1442
1443/a*\x{3a3}/Bi,utf
1444
1445/\x{3a3}+a/Bi,utf
1446
1447/\x{3a3}*\x{3c2}/Bi,utf
1448
1449/\x{3a3}{3}/i,utf,aftertext
1450    \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
1451
1452/\x{3a3}{2,4}/i,utf,aftertext
1453    \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
1454
1455/\x{3a3}{2,4}?/i,utf,aftertext
1456    \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
1457
1458/\x{3a3}+./i,utf,aftertext
1459    \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
1460
1461/\x{3a3}++./i,utf,aftertext
1462\= Expect no match
1463    \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
1464
1465/\x{3a3}*\x{3c2}/Bi,utf
1466
1467/[^\x{3a3}]*\x{3c2}/Bi,utf
1468
1469/[^a]*\x{3c2}/Bi,utf
1470
1471/ist/Bi,utf
1472\= Expect no match
1473    ikt
1474
1475/is+t/i,utf
1476    iSs\x{17f}t
1477\= Expect no match
1478    ikt
1479
1480/is+?t/i,utf
1481\= Expect no match
1482    ikt
1483
1484/is?t/i,utf
1485\= Expect no match
1486    ikt
1487
1488/is{2}t/i,utf
1489\= Expect no match
1490    iskt
1491
1492# This property is a PCRE special
1493
1494/^\p{Xuc}/utf
1495    $abc
1496    @abc
1497    `abc
1498    \x{1234}abc
1499\= Expect no match
1500    abc
1501
1502/^\p{Xuc}+/utf
1503    $@`\x{a0}\x{1234}\x{e000}**
1504\= Expect no match
1505    \x{9f}
1506
1507/^\p{Xuc}+?/utf
1508    $@`\x{a0}\x{1234}\x{e000}**
1509\= Expect no match
1510    \x{9f}
1511
1512/^\p{Xuc}+?\*/utf
1513    $@`\x{a0}\x{1234}\x{e000}**
1514\= Expect no match
1515    \x{9f}
1516
1517/^\p{Xuc}++/utf
1518    $@`\x{a0}\x{1234}\x{e000}**
1519\= Expect no match
1520    \x{9f}
1521
1522/^\p{Xuc}{3,5}/utf
1523    $@`\x{a0}\x{1234}\x{e000}**
1524\= Expect no match
1525    \x{9f}
1526
1527/^\p{Xuc}{3,5}?/utf
1528    $@`\x{a0}\x{1234}\x{e000}**
1529\= Expect no match
1530    \x{9f}
1531
1532/^[\p{Xuc}]/utf
1533    $@`\x{a0}\x{1234}\x{e000}**
1534\= Expect no match
1535    \x{9f}
1536
1537/^[\p{Xuc}]+/utf
1538    $@`\x{a0}\x{1234}\x{e000}**
1539\= Expect no match
1540    \x{9f}
1541
1542/^\P{Xuc}/utf
1543    abc
1544\= Expect no match
1545    $abc
1546    @abc
1547    `abc
1548    \x{1234}abc
1549
1550/^[\P{Xuc}]/utf
1551    abc
1552\= Expect no match
1553    $abc
1554    @abc
1555    `abc
1556    \x{1234}abc
1557
1558# Some auto-possessification tests
1559
1560/\pN+\z/B
1561
1562/\PN+\z/B
1563
1564/\pN+/B
1565
1566/\PN+/B
1567
1568/\p{Any}+\p{Any} \p{Any}+\P{Any} \p{Any}+\p{L&} \p{Any}+\p{L} \p{Any}+\p{Lu} \p{Any}+\p{Han} \p{Any}+\p{Xan} \p{Any}+\p{Xsp} \p{Any}+\p{Xps} \p{Xwd}+\p{Any} \p{Any}+\p{Xuc}/Bx,ucp
1569
1570/\p{L&}+\p{Any} \p{L&}+\p{L&} \P{L&}+\p{L&} \p{L&}+\p{L} \p{L&}+\p{Lu} \p{L&}+\p{Han} \p{L&}+\p{Xan} \p{L&}+\P{Xan} \p{L&}+\p{Xsp} \p{L&}+\p{Xps} \p{Xwd}+\p{L&} \p{L&}+\p{Xuc}/Bx,ucp
1571
1572/\p{N}+\p{Any} \p{N}+\p{L&} \p{N}+\p{L} \p{N}+\P{L} \p{N}+\P{N} \p{N}+\p{Lu} \p{N}+\p{Han} \p{N}+\p{Xan} \p{N}+\p{Xsp} \p{N}+\p{Xps} \p{Xwd}+\p{N} \p{N}+\p{Xuc}/Bx,ucp
1573
1574/\p{Lu}+\p{Any} \p{Lu}+\p{L&} \p{Lu}+\p{L} \p{Lu}+\p{Lu} \P{Lu}+\p{Lu} \p{Lu}+\p{Nd} \p{Lu}+\P{Nd} \p{Lu}+\p{Han} \p{Lu}+\p{Xan} \p{Lu}+\p{Xsp} \p{Lu}+\p{Xps} \p{Xwd}+\p{Lu} \p{Lu}+\p{Xuc}/Bx,ucp
1575
1576/\p{Han}+\p{Lu} \p{Han}+\p{L&} \p{Han}+\p{L} \p{Han}+\p{Lu} \p{Han}+\p{Arabic} \p{Arabic}+\p{Arabic} \p{Han}+\p{Xan} \p{Han}+\p{Xsp} \p{Han}+\p{Xps} \p{Xwd}+\p{Han} \p{Han}+\p{Xuc}/Bx,ucp
1577
1578/\p{Xan}+\p{Any} \p{Xan}+\p{L&} \P{Xan}+\p{L&} \p{Xan}+\p{L} \p{Xan}+\p{Lu} \p{Xan}+\p{Han} \p{Xan}+\p{Xan} \p{Xan}+\P{Xan} \p{Xan}+\p{Xsp} \p{Xan}+\p{Xps} \p{Xwd}+\p{Xan} \p{Xan}+\p{Xuc}/Bx,ucp
1579
1580/\p{Xsp}+\p{Any} \p{Xsp}+\p{L&} \p{Xsp}+\p{L} \p{Xsp}+\p{Lu} \p{Xsp}+\p{Han} \p{Xsp}+\p{Xan} \p{Xsp}+\p{Xsp} \P{Xsp}+\p{Xsp} \p{Xsp}+\p{Xps} \p{Xwd}+\p{Xsp} \p{Xsp}+\p{Xuc}/Bx,ucp
1581
1582/\p{Xwd}+\p{Any} \p{Xwd}+\p{L&} \p{Xwd}+\p{L} \p{Xwd}+\p{Lu} \p{Xwd}+\p{Han} \p{Xwd}+\p{Xan} \p{Xwd}+\p{Xsp} \p{Xwd}+\p{Xps} \p{Xwd}+\p{Xwd} \p{Xwd}+\P{Xwd} \p{Xwd}+\p{Xuc}/Bx,ucp
1583
1584/\p{Xuc}+\p{Any} \p{Xuc}+\p{L&} \p{Xuc}+\p{L} \p{Xuc}+\p{Lu} \p{Xuc}+\p{Han} \p{Xuc}+\p{Xan} \p{Xuc}+\p{Xsp} \p{Xuc}+\p{Xps} \p{Xwd}+\p{Xuc} \p{Xuc}+\p{Xuc} \p{Xuc}+\P{Xuc}/Bx,ucp
1585
1586/\p{N}+\p{Ll} \p{N}+\p{Nd} \p{N}+\P{Nd}/Bx,ucp
1587
1588/\p{Xan}+\p{L} \p{Xan}+\p{N} \p{Xan}+\p{C} \p{Xan}+\P{L} \P{Xan}+\p{N} \p{Xan}+\P{C}/Bx,ucp
1589
1590/\p{L}+\p{Xan} \p{N}+\p{Xan} \p{C}+\p{Xan} \P{L}+\p{Xan} \p{N}+\p{Xan} \P{C}+\p{Xan} \p{L}+\P{Xan}/Bx,ucp
1591
1592/\p{Xan}+\p{Lu} \p{Xan}+\p{Nd} \p{Xan}+\p{Cc} \p{Xan}+\P{Ll} \P{Xan}+\p{No} \p{Xan}+\P{Cf}/Bx,ucp
1593
1594/\p{Lu}+\p{Xan} \p{Nd}+\p{Xan} \p{Cs}+\p{Xan} \P{Lt}+\p{Xan} \p{Nl}+\p{Xan} \P{Cc}+\p{Xan} \p{Lt}+\P{Xan}/Bx,ucp
1595
1596/\w+\p{P} \w+\p{Po} \w+\s \p{Xan}+\s \s+\p{Xan} \s+\w/Bx,ucp
1597
1598/\w+\P{P} \W+\p{Po} \w+\S \P{Xan}+\s \s+\P{Xan} \s+\W/Bx,ucp
1599
1600/\w+\p{Po} \w+\p{Pc} \W+\p{Po} \W+\p{Pc} \w+\P{Po} \w+\P{Pc}/Bx,ucp
1601
1602/\p{Nl}+\p{Xan} \P{Nl}+\p{Xan} \p{Nl}+\P{Xan} \P{Nl}+\P{Xan}/Bx,ucp
1603
1604/\p{Xan}+\p{Nl} \P{Xan}+\p{Nl} \p{Xan}+\P{Nl} \P{Xan}+\P{Nl}/Bx,ucp
1605
1606/\p{Xan}+\p{Nd} \P{Xan}+\p{Nd} \p{Xan}+\P{Nd} \P{Xan}+\P{Nd}/Bx,ucp
1607
1608# End auto-possessification tests
1609
1610/\w+/B,utf,ucp,auto_callout
1611    abcd
1612
1613/[\p{N}]?+/B,no_auto_possess
1614
1615/[\p{L}ab]{2,3}+/B,no_auto_possess
1616
1617/\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/Bx
1618
1619/.+\X/Bsx
1620
1621/\X+$/Bmx
1622
1623/\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/Bx
1624
1625/\d+\s{0,5}=\s*\S?=\w{0,4}\W*/B,utf,ucp
1626
1627/[RST]+/Bi,utf,ucp
1628
1629/[R-T]+/Bi,utf,ucp
1630
1631/[Q-U]+/Bi,utf,ucp
1632
1633/^s?c/Iim,utf
1634    scat
1635
1636/\X?abc/utf,no_start_optimize
1637    \xff\x7f\x00\x00\x03\x00\x41\xcc\x80\x41\x{300}\x61\x62\x63\x00\=no_utf_check,offset=06
1638
1639/\x{100}\x{200}\K\x{300}/utf,startchar
1640    \x{100}\x{200}\x{300}
1641
1642# Test UTF characters in a substitution
1643
1644/ábc/utf,replace=XሴZ
1645    123ábc123
1646
1647/(?<=abc)(|def)/g,utf,replace=<$0>
1648    123abcáyzabcdef789abcሴqr
1649
1650/[A-`]/iB,utf
1651    abcdefghijklmno
1652
1653/(?<=\K\x{17f})/g,utf,aftertext
1654    \x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
1655
1656/(?<=\K\x{17f})/altglobal,utf,aftertext
1657    \x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
1658
1659"\xa\xf<(.\pZ*\P{Xwd}+^\xa8\3'3yq.::?(?J:()\xd1+!~:3'(8?:)':(?'d'(?'d'^u]!.+.+\\A\Ah(n+?9){7}+\K;(?'X'u'(?'c'(?'z'(?<y>\xb::\xf0'|\xd3(\xae?'w(z\x8?P>l)\x8?P>a)'\H\R\xd1+!!~:3'(?:h$N{26875}\W+?\\=D{2}\x89(?i:Uy0\N({2\xa(\v\x85*){y*\A(()\p{L}+?\P{^Xan}'+?\xff\+pS\?|).{;y*\A(()\p{L}+?\8}\d?1(|)(/1){7}.+[Lp{Me}].\s\xdcC*?(?(<y>))(?<!^)$C((;*?(R))+(\xbf(R))\x8a\X*?\x8a\xb\xd1^9\3*+(\xc1,\k'R'\xb4)\xcc(z\z(?J)(?'X'\x1b(\xb\xd1^9\?'3*+P{^Xan}+?\xff\+(\xc1.]k+\xb'Pm'\xb4)\xcc4f\xa7'\xd1V(?i:U,{2,2})'(?'X'))?-%--\x95$9*\4'|\xd1(\x9c''%\x94$9)#(?'R')3\x7?('P\xed7'\xa8\xb1^u\xeaw\1\0\0\(|(?1){7}.+[\p{Me}].\s\xdcC*^\x14?(?(<y>))(?<!^)$C((;*?(R*?))+(?(R)\x8a\X*?\x8a\xb\xd1^9\3*+|(\xc1,\k'R'\xb4)\xcc! z)\z(?JJ)(?'X';(\xb\xd1^9\?'3*+(\xc1.]k+\xb'Pm'\xb4))':(?'d')(?'RD'(d')|)|$)'|(?<x>\g{d});\g{x}\x11\g{d}\x81\|$((?'X'\'X'(?'W''\x92()'9'\x83*))\xba*\!?^ <){)':;\xcc4'\xd1'(?'X'28))?-%--\x95$9*\4'|\xd1((''e\x94*$9:)*#(?'R')3)\x7?('P\xed')\\x16:;()\x1e\x10*:(?<y>)\xd1+0!~:(?)'d'E:yD!\s(?'R'\x1e;\x10:U))|'\x9g!\xb0*){)\\x16:;()\x1e\x10\x87*:(?<y>)\xd1+!~:(?)'}'\d'E:yD!\s(?'R'\x1e;\x10:U))|'))|)g!\xb0*R+9{29+)#(?'P'})*?pS\{3,}\x85,{0,}l{*UTF)(\xe{7}){3722,{9,}d{2,?|))|{)\(A?&d}}{\xa,}2}){3,}7,l{)22}(,}l:7{2,4}}29\x19+)#?'P'})*v?))\x5"
1660
1661/$(&.+[\p{Me}].\s\xdcC*?(?(<y>))(?<!^)$C((;*?(R))+(?(R)){0,6}?|){12\x8a\X*?\x8a\x0b\xd1^9\3*+(\xc1,\k'P'\xb4)\xcc(z\z(?JJ)(?'X'8};(\x0b\xd1^9\?'3*+(\xc1.]k+\x0b'Pm'\xb4\xcc4'\xd1'(?'X'))?-%--\x95$9*\4'|\xd1(''%\x95*$9)#(?'R')3\x07?('P\xed')\\x16:;()\x1e\x10*:(?<y>)\xd1+!~:(?)''(d'E:yD!\s(?'R'\x1e;\x10:U))|')g!\xb0*){29+))#(?'P'})*?/
1662
1663"(*UTF)(*UCP)(.UTF).+X(\V+;\^(\D|)!999}(?(?C{7(?C')\H*\S*/^\x5\xa\\xd3\x85n?(;\D*(?m).[^mH+((*UCP)(*U:F)})(?!^)(?'"
1664
1665/[\pS#moq]/
1666    =
1667
1668/(*:a\x{12345}b\t(d\)c)xxx/utf,alt_verbnames,mark
1669    cxxxz
1670
1671/abcd/utf,replace=x\x{824}y\o{3333}z(\Q12\$34$$\x34\E5$$),substitute_extended
1672    abcd
1673
1674/a(\x{e0}\x{101})(\x{c0}\x{102})/utf,replace=a\u$1\U$1\E$1\l$2\L$2\Eab\U\x{e0}\x{101}\L\x{d0}\x{160}\EDone,substitute_extended
1675    a\x{e0}\x{101}\x{c0}\x{102}
1676
1677/((?<digit>\d)|(?<letter>\p{L}))/g,substitute_extended,replace=<${digit:+digit; :not digit; }${letter:+letter:not a letter}>
1678    ab12cde
1679
1680/[\W\p{Any}]/B
1681    abc
1682    123
1683
1684/[\W\pL]/B
1685    abc
1686\= Expect no match
1687    123
1688
1689/(*UCP)(*UTF)[[:>:]]X/B
1690
1691/abc/utf,replace=xyz
1692    abc\=zero_terminate
1693
1694/a[[:punct:]b]/ucp,bincode
1695
1696/a[[:punct:]b]/utf,ucp,bincode
1697
1698/a[b[:punct:]]/utf,ucp,bincode
1699
1700/[[:^ascii:]]/utf,ucp,bincode
1701
1702/[[:^ascii:]\w]/utf,ucp,bincode
1703
1704/[\w[:^ascii:]]/utf,ucp,bincode
1705
1706/[^[:ascii:]\W]/utf,ucp,bincode
1707    \x{de}
1708    \x{200}
1709\= Expect no match
1710    \x{300}
1711    \x{37e}
1712
1713/[[:^ascii:]a]/utf,ucp,bincode
1714
1715/L(?#(|++<!(2)?/B,utf,no_auto_possess,auto_callout
1716
1717/L(?#(|++<!(2)?/B,utf,ucp,auto_callout
1718
1719/(*UTF)C\x09((?<!'(?x)!*H? #\xcc\x9a[^$]/
1720
1721# End of testinput5
1722