• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# This set of tests is for UTF-8 support and Unicode property support, with
2# relevance only for the 8-bit library.
3
4# The next 4 patterns have UTF-8 errors
5
6/[�]/utf
7
8/�/utf
9
10/���xxx/utf
11
12/��������/utf
13
14# Now test subjects
15
16/badutf/utf
17\= Expect UTF-8 errors
18    X\xdf
19    XX\xef
20    XXX\xef\x80
21    X\xf7
22    XX\xf7\x80
23    XXX\xf7\x80\x80
24    \xfb
25    \xfb\x80
26    \xfb\x80\x80
27    \xfb\x80\x80\x80
28    \xfd
29    \xfd\x80
30    \xfd\x80\x80
31    \xfd\x80\x80\x80
32    \xfd\x80\x80\x80\x80
33    \xdf\x7f
34    \xef\x7f\x80
35    \xef\x80\x7f
36    \xf7\x7f\x80\x80
37    \xf7\x80\x7f\x80
38    \xf7\x80\x80\x7f
39    \xfb\x7f\x80\x80\x80
40    \xfb\x80\x7f\x80\x80
41    \xfb\x80\x80\x7f\x80
42    \xfb\x80\x80\x80\x7f
43    \xfd\x7f\x80\x80\x80\x80
44    \xfd\x80\x7f\x80\x80\x80
45    \xfd\x80\x80\x7f\x80\x80
46    \xfd\x80\x80\x80\x7f\x80
47    \xfd\x80\x80\x80\x80\x7f
48    \xed\xa0\x80
49    \xc0\x8f
50    \xe0\x80\x8f
51    \xf0\x80\x80\x8f
52    \xf8\x80\x80\x80\x8f
53    \xfc\x80\x80\x80\x80\x8f
54    \x80
55    \xfe
56    \xff
57
58/badutf/utf
59\= Expect UTF-8 errors
60    XX\xfb\x80\x80\x80\x80
61    XX\xfd\x80\x80\x80\x80\x80
62    XX\xf7\xbf\xbf\xbf
63
64/shortutf/utf
65\= Expect UTF-8 errors
66    XX\xdf\=ph
67    XX\xef\=ph
68    XX\xef\x80\=ph
69    \xf7\=ph
70    \xf7\x80\=ph
71    \xf7\x80\x80\=ph
72    \xfb\=ph
73    \xfb\x80\=ph
74    \xfb\x80\x80\=ph
75    \xfb\x80\x80\x80\=ph
76    \xfd\=ph
77    \xfd\x80\=ph
78    \xfd\x80\x80\=ph
79    \xfd\x80\x80\x80\=ph
80    \xfd\x80\x80\x80\x80\=ph
81
82/anything/utf
83\= Expect UTF-8 errors
84    X\xc0\x80
85    XX\xc1\x8f
86    XXX\xe0\x9f\x80
87    \xf0\x8f\x80\x80
88    \xf8\x87\x80\x80\x80
89    \xfc\x83\x80\x80\x80\x80
90    \xfe\x80\x80\x80\x80\x80
91    \xff\x80\x80\x80\x80\x80
92    \xf8\x88\x80\x80\x80
93    \xf9\x87\x80\x80\x80
94    \xfc\x84\x80\x80\x80\x80
95    \xfd\x83\x80\x80\x80\x80
96\= Expect no match
97    \xc3\x8f
98    \xe0\xaf\x80
99    \xe1\x80\x80
100    \xf0\x9f\x80\x80
101    \xf1\x8f\x80\x80
102    \xf8\x88\x80\x80\x80\=no_utf_check
103    \xf9\x87\x80\x80\x80\=no_utf_check
104    \xfc\x84\x80\x80\x80\x80\=no_utf_check
105    \xfd\x83\x80\x80\x80\x80\=no_utf_check
106
107# Similar tests with offsets
108
109/badutf/utf
110\= Expect UTF-8 errors
111    X\xdfabcd
112    X\xdfabcd\=offset=1
113\= Expect no match
114    X\xdfabcd\=offset=2
115
116/(?<=x)badutf/utf
117\= Expect UTF-8 errors
118    X\xdfabcd
119    X\xdfabcd\=offset=1
120    X\xdfabcd\=offset=2
121    X\xdfabcd\xdf\=offset=3
122\= Expect no match
123    X\xdfabcd\=offset=3
124
125/(?<=xx)badutf/utf
126\= Expect UTF-8 errors
127    X\xdfabcd
128    X\xdfabcd\=offset=1
129    X\xdfabcd\=offset=2
130    X\xdfabcd\=offset=3
131
132/(?<=xxxx)badutf/utf
133\= Expect UTF-8 errors
134    X\xdfabcd
135    X\xdfabcd\=offset=1
136    X\xdfabcd\=offset=2
137    X\xdfabcd\=offset=3
138    X\xdfabc\xdf\=offset=6
139    X\xdfabc\xdf\=offset=7
140\= Expect no match
141    X\xdfabcd\=offset=6
142
143/\x{100}/IB,utf
144
145/\x{1000}/IB,utf
146
147/\x{10000}/IB,utf
148
149/\x{100000}/IB,utf
150
151/\x{10ffff}/IB,utf
152
153/[\x{ff}]/IB,utf
154
155/[\x{100}]/IB,utf
156
157/\x80/IB,utf
158
159/\xff/IB,utf
160
161/\x{D55c}\x{ad6d}\x{C5B4}/IB,utf
162    \x{D55c}\x{ad6d}\x{C5B4}
163
164/\x{65e5}\x{672c}\x{8a9e}/IB,utf
165    \x{65e5}\x{672c}\x{8a9e}
166
167/\x{80}/IB,utf
168
169/\x{084}/IB,utf
170
171/\x{104}/IB,utf
172
173/\x{861}/IB,utf
174
175/\x{212ab}/IB,utf
176
177/[^ab\xC0-\xF0]/IB,utf
178    \x{f1}
179    \x{bf}
180    \x{100}
181    \x{1000}
182\= Expect no match
183    \x{c0}
184    \x{f0}
185
186/Ā{3,4}/IB,utf
187  \x{100}\x{100}\x{100}\x{100\x{100}
188
189/(\x{100}+|x)/IB,utf
190
191/(\x{100}*a|x)/IB,utf
192
193/(\x{100}{0,2}a|x)/IB,utf
194
195/(\x{100}{1,2}a|x)/IB,utf
196
197/\x{100}/IB,utf
198
199/a\x{100}\x{101}*/IB,utf
200
201/a\x{100}\x{101}+/IB,utf
202
203/[^\x{c4}]/IB
204
205/[\x{100}]/IB,utf
206    \x{100}
207    Z\x{100}
208    \x{100}Z
209
210/[\xff]/IB,utf
211    >\x{ff}<
212
213/[^\xff]/IB,utf
214
215/\x{100}abc(xyz(?1))/IB,utf
216
217/\777/I,utf
218  \x{1ff}
219  \777
220
221/\x{100}+\x{200}/IB,utf
222
223/\x{100}+X/IB,utf
224
225/^[\QĀ\E-\QŐ\E/B,utf
226
227# This tests the stricter UTF-8 check according to RFC 3629.
228
229/X/utf
230\= Expect UTF-8 errors
231    \x{d800}
232    \x{da00}
233    \x{dfff}
234    \x{110000}
235    \x{2000000}
236    \x{7fffffff}
237\= Expect no match
238    \x{d800}\=no_utf_check
239    \x{da00}\=no_utf_check
240    \x{dfff}\=no_utf_check
241    \x{110000}\=no_utf_check
242    \x{2000000}\=no_utf_check
243    \x{7fffffff}\=no_utf_check
244
245/(*UTF8)\x{1234}/
246    abcd\x{1234}pqr
247
248/(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I
249
250/\h/I,utf
251    ABC\x{09}
252    ABC\x{20}
253    ABC\x{a0}
254    ABC\x{1680}
255    ABC\x{180e}
256    ABC\x{2000}
257    ABC\x{202f}
258    ABC\x{205f}
259    ABC\x{3000}
260
261/\v/I,utf
262    ABC\x{0a}
263    ABC\x{0b}
264    ABC\x{0c}
265    ABC\x{0d}
266    ABC\x{85}
267    ABC\x{2028}
268
269/\h*A/I,utf
270    CDBABC
271
272/\v+A/I,utf
273
274/\s?xxx\s/I,utf
275
276/\sxxx\s/I,utf,tables=2
277    AB\x{85}xxx\x{a0}XYZ
278    AB\x{a0}xxx\x{85}XYZ
279
280/\S \S/I,utf,tables=2
281    \x{a2} \x{84}
282    A Z
283
284/a+/utf
285    a\x{123}aa\=offset=1
286    a\x{123}aa\=offset=3
287    a\x{123}aa\=offset=4
288\= Expect bad offset value
289    a\x{123}aa\=offset=6
290\= Expect bad UTF-8 offset
291    a\x{123}aa\=offset=2
292\= Expect no match
293    a\x{123}aa\=offset=5
294
295/\x{1234}+/Ii,utf
296
297/\x{1234}+?/Ii,utf
298
299/\x{1234}++/Ii,utf
300
301/\x{1234}{2}/Ii,utf
302
303/[^\x{c4}]/IB,utf
304
305/X+\x{200}/IB,utf
306
307/\R/I,utf
308
309/\777/IB,utf
310
311/\w+\x{C4}/B,utf
312    a\x{C4}\x{C4}
313
314/\w+\x{C4}/B,utf,tables=2
315    a\x{C4}\x{C4}
316
317/\W+\x{C4}/B,utf
318    !\x{C4}
319
320/\W+\x{C4}/B,utf,tables=2
321    !\x{C4}
322
323/\W+\x{A1}/B,utf
324    !\x{A1}
325
326/\W+\x{A1}/B,utf,tables=2
327    !\x{A1}
328
329/X\s+\x{A0}/B,utf
330    X\x20\x{A0}\x{A0}
331
332/X\s+\x{A0}/B,utf,tables=2
333    X\x20\x{A0}\x{A0}
334
335/\S+\x{A0}/B,utf
336    X\x{A0}\x{A0}
337
338/\S+\x{A0}/B,utf,tables=2
339    X\x{A0}\x{A0}
340
341/\x{a0}+\s!/B,utf
342    \x{a0}\x20!
343
344/\x{a0}+\s!/B,utf,tables=2
345    \x{a0}\x20!
346
347/A/utf
348  \x{ff000041}
349  \x{7f000041}
350
351/(*UTF8)abc/never_utf
352
353/abc/utf,never_utf
354
355/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf
356
357/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf
358
359/AB\x{1fb0}/IB,utf
360
361/AB\x{1fb0}/IBi,utf
362
363/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf
364    \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
365    \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
366
367/[ⱥ]/Bi,utf
368
369/[^ⱥ]/Bi,utf
370
371/\h/I
372
373/\v/I
374
375/\R/I
376
377/[[:blank:]]/B,ucp
378
379/\x{212a}+/Ii,utf
380    KKkk\x{212a}
381
382/s+/Ii,utf
383    SSss\x{17f}
384
385/\x{100}*A/IB,utf
386    A
387
388/\x{100}*\d(?R)/IB,utf
389
390/[Z\x{100}]/IB,utf
391    Z\x{100}
392    \x{100}
393    \x{100}Z
394
395/[z-\x{100}]/IB,utf
396
397/[z\Qa-d]Ā\E]/IB,utf
398    \x{100}
399    Ā
400
401/[ab\x{100}]abc(xyz(?1))/IB,utf
402
403/\x{100}*\s/IB,utf
404
405/\x{100}*\d/IB,utf
406
407/\x{100}*\w/IB,utf
408
409/\x{100}*\D/IB,utf
410
411/\x{100}*\S/IB,utf
412
413/\x{100}*\W/IB,utf
414
415/[\x{105}-\x{109}]/IBi,utf
416    \x{104}
417    \x{105}
418    \x{109}
419\= Expect no match
420    \x{100}
421    \x{10a}
422
423/[z-\x{100}]/IBi,utf
424    Z
425    z
426    \x{39c}
427    \x{178}
428    |
429    \x{80}
430    \x{ff}
431    \x{100}
432    \x{101}
433\= Expect no match
434    \x{102}
435    Y
436    y
437
438/[z-\x{100}]/IBi,utf
439
440/\x{3a3}B/IBi,utf
441
442/abc/utf,replace=�
443    abc
444
445/(?<=(a)(?-1))x/I,utf
446    a\x80zx\=offset=3
447
448/[\W\p{Any}]/B
449    abc
450    123
451
452/[\W\pL]/B
453    abc
454\= Expect no match
455    123
456
457/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/utf
458
459/[\s[:^ascii:]]/B,ucp
460
461# A special extra option allows escaped surrogate code points in 8-bit mode,
462# but subjects containing them must not be UTF-checked.
463
464/\x{d800}/I,utf,allow_surrogate_escapes
465    \x{d800}\=no_utf_check
466
467/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
468    \x{dfff}\x{df01}\=no_utf_check
469
470# This has different starting code units in 8-bit mode.
471
472/^[^ab]/IB,utf
473    c
474    \x{ff}
475    \x{100}
476\= Expect no match
477    aaa
478
479# End of testinput10
480