• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1<?xml version="1.0" encoding="UTF-8"?>
2
3<!-- Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html -->
4<!-- Copyright (c) 2007-2009 IBM Corporation and others. All rights reserved -->
5
6<!-- Test data file for string search  -->
7<!DOCTYPE stringsearch-tests [
<!-- Root element: holds one or more test-case children. -->
8<!ELEMENT stringsearch-tests (test-case+)>
<!-- Optional debug attribute.  It is typed IDREF, so its value has to be
     the id of a test-case that exists in this file. -->
9<!ATTLIST stringsearch-tests debug IDREF #IMPLIED >
<!-- A test-case is a pattern followed by up to three optional text pieces,
     always in the order pre, m, post.  In the data below, m carries the text
     that is expected to match and is left out of no-match cases (compare
     test01 and test02).
     NOTE(review): presumably the harness searches the concatenation of
     pre + m + post; confirm against the test driver. -->
10<!ELEMENT test-case (pattern, pre?, m?, post?)>
<!-- id is required and must be unique within the file.  Every other attribute
     is optional and falls back to the default declared here:
     locale "en", strength TERTIARY, norm OFF, alternate_handling NON_IGNORABLE. -->
11<!ATTLIST test-case
12          id ID #REQUIRED
13          locale CDATA "en"
14          strength (PRIMARY | SECONDARY | TERTIARY | QUATERNARY | IDENTICAL) "TERTIARY"
15          norm (ON | OFF) "OFF"
16          alternate_handling (NON_IGNORABLE | SHIFTED) "NON_IGNORABLE"
17          >
18
<!-- All four child elements are plain text.  Many cases below write characters
     as backslash-u escapes inside that text.
     NOTE(review): an XML parser does not decode those; presumably the test
     driver unescapes them.  Confirm. -->
19<!ELEMENT pattern (#PCDATA)>
20<!ELEMENT pre  (#PCDATA)>
21<!ELEMENT m    (#PCDATA)>
22<!ELEMENT post (#PCDATA)>
23]>
24
25<stringsearch-tests>
26  <!-- debug="test11"     (for copying into the above element)  -->
27
28    <!-- Very simple match  -->
29    <test-case id="test01" >
30       <pattern>abc</pattern>
31       <pre>xxx</pre><m>abc</m><post>yyy</post>
32    </test-case>
33
34    <!-- Very simple no-match  -->
35    <test-case id="test02" >
36       <pattern>abc</pattern>
37       <pre>xxx</pre><post>yyy</post>
38    </test-case>
39
40    <!-- Match after several near-misses. -->
41    <test-case id="test03" >
42       <pattern>string</pattern>
43       <pre>silly spring stling strxng strilg strinx stri</pre><m>string</m><post> fling</post>
44    </test-case>
45
46    <test-case id="test04" strength="PRIMARY" >
       <!-- PRIMARY strength: upper-case pattern against lower-case text; a match on "fuss" is expected. -->
47       <pattern>FUSS</pattern>
48       <pre>abc</pre><m>fuss</m><post>sss</post>
49    </test-case>
50
51    <test-case id="test05" strength="PRIMARY" >
       <!-- PRIMARY strength: the pattern "FUSS" is expected to match text spelled with ß. -->
52       <pattern>FUSS</pattern>
53       <pre>abc</pre><m>fuß</m><post>sss</post>
54    </test-case>
55
56  <test-case id="test05.5" strength="PRIMARY" >
    <!-- Same idea as test05 with a lower-case pattern; here the expected match is followed by more letters. -->
57    <pattern>fuss</pattern>
58    <pre>a </pre>
59    <m>fuß</m>
60    <post>ball table</post>
61  </test-case>
62
63  <test-case id="test06" strength="PRIMARY" >
      <!-- Reverse direction: a pattern containing ß is expected to match text spelled "fuss" at PRIMARY strength. -->
64      <pattern>fuß</pattern>
65       <pre>abc</pre><m>fuss</m><post>xyz</post>
66    </test-case>
67
68    <test-case id="test07" strength="SECONDARY" >
      <!-- Same pattern as test06 but at SECONDARY strength: there is no m element, so no match is expected. -->
69      <pattern>fuß</pattern>
70      <pre>abcfussxyz</pre>
71    </test-case>
72
73    <test-case id="test08" strength="PRIMARY" >
      <!-- No m element, so no match is expected.
           NOTE(review): presumably because "fus" would have to end part-way
           through the ß; compare the character-boundary note on test09. -->
74      <pattern>fus</pattern>
75      <pre>abcfuß</pre><post>xyz</post>
76    </test-case>
77
78    <!-- A good match following an initial match that failed because
79         of not ending on a character boundary -->
80    <test-case id="test09" strength="PRIMARY">
81      <pattern>fus</pattern>
82      <pre>fuß  </pre><m>fus</m><post>sss</post>
83    </test-case>
84
85
86    <!-- Test cases from usrchdat.c  BREAKITERATOREXACT -->
87
88    <test-case id="test10" strength="TERTIARY">
89      <pattern>fox</pattern>
90      <m>fox</m><post>y fox</post>
91    </test-case>
92
93    <test-case id="test11" strength="PRIMARY" locale="de_DE@collation=phonebook">
94      <pattern>toe</pattern>
95      <pre>This is a </pre><m>Tö</m><post>ne</post>
96    </test-case>
97
98    <test-case id="test11a" strength="SECONDARY" locale="de_DE@collation=phonebook">
99      <pattern>toe</pattern>
100      <pre>This is a </pre><post>Töne</post>
101    </test-case>
102
103    <test-case id="test12" strength="TERTIARY">
104      <pattern>e</pattern>
105      <pre>tésting that é doés not match </pre><m>e</m><post></post>
106    </test-case>
107
108    <test-case id="test13" strength="PRIMARY" locale="fr">
109      <pattern>e</pattern>
110      <pre></pre><m>É</m><post>É</post>
111    </test-case>
112
113    <test-case id="test14" strength="PRIMARY" locale="fr">
114      <pattern>O</pattern>
115      <pre>C</pre><m>O\u0302</m><post>TÉ</post>
116    </test-case>
117
118
119    <!-- Test cases from usrchdat.c  STRENGTH -->
120
121
122    <test-case id="test15" strength="PRIMARY" locale="en">
123      <pattern>fox</pattern>
124      <pre>The quick brown </pre><m>fox</m><post> jumps over the lazy foxes</post>
125    </test-case>
126
127    <test-case id="test16" strength="PRIMARY" locale="fr">
128      <pattern>peche</pattern>
129      <pre>blackbirds pat </pre><m>p\u00E9ch\u00E9</m><post> </post>
130    </test-case>
131
132    <test-case id="test17" strength="PRIMARY" locale="fr">
133      <pattern>peche</pattern>
134      <pre>blackbirds pat </pre><m>p\u00EAche</m><post> </post>
135    </test-case>
136
137    <test-case id="test18" strength="PRIMARY" locale="fr">
138      <pattern>peche</pattern>
139      <pre>blackbirds pat </pre><m>p\u00E9che</m><post>r </post>
140    </test-case>
141
142    <test-case id="test19" strength="PRIMARY" locale="fr">
143      <pattern>peche</pattern>
144      <pre>blackbirds pat </pre><m>p\u00EAche</m><post>r </post>
145    </test-case>
146
147    <test-case id="test20" strength="PRIMARY" locale="es">
148      <pattern>channel</pattern>
149      <pre>A </pre><m>channel</m><post>, </post>
150    </test-case>
151
152    <test-case id="test21" strength="PRIMARY" locale="es">
153      <pattern>channel</pattern>
154      <pre>A </pre><m>CHANNEL</m><post>, </post>
155    </test-case>
156
157    <test-case id="test22" strength="PRIMARY" locale="es">
158      <pattern>channel</pattern>
159      <pre>A </pre><m>Channel</m><post>s, </post>
160    </test-case>
161
162    <test-case id="test23" strength="PRIMARY" locale="es">
163      <pattern>channel</pattern>
164      <pre>A </pre><m>channel</m><post>... </post>
165    </test-case>
166
167    <test-case id="test24" strength="TERTIARY" locale="en">
168      <pattern>A\u0300</pattern>
169      <pre>A miss, and then </pre><m>\u00c0</m><post> should match but not A"</post>
170    </test-case>
171
172    <!-- TODO:  In the original test data, this test matched at IDENTICAL strength.
173                Doesn't seem right.  The characters are different.
174                -->
175    <test-case id="test24a" strength="IDENTICAL" locale="en">
176      <pattern>A\u0300</pattern>
177      <pre>At IDENTICAL, shoud this match?  </pre><m>\u00c0</m><post></post>
178    </test-case>
179
180  <test-case id="test24b" strength="IDENTICAL" alternate_handling="SHIFTED" locale="en">
181    <pattern>A\u0300</pattern>
182    <pre>At IDENTICAL, shoud this match?  </pre>
183    <m>\u00c0</m>
184    <post></post>
185  </test-case>
186
187  <test-case id="test25" strength="SECONDARY" locale="en">
188      <pattern>Ű</pattern>
189      <pre>12</pre><m>ű</m><post> Ű</post>
190    </test-case>
191
192    <test-case id="test26" strength="SECONDARY" locale="en">
193      <pattern>A</pattern>
194      <pre>12</pre><m>a</m><post>...</post>
195    </test-case>
196
197
198    <!--  Test Cases from usrchdat.c,  VARIABLE -->
199    <test-case id="test27" strength="TERTIARY" locale="en">
200      <pattern>blackbird</pattern>
201      <pre>black-bird </pre><m>blackbird</m><post>...</post>
202    </test-case>
203
204    <test-case id="test28" strength="TERTIARY" locale="en">
205      <pattern>go</pattern>
206      <pre> on</pre>
207    </test-case>
208
209    <!-- TODO:  this gives a U_ILLEGAL_ARGUMENT error when opening
210                the UStringSearch.  How did the original test run? -->
211    <!--
212    <test-case id="test29" strength="PRIMARY" locale="en">
213      <pattern>  </pattern>
214      <pre></pre><m></m><post>abc</post>
215    </test-case>
216    -->
217
218    <test-case id="test30" strength="SECONDARY" locale="en">
219      <pattern>abc</pattern>
220      <pre>  a bc   ab c    a  bc     ab  c"</pre>
221    </test-case>
222
223    <test-case id="test31" strength="SECONDARY" locale="en">
224      <pattern>abc</pattern>
225      <pre>           ---------------</pre>
226    </test-case>
227
228
229    <!--  Normalization test cases from usrchdat.c  -->
230    <test-case id="test32" strength="TERTIARY"  norm="ON">
231      <pattern>a\u0325\u0300</pattern>
232      <pre></pre><m>a\u0300\u0325</m>
233    </test-case>
234
235
236    <test-case id="test32a" strength="TERTIARY"  norm="OFF">
237      <pattern>a\u0325\u0300</pattern>
238      <pre>a\u0300\u0325</pre>
239    </test-case>
240
241
242    <!-- COMPOSITEBOUNDARIES from usrchdat.c
243         Boundaries are not identical to the original test data because
244         of matching only full combining sequences
245    -->
246    <test-case id="test40" strength="TERTIARY">
247      <pattern>A</pattern>
248      <pre>À</pre>   <!-- \u00C0 -->
249    </test-case>
250
251    <test-case id="test41" strength="TERTIARY">
252      <pattern>A</pattern>
253      <pre>À</pre><m>A</m><post>C</post>
254    </test-case>
255
256    <test-case id="test42" strength="TERTIARY">
257      <pattern>A\u030A</pattern>
258      <pre>À\u01FA</pre>
259    </test-case>
260
261
262
263    <!-- SUPPLEMENTARYCANONICAL from usrchdat.c  -->
264    <test-case id="test50" strength="TERTIARY">
265      <pattern>\uD800\uDC00</pattern>
266      <pre>abc \uD802\uDC00 \uD800\uDC01 \uD801\uDC00 </pre><m>\uD800\uDC00</m>
267      <post>abc abc\uD800\uDC00 \uD800\uD800\uDC00 \uD800\uDC00\uDC00</post>
268    </test-case>
269
270    <test-case id="test51" strength="TERTIARY">
271      <pattern>\\uD834\\uDDB9</pattern>
272      <pre>and</pre><m>\\uD834\\uDDB9</m><post>this sentence</post>
273    </test-case>
274
275    <test-case id="test52" strength="TERTIARY">
276      <pattern> \\uD834\\uDDB9 </pattern>
277      <pre>and</pre><m> \\uD834\\uDDB9 </m><post>this sentence</post>
278    </test-case>
279
280    <test-case id="test53" strength="TERTIARY">
281      <pattern>-\\uD834\\uDDB9-</pattern>
282      <pre>and</pre><m>-\\uD834\\uDDB9-</m><post>this sentence</post>
283    </test-case>
284
285    <test-case id="test54" strength="TERTIARY">
286      <pattern>,\\uD834\\uDDB9,</pattern>
287      <pre>and</pre><m>,\\uD834\\uDDB9,</m><post>this sentence</post>
288    </test-case>
289
290    <test-case id="test55" strength="TERTIARY">
291      <pattern>?\\uD834\\uDDB9?</pattern>
292      <pre>and</pre><m>?\\uD834\\uDDB9?</m><post>this sentence</post>
293    </test-case>
294
295
296    <!-- Long combining sequences  -->
297    <!-- Backwards search fails because the pattern ends with ignorables
298    <test-case id="test60" strength="PRIMARY">
299      <pattern>A\u0301\u0301\u0301\u0301</pattern>
300      <m>A\u0301\u0301\u0301\u0301\u0301</m>
301    </test-case>
302    -->
303
304    <test-case id="test61" strength="TERTIARY">
305      <pattern>A\u0301\u0301\u0301\u0301</pattern>
306          <pre>A\u0301\u0301\u0301\u0301\u0301</pre>
307    </test-case>
308
309    <test-case id="test62" strength="TERTIARY">
310      <pattern>A\u0301\u0301\u0301\u0301</pattern>
311            <m>A\u0301\u0301\u0301\u0301</m>
312    </test-case>
313
314    <!-- stand-alone combining marks don't match attached marks  -->
315    <test-case id="test63" strength="TERTIARY">
316      <pattern>\u0301</pattern>
317      <pre>A\u0301\u0301\u0301\u0301</pre>
318    </test-case>
319
320    <test-case id="test64" strength="TERTIARY">
321      <pattern>\u0301</pattern>
322      <post>\u0301\u0301\u0301\u0301</post>
323    </test-case>
324
325  <!-- stand-alone combining mark does match an un-attached combining mark -->
326    <test-case id="test65" strength="TERTIARY">
327       <pattern>\u0301</pattern>
328       <m>\u0301</m><post>A\u0301\u0301</post>
329    </test-case>
330
331    <test-case id="test66" strength="TERTIARY">
332       <pattern>\u0301</pattern>
333       <m>\u0301</m>
334    </test-case>
335
336    <!-- stand-alone combining marks at end of the target text -->
337    <test-case id="test67" strength="TERTIARY">
338       <pattern>\u0301</pattern>
339       <pre>abcd\r</pre><m>\u0301</m>
340    </test-case>
341
342      <!-- attached combining marks at end of the target text, no match -->
343    <test-case id="test68" strength="TERTIARY">
344       <pattern>\u0301</pattern>
345       <pre>abcd\u0301</pre>
346    </test-case>
347
348
349
350   <!-- no match within expansions at the start -->
351    <test-case id="test70" strength="PRIMARY">
352      <pattern>Eligature</pattern>
353      <pre>Æligature</pre>
354    </test-case>
355
356    <test-case id="test71" strength="PRIMARY">
357      <pattern>AEligature</pattern>
358      <m>Æligature</m>
359    </test-case>
360
361    <test-case id="test72" strength="PRIMARY">
362        <pattern>AEligature</pattern>
363        <m>Æligature</m>
364    </test-case>
365
366    <!-- unattached combining Tilde will not match a Tilde that is
367         part of a composed Ñ  (\u00D1)  -->
368    <test-case id="test73" strength="SECONDARY">
369        <pattern>\u0303</pattern>  <!-- combining tilde -->
370        <pre>Ñ&#x0d;</pre><m>\u0303</m>
371    </test-case>
372
373    <test-case id="test74" strength="SECONDARY">
374        <pattern>\u0303</pattern>  <!-- combining tilde -->
375        <pre>Ñ &#x0d;</pre><m>\u0303</m><post>a</post>
376    </test-case>
377
378  <test-case id="test75" strength="TERTIARY" locale="fr">
379    <pattern>\u00EA</pattern>
380    <pre>p</pre><m>\u00EA</m><post>che</post>
381  </test-case>
382
383  <test-case id="test76" strength="TERTIARY" locale="fr">
384    <pattern>\u00EA</pattern>
385    <pre>p</pre><m>e\u0302</m><post>che</post>
386  </test-case>
387
388  <test-case id="test77" strength="TERTIARY" locale="fr">
389    <pattern>e\u0302</pattern>
390    <pre>p</pre><m>\u00EA</m><post>che</post>
391  </test-case>
392
393  <!-- Test cases from ticket:5382 -->
394  <test-case id="test78" strength="SECONDARY" locale="hu_HU">
395    <pattern>\u0170</pattern>
396    <m>\u0171</m>
397    <post>12</post>
398  </test-case>
399
400  <test-case id="test79" strength="SECONDARY" locale="hu_HU">
401    <pattern>\u0170</pattern>
402    <pre>1</pre>
403    <m>\u0171</m>
404    <post>2</post>
405  </test-case>
406
407  <test-case id="test80" strength="SECONDARY" locale="hu_HU">
408    <pattern>\u0170</pattern>
409    <pre>12</pre>
410    <m>\u0171</m>
411  </test-case>
412
413  <!-- Test cases from ticket:5959 -->
414  <test-case id="test81" strength="SECONDARY">
415    <pattern>\u2166</pattern>
416    <m>VII</m>
417  </test-case>
418
419  <test-case id="test82" strength="SECONDARY">
420    <pattern>VII</pattern>
421    <m>\u2166</m>
422  </test-case>
423
424  <test-case id="test83" strength="IDENTICAL" alternate_handling="SHIFTED" locale="en">
425    <pattern>Universal Declaration of Human Rights</pattern>
426    <pre>Proclaims this </pre><m>Universal Declaration of Human Rights</m><post> as a common standard of achievement for all peoples and all nations</post>
427  </test-case>
428
429  <test-case id="test83b" strength="TERTIARY" alternate_handling="SHIFTED" locale="en">
430    <pattern>Universal Declaration of Human Rights</pattern>
431    <pre>Proclaims this </pre>
432    <m>Universal-Declaration-of-Human-Rights</m>
433    <post> as a common standard of achievement for all peoples and all nations</post>
434  </test-case>
435
436  <test-case id="test84" strength="TERTIARY" locale="en">
437    <pattern>\u05E9\u0591\u05E9</pattern>
438    <m>\u05E9\u0592\u05E9</m>
439  </test-case>
440
441  <test-case id="test84b" strength="IDENTICAL" locale="en">
442    <pattern>\u05E9\u0591\u05E9</pattern>
443    <pre>\u05E9\u0592\u05E9</pre>
444  </test-case>
445</stringsearch-tests>
446
447