1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2009-2014, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: bidiconf.cpp
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2009oct16
16 * created by: Markus W. Scherer
17 *
18 * BiDi conformance test, using the Unicode BidiTest.txt and BidiCharacterTest.txt files.
19 */
20
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include "unicode/utypes.h"
25 #include "unicode/ubidi.h"
26 #include "unicode/errorcode.h"
27 #include "unicode/localpointer.h"
28 #include "unicode/putil.h"
29 #include "unicode/unistr.h"
30 #include "intltest.h"
31 #include "uparse.h"
32
33 class BiDiConformanceTest : public IntlTest {
34 public:
BiDiConformanceTest()35 BiDiConformanceTest() :
36 directionBits(0), lineNumber(0), levelsCount(0), orderingCount(0),
37 errorCount(0) {}
38
39 void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL) override;
40
41 void TestBidiTest();
42 void TestBidiCharacterTest();
43 private:
44 UBool parseLevels(const char *&start);
45 UBool parseOrdering(const char *start);
46 UBool parseInputStringFromBiDiClasses(const char *&start);
47
48 UBool checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount);
49 UBool checkOrdering(UBiDi *ubidi);
50
51 void printErrorLine();
52
53 char line[10000];
54 UBiDiLevel levels[1000];
55 uint32_t directionBits;
56 int32_t ordering[1000];
57 int32_t lineNumber;
58 int32_t levelsCount;
59 int32_t orderingCount;
60 int32_t errorCount;
61 UnicodeString inputString;
62 const char *paraLevelName;
63 char levelNameString[12];
64 };
65
createBiDiConformanceTest()66 extern IntlTest *createBiDiConformanceTest() {
67 return new BiDiConformanceTest();
68 }
69
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)70 void BiDiConformanceTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
71 if(exec) {
72 logln("TestSuite BiDiConformanceTest: ");
73 }
74 TESTCASE_AUTO_BEGIN;
75 TESTCASE_AUTO(TestBidiTest);
76 TESTCASE_AUTO(TestBidiCharacterTest);
77 TESTCASE_AUTO_END;
78 }
79
80 U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose);
81
parseLevels(const char * & start)82 UBool BiDiConformanceTest::parseLevels(const char *&start) {
83 directionBits=0;
84 levelsCount=0;
85 while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') {
86 if(*start=='x') {
87 levels[levelsCount++]=UBIDI_DEFAULT_LTR;
88 ++start;
89 } else {
90 char *end;
91 uint32_t value=(uint32_t)strtoul(start, &end, 10);
92 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0 && *end!=';')
93 || value>(UBIDI_MAX_EXPLICIT_LEVEL+1)) {
94 errln("\nError on line %d: Levels parse error at %s", (int)lineNumber, start);
95 printErrorLine();
96 return false;
97 }
98 levels[levelsCount++]=(UBiDiLevel)value;
99 directionBits|=(1<<(value&1));
100 start=end;
101 }
102 }
103 return true;
104 }
105
parseOrdering(const char * start)106 UBool BiDiConformanceTest::parseOrdering(const char *start) {
107 orderingCount=0;
108 while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') {
109 char *end;
110 uint32_t value=(uint32_t)strtoul(start, &end, 10);
111 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0 && *end!=';') || value>=1000) {
112 errln("\nError on line %d: Reorder parse error at %s", (int)lineNumber, start);
113 printErrorLine();
114 return false;
115 }
116 ordering[orderingCount++]=(int32_t)value;
117 start=end;
118 }
119 return true;
120 }
121
122 static const UChar charFromBiDiClass[U_CHAR_DIRECTION_COUNT]={
123 0x6c, // 'l' for L
124 0x52, // 'R' for R
125 0x33, // '3' for EN
126 0x2d, // '-' for ES
127 0x25, // '%' for ET
128 0x39, // '9' for AN
129 0x2c, // ',' for CS
130 0x2f, // '/' for B
131 0x5f, // '_' for S
132 0x20, // ' ' for WS
133 0x3d, // '=' for ON
134 0x65, // 'e' for LRE
135 0x6f, // 'o' for LRO
136 0x41, // 'A' for AL
137 0x45, // 'E' for RLE
138 0x4f, // 'O' for RLO
139 0x2a, // '*' for PDF
140 0x60, // '`' for NSM
141 0x7c, // '|' for BN
142 // new in Unicode 6.3/ICU 52
143 0x53, // 'S' for FSI
144 0x69, // 'i' for LRI
145 0x49, // 'I' for RLI
146 0x2e // '.' for PDI
147 };
148
149 U_CDECL_BEGIN
150
151 static UCharDirection U_CALLCONV
biDiConfUBiDiClassCallback(const void *,UChar32 c)152 biDiConfUBiDiClassCallback(const void * /*context*/, UChar32 c) {
153 for(int i=0; i<U_CHAR_DIRECTION_COUNT; ++i) {
154 if(c==charFromBiDiClass[i]) {
155 return (UCharDirection)i;
156 }
157 }
158 // Character not in our hardcoded table.
159 // Should not occur during testing.
160 return U_BIDI_CLASS_DEFAULT;
161 }
162
163 U_CDECL_END
164
165 static const int8_t biDiClassNameLengths[U_CHAR_DIRECTION_COUNT+1]={
166 1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 3, 3, 3, 3, 0
167 };
168
parseInputStringFromBiDiClasses(const char * & start)169 UBool BiDiConformanceTest::parseInputStringFromBiDiClasses(const char *&start) {
170 inputString.remove();
171 /*
172 * Lengthy but fast BiDi class parser.
173 * A simple parser could terminate or extract the name string and use
174 * int32_t biDiClassInt=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, bidiClassString);
175 * but that makes this test take significantly more time.
176 */
177 while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') {
178 UCharDirection biDiClass=U_CHAR_DIRECTION_COUNT;
179 // Compare each character once until we have a match on
180 // a complete, short BiDi class name.
181 if(start[0]=='L') {
182 if(start[1]=='R') {
183 if(start[2]=='E') {
184 biDiClass=U_LEFT_TO_RIGHT_EMBEDDING;
185 } else if(start[2]=='I') {
186 biDiClass=U_LEFT_TO_RIGHT_ISOLATE;
187 } else if(start[2]=='O') {
188 biDiClass=U_LEFT_TO_RIGHT_OVERRIDE;
189 }
190 } else {
191 biDiClass=U_LEFT_TO_RIGHT;
192 }
193 } else if(start[0]=='R') {
194 if(start[1]=='L') {
195 if(start[2]=='E') {
196 biDiClass=U_RIGHT_TO_LEFT_EMBEDDING;
197 } else if(start[2]=='I') {
198 biDiClass=U_RIGHT_TO_LEFT_ISOLATE;
199 } else if(start[2]=='O') {
200 biDiClass=U_RIGHT_TO_LEFT_OVERRIDE;
201 }
202 } else {
203 biDiClass=U_RIGHT_TO_LEFT;
204 }
205 } else if(start[0]=='E') {
206 if(start[1]=='N') {
207 biDiClass=U_EUROPEAN_NUMBER;
208 } else if(start[1]=='S') {
209 biDiClass=U_EUROPEAN_NUMBER_SEPARATOR;
210 } else if(start[1]=='T') {
211 biDiClass=U_EUROPEAN_NUMBER_TERMINATOR;
212 }
213 } else if(start[0]=='A') {
214 if(start[1]=='L') {
215 biDiClass=U_RIGHT_TO_LEFT_ARABIC;
216 } else if(start[1]=='N') {
217 biDiClass=U_ARABIC_NUMBER;
218 }
219 } else if(start[0]=='C' && start[1]=='S') {
220 biDiClass=U_COMMON_NUMBER_SEPARATOR;
221 } else if(start[0]=='B') {
222 if(start[1]=='N') {
223 biDiClass=U_BOUNDARY_NEUTRAL;
224 } else {
225 biDiClass=U_BLOCK_SEPARATOR;
226 }
227 } else if(start[0]=='S') {
228 biDiClass=U_SEGMENT_SEPARATOR;
229 } else if(start[0]=='W' && start[1]=='S') {
230 biDiClass=U_WHITE_SPACE_NEUTRAL;
231 } else if(start[0]=='O' && start[1]=='N') {
232 biDiClass=U_OTHER_NEUTRAL;
233 } else if(start[0]=='P' && start[1]=='D') {
234 if(start[2]=='F') {
235 biDiClass=U_POP_DIRECTIONAL_FORMAT;
236 } else if(start[2]=='I') {
237 biDiClass=U_POP_DIRECTIONAL_ISOLATE;
238 }
239 } else if(start[0]=='N' && start[1]=='S' && start[2]=='M') {
240 biDiClass=U_DIR_NON_SPACING_MARK;
241 } else if(start[0]=='F' && start[1]=='S' && start[2]=='I') {
242 biDiClass=U_FIRST_STRONG_ISOLATE;
243 }
244 // Now we verify that the class name is terminated properly,
245 // and not just the start of a longer word.
246 int8_t biDiClassNameLength=biDiClassNameLengths[biDiClass];
247 char c=start[biDiClassNameLength];
248 if(biDiClass<U_CHAR_DIRECTION_COUNT && (U_IS_INV_WHITESPACE(c) || c==';' || c==0)) {
249 inputString.append(charFromBiDiClass[biDiClass]);
250 start+=biDiClassNameLength;
251 continue;
252 }
253 errln("\nError on line %d: BiDi class string not recognized at %s", (int)lineNumber, start);
254 printErrorLine();
255 return false;
256 }
257 return true;
258 }
259
TestBidiTest()260 void BiDiConformanceTest::TestBidiTest() {
261 IcuTestErrorCode errorCode(*this, "TestBidiTest");
262 const char *sourceTestDataPath=getSourceTestData(errorCode);
263 if(errorCode.errIfFailureAndReset("unable to find the source/test/testdata "
264 "folder (getSourceTestData())")) {
265 return;
266 }
267 char bidiTestPath[400];
268 strcpy(bidiTestPath, sourceTestDataPath);
269 strcat(bidiTestPath, "BidiTest.txt");
270 LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r"));
271 if(bidiTestFile.isNull()) {
272 errln("unable to open %s", bidiTestPath);
273 return;
274 }
275 LocalUBiDiPointer ubidi(ubidi_open());
276 ubidi_setClassCallback(ubidi.getAlias(), biDiConfUBiDiClassCallback, NULL,
277 NULL, NULL, errorCode);
278 if(errorCode.errIfFailureAndReset("ubidi_setClassCallback()")) {
279 return;
280 }
281 lineNumber=0;
282 levelsCount=0;
283 orderingCount=0;
284 errorCount=0;
285 // paraLevelName must be initialized in case the first non-comment line is in error
286 paraLevelName="N/A";
287 while(errorCount<10 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) {
288 ++lineNumber;
289 // Remove trailing comments and whitespace.
290 char *commentStart=strchr(line, '#');
291 if(commentStart!=NULL) {
292 *commentStart=0;
293 }
294 u_rtrim(line);
295 const char *start=u_skipWhitespace(line);
296 if(*start==0) {
297 continue; // Skip empty and comment-only lines.
298 }
299 if(*start=='@') {
300 ++start;
301 if(0==strncmp(start, "Levels:", 7)) {
302 start+=7;
303 if(!parseLevels(start)) {
304 return;
305 }
306 } else if(0==strncmp(start, "Reorder:", 8)) {
307 if(!parseOrdering(start+8)) {
308 return;
309 }
310 }
311 // Skip unknown @Xyz: ...
312 } else {
313 if(!parseInputStringFromBiDiClasses(start)) {
314 return;
315 }
316 start=u_skipWhitespace(start);
317 if(*start!=';') {
318 errln("missing ; separator on input line %s", line);
319 return;
320 }
321 start=u_skipWhitespace(start+1);
322 char *end;
323 uint32_t bitset=(uint32_t)strtoul(start, &end, 16);
324 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0)) {
325 errln("input bitset parse error at %s", start);
326 return;
327 }
328 // Loop over the bitset.
329 static const UBiDiLevel paraLevels[]={ UBIDI_DEFAULT_LTR, 0, 1, UBIDI_DEFAULT_RTL };
330 static const char *const paraLevelNames[]={ "auto/LTR", "LTR", "RTL", "auto/RTL" };
331 for(int i=0; i<=3; ++i) {
332 if(bitset&(1<<i)) {
333 ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(),
334 paraLevels[i], NULL, errorCode);
335 const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode);
336 if(errorCode.errIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) {
337 errln("Input line %d: %s", (int)lineNumber, line);
338 return;
339 }
340 paraLevelName=paraLevelNames[i];
341 if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()))) {
342 // continue outerLoop; does not exist in C++
343 // so just break out of the inner loop.
344 break;
345 }
346 if(!checkOrdering(ubidi.getAlias())) {
347 // continue outerLoop; does not exist in C++
348 // so just break out of the inner loop.
349 break;
350 }
351 }
352 }
353 }
354 }
355 }
356
357 /*
358 *******************************************************************************
359 *
360 * created on: 2013jul01
361 * created by: Matitiahu Allouche
362
363 This function performs a conformance test for implementations of the
364 Unicode Bidirectional Algorithm, specified in UAX #9: Unicode
365 Bidirectional Algorithm, at https://www.unicode.org/reports/tr9/
366
367 Each test case is represented in a single line which is read from a file
368 named BidiCharacterTest.txt. Empty, blank and comment lines may also appear
369 in this file.
370
371 The format of the test data is specified below. Note that each test
372 case constitutes a single line of text; reordering is applied within a
373 single line and independently of a rendering engine, and rules L3 and L4
374 are out of scope.
375
376 The number sign '#' is the comment character: everything is ignored from
377 the occurrence of '#' until the end of the line.
378 Empty lines and lines containing only spaces and/or comments are ignored.
379
380 Lines which represent test cases consist of 4 or 5 fields separated by a
381 semicolon. Each field consists of tokens separated by whitespace (space
382 or Tab). Whitespace before and after semicolons is optional.
383
384 Field 0: A sequence of hexadecimal code point values separated by space
385
386 Field 1: A value representing the paragraph direction, as follows:
387 - 0 represents left-to-right
388 - 1 represents right-to-left
389 - 2 represents auto-LTR according to rules P2 and P3 of the algorithm
390 - 3 represents auto-RTL according to rules P2 and P3 of the algorithm
391 - a negative number whose absolute value is taken as paragraph level;
392 this may be useful to test cases where the embedding level approaches
393 or exceeds the maximum embedding level.
394
395 Field 2: The resolved paragraph embedding level. If the input (field 0)
396 includes more than one paragraph, this field represents the
397 resolved level of the first paragraph.
398
399 Field 3: An ordered list of resulting levels for each token in field 0
400 (each token represents one source character).
401 The UBA does not assign levels to certain characters (e.g. LRO);
402 characters removed in rule X9 are indicated with an 'x'.
403
404 Field 4: An ordered list of indices showing the resulting visual ordering
405 from left to right; characters with a resolved level of 'x' are
406 skipped. The numbers are zero-based. Each index corresponds to
407 a character in the reordered (visual) string. It represents the
408 index of the source character in the input (field 0).
409 This field is optional. When it is absent, the visual ordering
410 is not verified.
411
412 Examples:
413
414 # This is a comment line.
415 L L ON R ; 0 ; 0 ; 0 0 0 1 ; 0 1 2 3
416 L L ON R;0;0;0 0 0 1;0 1 2 3
417
418 # Note: in the next line, 'B' represents a block separator, not the letter 'B'.
419 LRE A B C PDF;2;0;x 2 0 0 x;1 2 3
420 # Note: in the next line, 'b' represents the letter 'b', not a block separator.
421 a b c 05d0 05d1 x ; 0 ; 0 ; 0 0 0 1 1 0 ; 0 1 2 4 3 5
422
423 a R R x ; 1 ; 1 ; 2 1 1 2
424 L L R R R B R R L L L B ON ON ; 3 ; 0 ; 0 0 1 1 1 0 1 1 2 2 2 1 1 1
425
426 *
427 *******************************************************************************
428 */
TestBidiCharacterTest()429 void BiDiConformanceTest::TestBidiCharacterTest() {
430 IcuTestErrorCode errorCode(*this, "TestBidiCharacterTest");
431 const char *sourceTestDataPath=getSourceTestData(errorCode);
432 if(errorCode.errIfFailureAndReset("unable to find the source/test/testdata "
433 "folder (getSourceTestData())")) {
434 return;
435 }
436 char bidiTestPath[400];
437 strcpy(bidiTestPath, sourceTestDataPath);
438 strcat(bidiTestPath, "BidiCharacterTest.txt");
439 LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r"));
440 if(bidiTestFile.isNull()) {
441 errln("unable to open %s", bidiTestPath);
442 return;
443 }
444 LocalUBiDiPointer ubidi(ubidi_open());
445 lineNumber=0;
446 levelsCount=0;
447 orderingCount=0;
448 errorCount=0;
449 while(errorCount<20 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) {
450 ++lineNumber;
451 paraLevelName="N/A";
452 inputString="N/A";
453 // Remove trailing comments and whitespace.
454 char *commentStart=strchr(line, '#');
455 if(commentStart!=NULL) {
456 *commentStart=0;
457 }
458 u_rtrim(line);
459 const char *start=u_skipWhitespace(line);
460 if(*start==0) {
461 continue; // Skip empty and comment-only lines.
462 }
463 // Parse the code point string in field 0.
464 UChar *buffer=inputString.getBuffer(200);
465 int32_t length=u_parseString(start, buffer, inputString.getCapacity(), NULL, errorCode);
466 if(errorCode.errIfFailureAndReset("Invalid string in field 0")) {
467 errln("Input line %d: %s", (int)lineNumber, line);
468 inputString.remove();
469 continue;
470 }
471 inputString.releaseBuffer(length);
472 start=strchr(start, ';');
473 if(start==NULL) {
474 errorCount++;
475 errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line);
476 continue;
477 }
478 start=u_skipWhitespace(start+1);
479 char *end;
480 int32_t paraDirection=(int32_t)strtol(start, &end, 10);
481 UBiDiLevel paraLevel=UBIDI_MAX_EXPLICIT_LEVEL+2;
482 if(paraDirection==0) {
483 paraLevel=0;
484 paraLevelName="LTR";
485 }
486 else if(paraDirection==1) {
487 paraLevel=1;
488 paraLevelName="RTL";
489 }
490 else if(paraDirection==2) {
491 paraLevel=UBIDI_DEFAULT_LTR;
492 paraLevelName="Auto/LTR";
493 }
494 else if(paraDirection==3) {
495 paraLevel=UBIDI_DEFAULT_RTL;
496 paraLevelName="Auto/RTL";
497 }
498 else if(paraDirection<0 && -paraDirection<=(UBIDI_MAX_EXPLICIT_LEVEL+1)) {
499 paraLevel=(UBiDiLevel)(-paraDirection);
500 sprintf(levelNameString, "%d", (int)paraLevel);
501 paraLevelName=levelNameString;
502 }
503 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) ||
504 paraLevel==(UBIDI_MAX_EXPLICIT_LEVEL+2)) {
505 errln("\nError on line %d: Input paragraph direction incorrect at %s", (int)lineNumber, start);
506 printErrorLine();
507 continue;
508 }
509 start=u_skipWhitespace(end);
510 if(*start!=';') {
511 errorCount++;
512 errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line);
513 continue;
514 }
515 start++;
516 uint32_t resolvedParaLevel=(uint32_t)strtoul(start, &end, 10);
517 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) ||
518 resolvedParaLevel>1) {
519 errln("\nError on line %d: Resolved paragraph level incorrect at %s", (int)lineNumber, start);
520 printErrorLine();
521 continue;
522 }
523 start=u_skipWhitespace(end);
524 if(*start!=';') {
525 errorCount++;
526 errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line);
527 return;
528 }
529 start++;
530 if(!parseLevels(start)) {
531 continue;
532 }
533 start=u_skipWhitespace(start);
534 if(*start==';') {
535 if(!parseOrdering(start+1)) {
536 continue;
537 }
538 }
539 else
540 orderingCount=-1;
541
542 ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(),
543 paraLevel, NULL, errorCode);
544 const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode);
545 if(errorCode.errIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) {
546 errln("Input line %d: %s", (int)lineNumber, line);
547 continue;
548 }
549 UBiDiLevel actualLevel;
550 if((actualLevel=ubidi_getParaLevel(ubidi.getAlias()))!=resolvedParaLevel) {
551 printErrorLine();
552 errln("\nError on line %d: Wrong resolved paragraph level; expected %d actual %d",
553 (int)lineNumber, resolvedParaLevel, actualLevel);
554 continue;
555 }
556 if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()))) {
557 continue;
558 }
559 if(orderingCount>=0 && !checkOrdering(ubidi.getAlias())) {
560 continue;
561 }
562 }
563 }
564
printLevel(UBiDiLevel level)565 static UChar printLevel(UBiDiLevel level) {
566 if(level<UBIDI_DEFAULT_LTR) {
567 return 0x30+level;
568 } else {
569 return 0x78; // 'x'
570 }
571 }
572
getDirectionBits(const UBiDiLevel actualLevels[],int32_t actualCount)573 static uint32_t getDirectionBits(const UBiDiLevel actualLevels[], int32_t actualCount) {
574 uint32_t actualDirectionBits=0;
575 for(int32_t i=0; i<actualCount; ++i) {
576 actualDirectionBits|=(1<<(actualLevels[i]&1));
577 }
578 return actualDirectionBits;
579 }
580
checkLevels(const UBiDiLevel actualLevels[],int32_t actualCount)581 UBool BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount) {
582 UBool isOk=true;
583 if(levelsCount!=actualCount) {
584 errln("\nError on line %d: Wrong number of level values; expected %d actual %d",
585 (int)lineNumber, (int)levelsCount, (int)actualCount);
586 isOk=false;
587 } else {
588 for(int32_t i=0; i<actualCount; ++i) {
589 if(levels[i]!=actualLevels[i] && levels[i]<UBIDI_DEFAULT_LTR) {
590 if(directionBits!=3 && directionBits==getDirectionBits(actualLevels, actualCount)) {
591 // ICU used a shortcut:
592 // Since the text is unidirectional, it did not store the resolved
593 // levels but just returns all levels as the paragraph level 0 or 1.
594 // The reordering result is the same, so this is fine.
595 break;
596 } else {
597 errln("\nError on line %d: Wrong level value at index %d; expected %d actual %d",
598 (int)lineNumber, (int)i, levels[i], actualLevels[i]);
599 isOk=false;
600 break;
601 }
602 }
603 }
604 }
605 if(!isOk) {
606 printErrorLine();
607 UnicodeString els("Expected levels: ");
608 int32_t i;
609 for(i=0; i<levelsCount; ++i) {
610 els.append((UChar)0x20).append(printLevel(levels[i]));
611 }
612 UnicodeString als("Actual levels: ");
613 for(i=0; i<actualCount; ++i) {
614 als.append((UChar)0x20).append(printLevel(actualLevels[i]));
615 }
616 errln(els);
617 errln(als);
618 }
619 return isOk;
620 }
621
622 // Note: ubidi_setReorderingOptions(ubidi, UBIDI_OPTION_REMOVE_CONTROLS);
623 // does not work for custom BiDi class assignments
624 // and anyway also removes LRM/RLM/ZWJ/ZWNJ which is not desirable here.
625 // Therefore we just skip the indexes for BiDi controls while comparing
626 // with the expected ordering that has them omitted.
checkOrdering(UBiDi * ubidi)627 UBool BiDiConformanceTest::checkOrdering(UBiDi *ubidi) {
628 UBool isOk=true;
629 IcuTestErrorCode errorCode(*this, "checkOrdering()");
630 int32_t resultLength=ubidi_getResultLength(ubidi); // visual length including BiDi controls
631 int32_t i, visualIndex;
632 // Note: It should be faster to call ubidi_countRuns()/ubidi_getVisualRun()
633 // and loop over each run's indexes, but that seems unnecessary for this test code.
634 for(i=visualIndex=0; i<resultLength; ++i) {
635 int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode);
636 if(errorCode.errIfFailureAndReset("ubidi_getLogicalIndex()")) {
637 errln("Input line %d: %s", (int)lineNumber, line);
638 return false;
639 }
640 if(levels[logicalIndex]>=UBIDI_DEFAULT_LTR) {
641 continue; // BiDi control, omitted from expected ordering.
642 }
643 if(visualIndex<orderingCount && logicalIndex!=ordering[visualIndex]) {
644 errln("\nError on line %d: Wrong ordering value at visual index %d; expected %d actual %d",
645 (int)lineNumber, (int)visualIndex, ordering[visualIndex], logicalIndex);
646 isOk=false;
647 break;
648 }
649 ++visualIndex;
650 }
651 // visualIndex is now the visual length minus the BiDi controls,
652 // which should match the length of the BidiTest.txt ordering.
653 if(isOk && orderingCount!=visualIndex) {
654 errln("\nError on line %d: Wrong number of ordering values; expected %d actual %d",
655 (int)lineNumber, (int)orderingCount, (int)visualIndex);
656 isOk=false;
657 }
658 if(!isOk) {
659 printErrorLine();
660 UnicodeString eord("Expected ordering: ");
661 for(i=0; i<orderingCount; ++i) {
662 eord.append((UChar)0x20).append((UChar)(0x30+ordering[i]));
663 }
664 UnicodeString aord("Actual ordering: ");
665 for(i=0; i<resultLength; ++i) {
666 int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode);
667 if(levels[logicalIndex]<UBIDI_DEFAULT_LTR) {
668 aord.append((UChar)0x20).append((UChar)(0x30+logicalIndex));
669 }
670 }
671 errln(eord);
672 errln(aord);
673 }
674 return isOk;
675 }
676
printErrorLine()677 void BiDiConformanceTest::printErrorLine() {
678 ++errorCount;
679 errln("Input line %5d: %s", (int)lineNumber, line);
680 errln(UnicodeString("Input string: ")+inputString);
681 errln("Para level: %s", paraLevelName);
682 }
683