• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 *
4 *   Copyright (C) 2009-2010, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *   file name:  bidiconf.cpp
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2009oct16
14 *   created by: Markus W. Scherer
15 *
16 *   BiDi conformance test, using the Unicode BidiTest.txt file.
17 */
18 
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include "unicode/utypes.h"
23 #include "unicode/ubidi.h"
24 #include "unicode/errorcode.h"
25 #include "unicode/localpointer.h"
26 #include "unicode/putil.h"
27 #include "unicode/unistr.h"
28 #include "intltest.h"
29 #include "uparse.h"
30 
31 class BiDiConformanceTest : public IntlTest {
32 public:
BiDiConformanceTest()33     BiDiConformanceTest() :
34         directionBits(0), lineNumber(0), levelsCount(0), orderingCount(0),
35         errorCount(0) {}
36 
37     void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
38 
39     void TestBidiTest();
40 private:
41     char *getUnidataPath(char path[]);
42 
43     UBool parseLevels(const char *start);
44     UBool parseOrdering(const char *start);
45     UBool parseInputStringFromBiDiClasses(const char *&start);
46 
47     UBool checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount,
48                       const char *paraLevelName);
49     UBool checkOrdering(UBiDi *ubidi, const char *paraLevelName);
50 
51     void printErrorLine(const char *paraLevelName);
52 
53     char line[10000];
54     UBiDiLevel levels[1000];
55     uint32_t directionBits;
56     int32_t ordering[1000];
57     int32_t lineNumber;
58     int32_t levelsCount;
59     int32_t orderingCount;
60     int32_t errorCount;
61     UnicodeString inputString;
62 };
63 
createBiDiConformanceTest()64 extern IntlTest *createBiDiConformanceTest() {
65     return new BiDiConformanceTest();
66 }
67 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)68 void BiDiConformanceTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
69     if(exec) {
70         logln("TestSuite BiDiConformanceTest: ");
71     }
72     switch (index) {
73         TESTCASE(0, TestBidiTest);
74         default:
75             name="";
76             break; // needed to end the loop
77     }
78 }
79 
80 // TODO: Move to a common place (IntlTest?) to avoid duplication with UnicodeTest (ucdtest.cpp).
getUnidataPath(char path[])81 char *BiDiConformanceTest::getUnidataPath(char path[]) {
82     IcuTestErrorCode errorCode(*this, "getUnidataPath");
83     const int kUnicodeDataTxtLength=15;  // strlen("UnicodeData.txt")
84 
85     // Look inside ICU_DATA first.
86     strcpy(path, pathToDataDirectory());
87     strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt");
88     FILE *f=fopen(path, "r");
89     if(f!=NULL) {
90         fclose(f);
91         *(strchr(path, 0)-kUnicodeDataTxtLength)=0;  // Remove the basename.
92         return path;
93     }
94 
95     // As a fallback, try to guess where the source data was located
96     // at the time ICU was built, and look there.
97 #   ifdef U_TOPSRCDIR
98         strcpy(path, U_TOPSRCDIR  U_FILE_SEP_STRING "data");
99 #   else
100         strcpy(path, loadTestData(errorCode));
101         strcat(path, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".."
102                      U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".."
103                      U_FILE_SEP_STRING "data");
104 #   endif
105     strcat(path, U_FILE_SEP_STRING);
106     strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt");
107     f=fopen(path, "r");
108     if(f!=NULL) {
109         fclose(f);
110         *(strchr(path, 0)-kUnicodeDataTxtLength)=0;  // Remove the basename.
111         return path;
112     }
113     return NULL;
114 }
115 
// Declares LocalStdioFilePointer, used in TestBidiTest() to hold the FILE *
// returned by fopen() (via isNull()/getAlias()).
// NOTE(review): presumably the macro generates an owner class that calls
// fclose() when it goes out of scope -- confirm in unicode/localpointer.h.
U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose);
117 
parseLevels(const char * start)118 UBool BiDiConformanceTest::parseLevels(const char *start) {
119     directionBits=0;
120     levelsCount=0;
121     while(*start!=0 && *(start=u_skipWhitespace(start))!=0) {
122         if(*start=='x') {
123             levels[levelsCount++]=UBIDI_DEFAULT_LTR;
124             ++start;
125         } else {
126             char *end;
127             uint32_t value=(uint32_t)strtoul(start, &end, 10);
128             if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0) || value>(UBIDI_MAX_EXPLICIT_LEVEL+1)) {
129                 errln("@Levels: parse error at %s", start);
130                 return FALSE;
131             }
132             levels[levelsCount++]=(UBiDiLevel)value;
133             directionBits|=(1<<(value&1));
134             start=end;
135         }
136     }
137     return TRUE;
138 }
139 
parseOrdering(const char * start)140 UBool BiDiConformanceTest::parseOrdering(const char *start) {
141     orderingCount=0;
142     while(*start!=0 && *(start=u_skipWhitespace(start))!=0) {
143         char *end;
144         uint32_t value=(uint32_t)strtoul(start, &end, 10);
145         if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0) || value>=1000) {
146             errln("@Reorder: parse error at %s", start);
147             return FALSE;
148         }
149         ordering[orderingCount++]=(int32_t)value;
150         start=end;
151     }
152     return TRUE;
153 }
154 
155 static const UChar charFromBiDiClass[U_CHAR_DIRECTION_COUNT]={
156     0x6c,   // 'l' for L
157     0x52,   // 'R' for R
158     0x33,   // '3' for EN
159     0x2d,   // '-' for ES
160     0x25,   // '%' for ET
161     0x39,   // '9' for AN
162     0x2c,   // ',' for CS
163     0x2f,   // '/' for B
164     0x5f,   // '_' for S
165     0x20,   // ' ' for WS
166     0x3d,   // '=' for ON
167     0x65,   // 'e' for LRE
168     0x6f,   // 'o' for LRO
169     0x41,   // 'A' for AL
170     0x45,   // 'E' for RLE
171     0x4f,   // 'O' for RLO
172     0x2a,   // '*' for PDF
173     0x60,   // '`' for NSM
174     0x7c    // '|' for BN
175 };
176 
177 U_CDECL_BEGIN
178 
179 static UCharDirection U_CALLCONV
biDiConfUBiDiClassCallback(const void *,UChar32 c)180 biDiConfUBiDiClassCallback(const void * /*context*/, UChar32 c) {
181     for(int i=0; i<U_CHAR_DIRECTION_COUNT; ++i) {
182         if(c==charFromBiDiClass[i]) {
183             return (UCharDirection)i;
184         }
185     }
186     // Character not in our hardcoded table.
187     // Should not occur during testing.
188     return U_BIDI_CLASS_DEFAULT;
189 }
190 
191 U_CDECL_END
192 
193 static const int8_t biDiClassNameLengths[U_CHAR_DIRECTION_COUNT+1]={
194     1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 0
195 };
196 
parseInputStringFromBiDiClasses(const char * & start)197 UBool BiDiConformanceTest::parseInputStringFromBiDiClasses(const char *&start) {
198     inputString.remove();
199     /*
200      * Lengthy but fast BiDi class parser.
201      * A simple parser could terminate or extract the name string and use
202      *   int32_t biDiClassInt=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, bidiClassString);
203      * but that makes this test take significantly more time.
204      */
205     while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') {
206         UCharDirection biDiClass=U_CHAR_DIRECTION_COUNT;
207         // Compare each character once until we have a match on
208         // a complete, short BiDi class name.
209         if(start[0]=='L') {
210             if(start[1]=='R') {
211                 if(start[2]=='E') {
212                     biDiClass=U_LEFT_TO_RIGHT_EMBEDDING;
213                 } else if(start[2]=='O') {
214                     biDiClass=U_LEFT_TO_RIGHT_OVERRIDE;
215                 }
216             } else {
217                 biDiClass=U_LEFT_TO_RIGHT;
218             }
219         } else if(start[0]=='R') {
220             if(start[1]=='L') {
221                 if(start[2]=='E') {
222                     biDiClass=U_RIGHT_TO_LEFT_EMBEDDING;
223                 } else if(start[2]=='O') {
224                     biDiClass=U_RIGHT_TO_LEFT_OVERRIDE;
225                 }
226             } else {
227                 biDiClass=U_RIGHT_TO_LEFT;
228             }
229         } else if(start[0]=='E') {
230             if(start[1]=='N') {
231                 biDiClass=U_EUROPEAN_NUMBER;
232             } else if(start[1]=='S') {
233                 biDiClass=U_EUROPEAN_NUMBER_SEPARATOR;
234             } else if(start[1]=='T') {
235                 biDiClass=U_EUROPEAN_NUMBER_TERMINATOR;
236             }
237         } else if(start[0]=='A') {
238             if(start[1]=='L') {
239                 biDiClass=U_RIGHT_TO_LEFT_ARABIC;
240             } else if(start[1]=='N') {
241                 biDiClass=U_ARABIC_NUMBER;
242             }
243         } else if(start[0]=='C' && start[1]=='S') {
244             biDiClass=U_COMMON_NUMBER_SEPARATOR;
245         } else if(start[0]=='B') {
246             if(start[1]=='N') {
247                 biDiClass=U_BOUNDARY_NEUTRAL;
248             } else {
249                 biDiClass=U_BLOCK_SEPARATOR;
250             }
251         } else if(start[0]=='S') {
252             biDiClass=U_SEGMENT_SEPARATOR;
253         } else if(start[0]=='W' && start[1]=='S') {
254             biDiClass=U_WHITE_SPACE_NEUTRAL;
255         } else if(start[0]=='O' && start[1]=='N') {
256             biDiClass=U_OTHER_NEUTRAL;
257         } else if(start[0]=='P' && start[1]=='D' && start[2]=='F') {
258             biDiClass=U_POP_DIRECTIONAL_FORMAT;
259         } else if(start[0]=='N' && start[1]=='S' && start[2]=='M') {
260             biDiClass=U_DIR_NON_SPACING_MARK;
261         }
262         // Now we verify that the class name is terminated properly,
263         // and not just the start of a longer word.
264         int8_t biDiClassNameLength=biDiClassNameLengths[biDiClass];
265         char c=start[biDiClassNameLength];
266         if(biDiClass==U_CHAR_DIRECTION_COUNT || (!U_IS_INV_WHITESPACE(c) && c!=';' && c!=0)) {
267             errln("BiDi class string not recognized at %s", start);
268             return FALSE;
269         }
270         inputString.append(charFromBiDiClass[biDiClass]);
271         start+=biDiClassNameLength;
272     }
273     return TRUE;
274 }
275 
TestBidiTest()276 void BiDiConformanceTest::TestBidiTest() {
277     IcuTestErrorCode errorCode(*this, "TestBidiTest");
278     const char *sourceTestDataPath=getSourceTestData(errorCode);
279     if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata "
280                                       "folder (getSourceTestData())")) {
281         return;
282     }
283     char bidiTestPath[400];
284     strcpy(bidiTestPath, sourceTestDataPath);
285     strcat(bidiTestPath, "BidiTest.txt");
286     LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r"));
287     if(bidiTestFile.isNull()) {
288         errln("unable to open %s", bidiTestPath);
289         return;
290     }
291     LocalUBiDiPointer ubidi(ubidi_open());
292     ubidi_setClassCallback(ubidi.getAlias(), biDiConfUBiDiClassCallback, NULL,
293                            NULL, NULL, errorCode);
294     if(errorCode.logIfFailureAndReset("ubidi_setClassCallback()")) {
295         return;
296     }
297     lineNumber=0;
298     levelsCount=0;
299     orderingCount=0;
300     errorCount=0;
301     while(errorCount<10 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) {
302         ++lineNumber;
303         // Remove trailing comments and whitespace.
304         char *commentStart=strchr(line, '#');
305         if(commentStart!=NULL) {
306             *commentStart=0;
307         }
308         u_rtrim(line);
309         const char *start=u_skipWhitespace(line);
310         if(*start==0) {
311             continue;  // Skip empty and comment-only lines.
312         }
313         if(*start=='@') {
314             ++start;
315             if(0==strncmp(start, "Levels:", 7)) {
316                 if(!parseLevels(start+7)) {
317                     return;
318                 }
319             } else if(0==strncmp(start, "Reorder:", 8)) {
320                 if(!parseOrdering(start+8)) {
321                     return;
322                 }
323             }
324             // Skip unknown @Xyz: ...
325         } else {
326             if(!parseInputStringFromBiDiClasses(start)) {
327                 return;
328             }
329             start=u_skipWhitespace(start);
330             if(*start!=';') {
331                 errln("missing ; separator on input line %s", line);
332                 return;
333             }
334             start=u_skipWhitespace(start+1);
335             char *end;
336             uint32_t bitset=(uint32_t)strtoul(start, &end, 16);
337             if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0)) {
338                 errln("input bitset parse error at %s", start);
339                 return;
340             }
341             // Loop over the bitset.
342             static const UBiDiLevel paraLevels[]={ UBIDI_DEFAULT_LTR, 0, 1, UBIDI_DEFAULT_RTL };
343             static const char *const paraLevelNames[]={ "auto/LTR", "LTR", "RTL", "auto/RTL" };
344             for(int i=0; i<=3; ++i) {
345                 if(bitset&(1<<i)) {
346                     ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(),
347                                   paraLevels[i], NULL, errorCode);
348                     const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode);
349                     if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) {
350                         errln("Input line %d: %s", (int)lineNumber, line);
351                         return;
352                     }
353                     if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()),
354                                     paraLevelNames[i])) {
355                         // continue outerLoop;  does not exist in C++
356                         // so just break out of the inner loop.
357                         break;
358                     }
359                     if(!checkOrdering(ubidi.getAlias(), paraLevelNames[i])) {
360                         // continue outerLoop;  does not exist in C++
361                         // so just break out of the inner loop.
362                         break;
363                     }
364                 }
365             }
366         }
367     }
368 }
369 
printLevel(UBiDiLevel level)370 static UChar printLevel(UBiDiLevel level) {
371     if(level<UBIDI_DEFAULT_LTR) {
372         return 0x30+level;
373     } else {
374         return 0x78;  // 'x'
375     }
376 }
377 
getDirectionBits(const UBiDiLevel actualLevels[],int32_t actualCount)378 static uint32_t getDirectionBits(const UBiDiLevel actualLevels[], int32_t actualCount) {
379     uint32_t actualDirectionBits=0;
380     for(int32_t i=0; i<actualCount; ++i) {
381         actualDirectionBits|=(1<<(actualLevels[i]&1));
382     }
383     return actualDirectionBits;
384 }
385 
checkLevels(const UBiDiLevel actualLevels[],int32_t actualCount,const char * paraLevelName)386 UBool BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount,
387                                        const char *paraLevelName) {
388     UBool isOk=TRUE;
389     if(levelsCount!=actualCount) {
390         errln("Wrong number of level values; expected %d actual %d",
391               (int)levelsCount, (int)actualCount);
392         isOk=FALSE;
393     } else {
394         for(int32_t i=0; i<actualCount; ++i) {
395             if(levels[i]!=actualLevels[i] && levels[i]<UBIDI_DEFAULT_LTR) {
396                 if(directionBits!=3 && directionBits==getDirectionBits(actualLevels, actualCount)) {
397                     // ICU used a shortcut:
398                     // Since the text is unidirectional, it did not store the resolved
399                     // levels but just returns all levels as the paragraph level 0 or 1.
400                     // The reordering result is the same, so this is fine.
401                     break;
402                 } else {
403                     errln("Wrong level value at index %d; expected %d actual %d",
404                           (int)i, levels[i], actualLevels[i]);
405                     isOk=FALSE;
406                     break;
407                 }
408             }
409         }
410     }
411     if(!isOk) {
412         printErrorLine(paraLevelName);
413         UnicodeString els("Expected levels:   ");
414         int32_t i;
415         for(i=0; i<levelsCount; ++i) {
416             els.append((UChar)0x20).append(printLevel(levels[i]));
417         }
418         UnicodeString als("Actual   levels:   ");
419         for(i=0; i<actualCount; ++i) {
420             als.append((UChar)0x20).append(printLevel(actualLevels[i]));
421         }
422         errln(els);
423         errln(als);
424     }
425     return isOk;
426 }
427 
428 // Note: ubidi_setReorderingOptions(ubidi, UBIDI_OPTION_REMOVE_CONTROLS);
429 // does not work for custom BiDi class assignments
430 // and anyway also removes LRM/RLM/ZWJ/ZWNJ which is not desirable here.
431 // Therefore we just skip the indexes for BiDi controls while comparing
432 // with the expected ordering that has them omitted.
checkOrdering(UBiDi * ubidi,const char * paraLevelName)433 UBool BiDiConformanceTest::checkOrdering(UBiDi *ubidi, const char *paraLevelName) {
434     UBool isOk=TRUE;
435     IcuTestErrorCode errorCode(*this, "TestBidiTest/checkOrdering()");
436     int32_t resultLength=ubidi_getResultLength(ubidi);  // visual length including BiDi controls
437     int32_t i, visualIndex;
438     // Note: It should be faster to call ubidi_countRuns()/ubidi_getVisualRun()
439     // and loop over each run's indexes, but that seems unnecessary for this test code.
440     for(i=visualIndex=0; i<resultLength; ++i) {
441         int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode);
442         if(errorCode.logIfFailureAndReset("ubidi_getLogicalIndex()")) {
443             errln("Input line %d: %s", (int)lineNumber, line);
444             return FALSE;
445         }
446         if(levels[logicalIndex]>=UBIDI_DEFAULT_LTR) {
447             continue;  // BiDi control, omitted from expected ordering.
448         }
449         if(visualIndex<orderingCount && logicalIndex!=ordering[visualIndex]) {
450             errln("Wrong ordering value at visual index %d; expected %d actual %d",
451                   (int)visualIndex, ordering[visualIndex], logicalIndex);
452             isOk=FALSE;
453             break;
454         }
455         ++visualIndex;
456     }
457     // visualIndex is now the visual length minus the BiDi controls,
458     // which should match the length of the BidiTest.txt ordering.
459     if(isOk && orderingCount!=visualIndex) {
460         errln("Wrong number of ordering values; expected %d actual %d",
461               (int)orderingCount, (int)visualIndex);
462         isOk=FALSE;
463     }
464     if(!isOk) {
465         printErrorLine(paraLevelName);
466         UnicodeString eord("Expected ordering: ");
467         for(i=0; i<orderingCount; ++i) {
468             eord.append((UChar)0x20).append((UChar)(0x30+ordering[i]));
469         }
470         UnicodeString aord("Actual   ordering: ");
471         for(i=0; i<resultLength; ++i) {
472             int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode);
473             if(levels[logicalIndex]<UBIDI_DEFAULT_LTR) {
474                 aord.append((UChar)0x20).append((UChar)(0x30+logicalIndex));
475             }
476         }
477         errln(eord);
478         errln(aord);
479     }
480     return isOk;
481 }
482 
printErrorLine(const char * paraLevelName)483 void BiDiConformanceTest::printErrorLine(const char *paraLevelName) {
484     ++errorCount;
485     errln("Input line %5d:   %s", (int)lineNumber, line);
486     errln(UnicodeString("Input string:       ")+inputString);
487     errln("Para level:         %s", paraLevelName);
488 }
489