• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 *
4 *   Copyright (C) 2009, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *   file name:  bidiconf.cpp
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2009oct16
14 *   created by: Markus W. Scherer
15 *
16 *   BiDi conformance test, using the Unicode BidiTest.txt file.
17 */
18 
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include "unicode/utypes.h"
23 #include "unicode/ubidi.h"
24 #include "unicode/errorcode.h"
25 #include "unicode/localpointer.h"
26 #include "unicode/putil.h"
27 #include "unicode/unistr.h"
28 #include "intltest.h"
29 #include "uparse.h"
30 
31 class BiDiConformanceTest : public IntlTest {
32 public:
BiDiConformanceTest()33     BiDiConformanceTest() :
34         directionBits(0), lineNumber(0), levelsCount(0), orderingCount(0),
35         errorCount(0) {}
36 
37     void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
38 
39     void TestBidiTest();
40 private:
41     char *getUnidataPath(char path[]);
42 
43     UBool parseLevels(const char *start);
44     UBool parseOrdering(const char *start);
45     UBool parseInputStringFromBiDiClasses(const char *&start);
46 
47     UBool checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount,
48                       const char *paraLevelName);
49     UBool checkOrdering(UBiDi *ubidi, const char *paraLevelName);
50 
51     void printErrorLine(const char *paraLevelName);
52 
53     char line[10000];
54     UBiDiLevel levels[1000];
55     uint32_t directionBits;
56     int32_t ordering[1000];
57     int32_t lineNumber;
58     int32_t levelsCount;
59     int32_t orderingCount;
60     int32_t errorCount;
61     UnicodeString inputString;
62 };
63 
createBiDiConformanceTest()64 extern IntlTest *createBiDiConformanceTest() {
65     return new BiDiConformanceTest();
66 }
67 
runIndexedTest(int32_t index,UBool exec,const char * & name,char * par)68 void BiDiConformanceTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char *par) {
69     if(exec) {
70         logln("TestSuite BiDiConformanceTest: ");
71     }
72     switch (index) {
73         TESTCASE(0, TestBidiTest);
74         default:
75             name="";
76             break; // needed to end the loop
77     }
78 }
79 
80 // TODO: Move to a common place (IntlTest?) to avoid duplication with UnicodeTest (ucdtest.cpp).
getUnidataPath(char path[])81 char *BiDiConformanceTest::getUnidataPath(char path[]) {
82     IcuTestErrorCode errorCode(*this, "getUnidataPath");
83     const int kUnicodeDataTxtLength=15;  // strlen("UnicodeData.txt")
84 
85     // Look inside ICU_DATA first.
86     strcpy(path, pathToDataDirectory());
87     strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt");
88     FILE *f=fopen(path, "r");
89     if(f!=NULL) {
90         fclose(f);
91         *(strchr(path, 0)-kUnicodeDataTxtLength)=0;  // Remove the basename.
92         return path;
93     }
94 
95     // As a fallback, try to guess where the source data was located
96     // at the time ICU was built, and look there.
97 #   ifdef U_TOPSRCDIR
98         strcpy(path, U_TOPSRCDIR  U_FILE_SEP_STRING "data");
99 #   else
100         strcpy(path, loadTestData(errorCode));
101         strcat(path, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".."
102                      U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".."
103                      U_FILE_SEP_STRING "data");
104 #   endif
105     strcat(path, U_FILE_SEP_STRING);
106     strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt");
107     f=fopen(path, "r");
108     if(f!=NULL) {
109         fclose(f);
110         *(strchr(path, 0)-kUnicodeDataTxtLength)=0;  // Remove the basename.
111         return path;
112     }
113     return NULL;
114 }
115 
116 U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose);
117 
// TODO: Make "public" in uparse.h.
// True if c is a space, tab, carriage return or line feed.
// Note: the argument is evaluated more than once.
#define U_IS_INV_WHITESPACE(c) \
    ((c)==' ' || (c)=='\t' || (c)=='\r' || (c)=='\n')
120 
parseLevels(const char * start)121 UBool BiDiConformanceTest::parseLevels(const char *start) {
122     directionBits=0;
123     levelsCount=0;
124     while(*start!=0 && *(start=u_skipWhitespace(start))!=0) {
125         if(*start=='x') {
126             levels[levelsCount++]=UBIDI_DEFAULT_LTR;
127             ++start;
128         } else {
129             char *end;
130             uint32_t value=(uint32_t)strtoul(start, &end, 10);
131             if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0) || value>(UBIDI_MAX_EXPLICIT_LEVEL+1)) {
132                 errln("@Levels: parse error at %s", start);
133                 return FALSE;
134             }
135             levels[levelsCount++]=(UBiDiLevel)value;
136             directionBits|=(1<<(value&1));
137             start=end;
138         }
139     }
140     return TRUE;
141 }
142 
parseOrdering(const char * start)143 UBool BiDiConformanceTest::parseOrdering(const char *start) {
144     orderingCount=0;
145     while(*start!=0 && *(start=u_skipWhitespace(start))!=0) {
146         char *end;
147         uint32_t value=(uint32_t)strtoul(start, &end, 10);
148         if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0) || value>=1000) {
149             errln("@Reorder: parse error at %s", start);
150             return FALSE;
151         }
152         ordering[orderingCount++]=(int32_t)value;
153         start=end;
154     }
155     return TRUE;
156 }
157 
158 static const UChar charFromBiDiClass[U_CHAR_DIRECTION_COUNT]={
159     0x6c,   // 'l' for L
160     0x52,   // 'R' for R
161     0x33,   // '3' for EN
162     0x2d,   // '-' for ES
163     0x25,   // '%' for ET
164     0x39,   // '9' for AN
165     0x2c,   // ',' for CS
166     0x2f,   // '/' for B
167     0x5f,   // '_' for S
168     0x20,   // ' ' for WS
169     0x3d,   // '=' for ON
170     0x65,   // 'e' for LRE
171     0x6f,   // 'o' for LRO
172     0x41,   // 'A' for AL
173     0x45,   // 'E' for RLE
174     0x4f,   // 'O' for RLO
175     0x2a,   // '*' for PDF
176     0x60,   // '`' for NSM
177     0x7c    // '|' for BN
178 };
179 
180 U_CDECL_BEGIN
181 
182 static UCharDirection U_CALLCONV
biDiConfUBiDiClassCallback(const void * context,UChar32 c)183 biDiConfUBiDiClassCallback(const void *context, UChar32 c) {
184     for(int i=0; i<U_CHAR_DIRECTION_COUNT; ++i) {
185         if(c==charFromBiDiClass[i]) {
186             return (UCharDirection)i;
187         }
188     }
189     // Character not in our hardcoded table.
190     // Should not occur during testing.
191     return U_BIDI_CLASS_DEFAULT;
192 }
193 
194 U_CDECL_END
195 
196 static const int8_t biDiClassNameLengths[U_CHAR_DIRECTION_COUNT+1]={
197     1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 0
198 };
199 
parseInputStringFromBiDiClasses(const char * & start)200 UBool BiDiConformanceTest::parseInputStringFromBiDiClasses(const char *&start) {
201     inputString.remove();
202     /*
203      * Lengthy but fast BiDi class parser.
204      * A simple parser could terminate or extract the name string and use
205      *   int32_t biDiClassInt=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, bidiClassString);
206      * but that makes this test take significantly more time.
207      */
208     while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') {
209         UCharDirection biDiClass=U_CHAR_DIRECTION_COUNT;
210         // Compare each character once until we have a match on
211         // a complete, short BiDi class name.
212         if(start[0]=='L') {
213             if(start[1]=='R') {
214                 if(start[2]=='E') {
215                     biDiClass=U_LEFT_TO_RIGHT_EMBEDDING;
216                 } else if(start[2]=='O') {
217                     biDiClass=U_LEFT_TO_RIGHT_OVERRIDE;
218                 }
219             } else {
220                 biDiClass=U_LEFT_TO_RIGHT;
221             }
222         } else if(start[0]=='R') {
223             if(start[1]=='L') {
224                 if(start[2]=='E') {
225                     biDiClass=U_RIGHT_TO_LEFT_EMBEDDING;
226                 } else if(start[2]=='O') {
227                     biDiClass=U_RIGHT_TO_LEFT_OVERRIDE;
228                 }
229             } else {
230                 biDiClass=U_RIGHT_TO_LEFT;
231             }
232         } else if(start[0]=='E') {
233             if(start[1]=='N') {
234                 biDiClass=U_EUROPEAN_NUMBER;
235             } else if(start[1]=='S') {
236                 biDiClass=U_EUROPEAN_NUMBER_SEPARATOR;
237             } else if(start[1]=='T') {
238                 biDiClass=U_EUROPEAN_NUMBER_TERMINATOR;
239             }
240         } else if(start[0]=='A') {
241             if(start[1]=='L') {
242                 biDiClass=U_RIGHT_TO_LEFT_ARABIC;
243             } else if(start[1]=='N') {
244                 biDiClass=U_ARABIC_NUMBER;
245             }
246         } else if(start[0]=='C' && start[1]=='S') {
247             biDiClass=U_COMMON_NUMBER_SEPARATOR;
248         } else if(start[0]=='B') {
249             if(start[1]=='N') {
250                 biDiClass=U_BOUNDARY_NEUTRAL;
251             } else {
252                 biDiClass=U_BLOCK_SEPARATOR;
253             }
254         } else if(start[0]=='S') {
255             biDiClass=U_SEGMENT_SEPARATOR;
256         } else if(start[0]=='W' && start[1]=='S') {
257             biDiClass=U_WHITE_SPACE_NEUTRAL;
258         } else if(start[0]=='O' && start[1]=='N') {
259             biDiClass=U_OTHER_NEUTRAL;
260         } else if(start[0]=='P' && start[1]=='D' && start[2]=='F') {
261             biDiClass=U_POP_DIRECTIONAL_FORMAT;
262         } else if(start[0]=='N' && start[1]=='S' && start[2]=='M') {
263             biDiClass=U_DIR_NON_SPACING_MARK;
264         }
265         // Now we verify that the class name is terminated properly,
266         // and not just the start of a longer word.
267         int8_t biDiClassNameLength=biDiClassNameLengths[biDiClass];
268         char c=start[biDiClassNameLength];
269         if(biDiClass==U_CHAR_DIRECTION_COUNT || (!U_IS_INV_WHITESPACE(c) && c!=';' && c!=0)) {
270             errln("BiDi class string not recognized at %s", start);
271             return FALSE;
272         }
273         inputString.append(charFromBiDiClass[biDiClass]);
274         start+=biDiClassNameLength;
275     }
276     return TRUE;
277 }
278 
TestBidiTest()279 void BiDiConformanceTest::TestBidiTest() {
280     IcuTestErrorCode errorCode(*this, "TestBidiTest");
281     const char *sourceTestDataPath=getSourceTestData(errorCode);
282     if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata "
283                                       "folder (getSourceTestData())")) {
284         return;
285     }
286     char bidiTestPath[400];
287     strcpy(bidiTestPath, sourceTestDataPath);
288     strcat(bidiTestPath, "BidiTest.txt");
289     LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r"));
290     if(bidiTestFile.isNull()) {
291         errln("unable to open %s", bidiTestPath);
292         return;
293     }
294     LocalUBiDiPointer ubidi(ubidi_open());
295     ubidi_setClassCallback(ubidi.getAlias(), biDiConfUBiDiClassCallback, NULL,
296                            NULL, NULL, errorCode);
297     if(errorCode.logIfFailureAndReset("ubidi_setClassCallback()")) {
298         return;
299     }
300     lineNumber=0;
301     levelsCount=0;
302     orderingCount=0;
303     errorCount=0;
304     while(errorCount<10 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) {
305         ++lineNumber;
306         // Remove trailing comments and whitespace.
307         char *commentStart=strchr(line, '#');
308         if(commentStart!=NULL) {
309             *commentStart=0;
310         }
311         u_rtrim(line);
312         const char *start=u_skipWhitespace(line);
313         if(*start==0) {
314             continue;  // Skip empty and comment-only lines.
315         }
316         if(*start=='@') {
317             ++start;
318             if(0==strncmp(start, "Levels:", 7)) {
319                 if(!parseLevels(start+7)) {
320                     return;
321                 }
322             } else if(0==strncmp(start, "Reorder:", 8)) {
323                 if(!parseOrdering(start+8)) {
324                     return;
325                 }
326             }
327             // Skip unknown @Xyz: ...
328         } else {
329             if(!parseInputStringFromBiDiClasses(start)) {
330                 return;
331             }
332             start=u_skipWhitespace(start);
333             if(*start!=';') {
334                 errln("missing ; separator on input line %s", line);
335                 return;
336             }
337             start=u_skipWhitespace(start+1);
338             char *end;
339             uint32_t bitset=(uint32_t)strtoul(start, &end, 10);
340             if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0)) {
341                 errln("input bitset parse error at %s", start);
342                 return;
343             }
344             // Loop over the bitset.
345             static const UBiDiLevel paraLevels[]={ UBIDI_DEFAULT_LTR, 0, 1 };
346             static const char *const paraLevelNames[]={ "auto/LTR", "LTR", "RTL" };
347             for(int i=0; i<=2; ++i) {
348                 if(bitset&(1<<i)) {
349                     ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(),
350                                   paraLevels[i], NULL, errorCode);
351                     const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode);
352                     if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) {
353                         errln("Input line %d: %s", (int)lineNumber, line);
354                         return;
355                     }
356                     if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()),
357                                     paraLevelNames[i])) {
358                         // continue outerLoop;  does not exist in C++
359                         // so just break out of the inner loop.
360                         break;
361                     }
362                     if(!checkOrdering(ubidi.getAlias(), paraLevelNames[i])) {
363                         // continue outerLoop;  does not exist in C++
364                         // so just break out of the inner loop.
365                         break;
366                     }
367                 }
368             }
369         }
370     }
371 }
372 
printLevel(UBiDiLevel level)373 static UChar printLevel(UBiDiLevel level) {
374     if(level<UBIDI_DEFAULT_LTR) {
375         return 0x30+level;
376     } else {
377         return 0x78;  // 'x'
378     }
379 }
380 
/**
 * Summarizes the parities of a list of levels.
 *
 * @return a bit set with bit 0 set if any level is even
 *         and bit 1 set if any level is odd; 0 for an empty list
 */
static uint32_t getDirectionBits(const UBiDiLevel actualLevels[], int32_t actualCount) {
    uint32_t bits=0;
    int32_t index=0;
    while(index<actualCount) {
        const int parity=actualLevels[index]&1;
        bits|=(1<<parity);
        ++index;
    }
    return bits;
}
388 
checkLevels(const UBiDiLevel actualLevels[],int32_t actualCount,const char * paraLevelName)389 UBool BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount,
390                                        const char *paraLevelName) {
391     UBool isOk=TRUE;
392     if(levelsCount!=actualCount) {
393         errln("Wrong number of level values; expected %d actual %d",
394               (int)levelsCount, (int)actualCount);
395         isOk=FALSE;
396     } else {
397         for(int32_t i=0; i<actualCount; ++i) {
398             if(levels[i]!=actualLevels[i] && levels[i]<UBIDI_DEFAULT_LTR) {
399                 if(directionBits!=3 && directionBits==getDirectionBits(actualLevels, actualCount)) {
400                     // ICU used a shortcut:
401                     // Since the text is unidirectional, it did not store the resolved
402                     // levels but just returns all levels as the paragraph level 0 or 1.
403                     // The reordering result is the same, so this is fine.
404                     break;
405                 } else {
406                     errln("Wrong level value at index %d; expected %d actual %d",
407                           (int)i, levels[i], actualLevels[i]);
408                     isOk=FALSE;
409                     break;
410                 }
411             }
412         }
413     }
414     if(!isOk) {
415         printErrorLine(paraLevelName);
416         UnicodeString els("Expected levels:   ");
417         int32_t i;
418         for(i=0; i<levelsCount; ++i) {
419             els.append((UChar)0x20).append(printLevel(levels[i]));
420         }
421         UnicodeString als("Actual   levels:   ");
422         for(i=0; i<actualCount; ++i) {
423             als.append((UChar)0x20).append(printLevel(actualLevels[i]));
424         }
425         errln(els);
426         errln(als);
427     }
428     return isOk;
429 }
430 
431 // Note: ubidi_setReorderingOptions(ubidi, UBIDI_OPTION_REMOVE_CONTROLS);
432 // does not work for custom BiDi class assignments
433 // and anyway also removes LRM/RLM/ZWJ/ZWNJ which is not desirable here.
434 // Therefore we just skip the indexes for BiDi controls while comparing
435 // with the expected ordering that has them omitted.
checkOrdering(UBiDi * ubidi,const char * paraLevelName)436 UBool BiDiConformanceTest::checkOrdering(UBiDi *ubidi, const char *paraLevelName) {
437     UBool isOk=TRUE;
438     IcuTestErrorCode errorCode(*this, "TestBidiTest/checkOrdering()");
439     int32_t resultLength=ubidi_getResultLength(ubidi);  // visual length including BiDi controls
440     int32_t i, visualIndex;
441     // Note: It should be faster to call ubidi_countRuns()/ubidi_getVisualRun()
442     // and loop over each run's indexes, but that seems unnecessary for this test code.
443     for(i=visualIndex=0; i<resultLength; ++i) {
444         int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode);
445         if(errorCode.logIfFailureAndReset("ubidi_getLogicalIndex()")) {
446             errln("Input line %d: %s", (int)lineNumber, line);
447             return FALSE;
448         }
449         if(levels[logicalIndex]>=UBIDI_DEFAULT_LTR) {
450             continue;  // BiDi control, omitted from expected ordering.
451         }
452         if(visualIndex<orderingCount && logicalIndex!=ordering[visualIndex]) {
453             errln("Wrong ordering value at visual index %d; expected %d actual %d",
454                   (int)visualIndex, ordering[visualIndex], logicalIndex);
455             isOk=FALSE;
456             break;
457         }
458         ++visualIndex;
459     }
460     // visualIndex is now the visual length minus the BiDi controls,
461     // which should match the length of the BidiTest.txt ordering.
462     if(isOk && orderingCount!=visualIndex) {
463         errln("Wrong number of ordering values; expected %d actual %d",
464               (int)orderingCount, (int)visualIndex);
465         isOk=FALSE;
466     }
467     if(!isOk) {
468         printErrorLine(paraLevelName);
469         UnicodeString eord("Expected ordering: ");
470         for(i=0; i<orderingCount; ++i) {
471             eord.append((UChar)0x20).append((UChar)(0x30+ordering[i]));
472         }
473         UnicodeString aord("Actual   ordering: ");
474         for(i=0; i<resultLength; ++i) {
475             int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode);
476             if(levels[logicalIndex]<UBIDI_DEFAULT_LTR) {
477                 aord.append((UChar)0x20).append((UChar)(0x30+logicalIndex));
478             }
479         }
480         errln(eord);
481         errln(aord);
482     }
483     return isOk;
484 }
485 
printErrorLine(const char * paraLevelName)486 void BiDiConformanceTest::printErrorLine(const char *paraLevelName) {
487     ++errorCount;
488     errln("Input line %5d:   %s", (int)lineNumber, line);
489     errln(UnicodeString("Input string:       ")+inputString);
490     errln("Para level:         %s", paraLevelName);
491 }
492