• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*************************************************************************
4  * Copyright (c) 1999-2016, International Business Machines
5  * Corporation and others. All Rights Reserved.
6  *************************************************************************
7  *   Date        Name        Description
8  *   12/15/99    Madhu        Creation.
9  *   01/12/2000  Madhu        Updated for changed API and added new tests
10  ************************************************************************/
11 
12 
13 #ifndef RBBITEST_H
14 #define RBBITEST_H
15 
16 #include "unicode/utypes.h"
17 
18 #if !UCONFIG_NO_BREAK_ITERATION
19 
20 #include <memory>
21 
22 #include "intltest.h"
23 #include "unicode/brkiter.h"
24 #include "unicode/rbbi.h"
25 #include "unicode/uscript.h"
26 
27 class  Enumeration;     // NOTE(review): not referenced elsewhere in the visible part of this header -- confirm it is still needed
28 class  BITestData;      // NOTE(review): not referenced elsewhere in the visible part of this header -- confirm it is still needed
29 struct TestParams;      // passed by pointer to RBBITest::executeTest()
30 class  RBBIMonkeyKind;  // passed by reference to RBBITest::RunMonkey()
31     // UVector32 is forward-declared inside the U_NAMESPACE_BEGIN/END bracket; it is passed by pointer to RBBITest::checkUnicodeTestCase().
32 U_NAMESPACE_BEGIN
33 class  UVector32;
34 U_NAMESPACE_END
35 
36 /**
37  * Test the RuleBasedBreakIterator class giving different rules.
38  * Derives from IntlTest; this header only declares the members. */
39 class RBBITest: public IntlTest {
40 public:
41     // Construction and destruction.
42     RBBITest();
43     virtual ~RBBITest();
44     // Overrides the IntlTest entry point. NOTE(review): presumably selects one of the Test* methods below by index -- confirm in the implementation file.
45     void runIndexedTest( int32_t index, UBool exec, const char* &name, char* par = nullptr ) override;
46     // Individual test cases; only declared here.
47     void TestGetAvailableLocales();
48     void TestGetDisplayName();
49     void TestEndBehaviour();
50     void TestBug4153072();
51     void TestJapaneseLineBreak();
52     void TestThaiLineBreak();
53     void TestMixedThaiLineBreak();
54     void TestMaiyamok();
55     void TestMonkey();
56     // NOTE(review): executeTest() looks like the per-case worker for TestExtended() -- confirm in the implementation file.
57     void TestExtended();
58     void executeTest(TestParams *, UErrorCode &status);
59 
60     void TestWordBreaks();
61     void TestWordBoundary();
62     void TestLineBreaks();
63     void TestSentBreaks();
64     void TestBug3818();
65     void TestJapaneseWordBreak();
66     void TestTrieDict();
67     void TestUnicodeFiles();
68     void TestBug5775();
69     void TestTailoredBreaks();
70     void TestDictRules();
71     void TestBug5532();
72     void TestBug9983();
73     void TestBug7547();
74     void TestBug12797();
75     void TestBug12918();
76     void TestBug12932();
77     void TestEmoji();
78     void TestBug12519();
79     void TestBug12677();
80     void TestTableRedundancies();
81     void TestBug13447();
82     void TestReverse();
83     void TestReverse(std::unique_ptr<RuleBasedBreakIterator>bi);  // overload taking the iterator by value, so the caller hands over ownership
84     void TestBug13692();
85     void TestDebugRules();
86     void TestUnpairedSurrogate();
87 
88     void TestDebug();
89     void TestProperties();
90     void Test8BitsTrieWith8BitStateTable();
91     void Test8BitsTrieWith16BitStateTable();
92     void Test16BitsTrieWith8BitStateTable();
93     void Test16BitsTrieWith16BitStateTable();
94     void TestTable_8_16_Bits();
95     void TestBug13590();
96     void TestLSTMThai();
97     void TestLSTMBurmese();
98     void TestRandomAccess();
99     void TestExternalBreakEngineWithFakeTaiLe();
100     void TestExternalBreakEngineWithFakeYue();
101     // The TestTraceCreate* cases below are declared only when U_ENABLE_TRACING is nonzero.
102 #if U_ENABLE_TRACING
103     void TestTraceCreateCharacter();
104     void TestTraceCreateWord();
105     void TestTraceCreateSentence();
106     void TestTraceCreateTitle();
107     void TestTraceCreateLine();
108     void TestTraceCreateLineNormal();
109     void TestTraceCreateLineStrict();
110     void TestTraceCreateLineLoose();
111     void TestTraceCreateLineNormalPhrase();
112     void TestTraceCreateLineLoosePhrase();
113     void TestTraceCreateLineStrictPhrase();
114     void TestTraceCreateLinePhrase();
115     void TestTraceCreateBreakEngine();
116 #endif
117 
118 /***********************/
119 private:
120     /**
121      * Private helpers: test drivers, a known-issue filter, and the stored test parameters.
122      **/
123     // Monkey-test driver. NOTE(review): the meaning of seed / loopCount / useUText is defined in the implementation file -- confirm there.
124     void RunMonkey(BreakIterator *bi, RBBIMonkeyKind &mk, const char *name, uint32_t  seed,
125         int32_t loopCount, UBool useUText);
126 
127     // Run one of the Unicode Consortium boundary test data files.
128     void runUnicodeTestData(const char *fileName, RuleBasedBreakIterator *bi);
129 
130     // Run tests from one of the LSTM test files.
131     void runLSTMTestFromFile(const char* filename, UScriptCode script);
132 
133     // Run a single test case from one of the Unicode Consortium test files.
134     void checkUnicodeTestCase(const char *testFileName, int lineNumber,
135                          const UnicodeString &testString,
136                          UVector32 *breakPositions,
137                          RuleBasedBreakIterator *bi);
138 
139     // Run the actual tests for TestTailoredBreaks()
140     void TBTest(BreakIterator* brkitr, int type, const char *locale, const char* escapedText,
141                 const int32_t *expectOffsets, int32_t expectOffsetsCount);
142 
143     /** Filter for test cases from the Unicode test data files.
144      *  Some need to be skipped because ICU is unable to fully implement the
145      *  Unicode boundary specifications.
146      *  @param testCase the test data string.
147      *  @param fileName the Unicode test data file name.
148      *  @return false if the test case should be run, true if it should be skipped.
149      */
150     UBool testCaseIsKnownIssue(const UnicodeString &testCase, const char *fileName);
151 
152     // Test parameters, from the test framework and test invocation.
153     const char* fTestParams;
154 
155     // Helper function to test different trie bit sizes and state table bit sizes.
156     void testTrieStateTable(int32_t numChar, bool expectedTrieWidthIn8Bits, bool expectedStateRowIn8Bits);
157     // Declared only when U_ENABLE_TRACING is nonzero, like the TestTraceCreate* cases.
158 #if U_ENABLE_TRACING
159     void assertTestTraceResult(int32_t fnNumber, const char* expectedData);
160 #endif
161 
162 };
163 
164 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
165 
166 #endif
167