• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 //
4 //  rbbisetb.cpp
5 //
6 /*
7 ***************************************************************************
8 *   Copyright (C) 2002-2008 International Business Machines Corporation   *
9 *   and others. All rights reserved.                                      *
10 ***************************************************************************
11 */
12 //
//  RBBISetBuilder   Handles processing of Unicode Sets from RBBI rules
//                   (part of the rule building process.)
//
//      Starting with the rules parse tree from the scanner,
//
//                   -  Enumerate the set of UnicodeSets that are referenced
//                      by the RBBI rules.
//                   -  Compute a set of non-overlapping character ranges
//                      with all characters within a range belonging to the same
//                      set of input Unicode sets.
//                   -  Derive a set of non-overlapping UnicodeSet (like things)
//                      that will correspond to columns in the state table for
//                      the RBBI execution engine.  All characters within one
//                      of these sets belong to the same set of the original
//                      UnicodeSets from the user's rules.
//                   -  Construct the trie table that maps input characters
//                      to the index of the matching non-overlapping set of sets
//                      from the previous step.
31 //
32 
33 #include "unicode/utypes.h"
34 
35 #if !UCONFIG_NO_BREAK_ITERATION
36 
37 #include "unicode/uniset.h"
38 #include "utrie.h"
39 #include "uvector.h"
40 #include "uassert.h"
41 #include "cmemory.h"
42 #include "cstring.h"
43 
44 #include "rbbisetb.h"
45 #include "rbbinode.h"
46 
47 
48 //------------------------------------------------------------------------
49 //
50 //   getFoldedRBBIValue        Call-back function used during building of Trie table.
51 //                             Folding value: just store the offset (16 bits)
52 //                             if there is any non-0 entry.
53 //                             (It'd really be nice if the Trie builder would provide a
54 //                             simple default, so this function could go away from here.)
55 //
56 //------------------------------------------------------------------------
57 /* folding value: just store the offset (16 bits) if there is any non-0 entry */
58 U_CDECL_BEGIN
59 static uint32_t U_CALLCONV
getFoldedRBBIValue(UNewTrie * trie,UChar32 start,int32_t offset)60 getFoldedRBBIValue(UNewTrie *trie, UChar32 start, int32_t offset) {
61     uint32_t value;
62     UChar32 limit;
63     UBool inBlockZero;
64 
65     limit=start+0x400;
66     while(start<limit) {
67         value=utrie_get32(trie, start, &inBlockZero);
68         if(inBlockZero) {
69             start+=UTRIE_DATA_BLOCK_LENGTH;
70         } else if(value!=0) {
71             return (uint32_t)(offset|0x8000);
72         } else {
73             ++start;
74         }
75     }
76     return 0;
77 }
78 
79 
80 U_CDECL_END
81 
82 
83 
84 U_NAMESPACE_BEGIN
85 
86 //------------------------------------------------------------------------
87 //
88 //   Constructor
89 //
90 //------------------------------------------------------------------------
RBBISetBuilder(RBBIRuleBuilder * rb)91 RBBISetBuilder::RBBISetBuilder(RBBIRuleBuilder *rb)
92 {
93     fRB             = rb;
94     fStatus         = rb->fStatus;
95     fRangeList      = 0;
96     fTrie           = 0;
97     fTrieSize       = 0;
98     fGroupCount     = 0;
99     fSawBOF         = FALSE;
100 }
101 
102 
103 //------------------------------------------------------------------------
104 //
105 //   Destructor
106 //
107 //------------------------------------------------------------------------
~RBBISetBuilder()108 RBBISetBuilder::~RBBISetBuilder()
109 {
110     RangeDescriptor   *nextRangeDesc;
111 
112     // Walk through & delete the linked list of RangeDescriptors
113     for (nextRangeDesc = fRangeList; nextRangeDesc!=NULL;) {
114         RangeDescriptor *r = nextRangeDesc;
115         nextRangeDesc      = r->fNext;
116         delete r;
117     }
118 
119     utrie_close(fTrie);
120 }
121 
122 
123 
124 
125 //------------------------------------------------------------------------
126 //
127 //   build          Build the list of non-overlapping character ranges
128 //                  from the Unicode Sets.
129 //
130 //------------------------------------------------------------------------
build()131 void RBBISetBuilder::build() {
132     RBBINode        *usetNode;
133     RangeDescriptor *rlRange;
134 
135     if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "usets")) {printSets();}
136 
137     //
138     //  Initialize the process by creating a single range encompassing all characters
139     //  that is in no sets.
140     //
141     fRangeList                = new RangeDescriptor(*fStatus); // will check for status here
142     if (fRangeList == NULL) {
143         *fStatus = U_MEMORY_ALLOCATION_ERROR;
144         return;
145     }
146     fRangeList->fStartChar    = 0;
147     fRangeList->fEndChar      = 0x10ffff;
148 
149     if (U_FAILURE(*fStatus)) {
150         return;
151     }
152 
153     //
154     //  Find the set of non-overlapping ranges of characters
155     //
156     int  ni;
157     for (ni=0; ; ni++) {        // Loop over each of the UnicodeSets encountered in the input rules
158         usetNode = (RBBINode *)this->fRB->fUSetNodes->elementAt(ni);
159         if (usetNode==NULL) {
160             break;
161         }
162 
163         UnicodeSet      *inputSet             = usetNode->fInputSet;
164         int32_t          inputSetRangeCount   = inputSet->getRangeCount();
165         int              inputSetRangeIndex   = 0;
166                          rlRange              = fRangeList;
167 
168         for (;;) {
169             if (inputSetRangeIndex >= inputSetRangeCount) {
170                 break;
171             }
172             UChar32      inputSetRangeBegin  = inputSet->getRangeStart(inputSetRangeIndex);
173             UChar32      inputSetRangeEnd    = inputSet->getRangeEnd(inputSetRangeIndex);
174 
175             // skip over ranges from the range list that are completely
176             //   below the current range from the input unicode set.
177             while (rlRange->fEndChar < inputSetRangeBegin) {
178                 rlRange = rlRange->fNext;
179             }
180 
181             // If the start of the range from the range list is before with
182             //   the start of the range from the unicode set, split the range list range
183             //   in two, with one part being before (wholly outside of) the unicode set
184             //   and the other containing the rest.
185             //   Then continue the loop; the post-split current range will then be skipped
186             //     over
187             if (rlRange->fStartChar < inputSetRangeBegin) {
188                 rlRange->split(inputSetRangeBegin, *fStatus);
189                 if (U_FAILURE(*fStatus)) {
190                     return;
191                 }
192                 continue;
193             }
194 
195             // Same thing at the end of the ranges...
196             // If the end of the range from the range list doesn't coincide with
197             //   the end of the range from the unicode set, split the range list
198             //   range in two.  The first part of the split range will be
199             //   wholly inside the Unicode set.
200             if (rlRange->fEndChar > inputSetRangeEnd) {
201                 rlRange->split(inputSetRangeEnd+1, *fStatus);
202                 if (U_FAILURE(*fStatus)) {
203                     return;
204                 }
205             }
206 
207             // The current rlRange is now entirely within the UnicodeSet range.
208             // Add this unicode set to the list of sets for this rlRange
209             if (rlRange->fIncludesSets->indexOf(usetNode) == -1) {
210                 rlRange->fIncludesSets->addElement(usetNode, *fStatus);
211                 if (U_FAILURE(*fStatus)) {
212                     return;
213                 }
214             }
215 
216             // Advance over ranges that we are finished with.
217             if (inputSetRangeEnd == rlRange->fEndChar) {
218                 inputSetRangeIndex++;
219             }
220             rlRange = rlRange->fNext;
221         }
222     }
223 
224     if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "range")) { printRanges();}
225 
226     //
227     //  Group the above ranges, with each group consisting of one or more
228     //    ranges that are in exactly the same set of original UnicodeSets.
229     //    The groups are numbered, and these group numbers are the set of
230     //    input symbols recognized by the run-time state machine.
231     //
232     //    Numbering: # 0  (state table column 0) is unused.
233     //               # 1  is reserved - table column 1 is for end-of-input
234     //               # 2  is reserved - table column 2 is for beginning-in-input
235     //               # 3  is the first range list.
236     //
237     RangeDescriptor *rlSearchRange;
238     for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
239         for (rlSearchRange=fRangeList; rlSearchRange != rlRange; rlSearchRange=rlSearchRange->fNext) {
240             if (rlRange->fIncludesSets->equals(*rlSearchRange->fIncludesSets)) {
241                 rlRange->fNum = rlSearchRange->fNum;
242                 break;
243             }
244         }
245         if (rlRange->fNum == 0) {
246             fGroupCount ++;
247             rlRange->fNum = fGroupCount+2;
248             rlRange->setDictionaryFlag();
249             addValToSets(rlRange->fIncludesSets, fGroupCount+2);
250         }
251     }
252 
253     // Handle input sets that contain the special string {eof}.
254     //   Column 1 of the state table is reserved for EOF on input.
255     //   Column 2 is reserved for before-the-start-input.
256     //            (This column can be optimized away later if there are no rule
257     //             references to {bof}.)
258     //   Add this column value (1 or 2) to the equivalent expression
259     //     subtree for each UnicodeSet that contains the string {eof}
260     //   Because {bof} and {eof} are not a characters in the normal sense,
261     //   they doesn't affect the computation of ranges or TRIE.
262     static const UChar eofUString[] = {0x65, 0x6f, 0x66, 0};
263     static const UChar bofUString[] = {0x62, 0x6f, 0x66, 0};
264 
265     UnicodeString eofString(eofUString);
266     UnicodeString bofString(bofUString);
267     for (ni=0; ; ni++) {        // Loop over each of the UnicodeSets encountered in the input rules
268         usetNode = (RBBINode *)this->fRB->fUSetNodes->elementAt(ni);
269         if (usetNode==NULL) {
270             break;
271         }
272         UnicodeSet      *inputSet = usetNode->fInputSet;
273         if (inputSet->contains(eofString)) {
274             addValToSet(usetNode, 1);
275         }
276         if (inputSet->contains(bofString)) {
277             addValToSet(usetNode, 2);
278             fSawBOF = TRUE;
279         }
280     }
281 
282 
283     if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rgroup")) {printRangeGroups();}
284     if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "esets")) {printSets();}
285 
286     //
287     // Build the Trie table for mapping UChar32 values to the corresponding
288     //   range group number
289     //
290     fTrie = utrie_open(NULL,    //  Pre-existing trie to be filled in
291                       NULL,    //  Data array  (utrie will allocate one)
292                       100000,  //  Max Data Length
293                       0,       //  Initial value for all code points
294                       0,       //  Lead surrogate unit value
295                       TRUE);   //  Keep Latin 1 in separately
296 
297 
298     for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
299         utrie_setRange32(fTrie, rlRange->fStartChar, rlRange->fEndChar+1, rlRange->fNum, TRUE);
300     }
301 }
302 
303 
304 
305 //-----------------------------------------------------------------------------------
306 //
307 //  getTrieSize()    Return the size that will be required to serialize the Trie.
308 //
309 //-----------------------------------------------------------------------------------
getTrieSize()310 int32_t RBBISetBuilder::getTrieSize() /*const*/ {
311     fTrieSize  = utrie_serialize(fTrie,
312                                     NULL,                // Buffer
313                                     0,                   // Capacity
314                                     getFoldedRBBIValue,
315                                     TRUE,                // Reduce to 16 bits
316                                     fStatus);
317     // RBBIDebugPrintf("Trie table size is %d\n", trieSize);
318     return fTrieSize;
319 }
320 
321 
322 //-----------------------------------------------------------------------------------
323 //
324 //  serializeTrie()   Put the serialized trie at the specified address.
325 //                    Trust the caller to have given us enough memory.
326 //                    getTrieSize() MUST be called first.
327 //
328 //-----------------------------------------------------------------------------------
serializeTrie(uint8_t * where)329 void RBBISetBuilder::serializeTrie(uint8_t *where) {
330     utrie_serialize(fTrie,
331                     where,                   // Buffer
332                     fTrieSize,               // Capacity
333                     getFoldedRBBIValue,
334                     TRUE,                    // Reduce to 16 bits
335                     fStatus);
336 }
337 
338 //------------------------------------------------------------------------
339 //
340 //  addValToSets     Add a runtime-mapped input value to each uset from a
341 //                   list of uset nodes. (val corresponds to a state table column.)
342 //                   For each of the original Unicode sets - which correspond
343 //                   directly to uset nodes - a logically equivalent expression
344 //                   is constructed in terms of the remapped runtime input
345 //                   symbol set.  This function adds one runtime input symbol to
346 //                   a list of sets.
347 //
348 //                   The "logically equivalent expression" is the tree for an
349 //                   or-ing together of all of the symbols that go into the set.
350 //
351 //------------------------------------------------------------------------
addValToSets(UVector * sets,uint32_t val)352 void  RBBISetBuilder::addValToSets(UVector *sets, uint32_t val) {
353     int32_t       ix;
354 
355     for (ix=0; ix<sets->size(); ix++) {
356         RBBINode *usetNode = (RBBINode *)sets->elementAt(ix);
357         addValToSet(usetNode, val);
358     }
359 }
360 
addValToSet(RBBINode * usetNode,uint32_t val)361 void  RBBISetBuilder::addValToSet(RBBINode *usetNode, uint32_t val) {
362     RBBINode *leafNode = new RBBINode(RBBINode::leafChar);
363     if (leafNode == NULL) {
364         *fStatus = U_MEMORY_ALLOCATION_ERROR;
365         return;
366     }
367     leafNode->fVal = (unsigned short)val;
368     if (usetNode->fLeftChild == NULL) {
369         usetNode->fLeftChild = leafNode;
370         leafNode->fParent    = usetNode;
371     } else {
372         // There are already input symbols present for this set.
373         // Set up an OR node, with the previous stuff as the left child
374         //   and the new value as the right child.
375         RBBINode *orNode = new RBBINode(RBBINode::opOr);
376         if (orNode == NULL) {
377             *fStatus = U_MEMORY_ALLOCATION_ERROR;
378             return;
379         }
380         orNode->fLeftChild  = usetNode->fLeftChild;
381         orNode->fRightChild = leafNode;
382         orNode->fLeftChild->fParent  = orNode;
383         orNode->fRightChild->fParent = orNode;
384         usetNode->fLeftChild = orNode;
385         orNode->fParent = usetNode;
386     }
387 }
388 
389 
390 //------------------------------------------------------------------------
391 //
392 //   getNumCharCategories
393 //
394 //------------------------------------------------------------------------
getNumCharCategories() const395 int32_t  RBBISetBuilder::getNumCharCategories() const {
396     return fGroupCount + 3;
397 }
398 
399 
400 //------------------------------------------------------------------------
401 //
402 //   sawBOF
403 //
404 //------------------------------------------------------------------------
sawBOF() const405 UBool  RBBISetBuilder::sawBOF() const {
406     return fSawBOF;
407 }
408 
409 
410 //------------------------------------------------------------------------
411 //
412 //   getFirstChar      Given a runtime RBBI character category, find
413 //                     the first UChar32 that is in the set of chars
414 //                     in the category.
415 //------------------------------------------------------------------------
getFirstChar(int32_t category) const416 UChar32  RBBISetBuilder::getFirstChar(int32_t category) const {
417     RangeDescriptor   *rlRange;
418     UChar32            retVal = (UChar32)-1;
419     for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
420         if (rlRange->fNum == category) {
421             retVal = rlRange->fStartChar;
422             break;
423         }
424     }
425     return retVal;
426 }
427 
428 
429 
430 //------------------------------------------------------------------------
431 //
432 //   printRanges        A debugging function.
433 //                      dump out all of the range definitions.
434 //
435 //------------------------------------------------------------------------
436 #ifdef RBBI_DEBUG
printRanges()437 void RBBISetBuilder::printRanges() {
438     RangeDescriptor       *rlRange;
439     int                    i;
440 
441     RBBIDebugPrintf("\n\n Nonoverlapping Ranges ...\n");
442     for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
443         RBBIDebugPrintf("%2i  %4x-%4x  ", rlRange->fNum, rlRange->fStartChar, rlRange->fEndChar);
444 
445         for (i=0; i<rlRange->fIncludesSets->size(); i++) {
446             RBBINode       *usetNode    = (RBBINode *)rlRange->fIncludesSets->elementAt(i);
447             UnicodeString   setName = UNICODE_STRING("anon", 4);
448             RBBINode       *setRef = usetNode->fParent;
449             if (setRef != NULL) {
450                 RBBINode *varRef = setRef->fParent;
451                 if (varRef != NULL  &&  varRef->fType == RBBINode::varRef) {
452                     setName = varRef->fText;
453                 }
454             }
455             RBBI_DEBUG_printUnicodeString(setName); RBBIDebugPrintf("  ");
456         }
457         RBBIDebugPrintf("\n");
458     }
459 }
460 #endif
461 
462 
463 //------------------------------------------------------------------------
464 //
465 //   printRangeGroups     A debugging function.
466 //                        dump out all of the range groups.
467 //
468 //------------------------------------------------------------------------
469 #ifdef RBBI_DEBUG
printRangeGroups()470 void RBBISetBuilder::printRangeGroups() {
471     RangeDescriptor       *rlRange;
472     RangeDescriptor       *tRange;
473     int                    i;
474     int                    lastPrintedGroupNum = 0;
475 
476     RBBIDebugPrintf("\nRanges grouped by Unicode Set Membership...\n");
477     for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
478         int groupNum = rlRange->fNum & 0xbfff;
479         if (groupNum > lastPrintedGroupNum) {
480             lastPrintedGroupNum = groupNum;
481             RBBIDebugPrintf("%2i  ", groupNum);
482 
483             if (rlRange->fNum & 0x4000) { RBBIDebugPrintf(" <DICT> ");}
484 
485             for (i=0; i<rlRange->fIncludesSets->size(); i++) {
486                 RBBINode       *usetNode    = (RBBINode *)rlRange->fIncludesSets->elementAt(i);
487                 UnicodeString   setName = UNICODE_STRING("anon", 4);
488                 RBBINode       *setRef = usetNode->fParent;
489                 if (setRef != NULL) {
490                     RBBINode *varRef = setRef->fParent;
491                     if (varRef != NULL  &&  varRef->fType == RBBINode::varRef) {
492                         setName = varRef->fText;
493                     }
494                 }
495                 RBBI_DEBUG_printUnicodeString(setName); RBBIDebugPrintf(" ");
496             }
497 
498             i = 0;
499             for (tRange = rlRange; tRange != 0; tRange = tRange->fNext) {
500                 if (tRange->fNum == rlRange->fNum) {
501                     if (i++ % 5 == 0) {
502                         RBBIDebugPrintf("\n    ");
503                     }
504                     RBBIDebugPrintf("  %05x-%05x", tRange->fStartChar, tRange->fEndChar);
505                 }
506             }
507             RBBIDebugPrintf("\n");
508         }
509     }
510     RBBIDebugPrintf("\n");
511 }
512 #endif
513 
514 
515 //------------------------------------------------------------------------
516 //
517 //   printSets          A debugging function.
518 //                      dump out all of the set definitions.
519 //
520 //------------------------------------------------------------------------
521 #ifdef RBBI_DEBUG
printSets()522 void RBBISetBuilder::printSets() {
523     int                   i;
524 
525     RBBIDebugPrintf("\n\nUnicode Sets List\n------------------\n");
526     for (i=0; ; i++) {
527         RBBINode        *usetNode;
528         RBBINode        *setRef;
529         RBBINode        *varRef;
530         UnicodeString    setName;
531 
532         usetNode = (RBBINode *)fRB->fUSetNodes->elementAt(i);
533         if (usetNode == NULL) {
534             break;
535         }
536 
537         RBBIDebugPrintf("%3d    ", i);
538         setName = UNICODE_STRING("anonymous", 9);
539         setRef = usetNode->fParent;
540         if (setRef != NULL) {
541             varRef = setRef->fParent;
542             if (varRef != NULL  &&  varRef->fType == RBBINode::varRef) {
543                 setName = varRef->fText;
544             }
545         }
546         RBBI_DEBUG_printUnicodeString(setName);
547         RBBIDebugPrintf("   ");
548         RBBI_DEBUG_printUnicodeString(usetNode->fText);
549         RBBIDebugPrintf("\n");
550         if (usetNode->fLeftChild != NULL) {
551             RBBINode::printTree(usetNode->fLeftChild, TRUE);
552         }
553     }
554     RBBIDebugPrintf("\n");
555 }
556 #endif
557 
558 
559 
560 //-------------------------------------------------------------------------------------
561 //
562 //  RangeDescriptor copy constructor
563 //
564 //-------------------------------------------------------------------------------------
565 
// Copies the start/end characters and number of `other` and duplicates its
// list of including sets into a newly allocated UVector.  fNext is not
// copied; the new descriptor starts unlinked.
//
//   other    descriptor to copy from.
//   status   in/out error code.  A failure already present on entry is
//            preserved; U_MEMORY_ALLOCATION_ERROR is set if the vector
//            cannot be allocated.
RangeDescriptor::RangeDescriptor(const RangeDescriptor &other, UErrorCode &status) {
    fStartChar = other.fStartChar;
    fEndChar   = other.fEndChar;
    fNum       = other.fNum;
    fNext      = NULL;

    // Always allocate the vector, but keep any failure the caller passed in.
    const UErrorCode incoming = status;
    fIncludesSets = new UVector(status);
    if (U_FAILURE(incoming)) {
        status = incoming;
    }
    if (U_FAILURE(status)) {
        return;
    }
    if (fIncludesSets == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    for (int k = 0; k < other.fIncludesSets->size(); ++k) {
        fIncludesSets->addElement(other.fIncludesSets->elementAt(k), status);
    }
}
591 
592 
593 //-------------------------------------------------------------------------------------
594 //
//  RangeDescriptor default constructor
596 //
597 //-------------------------------------------------------------------------------------
RangeDescriptor(UErrorCode & status)598 RangeDescriptor::RangeDescriptor(UErrorCode &status) {
599     this->fStartChar    = 0;
600     this->fEndChar      = 0;
601     this->fNum          = 0;
602     this->fNext         = NULL;
603     UErrorCode oldstatus = status;
604     this->fIncludesSets = new UVector(status);
605     if (U_FAILURE(oldstatus)) {
606         status = oldstatus;
607     }
608     if (U_FAILURE(status)) {
609         return;
610     }
611     /* test for NULL */
612     if(this->fIncludesSets == 0) {
613         status = U_MEMORY_ALLOCATION_ERROR;
614         return;
615     }
616 
617 }
618 
619 
620 //-------------------------------------------------------------------------------------
621 //
//  RangeDescriptor Destructor
623 //
624 //-------------------------------------------------------------------------------------
~RangeDescriptor()625 RangeDescriptor::~RangeDescriptor() {
626     delete  fIncludesSets;
627     fIncludesSets = NULL;
628 }
629 
630 //-------------------------------------------------------------------------------------
631 //
//  RangeDescriptor::split()
633 //
634 //-------------------------------------------------------------------------------------
split(UChar32 where,UErrorCode & status)635 void RangeDescriptor::split(UChar32 where, UErrorCode &status) {
636     U_ASSERT(where>fStartChar && where<=fEndChar);
637     RangeDescriptor *nr = new RangeDescriptor(*this, status);
638     if(nr == 0) {
639         status = U_MEMORY_ALLOCATION_ERROR;
640         return;
641     }
642     if (U_FAILURE(status)) {
643         delete nr;
644         return;
645     }
646     //  RangeDescriptor copy constructor copies all fields.
647     //  Only need to update those that are different after the split.
648     nr->fStartChar = where;
649     this->fEndChar = where-1;
650     nr->fNext      = this->fNext;
651     this->fNext    = nr;
652 }
653 
654 
655 //-------------------------------------------------------------------------------------
656 //
657 //   RangeDescriptor::setDictionaryFlag
658 //
659 //            Character Category Numbers that include characters from
660 //            the original Unicode Set named "dictionary" have bit 14
661 //            set to 1.  The RBBI runtime engine uses this to trigger
662 //            use of the word dictionary.
663 //
664 //            This function looks through the Unicode Sets that it
665 //            (the range) includes, and sets the bit in fNum when
666 //            "dictionary" is among them.
667 //
668 //            TODO:  a faster way would be to find the set node for
669 //                   "dictionary" just once, rather than looking it
670 //                   up by name every time.
671 //
672 //-------------------------------------------------------------------------------------
setDictionaryFlag()673 void RangeDescriptor::setDictionaryFlag() {
674     int i;
675 
676     for (i=0; i<this->fIncludesSets->size(); i++) {
677         RBBINode       *usetNode    = (RBBINode *)fIncludesSets->elementAt(i);
678         UnicodeString   setName;
679         RBBINode       *setRef = usetNode->fParent;
680         if (setRef != NULL) {
681             RBBINode *varRef = setRef->fParent;
682             if (varRef != NULL  &&  varRef->fType == RBBINode::varRef) {
683                 setName = varRef->fText;
684             }
685         }
686         if (setName.compare(UNICODE_STRING("dictionary", 10)) == 0) {   // TODO:  no string literals.
687             this->fNum |= 0x4000;
688             break;
689         }
690     }
691 }
692 
693 
694 
695 U_NAMESPACE_END
696 
697 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
698