• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5  *******************************************************************************
6  * Copyright (C) 2014, International Business Machines Corporation and         *
7  * others. All Rights Reserved.                                                *
8  *******************************************************************************
9  */
10 package ohos.global.icu.text;
11 
12 import java.text.CharacterIterator;
13 
14 import ohos.global.icu.impl.CharacterIteration;
15 
16 abstract class DictionaryBreakEngine implements LanguageBreakEngine {
17 
18     /* Helper class for improving readability of the Thai/Lao/Khmer word break
19      * algorithm.
20      */
21     static class PossibleWord {
22         // List size, limited by the maximum number of words in the dictionary
23         // that form a nested sequence.
24         private final static int POSSIBLE_WORD_LIST_MAX = 20;
25         //list of word candidate lengths, in increasing length order
26         private int lengths[];
27         private int count[];    // Count of candidates
28         private int prefix;     // The longest match with a dictionary word
29         private int offset;     // Offset in the text of these candidates
30         private int mark;       // The preferred candidate's offset
31         private int current;    // The candidate we're currently looking at
32 
33         // Default constructor
PossibleWord()34         public PossibleWord() {
35             lengths = new int[POSSIBLE_WORD_LIST_MAX];
36             count = new int[1]; // count needs to be an array of 1 so that it can be pass as reference
37             offset = -1;
38         }
39 
40         // Fill the list of candidates if needed, select the longest, and return the number found
candidates(CharacterIterator fIter, DictionaryMatcher dict, int rangeEnd)41         public int candidates(CharacterIterator fIter, DictionaryMatcher dict, int rangeEnd) {
42             int start = fIter.getIndex();
43             if (start != offset) {
44                 offset = start;
45                 prefix = dict.matches(fIter, rangeEnd - start, lengths, count, lengths.length);
46                 // Dictionary leaves text after longest prefix, not longest word. Back up.
47                 if (count[0] <= 0) {
48                     fIter.setIndex(start);
49                 }
50             }
51             if (count[0] > 0) {
52                 fIter.setIndex(start + lengths[count[0]-1]);
53             }
54             current = count[0] - 1;
55             mark = current;
56             return count[0];
57         }
58 
59         // Select the currently marked candidate, point after it in the text, and invalidate self
acceptMarked(CharacterIterator fIter)60         public int acceptMarked(CharacterIterator fIter) {
61             fIter.setIndex(offset + lengths[mark]);
62             return lengths[mark];
63         }
64 
65         // Backup from the current candidate to the next shorter one; return true if that exists
66         // and point the text after it
backUp(CharacterIterator fIter)67         public boolean backUp(CharacterIterator fIter) {
68             if (current > 0) {
69                 fIter.setIndex(offset + lengths[--current]);
70                 return true;
71             }
72             return false;
73         }
74 
75         // Return the longest prefix this candidate location shares with a dictionary word
longestPrefix()76         public int longestPrefix() {
77             return prefix;
78         }
79 
80         // Mark the current candidate as the one we like
markCurrent()81         public void markCurrent() {
82             mark = current;
83         }
84     }
85 
86     /**
87      *  A deque-like structure holding raw ints.
88      *  Partial, limited implementation, only what is needed by the dictionary implementation.
89      *  For internal use only.
90      * @hide draft / provisional / internal are hidden on OHOS
91      */
92     static class DequeI implements Cloneable {
93         private int[] data = new int[50];
94         private int lastIdx = 4;   // or base of stack. Index of element.
95         private int firstIdx = 4;  // or Top of Stack. Index of element + 1.
96 
97         @Override
clone()98         public Object clone() throws CloneNotSupportedException {
99             DequeI result = (DequeI)super.clone();
100             result.data = data.clone();
101             return result;
102         }
103 
size()104         int size() {
105             return firstIdx - lastIdx;
106         }
107 
isEmpty()108         boolean isEmpty() {
109             return size() == 0;
110         }
111 
grow()112         private void grow() {
113             int[] newData = new int[data.length * 2];
114             System.arraycopy(data,  0,  newData,  0, data.length);
115             data = newData;
116         }
117 
offer(int v)118         void offer(int v) {
119             // Note that the actual use cases of offer() add at most one element.
120             //   We make no attempt to handle more than a few.
121             assert lastIdx > 0;
122             data[--lastIdx] = v;
123         }
124 
push(int v)125         void push(int v) {
126             if (firstIdx >= data.length) {
127                 grow();
128             }
129             data[firstIdx++] = v;
130         }
131 
pop()132         int pop() {
133             assert size() > 0;
134             return data[--firstIdx];
135         }
136 
peek()137         int peek() {
138             assert size() > 0;
139             return data[firstIdx - 1];
140         }
141 
peekLast()142         int peekLast() {
143             assert size() > 0;
144             return data[lastIdx];
145         }
146 
pollLast()147         int pollLast() {
148             assert size() > 0;
149             return data[lastIdx++];
150         }
151 
contains(int v)152         boolean contains(int v) {
153             for (int i=lastIdx; i< firstIdx; i++) {
154                 if (data[i] == v) {
155                     return true;
156                 }
157             }
158             return false;
159         }
160 
elementAt(int i)161         int elementAt(int i) {
162             assert i < size();
163             return data[lastIdx + i];
164         }
165 
166         void removeAllElements() {
167             lastIdx = firstIdx = 4;
168         }
169     }
170 
171     UnicodeSet fSet = new UnicodeSet();
172 
173     /**
174      *  Constructor
175      */
176     public DictionaryBreakEngine() {
177     }
178 
179     @Override
180     public boolean handles(int c) {
181         return fSet.contains(c);        // we recognize the character
182     }
183 
184     @Override
185     public int findBreaks(CharacterIterator text, int startPos, int endPos,
186             DequeI foundBreaks) {
187         int result = 0;
188 
189          // Find the span of characters included in the set.
190          //   The span to break begins at the current position int the text, and
191          //   extends towards the start or end of the text, depending on 'reverse'.
192 
193         int start = text.getIndex();
194         int current;
195         int rangeStart;
196         int rangeEnd;
197         int c = CharacterIteration.current32(text);
198         while ((current = text.getIndex()) < endPos && fSet.contains(c)) {
199             CharacterIteration.next32(text);
200             c = CharacterIteration.current32(text);
201         }
202         rangeStart = start;
203         rangeEnd = current;
204 
205         result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks);
206         text.setIndex(current);
207 
208         return result;
209     }
210 
211     void setCharacters(UnicodeSet set) {
212         fSet = new UnicodeSet(set);
213         fSet.compact();
214     }
215 
216     /**
217      * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
218      *
219      * @param text A UText representing the text
220      * @param rangeStart The start of the range of dictionary characters
221      * @param rangeEnd The end of the range of dictionary characters
222      * @param foundBreaks Output of break positions. Positions are pushed.
223      *                    Pre-existing contents of the output stack are unaltered.
224      * @return The number of breaks found
225      */
226      abstract int divideUpDictionaryRange(CharacterIterator text,
227                                           int               rangeStart,
228                                           int               rangeEnd,
229                                           DequeI            foundBreaks );
230 }
231