• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5 **********************************************************************
6 *   Copyright (c) 2002-2007, International Business Machines Corporation
7 *   and others.  All Rights Reserved.
8 **********************************************************************
9 *   Date        Name        Description
10 *   01/14/2002  aliu        Creation.
11 **********************************************************************
12 */
13 
14 package ohos.global.icu.text;
15 import ohos.global.icu.impl.Utility;
16 
17 /**
18  * A replacer that produces static text as its output.  The text may
19  * contain transliterator stand-in characters that represent nested
20  * UnicodeReplacer objects, making it possible to encode a tree of
21  * replacers in a StringReplacer.  A StringReplacer that contains such
22  * stand-ins is called a <em>complex</em> StringReplacer.  A complex
23  * StringReplacer has a slower processing loop than a non-complex one.
24  * @author Alan Liu
25  */
26 class StringReplacer implements UnicodeReplacer {
27 
28     /**
29      * Output text, possibly containing stand-in characters that
30      * represent nested UnicodeReplacers.
31      */
32     private String output;
33 
34     /**
35      * Cursor position.  Value is ignored if hasCursor is false.
36      */
37     private int cursorPos;
38 
39     /**
40      * True if this object outputs a cursor position.
41      */
42     private boolean hasCursor;
43 
44     /**
45      * A complex object contains nested replacers and requires more
46      * complex processing.  StringReplacers are initially assumed to
47      * be complex.  If no nested replacers are seen during processing,
48      * then isComplex is set to false, and future replacements are
49      * short circuited for better performance.
50      */
51     private boolean isComplex;
52 
53     /**
54      * Object that translates stand-in characters in 'output' to
55      * UnicodeReplacer objects.
56      */
57     private final RuleBasedTransliterator.Data data;
58 
59     /**
60      * Construct a StringReplacer that sets the emits the given output
61      * text and sets the cursor to the given position.
62      * @param theOutput text that will replace input text when the
63      * replace() method is called.  May contain stand-in characters
64      * that represent nested replacers.
65      * @param theCursorPos cursor position that will be returned by
66      * the replace() method
67      * @param theData transliterator context object that translates
68      * stand-in characters to UnicodeReplacer objects
69      */
StringReplacer(String theOutput, int theCursorPos, RuleBasedTransliterator.Data theData)70     public StringReplacer(String theOutput,
71                           int theCursorPos,
72                           RuleBasedTransliterator.Data theData) {
73         output = theOutput;
74         cursorPos = theCursorPos;
75         hasCursor = true;
76         data = theData;
77         isComplex = true;
78     }
79 
80     /**
81      * Construct a StringReplacer that sets the emits the given output
82      * text and does not modify the cursor.
83      * @param theOutput text that will replace input text when the
84      * replace() method is called.  May contain stand-in characters
85      * that represent nested replacers.
86      * @param theData transliterator context object that translates
87      * stand-in characters to UnicodeReplacer objects
88      */
StringReplacer(String theOutput, RuleBasedTransliterator.Data theData)89     public StringReplacer(String theOutput,
90                           RuleBasedTransliterator.Data theData) {
91         output = theOutput;
92         cursorPos = 0;
93         hasCursor = false;
94         data = theData;
95         isComplex = true;
96     }
97 
98 //=    public static UnicodeReplacer valueOf(String output,
99 //=                                          int cursorPos,
100 //=                                          RuleBasedTransliterator.Data data) {
101 //=        if (output.length() == 1) {
102 //=            char c = output.charAt(0);
103 //=            UnicodeReplacer r = data.lookupReplacer(c);
104 //=            if (r != null) {
105 //=                return r;
106 //=            }
107 //=        }
108 //=        return new StringReplacer(output, cursorPos, data);
109 //=    }
110 
111     /**
112      * UnicodeReplacer API
113      */
114     @Override
replace(Replaceable text, int start, int limit, int[] cursor)115     public int replace(Replaceable text,
116                        int start,
117                        int limit,
118                        int[] cursor) {
119         int outLen;
120         int newStart = 0;
121 
122         // NOTE: It should be possible to _always_ run the complex
123         // processing code; just slower.  If not, then there is a bug
124         // in the complex processing code.
125 
126         // Simple (no nested replacers) Processing Code :
127         if (!isComplex) {
128             text.replace(start, limit, output);
129             outLen = output.length();
130 
131             // Setup default cursor position (for cursorPos within output)
132             newStart = cursorPos;
133         }
134 
135         // Complex (nested replacers) Processing Code :
136         else {
137             /* When there are segments to be copied, use the Replaceable.copy()
138              * API in order to retain out-of-band data.  Copy everything to the
139              * end of the string, then copy them back over the key.  This preserves
140              * the integrity of indices into the key and surrounding context while
141              * generating the output text.
142              */
143             StringBuffer buf = new StringBuffer();
144             int oOutput; // offset into 'output'
145             isComplex = false;
146 
147             // The temporary buffer starts at tempStart, and extends
148             // to destLimit + tempExtra.  The start of the buffer has a single
149             // character from before the key.  This provides style
150             // data when addition characters are filled into the
151             // temporary buffer.  If there is nothing to the left, use
152             // the non-character U+FFFF, which Replaceable subclasses
153             // should treat specially as a "no-style character."
154             // destStart points to the point after the style context
155             // character, so it is tempStart+1 or tempStart+2.
156             int tempStart = text.length(); // start of temp buffer
157             int destStart = tempStart; // copy new text to here
158             if (start > 0) {
159                 int len = UTF16.getCharCount(text.char32At(start-1));
160                 text.copy(start-len, start, tempStart);
161                 destStart += len;
162             } else {
163                 text.replace(tempStart, tempStart, "\uFFFF");
164                 destStart++;
165             }
166             int destLimit = destStart;
167             int tempExtra = 0; // temp chars after destLimit
168 
169             for (oOutput=0; oOutput<output.length(); ) {
170                 if (oOutput == cursorPos) {
171                     // Record the position of the cursor
172                     newStart = buf.length() + destLimit - destStart; // relative to start
173                     // the buf.length() was inserted for bug 5789
174                     // the problem is that if we are accumulating into a buffer (when r == null below)
175                     // then the actual length of the text at that point needs to add the buf length.
176                     // there was an alternative suggested in #5789, but that looks like it won't work
177                     // if we have accumulated some stuff in the dest part AND have a non-zero buffer.
178                 }
179                 int c = UTF16.charAt(output, oOutput);
180 
181                 // When we are at the last position copy the right style
182                 // context character into the temporary buffer.  We don't
183                 // do this before because it will provide an incorrect
184                 // right context for previous replace() operations.
185                 int nextIndex = oOutput + UTF16.getCharCount(c);
186                 if (nextIndex == output.length()) {
187                     tempExtra = UTF16.getCharCount(text.char32At(limit));
188                     text.copy(limit, limit+tempExtra, destLimit);
189                 }
190 
191                 UnicodeReplacer r = data.lookupReplacer(c);
192                 if (r == null) {
193                     // Accumulate straight (non-segment) text.
194                     UTF16.append(buf, c);
195                 } else {
196                     isComplex = true;
197 
198                     // Insert any accumulated straight text.
199                     if (buf.length() > 0) {
200                         text.replace(destLimit, destLimit, buf.toString());
201                         destLimit += buf.length();
202                         buf.setLength(0);
203                     }
204 
205                     // Delegate output generation to replacer object
206                     int len = r.replace(text, destLimit, destLimit, cursor);
207                     destLimit += len;
208                 }
209                 oOutput = nextIndex;
210             }
211             // Insert any accumulated straight text.
212             if (buf.length() > 0) {
213                 text.replace(destLimit, destLimit, buf.toString());
214                 destLimit += buf.length();
215             }
216             if (oOutput == cursorPos) {
217                 // Record the position of the cursor
218                 newStart = destLimit - destStart; // relative to start
219             }
220 
221             outLen = destLimit - destStart;
222 
223             // Copy new text to start, and delete it
224             text.copy(destStart, destLimit, start);
225             text.replace(tempStart + outLen, destLimit + tempExtra + outLen, "");
226 
227             // Delete the old text (the key)
228             text.replace(start + outLen, limit + outLen, "");
229         }
230 
231         if (hasCursor) {
232             // Adjust the cursor for positions outside the key.  These
233             // refer to code points rather than code units.  If cursorPos
234             // is within the output string, then use newStart, which has
235             // already been set above.
236             if (cursorPos < 0) {
237                 newStart = start;
238                 int n = cursorPos;
239                 // Outside the output string, cursorPos counts code points
240                 while (n < 0 && newStart > 0) {
241                     newStart -= UTF16.getCharCount(text.char32At(newStart-1));
242                     ++n;
243                 }
244                 newStart += n;
245             } else if (cursorPos > output.length()) {
246                 newStart = start + outLen;
247                 int n = cursorPos - output.length();
248                 // Outside the output string, cursorPos counts code points
249                 while (n > 0 && newStart < text.length()) {
250                     newStart += UTF16.getCharCount(text.char32At(newStart));
251                     --n;
252                 }
253                 newStart += n;
254             } else {
255                 // Cursor is within output string.  It has been set up above
256                 // to be relative to start.
257                 newStart += start;
258             }
259 
260             cursor[0] = newStart;
261         }
262 
263         return outLen;
264     }
265 
266     /**
267      * UnicodeReplacer API
268      */
269     @Override
toReplacerPattern(boolean escapeUnprintable)270     public String toReplacerPattern(boolean escapeUnprintable) {
271         StringBuffer rule = new StringBuffer();
272         StringBuffer quoteBuf = new StringBuffer();
273 
274         int cursor = cursorPos;
275 
276         // Handle a cursor preceding the output
277         if (hasCursor && cursor < 0) {
278             while (cursor++ < 0) {
279                 Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf);
280             }
281             // Fall through and append '|' below
282         }
283 
284         for (int i=0; i<output.length(); ++i) {
285             if (hasCursor && i == cursor) {
286                 Utility.appendToRule(rule, '|', true, escapeUnprintable, quoteBuf);
287             }
288             char c = output.charAt(i); // Ok to use 16-bits here
289 
290             UnicodeReplacer r = data.lookupReplacer(c);
291             if (r == null) {
292                 Utility.appendToRule(rule, c, false, escapeUnprintable, quoteBuf);
293             } else {
294                 StringBuffer buf = new StringBuffer(" ");
295                 buf.append(r.toReplacerPattern(escapeUnprintable));
296                 buf.append(' ');
297                 Utility.appendToRule(rule, buf.toString(),
298                                      true, escapeUnprintable, quoteBuf);
299             }
300         }
301 
302         // Handle a cursor after the output.  Use > rather than >= because
303         // if cursor == output.length() it is at the end of the output,
304         // which is the default position, so we need not emit it.
305         if (hasCursor && cursor > output.length()) {
306             cursor -= output.length();
307             while (cursor-- > 0) {
308                 Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf);
309             }
310             Utility.appendToRule(rule, '|', true, escapeUnprintable, quoteBuf);
311         }
312         // Flush quoteBuf out to result
313         Utility.appendToRule(rule, -1,
314                              true, escapeUnprintable, quoteBuf);
315 
316         return rule.toString();
317     }
318 
319     /**
320      * Union the set of all characters that may output by this object
321      * into the given set.
322      * @param toUnionTo the set into which to union the output characters
323      */
324     @Override
addReplacementSetTo(UnicodeSet toUnionTo)325     public void addReplacementSetTo(UnicodeSet toUnionTo) {
326         int ch;
327         for (int i=0; i<output.length(); i+=UTF16.getCharCount(ch)) {
328             ch = UTF16.charAt(output, i);
329             UnicodeReplacer r = data.lookupReplacer(ch);
330             if (r == null) {
331                 toUnionTo.add(ch);
332             } else {
333                 r.addReplacementSetTo(toUnionTo);
334             }
335         }
336     }
337 }
338 
339 //eof
340