• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 package org.apache.commons.lang3;
18 
19 /**
20  * Operations on {@link CharSequence} that are
21  * {@code null} safe.
22  *
23  * @see CharSequence
24  * @since 3.0
25  */
26 public class CharSequenceUtils {
27 
28     private static final int NOT_FOUND = -1;
29 
30     static final int TO_STRING_LIMIT = 16;
31 
checkLaterThan1(final CharSequence cs, final CharSequence searchChar, final int len2, final int start1)32     private static boolean checkLaterThan1(final CharSequence cs, final CharSequence searchChar, final int len2, final int start1) {
33         for (int i = 1, j = len2 - 1; i <= j; i++, j--) {
34             if (cs.charAt(start1 + i) != searchChar.charAt(i) || cs.charAt(start1 + j) != searchChar.charAt(j)) {
35                 return false;
36             }
37         }
38         return true;
39     }
40 
41     /**
42      * Used by the indexOf(CharSequence methods) as a green implementation of indexOf.
43      *
44      * @param cs the {@link CharSequence} to be processed
45      * @param searchChar the {@link CharSequence} to be searched for
46      * @param start the start index
47      * @return the index where the search sequence was found
48      */
indexOf(final CharSequence cs, final CharSequence searchChar, final int start)49     static int indexOf(final CharSequence cs, final CharSequence searchChar, final int start) {
50         if (cs instanceof String) {
51             return ((String) cs).indexOf(searchChar.toString(), start);
52         }
53         if (cs instanceof StringBuilder) {
54             return ((StringBuilder) cs).indexOf(searchChar.toString(), start);
55         }
56         if (cs instanceof StringBuffer) {
57             return ((StringBuffer) cs).indexOf(searchChar.toString(), start);
58         }
59         return cs.toString().indexOf(searchChar.toString(), start);
60 //        if (cs instanceof String && searchChar instanceof String) {
61 //            // TODO: Do we assume searchChar is usually relatively small;
62 //            //       If so then calling toString() on it is better than reverting to
63 //            //       the green implementation in the else block
64 //            return ((String) cs).indexOf((String) searchChar, start);
65 //        } else {
66 //            // TODO: Implement rather than convert to String
67 //            return cs.toString().indexOf(searchChar.toString(), start);
68 //        }
69     }
70 
71     /**
72      * Returns the index within {@code cs} of the first occurrence of the
73      * specified character, starting the search at the specified index.
74      * <p>
75      * If a character with value {@code searchChar} occurs in the
76      * character sequence represented by the {@code cs}
77      * object at an index no smaller than {@code start}, then
78      * the index of the first such occurrence is returned. For values
79      * of {@code searchChar} in the range from 0 to 0xFFFF (inclusive),
80      * this is the smallest value <i>k</i> such that:
81      * </p>
82      * <blockquote><pre>
83      * (this.charAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &gt;= start)
84      * </pre></blockquote>
85      * is true. For other values of {@code searchChar}, it is the
86      * smallest value <i>k</i> such that:
87      * <blockquote><pre>
88      * (this.codePointAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &gt;= start)
89      * </pre></blockquote>
90      * <p>
91      * is true. In either case, if no such character occurs inm {@code cs}
92      * at or after position {@code start}, then
93      * {@code -1} is returned.
94      * </p>
95      * <p>
96      * There is no restriction on the value of {@code start}. If it
97      * is negative, it has the same effect as if it were zero: the entire
98      * {@link CharSequence} may be searched. If it is greater than
99      * the length of {@code cs}, it has the same effect as if it were
100      * equal to the length of {@code cs}: {@code -1} is returned.
101      * </p>
102      * <p>All indices are specified in {@code char} values
103      * (Unicode code units).
104      * </p>
105      *
106      * @param cs  the {@link CharSequence} to be processed, not null
107      * @param searchChar  the char to be searched for
108      * @param start  the start index, negative starts at the string start
109      * @return the index where the search char was found, -1 if not found
110      * @since 3.6 updated to behave more like {@link String}
111      */
indexOf(final CharSequence cs, final int searchChar, int start)112     static int indexOf(final CharSequence cs, final int searchChar, int start) {
113         if (cs instanceof String) {
114             return ((String) cs).indexOf(searchChar, start);
115         }
116         final int sz = cs.length();
117         if (start < 0) {
118             start = 0;
119         }
120         if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
121             for (int i = start; i < sz; i++) {
122                 if (cs.charAt(i) == searchChar) {
123                     return i;
124                 }
125             }
126             return NOT_FOUND;
127         }
128         //supplementary characters (LANG1300)
129         if (searchChar <= Character.MAX_CODE_POINT) {
130             final char[] chars = Character.toChars(searchChar);
131             for (int i = start; i < sz - 1; i++) {
132                 final char high = cs.charAt(i);
133                 final char low = cs.charAt(i + 1);
134                 if (high == chars[0] && low == chars[1]) {
135                     return i;
136                 }
137             }
138         }
139         return NOT_FOUND;
140     }
141 
142     /**
143      * Used by the lastIndexOf(CharSequence methods) as a green implementation of lastIndexOf
144      *
145      * @param cs the {@link CharSequence} to be processed
146      * @param searchChar the {@link CharSequence} to find
147      * @param start the start index
148      * @return the index where the search sequence was found
149      */
lastIndexOf(final CharSequence cs, final CharSequence searchChar, int start)150     static int lastIndexOf(final CharSequence cs, final CharSequence searchChar, int start) {
151         if (searchChar == null || cs == null) {
152             return NOT_FOUND;
153         }
154         if (searchChar instanceof String) {
155             if (cs instanceof String) {
156                 return ((String) cs).lastIndexOf((String) searchChar, start);
157             }
158             if (cs instanceof StringBuilder) {
159                 return ((StringBuilder) cs).lastIndexOf((String) searchChar, start);
160             }
161             if (cs instanceof StringBuffer) {
162                 return ((StringBuffer) cs).lastIndexOf((String) searchChar, start);
163             }
164         }
165 
166         final int len1 = cs.length();
167         final int len2 = searchChar.length();
168 
169         if (start > len1) {
170             start = len1;
171         }
172 
173         if (start < 0 || len2 > len1) {
174             return NOT_FOUND;
175         }
176 
177         if (len2 == 0) {
178             return start;
179         }
180 
181         if (len2 <= TO_STRING_LIMIT) {
182             if (cs instanceof String) {
183                 return ((String) cs).lastIndexOf(searchChar.toString(), start);
184             }
185             if (cs instanceof StringBuilder) {
186                 return ((StringBuilder) cs).lastIndexOf(searchChar.toString(), start);
187             }
188             if (cs instanceof StringBuffer) {
189                 return ((StringBuffer) cs).lastIndexOf(searchChar.toString(), start);
190             }
191         }
192 
193         if (start + len2 > len1) {
194             start = len1 - len2;
195         }
196 
197         final char char0 = searchChar.charAt(0);
198 
199         int i = start;
200         while (true) {
201             while (cs.charAt(i) != char0) {
202                 i--;
203                 if (i < 0) {
204                     return NOT_FOUND;
205                 }
206             }
207             if (checkLaterThan1(cs, searchChar, len2, i)) {
208                 return i;
209             }
210             i--;
211             if (i < 0) {
212                 return NOT_FOUND;
213             }
214         }
215     }
216 
217     /**
218      * Returns the index within {@code cs} of the last occurrence of
219      * the specified character, searching backward starting at the
220      * specified index. For values of {@code searchChar} in the range
221      * from 0 to 0xFFFF (inclusive), the index returned is the largest
222      * value <i>k</i> such that:
223      * <blockquote><pre>
224      * (this.charAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &lt;= start)
225      * </pre></blockquote>
226      * is true. For other values of {@code searchChar}, it is the
227      * largest value <i>k</i> such that:
228      * <blockquote><pre>
229      * (this.codePointAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &lt;= start)
230      * </pre></blockquote>
231      * is true. In either case, if no such character occurs in {@code cs}
232      * at or before position {@code start}, then {@code -1} is returned.
233      *
234      * <p>
235      * All indices are specified in {@code char} values
236      * (Unicode code units).
237      * </p>
238      *
239      * @param cs  the {@link CharSequence} to be processed
240      * @param searchChar  the char to be searched for
241      * @param start  the start index, negative returns -1, beyond length starts at end
242      * @return the index where the search char was found, -1 if not found
243      * @since 3.6 updated to behave more like {@link String}
244      */
lastIndexOf(final CharSequence cs, final int searchChar, int start)245     static int lastIndexOf(final CharSequence cs, final int searchChar, int start) {
246         if (cs instanceof String) {
247             return ((String) cs).lastIndexOf(searchChar, start);
248         }
249         final int sz = cs.length();
250         if (start < 0) {
251             return NOT_FOUND;
252         }
253         if (start >= sz) {
254             start = sz - 1;
255         }
256         if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
257             for (int i = start; i >= 0; --i) {
258                 if (cs.charAt(i) == searchChar) {
259                     return i;
260                 }
261             }
262             return NOT_FOUND;
263         }
264         //supplementary characters (LANG1300)
265         //NOTE - we must do a forward traversal for this to avoid duplicating code points
266         if (searchChar <= Character.MAX_CODE_POINT) {
267             final char[] chars = Character.toChars(searchChar);
268             //make sure it's not the last index
269             if (start == sz - 1) {
270                 return NOT_FOUND;
271             }
272             for (int i = start; i >= 0; i--) {
273                 final char high = cs.charAt(i);
274                 final char low = cs.charAt(i + 1);
275                 if (chars[0] == high && chars[1] == low) {
276                     return i;
277                 }
278             }
279         }
280         return NOT_FOUND;
281     }
282 
283     /**
284      * Green implementation of regionMatches.
285      *
286      * @param cs the {@link CharSequence} to be processed
287      * @param ignoreCase whether or not to be case-insensitive
288      * @param thisStart the index to start on the {@code cs} CharSequence
289      * @param substring the {@link CharSequence} to be looked for
290      * @param start the index to start on the {@code substring} CharSequence
291      * @param length character length of the region
292      * @return whether the region matched
293      */
regionMatches(final CharSequence cs, final boolean ignoreCase, final int thisStart, final CharSequence substring, final int start, final int length)294     static boolean regionMatches(final CharSequence cs, final boolean ignoreCase, final int thisStart,
295             final CharSequence substring, final int start, final int length)    {
296         if (cs instanceof String && substring instanceof String) {
297             return ((String) cs).regionMatches(ignoreCase, thisStart, (String) substring, start, length);
298         }
299         int index1 = thisStart;
300         int index2 = start;
301         int tmpLen = length;
302 
303         // Extract these first so we detect NPEs the same as the java.lang.String version
304         final int srcLen = cs.length() - thisStart;
305         final int otherLen = substring.length() - start;
306 
307         // Check for invalid parameters
308         if (thisStart < 0 || start < 0 || length < 0) {
309             return false;
310         }
311 
312         // Check that the regions are long enough
313         if (srcLen < length || otherLen < length) {
314             return false;
315         }
316 
317         while (tmpLen-- > 0) {
318             final char c1 = cs.charAt(index1++);
319             final char c2 = substring.charAt(index2++);
320 
321             if (c1 == c2) {
322                 continue;
323             }
324 
325             if (!ignoreCase) {
326                 return false;
327             }
328 
329             // The real same check as in String.regionMatches():
330             final char u1 = Character.toUpperCase(c1);
331             final char u2 = Character.toUpperCase(c2);
332             if (u1 != u2 && Character.toLowerCase(u1) != Character.toLowerCase(u2)) {
333                 return false;
334             }
335         }
336 
337         return true;
338     }
339 
340     /**
341      * Returns a new {@link CharSequence} that is a subsequence of this
342      * sequence starting with the {@code char} value at the specified index.
343      *
344      * <p>This provides the {@link CharSequence} equivalent to {@link String#substring(int)}.
345      * The length (in {@code char}) of the returned sequence is {@code length() - start},
346      * so if {@code start == end} then an empty sequence is returned.</p>
347      *
348      * @param cs  the specified subsequence, null returns null
349      * @param start  the start index, inclusive, valid
350      * @return a new subsequence, may be null
351      * @throws IndexOutOfBoundsException if {@code start} is negative or if
352      *  {@code start} is greater than {@code length()}
353      */
subSequence(final CharSequence cs, final int start)354     public static CharSequence subSequence(final CharSequence cs, final int start) {
355         return cs == null ? null : cs.subSequence(start, cs.length());
356     }
357 
358     /**
359      * Converts the given CharSequence to a char[].
360      *
361      * @param source the {@link CharSequence} to be processed.
362      * @return the resulting char array, never null.
363      * @since 3.11
364      */
toCharArray(final CharSequence source)365     public static char[] toCharArray(final CharSequence source) {
366         final int len = StringUtils.length(source);
367         if (len == 0) {
368             return ArrayUtils.EMPTY_CHAR_ARRAY;
369         }
370         if (source instanceof String) {
371             return ((String) source).toCharArray();
372         }
373         final char[] array = new char[len];
374         for (int i = 0; i < len; i++) {
375             array[i] = source.charAt(i);
376         }
377         return array;
378     }
379 
380     /**
381      * {@link CharSequenceUtils} instances should NOT be constructed in
382      * standard programming.
383      *
384      * <p>This constructor is public to permit tools that require a JavaBean
385      * instance to operate.</p>
386      */
CharSequenceUtils()387     public CharSequenceUtils() {
388     }
389 }
390