1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.lang3; 18 19 /** 20 * Operations on {@link CharSequence} that are 21 * {@code null} safe. 22 * 23 * @see CharSequence 24 * @since 3.0 25 */ 26 public class CharSequenceUtils { 27 28 private static final int NOT_FOUND = -1; 29 30 static final int TO_STRING_LIMIT = 16; 31 checkLaterThan1(final CharSequence cs, final CharSequence searchChar, final int len2, final int start1)32 private static boolean checkLaterThan1(final CharSequence cs, final CharSequence searchChar, final int len2, final int start1) { 33 for (int i = 1, j = len2 - 1; i <= j; i++, j--) { 34 if (cs.charAt(start1 + i) != searchChar.charAt(i) || cs.charAt(start1 + j) != searchChar.charAt(j)) { 35 return false; 36 } 37 } 38 return true; 39 } 40 41 /** 42 * Used by the indexOf(CharSequence methods) as a green implementation of indexOf. 43 * 44 * @param cs the {@link CharSequence} to be processed 45 * @param searchChar the {@link CharSequence} to be searched for 46 * @param start the start index 47 * @return the index where the search sequence was found 48 */ indexOf(final CharSequence cs, final CharSequence searchChar, final int start)49 static int indexOf(final CharSequence cs, final CharSequence searchChar, final int start) { 50 if (cs instanceof String) { 51 return ((String) cs).indexOf(searchChar.toString(), start); 52 } 53 if (cs instanceof StringBuilder) { 54 return ((StringBuilder) cs).indexOf(searchChar.toString(), start); 55 } 56 if (cs instanceof StringBuffer) { 57 return ((StringBuffer) cs).indexOf(searchChar.toString(), start); 58 } 59 return cs.toString().indexOf(searchChar.toString(), start); 60 // if (cs instanceof String && searchChar instanceof String) { 61 // // TODO: Do we assume searchChar is usually relatively small; 62 // // If so then calling toString() on it is better than reverting to 63 // // the green implementation in the else block 64 // return ((String) cs).indexOf((String) searchChar, start); 65 // } else { 66 // // TODO: Implement rather than convert to String 67 // return cs.toString().indexOf(searchChar.toString(), start); 68 // } 69 } 70 71 /** 72 * Returns the index within {@code cs} of the first occurrence of the 73 * specified character, starting the search at the specified index. 74 * <p> 75 * If a character with value {@code searchChar} occurs in the 76 * character sequence represented by the {@code cs} 77 * object at an index no smaller than {@code start}, then 78 * the index of the first such occurrence is returned. For values 79 * of {@code searchChar} in the range from 0 to 0xFFFF (inclusive), 80 * this is the smallest value <i>k</i> such that: 81 * </p> 82 * <blockquote><pre> 83 * (this.charAt(<i>k</i>) == searchChar) && (<i>k</i> >= start) 84 * </pre></blockquote> 85 * is true. For other values of {@code searchChar}, it is the 86 * smallest value <i>k</i> such that: 87 * <blockquote><pre> 88 * (this.codePointAt(<i>k</i>) == searchChar) && (<i>k</i> >= start) 89 * </pre></blockquote> 90 * <p> 91 * is true. In either case, if no such character occurs inm {@code cs} 92 * at or after position {@code start}, then 93 * {@code -1} is returned. 94 * </p> 95 * <p> 96 * There is no restriction on the value of {@code start}. If it 97 * is negative, it has the same effect as if it were zero: the entire 98 * {@link CharSequence} may be searched. If it is greater than 99 * the length of {@code cs}, it has the same effect as if it were 100 * equal to the length of {@code cs}: {@code -1} is returned. 101 * </p> 102 * <p>All indices are specified in {@code char} values 103 * (Unicode code units). 104 * </p> 105 * 106 * @param cs the {@link CharSequence} to be processed, not null 107 * @param searchChar the char to be searched for 108 * @param start the start index, negative starts at the string start 109 * @return the index where the search char was found, -1 if not found 110 * @since 3.6 updated to behave more like {@link String} 111 */ indexOf(final CharSequence cs, final int searchChar, int start)112 static int indexOf(final CharSequence cs, final int searchChar, int start) { 113 if (cs instanceof String) { 114 return ((String) cs).indexOf(searchChar, start); 115 } 116 final int sz = cs.length(); 117 if (start < 0) { 118 start = 0; 119 } 120 if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) { 121 for (int i = start; i < sz; i++) { 122 if (cs.charAt(i) == searchChar) { 123 return i; 124 } 125 } 126 return NOT_FOUND; 127 } 128 //supplementary characters (LANG1300) 129 if (searchChar <= Character.MAX_CODE_POINT) { 130 final char[] chars = Character.toChars(searchChar); 131 for (int i = start; i < sz - 1; i++) { 132 final char high = cs.charAt(i); 133 final char low = cs.charAt(i + 1); 134 if (high == chars[0] && low == chars[1]) { 135 return i; 136 } 137 } 138 } 139 return NOT_FOUND; 140 } 141 142 /** 143 * Used by the lastIndexOf(CharSequence methods) as a green implementation of lastIndexOf 144 * 145 * @param cs the {@link CharSequence} to be processed 146 * @param searchChar the {@link CharSequence} to find 147 * @param start the start index 148 * @return the index where the search sequence was found 149 */ lastIndexOf(final CharSequence cs, final CharSequence searchChar, int start)150 static int lastIndexOf(final CharSequence cs, final CharSequence searchChar, int start) { 151 if (searchChar == null || cs == null) { 152 return NOT_FOUND; 153 } 154 if (searchChar instanceof String) { 155 if (cs instanceof String) { 156 return ((String) cs).lastIndexOf((String) searchChar, start); 157 } 158 if (cs instanceof StringBuilder) { 159 return ((StringBuilder) cs).lastIndexOf((String) searchChar, start); 160 } 161 if (cs instanceof StringBuffer) { 162 return ((StringBuffer) cs).lastIndexOf((String) searchChar, start); 163 } 164 } 165 166 final int len1 = cs.length(); 167 final int len2 = searchChar.length(); 168 169 if (start > len1) { 170 start = len1; 171 } 172 173 if (start < 0 || len2 > len1) { 174 return NOT_FOUND; 175 } 176 177 if (len2 == 0) { 178 return start; 179 } 180 181 if (len2 <= TO_STRING_LIMIT) { 182 if (cs instanceof String) { 183 return ((String) cs).lastIndexOf(searchChar.toString(), start); 184 } 185 if (cs instanceof StringBuilder) { 186 return ((StringBuilder) cs).lastIndexOf(searchChar.toString(), start); 187 } 188 if (cs instanceof StringBuffer) { 189 return ((StringBuffer) cs).lastIndexOf(searchChar.toString(), start); 190 } 191 } 192 193 if (start + len2 > len1) { 194 start = len1 - len2; 195 } 196 197 final char char0 = searchChar.charAt(0); 198 199 int i = start; 200 while (true) { 201 while (cs.charAt(i) != char0) { 202 i--; 203 if (i < 0) { 204 return NOT_FOUND; 205 } 206 } 207 if (checkLaterThan1(cs, searchChar, len2, i)) { 208 return i; 209 } 210 i--; 211 if (i < 0) { 212 return NOT_FOUND; 213 } 214 } 215 } 216 217 /** 218 * Returns the index within {@code cs} of the last occurrence of 219 * the specified character, searching backward starting at the 220 * specified index. For values of {@code searchChar} in the range 221 * from 0 to 0xFFFF (inclusive), the index returned is the largest 222 * value <i>k</i> such that: 223 * <blockquote><pre> 224 * (this.charAt(<i>k</i>) == searchChar) && (<i>k</i> <= start) 225 * </pre></blockquote> 226 * is true. For other values of {@code searchChar}, it is the 227 * largest value <i>k</i> such that: 228 * <blockquote><pre> 229 * (this.codePointAt(<i>k</i>) == searchChar) && (<i>k</i> <= start) 230 * </pre></blockquote> 231 * is true. In either case, if no such character occurs in {@code cs} 232 * at or before position {@code start}, then {@code -1} is returned. 233 * 234 * <p> 235 * All indices are specified in {@code char} values 236 * (Unicode code units). 237 * </p> 238 * 239 * @param cs the {@link CharSequence} to be processed 240 * @param searchChar the char to be searched for 241 * @param start the start index, negative returns -1, beyond length starts at end 242 * @return the index where the search char was found, -1 if not found 243 * @since 3.6 updated to behave more like {@link String} 244 */ lastIndexOf(final CharSequence cs, final int searchChar, int start)245 static int lastIndexOf(final CharSequence cs, final int searchChar, int start) { 246 if (cs instanceof String) { 247 return ((String) cs).lastIndexOf(searchChar, start); 248 } 249 final int sz = cs.length(); 250 if (start < 0) { 251 return NOT_FOUND; 252 } 253 if (start >= sz) { 254 start = sz - 1; 255 } 256 if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) { 257 for (int i = start; i >= 0; --i) { 258 if (cs.charAt(i) == searchChar) { 259 return i; 260 } 261 } 262 return NOT_FOUND; 263 } 264 //supplementary characters (LANG1300) 265 //NOTE - we must do a forward traversal for this to avoid duplicating code points 266 if (searchChar <= Character.MAX_CODE_POINT) { 267 final char[] chars = Character.toChars(searchChar); 268 //make sure it's not the last index 269 if (start == sz - 1) { 270 return NOT_FOUND; 271 } 272 for (int i = start; i >= 0; i--) { 273 final char high = cs.charAt(i); 274 final char low = cs.charAt(i + 1); 275 if (chars[0] == high && chars[1] == low) { 276 return i; 277 } 278 } 279 } 280 return NOT_FOUND; 281 } 282 283 /** 284 * Green implementation of regionMatches. 285 * 286 * @param cs the {@link CharSequence} to be processed 287 * @param ignoreCase whether or not to be case-insensitive 288 * @param thisStart the index to start on the {@code cs} CharSequence 289 * @param substring the {@link CharSequence} to be looked for 290 * @param start the index to start on the {@code substring} CharSequence 291 * @param length character length of the region 292 * @return whether the region matched 293 */ regionMatches(final CharSequence cs, final boolean ignoreCase, final int thisStart, final CharSequence substring, final int start, final int length)294 static boolean regionMatches(final CharSequence cs, final boolean ignoreCase, final int thisStart, 295 final CharSequence substring, final int start, final int length) { 296 if (cs instanceof String && substring instanceof String) { 297 return ((String) cs).regionMatches(ignoreCase, thisStart, (String) substring, start, length); 298 } 299 int index1 = thisStart; 300 int index2 = start; 301 int tmpLen = length; 302 303 // Extract these first so we detect NPEs the same as the java.lang.String version 304 final int srcLen = cs.length() - thisStart; 305 final int otherLen = substring.length() - start; 306 307 // Check for invalid parameters 308 if (thisStart < 0 || start < 0 || length < 0) { 309 return false; 310 } 311 312 // Check that the regions are long enough 313 if (srcLen < length || otherLen < length) { 314 return false; 315 } 316 317 while (tmpLen-- > 0) { 318 final char c1 = cs.charAt(index1++); 319 final char c2 = substring.charAt(index2++); 320 321 if (c1 == c2) { 322 continue; 323 } 324 325 if (!ignoreCase) { 326 return false; 327 } 328 329 // The real same check as in String.regionMatches(): 330 final char u1 = Character.toUpperCase(c1); 331 final char u2 = Character.toUpperCase(c2); 332 if (u1 != u2 && Character.toLowerCase(u1) != Character.toLowerCase(u2)) { 333 return false; 334 } 335 } 336 337 return true; 338 } 339 340 /** 341 * Returns a new {@link CharSequence} that is a subsequence of this 342 * sequence starting with the {@code char} value at the specified index. 343 * 344 * <p>This provides the {@link CharSequence} equivalent to {@link String#substring(int)}. 345 * The length (in {@code char}) of the returned sequence is {@code length() - start}, 346 * so if {@code start == end} then an empty sequence is returned.</p> 347 * 348 * @param cs the specified subsequence, null returns null 349 * @param start the start index, inclusive, valid 350 * @return a new subsequence, may be null 351 * @throws IndexOutOfBoundsException if {@code start} is negative or if 352 * {@code start} is greater than {@code length()} 353 */ subSequence(final CharSequence cs, final int start)354 public static CharSequence subSequence(final CharSequence cs, final int start) { 355 return cs == null ? null : cs.subSequence(start, cs.length()); 356 } 357 358 /** 359 * Converts the given CharSequence to a char[]. 360 * 361 * @param source the {@link CharSequence} to be processed. 362 * @return the resulting char array, never null. 363 * @since 3.11 364 */ toCharArray(final CharSequence source)365 public static char[] toCharArray(final CharSequence source) { 366 final int len = StringUtils.length(source); 367 if (len == 0) { 368 return ArrayUtils.EMPTY_CHAR_ARRAY; 369 } 370 if (source instanceof String) { 371 return ((String) source).toCharArray(); 372 } 373 final char[] array = new char[len]; 374 for (int i = 0; i < len; i++) { 375 array[i] = source.charAt(i); 376 } 377 return array; 378 } 379 380 /** 381 * {@link CharSequenceUtils} instances should NOT be constructed in 382 * standard programming. 383 * 384 * <p>This constructor is public to permit tools that require a JavaBean 385 * instance to operate.</p> 386 */ CharSequenceUtils()387 public CharSequenceUtils() { 388 } 389 } 390