1 /* 2 * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import java.util.NoSuchElementException; 29 import java.util.Objects; 30 import java.util.PrimitiveIterator; 31 import java.util.Spliterator; 32 import java.util.Spliterators; 33 import java.util.function.IntConsumer; 34 import java.util.stream.IntStream; 35 import java.util.stream.StreamSupport; 36 37 /** 38 * A {@code CharSequence} is a readable sequence of {@code char} values. This 39 * interface provides uniform, read-only access to many different kinds of 40 * {@code char} sequences. 41 * A {@code char} value represents a character in the <i>Basic 42 * Multilingual Plane (BMP)</i> or a surrogate. Refer to <a 43 * href="Character.html#unicode">Unicode Character Representation</a> for details. 44 * 45 * <p> This interface does not refine the general contracts of the {@link 46 * java.lang.Object#equals(java.lang.Object) equals} and {@link 47 * java.lang.Object#hashCode() hashCode} methods. The result of testing two objects 48 * that implement {@code CharSequence} for equality is therefore, in general, undefined. 49 * Each object may be implemented by a different class, and there 50 * is no guarantee that each class will be capable of testing its instances 51 * for equality with those of the other. It is therefore inappropriate to use 52 * arbitrary {@code CharSequence} instances as elements in a set or as keys in 53 * a map. </p> 54 * 55 * @author Mike McCloskey 56 * @since 1.4 57 * @spec JSR-51 58 */ 59 60 public interface CharSequence { 61 62 /** 63 * Returns the length of this character sequence. The length is the number 64 * of 16-bit {@code char}s in the sequence. 65 * 66 * @return the number of {@code char}s in this sequence 67 */ length()68 int length(); 69 70 /** 71 * Returns the {@code char} value at the specified index. An index ranges from zero 72 * to {@code length() - 1}. The first {@code char} value of the sequence is at 73 * index zero, the next at index one, and so on, as for array 74 * indexing. 75 * 76 * <p>If the {@code char} value specified by the index is a 77 * <a href="{@docRoot}/java.base/java/lang/Character.html#unicode">surrogate</a>, the surrogate 78 * value is returned. 79 * 80 * @param index the index of the {@code char} value to be returned 81 * 82 * @return the specified {@code char} value 83 * 84 * @throws IndexOutOfBoundsException 85 * if the {@code index} argument is negative or not less than 86 * {@code length()} 87 */ charAt(int index)88 char charAt(int index); 89 90 /** 91 * Returns a {@code CharSequence} that is a subsequence of this sequence. 92 * The subsequence starts with the {@code char} value at the specified index and 93 * ends with the {@code char} value at index {@code end - 1}. The length 94 * (in {@code char}s) of the 95 * returned sequence is {@code end - start}, so if {@code start == end} 96 * then an empty sequence is returned. 97 * 98 * @param start the start index, inclusive 99 * @param end the end index, exclusive 100 * 101 * @return the specified subsequence 102 * 103 * @throws IndexOutOfBoundsException 104 * if {@code start} or {@code end} are negative, 105 * if {@code end} is greater than {@code length()}, 106 * or if {@code start} is greater than {@code end} 107 */ subSequence(int start, int end)108 CharSequence subSequence(int start, int end); 109 110 /** 111 * Returns a string containing the characters in this sequence in the same 112 * order as this sequence. The length of the string will be the length of 113 * this sequence. 114 * 115 * @return a string consisting of exactly this sequence of characters 116 */ toString()117 public String toString(); 118 119 /** 120 * Returns a stream of {@code int} zero-extending the {@code char} values 121 * from this sequence. Any char which maps to a <a 122 * href="{@docRoot}/java.base/java/lang/Character.html#unicode">surrogate code 123 * point</a> is passed through uninterpreted. 124 * 125 * <p>The stream binds to this sequence when the terminal stream operation 126 * commences (specifically, for mutable sequences the spliterator for the 127 * stream is <a href="../util/Spliterator.html#binding"><em>late-binding</em></a>). 128 * If the sequence is modified during that operation then the result is 129 * undefined. 130 * 131 * @return an IntStream of char values from this sequence 132 * @since 1.8 133 */ chars()134 public default IntStream chars() { 135 class CharIterator implements PrimitiveIterator.OfInt { 136 int cur = 0; 137 138 public boolean hasNext() { 139 return cur < length(); 140 } 141 142 public int nextInt() { 143 if (hasNext()) { 144 return charAt(cur++); 145 } else { 146 throw new NoSuchElementException(); 147 } 148 } 149 150 @Override 151 public void forEachRemaining(IntConsumer block) { 152 for (; cur < length(); cur++) { 153 block.accept(charAt(cur)); 154 } 155 } 156 } 157 158 return StreamSupport.intStream(() -> 159 Spliterators.spliterator( 160 new CharIterator(), 161 length(), 162 Spliterator.ORDERED), 163 Spliterator.SUBSIZED | Spliterator.SIZED | Spliterator.ORDERED, 164 false); 165 } 166 167 /** 168 * Returns a stream of code point values from this sequence. Any surrogate 169 * pairs encountered in the sequence are combined as if by {@linkplain 170 * Character#toCodePoint Character.toCodePoint} and the result is passed 171 * to the stream. Any other code units, including ordinary BMP characters, 172 * unpaired surrogates, and undefined code units, are zero-extended to 173 * {@code int} values which are then passed to the stream. 174 * 175 * <p>The stream binds to this sequence when the terminal stream operation 176 * commences (specifically, for mutable sequences the spliterator for the 177 * stream is <a href="../util/Spliterator.html#binding"><em>late-binding</em></a>). 178 * If the sequence is modified during that operation then the result is 179 * undefined. 180 * 181 * @return an IntStream of Unicode code points from this sequence 182 * @since 1.8 183 */ codePoints()184 public default IntStream codePoints() { 185 class CodePointIterator implements PrimitiveIterator.OfInt { 186 int cur = 0; 187 188 @Override 189 public void forEachRemaining(IntConsumer block) { 190 final int length = length(); 191 int i = cur; 192 try { 193 while (i < length) { 194 char c1 = charAt(i++); 195 if (!Character.isHighSurrogate(c1) || i >= length) { 196 block.accept(c1); 197 } else { 198 char c2 = charAt(i); 199 if (Character.isLowSurrogate(c2)) { 200 i++; 201 block.accept(Character.toCodePoint(c1, c2)); 202 } else { 203 block.accept(c1); 204 } 205 } 206 } 207 } finally { 208 cur = i; 209 } 210 } 211 212 public boolean hasNext() { 213 return cur < length(); 214 } 215 216 public int nextInt() { 217 final int length = length(); 218 219 if (cur >= length) { 220 throw new NoSuchElementException(); 221 } 222 char c1 = charAt(cur++); 223 if (Character.isHighSurrogate(c1) && cur < length) { 224 char c2 = charAt(cur); 225 if (Character.isLowSurrogate(c2)) { 226 cur++; 227 return Character.toCodePoint(c1, c2); 228 } 229 } 230 return c1; 231 } 232 } 233 234 return StreamSupport.intStream(() -> 235 Spliterators.spliteratorUnknownSize( 236 new CodePointIterator(), 237 Spliterator.ORDERED), 238 Spliterator.ORDERED, 239 false); 240 } 241 242 /** 243 * Compares two {@code CharSequence} instances lexicographically. Returns a 244 * negative value, zero, or a positive value if the first sequence is lexicographically 245 * less than, equal to, or greater than the second, respectively. 246 * 247 * <p> 248 * The lexicographical ordering of {@code CharSequence} is defined as follows. 249 * Consider a {@code CharSequence} <i>cs</i> of length <i>len</i> to be a 250 * sequence of char values, <i>cs[0]</i> to <i>cs[len-1]</i>. Suppose <i>k</i> 251 * is the lowest index at which the corresponding char values from each sequence 252 * differ. The lexicographic ordering of the sequences is determined by a numeric 253 * comparison of the char values <i>cs1[k]</i> with <i>cs2[k]</i>. If there is 254 * no such index <i>k</i>, the shorter sequence is considered lexicographically 255 * less than the other. If the sequences have the same length, the sequences are 256 * considered lexicographically equal. 257 * 258 * 259 * @param cs1 the first {@code CharSequence} 260 * @param cs2 the second {@code CharSequence} 261 * 262 * @return the value {@code 0} if the two {@code CharSequence} are equal; 263 * a negative integer if the first {@code CharSequence} 264 * is lexicographically less than the second; or a 265 * positive integer if the first {@code CharSequence} is 266 * lexicographically greater than the second. 267 * 268 * @since 11 269 */ 270 @SuppressWarnings("unchecked") compare(CharSequence cs1, CharSequence cs2)271 public static int compare(CharSequence cs1, CharSequence cs2) { 272 if (Objects.requireNonNull(cs1) == Objects.requireNonNull(cs2)) { 273 return 0; 274 } 275 276 if (cs1.getClass() == cs2.getClass() && cs1 instanceof Comparable) { 277 return ((Comparable<Object>) cs1).compareTo(cs2); 278 } 279 280 for (int i = 0, len = Math.min(cs1.length(), cs2.length()); i < len; i++) { 281 char a = cs1.charAt(i); 282 char b = cs2.charAt(i); 283 if (a != b) { 284 return a - b; 285 } 286 } 287 288 return cs1.length() - cs2.length(); 289 } 290 291 } 292