1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2010-2014, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * UTF16CollationIterator.java, ported from utf16collationiterator.h/.cpp 9 * 10 * C++ version created on: 2010oct27 11 * created by: Markus W. Scherer 12 */ 13 14 package com.ibm.icu.impl.coll; 15 16 /** 17 * UTF-16 collation element and character iterator. 18 * Handles normalized UTF-16 text, with length or NUL-terminated. 19 * Unnormalized text is handled by a subclass. 20 */ 21 public class UTF16CollationIterator extends CollationIterator { 22 /** 23 * Partial constructor, see {@link CollationIterator#CollationIterator(CollationData)}. 24 */ UTF16CollationIterator(CollationData d)25 public UTF16CollationIterator(CollationData d) { 26 super(d); 27 } 28 UTF16CollationIterator(CollationData d, boolean numeric, CharSequence s, int p)29 public UTF16CollationIterator(CollationData d, boolean numeric, CharSequence s, int p) { 30 super(d, numeric); 31 seq = s; 32 start = 0; 33 pos = p; 34 limit = s.length(); 35 } 36 37 @Override equals(Object other)38 public boolean equals(Object other) { 39 if(!super.equals(other)) { return false; } 40 UTF16CollationIterator o = (UTF16CollationIterator)other; 41 // Compare the iterator state but not the text: Assume that the caller does that. 42 return (pos - start) == (o.pos - o.start); 43 } 44 45 @Override hashCode()46 public int hashCode() { 47 assert false : "hashCode not designed"; 48 return 42; // any arbitrary constant will do 49 } 50 51 @Override resetToOffset(int newOffset)52 public void resetToOffset(int newOffset) { 53 reset(); 54 pos = start + newOffset; 55 } 56 57 @Override getOffset()58 public int getOffset() { 59 return pos - start; 60 } 61 setText(boolean numeric, CharSequence s, int p)62 public void setText(boolean numeric, CharSequence s, int p) { 63 reset(numeric); 64 seq = s; 65 start = 0; 66 pos = p; 67 limit = s.length(); 68 } 69 70 @Override nextCodePoint()71 public int nextCodePoint() { 72 if(pos == limit) { 73 return Collation.SENTINEL_CP; 74 } 75 char c = seq.charAt(pos++); 76 char trail; 77 if(Character.isHighSurrogate(c) && pos != limit && 78 Character.isLowSurrogate(trail = seq.charAt(pos))) { 79 ++pos; 80 return Character.toCodePoint(c, trail); 81 } else { 82 return c; 83 } 84 } 85 86 @Override previousCodePoint()87 public int previousCodePoint() { 88 if(pos == start) { 89 return Collation.SENTINEL_CP; 90 } 91 char c = seq.charAt(--pos); 92 char lead; 93 if(Character.isLowSurrogate(c) && pos != start && 94 Character.isHighSurrogate(lead = seq.charAt(pos - 1))) { 95 --pos; 96 return Character.toCodePoint(lead, c); 97 } else { 98 return c; 99 } 100 } 101 102 @Override handleNextCE32()103 protected long handleNextCE32() { 104 if(pos == limit) { 105 return NO_CP_AND_CE32; 106 } 107 char c = seq.charAt(pos++); 108 return makeCodePointAndCE32Pair(c, trie.getFromU16SingleLead(c)); 109 } 110 111 @Override handleGetTrailSurrogate()112 protected char handleGetTrailSurrogate() { 113 if(pos == limit) { return 0; } 114 char trail; 115 if(Character.isLowSurrogate(trail = seq.charAt(pos))) { ++pos; } 116 return trail; 117 } 118 119 /* boolean foundNULTerminator(); */ 120 121 @Override forwardNumCodePoints(int num)122 protected void forwardNumCodePoints(int num) { 123 while(num > 0 && pos != limit) { 124 char c = seq.charAt(pos++); 125 --num; 126 if(Character.isHighSurrogate(c) && pos != limit && 127 Character.isLowSurrogate(seq.charAt(pos))) { 128 ++pos; 129 } 130 } 131 } 132 133 @Override backwardNumCodePoints(int num)134 protected void backwardNumCodePoints(int num) { 135 while(num > 0 && pos != start) { 136 char c = seq.charAt(--pos); 137 --num; 138 if(Character.isLowSurrogate(c) && pos != start && 139 Character.isHighSurrogate(seq.charAt(pos-1))) { 140 --pos; 141 } 142 } 143 } 144 145 protected CharSequence seq; 146 protected int start; 147 protected int pos; 148 protected int limit; 149 } 150