• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 /*
4  *******************************************************************************
5  * Copyright (C) 1996-2015, Google, Inc., International Business Machines Corporation and
6  * others. All Rights Reserved.
7  *******************************************************************************
8  */
9 package com.ibm.icu.impl;
10 
11 import java.util.Collection;
12 import java.util.Comparator;
13 import java.util.Iterator;
14 import java.util.LinkedList;
15 import java.util.Map.Entry;
16 import java.util.Set;
17 import java.util.TreeMap;
18 import java.util.TreeSet;
19 
20 import com.ibm.icu.lang.CharSequences;
21 import com.ibm.icu.util.ICUException;
22 
23 @SuppressWarnings("deprecation")
24 public class StringRange {
25     private static final boolean DEBUG = false;
26 
27     public interface Adder {
28         /**
29          * @param start
30          * @param end   may be null, for adding single string
31          */
add(String start, String end)32         void add(String start, String end);
33     }
34 
35     public static final Comparator<int[]> COMPARE_INT_ARRAYS = new Comparator<int[]>() {
36         @Override
37         public int compare(int[] o1, int[] o2) {
38             int minIndex = Math.min(o1.length, o2.length);
39             for (int i = 0; i < minIndex; ++i) {
40                 int diff = o1[i] - o2[i];
41                 if (diff != 0) {
42                     return diff;
43                 }
44             }
45             return o1.length - o2.length;
46         }
47     };
48 
49     /**
50      * Compact the set of strings.
51      * @param source set of strings
52      * @param adder adds each pair to the output. See the {@link Adder} interface.
53      * @param shorterPairs use abc-d instead of abc-abd
54      * @param moreCompact use a more compact form, at the expense of more processing. If false, source must be sorted.
55      */
compact(Set<String> source, Adder adder, boolean shorterPairs, boolean moreCompact)56     public static void compact(Set<String> source, Adder adder, boolean shorterPairs, boolean moreCompact) {
57         if (!moreCompact) {
58             String start = null;
59             String end = null;
60             int lastCp = 0;
61             int prefixLen = 0;
62             for (String s : source) {
63                 if (start != null) { // We have something queued up
64                     if (s.regionMatches(0, start, 0, prefixLen)) {
65                         int currentCp = s.codePointAt(prefixLen);
66                         if (currentCp == 1+lastCp && s.length() == prefixLen + Character.charCount(currentCp)) {
67                             end = s;
68                             lastCp = currentCp;
69                             continue;
70                         }
71                     }
72                     // We failed to find continuation. Add what we have and restart
73                     adder.add(start, end == null ? null
74                         : !shorterPairs ? end
75                             : end.substring(prefixLen, end.length()));
76                 }
77                 // new possible range
78                 start = s;
79                 end = null;
80                 lastCp = s.codePointBefore(s.length());
81                 prefixLen = s.length() - Character.charCount(lastCp);
82             }
83             adder.add(start, end == null ? null
84                 : !shorterPairs ? end
85                     : end.substring(prefixLen, end.length()));
86         } else {
87             // not a fast algorithm, but ok for now
88             // TODO rewire to use the first (slower) algorithm to generate the ranges, then compact them from there.
89             // first sort by lengths
90             Relation<Integer,Ranges> lengthToArrays = Relation.of(new TreeMap<Integer,Set<Ranges>>(), TreeSet.class);
91             for (String s : source) {
92                 Ranges item = new Ranges(s);
93                 lengthToArrays.put(item.size(), item);
94             }
95             // then compact items of each length and emit compacted sets
96             for (Entry<Integer, Set<Ranges>> entry : lengthToArrays.keyValuesSet()) {
97                 LinkedList<Ranges> compacted = compact(entry.getKey(), entry.getValue());
98                 for (Ranges ranges : compacted) {
99                     adder.add(ranges.start(), ranges.end(shorterPairs));
100                 }
101             }
102         }
103     }
104 
105     /**
106      * Faster but not as good compaction. Only looks at final codepoint.
107      * @param source set of strings
108      * @param adder adds each pair to the output. See the {@link Adder} interface.
109      * @param shorterPairs use abc-d instead of abc-abd
110      */
compact(Set<String> source, Adder adder, boolean shorterPairs)111     public static void compact(Set<String> source, Adder adder, boolean shorterPairs) {
112         compact(source,adder,shorterPairs,false);
113     }
114 
compact(int size, Set<Ranges> inputRanges)115     private static LinkedList<Ranges> compact(int size, Set<Ranges> inputRanges) {
116         LinkedList<Ranges> ranges = new LinkedList<Ranges>(inputRanges);
117         for (int i = size-1; i >= 0; --i) {
118             Ranges last = null;
119             for (Iterator<Ranges> it = ranges.iterator(); it.hasNext();) {
120                 Ranges item = it.next();
121                 if (last == null) {
122                     last = item;
123                 } else if (last.merge(i, item)) {
124                     it.remove();
125                 } else {
126                     last = item; // go to next
127                 }
128             }
129         };
130         return ranges;
131     }
132 
133     static final class Range implements Comparable<Range>{
134         int min;
135         int max;
Range(int min, int max)136         public Range(int min, int max) {
137             this.min = min;
138             this.max = max;
139         }
140         @Override
equals(Object obj)141         public boolean equals(Object obj) {
142             return this == obj || (obj != null && obj instanceof Range && compareTo((Range)obj) == 0);
143         }
144         @Override
compareTo(Range that)145         public int compareTo(Range that) {
146             int diff = min - that.min;
147             if (diff != 0) {
148                 return diff;
149             }
150             return max - that.max;
151         }
152         @Override
hashCode()153         public int hashCode() {
154             return min * 37 + max;
155         }
156         @Override
toString()157         public String toString() {
158             StringBuilder result = new StringBuilder().appendCodePoint(min);
159             return min == max ? result.toString() : result.append('~').appendCodePoint(max).toString();
160         }
161     }
162 
163     static final class Ranges implements Comparable<Ranges> {
164         private final Range[] ranges;
Ranges(String s)165         public Ranges(String s) {
166             int[] array = CharSequences.codePoints(s);
167             ranges = new Range[array.length];
168             for (int i = 0; i < array.length; ++i) {
169                 ranges[i] = new Range(array[i], array[i]);
170             }
171         }
merge(int pivot, Ranges other)172         public boolean merge(int pivot, Ranges other) {
173            // We merge items if the pivot is adjacent, and all later ranges are equal.
174            for (int i = ranges.length-1; i >= 0; --i) {
175                if (i == pivot) {
176                    if (ranges[i].max != other.ranges[i].min-1) { // not adjacent
177                        return false;
178                    }
179                } else {
180                    if (!ranges[i].equals(other.ranges[i])) {
181                        return false;
182                    }
183                }
184            }
185            if (DEBUG) System.out.print("Merging: " + this + ", " + other);
186            ranges[pivot].max = other.ranges[pivot].max;
187            if (DEBUG) System.out.println(" => " + this);
188            return true;
189         }
190 
start()191         public String start() {
192             StringBuilder result = new StringBuilder();
193             for (int i = 0; i < ranges.length; ++i) {
194                 result.appendCodePoint(ranges[i].min);
195             }
196             return result.toString();
197         }
end(boolean mostCompact)198         public String end(boolean mostCompact) {
199             int firstDiff = firstDifference();
200             if (firstDiff == ranges.length) {
201                 return null;
202             }
203             StringBuilder result = new StringBuilder();
204             for (int i = mostCompact ? firstDiff : 0; i < ranges.length; ++i) {
205                 result.appendCodePoint(ranges[i].max);
206             }
207             return result.toString();
208         }
firstDifference()209         public int firstDifference() {
210             for (int i = 0; i < ranges.length; ++i) {
211                 if (ranges[i].min != ranges[i].max){
212                     return i;
213                 }
214             }
215             return ranges.length;
216         }
size()217         public Integer size() {
218             return ranges.length;
219         }
220         @Override
compareTo(Ranges other)221         public int compareTo(Ranges other) {
222             int diff = ranges.length - other.ranges.length;
223             if (diff != 0) {
224                 return diff;
225             }
226             for (int i = 0; i < ranges.length; ++i) {
227                 diff = ranges[i].compareTo(other.ranges[i]);
228                 if (diff != 0) {
229                     return diff;
230                 }
231             }
232             return 0;
233         }
234         @Override
toString()235         public String toString() {
236             String start = start();
237             String end = end(false);
238             return end == null ? start : start + "~" + end;
239         }
240     }
241 
expand(String start, String end, boolean requireSameLength, Collection<String> output)242     public static Collection<String> expand(String start, String end, boolean requireSameLength, Collection<String> output) {
243         if (start == null || end == null) {
244             throw new ICUException("Range must have 2 valid strings");
245         }
246         int[] startCps = CharSequences.codePoints(start);
247         int[] endCps = CharSequences.codePoints(end);
248         int startOffset = startCps.length - endCps.length;
249 
250         if (requireSameLength && startOffset != 0) {
251             throw new ICUException("Range must have equal-length strings");
252         } else if (startOffset < 0) {
253             throw new ICUException("Range must have start-length ≥ end-length");
254         } else if (endCps.length == 0) {
255             throw new ICUException("Range must have end-length > 0");
256         }
257 
258         StringBuilder builder = new StringBuilder();
259         for (int i = 0; i < startOffset; ++i) {
260             builder.appendCodePoint(startCps[i]);
261         }
262         add(0, startOffset, startCps, endCps, builder, output);
263         return output;
264     }
265 
add(int endIndex, int startOffset, int[] starts, int[] ends, StringBuilder builder, Collection<String> output)266     private static void add(int endIndex, int startOffset, int[] starts, int[] ends, StringBuilder builder, Collection<String> output) {
267         int start = starts[endIndex+startOffset];
268         int end = ends[endIndex];
269         if (start > end) {
270             throw new ICUException("Range must have xᵢ ≤ yᵢ for each index i");
271         }
272         boolean last = endIndex == ends.length - 1;
273         int startLen = builder.length();
274         for (int i = start; i <= end; ++i) {
275             builder.appendCodePoint(i);
276             if (last) {
277                 output.add(builder.toString());
278             } else {
279                 add(endIndex+1, startOffset, starts, ends, builder, output);
280             }
281             builder.setLength(startLen);
282         }
283     }
284 }
285