• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2017 The Libphonenumber Authors.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 package com.google.i18n.phonenumbers.metadata;
17 
18 import static com.google.common.base.Preconditions.checkArgument;
19 import static com.google.i18n.phonenumbers.metadata.DigitSequence.domain;
20 import static java.lang.Integer.numberOfLeadingZeros;
21 import static java.lang.Integer.numberOfTrailingZeros;
22 
23 import com.google.common.collect.ContiguousSet;
24 import com.google.common.collect.ImmutableList;
25 import com.google.common.collect.Iterables;
26 import com.google.common.collect.Range;
27 import com.google.common.collect.RangeSet;
28 import java.util.ArrayList;
29 import java.util.Arrays;
30 import java.util.Comparator;
31 import java.util.Iterator;
32 import java.util.List;
33 import java.util.Set;
34 
35 /**
36  * A compact representation of a disjoint set of ranges of digit sequences. This is a compact way
37  * to represent one or many ranges of digit sequences which share the same length. Examples include:
38  * <pre>{@code
39  * "01234" --> the singleton range containing only the digit sequence "01234"
40  * "012xx" --> the contiguous digit sequence range ["01200".."01299"]
41  * "012[3-5]6xx" --> the disjoint set of contiguous digit sequence ranges
42  *     ["0123600".."0123699"], ["0124600".."0124699"], ["0125600".."0125699"]
43  * }</pre>
44  * Note that the sets of contiguous ranges defined by a {@code RangeSpecification} are always
45  * mutually disjoint.
46  *
47  * <p>Range specifications have a natural prefix based lexicographical ordering (based on the
48  * most-significant point at which a difference appears), but if you are comparing a disjoint set
49  * of range specifications (e.g. from a {@link RangeTree}) then it can be more intuitive to use an
50  * ordering based on the minimum digit sequence, but note this approach fails if the range
51  * specifications can overlap (e.g. comparing "1xx" and "100").
52  */
53 public final class RangeSpecification implements Comparable<RangeSpecification> {
54   /** The mask of all possible digits. */
55   public static final char ALL_DIGITS_MASK = (1 << 10) - 1;
56 
57   private static final RangeSpecification EMPTY = new RangeSpecification("");
58 
59   /** Returns the empty range specification, which matches only the empty digit sequence. */
empty()60   public static RangeSpecification empty() {
61     return EMPTY;
62   }
63 
64   /** Returns the range specification of length one which matches any of the given digits. */
singleton(Iterable<Integer> digits)65   public static RangeSpecification singleton(Iterable<Integer> digits) {
66     int mask = 0;
67     for (int digit : digits) {
68       checkArgument(0 <= digit && digit <= 9, "bad digit value '%s'", digit);
69       mask |= (1 << digit);
70     }
71     return new RangeSpecification(String.valueOf((char) mask));
72   }
73 
74   /** Returns a new range specification which matches only the given non-empty digit sequence. */
from(DigitSequence s)75   public static RangeSpecification from(DigitSequence s) {
76     if (s.length() == 0) {
77       return RangeSpecification.empty();
78     }
79     char[] masks = new char[s.length()];
80     for (int n = 0; n < masks.length; n++) {
81       masks[n] = (char) (1 << s.getDigit(n));
82     }
83     return new RangeSpecification(new String(masks));
84   }
85 
86   /** Returns a new range specification which matches any digit sequence of the specified length. */
any(int length)87   public static RangeSpecification any(int length) {
88     checkArgument(length >= 0);
89     if (length == 0) {
90       return RangeSpecification.empty();
91     }
92     char[] masks = new char[length];
93     Arrays.fill(masks, ALL_DIGITS_MASK);
94     return new RangeSpecification(new String(masks));
95   }
96 
97   /**
98    * Parses the string form of a range specification (e.g. "1234[57-9]xxx"). This must be
99    * correctly formed, including having all ranges be well formed (e.g. not "[33]", "[3-3]" or
100    * "[6-4]").
101    *
102    * <p>Note that non-canonical ranges are permitted if the digits are in order (e.g. "[1234]",
103    * "[4-5]" or "[0-9]" but not "[4321]"). The returned range specification is canonical (e.g.
104    * {@code parse("12[34569]").toString() == "12[3-69]"}).
105    *
106    * <p>The empty string is parsed as the empty range specification.
107    *
108    * <p>The use of single ASCII underscores ("_") to group ranges and aid readability is supported
109    * during parsing but is not retained in the parsed result (e.g.
110    * {@code parse("12_34[5-8]_xxx_xxx").toString() == "1234[5-8]xxxxxx"}). Note that underscore may
111    * not be present inside ranges (e.g. "1_4") or at the ends of the range (e.g. "123xxx_").
112    */
parse(String s)113   public static RangeSpecification parse(String s) {
114     if (s.isEmpty()) {
115       return empty();
116     }
117     checkArgument(!s.startsWith("_") && !s.endsWith("_"), "cannot start/end with '_': %s", s);
118     StringBuilder bitmasks = new StringBuilder();
119     boolean lastCharWasUnderscore = false;
120     for (int n = 0; n < s.length(); n++) {
121       char c = s.charAt(n);
122       switch (c) {
123         case '_':
124           checkArgument(!lastCharWasUnderscore, "cannot have multiple '_' in a row: %s", s);
125           lastCharWasUnderscore = true;
126           // Continue the for-loop rather than breaking out the switch to avoid resetting the flag.
127           continue;
128         case 'x':
129           bitmasks.append(ALL_DIGITS_MASK);
130           break;
131         case '[':
132           n += 1;
133           int end = s.indexOf(']', n);
134           checkArgument(end != -1, "unclosed range in specification: %s", s);
135           checkArgument(end > n, "empty range in specification: %s", s);
136           bitmasks.append(parseRange(s, n, end));
137           n = end;
138           break;
139         default:
140           checkArgument('0' <= c && c <= '9',
141               "bad digit value '%s' in range specification: %s", c, s);
142           bitmasks.append((char) (1 << (c - '0')));
143           break;
144       }
145       lastCharWasUnderscore = false;
146     }
147     return new RangeSpecification(bitmasks.toString());
148   }
149 
parseRange(String s, int start, int end)150   private static char parseRange(String s, int start, int end) {
151     int mask = 0;
152     for (int n = start; n < end;) {
153       char c = s.charAt(n++);
154       checkArgument('0' <= c && c <= '9',
155           "bad digit value '%s' in range specification: %s", c, s);
156       int shift = (c - '0');
157       // check that this bit and all above it are zero (to ensure correct ordering).
158       checkArgument(mask >> shift == 0, "unordered range in specification: %s", s);
159       if (n == end || s.charAt(n) != '-') {
160         // Single digit not in a range.
161         mask |= 1 << shift;
162         continue;
163       }
164       n++;
165       checkArgument(n < end, "unclosed range in specification: %s", s);
166       c = s.charAt(n++);
167       checkArgument('0' <= c && c <= '9',
168           "bad digit value '%s' in range specification: %s", c, s);
169       int rshift = (c - '0');
170       checkArgument(rshift > shift, "unordered range in specification: %s", s);
171       // Set bits from shift to rshift inclusive (e.g. 11111 & ~11 = 11100).
172       mask |= ((1 << (rshift + 1)) - 1) & ~((1 << shift) - 1);
173     }
174     return (char) mask;
175   }
176 
177   /**
178    * Returns the canonical representation of the given ranges. The number of range specifications
179    * in the returned instance may be higher or lower than the number of given ranges.
180    * <p>
181    * NOTE: This is only used by RangeTree for generating a RangeTree from a RangeSet, and is not
182    * suitable as a public API (one day we might generate the RangeTree directly and be able to
183    * delete this code).
184    */
185   static ImmutableList<RangeSpecification> from(RangeSet<DigitSequence> ranges) {
186     List<RangeSpecification> specs = new ArrayList<>();
187     Set<Range<DigitSequence>> s = ranges.asRanges();
188     checkArgument(!s.isEmpty(), "empty range set not permitted");
189     // Make sure are ranges we use are canonicalized over the domain of DigitSequences (so Range
190     // operations (e.g. isConnected()) work as expected. See Range for more on why this matters.
191     Range<DigitSequence> cur = s.iterator().next().canonical(domain());
192     checkArgument(!cur.contains(DigitSequence.empty()),
193         "empty digit sequence not permitted in range set");
194     for (Range<DigitSequence> next : Iterables.skip(ranges.asRanges(), 1)) {
195       next = next.canonical(domain());
196       if (cur.isConnected(next)) {
197         // Even though 'cur' and 'next' are both canonicalized, it's not guaranteed that they are
198         // closed-open (singleton ranges are fully closed and any range containing the maximum
199         // value must be closed. To "union" the two ranges we must also preserve the bound types.
200         cur = Range.range(
201             cur.lowerEndpoint(), cur.lowerBoundType(),
202             next.upperEndpoint(), next.upperBoundType())
203             .canonical(domain());
204         continue;
205       }
206       addRangeSpecsOf(cur, specs);
207       cur = next;
208     }
209     addRangeSpecsOf(cur, specs);
210     return ImmutableList.sortedCopyOf(Comparator.comparing(RangeSpecification::min), specs);
211   }
212 
213   /** Adds the canonical minimal range specifications for a single range to the given list. */
214   private static void addRangeSpecsOf(Range<DigitSequence> r, List<RangeSpecification> specs) {
215     // Given range is already canonical but may span multiple lengths. It's easier to view this
216     // as a contiguous set when finding first/last elements however to avoid worrying about bound
217     // types. A contiguous set is not an expensive class to create.
218     ContiguousSet<DigitSequence> s = ContiguousSet.create(r, domain());
219     DigitSequence start = s.first();
220     DigitSequence end = s.last();
221     while (start.length() < end.length()) {
222       // Add <start> to "999..." for the current block length (the max domain value is all 9's).
223       DigitSequence blockEnd = DigitSequence.nines(start.length());
224       addRangeSpecs(start, blockEnd, specs);
225       // Reset the start to the next length up (i.e. the "000..." sequence that's one longer).
226       start = blockEnd.next();
227     }
228     // Finally and the range specs up to (and including) the end value.
229     addRangeSpecs(start, end, specs);
230   }
231 
232   // Adds canonical minimal range specifications for the range of same-length digit sequences.
233   private static void addRangeSpecs(
234       DigitSequence start, DigitSequence end, List<RangeSpecification> specs) {
235     int length = start.length();
236     checkArgument(end.length() == length);
237 
238     // Masks contains a running total of the bitmasks we want to convert to RangeSpecifications.
239     // As processing proceeds, the mask array is reused. This is because the prefix used for
240     // successive range specifications is always a subset of the previous specifications and the
241     // trailing part of the array always fills up with the range mask for 'x' (i.e. [0-9]).
242     int[] masks = new int[length];
243 
244     // Stage 1:
245     // Starting from the last digit in the 'start' sequence, work up until we find something that
246     // is not a '0'. This is the first digit that needs to be adjusted to create a range
247     // specification covering it and the digits 'below' it. For example, the first specification
248     // for the range ["1200".."9999"] is "1[2-9]xx".
249     // Once a specification is emitted, the start value is adjusted to the next digit sequence
250     // immediately above the end of the emitted range, so after emitting "1[2-9]xx", start="2000".
251     // Once each range specification is emitted, we continue working 'up' the digit sequence until
252     // the next calculated start value exceeds the 'end' of our range. This specification cannot
253     // be emitted and signals the end of stage 1.
254     setBitmasks(masks, start);
255     for (int n = previousNon(0, start, length); n != -1; n = previousNon(0, start, n)) {
256       int loDigit = start.getDigit(n);
257       DigitSequence prefix = start.first(n);
258       DigitSequence blockEnd = prefix.extendBy(DigitSequence.nines(length - n));
259       if (blockEnd.compareTo(end) > 0) {
260         // The end of this block would exceed the end of the main range, so we must stop.
261         break;
262       }
263       // The bitmasks we want is:
264       // <first (n-1) digits of 'start'> [loDigit..9] <any digits mask...>
265       masks[n] = bitmaskUpFrom(loDigit);
266       fillBitmasksAfter(masks, n);
267       specs.add(RangeSpecification.fromBitmasks(masks));
268       // Adjust the range start now we have emitted the range specification.
269       start = blockEnd.next();
270     }
271 
272     // Stage 2:
273     // Very similar to stage 1, but work up from the last digit in the 'end' sequence. The
274     // difference now is that we look for the first digit that's not '9' and generate ranges that
275     // go down to the start of the range, not up to the end. Thus for ["0000", "1299"] the first
276     // specification generated is "1[0-2]xx", which is emitted at the end of the list.
277     int midIdx = specs.size();
278     setBitmasks(masks, end);
279     for (int n = previousNon(9, end, length); n != -1; n = previousNon(9, end, n)) {
280       int hiDigit = end.getDigit(n);
281       DigitSequence prefix = end.first(n);
282       DigitSequence blockStart = prefix.extendBy(DigitSequence.zeros(length - n));
283       if (blockStart.compareTo(start) < 0) {
284         // The start of this block would precede the start of the main range, so we must stop.
285         break;
286       }
287       // The bitmasks we want is:
288       // <first (n-1) digits of 'end'> [0..hiDigit] <any digits mask...>
289       masks[n] = bitmaskDownFrom(hiDigit);
290       fillBitmasksAfter(masks, n);
291       specs.add(midIdx, RangeSpecification.fromBitmasks(masks));
292       // Adjust the range end now we have emitted the range specification.
293       end = blockStart.previous();
294     }
295 
296     // Stage 3: Having emitted the first and last set of range specifications, it only remains to
297     // emit the "center" specification in the middle of the list. This is special as neither bound
298     // is the end of a block. In previous stages, all partial ranges are either "up to 9" or
299     // "down to zero". For example: ["1234".."1789"] has the center range "1[3-6]xx", and
300     // ["1234".."1345"] has no center range at all.
301     if (start.compareTo(end) < 0) {
302       // Find the last digit before start and end combine (ie, 1200, 1299 --> 12xx --> n=1). We
303       // know that 'start' and 'end' are the same length and bound a range like:
304       //   <prefix> [X..Y] [000..999]
305       // but X or Y could be 0 or 9 respectively (just not both).
306       //
307       // Note that we don't even both to test the first digit in the sequences because if 'start'
308       // and 'end' span a full range (e.g. [000.999]) we can just use the same code to fill the
309       // masks correctly anyway.
310       int n = start.length();
311       while (--n > 0 && start.getDigit(n) == 0 && end.getDigit(n) == 9) {}
312       // Bitwise AND the masks for [X..9] and [0..Y] to get the mask for [X..Y].
313       // Note that the "masks" array already contains the correct prefix digits up to (n-1).
314       masks[n] = bitmaskUpFrom(start.getDigit(n)) & bitmaskDownFrom(end.getDigit(n));
315       fillBitmasksAfter(masks, n);
316       specs.add(midIdx, RangeSpecification.fromBitmasks(masks));
317     }
318   }
319 
320   // Sets the values in the given array to correspond to the digits in the given sequence. If a
321   // range specification were made from the resulting array it would match only that digit sequence.
322   private static void setBitmasks(int[] masks, DigitSequence s) {
323     for (int n = 0; n < s.length(); n++) {
324       masks[n] = 1 << s.getDigit(n);
325     }
326   }
327 
328   /**
329    * Creates a range specification from a given array of integer masks. The Nth element of the
330    * array corresponds to the Nth element in the range specification, and mask values must be
331    * non-zero and have only bits 0 to 9 set.
332    */
333   private static RangeSpecification fromBitmasks(int[] bitmasks) {
334     checkArgument(bitmasks.length <= DigitSequence.MAX_DIGITS,
335         "range specification too large");
336     StringBuilder s = new StringBuilder(bitmasks.length);
337     s.setLength(bitmasks.length);
338     for (int n = 0; n < bitmasks.length; n++) {
339       int mask = bitmasks[n];
340       checkArgument(mask > 0 && mask <= ALL_DIGITS_MASK, "invalid bitmask: %s", mask);
341       s.setCharAt(n, (char) mask);
342     }
343     return new RangeSpecification(s.toString());
344   }
345 
346   // Fills the bitmasks after the given index with the "all digits" mask (i.e. matching [0-9]).
347   // This can accept -1 as the index since it always pre-increments before using it.
348   private static void fillBitmasksAfter(int[] masks, int n) {
349     // Because of the iterative way the mask array is handled, we can stop filling when we hit
350     // ALL_DIGITS_MASK because everything past that must already be filled.
351     while (++n < masks.length && masks[n] != ALL_DIGITS_MASK) {
352       masks[n] = ALL_DIGITS_MASK;
353     }
354   }
355 
356   // Starting at digit-N, returns the index of the nearest preceding digit that's not equal to the
357   // given value (or -1 if no such digit exists).
358   private static int previousNon(int digit, DigitSequence s, int n) {
359     while (--n >= 0 && s.getDigit(n) == digit) {}
360     return n;
361   }
362 
363   /** Returns the bitmask for the range {@code [n-9]}. */
364   private static int bitmaskUpFrom(int n) {
365     return (-1 << n) & ALL_DIGITS_MASK;
366   }
367 
368   /** Returns the bitmask for the range {@code [0-n]}. */
369   private static int bitmaskDownFrom(int n) {
370     return ALL_DIGITS_MASK >>> (9 - n);
371   }
372 
373 
374   // String containing one bitmasks per character (bits 0..9).
375   private final String bitmasks;
376   // Minimum and maximum sequences (inclusive) which span the ranges defined by this specification.
377   // Caching this is deliberate, since we sort disjoint ranges using the minimum value. It might
378   // not be so useful to cache the maximum value though.
379   private final DigitSequence min;
380   private final DigitSequence max;
381   // Total number of sequences matched by this specification.
382   private final long sequenceCount;
383 
384   private RangeSpecification(String bitmasks) {
385     int length = bitmasks.length();
386     checkArgument(length <= DigitSequence.MAX_DIGITS,
387         "Range specification too long (%s digits)", length);
388     this.bitmasks = bitmasks;
389     long minValue = 0;
390     long maxValue = 0;
391     long sequenceCount = 1;
392     for (int n = 0; n < length; n++) {
393       int mask = bitmasks.charAt(n);
394       checkArgument(mask > 0 && mask <= ALL_DIGITS_MASK, "invalid bitmask: %s", mask);
395       minValue = (minValue * 10) + numberOfTrailingZeros(mask);
396       maxValue = (maxValue * 10) + (31 - numberOfLeadingZeros(mask));
397       sequenceCount *= Integer.bitCount(mask);
398     }
399     this.min = new DigitSequence(length, minValue);
400     this.max = new DigitSequence(length, maxValue);
401     this.sequenceCount = sequenceCount;
402   }
403 
404   /**
405    * Returns the number of digits that this specification can match. This is the length of all
406    * digit sequences which can match this specification.
407    */
408   public int length() {
409     return bitmasks.length();
410   }
411 
412   /** Returns the smallest digit sequence matched by this range. */
413   public DigitSequence min() {
414     return min;
415   }
416 
417   /** Returns the largest digit sequence matched by this range. */
418   public DigitSequence max() {
419     return max;
420   }
421 
422   /** Returns the total number of digit sequences matched by (contained in) this specification. */
423   public long getSequenceCount() {
424     return sequenceCount;
425   }
426 
427   /**
428    * Returns the bitmask of the Nth range in this specification. Bit-X (0 ≤ X ≤ 9) corresponds to
429    * the digit with value X. As every range in a specification must match at least one digit, this
430    * mask can never be zero.
431    */
432   public int getBitmask(int n) {
433     return bitmasks.charAt(n);
434   }
435 
436   /**
437    * Returns whether the given digit sequence is in one of the ranges specified by this instance.
438    * This is more efficient that obtaining the associated {@code RangeSet} and checking that.
439    */
440   public boolean matches(DigitSequence digits) {
441     if (digits.length() != length()) {
442       return false;
443     }
444     for (int n = 0; n < length(); n++) {
445       if ((bitmasks.charAt(n) & (1 << digits.getDigit(n))) == 0) {
446         return false;
447       }
448     }
449     return true;
450   }
451 
452   // Returns the next sequence in forward order which is contained by a range defined by this
453   // range specification, or null if none exists. The given sequence must not be matched by this
454   // specification.
455   private DigitSequence nextRangeStart(DigitSequence s) {
456     // Easy length based checks (this is where the fact that range specification only define ranges
457     // of the same length really simplifies things).
458     if (s.length() < length()) {
459       return min();
460     } else if (s.length() > length()) {
461       return null;
462     }
463     // Algorithm:
464     // 1) Find the highest digit that isn't in the corresponding bitmask for the range.
465     // 2) Try and increase the digit value until it's inside the next available range.
466     // 3) If that fails, move back up the sequence and increment the next digit up.
467     // 4) Repeat until a digit can be adjusted to start a new range, or all digits are exhausted.
468     // If all digits exhausted, the sequence was above all ranges in this specification.
469     // Otherwise return a new sequence using the unchanged prefix of the original sequence, the
470     // newly adjusted digit and the trailing digits of the minimal sequence.
471     for (int n = 0; n < length(); n++) {
472       int d = s.getDigit(n);
473       int mask = bitmasks.charAt(n);
474       if ((mask & (1 << d)) != 0) {
475         continue;
476       }
477       while (true) {
478         // Digit 'd' is either outside the range mask (first time though the loop) or inside a
479         // range. Either way we want to find the next digit above it which is inside a range.
480         // First increment 'd', and then find the next set bit in the mask at or above that point.
481         // Not extra check is needed at the end of ranges because numberOfTrailingZeros(0)==32
482         // which neatly ensures that the new value of 'd' must be out-of-range.
483         // If mask=[3-58]: d=1-->d'=3, d=4-->d'=5, d=5-->d'=8, d=8-->d'>9
484         d++;
485         d += numberOfTrailingZeros(mask >>> d);
486         if (d <= 9) {
487           // Found the value of the largest digit which can be adjusted to start the next range.
488           // Everything higher than this digit is the same as the original sequence and everything
489           // lower that this digit is the same as the corresponding digit in the minimal value.
490           return s.first(n).extendBy(d).extendBy(min.last((length() - n) - 1));
491         }
492         // No more bits available in this range, so go back up to the previous range.
493         if (--n < 0) {
494           // The sequence was above the last element in the set.
495           // Example: Range Spec: 1[2-8][3-8]456, Sequence: 188457
496           return null;
497         }
498         d = s.getDigit(n);
499         mask = bitmasks.charAt(n);
500       }
501     }
502     // If we finish the outer loop the given sequence was in a range (which is an error).
503     throw new IllegalArgumentException(
504         "Digit sequence '" + s + "' is in the range specified by: " + this);
505   }
506 
507   // Given a sequence inside a range defined by this specification, return the highest sequence
508   // in the current range (possibly just the given sequence).
509   private DigitSequence currentRangeEnd(DigitSequence s) {
510     // Build up a value representing the trailing digits (which must always be 9's).
511     long nines = 0;
512     for (int n = length() - 1; n >= 0; n--, nines = (10 * nines) + 9) {
513       int mask = bitmasks.charAt(n);
514       if (mask == ALL_DIGITS_MASK) {
515         continue;
516       }
517       // The new digit is the top of the current range that the current sequence digit is in.
518       int d = nextUnsetBit(mask, s.getDigit(n)) - 1;
519       DigitSequence end =
520           s.first(n).extendBy(d).extendBy(new DigitSequence((length() - n) - 1, nines));
521       // Edge case for cases like "12[34][09]x" where "1239x" and "1240x" abut. This adjustment
522       // will happen at most once because the second range cannot also include an upper bound
523       // ending at '9', since otherwise (mask == ALL_DIGITS_MASK) at this position. The next
524       // sequence must be terminated with zeros starting at the current position having "rolled
525       // over" on the digit above.
526       if (d == 9) {
527         DigitSequence next = end.next();
528         if (matches(next)) {
529           d = nextUnsetBit(mask, 0) - 1;
530           end = next.first(n).extendBy(d).extendBy(new DigitSequence((length() - n) - 1, nines));
531         }
532       }
533       return end;
534     }
535     // The range specification is entirely 'x', which means it's a single range.
536     return max;
537   }
538 
539   /**
540    * Returns a generating iterator which iterates in forward order over the disjoint ranges defined
541    * by this specification. This is not actually as useful as you might expect because in a lot of
542    * cases you would be dealing with a sequence of range specifications and it's not true that all
543    * ranges from multiple specifications are disjoint.
544    */
545   Iterable<Range<DigitSequence>> asRanges() {
546     return () -> new Iterator<Range<DigitSequence>>() {
547       // Start is always in a range.
548       private DigitSequence start = min;
549 
550       @Override
551       public boolean hasNext() {
552         return start != null;
553       }
554 
555       @Override
556       public Range<DigitSequence> next() {
557         DigitSequence end = currentRangeEnd(start);
558         Range<DigitSequence> r = Range.closed(start, end).canonical(DigitSequence.domain());
559         start = nextRangeStart(end.next());
560         return r;
561       }
562     };
563   }
564 
565   /**
566    * Returns a new range specification which is extended by the given mask value. For example:
567    * <pre>{@code
568    * "0123[4-6]".extendByMask(7) == "0123[4-6][0-2]"
569    * }</pre>
570    */
571   public RangeSpecification extendByMask(int mask) {
572     checkArgument(mask > 0 && mask <= ALL_DIGITS_MASK, "bad mask value '%s'", mask);
573     return new RangeSpecification(bitmasks + ((char) mask));
574   }
575 
576   /**
577    * Returns a new range specification which is extended by the given specification. For example:
578    * <pre>{@code
579    * "0123[4-6]".extendBy("7[89]") == "0123[4-6]7[89]"
580    * }</pre>
581    */
582   public RangeSpecification extendBy(RangeSpecification extra) {
583     return new RangeSpecification(bitmasks + extra.bitmasks);
584   }
585 
586   /**
587    * Returns a new range specification which is extended by a sequence of any digits of the given
588    * length. For example:
589    * <pre>{@code
590    * "012".extendByLength(4) == "012xxxx"
591    * }</pre>
592    */
593   public RangeSpecification extendByLength(int length) {
594     return this.extendBy(any(length));
595   }
596 
597   /**
598    * Returns a range specification containing only the first {@code n} digits. If the given length
599    * is the same or greater than the specification's length, this specification is returned.
600    * For example:
601    * <pre>{@code
602    * "01[2-4]xx".first(8) == "01[2-4]xx" (same instance)
603    * "01[2-4]xx".first(5) == "01[2-4]xx" (same instance)
604    * "01[2-4]xx".first(3) == "01[2-4]"
605    * "01[2-4]xx".first(0) == "" (the empty specification)
606    * }</pre>
607    */
608   public RangeSpecification first(int n) {
609     checkArgument(n >= 0);
610     if (n == 0) {
611       return empty();
612     }
613     return n < length() ? new RangeSpecification(bitmasks.substring(0, n)) : this;
614   }
615 
616   /**
617    * Returns a range specification containing only the last {@code n} digits. If the given length
618    * is the same or greater than the specification's length, this specification is returned.
619    * For example:
620    * <pre>{@code
621    * "01[2-4]xx".last(8) == "01[2-4]xx" (same instance)
622    * "01[2-4]xx".last(5) == "01[2-4]xx" (same instance)
623    * "01[2-4]xx".last(3) == "[2-4]xx"
624    * "01[2-4]xx".last(0) == "" (the empty specification)
625    * }</pre>
626    */
627   public RangeSpecification last(int n) {
628     checkArgument(n >= 0);
629     if (n == 0) {
630       return empty();
631     }
632     return n < length() ? new RangeSpecification(bitmasks.substring(length() - n)) : this;
633   }
634 
635   /**
636    * Returns a range specification with any trailing "any digit" sequence removed. For example:
637    * <pre>{@code
638    * "0123".getPrefix() == "0123" (same instance)
639    * "0123xx".getPrefix() == "0123"
640    * "xxx".getPrefix() == "" (the empty specification)
641    * }</pre>
642    */
643   public RangeSpecification getPrefix() {
644     int length = length();
645     while (length > 0 && getBitmask(length - 1) == ALL_DIGITS_MASK) {
646       length--;
647     }
648     return first(length);
649   }
650 
651   @Override
652   public int compareTo(RangeSpecification other) {
653     int length = Math.min(length(), other.length());
654     for (int i = 0; i < length; i++) {
655       int mask = getBitmask(i);
656       int otherMask = other.getBitmask(i);
657       if (mask == otherMask) {
658         continue;
659       }
660       int commonBits = mask & otherMask;
661       mask -= commonBits;
662       otherMask -= commonBits;
663       // At least one mask is still non-zero and they don't overlap.
664       //
665       // The mask with the lowest set bit is the smaller mask in the ordering, since that bit
666       // distinguishes a smaller prefix than can never exist in the other specification.
667       // Testing the number of trailing zeros is equivalent to finding the lowest set bit.
668       return Integer.compare(numberOfTrailingZeros(mask), numberOfTrailingZeros(otherMask));
669     }
670     return Integer.compare(length(), other.length());
671   }
672 
673   @Override
674   public boolean equals(Object o) {
675     return (o instanceof RangeSpecification) && bitmasks.equals(((RangeSpecification) o).bitmasks);
676   }
677 
678   @Override
679   public int hashCode() {
680     return bitmasks.hashCode();
681   }
682 
683   /**
684    * If you want lexicographical ordering of range specifications, don't use this method, use the
685    * {@code min().toString()}. This works assuming the ranges being compared are disjoint.
686    */
687   @Override
688   public String toString() {
689     // Consider caching if it turns out that we are serializing a lot of these.
690     StringBuilder s = new StringBuilder();
691     for (int n = 0; n < bitmasks.length(); n++) {
692       appendMask(bitmasks.charAt(n), s);
693     }
694     return s.toString();
695   }
696 
697   /** Returns the string representation of a single bit-mask. */
698   public static String toString(int bitMask) {
699     checkArgument(bitMask > 0 && bitMask < (1 << 10), "bad mask value: %s", bitMask);
700     return appendMask(bitMask, new StringBuilder()).toString();
701   }
702 
703   static StringBuilder appendMask(int mask, StringBuilder out) {
704     if (mask == ALL_DIGITS_MASK) {
705       out.append('x');
706     } else if (hasOneBit(mask)) {
707       out.append(asChar(numberOfTrailingZeros(mask)));
708     } else {
709       out.append('[');
710       for (int loBit = numberOfTrailingZeros(mask);
711           loBit != 32;
712           loBit = numberOfTrailingZeros(mask)) {
713         // Always append the loBit digit into the range.
714         out.append(asChar(loBit));
715         int hiBit = nextUnsetBit(mask, loBit);
716         int numBits = hiBit - loBit;
717         if (numBits > 1) {
718           // Stylistically prefer "[34]" to "[3-4]" for compactness.
719           if (numBits > 2) {
720             out.append('-');
721           }
722           out.append(asChar(hiBit - 1));
723         }
724         // Clear the bits we've just processed before going back round the loop.
725         mask &= ~((1 << hiBit) - 1);
726       }
727       out.append(']');
728     }
729     return out;
730   }
731 
732   // Turns a value in the range [0-9] into the corresponding ASCII character.
733   private static char asChar(int digit) {
734     return (char) ('0' + digit);
735   }
736 
737   // Determines if the given bit-mask has only one bit set.
738   private static boolean hasOneBit(int mask) {
739     return (mask & (mask - 1)) == 0;
740   }
741 
742   private static int nextUnsetBit(int mask, int bit) {
743     // Example mask transform for [013-589] if bit=3:
744     //        v-- bit=3
745     // 01100111011
746     // 00000000111  (1 << 3) - 1
747     // 01100111111  OR with mask
748     // 10011000000  Bitwise NOT
749     //     ^-- return=6
750     return numberOfTrailingZeros(~(mask | ((1 << bit) - 1)));
751   }
752 }
753