• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2017 The Libphonenumber Authors.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 package com.google.i18n.phonenumbers.metadata.table;
17 
18 import static com.google.common.base.Preconditions.checkArgument;
19 import static com.google.common.base.Preconditions.checkNotNull;
20 import static com.google.common.collect.ImmutableList.toImmutableList;
21 import static com.google.common.collect.ImmutableMap.toImmutableMap;
22 import static com.google.common.collect.ImmutableSet.toImmutableSet;
23 import static com.google.common.collect.Iterables.transform;
24 import static com.google.common.collect.Maps.immutableEntry;
25 import static java.util.Comparator.comparing;
26 import static java.util.Map.Entry.comparingByKey;
27 import static java.util.stream.Collectors.joining;
28 
29 import com.google.auto.value.AutoValue;
30 import com.google.common.collect.HashBasedTable;
31 import com.google.common.collect.ImmutableList;
32 import com.google.common.collect.ImmutableMap;
33 import com.google.common.collect.ImmutableSet;
34 import com.google.common.collect.ImmutableTable;
35 import com.google.common.collect.Iterables;
36 import com.google.common.collect.Sets;
37 import com.google.common.collect.Table;
38 import com.google.common.collect.TreeBasedTable;
39 import com.google.common.collect.UnmodifiableIterator;
40 import com.google.i18n.phonenumbers.metadata.PrefixTree;
41 import com.google.i18n.phonenumbers.metadata.RangeSpecification;
42 import com.google.i18n.phonenumbers.metadata.RangeTree;
43 import java.util.ArrayList;
44 import java.util.Collection;
45 import java.util.Comparator;
46 import java.util.Iterator;
47 import java.util.List;
48 import java.util.Map;
49 import java.util.Map.Entry;
50 import java.util.NoSuchElementException;
51 import java.util.Optional;
52 import java.util.Set;
53 import java.util.SortedMap;
54 import java.util.TreeMap;
55 import java.util.function.Function;
56 import javax.annotation.Nullable;
57 
58 /**
59  * A tabular representation of attributes, assigned to number ranges.
60  * <p>
61  * A {@code RangeTable} is equivalent to {@code Table&lt;RangeSpecification, Column, Value&gt;},
62  * but is expressed as a mapping of {@code (Column, Value) -> RangeTree} (since {@code RangeTree}
63  * is not a good key). To keep the data structurally equivalent to its tabular form, it's important
64  * that within a column, all assigned ranges are mutually disjoint (and thus a digit sequence can
65  * have at most one value assigned in any column).
66  *
67  * <h3>Table Schemas</h3>
68  * A table requires a {@link Schema}, which defines the columns which can be present and their
69  * order. Column ordering is important since it relates to how rules are applied (see below).
70  *
71  * <h3>Columns and Column Groups</h3>
72  * A {@link Column} defines a category of values of a particular type (e.g. String, Boolean,
73  * Integer or user specified enums) and a default value. New columns can be implemented easily and
74  * can choose to limit their values to some known set.
75  * <p>
76  * A {@link ColumnGroup} defines a related set of columns of the same type. The exact set of
77  * columns available in a group is not necessarily known in advance. A good example of a column
78  * group is having columns for names is different languages. A column group of "Name" could define
79  * columns such as "Name:en", "Name:fr", "Name:ja" etc. which contain the various translations of
80  * the value. The first time a value is added for a column inferred by a column group, that column
81  * is created.
82  * <p>
83  * An {@link Assignment} is a useful way to encapsulate "a value in a column" and can be used to
84  * assign or unassign values to ranges, or query for the ranges which have that assignment.
85  *
86  * <h3>Builders and Unassigned Values</h3>
87  * To allow a {@code RangeTable} to fully represent data in a tabular way, it must be possible to
88  * have rows in a table for which no value is assigned in any column. Unassigned ranges can be
89  * added to a builder via the {@link Builder#add(RangeTree)} method, and these "empty rows" are
90  * preserved in the final table.
91  * <p>
92  * This is useful since it allows a {@link Change} to affect no columns, but still have an effect
93  * on the final table. It's also useful when applying rules to infer values and fill-in column
94  * defaults.
95  */
96 public final class RangeTable {
97 
98   /** Overwrite rules for modifying range categorization. */
99   public enum OverwriteMode {
100     /** Only assign ranges that were previously unassigned. */
101     NEVER,
102     /** Only assign ranges that were either unassigned or had the same value. */
103     SAME,
104     /** Always assign ranges (and unassign them from any other values in the same category). */
105     ALWAYS;
106   }
107 
108   /** A builder for an immutable range table to which changes and rules can be applied. */
109   public static final class Builder {
110     // The schema for the table to be built.
111     private final Schema schema;
112     // The map of per-column ranges.
113     private final SortedMap<Column<?>, DisjointRangeMap.Builder<?>> columnRanges;
114     // The union of all ranges added to the builder (either by assignment or range addition).
115     // This is not just a cache of all the assigned ranges, since assigning and unassigning a range
116     // will not cause it to be removed from the table altogether (even if it is no longer assigned
117     // in any column).
118     private RangeTree allRanges = RangeTree.empty();
119 
Builder(Schema schema)120     private Builder(Schema schema) {
121       this.schema = checkNotNull(schema);
122       this.columnRanges = new TreeMap<>(schema.ordering());
123     }
124 
125     // Helper to return an on-demand builder for a column.
getOrAddRangeMap(Column<T> c)126     private <T extends Comparable<T>> DisjointRangeMap.Builder<T> getOrAddRangeMap(Column<T> c) {
127       // The generic type of the builder is defined by the column it's building for, and the map
128       // just uses that column as its key. Thus, if the given column is recognized by the schema,
129       // the returned builder must be of the same type.
130       @SuppressWarnings("unchecked")
131       DisjointRangeMap.Builder<T> ranges = (DisjointRangeMap.Builder<T>)
132           columnRanges.computeIfAbsent(schema.checkColumn(c), DisjointRangeMap.Builder::new);
133       return ranges;
134     }
135 
136     // ---- Read-only API ----
137 
138     /** Returns the schema for this builder. */
getSchema()139     public Schema getSchema() {
140       return schema;
141     }
142 
143     /**
144      * Returns ranges for the given assignment. If the value is {@code empty}, then the unassigned
145      * ranges in the column are returned.
146      */
getRanges(Assignment<?> assignment)147     public RangeTree getRanges(Assignment<?> assignment) {
148       return getRanges(assignment.column(), assignment.value().orElse(null));
149     }
150 
151     /**
152      * Returns ranges for the given value in the specified column. If the value is {@code null},
153      * then the unassigned ranges in the column are returned. If the column has no values assigned,
154      * then the empty range is returned (or, if {@code value == null}, all ranges in the table).
155      */
getRanges(Column<?> column, @Nullable Object value)156     public RangeTree getRanges(Column<?> column, @Nullable Object value) {
157       getSchema().checkColumn(column);
158       DisjointRangeMap.Builder<?> rangeMap = columnRanges.get(column);
159       if (value != null) {
160         return rangeMap != null ? rangeMap.getRanges(value) : RangeTree.empty();
161       } else {
162         RangeTree all = getAllRanges();
163         return rangeMap != null ? all.subtract(rangeMap.getAssignedRanges()) : all;
164       }
165     }
166 
167     /**
168      * Returns all assigned ranges in the specified column. If the column doesn't exist in the
169      * table, the empty range is returned).
170      */
getAssignedRanges(Column<?> column)171     public RangeTree getAssignedRanges(Column<?> column) {
172       getSchema().checkColumn(column);
173       DisjointRangeMap.Builder<?> rangeMap = columnRanges.get(column);
174       return rangeMap != null ? rangeMap.getAssignedRanges() : RangeTree.empty();
175     }
176 
177     /**
178      * Returns ranges which were added to this builder, either directly via {@link #add(RangeTree)}
179      * or indirectly via assignment.
180      */
getAllRanges()181     public RangeTree getAllRanges() {
182       return allRanges;
183     }
184 
185     /** Returns all ranges present in this table which are not assigned in any column. */
getUnassignedRanges()186     public RangeTree getUnassignedRanges() {
187       RangeTree allAssigned = columnRanges.values().stream()
188           .map(DisjointRangeMap.Builder::getAssignedRanges)
189           .reduce(RangeTree.empty(), RangeTree::union);
190       return allRanges.subtract(allAssigned);
191     }
192 
193     /**
194      * Returns a snapshot of the columns in schema order (including empty columns which may have
195      * been added explicitly or exist due to values being unassigned).
196      */
getColumns()197     public ImmutableSet<Column<?>> getColumns() {
198       return columnRanges.entrySet().stream()
199           .map(Entry::getKey)
200           .collect(toImmutableSet());
201     }
202 
203     // ---- Range assignment/addition/removal ----
204 
205     /**
206      * Assigns the specified ranges to the given assignment. If the value is {@code empty}, then
207      * this has the effect of unassigning the given ranges, but does not remove them from the
208      * table. If {@code ranges} is empty, this method has no effect.
209      *
210      * @throws RangeException if assignment cannot be performed according to the overwrite mode
211      *     (no change will have occurred in the table if this occurs).
212      */
assign(Assignment<?> assignment, RangeTree ranges, OverwriteMode mode)213     public Builder assign(Assignment<?> assignment, RangeTree ranges, OverwriteMode mode) {
214       assign(assignment.column(), assignment.value().orElse(null), ranges, mode);
215       return this;
216     }
217 
218     /**
219      * Assigns the specified ranges to a value within a column (other columns unaffected). If the
220      * value is {@code null}, then this has the effect of unassigning the given ranges, but does
221      * not remove them from the table. If {@code ranges} is empty, this method has no effect.
222      *
223      * @throws RangeException if assignment cannot be performed according to the overwrite mode
224      *     (no change will have occurred in the table if this occurs).
225      */
assign( Column<?> column, @Nullable Object value, RangeTree ranges, OverwriteMode mode)226     public Builder assign(
227         Column<?> column, @Nullable Object value, RangeTree ranges, OverwriteMode mode) {
228       if (!ranges.isEmpty()) {
229         getOrAddRangeMap(column).assign(value, ranges, mode);
230         allRanges = allRanges.union(ranges);
231       }
232       return this;
233     }
234 
235     /**
236      * Unconditionally assigns all values, ranges and columns in the given table. This does not
237      * clear any already assigned ranges.
238      */
add(RangeTable table)239     public Builder add(RangeTable table) {
240       add(table.getAllRanges());
241       add(table.getColumns());
242       for (Column<?> column : table.getColumns()) {
243         for (Object value : table.getAssignedValues(column)) {
244           assign(column, value, table.getRanges(column, value), OverwriteMode.ALWAYS);
245         }
246       }
247       return this;
248     }
249 
250     /**
251      * Ensures that the given ranges exist in the table, even if no assignments are ever made in
252      * any columns.
253      */
add(RangeTree ranges)254     public Builder add(RangeTree ranges) {
255       allRanges = allRanges.union(ranges);
256       return this;
257     }
258 
259     /** Ensures that the given column exists in the table (even if there are no assignments). */
add(Column<?> column)260     public Builder add(Column<?> column) {
261       getOrAddRangeMap(checkNotNull(column));
262       return this;
263     }
264 
265     /** Ensures that the given columns exist in the table (even if there are no assignments). */
add(Collection<Column<?>> columns)266     public Builder add(Collection<Column<?>> columns) {
267       columns.forEach(this::add);
268       return this;
269     }
270 
271     /** Removes the given ranges from the table, including all assignments in all columns. */
remove(RangeTree ranges)272     public Builder remove(RangeTree ranges) {
273       for (DisjointRangeMap.Builder<?> rangeMap : columnRanges.values()) {
274         rangeMap.assign(null, ranges, OverwriteMode.ALWAYS);
275       }
276       allRanges = allRanges.subtract(ranges);
277       return this;
278     }
279 
280     /** Removes the given column from the table (has no effect if the column is not present). */
remove(Column<?> column)281     public Builder remove(Column<?> column) {
282       columnRanges.remove(checkNotNull(column));
283       return this;
284     }
285 
286     /** Removes the given columns from the table (has no effect if columns are not present). */
remove(Collection<Column<?>> columns)287     public Builder remove(Collection<Column<?>> columns) {
288       columns.forEach(this::remove);
289       return this;
290     }
291 
292     /** Copies the assigned, non-default, values of the specified column. */
copyNonDefaultValues( Column<T> column, RangeTable src, OverwriteMode mode)293     public <T extends Comparable<T>> Builder copyNonDefaultValues(
294         Column<T> column, RangeTable src, OverwriteMode mode) {
295       for (T v : src.getAssignedValues(column)) {
296         if (!column.defaultValue().equals(v)) {
297           assign(column, v, src.getRanges(column, v), mode);
298         }
299       }
300       return this;
301     }
302 
303     // ---- Applying changes ----
304 
305     /**
306      * Unconditionally applies the given change to this range table. Unlike
307      * {@link #apply(Change, OverwriteMode)}, this method cannot fail, since changes are applied
308      * unconditionally.
309      */
apply(Change change)310     public Builder apply(Change change) {
311       return apply(change, OverwriteMode.ALWAYS);
312     }
313 
314     /**
315      * Applies the given change to this range table. A change adds ranges to the table, optionally
316      * assigning them specific category values within columns.
317      *
318      * @throws RangeException if the overwrite mode prohibits the modification in this change (the
319      *    builder remains unchanged).
320      */
apply(Change change, OverwriteMode mode)321     public Builder apply(Change change, OverwriteMode mode) {
322       RangeTree ranges = change.getRanges();
323       if (!ranges.isEmpty()) {
324         // Check first that the assignments will succeed before attempting them (so as not to
325         // leave the builder in an inconsistent state if it fails).
326         if (mode != OverwriteMode.ALWAYS) {
327           for (Assignment<?> a : change.getAssignments()) {
328             getOrAddRangeMap(a.column()).checkAssign(a.value().orElse(null), ranges, mode);
329           }
330         }
331         for (Assignment<?> a : change.getAssignments()) {
332           getOrAddRangeMap(a.column()).assign(a.value().orElse(null), ranges, mode);
333         }
334         allRanges = allRanges.union(ranges);
335       }
336       return this;
337     }
338 
339     // ---- Builder related methods ----
340 
341     /** Builds the range table from the current state of the builder. */
build()342     public RangeTable build() {
343       ImmutableMap<Column<?>, DisjointRangeMap<?>> columnMap = columnRanges.entrySet().stream()
344           .map(e -> immutableEntry(e.getKey(), e.getValue().build()))
345           .sorted(comparingByKey(schema.ordering()))
346           .collect(toImmutableMap(Entry::getKey, Entry::getValue));
347       return new RangeTable(schema, columnMap, allRanges, getUnassignedRanges());
348     }
349 
350     /**
351      * Returns a new builder with the same state as the current builder. This is useful when state
352      * is being built up incrementally.
353      */
copy()354     public Builder copy() {
355       // Can be made more efficient if necessary...
356       return build().toBuilder();
357     }
358 
359     /** Builds a minimal version of this table in which empty columns are no longer present. */
buildMinimal()360     public RangeTable buildMinimal() {
361       ImmutableSet<Column<?>> empty = columnRanges.entrySet().stream()
362           .filter(e -> e.getValue().getAssignedRanges().isEmpty())
363           .map(Entry::getKey)
364           .collect(toImmutableSet());
365       remove(empty);
366       return build();
367     }
368 
369     @Override
toString()370     public final String toString() {
371       return build().toString();
372     }
373   }
374 
375   /** Returns a builder for a range table with the specified column mapping. */
builder(Schema schema)376   public static Builder builder(Schema schema) {
377     return new Builder(schema);
378   }
379 
from( Schema schema, Table<RangeSpecification, Column<?>, Optional<?>> t)380   public static RangeTable from(
381       Schema schema, Table<RangeSpecification, Column<?>, Optional<?>> t) {
382     Builder table = builder(schema);
383     for (Entry<RangeSpecification, Map<Column<?>, Optional<?>>> row : t.rowMap().entrySet()) {
384       List<Assignment<?>> assignments = row.getValue().entrySet().stream()
385           .map(e -> Assignment.ofOptional(e.getKey(), e.getValue()))
386           .collect(toImmutableList());
387       table.apply(Change.of(RangeTree.from(row.getKey()), assignments));
388     }
389     return table.build();
390   }
391 
392   // Definition of table columns.
393   private final Schema schema;
394   // Mapping to the assigned ranges for each column type.
395   private final ImmutableMap<Column<?>, DisjointRangeMap<?>> columnRanges;
396   // All ranges in this table (possibly larger than union of all assigned ranges in all columns).
397   private final RangeTree allRanges;
398   // Ranges unassigned in any column (a subset of, or equal to allRanges).
399   private final RangeTree unassigned;
400 
RangeTable( Schema schema, ImmutableMap<Column<?>, DisjointRangeMap<?>> columnRanges, RangeTree allRanges, RangeTree unassigned)401   private RangeTable(
402       Schema schema,
403       ImmutableMap<Column<?>, DisjointRangeMap<?>> columnRanges,
404       RangeTree allRanges,
405       RangeTree unassigned) {
406     this.schema = checkNotNull(schema);
407     this.columnRanges = checkNotNull(columnRanges);
408     this.allRanges = checkNotNull(allRanges);
409     this.unassigned = checkNotNull(unassigned);
410   }
411 
412   /** Returns a builder initialized to the ranges and assignements in this table. */
toBuilder()413   public Builder toBuilder() {
414     // Any mode would work here (the builder is empty) but the "always overwrite" mode is fastest.
415     return new Builder(schema).add(this);
416   }
417 
getRangeMap(Column<?> column)418   private Optional<DisjointRangeMap<?>> getRangeMap(Column<?> column) {
419     return Optional.ofNullable(columnRanges.get(schema.checkColumn(column)));
420   }
421 
getSchema()422   public Schema getSchema() {
423     return schema;
424   }
425 
getColumns()426   public ImmutableSet<Column<?>> getColumns() {
427     return columnRanges.keySet();
428   }
429 
430   /**
431    * Returns the set of values with assigned ranges in the given column.
432    *
433    * @throws IllegalArgumentException if the specified column does not exist in this table.
434    */
getAssignedValues(Column<T> column)435   public <T extends Comparable<T>> ImmutableSet<T> getAssignedValues(Column<T> column) {
436     getSchema().checkColumn(column);
437     // Safe since if the column is in the schema the values must have been checked when added.
438     @SuppressWarnings("unchecked")
439     DisjointRangeMap<T> rangeMap =
440         (DisjointRangeMap<T>) columnRanges.get(schema.checkColumn(column));
441     return rangeMap != null ? rangeMap.getAssignedValues() : ImmutableSet.of();
442   }
443 
444   /** Returns all assigned ranges in the specified column. */
getAssignedRanges(Column<?> column)445   public RangeTree getAssignedRanges(Column<?> column) {
446     return getRangeMap(column).map(DisjointRangeMap::getAssignedRanges).orElse(RangeTree.empty());
447   }
448 
449   /**
450    * Returns ranges for the given assignment. If the value is {@code empty}, then the unassigned
451    * ranges in the column are returned.
452    */
getRanges(Assignment<?> assignment)453   public RangeTree getRanges(Assignment<?> assignment) {
454     return getRanges(assignment.column(), assignment.value().orElse(null));
455   }
456 
457   /**
458    * Returns ranges for the given value in the specified column. If the value is {@code null}, then
459    * the unassigned ranges in the column are returned.
460    */
getRanges(Column<?> column, @Nullable Object value)461   public RangeTree getRanges(Column<?> column, @Nullable Object value) {
462     getSchema().checkColumn(column);
463     if (value == null) {
464       return getAllRanges().subtract(getAssignedRanges(column));
465     } else {
466       return getRangeMap(column).map(m -> m.getRanges(value)).orElse(RangeTree.empty());
467     }
468   }
469 
470   /** Returns all ranges present in this table. */
getAllRanges()471   public RangeTree getAllRanges() {
472     return allRanges;
473   }
474 
475   /** Returns all ranges present in this table which are not assigned in any column. */
getUnassignedRanges()476   public RangeTree getUnassignedRanges() {
477     return unassigned;
478   }
479 
480   /**
481    * Returns whether this table contains no ranges (assigned or unassigned). Note that not all
482    * empty tables are equal, since they may still differ by the columns they have.
483    */
isEmpty()484   public boolean isEmpty() {
485     return allRanges.isEmpty();
486   }
487 
488   /**
489    * Returns a sub-table with rows and columns limited by the specified bounds. The schema of the
490    * returned table is the same as this table.
491    */
subTable(RangeTree bounds, Set<Column<?>> columns)492   public RangeTable subTable(RangeTree bounds, Set<Column<?>> columns) {
493     // Columns must be a subset of what's allowed in this schema.
494     columns.forEach(getSchema()::checkColumn);
495     return subTable(bounds, getSchema(), columns);
496   }
497 
498   /**
499    * Returns a sub-table with rows and columns limited by the specified bounds. The schema of the
500    * returned table is the same as this table.
501    */
subTable(RangeTree bounds, Column<?> first, Column<?>... rest)502   public RangeTable subTable(RangeTree bounds, Column<?> first, Column<?>... rest) {
503     return subTable(bounds, ImmutableSet.<Column<?>>builder().add(first).add(rest).build());
504   }
505 
506   /**
507    * Returns a table with rows and columns limited by the specified bounds. The schema of the
508    * returned table is the given sub-schema.
509    */
subTable(RangeTree bounds, Schema subSchema)510   public RangeTable subTable(RangeTree bounds, Schema subSchema) {
511     checkArgument(subSchema.isSubSchemaOf(getSchema()),
512         "expected sub-schema of %s, got %s", getSchema(), subSchema);
513     return subTable(bounds, subSchema, Sets.filter(getColumns(), subSchema::isValidColumn));
514   }
515 
516   // Callers MUST validate that the given set of columns are all valid in the subSchema.
subTable(RangeTree bounds, Schema subSchema, Set<Column<?>> columns)517   private RangeTable subTable(RangeTree bounds, Schema subSchema, Set<Column<?>> columns) {
518     ImmutableMap<Column<?>, DisjointRangeMap<?>> columnMap = columns.stream()
519         // Bound the given columns which exist in this table.
520         .map(c -> immutableEntry(c, getRangeMap(c).map(r -> r.intersect(bounds))))
521         // Reject columns we didn't already have (but allow empty columns if they exist).
522         .filter(e -> e.getValue().isPresent())
523         // Sort to our schema (since the given set of columns is not required to be sorted).
524         .sorted(comparingByKey(schema.ordering()))
525         .collect(toImmutableMap(Entry::getKey, e -> e.getValue().get()));
526     return new RangeTable(
527         subSchema, columnMap, allRanges.intersect(bounds), unassigned.intersect(bounds));
528   }
529 
530   /**
531    * Returns the assigned rows of a RangeTable as a minimal list of disjoint changes, which can
532    * be applied to an empty table to recreate this table. No two changes affect the same columns
533    * in the same way and changes are ordered by the minimal values of their ranges. This is
534    * essentially the same information as returned in {@link #toImmutableTable()} but does not
535    * decompose ranges into range specifications, and it thus more amenable to compact
536    * serialization.
537    */
538   // Note that the minimal nature of the returned changes is essential for some algorithms that
539   // operate on tables and this must not be changed.
toChanges()540   public ImmutableList<Change> toChanges() {
541     Table<Column<?>, Optional<?>, RangeTree> table = HashBasedTable.create();
542     for (Column<?> c : getColumns()) {
543       for (Object v : getAssignedValues(c)) {
544         table.put(c, Optional.of(v), getRanges(c, v));
545       }
546     }
547     return toChanges(schema, table, getAllRanges());
548   }
549 
550   /**
551    * Returns a minimum set of changes based on a table of assignments (column plus value). This is
552    * not expected to be used often (since RangeTable is usually a better representation of the data
553    * but can be useful in representing things like updates and patches in which only some rows or
554    * columns are represented.
555    *  @param schema a schema for the columns in the given Table (used to determine column order).
556    * @param table the table of assignments to assigned ranges.
557    * @param allRanges the set of all ranges affected by the changes (this might include ranges not
558  *     present anywhere in the table, which correspond to empty rows).
559    */
toChanges( Schema schema, Table<Column<?>, Optional<?>, RangeTree> table, RangeTree allRanges)560   public static ImmutableList<Change> toChanges(
561       Schema schema, Table<Column<?>, Optional<?>, RangeTree> table, RangeTree allRanges) {
562     return ImmutableList.copyOf(
563         transform(toRows(table, allRanges, schema.ordering()), Row::toChange));
564   }
565 
566   /**
567    * Returns the data in this table represented as a {@link ImmutableTable}. Row keys are disjoint
568    * range specifications (in order). The returned table has the smallest number of rows necessary
569    * to represent the data in this range table. This is useful as a human readable serialized form
570    * since any digit sequence in the table is contained in a unique row.
571    */
toImmutableTable()572   public ImmutableTable<RangeSpecification, Column<?>, Optional<?>> toImmutableTable() {
573     Table<Column<?>, Optional<?>, RangeTree> table = HashBasedTable.create();
574     for (Column<?> c : getColumns()) {
575       for (Object v : getAssignedValues(c)) {
576         table.put(c, Optional.of(v), getRanges(c, v));
577       }
578       RangeTree unassigned = getAllRanges().subtract(getAssignedRanges(c));
579       if (!unassigned.isEmpty()) {
580         table.put(c, Optional.empty(), unassigned);
581       }
582     }
583     // Unique changes contain disjoint ranges, each associated with a unique combination of
584     // assignments.
585     TreeBasedTable<RangeSpecification, Column<?>, Optional<?>> out =
586         TreeBasedTable.create(comparing(RangeSpecification::min), schema.ordering());
587     for (Change c : toChanges(schema, table, getAllRanges())) {
588       List<RangeSpecification> keys = c.getRanges().asRangeSpecifications();
589       for (Assignment<?> a : c.getAssignments()) {
590         for (RangeSpecification k : keys) {
591           out.put(k, a.column(), a.value());
592         }
593       }
594     }
595     return ImmutableTable.copyOf(out);
596   }
597 
598   /**
599    * Extracts a map for a single column in this table containing the minimal prefix tree for each
600    * of the assigned values. The returned prefixes are the shortest prefixes possible for
601    * distinguishing each value in the column. This method is especially useful if you want to
602    * categorize partial digit sequences efficiently (i.e. prefix matching).
603    *
604    * <p>A minimal length can be specified to avoid creating prefixes that are "too short" for some
605    * circumstances. Note that returned prefixes are never zero length, so {@code 1} is the lowest
606    * meaningful value (although zero is still accepted to imply "no length restriction").
607    *
608    * <p>Note that for some table data, it is technically impossible to obtain perfect prefix
609    * information and in cases where overlap occurs, this method returns the shortest prefixes. This
610    * means that for some valid inputs it might be true that more than one prefix is matched. It
611    * is therefore up to the caller to determine a "best order" for testing the prefixes if this
612    * matters. See {@link PrefixTree#minimal(RangeTree, RangeTree, int)} for more information.
613    *
614    * <p>An example of an "impossible" prefix would be if "123" has value A, "1234" has value B and
615    * "12345" has value A again. In this case there is no prefix which can distinguish A and B
616    * (the calculated map would be { "123" ⟹ A, "1234" ⟹ B }). In this situation, testing for the
617    * longer prefix would help preserve as much of the original mapping as possible, but it would
618    * never be possible to correctly distinguish all inputs.
619    */
getPrefixMap( Column<T> column, int minPrefixLength)620   public <T extends Comparable<T>> ImmutableMap<T, PrefixTree> getPrefixMap(
621       Column<T> column, int minPrefixLength) {
622     ImmutableMap.Builder<T, PrefixTree> map = ImmutableMap.builder();
623     // Important: Don't just use the assigned ranges in the column, use the assigned ranges of the
624     // entire table. This ensures unassigned ranges in the column are not accidentally captured by
625     // any of the generated prefixes.
626     RangeTree allRanges = getAllRanges();
627     for (T value : getAssignedValues(column)) {
628       RangeTree include = getRanges(column, value);
629       map.put(value, PrefixTree.minimal(include, allRanges.subtract(include), minPrefixLength));
630     }
631     return map.build();
632   }
633 
634   // Constants for the simplification routine below.
635   // Use -1 for unassigned rows (these are the "overlap" ranges and they don't have an index).
636   private static final Column<Integer> INDEX =
637       Column.create(Integer.class, "Change Index", -1, Integer::parseInt);
638   private static final Schema INDEX_SCHEMA = Schema.builder().add(INDEX).build();
639 
640   /**
641    * Applies a simplification function to the rows defined by the given columns of this table. The
642    * returned table will only have (at most) the specified columns present.
643    *
644    * <p>The simplification function is used to produce ranges which satisfy some business logic
645    * criteria (such as having at most N significant digits, or merging lengths). Range
646    * simplification enables easier comparison between data sources of differing precision, and
647    * helps to reduce unnecessary complexity in generated regular expressions.
648    *
649    * <p>The simplification function should return a range that's at least as large as the input
650    * range. This is to ensure that simplification cannot unassign ranges, even accidentally. The
651    * returned range is automatically restricted to preserve disjoint ranges in the final table.
652    *
653    * <p>By passing a {@link Change} rather than just a {@link RangeTree}, the simplification
654    * function has access to the row assignments for the range it is simplifying. This allows it to
655    * select different strategies according to the values in specific columns (e.g. area code
656    * length).
657    *
658    * <p>Note that unassigned ranges in the original table will be preserved and simplified ranges
659    * will not overwrite them. This can be useful for defining "no go" ranges which should be left
660    * alone.
661    */
simplify( Function<Change, RangeTree> simplifyFn, int minPrefixLength, Column<?> first, Column<?>... rest)662   public RangeTable simplify(
663       Function<Change, RangeTree> simplifyFn,
664       int minPrefixLength,
665       Column<?> first,
666       Column<?>... rest) {
667     // Build the single column "index" table (one index for each change) and simplify its ranges.
668     // This only works because "toChanges()" produces the minimal set of changes such that each
669     // unique combination of assignments appears only once.
670     ImmutableList<Change> rows = subTable(getAllRanges(), first, rest).toChanges();
671     RangeTable simplifiedIndexTable = simplifyIndexTable(rows, simplifyFn, minPrefixLength);
672 
673     // Reconstruct the output table by assigning values from the original change set according to
674     // the indices in the simplified index table.
675     Builder simplified = RangeTable.builder(getSchema()).add(simplifiedIndexTable.getAllRanges());
676     for (int i : simplifiedIndexTable.getAssignedValues(INDEX)) {
677       RangeTree simplifiedRange = simplifiedIndexTable.getRanges(INDEX, i);
678       for (Assignment<?> a : rows.get(i).getAssignments()) {
679         simplified.assign(a, simplifiedRange, OverwriteMode.NEVER);
680       }
681     }
682     return simplified.build();
683   }
684 
685   /**
686    * Helper function to simplify an index table based on the given rows. The resulting table will
687    * have a single "index" column with simplified ranges, where the index value {@code N}
688    * references the Nth row in the given list of disjoint changes. This is a 3 stage process:
689    * <ol>
690    *   <li>Step 1: Determine which ranges can overlap with respect to set of range prefixes.
691    *   <li>Step 2: Do simplification on the non-overlapping "prefix disjoint" ranges in the table,
692    *   which are then be re-partitioned by the disjoint prefixes.
693    *   <li>Step 3: Copy over any overlapping ranges from the original table (these don't get
694    *   simplified since it's not possible to easily re-pertition them).
695    * </ol>
696    */
simplifyIndexTable( ImmutableList<Change> rows, Function<Change, RangeTree> simplifyFn, int minPrefixLength)697   private static <T extends Comparable<T>> RangeTable simplifyIndexTable(
698       ImmutableList<Change> rows, Function<Change, RangeTree> simplifyFn, int minPrefixLength) {
699     RangeTable indexTable = makeIndexTable(rows);
700 
701     // Step 1: Determine overlapping ranges from the index table, retaining minimum prefix length.
702     ImmutableMap<Integer, PrefixTree> nonDisjointPrefixes =
703         indexTable.getPrefixMap(INDEX, minPrefixLength);
704     // Don't just use the assigned ranges (we need to account for valid but unassigned ranges when
705     // determining overlaps).
706     RangeTree allRanges = indexTable.getAllRanges();
707     RangeTree overlaps = RangeTree.empty();
708     for (int n : indexTable.getAssignedValues(INDEX)) {
709       RangeTree otherRanges = allRanges.subtract(indexTable.getRanges(INDEX, n));
710       overlaps = overlaps.union(nonDisjointPrefixes.get(n).retainFrom(otherRanges));
711     }
712 
713     // Step 2: Determine the "prefix disjoint" ranges in a new table and simplify it.
714     //
715     // Before getting the new set of prefixes, add the overlapping ranges back to the table, but
716     // without assigning them to anything. This keeps the generated prefixes as long as necessary
717     // to avoid creating conflicting assignments for different values. Essentially we're trying to
718     // keep ranges "away from" any overlaps. Note however that it is still possible for simplified
719     // ranges encroach on the overlapping areas, so we must still forcibly overwrite the original
720     // overlapping values after siplification. Consider:
721     //   A = { "12x", "12xxx" }, B = { "123x" }
722     // where the simplification function just creates any "any" range for all lengths between the
723     // minimum and maximum range lengths (e.g. { "123", "45678" } ==> { "xxx", "xxxx", "xxxxx" }.
724     //
725     // The (non disjoint) prefix table is Pre(A) => { "12" }, Pre(B) => { "123" } and this
726     // captures the overlaps:
727     //   Pre(A).retainFrom(B) = { "123x" } = B
728     //   Pre(B).retainFrom(A) = { "123xx" }
729     //
730     // Since is of "B" is entirely contained by the overlap, it is not simplified, but A is
731     // simplified to:
732     //   { "xxx", "xxxx", "xxxxx" }
733     // and the re-captured by the "disjoint" prefix (which is still just "12") to:
734     //   { "12x", "12xx", "12xxx" }
735     //
736     // However now, when the original overlaps are added back at the end (in step 3) we find that
737     // both "123xx" already exists (with the same index) and "123x" exists with a different index.
738     // The resolution is to just overwrite all overlaps back into the table, since these represent
739     // the original (unsimplified) values.
740     //
741     // Thus in this case, the simplified table is:
742     //   Sim(A) = { "12x", "12[0-24-9]x", "12xxx" }, Sim(B) = { "123x" }
743     //
744     // And it is still true that: Sim(A).containsAll(A) and Sim(B).containsAll(B)
745     RangeTable prefixDisjointTable = indexTable
746         .subTable(allRanges.subtract(overlaps), INDEX)
747         .toBuilder()
748         .add(overlaps)
749         .build();
750 
751     // NOTE: Another way to do this would be to implement an "exclusive prefix" method which could
752     // be used to immediately return a set of truly "disjoint" prefixes (although this would change
753     // the algorithm's behaviour since more ranges would be considered "overlapping" than now).
754     // TODO: Experiment with an alternate "exclusive" prefix function.
755     ImmutableMap<Integer, PrefixTree> disjointPrefixes = prefixDisjointTable.getPrefixMap(INDEX, 1);
756     // Not all values from the original table need be present in the derived table (since some
757     // overlaps account for all the ranges of a value).
758     Builder simplified = RangeTable.builder(INDEX_SCHEMA);
759     for (int n : prefixDisjointTable.getAssignedValues(INDEX)) {
760       RangeTree disjointRange = prefixDisjointTable.getRanges(INDEX, n);
761       // Pass just the assignments, not the whole row (Change) because that also contains a range,
762       // which might not be the same as the disjoint range (so it could be rather confusing).
763       PrefixTree disjointPrefix = disjointPrefixes.get(n);
764       RangeTree simplifiedRange =
765           simplifyFn.apply(Change.of(disjointRange, rows.get(n).getAssignments()));
766       // Technically this check is not strictly required, but there's probably no good use-case in
767       // which you'd want to remove assignments via the simplification process.
768       checkArgument(simplifiedRange.containsAll(disjointRange),
769           "simplification should return a superset of the given range\n"
770               + "input: %s\n"
771               + "output: %s\n"
772               + "missing: %s",
773           disjointRange, simplifiedRange, disjointRange.subtract(simplifiedRange));
774       // Repartition the simplified ranges by the "disjoint" prefixes to restore most of the
775       // simplified ranges. These ranges should never overlap with each other.
776       RangeTree repartitionedRange = disjointPrefix.retainFrom(simplifiedRange);
777       simplified.assign(INDEX, n, repartitionedRange, OverwriteMode.NEVER);
778     }
779 
780     // Step 3: Copy remaining overlapping ranges from the original table back into the result.
781     // Note that we may end up overwriting values here, but that's correct since it restores
782     // original "unsimplifiable" ranges.
783     for (int n : indexTable.getAssignedValues(INDEX)) {
784       simplified.assign(
785           INDEX, n, indexTable.getRanges(INDEX, n).intersect(overlaps), OverwriteMode.ALWAYS);
786     }
787     return simplified.build();
788   }
789 
790   // Helper to make a table with a single column than references a list of disjoint changes by
791   // index (against the range of that change).
makeIndexTable(ImmutableList<Change> rows)792   private static RangeTable makeIndexTable(ImmutableList<Change> rows) {
793     Builder indexTable = RangeTable.builder(INDEX_SCHEMA);
794     for (int i = 0; i < rows.size(); i++) {
795       // Empty rows are added to the table, but not assigned an index. Their existence in the index
796       // table prevents over simplification from affecting unassigned rows of the original table.
797       if (rows.get(i).getAssignments().isEmpty()) {
798         indexTable.add(rows.get(i).getRanges());
799       } else {
800         indexTable.assign(INDEX, i, rows.get(i).getRanges(), OverwriteMode.NEVER);
801       }
802     }
803     return indexTable.build();
804   }
805 
806   @Override
equals(Object obj)807   public boolean equals(Object obj) {
808     if (!(obj instanceof RangeTable)) {
809       return false;
810     }
811     RangeTable other = (RangeTable) obj;
812     return this == other
813         || (schema.equals(other.schema)
814             && allRanges.equals(other.allRanges)
815             && columnRanges.values().asList().equals(other.columnRanges.values().asList()));
816   }
817 
818   @Override
hashCode()819   public int hashCode() {
820     // This could be memoized if it turns out to be slow.
821     return schema.hashCode() ^ columnRanges.hashCode() ^ allRanges.hashCode();
822   }
823 
824   // TODO: Prettier format for toString().
825   @Override
toString()826   public final String toString() {
827     ImmutableTable<RangeSpecification, Column<?>, Optional<?>> table = toImmutableTable();
828     return table.rowMap().entrySet().stream()
829         .map(e -> String.format("%s, %s", e.getKey(), rowToString(e.getValue())))
830         .collect(joining("\n"));
831   }
832 
rowToString(Map<Column<?>, Optional<?>> r)833   private static String rowToString(Map<Column<?>, Optional<?>> r) {
834     return r.values().stream()
835         .map(v -> v.map(Object::toString).orElse("UNSET"))
836         .collect(joining(", "));
837   }
838 
839   // Helper method to convert a table of values into a minimal set of changes. This is used to
840   // turn a single RangeTable into an ImmutableTable, but also to convert a Patch into a minimal
841   // sequence of Changes. Each returned "row" defines a range, and a unique sequence of assignments
842   // over that range (i.e. no two rows have the same assignments in). The assignments are ordered
843   // in column order within each row, and the rows are ordered by the minimum digit sequence in
844   // each range and the ranges form a disjoint covering of the ranges in the original table.
845   //
846   // See go/phonenumber-v2-data-structure for more details.
toRows( Table<Column<?>, Optional<?>, RangeTree> src, RangeTree allRanges, Comparator<Column<?>> columnOrdering)847   private static ImmutableList<Row> toRows(
848       Table<Column<?>, Optional<?>, RangeTree> src,
849       RangeTree allRanges,
850       Comparator<Column<?>> columnOrdering) {
851     // Get the non-empty columns in _reverse_ iteration order. We build up rows as a linked list
852     // structure, started from the "right hand side". This avoids a lot of copying as new columns
853     // are processed.
854     ImmutableList<Column<?>> reversedColumns = src.rowMap().entrySet().stream()
855         .filter(e -> !e.getValue().isEmpty())
856         .map(Entry::getKey)
857         .sorted(columnOrdering.reversed())
858         .collect(toImmutableList());
859     List<Row> uniqueRows = new ArrayList<>();
860     uniqueRows.add(Row.empty(allRanges));
861     for (Column<?> col : reversedColumns) {
862       // Loop backward here so that rows can be (a) removed in place and (b) added at the end.
863       for (int i = uniqueRows.size() - 1; i >= 0; i--) {
864         Row row = uniqueRows.get(i);
865         // Track the unprocessed range for each row as we extend it.
866         RangeTree remainder = row.getRanges();
867         for (Entry<Optional<?>, RangeTree> e : src.row(col).entrySet()) {
868           RangeTree overlap = e.getValue().intersect(remainder);
869           if (overlap.isEmpty()) {
870             continue;
871           }
872           // Extend the existing row by the current column value and reduce the remaining ranges.
873           uniqueRows.add(Row.of(overlap, col, e.getKey(), row));
874           remainder = remainder.subtract(overlap);
875           if (remainder.isEmpty()) {
876             // We've accounted for all of the existing row in the new column, so remove it.
877             uniqueRows.remove(i);
878             break;
879           }
880         }
881         if (!remainder.isEmpty()) {
882           // The existing row is not completely covered by the new column, so retain what's left.
883           uniqueRows.set(i, row.bound(remainder));
884         }
885       }
886     }
887     return ImmutableList.sortedCopyOf(comparing(r -> r.getRanges().first()), uniqueRows);
888   }
889 
890   /**
891    * A notional "row" with some set of assignments in a range table or table like structure. Note
892    * that a Row can represent unassignment as well as assignment, and not all rows need to contain
893    * all columns. Rows are used for representing value in a table, but also changes between tables.
894    */
895   @AutoValue
896   abstract static class Row implements Iterable<Assignment<?>> {
empty(RangeTree row)897     private static Row empty(RangeTree row) {
898       return new AutoValue_RangeTable_Row(row, null);
899     }
900 
of(RangeTree row, Column<?> col, Optional<?> val, Row next)901     private static Row of(RangeTree row, Column<?> col, Optional<?> val, Row next) {
902       checkArgument(!row.isEmpty(), "empty ranges not permitted (col=%s, val=%s)", col, val);
903       return new AutoValue_RangeTable_Row(
904           row, new AutoValue_RangeTable_Cell(Assignment.ofOptional(col, val), next.head()));
905     }
906 
getRanges()907     public abstract RangeTree getRanges();
head()908     @Nullable abstract Cell head();
909 
toChange()910     Change toChange() {
911       return Change.of(getRanges(), this);
912     }
913 
bound(RangeTree ranges)914     private Row bound(RangeTree ranges) {
915       return new AutoValue_RangeTable_Row(getRanges().intersect(ranges), head());
916     }
917 
918     @Override
iterator()919     public Iterator<Assignment<?>> iterator() {
920       return new UnmodifiableIterator<Assignment<?>>() {
921         @Nullable private Cell cur = Row.this.head();
922 
923         @Override
924         public boolean hasNext() {
925           return cur != null;
926         }
927 
928         @Override
929         public Assignment<?> next() {
930           Cell c = cur;
931           if (c == null) {
932             throw new NoSuchElementException();
933           }
934           cur = cur.next();
935           return c.assignment();
936         }
937       };
938     }
939 
940     @Override
toString()941     public final String toString() {
942       return "Row{" + getRanges() + " >> " + Iterables.toString(this) + "}";
943     }
944   }
945 
946   @AutoValue
947   abstract static class Cell {
assignment()948     abstract Assignment<?> assignment();
next()949     @Nullable abstract Cell next();
950   }
951 }
952