1 /* 2 * Copyright (C) 2017 The Libphonenumber Authors. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 package com.google.i18n.phonenumbers.metadata.table; 17 18 import static com.google.common.base.Preconditions.checkArgument; 19 import static com.google.common.base.Preconditions.checkNotNull; 20 import static com.google.common.collect.ImmutableList.toImmutableList; 21 import static com.google.common.collect.ImmutableMap.toImmutableMap; 22 import static com.google.common.collect.ImmutableSet.toImmutableSet; 23 import static com.google.common.collect.Iterables.transform; 24 import static com.google.common.collect.Maps.immutableEntry; 25 import static java.util.Comparator.comparing; 26 import static java.util.Map.Entry.comparingByKey; 27 import static java.util.stream.Collectors.joining; 28 29 import com.google.auto.value.AutoValue; 30 import com.google.common.collect.HashBasedTable; 31 import com.google.common.collect.ImmutableList; 32 import com.google.common.collect.ImmutableMap; 33 import com.google.common.collect.ImmutableSet; 34 import com.google.common.collect.ImmutableTable; 35 import com.google.common.collect.Iterables; 36 import com.google.common.collect.Sets; 37 import com.google.common.collect.Table; 38 import com.google.common.collect.TreeBasedTable; 39 import com.google.common.collect.UnmodifiableIterator; 40 import com.google.i18n.phonenumbers.metadata.PrefixTree; 41 import com.google.i18n.phonenumbers.metadata.RangeSpecification; 42 import com.google.i18n.phonenumbers.metadata.RangeTree; 43 import java.util.ArrayList; 44 import java.util.Collection; 45 import java.util.Comparator; 46 import java.util.Iterator; 47 import java.util.List; 48 import java.util.Map; 49 import java.util.Map.Entry; 50 import java.util.NoSuchElementException; 51 import java.util.Optional; 52 import java.util.Set; 53 import java.util.SortedMap; 54 import java.util.TreeMap; 55 import java.util.function.Function; 56 import javax.annotation.Nullable; 57 58 /** 59 * A tabular representation of attributes, assigned to number ranges. 60 * <p> 61 * A {@code RangeTable} is equivalent to {@code Table<RangeSpecification, Column, Value>}, 62 * but is expressed as a mapping of {@code (Column, Value) -> RangeTree} (since {@code RangeTree} 63 * is not a good key). To keep the data structurally equivalent to its tabular form, it's important 64 * that within a column, all assigned ranges are mutually disjoint (and thus a digit sequence can 65 * have at most one value assigned in any column). 66 * 67 * <h3>Table Schemas</h3> 68 * A table requires a {@link Schema}, which defines the columns which can be present and their 69 * order. Column ordering is important since it relates to how rules are applied (see below). 70 * 71 * <h3>Columns and Column Groups</h3> 72 * A {@link Column} defines a category of values of a particular type (e.g. String, Boolean, 73 * Integer or user specified enums) and a default value. New columns can be implemented easily and 74 * can choose to limit their values to some known set. 75 * <p> 76 * A {@link ColumnGroup} defines a related set of columns of the same type. The exact set of 77 * columns available in a group is not necessarily known in advance. A good example of a column 78 * group is having columns for names is different languages. A column group of "Name" could define 79 * columns such as "Name:en", "Name:fr", "Name:ja" etc. which contain the various translations of 80 * the value. The first time a value is added for a column inferred by a column group, that column 81 * is created. 82 * <p> 83 * An {@link Assignment} is a useful way to encapsulate "a value in a column" and can be used to 84 * assign or unassign values to ranges, or query for the ranges which have that assignment. 85 * 86 * <h3>Builders and Unassigned Values</h3> 87 * To allow a {@code RangeTable} to fully represent data in a tabular way, it must be possible to 88 * have rows in a table for which no value is assigned in any column. Unassigned ranges can be 89 * added to a builder via the {@link Builder#add(RangeTree)} method, and these "empty rows" are 90 * preserved in the final table. 91 * <p> 92 * This is useful since it allows a {@link Change} to affect no columns, but still have an effect 93 * on the final table. It's also useful when applying rules to infer values and fill-in column 94 * defaults. 95 */ 96 public final class RangeTable { 97 98 /** Overwrite rules for modifying range categorization. */ 99 public enum OverwriteMode { 100 /** Only assign ranges that were previously unassigned. */ 101 NEVER, 102 /** Only assign ranges that were either unassigned or had the same value. */ 103 SAME, 104 /** Always assign ranges (and unassign them from any other values in the same category). */ 105 ALWAYS; 106 } 107 108 /** A builder for an immutable range table to which changes and rules can be applied. */ 109 public static final class Builder { 110 // The schema for the table to be built. 111 private final Schema schema; 112 // The map of per-column ranges. 113 private final SortedMap<Column<?>, DisjointRangeMap.Builder<?>> columnRanges; 114 // The union of all ranges added to the builder (either by assignment or range addition). 115 // This is not just a cache of all the assigned ranges, since assigning and unassigning a range 116 // will not cause it to be removed from the table altogether (even if it is no longer assigned 117 // in any column). 118 private RangeTree allRanges = RangeTree.empty(); 119 Builder(Schema schema)120 private Builder(Schema schema) { 121 this.schema = checkNotNull(schema); 122 this.columnRanges = new TreeMap<>(schema.ordering()); 123 } 124 125 // Helper to return an on-demand builder for a column. getOrAddRangeMap(Column<T> c)126 private <T extends Comparable<T>> DisjointRangeMap.Builder<T> getOrAddRangeMap(Column<T> c) { 127 // The generic type of the builder is defined by the column it's building for, and the map 128 // just uses that column as its key. Thus, if the given column is recognized by the schema, 129 // the returned builder must be of the same type. 130 @SuppressWarnings("unchecked") 131 DisjointRangeMap.Builder<T> ranges = (DisjointRangeMap.Builder<T>) 132 columnRanges.computeIfAbsent(schema.checkColumn(c), DisjointRangeMap.Builder::new); 133 return ranges; 134 } 135 136 // ---- Read-only API ---- 137 138 /** Returns the schema for this builder. */ getSchema()139 public Schema getSchema() { 140 return schema; 141 } 142 143 /** 144 * Returns ranges for the given assignment. If the value is {@code empty}, then the unassigned 145 * ranges in the column are returned. 146 */ getRanges(Assignment<?> assignment)147 public RangeTree getRanges(Assignment<?> assignment) { 148 return getRanges(assignment.column(), assignment.value().orElse(null)); 149 } 150 151 /** 152 * Returns ranges for the given value in the specified column. If the value is {@code null}, 153 * then the unassigned ranges in the column are returned. If the column has no values assigned, 154 * then the empty range is returned (or, if {@code value == null}, all ranges in the table). 155 */ getRanges(Column<?> column, @Nullable Object value)156 public RangeTree getRanges(Column<?> column, @Nullable Object value) { 157 getSchema().checkColumn(column); 158 DisjointRangeMap.Builder<?> rangeMap = columnRanges.get(column); 159 if (value != null) { 160 return rangeMap != null ? rangeMap.getRanges(value) : RangeTree.empty(); 161 } else { 162 RangeTree all = getAllRanges(); 163 return rangeMap != null ? all.subtract(rangeMap.getAssignedRanges()) : all; 164 } 165 } 166 167 /** 168 * Returns all assigned ranges in the specified column. If the column doesn't exist in the 169 * table, the empty range is returned). 170 */ getAssignedRanges(Column<?> column)171 public RangeTree getAssignedRanges(Column<?> column) { 172 getSchema().checkColumn(column); 173 DisjointRangeMap.Builder<?> rangeMap = columnRanges.get(column); 174 return rangeMap != null ? rangeMap.getAssignedRanges() : RangeTree.empty(); 175 } 176 177 /** 178 * Returns ranges which were added to this builder, either directly via {@link #add(RangeTree)} 179 * or indirectly via assignment. 180 */ getAllRanges()181 public RangeTree getAllRanges() { 182 return allRanges; 183 } 184 185 /** Returns all ranges present in this table which are not assigned in any column. */ getUnassignedRanges()186 public RangeTree getUnassignedRanges() { 187 RangeTree allAssigned = columnRanges.values().stream() 188 .map(DisjointRangeMap.Builder::getAssignedRanges) 189 .reduce(RangeTree.empty(), RangeTree::union); 190 return allRanges.subtract(allAssigned); 191 } 192 193 /** 194 * Returns a snapshot of the columns in schema order (including empty columns which may have 195 * been added explicitly or exist due to values being unassigned). 196 */ getColumns()197 public ImmutableSet<Column<?>> getColumns() { 198 return columnRanges.entrySet().stream() 199 .map(Entry::getKey) 200 .collect(toImmutableSet()); 201 } 202 203 // ---- Range assignment/addition/removal ---- 204 205 /** 206 * Assigns the specified ranges to the given assignment. If the value is {@code empty}, then 207 * this has the effect of unassigning the given ranges, but does not remove them from the 208 * table. If {@code ranges} is empty, this method has no effect. 209 * 210 * @throws RangeException if assignment cannot be performed according to the overwrite mode 211 * (no change will have occurred in the table if this occurs). 212 */ assign(Assignment<?> assignment, RangeTree ranges, OverwriteMode mode)213 public Builder assign(Assignment<?> assignment, RangeTree ranges, OverwriteMode mode) { 214 assign(assignment.column(), assignment.value().orElse(null), ranges, mode); 215 return this; 216 } 217 218 /** 219 * Assigns the specified ranges to a value within a column (other columns unaffected). If the 220 * value is {@code null}, then this has the effect of unassigning the given ranges, but does 221 * not remove them from the table. If {@code ranges} is empty, this method has no effect. 222 * 223 * @throws RangeException if assignment cannot be performed according to the overwrite mode 224 * (no change will have occurred in the table if this occurs). 225 */ assign( Column<?> column, @Nullable Object value, RangeTree ranges, OverwriteMode mode)226 public Builder assign( 227 Column<?> column, @Nullable Object value, RangeTree ranges, OverwriteMode mode) { 228 if (!ranges.isEmpty()) { 229 getOrAddRangeMap(column).assign(value, ranges, mode); 230 allRanges = allRanges.union(ranges); 231 } 232 return this; 233 } 234 235 /** 236 * Unconditionally assigns all values, ranges and columns in the given table. This does not 237 * clear any already assigned ranges. 238 */ add(RangeTable table)239 public Builder add(RangeTable table) { 240 add(table.getAllRanges()); 241 add(table.getColumns()); 242 for (Column<?> column : table.getColumns()) { 243 for (Object value : table.getAssignedValues(column)) { 244 assign(column, value, table.getRanges(column, value), OverwriteMode.ALWAYS); 245 } 246 } 247 return this; 248 } 249 250 /** 251 * Ensures that the given ranges exist in the table, even if no assignments are ever made in 252 * any columns. 253 */ add(RangeTree ranges)254 public Builder add(RangeTree ranges) { 255 allRanges = allRanges.union(ranges); 256 return this; 257 } 258 259 /** Ensures that the given column exists in the table (even if there are no assignments). */ add(Column<?> column)260 public Builder add(Column<?> column) { 261 getOrAddRangeMap(checkNotNull(column)); 262 return this; 263 } 264 265 /** Ensures that the given columns exist in the table (even if there are no assignments). */ add(Collection<Column<?>> columns)266 public Builder add(Collection<Column<?>> columns) { 267 columns.forEach(this::add); 268 return this; 269 } 270 271 /** Removes the given ranges from the table, including all assignments in all columns. */ remove(RangeTree ranges)272 public Builder remove(RangeTree ranges) { 273 for (DisjointRangeMap.Builder<?> rangeMap : columnRanges.values()) { 274 rangeMap.assign(null, ranges, OverwriteMode.ALWAYS); 275 } 276 allRanges = allRanges.subtract(ranges); 277 return this; 278 } 279 280 /** Removes the given column from the table (has no effect if the column is not present). */ remove(Column<?> column)281 public Builder remove(Column<?> column) { 282 columnRanges.remove(checkNotNull(column)); 283 return this; 284 } 285 286 /** Removes the given columns from the table (has no effect if columns are not present). */ remove(Collection<Column<?>> columns)287 public Builder remove(Collection<Column<?>> columns) { 288 columns.forEach(this::remove); 289 return this; 290 } 291 292 /** Copies the assigned, non-default, values of the specified column. */ copyNonDefaultValues( Column<T> column, RangeTable src, OverwriteMode mode)293 public <T extends Comparable<T>> Builder copyNonDefaultValues( 294 Column<T> column, RangeTable src, OverwriteMode mode) { 295 for (T v : src.getAssignedValues(column)) { 296 if (!column.defaultValue().equals(v)) { 297 assign(column, v, src.getRanges(column, v), mode); 298 } 299 } 300 return this; 301 } 302 303 // ---- Applying changes ---- 304 305 /** 306 * Unconditionally applies the given change to this range table. Unlike 307 * {@link #apply(Change, OverwriteMode)}, this method cannot fail, since changes are applied 308 * unconditionally. 309 */ apply(Change change)310 public Builder apply(Change change) { 311 return apply(change, OverwriteMode.ALWAYS); 312 } 313 314 /** 315 * Applies the given change to this range table. A change adds ranges to the table, optionally 316 * assigning them specific category values within columns. 317 * 318 * @throws RangeException if the overwrite mode prohibits the modification in this change (the 319 * builder remains unchanged). 320 */ apply(Change change, OverwriteMode mode)321 public Builder apply(Change change, OverwriteMode mode) { 322 RangeTree ranges = change.getRanges(); 323 if (!ranges.isEmpty()) { 324 // Check first that the assignments will succeed before attempting them (so as not to 325 // leave the builder in an inconsistent state if it fails). 326 if (mode != OverwriteMode.ALWAYS) { 327 for (Assignment<?> a : change.getAssignments()) { 328 getOrAddRangeMap(a.column()).checkAssign(a.value().orElse(null), ranges, mode); 329 } 330 } 331 for (Assignment<?> a : change.getAssignments()) { 332 getOrAddRangeMap(a.column()).assign(a.value().orElse(null), ranges, mode); 333 } 334 allRanges = allRanges.union(ranges); 335 } 336 return this; 337 } 338 339 // ---- Builder related methods ---- 340 341 /** Builds the range table from the current state of the builder. */ build()342 public RangeTable build() { 343 ImmutableMap<Column<?>, DisjointRangeMap<?>> columnMap = columnRanges.entrySet().stream() 344 .map(e -> immutableEntry(e.getKey(), e.getValue().build())) 345 .sorted(comparingByKey(schema.ordering())) 346 .collect(toImmutableMap(Entry::getKey, Entry::getValue)); 347 return new RangeTable(schema, columnMap, allRanges, getUnassignedRanges()); 348 } 349 350 /** 351 * Returns a new builder with the same state as the current builder. This is useful when state 352 * is being built up incrementally. 353 */ copy()354 public Builder copy() { 355 // Can be made more efficient if necessary... 356 return build().toBuilder(); 357 } 358 359 /** Builds a minimal version of this table in which empty columns are no longer present. */ buildMinimal()360 public RangeTable buildMinimal() { 361 ImmutableSet<Column<?>> empty = columnRanges.entrySet().stream() 362 .filter(e -> e.getValue().getAssignedRanges().isEmpty()) 363 .map(Entry::getKey) 364 .collect(toImmutableSet()); 365 remove(empty); 366 return build(); 367 } 368 369 @Override toString()370 public final String toString() { 371 return build().toString(); 372 } 373 } 374 375 /** Returns a builder for a range table with the specified column mapping. */ builder(Schema schema)376 public static Builder builder(Schema schema) { 377 return new Builder(schema); 378 } 379 from( Schema schema, Table<RangeSpecification, Column<?>, Optional<?>> t)380 public static RangeTable from( 381 Schema schema, Table<RangeSpecification, Column<?>, Optional<?>> t) { 382 Builder table = builder(schema); 383 for (Entry<RangeSpecification, Map<Column<?>, Optional<?>>> row : t.rowMap().entrySet()) { 384 List<Assignment<?>> assignments = row.getValue().entrySet().stream() 385 .map(e -> Assignment.ofOptional(e.getKey(), e.getValue())) 386 .collect(toImmutableList()); 387 table.apply(Change.of(RangeTree.from(row.getKey()), assignments)); 388 } 389 return table.build(); 390 } 391 392 // Definition of table columns. 393 private final Schema schema; 394 // Mapping to the assigned ranges for each column type. 395 private final ImmutableMap<Column<?>, DisjointRangeMap<?>> columnRanges; 396 // All ranges in this table (possibly larger than union of all assigned ranges in all columns). 397 private final RangeTree allRanges; 398 // Ranges unassigned in any column (a subset of, or equal to allRanges). 399 private final RangeTree unassigned; 400 RangeTable( Schema schema, ImmutableMap<Column<?>, DisjointRangeMap<?>> columnRanges, RangeTree allRanges, RangeTree unassigned)401 private RangeTable( 402 Schema schema, 403 ImmutableMap<Column<?>, DisjointRangeMap<?>> columnRanges, 404 RangeTree allRanges, 405 RangeTree unassigned) { 406 this.schema = checkNotNull(schema); 407 this.columnRanges = checkNotNull(columnRanges); 408 this.allRanges = checkNotNull(allRanges); 409 this.unassigned = checkNotNull(unassigned); 410 } 411 412 /** Returns a builder initialized to the ranges and assignements in this table. */ toBuilder()413 public Builder toBuilder() { 414 // Any mode would work here (the builder is empty) but the "always overwrite" mode is fastest. 415 return new Builder(schema).add(this); 416 } 417 getRangeMap(Column<?> column)418 private Optional<DisjointRangeMap<?>> getRangeMap(Column<?> column) { 419 return Optional.ofNullable(columnRanges.get(schema.checkColumn(column))); 420 } 421 getSchema()422 public Schema getSchema() { 423 return schema; 424 } 425 getColumns()426 public ImmutableSet<Column<?>> getColumns() { 427 return columnRanges.keySet(); 428 } 429 430 /** 431 * Returns the set of values with assigned ranges in the given column. 432 * 433 * @throws IllegalArgumentException if the specified column does not exist in this table. 434 */ getAssignedValues(Column<T> column)435 public <T extends Comparable<T>> ImmutableSet<T> getAssignedValues(Column<T> column) { 436 getSchema().checkColumn(column); 437 // Safe since if the column is in the schema the values must have been checked when added. 438 @SuppressWarnings("unchecked") 439 DisjointRangeMap<T> rangeMap = 440 (DisjointRangeMap<T>) columnRanges.get(schema.checkColumn(column)); 441 return rangeMap != null ? rangeMap.getAssignedValues() : ImmutableSet.of(); 442 } 443 444 /** Returns all assigned ranges in the specified column. */ getAssignedRanges(Column<?> column)445 public RangeTree getAssignedRanges(Column<?> column) { 446 return getRangeMap(column).map(DisjointRangeMap::getAssignedRanges).orElse(RangeTree.empty()); 447 } 448 449 /** 450 * Returns ranges for the given assignment. If the value is {@code empty}, then the unassigned 451 * ranges in the column are returned. 452 */ getRanges(Assignment<?> assignment)453 public RangeTree getRanges(Assignment<?> assignment) { 454 return getRanges(assignment.column(), assignment.value().orElse(null)); 455 } 456 457 /** 458 * Returns ranges for the given value in the specified column. If the value is {@code null}, then 459 * the unassigned ranges in the column are returned. 460 */ getRanges(Column<?> column, @Nullable Object value)461 public RangeTree getRanges(Column<?> column, @Nullable Object value) { 462 getSchema().checkColumn(column); 463 if (value == null) { 464 return getAllRanges().subtract(getAssignedRanges(column)); 465 } else { 466 return getRangeMap(column).map(m -> m.getRanges(value)).orElse(RangeTree.empty()); 467 } 468 } 469 470 /** Returns all ranges present in this table. */ getAllRanges()471 public RangeTree getAllRanges() { 472 return allRanges; 473 } 474 475 /** Returns all ranges present in this table which are not assigned in any column. */ getUnassignedRanges()476 public RangeTree getUnassignedRanges() { 477 return unassigned; 478 } 479 480 /** 481 * Returns whether this table contains no ranges (assigned or unassigned). Note that not all 482 * empty tables are equal, since they may still differ by the columns they have. 483 */ isEmpty()484 public boolean isEmpty() { 485 return allRanges.isEmpty(); 486 } 487 488 /** 489 * Returns a sub-table with rows and columns limited by the specified bounds. The schema of the 490 * returned table is the same as this table. 491 */ subTable(RangeTree bounds, Set<Column<?>> columns)492 public RangeTable subTable(RangeTree bounds, Set<Column<?>> columns) { 493 // Columns must be a subset of what's allowed in this schema. 494 columns.forEach(getSchema()::checkColumn); 495 return subTable(bounds, getSchema(), columns); 496 } 497 498 /** 499 * Returns a sub-table with rows and columns limited by the specified bounds. The schema of the 500 * returned table is the same as this table. 501 */ subTable(RangeTree bounds, Column<?> first, Column<?>... rest)502 public RangeTable subTable(RangeTree bounds, Column<?> first, Column<?>... rest) { 503 return subTable(bounds, ImmutableSet.<Column<?>>builder().add(first).add(rest).build()); 504 } 505 506 /** 507 * Returns a table with rows and columns limited by the specified bounds. The schema of the 508 * returned table is the given sub-schema. 509 */ subTable(RangeTree bounds, Schema subSchema)510 public RangeTable subTable(RangeTree bounds, Schema subSchema) { 511 checkArgument(subSchema.isSubSchemaOf(getSchema()), 512 "expected sub-schema of %s, got %s", getSchema(), subSchema); 513 return subTable(bounds, subSchema, Sets.filter(getColumns(), subSchema::isValidColumn)); 514 } 515 516 // Callers MUST validate that the given set of columns are all valid in the subSchema. subTable(RangeTree bounds, Schema subSchema, Set<Column<?>> columns)517 private RangeTable subTable(RangeTree bounds, Schema subSchema, Set<Column<?>> columns) { 518 ImmutableMap<Column<?>, DisjointRangeMap<?>> columnMap = columns.stream() 519 // Bound the given columns which exist in this table. 520 .map(c -> immutableEntry(c, getRangeMap(c).map(r -> r.intersect(bounds)))) 521 // Reject columns we didn't already have (but allow empty columns if they exist). 522 .filter(e -> e.getValue().isPresent()) 523 // Sort to our schema (since the given set of columns is not required to be sorted). 524 .sorted(comparingByKey(schema.ordering())) 525 .collect(toImmutableMap(Entry::getKey, e -> e.getValue().get())); 526 return new RangeTable( 527 subSchema, columnMap, allRanges.intersect(bounds), unassigned.intersect(bounds)); 528 } 529 530 /** 531 * Returns the assigned rows of a RangeTable as a minimal list of disjoint changes, which can 532 * be applied to an empty table to recreate this table. No two changes affect the same columns 533 * in the same way and changes are ordered by the minimal values of their ranges. This is 534 * essentially the same information as returned in {@link #toImmutableTable()} but does not 535 * decompose ranges into range specifications, and it thus more amenable to compact 536 * serialization. 537 */ 538 // Note that the minimal nature of the returned changes is essential for some algorithms that 539 // operate on tables and this must not be changed. toChanges()540 public ImmutableList<Change> toChanges() { 541 Table<Column<?>, Optional<?>, RangeTree> table = HashBasedTable.create(); 542 for (Column<?> c : getColumns()) { 543 for (Object v : getAssignedValues(c)) { 544 table.put(c, Optional.of(v), getRanges(c, v)); 545 } 546 } 547 return toChanges(schema, table, getAllRanges()); 548 } 549 550 /** 551 * Returns a minimum set of changes based on a table of assignments (column plus value). This is 552 * not expected to be used often (since RangeTable is usually a better representation of the data 553 * but can be useful in representing things like updates and patches in which only some rows or 554 * columns are represented. 555 * @param schema a schema for the columns in the given Table (used to determine column order). 556 * @param table the table of assignments to assigned ranges. 557 * @param allRanges the set of all ranges affected by the changes (this might include ranges not 558 * present anywhere in the table, which correspond to empty rows). 559 */ toChanges( Schema schema, Table<Column<?>, Optional<?>, RangeTree> table, RangeTree allRanges)560 public static ImmutableList<Change> toChanges( 561 Schema schema, Table<Column<?>, Optional<?>, RangeTree> table, RangeTree allRanges) { 562 return ImmutableList.copyOf( 563 transform(toRows(table, allRanges, schema.ordering()), Row::toChange)); 564 } 565 566 /** 567 * Returns the data in this table represented as a {@link ImmutableTable}. Row keys are disjoint 568 * range specifications (in order). The returned table has the smallest number of rows necessary 569 * to represent the data in this range table. This is useful as a human readable serialized form 570 * since any digit sequence in the table is contained in a unique row. 571 */ toImmutableTable()572 public ImmutableTable<RangeSpecification, Column<?>, Optional<?>> toImmutableTable() { 573 Table<Column<?>, Optional<?>, RangeTree> table = HashBasedTable.create(); 574 for (Column<?> c : getColumns()) { 575 for (Object v : getAssignedValues(c)) { 576 table.put(c, Optional.of(v), getRanges(c, v)); 577 } 578 RangeTree unassigned = getAllRanges().subtract(getAssignedRanges(c)); 579 if (!unassigned.isEmpty()) { 580 table.put(c, Optional.empty(), unassigned); 581 } 582 } 583 // Unique changes contain disjoint ranges, each associated with a unique combination of 584 // assignments. 585 TreeBasedTable<RangeSpecification, Column<?>, Optional<?>> out = 586 TreeBasedTable.create(comparing(RangeSpecification::min), schema.ordering()); 587 for (Change c : toChanges(schema, table, getAllRanges())) { 588 List<RangeSpecification> keys = c.getRanges().asRangeSpecifications(); 589 for (Assignment<?> a : c.getAssignments()) { 590 for (RangeSpecification k : keys) { 591 out.put(k, a.column(), a.value()); 592 } 593 } 594 } 595 return ImmutableTable.copyOf(out); 596 } 597 598 /** 599 * Extracts a map for a single column in this table containing the minimal prefix tree for each 600 * of the assigned values. The returned prefixes are the shortest prefixes possible for 601 * distinguishing each value in the column. This method is especially useful if you want to 602 * categorize partial digit sequences efficiently (i.e. prefix matching). 603 * 604 * <p>A minimal length can be specified to avoid creating prefixes that are "too short" for some 605 * circumstances. Note that returned prefixes are never zero length, so {@code 1} is the lowest 606 * meaningful value (although zero is still accepted to imply "no length restriction"). 607 * 608 * <p>Note that for some table data, it is technically impossible to obtain perfect prefix 609 * information and in cases where overlap occurs, this method returns the shortest prefixes. This 610 * means that for some valid inputs it might be true that more than one prefix is matched. It 611 * is therefore up to the caller to determine a "best order" for testing the prefixes if this 612 * matters. See {@link PrefixTree#minimal(RangeTree, RangeTree, int)} for more information. 613 * 614 * <p>An example of an "impossible" prefix would be if "123" has value A, "1234" has value B and 615 * "12345" has value A again. In this case there is no prefix which can distinguish A and B 616 * (the calculated map would be { "123" ⟹ A, "1234" ⟹ B }). In this situation, testing for the 617 * longer prefix would help preserve as much of the original mapping as possible, but it would 618 * never be possible to correctly distinguish all inputs. 619 */ getPrefixMap( Column<T> column, int minPrefixLength)620 public <T extends Comparable<T>> ImmutableMap<T, PrefixTree> getPrefixMap( 621 Column<T> column, int minPrefixLength) { 622 ImmutableMap.Builder<T, PrefixTree> map = ImmutableMap.builder(); 623 // Important: Don't just use the assigned ranges in the column, use the assigned ranges of the 624 // entire table. This ensures unassigned ranges in the column are not accidentally captured by 625 // any of the generated prefixes. 626 RangeTree allRanges = getAllRanges(); 627 for (T value : getAssignedValues(column)) { 628 RangeTree include = getRanges(column, value); 629 map.put(value, PrefixTree.minimal(include, allRanges.subtract(include), minPrefixLength)); 630 } 631 return map.build(); 632 } 633 634 // Constants for the simplification routine below. 635 // Use -1 for unassigned rows (these are the "overlap" ranges and they don't have an index). 636 private static final Column<Integer> INDEX = 637 Column.create(Integer.class, "Change Index", -1, Integer::parseInt); 638 private static final Schema INDEX_SCHEMA = Schema.builder().add(INDEX).build(); 639 640 /** 641 * Applies a simplification function to the rows defined by the given columns of this table. The 642 * returned table will only have (at most) the specified columns present. 643 * 644 * <p>The simplification function is used to produce ranges which satisfy some business logic 645 * criteria (such as having at most N significant digits, or merging lengths). Range 646 * simplification enables easier comparison between data sources of differing precision, and 647 * helps to reduce unnecessary complexity in generated regular expressions. 648 * 649 * <p>The simplification function should return a range that's at least as large as the input 650 * range. This is to ensure that simplification cannot unassign ranges, even accidentally. The 651 * returned range is automatically restricted to preserve disjoint ranges in the final table. 652 * 653 * <p>By passing a {@link Change} rather than just a {@link RangeTree}, the simplification 654 * function has access to the row assignments for the range it is simplifying. This allows it to 655 * select different strategies according to the values in specific columns (e.g. area code 656 * length). 657 * 658 * <p>Note that unassigned ranges in the original table will be preserved and simplified ranges 659 * will not overwrite them. This can be useful for defining "no go" ranges which should be left 660 * alone. 661 */ simplify( Function<Change, RangeTree> simplifyFn, int minPrefixLength, Column<?> first, Column<?>... rest)662 public RangeTable simplify( 663 Function<Change, RangeTree> simplifyFn, 664 int minPrefixLength, 665 Column<?> first, 666 Column<?>... rest) { 667 // Build the single column "index" table (one index for each change) and simplify its ranges. 668 // This only works because "toChanges()" produces the minimal set of changes such that each 669 // unique combination of assignments appears only once. 670 ImmutableList<Change> rows = subTable(getAllRanges(), first, rest).toChanges(); 671 RangeTable simplifiedIndexTable = simplifyIndexTable(rows, simplifyFn, minPrefixLength); 672 673 // Reconstruct the output table by assigning values from the original change set according to 674 // the indices in the simplified index table. 675 Builder simplified = RangeTable.builder(getSchema()).add(simplifiedIndexTable.getAllRanges()); 676 for (int i : simplifiedIndexTable.getAssignedValues(INDEX)) { 677 RangeTree simplifiedRange = simplifiedIndexTable.getRanges(INDEX, i); 678 for (Assignment<?> a : rows.get(i).getAssignments()) { 679 simplified.assign(a, simplifiedRange, OverwriteMode.NEVER); 680 } 681 } 682 return simplified.build(); 683 } 684 685 /** 686 * Helper function to simplify an index table based on the given rows. The resulting table will 687 * have a single "index" column with simplified ranges, where the index value {@code N} 688 * references the Nth row in the given list of disjoint changes. This is a 3 stage process: 689 * <ol> 690 * <li>Step 1: Determine which ranges can overlap with respect to set of range prefixes. 691 * <li>Step 2: Do simplification on the non-overlapping "prefix disjoint" ranges in the table, 692 * which are then be re-partitioned by the disjoint prefixes. 693 * <li>Step 3: Copy over any overlapping ranges from the original table (these don't get 694 * simplified since it's not possible to easily re-pertition them). 695 * </ol> 696 */ simplifyIndexTable( ImmutableList<Change> rows, Function<Change, RangeTree> simplifyFn, int minPrefixLength)697 private static <T extends Comparable<T>> RangeTable simplifyIndexTable( 698 ImmutableList<Change> rows, Function<Change, RangeTree> simplifyFn, int minPrefixLength) { 699 RangeTable indexTable = makeIndexTable(rows); 700 701 // Step 1: Determine overlapping ranges from the index table, retaining minimum prefix length. 702 ImmutableMap<Integer, PrefixTree> nonDisjointPrefixes = 703 indexTable.getPrefixMap(INDEX, minPrefixLength); 704 // Don't just use the assigned ranges (we need to account for valid but unassigned ranges when 705 // determining overlaps). 706 RangeTree allRanges = indexTable.getAllRanges(); 707 RangeTree overlaps = RangeTree.empty(); 708 for (int n : indexTable.getAssignedValues(INDEX)) { 709 RangeTree otherRanges = allRanges.subtract(indexTable.getRanges(INDEX, n)); 710 overlaps = overlaps.union(nonDisjointPrefixes.get(n).retainFrom(otherRanges)); 711 } 712 713 // Step 2: Determine the "prefix disjoint" ranges in a new table and simplify it. 714 // 715 // Before getting the new set of prefixes, add the overlapping ranges back to the table, but 716 // without assigning them to anything. This keeps the generated prefixes as long as necessary 717 // to avoid creating conflicting assignments for different values. Essentially we're trying to 718 // keep ranges "away from" any overlaps. Note however that it is still possible for simplified 719 // ranges encroach on the overlapping areas, so we must still forcibly overwrite the original 720 // overlapping values after siplification. Consider: 721 // A = { "12x", "12xxx" }, B = { "123x" } 722 // where the simplification function just creates any "any" range for all lengths between the 723 // minimum and maximum range lengths (e.g. { "123", "45678" } ==> { "xxx", "xxxx", "xxxxx" }. 724 // 725 // The (non disjoint) prefix table is Pre(A) => { "12" }, Pre(B) => { "123" } and this 726 // captures the overlaps: 727 // Pre(A).retainFrom(B) = { "123x" } = B 728 // Pre(B).retainFrom(A) = { "123xx" } 729 // 730 // Since is of "B" is entirely contained by the overlap, it is not simplified, but A is 731 // simplified to: 732 // { "xxx", "xxxx", "xxxxx" } 733 // and the re-captured by the "disjoint" prefix (which is still just "12") to: 734 // { "12x", "12xx", "12xxx" } 735 // 736 // However now, when the original overlaps are added back at the end (in step 3) we find that 737 // both "123xx" already exists (with the same index) and "123x" exists with a different index. 738 // The resolution is to just overwrite all overlaps back into the table, since these represent 739 // the original (unsimplified) values. 740 // 741 // Thus in this case, the simplified table is: 742 // Sim(A) = { "12x", "12[0-24-9]x", "12xxx" }, Sim(B) = { "123x" } 743 // 744 // And it is still true that: Sim(A).containsAll(A) and Sim(B).containsAll(B) 745 RangeTable prefixDisjointTable = indexTable 746 .subTable(allRanges.subtract(overlaps), INDEX) 747 .toBuilder() 748 .add(overlaps) 749 .build(); 750 751 // NOTE: Another way to do this would be to implement an "exclusive prefix" method which could 752 // be used to immediately return a set of truly "disjoint" prefixes (although this would change 753 // the algorithm's behaviour since more ranges would be considered "overlapping" than now). 754 // TODO: Experiment with an alternate "exclusive" prefix function. 755 ImmutableMap<Integer, PrefixTree> disjointPrefixes = prefixDisjointTable.getPrefixMap(INDEX, 1); 756 // Not all values from the original table need be present in the derived table (since some 757 // overlaps account for all the ranges of a value). 758 Builder simplified = RangeTable.builder(INDEX_SCHEMA); 759 for (int n : prefixDisjointTable.getAssignedValues(INDEX)) { 760 RangeTree disjointRange = prefixDisjointTable.getRanges(INDEX, n); 761 // Pass just the assignments, not the whole row (Change) because that also contains a range, 762 // which might not be the same as the disjoint range (so it could be rather confusing). 763 PrefixTree disjointPrefix = disjointPrefixes.get(n); 764 RangeTree simplifiedRange = 765 simplifyFn.apply(Change.of(disjointRange, rows.get(n).getAssignments())); 766 // Technically this check is not strictly required, but there's probably no good use-case in 767 // which you'd want to remove assignments via the simplification process. 768 checkArgument(simplifiedRange.containsAll(disjointRange), 769 "simplification should return a superset of the given range\n" 770 + "input: %s\n" 771 + "output: %s\n" 772 + "missing: %s", 773 disjointRange, simplifiedRange, disjointRange.subtract(simplifiedRange)); 774 // Repartition the simplified ranges by the "disjoint" prefixes to restore most of the 775 // simplified ranges. These ranges should never overlap with each other. 776 RangeTree repartitionedRange = disjointPrefix.retainFrom(simplifiedRange); 777 simplified.assign(INDEX, n, repartitionedRange, OverwriteMode.NEVER); 778 } 779 780 // Step 3: Copy remaining overlapping ranges from the original table back into the result. 781 // Note that we may end up overwriting values here, but that's correct since it restores 782 // original "unsimplifiable" ranges. 783 for (int n : indexTable.getAssignedValues(INDEX)) { 784 simplified.assign( 785 INDEX, n, indexTable.getRanges(INDEX, n).intersect(overlaps), OverwriteMode.ALWAYS); 786 } 787 return simplified.build(); 788 } 789 790 // Helper to make a table with a single column than references a list of disjoint changes by 791 // index (against the range of that change). makeIndexTable(ImmutableList<Change> rows)792 private static RangeTable makeIndexTable(ImmutableList<Change> rows) { 793 Builder indexTable = RangeTable.builder(INDEX_SCHEMA); 794 for (int i = 0; i < rows.size(); i++) { 795 // Empty rows are added to the table, but not assigned an index. Their existence in the index 796 // table prevents over simplification from affecting unassigned rows of the original table. 797 if (rows.get(i).getAssignments().isEmpty()) { 798 indexTable.add(rows.get(i).getRanges()); 799 } else { 800 indexTable.assign(INDEX, i, rows.get(i).getRanges(), OverwriteMode.NEVER); 801 } 802 } 803 return indexTable.build(); 804 } 805 806 @Override equals(Object obj)807 public boolean equals(Object obj) { 808 if (!(obj instanceof RangeTable)) { 809 return false; 810 } 811 RangeTable other = (RangeTable) obj; 812 return this == other 813 || (schema.equals(other.schema) 814 && allRanges.equals(other.allRanges) 815 && columnRanges.values().asList().equals(other.columnRanges.values().asList())); 816 } 817 818 @Override hashCode()819 public int hashCode() { 820 // This could be memoized if it turns out to be slow. 821 return schema.hashCode() ^ columnRanges.hashCode() ^ allRanges.hashCode(); 822 } 823 824 // TODO: Prettier format for toString(). 825 @Override toString()826 public final String toString() { 827 ImmutableTable<RangeSpecification, Column<?>, Optional<?>> table = toImmutableTable(); 828 return table.rowMap().entrySet().stream() 829 .map(e -> String.format("%s, %s", e.getKey(), rowToString(e.getValue()))) 830 .collect(joining("\n")); 831 } 832 rowToString(Map<Column<?>, Optional<?>> r)833 private static String rowToString(Map<Column<?>, Optional<?>> r) { 834 return r.values().stream() 835 .map(v -> v.map(Object::toString).orElse("UNSET")) 836 .collect(joining(", ")); 837 } 838 839 // Helper method to convert a table of values into a minimal set of changes. This is used to 840 // turn a single RangeTable into an ImmutableTable, but also to convert a Patch into a minimal 841 // sequence of Changes. Each returned "row" defines a range, and a unique sequence of assignments 842 // over that range (i.e. no two rows have the same assignments in). The assignments are ordered 843 // in column order within each row, and the rows are ordered by the minimum digit sequence in 844 // each range and the ranges form a disjoint covering of the ranges in the original table. 845 // 846 // See go/phonenumber-v2-data-structure for more details. toRows( Table<Column<?>, Optional<?>, RangeTree> src, RangeTree allRanges, Comparator<Column<?>> columnOrdering)847 private static ImmutableList<Row> toRows( 848 Table<Column<?>, Optional<?>, RangeTree> src, 849 RangeTree allRanges, 850 Comparator<Column<?>> columnOrdering) { 851 // Get the non-empty columns in _reverse_ iteration order. We build up rows as a linked list 852 // structure, started from the "right hand side". This avoids a lot of copying as new columns 853 // are processed. 854 ImmutableList<Column<?>> reversedColumns = src.rowMap().entrySet().stream() 855 .filter(e -> !e.getValue().isEmpty()) 856 .map(Entry::getKey) 857 .sorted(columnOrdering.reversed()) 858 .collect(toImmutableList()); 859 List<Row> uniqueRows = new ArrayList<>(); 860 uniqueRows.add(Row.empty(allRanges)); 861 for (Column<?> col : reversedColumns) { 862 // Loop backward here so that rows can be (a) removed in place and (b) added at the end. 863 for (int i = uniqueRows.size() - 1; i >= 0; i--) { 864 Row row = uniqueRows.get(i); 865 // Track the unprocessed range for each row as we extend it. 866 RangeTree remainder = row.getRanges(); 867 for (Entry<Optional<?>, RangeTree> e : src.row(col).entrySet()) { 868 RangeTree overlap = e.getValue().intersect(remainder); 869 if (overlap.isEmpty()) { 870 continue; 871 } 872 // Extend the existing row by the current column value and reduce the remaining ranges. 873 uniqueRows.add(Row.of(overlap, col, e.getKey(), row)); 874 remainder = remainder.subtract(overlap); 875 if (remainder.isEmpty()) { 876 // We've accounted for all of the existing row in the new column, so remove it. 877 uniqueRows.remove(i); 878 break; 879 } 880 } 881 if (!remainder.isEmpty()) { 882 // The existing row is not completely covered by the new column, so retain what's left. 883 uniqueRows.set(i, row.bound(remainder)); 884 } 885 } 886 } 887 return ImmutableList.sortedCopyOf(comparing(r -> r.getRanges().first()), uniqueRows); 888 } 889 890 /** 891 * A notional "row" with some set of assignments in a range table or table like structure. Note 892 * that a Row can represent unassignment as well as assignment, and not all rows need to contain 893 * all columns. Rows are used for representing value in a table, but also changes between tables. 894 */ 895 @AutoValue 896 abstract static class Row implements Iterable<Assignment<?>> { empty(RangeTree row)897 private static Row empty(RangeTree row) { 898 return new AutoValue_RangeTable_Row(row, null); 899 } 900 of(RangeTree row, Column<?> col, Optional<?> val, Row next)901 private static Row of(RangeTree row, Column<?> col, Optional<?> val, Row next) { 902 checkArgument(!row.isEmpty(), "empty ranges not permitted (col=%s, val=%s)", col, val); 903 return new AutoValue_RangeTable_Row( 904 row, new AutoValue_RangeTable_Cell(Assignment.ofOptional(col, val), next.head())); 905 } 906 getRanges()907 public abstract RangeTree getRanges(); head()908 @Nullable abstract Cell head(); 909 toChange()910 Change toChange() { 911 return Change.of(getRanges(), this); 912 } 913 bound(RangeTree ranges)914 private Row bound(RangeTree ranges) { 915 return new AutoValue_RangeTable_Row(getRanges().intersect(ranges), head()); 916 } 917 918 @Override iterator()919 public Iterator<Assignment<?>> iterator() { 920 return new UnmodifiableIterator<Assignment<?>>() { 921 @Nullable private Cell cur = Row.this.head(); 922 923 @Override 924 public boolean hasNext() { 925 return cur != null; 926 } 927 928 @Override 929 public Assignment<?> next() { 930 Cell c = cur; 931 if (c == null) { 932 throw new NoSuchElementException(); 933 } 934 cur = cur.next(); 935 return c.assignment(); 936 } 937 }; 938 } 939 940 @Override toString()941 public final String toString() { 942 return "Row{" + getRanges() + " >> " + Iterables.toString(this) + "}"; 943 } 944 } 945 946 @AutoValue 947 abstract static class Cell { assignment()948 abstract Assignment<?> assignment(); next()949 @Nullable abstract Cell next(); 950 } 951 } 952