# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Lookup table operations."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.python.eager import context
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import gen_lookup_ops
from tensorflow.python.ops import lookup_ops
# pylint: disable=unused-import
from tensorflow.python.ops.lookup_ops import FastHashSpec
from tensorflow.python.ops.lookup_ops import HasherSpec
from tensorflow.python.ops.lookup_ops import IdTableWithHashBuckets
from tensorflow.python.ops.lookup_ops import index_table_from_file
from tensorflow.python.ops.lookup_ops import index_to_string_table_from_file
from tensorflow.python.ops.lookup_ops import InitializableLookupTableBase
from tensorflow.python.ops.lookup_ops import InitializableLookupTableBaseV1
from tensorflow.python.ops.lookup_ops import KeyValueTensorInitializer
from tensorflow.python.ops.lookup_ops import LookupInterface
from tensorflow.python.ops.lookup_ops import StrongHashSpec
from tensorflow.python.ops.lookup_ops import TableInitializerBase
from tensorflow.python.ops.lookup_ops import TextFileIdTableInitializer
from tensorflow.python.ops.lookup_ops import TextFileIndex
from tensorflow.python.ops.lookup_ops import TextFileInitializer
from tensorflow.python.ops.lookup_ops import TextFileStringTableInitializer
# pylint: enable=unused-import
from tensorflow.python.util.deprecation import deprecated


@deprecated("2017-04-10", "Use `index_table_from_file`.")
def string_to_index_table_from_file(vocabulary_file=None,
                                    num_oov_buckets=0,
                                    vocab_size=None,
                                    default_value=-1,
                                    hasher_spec=FastHashSpec,
                                    name=None):
  """Deprecated wrapper around `index_table_from_file` for string keys.

  All arguments are forwarded unchanged to `index_table_from_file`, with
  `key_dtype` fixed to `dtypes.string`.

  Args:
    vocabulary_file: Forwarded to `index_table_from_file`.
    num_oov_buckets: Forwarded to `index_table_from_file`.
    vocab_size: Forwarded to `index_table_from_file`.
    default_value: Forwarded to `index_table_from_file`.
    hasher_spec: Forwarded to `index_table_from_file`.
    name: A name for this op (optional).

  Returns:
    The table returned by `index_table_from_file`.
  """
  return index_table_from_file(
      vocabulary_file, num_oov_buckets, vocab_size, default_value, hasher_spec,
      key_dtype=dtypes.string, name=name)


@deprecated("2017-04-10", "Use `index_table_from_tensor`.")
def string_to_index_table_from_tensor(mapping,
                                      num_oov_buckets=0,
                                      default_value=-1,
                                      hasher_spec=FastHashSpec,
                                      name=None):
  """Deprecated wrapper around `index_table_from_tensor` for string keys.

  `mapping` is converted to a tensor and must have a string base dtype; the
  table itself is then built by `index_table_from_tensor` inside the
  "string_to_index" name scope.

  Args:
    mapping: A value convertible to a string `Tensor` holding the keys.
    num_oov_buckets: The number of out-of-vocabulary buckets.
    default_value: The value to use for out-of-vocabulary feature values.
      Defaults to -1.
    hasher_spec: A `HasherSpec` to specify the hash function to use for
      assignment of out-of-vocabulary buckets.
    name: A name for this op (optional).

  Returns:
    The table returned by `index_table_from_tensor`.

  Raises:
    ValueError: If `mapping` does not have a string dtype.
  """
  with ops.name_scope(name, "string_to_index") as scope:
    mapping = ops.convert_to_tensor(mapping)
  if dtypes.string != mapping.dtype.base_dtype:
    raise ValueError("string_to_index_table_from_tensor requires string.")
  return index_table_from_tensor(
      mapping, num_oov_buckets, default_value, hasher_spec, name=scope)


def index_table_from_tensor(mapping,
                            num_oov_buckets=0,
                            default_value=-1,
                            hasher_spec=FastHashSpec,
                            dtype=dtypes.string,
                            name=None):
  """Returns a lookup table that converts a string tensor into int64 IDs.

  This operation constructs a lookup table to convert tensor of strings into
  int64 IDs. The mapping can be initialized from a string `mapping` 1-D tensor
  where each element is a key and corresponding index within the tensor is the
  value.

  Any lookup of an out-of-vocabulary token will return a bucket ID based on its
  hash if `num_oov_buckets` is greater than zero. Otherwise it is assigned the
  `default_value`.
  The bucket ID range is `[mapping size, mapping size + num_oov_buckets - 1]`.

  The underlying table must be initialized by calling
  `session.run(tf.tables_initializer())` or `session.run(table.init)` once.

  Elements in `mapping` cannot have duplicates, otherwise when executing the
  table initializer op, it will throw a `FailedPreconditionError`.

  Sample Usages:

  ```python
  mapping_strings = tf.constant(["emerson", "lake", "palmer"])
  table = tf.contrib.lookup.index_table_from_tensor(
      mapping=mapping_strings, num_oov_buckets=1, default_value=-1)
  features = tf.constant(["emerson", "lake", "and", "palmer"])
  ids = table.lookup(features)
  ...
  tf.tables_initializer().run()

  ids.eval()  ==> [0, 1, 3, 2]
  ```

  Args:
    mapping: A 1-D `Tensor` that specifies the mapping of keys to indices. The
      type of this object must be castable to `dtype`.
    num_oov_buckets: The number of out-of-vocabulary buckets.
    default_value: The value to use for out-of-vocabulary feature values.
      Defaults to -1.
    hasher_spec: A `HasherSpec` to specify the hash function to use for
      assignment of out-of-vocabulary buckets.
    dtype: The type of values passed to `lookup`. Only string and integers are
      supported.
    name: A name for this op (optional).

  Returns:
    The lookup table to map an input `Tensor` to index `int64` `Tensor`.

  Raises:
    ValueError: If `mapping` is invalid (including `None`).
    ValueError: If `num_oov_buckets` is negative.
  """
  if mapping is None:
    raise ValueError("mapping must be specified.")
  # The core implementation names this argument `vocabulary_list`.
  return lookup_ops.index_table_from_tensor(
      vocabulary_list=mapping,
      num_oov_buckets=num_oov_buckets,
      default_value=default_value,
      hasher_spec=hasher_spec,
      dtype=dtype,
      name=name)


@deprecated(
    "2017-01-07", "This op will be removed after the deprecation date. "
    "Please switch to index_table_from_tensor and call the lookup "
    "method of the returned table.")
def string_to_index(tensor, mapping, default_value=-1, name=None):
  """Maps `tensor` of strings into `int64` indices based on `mapping`.

  This operation converts `tensor` of strings into `int64` indices.
  The mapping is initialized from a string `mapping` tensor where each element
  is a key and corresponding index within the tensor is the value.

  Any entry in the input which does not have a corresponding entry in 'mapping'
  (an out-of-vocabulary entry) is assigned the `default_value`.

  Elements in `mapping` cannot be duplicated, otherwise the initialization
  will throw a FailedPreconditionError.

  The underlying table must be initialized by calling
  `session.run(tf.tables_initializer())` once.

  For example:

  ```python
  mapping_strings = tf.constant(["emerson", "lake", "palmer"])
  feats = tf.constant(["emerson", "lake", "and", "palmer"])
  ids = tf.contrib.lookup.string_to_index(
      feats, mapping=mapping_strings, default_value=-1)
  ...
  tf.tables_initializer().run()

  ids.eval()  ==> [0, 1, -1, 2]
  ```

  Args:
    tensor: A 1-D input `Tensor` with the strings to map to indices.
    mapping: A 1-D string `Tensor` that specifies the mapping of strings to
      indices.
    default_value: The `int64` value to use for out-of-vocabulary strings.
      Defaults to -1.
    name: A name for this op (optional).

  Returns:
    The mapped indices. It has the same shape and tensor type (dense or sparse)
    as `tensor`.
  """
  table = index_table_from_tensor(
      mapping=mapping, default_value=default_value, name=name)
  return table.lookup(tensor)


def index_to_string_table_from_tensor(mapping, default_value="UNK", name=None):
  """Returns a lookup table that maps a `Tensor` of indices into strings.

  This operation constructs a lookup table to map int64 indices into string
  values. The mapping is initialized from a string `mapping` 1-D `Tensor` where
  each element is a value and the corresponding index within the tensor is the
  key.

  Any input which does not have a corresponding index in 'mapping'
  (an out-of-vocabulary entry) is assigned the `default_value`.

  The underlying table must be initialized by calling
  `session.run(tf.tables_initializer())` or `session.run(table.init)` once.

  Elements in `mapping` cannot have duplicates, otherwise when executing the
  table initializer op, it will throw a `FailedPreconditionError`.

  Sample Usages:

  ```python
  mapping_string = tf.constant(["emerson", "lake", "palmer"])
  indices = tf.constant([1, 5], tf.int64)
  table = tf.contrib.lookup.index_to_string_table_from_tensor(
      mapping_string, default_value="UNKNOWN")
  values = table.lookup(indices)
  ...
  tf.tables_initializer().run()

  values.eval() ==> ["lake", "UNKNOWN"]
  ```

  Args:
    mapping: A 1-D string `Tensor` that specifies the strings to map from
      indices.
    default_value: The value to use for out-of-vocabulary indices.
    name: A name for this op (optional).

  Returns:
    The lookup table to map a string values associated to a given index `int64`
    `Tensors`.

  Raises:
    ValueError: when `mapping` is not set.
  """

  if mapping is None:
    raise ValueError("mapping must be specified.")

  # The core implementation names this argument `vocabulary_list`.
  return lookup_ops.index_to_string_table_from_tensor(
      vocabulary_list=mapping, default_value=default_value, name=name)


@deprecated(
    "2017-01-07", "This op will be removed after the deprecation date. "
    "Please switch to index_to_string_table_from_tensor and call the lookup "
    "method of the returned table.")
def index_to_string(tensor, mapping, default_value="UNK", name=None):
  """Maps `tensor` of indices into string values based on `mapping`.

  This operation converts `int64` indices into string values. The mapping is
  initialized from a string `mapping` tensor where each element is a value and
  the corresponding index within the tensor is the key.

  Any input which does not have a corresponding index in 'mapping'
  (an out-of-vocabulary entry) is assigned the `default_value`.

  The underlying table must be initialized by calling
  `session.run(tf.tables_initializer())` once.

  For example:

  ```python
  mapping_string = tf.constant(["emerson", "lake", "palmer"])
  indices = tf.constant([1, 5], tf.int64)
  values = tf.contrib.lookup.index_to_string(
      indices, mapping=mapping_string, default_value="UNKNOWN")
  ...
  tf.tables_initializer().run()

  values.eval() ==> ["lake", "UNKNOWN"]
  ```

  Args:
    tensor: A `int64` `Tensor` with the indices to map to strings.
    mapping: A 1-D string `Tensor` that specifies the strings to map from
      indices.
    default_value: The string value to use for out-of-vocabulary indices.
    name: A name for this op (optional).

  Returns:
    The strings values associated to the indices. The resultant dense
    feature value tensor has the same shape as the corresponding `indices`.
  """
  table = index_to_string_table_from_tensor(
      mapping=mapping, default_value=default_value, name=name)
  return table.lookup(tensor)


class HashTable(InitializableLookupTableBaseV1):
  """A generic hash table implementation.

  Example usage:

  ```python
  table = tf.contrib.lookup.HashTable(
      tf.contrib.lookup.KeyValueTensorInitializer(keys, values), -1)
  out = table.lookup(input_tensor)
  table.init.run()
  print(out.eval())
  ```
  """

  def __init__(self, initializer, default_value, shared_name=None, name=None):
    """Creates a non-initialized `HashTable` object.

    Creates a table, the type of its keys and values are specified by the
    initializer.
    Before using the table you will have to initialize it. After initialization
    the table will be immutable.

    Args:
      initializer: The table initializer to use. See `HashTable` kernel for
        supported key and value types.
      default_value: The value to use if a key is missing in the table.
      shared_name: If non-empty, this table will be shared under the given name
        across multiple sessions.
      name: A name for the operation (optional). Defaults to "hash_table".
    """
    # These attributes are read by `_create_resource`, so they are assigned
    # before the base-class constructor runs.
    self._initializer = initializer
    self._default_value = default_value
    self._shared_name = shared_name
    self._name = name or "hash_table"
    self._table_name = None
    super(HashTable, self).__init__(default_value, initializer)
    # NOTE(review): relies on the base class having replaced
    # `self._default_value` with an object exposing `get_shape()` -- confirm
    # against `InitializableLookupTableBaseV1`.
    self._value_shape = self._default_value.get_shape()

  def _create_resource(self):
    """Creates the hash table op and records the table's name.

    Returns:
      The table handle produced by `gen_lookup_ops.hash_table_v2`.
    """
    table_ref = gen_lookup_ops.hash_table_v2(
        shared_name=self._shared_name,
        key_dtype=self._initializer.key_dtype,
        value_dtype=self._initializer.value_dtype,
        name=self._name)
    if context.executing_eagerly():
      self._table_name = None
    else:
      # Keep only the last path component of the op name.
      self._table_name = table_ref.op.name.split("/")[-1]
    return table_ref

  @property
  def init(self):
    """Alias for `self.initializer`."""
    return self.initializer

  @property
  def name(self):
    """The table name, or `None` if unset (e.g. when executing eagerly)."""
    return self._table_name

  def export(self, name=None):
    """Returns tensors of all keys and values in the table.

    Args:
      name: A name for the operation (optional).

    Returns:
      A pair of tensors with the first tensor containing all keys and the
        second tensors containing all values in the table.
    """
    # `self.name` is `None` when no table name was recorded (eager execution),
    # which would otherwise produce the default scope "None_Export"; fall back
    # to the name supplied at construction in that case.
    default_scope = "%s_Export" % (self.name or self._name)
    with ops.name_scope(name, default_scope,
                        [self.resource_handle]) as name:
      exported_keys, exported_values = gen_lookup_ops.lookup_table_export_v2(
          self.resource_handle, self._key_dtype, self._value_dtype, name=name)

    # Static shape of the values is [number of keys] + per-value shape.
    exported_values.set_shape(exported_keys.get_shape().concatenate(
        self._value_shape))
    return exported_keys, exported_values


MutableHashTable = lookup_ops.MutableHashTable
MutableDenseHashTable = lookup_ops.DenseHashTable