• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Lookup table operations."""
16
17from __future__ import absolute_import
18from __future__ import division
19from __future__ import print_function
20
21from tensorflow.python.eager import context
22from tensorflow.python.framework import dtypes
23from tensorflow.python.framework import ops
24from tensorflow.python.ops import gen_lookup_ops
25from tensorflow.python.ops import lookup_ops
26# pylint: disable=unused-import
27from tensorflow.python.ops.lookup_ops import FastHashSpec
28from tensorflow.python.ops.lookup_ops import HasherSpec
29from tensorflow.python.ops.lookup_ops import IdTableWithHashBuckets
30from tensorflow.python.ops.lookup_ops import index_table_from_file
31from tensorflow.python.ops.lookup_ops import index_to_string_table_from_file
32from tensorflow.python.ops.lookup_ops import InitializableLookupTableBase
33from tensorflow.python.ops.lookup_ops import InitializableLookupTableBaseV1
34from tensorflow.python.ops.lookup_ops import KeyValueTensorInitializer
35from tensorflow.python.ops.lookup_ops import LookupInterface
36from tensorflow.python.ops.lookup_ops import StrongHashSpec
37from tensorflow.python.ops.lookup_ops import TableInitializerBase
38from tensorflow.python.ops.lookup_ops import TextFileIdTableInitializer
39from tensorflow.python.ops.lookup_ops import TextFileIndex
40from tensorflow.python.ops.lookup_ops import TextFileInitializer
41from tensorflow.python.ops.lookup_ops import TextFileStringTableInitializer
42# pylint: enable=unused-import
43from tensorflow.python.util.deprecation import deprecated
44
45
@deprecated("2017-04-10", "Use `index_table_from_file`.")
def string_to_index_table_from_file(vocabulary_file=None,
                                    num_oov_buckets=0,
                                    vocab_size=None,
                                    default_value=-1,
                                    hasher_spec=FastHashSpec,
                                    name=None):
  """Deprecated string-keyed wrapper around `index_table_from_file`.

  Every argument is forwarded unchanged to `index_table_from_file`; the only
  thing this wrapper adds is pinning `key_dtype` to `dtypes.string`.

  Args:
    vocabulary_file: Forwarded to `index_table_from_file`.
    num_oov_buckets: Forwarded to `index_table_from_file`.
    vocab_size: Forwarded to `index_table_from_file`.
    default_value: Forwarded to `index_table_from_file`.
    hasher_spec: Forwarded to `index_table_from_file`.
    name: A name for this op (optional).

  Returns:
    The table returned by `index_table_from_file`.
  """
  return index_table_from_file(
      vocabulary_file=vocabulary_file,
      num_oov_buckets=num_oov_buckets,
      vocab_size=vocab_size,
      default_value=default_value,
      hasher_spec=hasher_spec,
      key_dtype=dtypes.string,
      name=name)
56
57
@deprecated("2017-04-10", "Use `index_table_from_tensor`.")
def string_to_index_table_from_tensor(mapping,
                                      num_oov_buckets=0,
                                      default_value=-1,
                                      hasher_spec=FastHashSpec,
                                      name=None):
  """Deprecated string-only wrapper around `index_table_from_tensor`.

  Converts `mapping` to a tensor inside a "string_to_index" name scope, checks
  that it holds strings, and then delegates to `index_table_from_tensor`.

  Args:
    mapping: The keys of the table; must convert to a string tensor.
    num_oov_buckets: Forwarded to `index_table_from_tensor`.
    default_value: Forwarded to `index_table_from_tensor`.
    hasher_spec: Forwarded to `index_table_from_tensor`.
    name: A name for this op (optional).

  Returns:
    The table returned by `index_table_from_tensor`.

  Raises:
    ValueError: If the converted `mapping` does not have a string dtype.
  """
  with ops.name_scope(name, "string_to_index") as scope:
    keys = ops.convert_to_tensor(mapping)
  if dtypes.string != keys.dtype.base_dtype:
    raise ValueError("string_to_index_table_from_tensor requires string.")
  # The captured scope string is reused as the name of the table itself.
  return index_table_from_tensor(
      mapping=keys,
      num_oov_buckets=num_oov_buckets,
      default_value=default_value,
      hasher_spec=hasher_spec,
      name=scope)
70
71
def index_table_from_tensor(mapping,
                            num_oov_buckets=0,
                            default_value=-1,
                            hasher_spec=FastHashSpec,
                            dtype=dtypes.string,
                            name=None):
  """Returns a lookup table that converts keys from `mapping` into int64 IDs.

  The table is built from the 1-D `mapping` tensor: each element is a key, and
  the position of that element within `mapping` is the ID it maps to.

  Out-of-vocabulary keys are handled in one of two ways:

  * If `num_oov_buckets` is greater than zero, the key is hashed into a bucket
    and the bucket ID is returned. Bucket IDs lie in the range
    `[mapping size, mapping size + num_oov_buckets - 1]`.
  * Otherwise the key is assigned `default_value`.

  The underlying table must be initialized once, by calling either
  `session.run(tf.tables_initializer())` or `session.run(table.init)`.

  `mapping` must not contain duplicate elements; if it does, running the table
  initializer op throws a `FailedPreconditionError`.

  Sample Usages:

  ```python
  mapping_strings = tf.constant(["emerson", "lake", "palmer"])
  table = tf.contrib.lookup.index_table_from_tensor(
      mapping=mapping_strings, num_oov_buckets=1, default_value=-1)
  features = tf.constant(["emerson", "lake", "and", "palmer"])
  ids = table.lookup(features)
  ...
  tf.tables_initializer().run()

  ids.eval()  ==> [0, 1, 3, 2]
  ```

  Args:
    mapping: A 1-D `Tensor` that specifies the mapping of keys to indices. The
      type of this object must be castable to `dtype`.
    num_oov_buckets: The number of out-of-vocabulary buckets.
    default_value: The value to use for out-of-vocabulary feature values.
      Defaults to -1.
    hasher_spec: A `HasherSpec` to specify the hash function to use for
      assignment of out-of-vocabulary buckets.
    dtype: The type of values passed to `lookup`. Only string and integers are
      supported.
    name: A name for this op (optional).

  Returns:
    The lookup table to map an input `Tensor` to index `int64` `Tensor`.

  Raises:
    ValueError: If `mapping` is invalid.
    ValueError: If `num_oov_buckets` is negative.
  """
  if mapping is None:
    raise ValueError("mapping must be specified.")

  # This wrapper only renames `mapping` to the core op's `vocabulary_list`.
  table = lookup_ops.index_table_from_tensor(
      vocabulary_list=mapping,
      num_oov_buckets=num_oov_buckets,
      default_value=default_value,
      hasher_spec=hasher_spec,
      dtype=dtype,
      name=name)
  return table
138
139
@deprecated(
    "2017-01-07", "This op will be removed after the deprecation date. "
    "Please switch to index_table_from_tensor and call the lookup "
    "method of the returned table.")
def string_to_index(tensor, mapping, default_value=-1, name=None):
  """Looks up the `int64` index of each string in `tensor` using `mapping`.

  A table is built from the string `mapping` tensor, in which each element is
  a key and its position within the tensor is the value. `tensor` is then
  looked up in that table.

  Any input entry with no corresponding entry in `mapping` (an
  out-of-vocabulary entry) is assigned `default_value`.

  `mapping` must not contain duplicated elements; otherwise the
  initialization will throw a FailedPreconditionError.

  The underlying table must be initialized once by calling
  `session.run(tf.tables_initializer())`.

  For example:

  ```python
  mapping_strings = tf.constant(["emerson", "lake", "palmer"])
  feats = tf.constant(["emerson", "lake", "and", "palmer"])
  ids = tf.contrib.lookup.string_to_index(
      feats, mapping=mapping_strings, default_value=-1)
  ...
  tf.tables_initializer().run()

  ids.eval()  ==> [0, 1, -1, 2]
  ```

  Args:
    tensor: A 1-D input `Tensor` with the strings to map to indices.
    mapping: A 1-D string `Tensor` that specifies the mapping of strings to
      indices.
    default_value: The `int64` value to use for out-of-vocabulary strings.
      Defaults to -1.
    name: A name for this op (optional).

  Returns:
    The mapped indices. It has the same shape and tensor type (dense or sparse)
    as `tensor`.
  """
  # Build a table with no OOV buckets and immediately query it.
  return index_table_from_tensor(
      mapping=mapping, default_value=default_value,
      name=name).lookup(tensor)
188
189
def index_to_string_table_from_tensor(mapping, default_value="UNK", name=None):
  """Returns a lookup table that maps a `Tensor` of indices into strings.

  The table maps int64 indices to string values. It is built from the 1-D
  string `mapping` `Tensor`: each element is a value, and the position of that
  element within `mapping` is its key.

  Any input index with no corresponding position in `mapping` (an
  out-of-vocabulary entry) is assigned `default_value`.

  The underlying table must be initialized once, by calling either
  `session.run(tf.tables_initializer())` or `session.run(table.init)`.

  `mapping` must not contain duplicate elements; if it does, running the table
  initializer op throws a `FailedPreconditionError`.

  Sample Usages:

  ```python
  mapping_string = tf.constant(["emerson", "lake", "palmer"])
  indices = tf.constant([1, 5], tf.int64)
  table = tf.contrib.lookup.index_to_string_table_from_tensor(
      mapping_string, default_value="UNKNOWN")
  values = table.lookup(indices)
  ...
  tf.tables_initializer().run()

  values.eval() ==> ["lake", "UNKNOWN"]
  ```

  Args:
    mapping: A 1-D string `Tensor` that specifies the strings to map from
      indices.
    default_value: The value to use for out-of-vocabulary indices.
    name: A name for this op (optional).

  Returns:
    The lookup table that maps `int64` index `Tensor`s to the string values
    associated with those indices.

  Raises:
    ValueError: when `mapping` is not set.
  """
  if mapping is None:
    raise ValueError("mapping must be specified.")

  # This wrapper only renames `mapping` to the core op's `vocabulary_list`.
  table = lookup_ops.index_to_string_table_from_tensor(
      vocabulary_list=mapping, default_value=default_value, name=name)
  return table
240
241
@deprecated(
    "2017-01-07", "This op will be removed after the deprecation date. "
    "Please switch to index_to_string_table_from_tensor and call the lookup "
    "method of the returned table.")
def index_to_string(tensor, mapping, default_value="UNK", name=None):
  """Looks up the string value for each index in `tensor` using `mapping`.

  A table is built from the string `mapping` tensor, in which each element is
  a value and its position within the tensor is the key. The `int64` indices
  in `tensor` are then looked up in that table.

  Any input index with no corresponding position in `mapping` (an
  out-of-vocabulary entry) is assigned `default_value`.

  The underlying table must be initialized once by calling
  `session.run(tf.tables_initializer())`.

  For example:

  ```python
  mapping_string = tf.constant(["emerson", "lake", "palmer"])
  indices = tf.constant([1, 5], tf.int64)
  values = tf.contrib.lookup.index_to_string(
      indices, mapping=mapping_string, default_value="UNKNOWN")
  ...
  tf.tables_initializer().run()

  values.eval() ==> ["lake", "UNKNOWN"]
  ```

  Args:
    tensor: A `int64` `Tensor` with the indices to map to strings.
    mapping: A 1-D string `Tensor` that specifies the strings to map from
      indices.
    default_value: The string value to use for out-of-vocabulary indices.
    name: A name for this op (optional).

  Returns:
    The string values associated with the indices. The resultant dense
    feature value tensor has the same shape as the corresponding `indices`.
  """
  # Build the reverse table and immediately query it.
  return index_to_string_table_from_tensor(
      mapping=mapping, default_value=default_value,
      name=name).lookup(tensor)
286
287
class HashTable(InitializableLookupTableBaseV1):
  """A generic hash table implementation.

  Example usage:

  ```python
  table = tf.contrib.lookup.HashTable(
      tf.contrib.lookup.KeyValueTensorInitializer(keys, values), -1)
  out = table.lookup(input_tensor)
  table.init.run()
  print(out.eval())
  ```
  """

  def __init__(self, initializer, default_value, shared_name=None, name=None):
    """Creates a non-initialized `HashTable` object.

    The key and value types of the table are taken from `initializer`. The
    table has to be initialized before it is used, and is immutable after
    initialization.

    Args:
      initializer: The table initializer to use. See `HashTable` kernel for
        supported key and value types.
      default_value: The value to use if a key is missing in the table.
      shared_name: If non-empty, this table will be shared under the given name
        across multiple sessions.
      name: A name for the operation (optional). Falls back to "hash_table"
        when empty or `None`.
    """
    self._initializer = initializer
    self._default_value = default_value
    self._shared_name = shared_name
    self._name = name if name else "hash_table"
    # Must exist before the base constructor runs; `_create_resource` fills it
    # in once the table op has been created.
    self._table_name = None
    super(HashTable, self).__init__(default_value, initializer)
    self._value_shape = self._default_value.get_shape()

  def _create_resource(self):
    """Creates the hash table op and records the table's name.

    Returns:
      The handle produced by `gen_lookup_ops.hash_table_v2`.
    """
    handle = gen_lookup_ops.hash_table_v2(
        shared_name=self._shared_name,
        key_dtype=self._initializer.key_dtype,
        value_dtype=self._initializer.value_dtype,
        name=self._name)
    # In eager mode no name is recorded; otherwise keep only the last
    # component of the op's scoped name.
    self._table_name = (
        None if context.executing_eagerly()
        else handle.op.name.split("/")[-1])
    return handle

  @property
  def init(self):
    """Alias of `self.initializer`."""
    return self.initializer

  @property
  def name(self):
    """The table name recorded by `_create_resource` (`None` in eager mode)."""
    return self._table_name

  def export(self, name=None):
    """Returns tensors of all keys and values in the table.

    Args:
      name: A name for the operation (optional).

    Returns:
      A pair of tensors with the first tensor containing all keys and the
        second tensors containing all values in the table.
    """
    default_scope = "%s_Export" % self.name
    with ops.name_scope(name, default_scope,
                        [self.resource_handle]) as scope:
      keys, values = gen_lookup_ops.lookup_table_export_v2(
          self.resource_handle, self._key_dtype, self._value_dtype, name=scope)

    # The static shape of the values is the keys' shape followed by the shape
    # of a single value.
    full_value_shape = keys.get_shape().concatenate(self._value_shape)
    values.set_shape(full_value_shape)
    return keys, values
367
368
# Re-export the mutable table classes from `lookup_ops` under this module's
# names. `MutableDenseHashTable` is an alias of `lookup_ops.DenseHashTable`.
MutableHashTable = lookup_ops.MutableHashTable
MutableDenseHashTable = lookup_ops.DenseHashTable
371