• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15
16"""Parsing Ops."""
17from __future__ import absolute_import
18from __future__ import division
19from __future__ import print_function
20
21from tensorflow.python.framework import ops
22from tensorflow.python.framework import sparse_tensor
23from tensorflow.python.ops import array_ops
24from tensorflow.python.ops import control_flow_ops
25from tensorflow.python.ops import gen_parsing_ops
26from tensorflow.python.ops import math_ops
27from tensorflow.python.ops import parsing_config
28# go/tf-wildcard-import
29# pylint: disable=wildcard-import,undefined-variable
30from tensorflow.python.ops.gen_parsing_ops import *
31# pylint: enable=wildcard-import,undefined-variable
32from tensorflow.python.util import deprecation
33from tensorflow.python.util.tf_export import tf_export
34
35
# Register the parsing ops below as having no gradient.
for _op_type in (
    "DecodeRaw",
    "DecodePaddedRaw",
    "ParseTensor",
    "SerializeTensor",
    "StringToNumber",
):
  ops.NotDifferentiable(_op_type)
del _op_type  # Keep the loop variable out of the module namespace.


# Feature-configuration classes re-exported from `parsing_config` so callers
# can reach them through this module.
VarLenFeature = parsing_config.VarLenFeature
RaggedFeature = parsing_config.RaggedFeature
SparseFeature = parsing_config.SparseFeature
FixedLenFeature = parsing_config.FixedLenFeature
FixedLenSequenceFeature = parsing_config.FixedLenSequenceFeature
# Private helpers shared with `parsing_config`.
# pylint: disable=protected-access
_ParseOpParams = parsing_config._ParseOpParams
_construct_tensors_for_composite_features = (
    parsing_config._construct_tensors_for_composite_features)
# pylint: enable=protected-access


# Legacy alias for the helper above.
# TODO(b/122887740) Switch files that use this private symbol to use new name.
_construct_sparse_tensors_for_sparse_features = (
    _construct_tensors_for_composite_features)
58
59
def _prepend_none_dimension(features):
  """Returns `features` with a leading `None` dim on sequence features.

  Args:
    features: A `dict` mapping feature keys to feature configuration objects,
      or a falsy value (e.g. `None` or an empty dict).

  Returns:
    `features` itself when it is falsy. Otherwise a shallow copy in which every
    `FixedLenSequenceFeature` is replaced by a new one whose shape is
    `[None] + list(original.shape)`; all other entries are carried over as-is.

  Raises:
    ValueError: if a `FixedLenSequenceFeature` has a falsy `allow_missing`.
  """
  if not features:
    return features

  # Work on a copy so the caller's mapping is never mutated.
  adjusted = dict(features)
  for name, spec in features.items():
    if not isinstance(spec, FixedLenSequenceFeature):
      continue
    if not spec.allow_missing:
      raise ValueError("Unsupported: FixedLenSequenceFeature requires "
                       "allow_missing to be True.")
    padded_shape = [None] + list(spec.shape)
    adjusted[name] = FixedLenSequenceFeature(
        padded_shape, spec.dtype, spec.allow_missing, spec.default_value)
  return adjusted
77
78
@tf_export("io.parse_example", v1=[])
def parse_example_v2(serialized, features, example_names=None, name=None):
  # pylint: disable=line-too-long
  """Parses `Example` protos into a `dict` of tensors.

  Parses a number of serialized [`Example`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
  protos given in `serialized`. We refer to `serialized` as a batch with
  `batch_size` many entries of individual `Example` protos.

  `example_names` may contain descriptive names for the corresponding serialized
  protos. These may be useful for debugging purposes, but they have no effect on
  the output. If not `None`, `example_names` must be the same length as
  `serialized`.

  This op parses serialized examples into a dictionary mapping keys to `Tensor`,
  `SparseTensor`, and `RaggedTensor` objects. `features` is a dict from keys to
  `VarLenFeature`, `SparseFeature`, `RaggedFeature`, `FixedLenFeature`, and
  `FixedLenSequenceFeature` objects. Each `VarLenFeature` and `SparseFeature`
  is mapped to a `SparseTensor`; each `FixedLenFeature` and
  `FixedLenSequenceFeature` is mapped to a `Tensor`; and each `RaggedFeature`
  is mapped to a `RaggedTensor`.

  Each `VarLenFeature` maps to a `SparseTensor` of the specified type
  representing a ragged matrix. Its indices are `[batch, index]` where `batch`
  identifies the example in `serialized`, and `index` is the value's index in
  the list of values associated with that feature and example.

  Each `SparseFeature` maps to a `SparseTensor` of the specified type
  representing a Tensor of `dense_shape` `[batch_size] + SparseFeature.size`.
  Its `values` come from the feature in the examples with key `value_key`.
  A `values[i]` comes from a position `k` in the feature of an example at batch
  entry `batch`. This positional information is recorded in `indices[i]` as
  `[batch, index_0, index_1, ...]` where `index_j` is the `k-th` value of
  the feature in the example with key `SparseFeature.index_key[j]`.
  In other words, we split the indices (except the first index indicating the
  batch entry) of a `SparseTensor` by dimension into different features of the
  `Example`. Due to its complexity a `VarLenFeature` should be preferred over a
  `SparseFeature` whenever possible.

  Each `FixedLenFeature` `df` maps to a `Tensor` of the specified type (or
  `tf.float32` if not specified) and shape `(serialized.size(),) + df.shape`.

  `FixedLenFeature` entries with a `default_value` are optional. With no default
  value, we will fail if that `Feature` is missing from any example in
  `serialized`.

  Each `FixedLenSequenceFeature` `df` maps to a `Tensor` of the specified type
  (or `tf.float32` if not specified) and shape
  `(serialized.size(), None) + df.shape`.
  All examples in `serialized` will be padded with `default_value` along the
  second dimension. `FixedLenSequenceFeature` entries must be created with
  `allow_missing=True`.

  Each `RaggedFeature` maps to a `RaggedTensor` of the specified type.  It
  is formed by stacking the `RaggedTensor` for each example, where the
  `RaggedTensor` for each individual example is constructed using the tensors
  named by the `RaggedFeature`'s value key and partitions.  See
  the `tf.io.RaggedFeature` documentation for details and examples.

  Examples:

  For example, if one expects a `tf.float32` `VarLenFeature` `ft` and three
  serialized `Example`s are provided:

  ```
  serialized = [
    features
      { feature { key: "ft" value { float_list { value: [1.0, 2.0] } } } },
    features
      { feature []},
    features
      { feature { key: "ft" value { float_list { value: [3.0] } } } }
  ]
  ```

  then the output will look like:

  ```python
  {"ft": SparseTensor(indices=[[0, 0], [0, 1], [2, 0]],
                      values=[1.0, 2.0, 3.0],
                      dense_shape=(3, 2)) }
  ```

  If instead a `FixedLenSequenceFeature` with `default_value = -1.0` and
  `shape=[]` is used then the output will look like:

  ```python
  {"ft": [[1.0, 2.0], [-1.0, -1.0], [3.0, -1.0]]}
  ```

  Given two `Example` input protos in `serialized`:

  ```
  [
    features {
      feature { key: "kw" value { bytes_list { value: [ "knit", "big" ] } } }
      feature { key: "gps" value { float_list { value: [] } } }
    },
    features {
      feature { key: "kw" value { bytes_list { value: [ "emmy" ] } } }
      feature { key: "dank" value { int64_list { value: [ 42 ] } } }
      feature { key: "gps" value { } }
    }
  ]
  ```

  And arguments

  ```
  example_names: ["input0", "input1"],
  features: {
      "kw": VarLenFeature(tf.string),
      "dank": VarLenFeature(tf.int64),
      "gps": VarLenFeature(tf.float32),
  }
  ```

  Then the output is a dictionary:

  ```python
  {
    "kw": SparseTensor(
        indices=[[0, 0], [0, 1], [1, 0]],
        values=["knit", "big", "emmy"],
        dense_shape=[2, 2]),
    "dank": SparseTensor(
        indices=[[1, 0]],
        values=[42],
        dense_shape=[2, 1]),
    "gps": SparseTensor(
        indices=[],
        values=[],
        dense_shape=[2, 0]),
  }
  ```

  For dense results in two serialized `Example`s:

  ```
  [
    features {
      feature { key: "age" value { int64_list { value: [ 0 ] } } }
      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
    },
    features {
      feature { key: "age" value { int64_list { value: [] } } }
      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
    }
  ]
  ```

  We can use arguments:

  ```
  example_names: ["input0", "input1"],
  features: {
      "age": FixedLenFeature([], dtype=tf.int64, default_value=-1),
      "gender": FixedLenFeature([], dtype=tf.string),
  }
  ```

  And the expected output is:

  ```python
  {
    "age": [0, -1],
    "gender": ["f", "f"],
  }
  ```

  An alternative to `VarLenFeature` to obtain a `SparseTensor` is
  `SparseFeature`. For example, given two `Example` input protos in
  `serialized`:

  ```
  [
    features {
      feature { key: "val" value { float_list { value: [ 0.5, -1.0 ] } } }
      feature { key: "ix" value { int64_list { value: [ 3, 20 ] } } }
    },
    features {
      feature { key: "val" value { float_list { value: [ 0.0 ] } } }
      feature { key: "ix" value { int64_list { value: [ 42 ] } } }
    }
  ]
  ```

  And arguments

  ```
  example_names: ["input0", "input1"],
  features: {
      "sparse": SparseFeature(
          index_key="ix", value_key="val", dtype=tf.float32, size=100),
  }
  ```

  Then the output is a dictionary:

  ```python
  {
    "sparse": SparseTensor(
        indices=[[0, 3], [0, 20], [1, 42]],
        values=[0.5, -1.0, 0.0],
        dense_shape=[2, 100]),
  }
  ```

  See the `tf.io.RaggedFeature` documentation for examples showing how
  `RaggedFeature` can be used to obtain `RaggedTensor`s.

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    features: A non-empty `dict` mapping feature keys to `FixedLenFeature`,
      `FixedLenSequenceFeature`, `VarLenFeature`, `SparseFeature`, and
      `RaggedFeature` values.
    example_names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos in the batch.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping feature keys to `Tensor`, `SparseTensor`, and
    `RaggedTensor` values.

  Raises:
    ValueError: if `features` is empty or `None`, or if any feature is invalid.
  """
  if not features:
    # Wrap in a 1-tuple so an empty tuple (or any other tuple-like falsy value)
    # is formatted as a single argument instead of raising TypeError.
    raise ValueError("Missing: features was %s." % (features,))
  # FixedLenSequenceFeature shapes gain a leading None dimension before the
  # parse-op parameters are derived from them.
  features = _prepend_none_dimension(features)
  params = _ParseOpParams.from_features(features, [
      VarLenFeature, SparseFeature, FixedLenFeature, FixedLenSequenceFeature,
      RaggedFeature
  ])

  outputs = _parse_example_raw(serialized, example_names, params, name=name)
  return _construct_tensors_for_composite_features(features, outputs)
314
315
@tf_export(v1=["io.parse_example", "parse_example"])
def parse_example(serialized, features, name=None, example_names=None):
  # V1 endpoint: identical to `parse_example_v2` except that `name` precedes
  # `example_names` in the signature. The docstring is attached below.
  return parse_example_v2(
      serialized, features, example_names=example_names, name=name)


# Share the V2 documentation with the V1 endpoint.
parse_example.__doc__ = parse_example_v2.__doc__
322
323
def _parse_example_raw(serialized, names, params, name):
  """Runs the `ParseExampleV2` op and packages its outputs by feature key.

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos. `None` is replaced by an empty list.
    params: A `ParseOpParams` containing the parameters for the parse op.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping keys to `Tensor`s and `SparseTensor`s and `RaggedTensor`s.
    Keys are taken from `params` in the order sparse, dense, ragged.

  Raises:
    ValueError: if `params` describes no features, or if ragged features are
      requested while the rank of `serialized` is not statically known.
  """
  if params.num_features == 0:
    raise ValueError("Must provide at least one feature key")
  with ops.name_scope(name, "ParseExample", [serialized, names]):
    if names is None:
      names = []
    serialized = ops.convert_to_tensor(serialized, name="serialized")
    if params.ragged_keys:
      if serialized.shape.ndims is None:
        raise ValueError("serialized must have statically-known rank to "
                         "parse ragged features.")

    op_outputs = gen_parsing_ops.parse_example_v2(
        serialized=serialized,
        names=names,
        sparse_keys=params.sparse_keys,
        dense_keys=params.dense_keys,
        ragged_keys=params.ragged_keys,
        dense_defaults=params.dense_defaults_vec,
        num_sparse=len(params.sparse_keys),
        sparse_types=params.sparse_types,
        ragged_value_types=params.ragged_value_types,
        ragged_split_types=params.ragged_split_types,
        dense_shapes=params.dense_shapes_as_proto,
        name=name)
    (sp_indices, sp_values, sp_shapes, dense_values, rg_values,
     rg_row_splits) = op_outputs

    # pylint: disable=protected-access
    ragged_tensors = parsing_config._build_ragged_tensors(
        serialized.shape, rg_values, rg_row_splits)
    # pylint: enable=protected-access

    # Reassemble each (indices, values, shape) triple into a SparseTensor.
    sparse_tensors = []
    for triple in zip(sp_indices, sp_values, sp_shapes):
      sparse_tensors.append(sparse_tensor.SparseTensor(*triple))

    # Keys and values are concatenated in the same sparse/dense/ragged order so
    # that zipping pairs every key with its own output.
    all_keys = params.sparse_keys + params.dense_keys + params.ragged_keys
    all_values = sparse_tensors + dense_values + ragged_tensors
    return dict(zip(all_keys, all_values))
373
374
@tf_export(v1=["io.parse_single_example", "parse_single_example"])
def parse_single_example(serialized, features, name=None, example_names=None):
  """Parses a single `Example` proto.

  This is the V1 endpoint; it forwards to `parse_single_example_v2` and differs
  only in the order of the `name` and `example_names` arguments.

  Similar to `parse_example`, except:

  For dense tensors, the returned `Tensor` is identical to the output of
  `parse_example`, except there is no batch dimension, the output shape is the
  same as the shape given in `dense_shape`.

  For `SparseTensor`s, the first (batch) column of the indices matrix is removed
  (the indices matrix is a column vector), the values vector is unchanged, and
  the first (`batch_size`) entry of the shape vector is removed (it is now a
  single element vector).

  One might see performance advantages by batching `Example` protos with
  `parse_example` instead of using this function directly.

  Args:
    serialized: A scalar string Tensor, a single serialized Example.
    features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values.
    name: A name for this operation (optional).
    example_names: (Optional) A scalar string Tensor, the associated name.

  Returns:
    A `dict` mapping feature keys to `Tensor` and `SparseTensor` values.

  Raises:
    ValueError: if any feature is invalid.
  """
  return parse_single_example_v2(
      serialized, features, example_names=example_names, name=name)
407
408
@tf_export("io.parse_single_example", v1=[])
def parse_single_example_v2(serialized,
                            features,
                            example_names=None,
                            name=None):
  """Parses a single `Example` proto.

  Similar to `parse_example`, except:

  For dense tensors, the returned `Tensor` is identical to the output of
  `parse_example`, except there is no batch dimension, the output shape is the
  same as the shape given in `dense_shape`.

  For `SparseTensor`s, the first (batch) column of the indices matrix is removed
  (the indices matrix is a column vector), the values vector is unchanged, and
  the first (`batch_size`) entry of the shape vector is removed (it is now a
  single element vector).

  One might see performance advantages by batching `Example` protos with
  `parse_example` instead of using this function directly.

  Args:
    serialized: A scalar string Tensor, a single serialized Example.
    features: A non-empty `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values.
    example_names: (Optional) A scalar string Tensor, the associated name.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping feature keys to `Tensor` and `SparseTensor` values.

  Raises:
    ValueError: if `features` is empty or `None`, or if any feature is invalid.
  """
  if not features:
    raise ValueError("Missing features.")
  scope_inputs = [serialized, example_names]
  with ops.name_scope(name, "ParseSingleExample", scope_inputs):
    # Convert first, then check that the input really is a scalar before
    # handing it to the batch parser.
    as_tensor = ops.convert_to_tensor(serialized, name="serialized")
    scalar_serialized = _assert_scalar(as_tensor, "serialized")
    return parse_example_v2(scalar_serialized, features, example_names, name)
448
449
@tf_export("io.parse_sequence_example")
def parse_sequence_example(serialized,
                           context_features=None,
                           sequence_features=None,
                           example_names=None,
                           name=None):
  # pylint: disable=line-too-long
  """Parses a batch of `SequenceExample` protos.

  Parses a vector of serialized
  [`SequenceExample`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
  protos given in `serialized`.

  This op parses serialized sequence examples into a tuple of dictionaries,
  each mapping keys to `Tensor`, `SparseTensor`, and `RaggedTensor` objects.
  The first dictionary contains mappings for keys appearing in
  `context_features`, the second dictionary contains mappings for keys
  appearing in `sequence_features`, and the third dictionary contains the
  lengths of the dense `sequence_features`.

  At least one of `context_features` and `sequence_features` must be provided
  and non-empty.

  The `context_features` keys are associated with a `SequenceExample` as a
  whole, independent of time / frame.  In contrast, the `sequence_features` keys
  provide a way to access variable-length data within the `FeatureList` section
  of the `SequenceExample` proto.  While the shapes of `context_features` values
  are fixed with respect to frame, the frame dimension (the first dimension)
  of `sequence_features` values may vary between `SequenceExample` protos,
  and even between `feature_list` keys within the same `SequenceExample`.

  `context_features` contains `VarLenFeature`, `RaggedFeature`, and
  `FixedLenFeature` objects. Each `VarLenFeature` is mapped to a
  `SparseTensor`; each `RaggedFeature` is mapped to a `RaggedTensor`; and each
  `FixedLenFeature` is mapped to a `Tensor`, of the specified type, shape, and
  default value.

  `sequence_features` contains `VarLenFeature`, `RaggedFeature`, and
  `FixedLenSequenceFeature` objects. Each `VarLenFeature` is mapped to a
  `SparseTensor`; each `RaggedFeature` is mapped to a `RaggedTensor`; and
  each `FixedLenSequenceFeature` is mapped to a `Tensor`, each of the specified
  type. The shape will be `(B,T,) + df.dense_shape` for
  `FixedLenSequenceFeature` `df`, where `B` is the batch size, and `T` is the
  length of the associated `FeatureList` in the `SequenceExample`. For instance,
  `FixedLenSequenceFeature([])` yields a scalar 2-D `Tensor` of static shape
  `[None, None]` and dynamic shape `[B, T]`, while
  `FixedLenSequenceFeature([k])` (for `int k >= 1`) yields a 3-D matrix `Tensor`
  of static shape `[None, None, k]` and dynamic shape `[B, T, k]`.

  Like the input, the resulting output tensors have a batch dimension. This
  means that the original per-example shapes of `VarLenFeature`s and
  `FixedLenSequenceFeature`s can be lost. To handle that situation, this op also
  provides dicts of shape tensors as part of the output. There is one dict for
  the context features, and one for the feature_list features. Context features
  of type `FixedLenFeature`s will not be present, since their shapes are already
  known by the caller. In situations where the input `FixedLenFeature`s are of
  different lengths across examples, the shorter examples will be padded with
  default datatype values: 0 for numeric types, and the empty string for string
  types.

  Each `SparseTensor` corresponding to `sequence_features` represents a ragged
  vector.  Its indices are `[time, index]`, where `time` is the `FeatureList`
  entry and `index` is the value's index in the list of values associated with
  that time.

  `FixedLenFeature` entries with a `default_value` and `FixedLenSequenceFeature`
  entries with `allow_missing=True` are optional; otherwise, we will fail if
  that `Feature` or `FeatureList` is missing from any example in `serialized`.

  `example_names` may contain descriptive names for the corresponding
  serialized protos. These may be useful for debugging purposes, but they have
  no effect on the output.

  Args:
    serialized: A vector (1-D Tensor) of type string containing binary
      serialized `SequenceExample` protos.
    context_features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` or `RaggedFeature` values. These features are associated
      with a `SequenceExample` as a whole.
    sequence_features: A `dict` mapping feature keys to
      `FixedLenSequenceFeature` or `VarLenFeature` or `RaggedFeature` values.
      These features are associated with data within the `FeatureList` section
      of the `SequenceExample` proto.
    example_names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos.
    name: A name for this operation (optional).

  Returns:
    A tuple of three `dict`s, each mapping keys to `Tensor`s,
    `SparseTensor`s, and `RaggedTensor`s. The first dict contains the context
    key/values, the second dict contains the feature_list key/values, and the
    final dict contains the lengths of any dense feature_list features.

  Raises:
    ValueError: if both `context_features` and `sequence_features` are empty
      or `None`, or if any feature is invalid.
  """
  if not context_features and not sequence_features:
    raise ValueError("Missing features.")

  context_params = _ParseOpParams.from_features(
      context_features, [VarLenFeature, FixedLenFeature, RaggedFeature])
  feature_list_params = _ParseOpParams.from_features(
      sequence_features,
      [VarLenFeature, FixedLenSequenceFeature, RaggedFeature])

  scope_inputs = [serialized, example_names]
  with ops.name_scope(name, "ParseSequenceExample", scope_inputs):
    context_result, sequence_result, sequence_lengths = (
        _parse_sequence_example_raw(serialized, example_names, context_params,
                                    feature_list_params, name))

    # Composite-feature post-processing is applied only to the dicts for which
    # ragged features were requested.
    if context_params.ragged_keys:
      context_result = _construct_tensors_for_composite_features(
          context_features, context_result)
    if feature_list_params.ragged_keys:
      sequence_result = _construct_tensors_for_composite_features(
          sequence_features, sequence_result)

    return context_result, sequence_result, sequence_lengths
568
569
def _parse_sequence_example_raw(serialized,
                                debug_name,
                                context,
                                feature_list,
                                name=None):
  """Parses a vector of `SequenceExample` protos.

  Args:
    serialized: A vector (1-D Tensor) of type string, containing binary
      serialized `SequenceExample` protos.
    debug_name: A vector (1-D Tensor) of strings (optional), the names of the
      serialized protos. `None` is replaced by an empty list.
    context: A `ParseOpParams` containing the parameters for the parse
      op for the context features.
    feature_list: A `ParseOpParams` containing the parameters for the
      parse op for the feature_list features.
    name: A name for this operation (optional).

  Returns:
    A tuple of three `dict`s, each mapping keys to `Tensor`s, `SparseTensor`s,
    and `RaggedTensor`s. The first dict contains the context key/values, the
    second dict contains the feature_list key/values, and the final dict
    contains the lengths of any dense feature_list features.

  Raises:
    ValueError: if `context` and `feature_list` together describe no features,
      if any value in `feature_list.dense_defaults` is not `None`, or if
      ragged features are requested while the rank of `serialized` is not
      statically known.
  """
  if context.num_features + feature_list.num_features == 0:
    raise ValueError("Must provide at least one feature key")
  with ops.name_scope(name, "ParseSequenceExample", [serialized]):
    debug_name = [] if debug_name is None else debug_name

    # The only default accepted for a dense feature_list entry is None.
    for k, v in feature_list.dense_defaults.items():
      if v is not None:
        raise ValueError("Value feature_list.dense_defaults[%s] must be None" %
                         k)
    # Every key passed validation above, so the keys of `dense_defaults` are
    # exactly the ones to flag. A set keeps the per-key lookup below O(1)
    # instead of scanning a list for every dense key.
    missing_assumed_empty_keys = set(feature_list.dense_defaults)

    has_ragged = context.ragged_keys or feature_list.ragged_keys
    serialized = ops.convert_to_tensor(serialized, name="serialized")
    if has_ragged and serialized.shape.ndims is None:
      raise ValueError("serialized must have statically-known rank to "
                       "parse ragged features.")
    # One boolean per dense key, aligned with `feature_list.dense_keys`.
    feature_list_dense_missing_assumed_empty_vector = [
        key in missing_assumed_empty_keys for key in feature_list.dense_keys
    ]
    outputs = gen_parsing_ops.parse_sequence_example_v2(
        # Inputs
        serialized=serialized,
        debug_name=debug_name,
        context_sparse_keys=context.sparse_keys,
        context_dense_keys=context.dense_keys,
        context_ragged_keys=context.ragged_keys,
        feature_list_sparse_keys=feature_list.sparse_keys,
        feature_list_dense_keys=feature_list.dense_keys,
        feature_list_ragged_keys=feature_list.ragged_keys,
        feature_list_dense_missing_assumed_empty=(
            feature_list_dense_missing_assumed_empty_vector),
        context_dense_defaults=context.dense_defaults_vec,
        # Attrs
        Ncontext_sparse=len(context.sparse_keys),
        Nfeature_list_sparse=len(feature_list.sparse_keys),
        Nfeature_list_dense=len(feature_list.dense_keys),
        context_sparse_types=context.sparse_types,
        context_ragged_value_types=context.ragged_value_types,
        context_ragged_split_types=context.ragged_split_types,
        feature_list_dense_types=feature_list.dense_types,
        feature_list_sparse_types=feature_list.sparse_types,
        feature_list_ragged_value_types=feature_list.ragged_value_types,
        feature_list_ragged_split_types=feature_list.ragged_split_types,
        context_dense_shapes=context.dense_shapes_as_proto,
        feature_list_dense_shapes=feature_list.dense_shapes,
        name=name)
    (context_sparse_indices, context_sparse_values, context_sparse_shapes,
     context_dense_values, context_ragged_values, context_ragged_row_splits,
     feature_list_sparse_indices, feature_list_sparse_values,
     feature_list_sparse_shapes, feature_list_dense_values,
     feature_list_dense_lengths, feature_list_ragged_values,
     feature_list_ragged_outer_splits,
     feature_list_ragged_inner_splits) = outputs
    # pylint: disable=protected-access
    context_ragged_tensors = parsing_config._build_ragged_tensors(
        serialized.shape, context_ragged_values, context_ragged_row_splits)
    feature_list_ragged_tensors = parsing_config._build_ragged_tensors(
        serialized.shape, feature_list_ragged_values,
        feature_list_ragged_outer_splits, feature_list_ragged_inner_splits)
    # pylint: enable=protected-access

    # Reassemble each (indices, values, shape) triple into a SparseTensor.
    context_sparse_tensors = [
        sparse_tensor.SparseTensor(ix, val, shape)
        for ix, val, shape in zip(context_sparse_indices,
                                  context_sparse_values,
                                  context_sparse_shapes)
    ]
    feature_list_sparse_tensors = [
        sparse_tensor.SparseTensor(ix, val, shape)
        for ix, val, shape in zip(feature_list_sparse_indices,
                                  feature_list_sparse_values,
                                  feature_list_sparse_shapes)
    ]

    # Keys and values are concatenated in the same sparse/dense/ragged order so
    # that zipping pairs every key with its own output.
    context_output = dict(
        zip(
            context.sparse_keys + context.dense_keys + context.ragged_keys,
            context_sparse_tensors + context_dense_values +
            context_ragged_tensors))
    feature_list_output = dict(
        zip(
            feature_list.sparse_keys + feature_list.dense_keys +
            feature_list.ragged_keys, feature_list_sparse_tensors +
            feature_list_dense_values + feature_list_ragged_tensors))
    feature_list_lengths = dict(
        zip(feature_list.dense_keys, feature_list_dense_lengths))

    return (context_output, feature_list_output, feature_list_lengths)
690
691
692@tf_export("io.parse_single_sequence_example",
693           v1=["io.parse_single_sequence_example",
694               "parse_single_sequence_example"])
695def parse_single_sequence_example(
696    serialized, context_features=None, sequence_features=None,
697    example_name=None, name=None):
698  # pylint: disable=line-too-long
699  """Parses a single `SequenceExample` proto.
700
701  Parses a single serialized [`SequenceExample`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
702  proto given in `serialized`.
703
704  This op parses a serialized sequence example into a tuple of dictionaries,
705  each mapping keys to `Tensor` and `SparseTensor` objects.
706  The first dictionary contains mappings for keys appearing in
707  `context_features`, and the second dictionary contains mappings for keys
708  appearing in `sequence_features`.
709
710  At least one of `context_features` and `sequence_features` must be provided
711  and non-empty.
712
713  The `context_features` keys are associated with a `SequenceExample` as a
714  whole, independent of time / frame.  In contrast, the `sequence_features` keys
715  provide a way to access variable-length data within the `FeatureList` section
716  of the `SequenceExample` proto.  While the shapes of `context_features` values
717  are fixed with respect to frame, the frame dimension (the first dimension)
718  of `sequence_features` values may vary between `SequenceExample` protos,
719  and even between `feature_list` keys within the same `SequenceExample`.
720
721  `context_features` contains `VarLenFeature`, `RaggedFeature`, and
722  `FixedLenFeature` objects. Each `VarLenFeature` is mapped to a `SparseTensor`;
723  each `RaggedFeature` is mapped to a `RaggedTensor`; and each `FixedLenFeature`
724  is mapped to a `Tensor`, of the specified type, shape, and default value.
725
726  `sequence_features` contains `VarLenFeature`, `RaggedFeature`, and
727  `FixedLenSequenceFeature` objects. Each `VarLenFeature` is mapped to a
728  `SparseTensor`; each `RaggedFeature` is mapped to a `RaggedTensor`; and each
729  `FixedLenSequenceFeature` is mapped to a `Tensor`, each of the specified type.
730  The shape will be `(T,) + df.dense_shape` for `FixedLenSequenceFeature` `df`,
731  where `T` is the length of the associated `FeatureList` in the
732  `SequenceExample`. For instance, `FixedLenSequenceFeature([])` yields a scalar
733  1-D `Tensor` of static shape `[None]` and dynamic shape `[T]`, while
734  `FixedLenSequenceFeature([k])` (for `int k >= 1`) yields a 2-D matrix `Tensor`
735  of static shape `[None, k]` and dynamic shape `[T, k]`.
736
737  Each `SparseTensor` corresponding to `sequence_features` represents a ragged
738  vector.  Its indices are `[time, index]`, where `time` is the `FeatureList`
739  entry and `index` is the value's index in the list of values associated with
740  that time.
741
742  `FixedLenFeature` entries with a `default_value` and `FixedLenSequenceFeature`
743  entries with `allow_missing=True` are optional; otherwise, we will fail if
744  that `Feature` or `FeatureList` is missing from any example in `serialized`.
745
746  `example_name` may contain a descriptive name for the corresponding serialized
747  proto. This may be useful for debugging purposes, but it has no effect on the
748  output. If not `None`, `example_name` must be a scalar.
749
750  Note that the batch version of this function, `tf.parse_sequence_example`,
751  is written for better memory efficiency and will be faster on large
752  `SequenceExample`s.
753
754  Args:
755    serialized: A scalar (0-D Tensor) of type string, a single binary
756      serialized `SequenceExample` proto.
757    context_features: A `dict` mapping feature keys to `FixedLenFeature` or
758      `VarLenFeature` or `RaggedFeature` values. These features are associated
759      with a `SequenceExample` as a whole.
760    sequence_features: A `dict` mapping feature keys to
761      `FixedLenSequenceFeature` or `VarLenFeature` or `RaggedFeature` values.
762      These features are associated with data within the `FeatureList` section
763      of the `SequenceExample` proto.
764    example_name: A scalar (0-D Tensor) of strings (optional), the name of
765      the serialized proto.
766    name: A name for this operation (optional).
767
768  Returns:
769    A tuple of two `dict`s, each mapping keys to `Tensor`s and `SparseTensor`s
770    and `RaggedTensor`s.
771
772    * The first dict contains the context key/values.
773    * The second dict contains the feature_list key/values.
774
775  Raises:
776    ValueError: if any feature is invalid.
777  """
778  # pylint: enable=line-too-long
779  if not (context_features or sequence_features):
780    raise ValueError("Missing features.")
781  context_params = _ParseOpParams.from_features(
782      context_features, [VarLenFeature, FixedLenFeature, RaggedFeature])
783  feature_list_params = _ParseOpParams.from_features(
784      sequence_features,
785      [VarLenFeature, FixedLenSequenceFeature, RaggedFeature])
786
787  with ops.name_scope(name, "ParseSingleSequenceExample",
788                      [serialized, example_name]):
789    context_output, feature_list_output = (
790        _parse_single_sequence_example_raw(serialized, context_params,
791                                           feature_list_params, example_name,
792                                           name))
793
794    if context_params.ragged_keys:
795      context_output = _construct_tensors_for_composite_features(
796          context_features, context_output)
797    if feature_list_params.ragged_keys:
798      feature_list_output = _construct_tensors_for_composite_features(
799          sequence_features, feature_list_output)
800
801    return context_output, feature_list_output
802
803
def _parse_single_sequence_example_raw(serialized,
                                       context,
                                       feature_list,
                                       debug_name,
                                       name=None):
  """Parses one serialized `SequenceExample` from prepared op parameters.

  `serialized` is converted to a tensor and checked to be a scalar, then handed
  to `_parse_sequence_example_raw`. Only the first two entries of that result
  (the context outputs and the feature_list outputs) are returned.

  Args:
    serialized: A scalar (0-D Tensor) of type string, a single binary serialized
      `SequenceExample` proto.
    context: A `_ParseOpParams` holding the parse op parameters for the context
      features.
    feature_list: A `_ParseOpParams` holding the parse op parameters for the
      feature_list features.
    debug_name: A scalar (0-D Tensor) of strings (optional), the name of the
      serialized proto.
    name: A name for this operation (optional).

  Returns:
    A tuple of two `dict`s, each mapping keys to `Tensor`s and `SparseTensor`s.
    The first dict contains the context key/values.
    The second dict contains the feature_list key/values.

  Raises:
    TypeError: if feature_list.dense_defaults is not either None or a dict.
    ValueError: if `serialized` has a statically known rank other than 0.
  """
  # Only the tensor conversion and the scalar check live inside this name
  # scope; the parse op itself is built after the scope has been exited.
  with ops.name_scope(name, "ParseSingleExample", [serialized, debug_name]):
    serialized_tensor = ops.convert_to_tensor(serialized, name="serialized")
    scalar_serialized = _assert_scalar(serialized_tensor, "serialized")
  parsed = _parse_sequence_example_raw(
      scalar_serialized, debug_name, context, feature_list, name)
  # Keep just the (context, feature_list) pair; drop any further outputs.
  return parsed[:2]
835
836
@tf_export("io.decode_raw", v1=[])
def decode_raw(input_bytes,
               out_type,
               little_endian=True,
               fixed_length=None,
               name=None):
  """Reinterprets raw byte strings as tensors of `out_type`.

  Dispatches to the `DecodePaddedRaw` op wrapper when `fixed_length` is given
  and to the plain `DecodeRaw` op wrapper otherwise.

  Args:
    input_bytes:
      Each element of the input Tensor is converted to an array of bytes.
    out_type:
      `DType` of the output. Acceptable types are `half`, `float`, `double`,
      `int32`, `uint16`, `uint8`, `int16`, `int8`, `int64`.
    little_endian:
      Whether the `input_bytes` data is in little-endian format. Data will be
      converted into host byte order if necessary.
    fixed_length:
      If set, the first `fixed_length` bytes of each element will be converted.
      Data will be zero-padded or truncated to the specified length.

      `fixed_length` must be a multiple of the size of `out_type`.
      `fixed_length` must be specified if the elements of `input_bytes` are of
      variable length.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` object storing the decoded bytes.

  """
  # Without a fixed length there is nothing to pad or truncate, so the plain
  # decode op is sufficient.
  if fixed_length is None:
    return gen_parsing_ops.decode_raw(
        input_bytes, out_type, little_endian=little_endian, name=name)

  return gen_parsing_ops.decode_padded_raw(
      input_bytes,
      fixed_length=fixed_length,
      out_type=out_type,
      little_endian=little_endian,
      name=name)
877
878
@tf_export(v1=["decode_raw", "io.decode_raw"])
@deprecation.deprecated_args(None,
                             "bytes is deprecated, use input_bytes instead",
                             "bytes")
def decode_raw_v1(
    input_bytes=None,
    out_type=None,
    little_endian=True,
    name=None,
    bytes=None  # pylint: disable=redefined-builtin
):
  """Reinterprets raw byte strings as tensors of `out_type` (v1 endpoint).

  Accepts the input under either its current name (`input_bytes`) or its
  deprecated name (`bytes`).

  Args:
    input_bytes:
      Each element of the input Tensor is converted to an array of bytes.
    out_type:
      `DType` of the output. Acceptable types are `half`, `float`, `double`,
      `int32`, `uint16`, `uint8`, `int16`, `int8`, `int64`.
    little_endian:
      Whether the `input_bytes` data is in little-endian format. Data will be
      converted into host byte order if necessary.
    name: A name for the operation (optional).
    bytes: Deprecated parameter. Use `input_bytes` instead.

  Returns:
    A `Tensor` object storing the decoded bytes.

  Raises:
    ValueError: if `out_type` is not provided.
  """
  # Resolve the input first, whichever of the two names the caller used.
  raw_input = deprecation.deprecated_argument_lookup(
      "input_bytes", input_bytes, "bytes", bytes)

  # `out_type` was a required positional argument in the original API. It only
  # has a default so that `bytes` could be renamed to `input_bytes`, which
  # means its presence has to be enforced by hand here.
  if out_type is None:
    raise ValueError(
        "decode_raw_v1() missing 1 positional argument: 'out_type'")

  return gen_parsing_ops.decode_raw(
      raw_input, out_type, little_endian=little_endian, name=name)
920
921
922# Swap `name` and `na_value` for backward compatibility.
@tf_export(v1=["io.decode_csv", "decode_csv"])
@deprecation.deprecated_endpoints("decode_csv")
def decode_csv(records,
               record_defaults,
               field_delim=",",
               use_quote_delim=True,
               name=None,
               na_value="",
               select_cols=None):
  """Convert CSV records to tensors. Each column maps to one tensor.

  This is the v1 endpoint. It differs from `decode_csv_v2` only in the position
  of the `name` parameter, and forwards every argument to `decode_csv_v2`.

  RFC 4180 format is expected for the CSV records.
  (https://tools.ietf.org/html/rfc4180)
  Note that we allow leading and trailing spaces with int or float field.

  Args:
    records: A `Tensor` of type `string`.
      Each string is a record/row in the csv and all records should have
      the same format.
    record_defaults: A list of `Tensor` objects with specific types.
      Acceptable types are `float32`, `float64`, `int32`, `int64`, `string`.
      One tensor per column of the input record, with either a
      scalar default value for that column or an empty vector if the column is
      required.
    field_delim: An optional `string`. Defaults to `","`.
      char delimiter to separate fields in a record.
    use_quote_delim: An optional `bool`. Defaults to `True`.
      If false, treats double quotation marks as regular
      characters inside of the string fields (ignoring RFC 4180, Section 2,
      Bullet 5).
    name: A name for the operation (optional).
    na_value: Additional string to recognize as NA/NaN.
    select_cols: Optional sorted list of column indices to select. If specified,
      only this subset of columns will be parsed and returned.

  Returns:
    A list of `Tensor` objects. Has the same type as `record_defaults`.
    Each tensor will have the same shape as records.

  Raises:
    ValueError: If any of the arguments is malformed.
  """
  # Forward by keyword so the differing parameter order of the two endpoints
  # cannot cause arguments to be mixed up.
  return decode_csv_v2(
      records,
      record_defaults,
      field_delim=field_delim,
      use_quote_delim=use_quote_delim,
      na_value=na_value,
      select_cols=select_cols,
      name=name)
970
971
@tf_export("io.decode_csv", v1=[])
def decode_csv_v2(records,
                  record_defaults,
                  field_delim=",",
                  use_quote_delim=True,
                  na_value="",
                  select_cols=None,
                  name=None):
  """Convert CSV records to tensors. Each column maps to one tensor.

  RFC 4180 format is expected for the CSV records.
  (https://tools.ietf.org/html/rfc4180)
  Note that we allow leading and trailing spaces with int or float field.

  Args:
    records: A `Tensor` of type `string`.
      Each string is a record/row in the csv and all records should have
      the same format.
    record_defaults: A list of `Tensor` objects with specific types.
      Acceptable types are `float32`, `float64`, `int32`, `int64`, `string`.
      One tensor per column of the input record, with either a
      scalar default value for that column or an empty vector if the column is
      required.
    field_delim: An optional `string`. Defaults to `","`.
      char delimiter to separate fields in a record.
    use_quote_delim: An optional `bool`. Defaults to `True`.
      If false, treats double quotation marks as regular
      characters inside of the string fields (ignoring RFC 4180, Section 2,
      Bullet 5).
    na_value: Additional string to recognize as NA/NaN.
    select_cols: Optional sorted list of column indices to select. If specified,
      only this subset of columns will be parsed and returned. Must be strictly
      increasing, non-negative, and of the same length as `record_defaults`.
    name: A name for the operation (optional).

  Returns:
    A list of `Tensor` objects. Has the same type as `record_defaults`.
    Each tensor will have the same shape as records.

  Raises:
    ValueError: If any of the arguments is malformed: `select_cols` is not
      strictly increasing, contains negative values, or differs in length from
      `record_defaults`.
  """
  if select_cols is not None:
    # Compare each index with its successor; equal neighbours are rejected too.
    if any(current >= following
           for current, following in zip(select_cols, select_cols[1:])):
      raise ValueError("select_cols is not strictly increasing.")
    # The sequence is strictly increasing at this point, so checking the first
    # element covers every element. The length guard keeps an empty
    # `select_cols` from raising IndexError here; it is handled by the length
    # comparison below instead.
    if len(select_cols) > 0 and select_cols[0] < 0:
      raise ValueError("select_cols contains negative values.")
    if len(select_cols) != len(record_defaults):
      raise ValueError(
          "Length of select_cols and record_defaults do not match.")
  return gen_parsing_ops.decode_csv(
      records=records,
      record_defaults=record_defaults,
      field_delim=field_delim,
      use_quote_delim=use_quote_delim,
      na_value=na_value,
      name=name,
      select_cols=select_cols,
  )
1029
1030
def _assert_scalar(value, name):
  """Checks that `value` is a scalar and returns it.

  When the rank of `value` is known statically the check happens immediately.
  Otherwise a runtime assertion is attached to the returned tensor.

  Args:
    value: The object to check; its rank is read from `value.shape.rank`.
    name: Name of the input, used in the error message and in op names.

  Returns:
    `value` itself when its static rank is 0; otherwise (static rank unknown)
    the result of `control_flow_ops.with_dependencies` over `value`, with its
    shape set to `[]`.

  Raises:
    ValueError: if the static rank of `value` is known and is not 0.
  """
  static_rank = value.shape.rank
  if static_rank == 0:
    return value
  if static_rank is not None:
    raise ValueError("Input %s must be a scalar" % name)

  # The rank is unknown statically, so defer the check to run time and make the
  # returned tensor depend on it.
  rank_is_zero = math_ops.equal(array_ops.rank(value), 0)
  scalar_check = control_flow_ops.Assert(
      rank_is_zero,
      ["Input %s must be a scalar" % name],
      name="%sIsScalar" % name.capitalize())
  checked = control_flow_ops.with_dependencies(
      [scalar_check], value, name="%sDependencies" % name)
  checked.set_shape([])
  return checked
1048