• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
15
16"""Parsing Ops."""
17from __future__ import absolute_import
18from __future__ import division
19from __future__ import print_function
20
21from tensorflow.python.framework import ops
22from tensorflow.python.framework import sparse_tensor
23from tensorflow.python.ops import array_ops
24from tensorflow.python.ops import control_flow_ops
25from tensorflow.python.ops import gen_parsing_ops
26from tensorflow.python.ops import math_ops
27from tensorflow.python.ops import parsing_config
28# go/tf-wildcard-import
29# pylint: disable=wildcard-import,undefined-variable
30from tensorflow.python.ops.gen_parsing_ops import *
31# pylint: enable=wildcard-import,undefined-variable
32from tensorflow.python.util import deprecation
33from tensorflow.python.util import dispatch
34from tensorflow.python.util.tf_export import tf_export
35
36
# Register each of these parsing op types as not differentiable.
for _op_type in ("DecodeRaw", "DecodePaddedRaw", "ParseTensor",
                 "SerializeTensor", "StringToNumber"):
  ops.NotDifferentiable(_op_type)
del _op_type  # Do not leave the loop variable in the module namespace.
42
43
# Module-level aliases for the feature configuration classes that live in
# `parsing_config`.
VarLenFeature = parsing_config.VarLenFeature
RaggedFeature = parsing_config.RaggedFeature
SparseFeature = parsing_config.SparseFeature
FixedLenFeature = parsing_config.FixedLenFeature
FixedLenSequenceFeature = parsing_config.FixedLenSequenceFeature
# Aliases for private helpers of `parsing_config` used throughout this module.
# pylint: disable=protected-access
_ParseOpParams = parsing_config._ParseOpParams
_construct_tensors_for_composite_features = (
    parsing_config._construct_tensors_for_composite_features)
# pylint: enable=protected-access


# Old private name kept as an alias of the helper above.
# TODO(b/122887740) Switch files that use this private symbol to use new name.
_construct_sparse_tensors_for_sparse_features = (
    _construct_tensors_for_composite_features)
59
60
def _prepend_none_dimension(features):
  """Returns `features` with a leading `None` dim on sequence feature shapes.

  Args:
    features: A `dict` mapping feature keys to feature configuration objects,
      or a falsy value (`None`, empty `dict`).

  Returns:
    `features` itself when it is falsy. Otherwise a shallow copy in which every
    `FixedLenSequenceFeature` is replaced by a new `FixedLenSequenceFeature`
    whose shape is `[None] + list(original.shape)`; other entries are kept
    as-is.

  Raises:
    ValueError: if a `FixedLenSequenceFeature` has a falsy `allow_missing`.
  """
  if not features:
    return features

  adjusted = dict(features)  # Copy so the caller's dict is left untouched.
  for feature_key, spec in features.items():
    if not isinstance(spec, FixedLenSequenceFeature):
      continue
    if not spec.allow_missing:
      raise ValueError("Unsupported: FixedLenSequenceFeature requires "
                       "allow_missing to be True.")
    sequence_shape = [None] + list(spec.shape)
    adjusted[feature_key] = FixedLenSequenceFeature(
        sequence_shape, spec.dtype, spec.allow_missing, spec.default_value)
  return adjusted
78
79
@tf_export("io.parse_example", v1=[])
@dispatch.add_dispatch_support
def parse_example_v2(serialized, features, example_names=None, name=None):
  # pylint: disable=line-too-long
  """Parses `Example` protos into a `dict` of tensors.

  Parses a number of serialized [`Example`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
  protos given in `serialized`. We refer to `serialized` as a batch with
  `batch_size` many entries of individual `Example` protos.

  `example_names` may contain descriptive names for the corresponding serialized
  protos. These may be useful for debugging purposes, but they have no effect on
  the output. If not `None`, `example_names` must be the same length as
  `serialized`.

  This op parses serialized examples into a dictionary mapping keys to `Tensor`,
  `SparseTensor`, and `RaggedTensor` objects. `features` is a dict from keys to
  `VarLenFeature`, `SparseFeature`, `RaggedFeature`, `FixedLenFeature`, and
  `FixedLenSequenceFeature` objects. Each `VarLenFeature` and `SparseFeature`
  is mapped to a `SparseTensor`; each `FixedLenFeature` and
  `FixedLenSequenceFeature` is mapped to a `Tensor`; and each `RaggedFeature`
  is mapped to a `RaggedTensor`.

  Each `VarLenFeature` maps to a `SparseTensor` of the specified type
  representing a ragged matrix. Its indices are `[batch, index]` where `batch`
  identifies the example in `serialized`, and `index` is the value's index in
  the list of values associated with that feature and example.

  Each `SparseFeature` maps to a `SparseTensor` of the specified type
  representing a Tensor of `dense_shape` `[batch_size] + SparseFeature.size`.
  Its `values` come from the feature in the examples with key `value_key`.
  A `values[i]` comes from a position `k` in the feature of an example at batch
  entry `batch`. This positional information is recorded in `indices[i]` as
  `[batch, index_0, index_1, ...]` where `index_j` is the `k-th` value of
  the feature in the example with key `SparseFeature.index_key[j]`.
  In other words, we split the indices (except the first index indicating the
  batch entry) of a `SparseTensor` by dimension into different features of the
  `Example`. Due to its complexity a `VarLenFeature` should be preferred over a
  `SparseFeature` whenever possible.

  Each `FixedLenFeature` `df` maps to a `Tensor` of the specified type (or
  `tf.float32` if not specified) and shape `(serialized.size(),) + df.shape`.

  `FixedLenFeature` entries with a `default_value` are optional. With no default
  value, we will fail if that `Feature` is missing from any example in
  `serialized`.

  Each `FixedLenSequenceFeature` `df` maps to a `Tensor` of the specified type
  (or `tf.float32` if not specified) and shape
  `(serialized.size(), None) + df.shape`.
  All examples in `serialized` will be padded with `default_value` along the
  second dimension.

  Each `RaggedFeature` maps to a `RaggedTensor` of the specified type.  It
  is formed by stacking the `RaggedTensor` for each example, where the
  `RaggedTensor` for each individual example is constructed using the tensors
  specified by `RaggedFeature.value_key` and `RaggedFeature.partitions`.  See
  the `tf.io.RaggedFeature` documentation for details and examples.

  Examples:

  For example, if one expects a `tf.float32` `VarLenFeature` `ft` and three
  serialized `Example`s are provided:

  ```
  serialized = [
    features
      { feature { key: "ft" value { float_list { value: [1.0, 2.0] } } } },
    features
      { feature []},
    features
      { feature { key: "ft" value { float_list { value: [3.0] } } } }
  ]
  ```

  then the output will look like:

  ```python
  {"ft": SparseTensor(indices=[[0, 0], [0, 1], [2, 0]],
                      values=[1.0, 2.0, 3.0],
                      dense_shape=(3, 2)) }
  ```

  If instead a `FixedLenSequenceFeature` with `default_value = -1.0` and
  `shape=[]` is used then the output will look like (the second example has no
  `ft` values, so its row consists only of padding):

  ```python
  {"ft": [[1.0, 2.0], [-1.0, -1.0], [3.0, -1.0]]}
  ```

  Given two `Example` input protos in `serialized`:

  ```
  [
    features {
      feature { key: "kw" value { bytes_list { value: [ "knit", "big" ] } } }
      feature { key: "gps" value { float_list { value: [] } } }
    },
    features {
      feature { key: "kw" value { bytes_list { value: [ "emmy" ] } } }
      feature { key: "dank" value { int64_list { value: [ 42 ] } } }
      feature { key: "gps" value { } }
    }
  ]
  ```

  And arguments

  ```
  example_names: ["input0", "input1"],
  features: {
      "kw": VarLenFeature(tf.string),
      "dank": VarLenFeature(tf.int64),
      "gps": VarLenFeature(tf.float32),
  }
  ```

  Then the output is a dictionary:

  ```python
  {
    "kw": SparseTensor(
        indices=[[0, 0], [0, 1], [1, 0]],
        values=["knit", "big", "emmy"],
        dense_shape=[2, 2]),
    "dank": SparseTensor(
        indices=[[1, 0]],
        values=[42],
        dense_shape=[2, 1]),
    "gps": SparseTensor(
        indices=[],
        values=[],
        dense_shape=[2, 0]),
  }
  ```

  For dense results in two serialized `Example`s:

  ```
  [
    features {
      feature { key: "age" value { int64_list { value: [ 0 ] } } }
      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
     },
     features {
      feature { key: "age" value { int64_list { value: [] } } }
      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
    }
  ]
  ```

  We can use arguments:

  ```
  example_names: ["input0", "input1"],
  features: {
      "age": FixedLenFeature([], dtype=tf.int64, default_value=-1),
      "gender": FixedLenFeature([], dtype=tf.string),
  }
  ```

  And the expected output (each of shape `(2,) + []`, i.e. `[2]`) is:

  ```python
  {
    "age": [0, -1],
    "gender": ["f", "f"],
  }
  ```

  An alternative to `VarLenFeature` to obtain a `SparseTensor` is
  `SparseFeature`. For example, given two `Example` input protos in
  `serialized`:

  ```
  [
    features {
      feature { key: "val" value { float_list { value: [ 0.5, -1.0 ] } } }
      feature { key: "ix" value { int64_list { value: [ 3, 20 ] } } }
    },
    features {
      feature { key: "val" value { float_list { value: [ 0.0 ] } } }
      feature { key: "ix" value { int64_list { value: [ 42 ] } } }
    }
  ]
  ```

  And arguments

  ```
  example_names: ["input0", "input1"],
  features: {
      "sparse": SparseFeature(
          index_key="ix", value_key="val", dtype=tf.float32, size=100),
  }
  ```

  Then the output is a dictionary:

  ```python
  {
    "sparse": SparseTensor(
        indices=[[0, 3], [0, 20], [1, 42]],
        values=[0.5, -1.0, 0.0],
        dense_shape=[2, 100]),
  }
  ```

  See the `tf.io.RaggedFeature` documentation for examples showing how
  `RaggedFeature` can be used to obtain `RaggedTensor`s.

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    features: A `dict` mapping feature keys to `FixedLenFeature`,
      `FixedLenSequenceFeature`, `VarLenFeature`, `SparseFeature`, and
      `RaggedFeature` values.
    example_names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos in the batch.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping feature keys to `Tensor`, `SparseTensor`, and
    `RaggedTensor` values.

  Raises:
    ValueError: if `features` is `None` or empty, or if any feature is invalid.
  """
  if not features:
    raise ValueError("Argument `features` cannot be None.")
  # Sequence features get a leading `None` (variable-length) dimension before
  # the op parameters are derived from the feature configuration.
  features = _prepend_none_dimension(features)
  params = _ParseOpParams.from_features(features, [
      VarLenFeature, SparseFeature, FixedLenFeature, FixedLenSequenceFeature,
      RaggedFeature
  ])

  outputs = _parse_example_raw(serialized, example_names, params, name=name)
  # Assemble the composite-feature outputs from the raw op outputs.
  return _construct_tensors_for_composite_features(features, outputs)
316
317
@tf_export(v1=["io.parse_example", "parse_example"])
@dispatch.add_dispatch_support
def parse_example(serialized, features, name=None, example_names=None):
  # V1 endpoint: forwards to `parse_example_v2`. The two signatures differ only
  # in the order of the optional `name` and `example_names` parameters.
  return parse_example_v2(
      serialized, features, example_names=example_names, name=name)


# The V1 endpoint reuses the V2 docstring.
parse_example.__doc__ = parse_example_v2.__doc__
325
326
def _parse_example_raw(serialized, names, params, name):
  """Runs the `ParseExampleV2` op and packs its outputs into a `dict`.

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos. `None` is replaced by an empty list.
    params: A `ParseOpParams` containing the parameters for the parse op.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping keys to `Tensor`s and `SparseTensor`s and `RaggedTensor`s.
    Keys are taken from `params` in the order sparse, dense, ragged.

  Raises:
    ValueError: if `params` describes no features, or if ragged features are
      requested while the rank of `serialized` is not statically known.
  """
  if params.num_features == 0:
    raise ValueError("Must provide at least one feature key.")
  with ops.name_scope(name, "ParseExample", [serialized, names]):
    if names is None:
      names = []
    serialized = ops.convert_to_tensor(serialized, name="serialized")
    if params.ragged_keys:
      if serialized.shape.ndims is None:
        raise ValueError("serialized must have statically-known rank to "
                         "parse ragged features.")
    (sparse_indices, sparse_values, sparse_shapes, dense_values, ragged_values,
     ragged_row_splits) = gen_parsing_ops.parse_example_v2(
         serialized=serialized,
         names=names,
         sparse_keys=params.sparse_keys,
         dense_keys=params.dense_keys,
         ragged_keys=params.ragged_keys,
         dense_defaults=params.dense_defaults_vec,
         num_sparse=len(params.sparse_keys),
         sparse_types=params.sparse_types,
         ragged_value_types=params.ragged_value_types,
         ragged_split_types=params.ragged_split_types,
         dense_shapes=params.dense_shapes_as_proto,
         name=name)

    # pylint: disable=protected-access
    ragged_tensors = parsing_config._build_ragged_tensors(
        serialized.shape, ragged_values, ragged_row_splits)
    # pylint: enable=protected-access

    # One SparseTensor per (indices, values, dense_shape) triple.
    sparse_tensors = [
        sparse_tensor.SparseTensor(*components)
        for components in zip(sparse_indices, sparse_values, sparse_shapes)
    ]

    # Keys and values are concatenated in the same sparse/dense/ragged order.
    output_keys = params.sparse_keys + params.dense_keys + params.ragged_keys
    output_values = sparse_tensors + dense_values + ragged_tensors
    return dict(zip(output_keys, output_values))
376
377
@tf_export(v1=["io.parse_single_example", "parse_single_example"])
@dispatch.add_dispatch_support
def parse_single_example(serialized, features, name=None, example_names=None):
  """Parses a single `Example` proto.

  This is the single-example counterpart of `parse_example`, with these
  differences:

  Dense outputs match what `parse_example` returns but carry no batch
  dimension; the output shape is the same as the shape given in `dense_shape`.

  For `SparseTensor` outputs, the first (batch) column of the indices matrix is
  dropped (the indices matrix becomes a column vector), the values vector is
  unchanged, and the first (`batch_size`) entry of the shape vector is dropped
  (leaving a single element vector).

  Batching `Example` protos and calling `parse_example` may perform better than
  calling this function directly.

  Args:
    serialized: A scalar string Tensor, a single serialized Example.
    features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values.
    name: A name for this operation (optional).
    example_names: (Optional) A scalar string Tensor, the associated name.

  Returns:
    A `dict` mapping feature keys to `Tensor` and `SparseTensor` values.

  Raises:
    ValueError: if any feature is invalid.
  """
  # V1 endpoint: the V2 signature takes `example_names` before `name`.
  return parse_single_example_v2(
      serialized, features, example_names=example_names, name=name)
411
412
@tf_export("io.parse_single_example", v1=[])
@dispatch.add_dispatch_support
def parse_single_example_v2(serialized,
                            features,
                            example_names=None,
                            name=None):
  """Parses a single `Example` proto.

  This is the single-example counterpart of `parse_example`, with these
  differences:

  Dense outputs match what `parse_example` returns but carry no batch
  dimension; the output shape is the same as the shape given in `dense_shape`.

  For `SparseTensor` outputs, the first (batch) column of the indices matrix is
  dropped (the indices matrix becomes a column vector), the values vector is
  unchanged, and the first (`batch_size`) entry of the shape vector is dropped
  (leaving a single element vector).

  Batching `Example` protos and calling `parse_example` may perform better than
  calling this function directly.

  Args:
    serialized: A scalar string Tensor, a single serialized Example.
    features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values.
    example_names: (Optional) A scalar string Tensor, the associated name.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping feature keys to `Tensor` and `SparseTensor` values.

  Raises:
    ValueError: if `features` is `None` or empty, or if any feature is invalid.
  """
  if not features:
    raise ValueError("Invalid argument: features cannot be None.")
  with ops.name_scope(name, "ParseSingleExample", [serialized, example_names]):
    as_tensor = ops.convert_to_tensor(serialized, name="serialized")
    # `_assert_scalar` returns the tensor that is forwarded to the batch parser.
    scalar_serialized = _assert_scalar(as_tensor, "serialized")
    return parse_example_v2(
        scalar_serialized, features, example_names=example_names, name=name)
453
454
@tf_export("io.parse_sequence_example")
@dispatch.add_dispatch_support
def parse_sequence_example(serialized,
                           context_features=None,
                           sequence_features=None,
                           example_names=None,
                           name=None):
  # pylint: disable=line-too-long
  """Parses a batch of `SequenceExample` protos.

  Parses a vector of serialized
  [`SequenceExample`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
  protos given in `serialized`.

  This op parses serialized sequence examples into a tuple of dictionaries,
  each mapping keys to `Tensor`, `SparseTensor`, and `RaggedTensor` objects.
  The first dictionary contains mappings for keys appearing in
  `context_features`, the second dictionary contains mappings for keys
  appearing in `sequence_features`, and the third dictionary contains the
  lengths of the dense `sequence_features`.

  At least one of `context_features` and `sequence_features` must be provided
  and non-empty.

  The `context_features` keys are associated with a `SequenceExample` as a
  whole, independent of time / frame.  In contrast, the `sequence_features` keys
  provide a way to access variable-length data within the `FeatureList` section
  of the `SequenceExample` proto.  While the shapes of `context_features` values
  are fixed with respect to frame, the frame dimension (the first dimension)
  of `sequence_features` values may vary between `SequenceExample` protos,
  and even between `feature_list` keys within the same `SequenceExample`.

  `context_features` contains `VarLenFeature`, `RaggedFeature`, and
  `FixedLenFeature`  objects. Each `VarLenFeature` is mapped to a
  `SparseTensor`; each `RaggedFeature` is  mapped to a `RaggedTensor`; and each
  `FixedLenFeature` is mapped to a `Tensor`, of the specified type, shape, and
  default value.

  `sequence_features` contains `VarLenFeature`, `RaggedFeature`, and
  `FixedLenSequenceFeature` objects. Each `VarLenFeature` is mapped to a
  `SparseTensor`; each `RaggedFeature` is mapped to a `RaggedTensor`; and
  each `FixedLenSequenceFeature` is mapped to a `Tensor`, each of the specified
  type. The shape will be `(B,T,) + df.dense_shape` for
  `FixedLenSequenceFeature` `df`, where `B` is the batch size, and `T` is the
  length of the associated `FeatureList` in the `SequenceExample`. For instance,
  `FixedLenSequenceFeature([])` yields a scalar 2-D `Tensor` of static shape
  `[None, None]` and dynamic shape `[B, T]`, while
  `FixedLenSequenceFeature([k])` (for `int k >= 1`) yields a 3-D matrix `Tensor`
  of static shape `[None, None, k]` and dynamic shape `[B, T, k]`.

  Like the input, the resulting output tensors have a batch dimension. This
  means that the original per-example shapes of `VarLenFeature`s and
  `FixedLenSequenceFeature`s can be lost. To handle that situation, this op also
  provides dicts of shape tensors as part of the output. There is one dict for
  the context features, and one for the feature_list features. Context features
  of type `FixedLenFeature`s will not be present, since their shapes are already
  known by the caller. In situations where the input `FixedLenSequenceFeature`s
  are of different sequence lengths across examples, the shorter examples will
  be padded with default datatype values: 0 for numeric types, and the empty
  string for string types.

  Each `SparseTensor` corresponding to `sequence_features` represents a ragged
  vector.  Its indices are `[time, index]`, where `time` is the `FeatureList`
  entry and `index` is the value's index in the list of values associated with
  that time.

  `FixedLenFeature` entries with a `default_value` and `FixedLenSequenceFeature`
  entries with `allow_missing=True` are optional; otherwise, we will fail if
  that `Feature` or `FeatureList` is missing from any example in `serialized`.

  `example_names` may contain descriptive names for the corresponding
  serialized protos. These may be useful for debugging purposes, but they have
  no effect on the output. If not `None`, `example_names` must be a vector
  (1-D Tensor) of strings.

  Args:
    serialized: A vector (1-D Tensor) of type string containing binary
      serialized `SequenceExample` protos.
    context_features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` or `RaggedFeature` values. These features are associated
      with a `SequenceExample` as a whole.
    sequence_features: A `dict` mapping feature keys to
      `FixedLenSequenceFeature` or `VarLenFeature` or `RaggedFeature` values.
      These features are associated with data within the `FeatureList` section
      of the `SequenceExample` proto.
    example_names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos.
    name: A name for this operation (optional).

  Returns:
    A tuple of three `dict`s, each mapping keys to `Tensor`s,
    `SparseTensor`s, and `RaggedTensor`s. The first dict contains the context
    key/values, the second dict contains the feature_list key/values, and the
    final dict contains the lengths of any dense feature_list features.

  Raises:
    ValueError: if both `context_features` and `sequence_features` are `None`
      or empty, or if any feature is invalid.
  """
  if not (context_features or sequence_features):
    raise ValueError("Both `context_features` and `sequence_features` argument "
                     "are None, but at least one should have values.")
  context_params = _ParseOpParams.from_features(
      context_features, [VarLenFeature, FixedLenFeature, RaggedFeature])
  feature_list_params = _ParseOpParams.from_features(
      sequence_features,
      [VarLenFeature, FixedLenSequenceFeature, RaggedFeature])

  with ops.name_scope(name, "ParseSequenceExample",
                      [serialized, example_names]):
    outputs = _parse_sequence_example_raw(serialized, example_names,
                                          context_params, feature_list_params,
                                          name)
    context_output, feature_list_output, feature_list_lengths = outputs

    # Composite-feature post-processing is only applied to a dict when that
    # dict actually has ragged keys.
    if context_params.ragged_keys:
      context_output = _construct_tensors_for_composite_features(
          context_features, context_output)
    if feature_list_params.ragged_keys:
      feature_list_output = _construct_tensors_for_composite_features(
          sequence_features, feature_list_output)

    return context_output, feature_list_output, feature_list_lengths
575
576
def _parse_sequence_example_raw(serialized,
                                debug_name,
                                context,
                                feature_list,
                                name=None):
  """Parses a vector of `SequenceExample` protos.

  Args:
    serialized: A vector (1-D Tensor) of type string, containing binary
      serialized `SequenceExample` protos.
    debug_name: A vector (1-D Tensor) of strings (optional), the names of the
      serialized protos. `None` is replaced by an empty list.
    context: A `ParseOpParams` containing the parameters for the parse
      op for the context features.
    feature_list: A `ParseOpParams` containing the parameters for the
      parse op for the feature_list features.
    name: A name for this operation (optional).

  Returns:
    A tuple of three `dict`s, each mapping keys to `Tensor`s, `SparseTensor`s,
    and `RaggedTensor`s. The first dict contains the context key/values, the
    second dict contains the feature_list key/values, and the final dict
    contains the lengths of any dense feature_list features.

  Raises:
    ValueError: if `context` and `feature_list` together describe no features,
      if any value in `feature_list.dense_defaults` is not `None`, or if ragged
      features are requested while the rank of `serialized` is not statically
      known.
  """
  if context.num_features + feature_list.num_features == 0:
    raise ValueError("Must provide at least one feature key.")
  with ops.name_scope(name, "ParseSequenceExample", [serialized]):
    debug_name = [] if debug_name is None else debug_name

    # Internal
    # Every key listed in `dense_defaults` must carry a `None` default; those
    # keys are the ones flagged as "missing assumed empty" for the op. A set is
    # used so the per-key membership test below is constant time.
    feature_list_dense_missing_assumed_empty = set()
    for k, v in feature_list.dense_defaults.items():
      if v is not None:
        raise ValueError("Value feature_list.dense_defaults[%s] must be None" %
                         k)
      feature_list_dense_missing_assumed_empty.add(k)

    has_ragged = context.ragged_keys or feature_list.ragged_keys
    serialized = ops.convert_to_tensor(serialized, name="serialized")
    if has_ragged and serialized.shape.ndims is None:
      raise ValueError("serialized must have statically-known rank to "
                       "parse ragged features.")
    # One boolean per dense feature_list key, aligned with `dense_keys`.
    feature_list_dense_missing_assumed_empty_vector = [
        key in feature_list_dense_missing_assumed_empty
        for key in feature_list.dense_keys
    ]
    outputs = gen_parsing_ops.parse_sequence_example_v2(
        # Inputs
        serialized=serialized,
        debug_name=debug_name,
        context_sparse_keys=context.sparse_keys,
        context_dense_keys=context.dense_keys,
        context_ragged_keys=context.ragged_keys,
        feature_list_sparse_keys=feature_list.sparse_keys,
        feature_list_dense_keys=feature_list.dense_keys,
        feature_list_ragged_keys=feature_list.ragged_keys,
        feature_list_dense_missing_assumed_empty=(
            feature_list_dense_missing_assumed_empty_vector),
        context_dense_defaults=context.dense_defaults_vec,
        # Attrs
        Ncontext_sparse=len(context.sparse_keys),
        Nfeature_list_sparse=len(feature_list.sparse_keys),
        Nfeature_list_dense=len(feature_list.dense_keys),
        context_sparse_types=context.sparse_types,
        context_ragged_value_types=context.ragged_value_types,
        context_ragged_split_types=context.ragged_split_types,
        feature_list_dense_types=feature_list.dense_types,
        feature_list_sparse_types=feature_list.sparse_types,
        feature_list_ragged_value_types=feature_list.ragged_value_types,
        feature_list_ragged_split_types=feature_list.ragged_split_types,
        context_dense_shapes=context.dense_shapes_as_proto,
        feature_list_dense_shapes=feature_list.dense_shapes,
        name=name)
    (context_sparse_indices, context_sparse_values, context_sparse_shapes,
     context_dense_values, context_ragged_values, context_ragged_row_splits,
     feature_list_sparse_indices, feature_list_sparse_values,
     feature_list_sparse_shapes, feature_list_dense_values,
     feature_list_dense_lengths, feature_list_ragged_values,
     feature_list_ragged_outer_splits,
     feature_list_ragged_inner_splits) = outputs
    # pylint: disable=protected-access
    context_ragged_tensors = parsing_config._build_ragged_tensors(
        serialized.shape, context_ragged_values, context_ragged_row_splits)
    feature_list_ragged_tensors = parsing_config._build_ragged_tensors(
        serialized.shape, feature_list_ragged_values,
        feature_list_ragged_outer_splits, feature_list_ragged_inner_splits)
    # pylint: enable=protected-access

    # pylint: disable=g-complex-comprehension
    context_sparse_tensors = [
        sparse_tensor.SparseTensor(ix, val, shape)
        for (ix, val,
             shape) in zip(context_sparse_indices, context_sparse_values,
                           context_sparse_shapes)
    ]

    feature_list_sparse_tensors = [
        sparse_tensor.SparseTensor(ix, val, shape)
        for (ix, val, shape
            ) in zip(feature_list_sparse_indices, feature_list_sparse_values,
                     feature_list_sparse_shapes)
    ]
    # pylint: enable=g-complex-comprehension

    # Keys and values are concatenated in the same sparse/dense/ragged order so
    # that `zip` pairs each key with its own output.
    context_output = dict(
        zip(
            context.sparse_keys + context.dense_keys + context.ragged_keys,
            context_sparse_tensors + context_dense_values +
            context_ragged_tensors))
    feature_list_output = dict(
        zip(
            feature_list.sparse_keys + feature_list.dense_keys +
            feature_list.ragged_keys, feature_list_sparse_tensors +
            feature_list_dense_values + feature_list_ragged_tensors))
    feature_list_lengths = dict(
        zip(feature_list.dense_keys, feature_list_dense_lengths))

    return (context_output, feature_list_output, feature_list_lengths)
697
698
@tf_export("io.parse_single_sequence_example",
           v1=["io.parse_single_sequence_example",
               "parse_single_sequence_example"])
@dispatch.add_dispatch_support
def parse_single_sequence_example(
    serialized, context_features=None, sequence_features=None,
    example_name=None, name=None):
  # pylint: disable=line-too-long
  """Parses one serialized `SequenceExample` proto into tensor dictionaries.

  `serialized` holds a single binary-serialized
  [`SequenceExample`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
  proto.

  The result is a pair of dictionaries whose values are `Tensor`,
  `SparseTensor` and `RaggedTensor` objects. The first dictionary has one entry
  per key of `context_features`; the second has one entry per key of
  `sequence_features`.

  At least one of `context_features` and `sequence_features` must be provided
  and non-empty.

  Context vs. sequence features:

  * `context_features` keys describe the `SequenceExample` as a whole and do
    not depend on time / frame. The shapes of their values are fixed with
    respect to frame.
  * `sequence_features` keys give access to the variable-length data stored in
    the `FeatureList` section of the proto. The frame dimension (the first
    dimension) of their values may differ between `SequenceExample` protos,
    and even between `feature_list` keys of the same `SequenceExample`.

  Accepted feature configurations:

  * `context_features` may hold `VarLenFeature`, `RaggedFeature` and
    `FixedLenFeature` objects. A `VarLenFeature` produces a `SparseTensor`, a
    `RaggedFeature` produces a `RaggedTensor`, and a `FixedLenFeature`
    produces a `Tensor` of the specified type, shape, and default value.
  * `sequence_features` may hold `VarLenFeature`, `RaggedFeature` and
    `FixedLenSequenceFeature` objects. A `VarLenFeature` produces a
    `SparseTensor`, a `RaggedFeature` produces a `RaggedTensor`, and a
    `FixedLenSequenceFeature` produces a `Tensor`, each of the specified type.

  For a `FixedLenSequenceFeature` `df` the output shape is
  `(T,) + df.dense_shape`, where `T` is the length of the associated
  `FeatureList` in the `SequenceExample`. For instance,
  `FixedLenSequenceFeature([])` yields a 1-D `Tensor` of static shape `[None]`
  and dynamic shape `[T]`, while `FixedLenSequenceFeature([k])` (for
  `int k >= 1`) yields a 2-D matrix `Tensor` of static shape `[None, k]` and
  dynamic shape `[T, k]`.

  Each `SparseTensor` produced for `sequence_features` represents a ragged
  vector. Its indices are `[time, index]`, where `time` is the `FeatureList`
  entry and `index` is the value's index in the list of values associated with
  that time.

  `FixedLenFeature` entries with a `default_value` and `FixedLenSequenceFeature`
  entries with `allow_missing=True` are optional; otherwise, parsing fails if
  that `Feature` or `FeatureList` is missing from `serialized`.

  `example_name` may contain a descriptive name for the serialized proto. It can
  help with debugging but has no effect on the output. If not `None`,
  `example_name` must be a scalar.

  Note that the batch version of this function,
  `tf.io.parse_sequence_example`, is written for better memory efficiency and
  will be faster on large `SequenceExample`s.

  Args:
    serialized: A scalar (0-D Tensor) of type string, a single binary
      serialized `SequenceExample` proto.
    context_features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` or `RaggedFeature` values. These features are associated
      with a `SequenceExample` as a whole.
    sequence_features: A `dict` mapping feature keys to
      `FixedLenSequenceFeature` or `VarLenFeature` or `RaggedFeature` values.
      These features are associated with data within the `FeatureList` section
      of the `SequenceExample` proto.
    example_name: A scalar (0-D Tensor) of strings (optional), the name of
      the serialized proto.
    name: A name for this operation (optional).

  Returns:
    A tuple of two `dict`s, each mapping keys to `Tensor`s and `SparseTensor`s
    and `RaggedTensor`s.

    * The first dict contains the context key/values.
    * The second dict contains the feature_list key/values.

  Raises:
    ValueError: if both `context_features` and `sequence_features` are empty or
      `None`, or if any feature is invalid.
  """
  # pylint: enable=line-too-long
  if not context_features and not sequence_features:
    raise ValueError("Both context_features and sequence_features are None, but"
                     " at least one should have values.")

  # Translate the user-facing feature configs into parse-op parameters. Each
  # section accepts a different set of feature configuration classes.
  context_spec = _ParseOpParams.from_features(
      context_features, [VarLenFeature, FixedLenFeature, RaggedFeature])
  sequence_spec = _ParseOpParams.from_features(
      sequence_features,
      [VarLenFeature, FixedLenSequenceFeature, RaggedFeature])

  with ops.name_scope(name, "ParseSingleSequenceExample",
                      [serialized, example_name]):
    parsed = _parse_single_sequence_example_raw(
        serialized, context_spec, sequence_spec, example_name, name)
    context_tensors, sequence_tensors = parsed

    # Composite outputs are only assembled for a section that actually
    # declared ragged keys; otherwise the raw parse output is returned as is.
    if context_spec.ragged_keys:
      context_tensors = _construct_tensors_for_composite_features(
          context_features, context_tensors)
    if sequence_spec.ragged_keys:
      sequence_tensors = _construct_tensors_for_composite_features(
          sequence_features, sequence_tensors)

    return context_tensors, sequence_tensors
811
812
def _parse_single_sequence_example_raw(serialized,
                                       context,
                                       feature_list,
                                       debug_name,
                                       name=None):
  """Parses a single `SequenceExample` proto from already-built parse params.

  `serialized` is converted to a tensor and checked to be a scalar inside a
  "ParseSingleExample" name scope; the actual parsing is then delegated to
  `_parse_sequence_example_raw`, of whose result only the first two entries
  are returned.

  Args:
    serialized: A scalar (0-D Tensor) of type string, a single binary serialized
      `SequenceExample` proto.
    context: A `ParseOpParams` containing the parameters for the parse op for
      the context features.
    feature_list: A `ParseOpParams` containing the parameters for the parse op
      for the feature_list features.
    debug_name: A scalar (0-D Tensor) of strings (optional), the name of the
      serialized proto.
    name: A name for this operation (optional).

  Returns:
    A tuple of two `dict`s, each mapping keys to `Tensor`s and `SparseTensor`s.
    The first dict contains the context key/values.
    The second dict contains the feature_list key/values.

  Raises:
    ValueError: if `serialized` has a statically known rank other than 0.
      Errors raised by `_parse_sequence_example_raw` propagate unchanged.
  """
  with ops.name_scope(name, "ParseSingleExample", [serialized, debug_name]):
    serialized_scalar = _assert_scalar(
        ops.convert_to_tensor(serialized, name="serialized"), "serialized")
  # The delegate is called outside the scope above and receives `name` itself.
  parsed = _parse_sequence_example_raw(serialized_scalar, debug_name, context,
                                       feature_list, name)
  # Keep only the leading (context, feature_list) pair of the result.
  return parsed[:2]
844
845
@tf_export("io.decode_raw", v1=[])
@dispatch.add_dispatch_support
def decode_raw(input_bytes,
               out_type,
               little_endian=True,
               fixed_length=None,
               name=None):
  r"""Reinterprets the raw bytes of a string tensor as numbers.

  Every component of the input tensor is interpreted as a sequence of bytes.
  These bytes are then decoded as numbers in the format specified by `out_type`.

  >>> tf.io.decode_raw(tf.constant("1"), tf.uint8)
  <tf.Tensor: shape=(1,), dtype=uint8, numpy=array([49], dtype=uint8)>
  >>> tf.io.decode_raw(tf.constant("1,2"), tf.uint8)
  <tf.Tensor: shape=(3,), dtype=uint8, numpy=array([49, 44, 50], dtype=uint8)>

  Note that the rank of the output tensor is always one more than the input one:

  >>> tf.io.decode_raw(tf.constant(["1","2"]), tf.uint8).shape
  TensorShape([2, 1])
  >>> tf.io.decode_raw(tf.constant([["1"],["2"]]), tf.uint8).shape
  TensorShape([2, 1, 1])

  This is because each byte in the input is converted to a new value on the
  output (if output type is `uint8` or `int8`, otherwise chunks of inputs get
  converted to a new value):

  >>> tf.io.decode_raw(tf.constant("123"), tf.uint8)
  <tf.Tensor: shape=(3,), dtype=uint8, numpy=array([49, 50, 51], dtype=uint8)>
  >>> tf.io.decode_raw(tf.constant("1234"), tf.uint8)
  <tf.Tensor: shape=(4,), dtype=uint8, numpy=array([49, 50, 51, 52], ...
  >>> # chunked output
  >>> tf.io.decode_raw(tf.constant("12"), tf.uint16)
  <tf.Tensor: shape=(1,), dtype=uint16, numpy=array([12849], dtype=uint16)>
  >>> tf.io.decode_raw(tf.constant("1234"), tf.uint16)
  <tf.Tensor: shape=(2,), dtype=uint16, numpy=array([12849, 13363], ...
  >>> # int64 output
  >>> tf.io.decode_raw(tf.constant("12345678"), tf.int64)
  <tf.Tensor: ... numpy=array([4050765991979987505])>
  >>> tf.io.decode_raw(tf.constant("1234567887654321"), tf.int64)
  <tf.Tensor: ... numpy=array([4050765991979987505, 3544952156018063160])>

  The operation allows specifying endianness via the `little_endian` parameter.

  >>> tf.io.decode_raw(tf.constant("\x0a\x0b"), tf.int16)
  <tf.Tensor: shape=(1,), dtype=int16, numpy=array([2826], dtype=int16)>
  >>> hex(2826)
  '0xb0a'
  >>> tf.io.decode_raw(tf.constant("\x0a\x0b"), tf.int16, little_endian=False)
  <tf.Tensor: shape=(1,), dtype=int16, numpy=array([2571], dtype=int16)>
  >>> hex(2571)
  '0xa0b'

  If the elements of `input_bytes` are of different length, you must specify
  `fixed_length`:

  >>> tf.io.decode_raw(tf.constant([["1"],["23"]]), tf.uint8, fixed_length=4)
  <tf.Tensor: shape=(2, 1, 4), dtype=uint8, numpy=
  array([[[49,  0,  0,  0]],
         [[50, 51,  0,  0]]], dtype=uint8)>

  If the `fixed_length` value is larger than the length of the `out_type` dtype,
  multiple values are generated:

  >>> tf.io.decode_raw(tf.constant(["1212"]), tf.uint16, fixed_length=4)
  <tf.Tensor: shape=(1, 2), dtype=uint16, numpy=array([[12849, 12849]], ...

  If the input value is larger than `fixed_length`, it is truncated:

  >>> x=''.join([chr(1), chr(2), chr(3), chr(4)])
  >>> tf.io.decode_raw(x, tf.uint16, fixed_length=2)
  <tf.Tensor: shape=(1,), dtype=uint16, numpy=array([513], dtype=uint16)>
  >>> hex(513)
  '0x201'

  If `little_endian` and `fixed_length` are specified, truncation to the fixed
  length occurs before endianness conversion:

  >>> x=''.join([chr(1), chr(2), chr(3), chr(4)])
  >>> tf.io.decode_raw(x, tf.uint16, fixed_length=2, little_endian=False)
  <tf.Tensor: shape=(1,), dtype=uint16, numpy=array([258], dtype=uint16)>
  >>> hex(258)
  '0x102'

  If input values all have the same length, then specifying `fixed_length`
  equal to the size of the strings should not change output:

  >>> x = ["12345678", "87654321"]
  >>> tf.io.decode_raw(x, tf.int16)
  <tf.Tensor: shape=(2, 4), dtype=int16, numpy=
  array([[12849, 13363, 13877, 14391],
         [14136, 13622, 13108, 12594]], dtype=int16)>
  >>> tf.io.decode_raw(x, tf.int16, fixed_length=len(x[0]))
  <tf.Tensor: shape=(2, 4), dtype=int16, numpy=
  array([[12849, 13363, 13877, 14391],
         [14136, 13622, 13108, 12594]], dtype=int16)>

  Args:
    input_bytes:
      Each element of the input Tensor is converted to an array of bytes.

      Currently, this must be a tensor of strings (bytes), although semantically
      the operation should support any input.
    out_type:
      `DType` of the output. Acceptable types are `half`, `float`, `double`,
      `int32`, `uint16`, `uint8`, `int16`, `int8`, `int64`.
    little_endian:
      Whether the `input_bytes` data is in little-endian format. Data will be
      converted into host byte order if necessary.
    fixed_length:
      If set, the first `fixed_length` bytes of each element will be converted.
      Data will be zero-padded or truncated to the specified length.

      `fixed_length` must be a multiple of the size of `out_type`.

      `fixed_length` must be specified if the elements of `input_bytes` are of
      variable length.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` object storing the decoded bytes.
  """
  # Without a fixed length the plain decode op is used.
  if fixed_length is None:
    return gen_parsing_ops.decode_raw(
        input_bytes, out_type, little_endian=little_endian, name=name)

  # With a fixed length the padded variant of the op is used instead.
  return gen_parsing_ops.decode_padded_raw(
      input_bytes,
      fixed_length=fixed_length,
      out_type=out_type,
      little_endian=little_endian,
      name=name)
979
980
@tf_export(v1=["decode_raw", "io.decode_raw"])
@dispatch.add_dispatch_support
@deprecation.deprecated_args(None,
                             "bytes is deprecated, use input_bytes instead",
                             "bytes")
def decode_raw_v1(
    input_bytes=None,
    out_type=None,
    little_endian=True,
    name=None,
    bytes=None  # pylint: disable=redefined-builtin
):
  """Decodes raw byte strings into a numeric tensor (v1 endpoint).

  The input may be supplied either as `input_bytes` or through the deprecated
  `bytes` keyword; the two are reconciled with
  `deprecation.deprecated_argument_lookup` before decoding.

  Args:
    input_bytes:
      Each element of the input Tensor is converted to an array of bytes.
    out_type:
      `DType` of the output. Acceptable types are `half`, `float`, `double`,
      `int32`, `uint16`, `uint8`, `int16`, `int8`, `int64`. Required, even
      though the signature gives it a default of `None`.
    little_endian:
      Whether the `input_bytes` data is in little-endian format. Data will be
      converted into host byte order if necessary.
    name: A name for the operation (optional).
    bytes: Deprecated parameter. Use `input_bytes` instead.

  Returns:
    A `Tensor` object storing the decoded bytes.

  Raises:
    ValueError: if `out_type` is not provided.
  """
  raw_input = deprecation.deprecated_argument_lookup(
      "input_bytes", input_bytes, "bytes", bytes)

  # `out_type` used to be a required positional argument. It only became a
  # keyword argument so that the reserved name `bytes` could be migrated to
  # `input_bytes`, so a missing value is still rejected here.
  if out_type is None:
    raise ValueError(
        "decode_raw_v1() missing 1 positional argument: 'out_type'")

  decoded = gen_parsing_ops.decode_raw(
      raw_input, out_type, little_endian=little_endian, name=name)
  return decoded
1023
1024
1025# Swap `name` and `na_value` for backward compatibility.
@tf_export(v1=["io.decode_csv", "decode_csv"])
@dispatch.add_dispatch_support
@deprecation.deprecated_endpoints("decode_csv")
def decode_csv(records,
               record_defaults,
               field_delim=",",
               use_quote_delim=True,
               name=None,
               na_value="",
               select_cols=None):
  """Convert CSV records to tensors. Each column maps to one tensor.

  This is the v1 endpoint. It forwards every argument to `decode_csv_v2`; the
  only difference between the two signatures is that `name` comes before
  `na_value` and `select_cols` here.

  RFC 4180 format is expected for the CSV records.
  (https://tools.ietf.org/html/rfc4180)
  Note that we allow leading and trailing spaces with int or float field.

  Args:
    records: A `Tensor` of type `string`.
      Each string is a record/row in the csv and all records should have
      the same format.
    record_defaults: A list of `Tensor` objects with specific types.
      Acceptable types are `float32`, `float64`, `int32`, `int64`, `string`.
      One tensor per column of the input record, with either a
      scalar default value for that column or an empty vector if the column is
      required.
    field_delim: An optional `string`. Defaults to `","`.
      char delimiter to separate fields in a record.
    use_quote_delim: An optional `bool`. Defaults to `True`.
      If false, treats double quotation marks as regular
      characters inside of the string fields (ignoring RFC 4180, Section 2,
      Bullet 5).
    name: A name for the operation (optional).
    na_value: Additional string to recognize as NA/NaN.
    select_cols: Optional sorted list of column indices to select. If specified,
      only this subset of columns will be parsed and returned.

  Returns:
    A list of `Tensor` objects. Has the same type as `record_defaults`.
    Each tensor will have the same shape as records.

  Raises:
    ValueError: If any of the arguments is malformed.
  """
  # Arguments are handed over positionally in the order `decode_csv_v2`
  # declares them, which places `name` last.
  columns = decode_csv_v2(
      records, record_defaults, field_delim, use_quote_delim, na_value,
      select_cols, name)
  return columns
1074
1075
@tf_export("io.decode_csv", v1=[])
@dispatch.add_dispatch_support
def decode_csv_v2(records,
                  record_defaults,
                  field_delim=",",
                  use_quote_delim=True,
                  na_value="",
                  select_cols=None,
                  name=None):
  """Convert CSV records to tensors. Each column maps to one tensor.

  RFC 4180 format is expected for the CSV records.
  (https://tools.ietf.org/html/rfc4180)
  Note that we allow leading and trailing spaces with int or float field.

  When `select_cols` is given it is validated in Python before the op is
  built: it must be strictly increasing, must not contain negative values, and
  must have the same length as `record_defaults`.

  Args:
    records: A `Tensor` of type `string`.
      Each string is a record/row in the csv and all records should have
      the same format.
    record_defaults: A list of `Tensor` objects with specific types.
      Acceptable types are `float32`, `float64`, `int32`, `int64`, `string`.
      One tensor per column of the input record, with either a
      scalar default value for that column or an empty vector if the column is
      required.
    field_delim: An optional `string`. Defaults to `","`.
      char delimiter to separate fields in a record.
    use_quote_delim: An optional `bool`. Defaults to `True`.
      If false, treats double quotation marks as regular
      characters inside of the string fields (ignoring RFC 4180, Section 2,
      Bullet 5).
    na_value: Additional string to recognize as NA/NaN.
    select_cols: Optional sorted list of column indices to select. If specified,
      only this subset of columns will be parsed and returned.
    name: A name for the operation (optional).

  Returns:
    A list of `Tensor` objects. Has the same type as `record_defaults`.
    Each tensor will have the same shape as records.

  Raises:
    ValueError: If any of the arguments is malformed, in particular if
      `select_cols` is not strictly increasing, contains negative values, or
      differs in length from `record_defaults`.
  """
  if select_cols is not None:
    if any(prev >= nxt for prev, nxt in zip(select_cols, select_cols[1:])):
      raise ValueError("select_cols is not strictly increasing.")
    # The sequence is strictly increasing at this point, so its first element
    # is the minimum and is the only one that needs a sign check. The length
    # guard keeps an empty `select_cols` from raising IndexError here; it is
    # then handled by the length comparison below like any other value.
    if len(select_cols) > 0 and select_cols[0] < 0:  # pylint: disable=g-explicit-length-test
      raise ValueError("select_cols contains negative values.")
    if len(select_cols) != len(record_defaults):
      raise ValueError(
          "Length of select_cols and record_defaults do not match.")
  return gen_parsing_ops.decode_csv(
      records=records,
      record_defaults=record_defaults,
      field_delim=field_delim,
      use_quote_delim=use_quote_delim,
      na_value=na_value,
      name=name,
      select_cols=select_cols,
  )
1134
1135
def _assert_scalar(value, name):
  """Checks that `value` is a scalar and hands it back.

  Args:
    value: An object exposing `shape.rank`; when that rank is `None` it is also
      passed to the rank/assert ops below.
    name: Label for `value`, used in the error message and in the names of the
      ops created for the dynamic check.

  Returns:
    `value` itself when its static rank is 0. When the static rank is unknown,
    a version of `value` that depends on a runtime rank assertion and whose
    static shape has been set to `[]`.

  Raises:
    ValueError: if the static rank of `value` is known and is not 0.
  """
  static_rank = value.shape.rank

  # Statically known rank: decide immediately, no ops are created.
  if static_rank == 0:
    return value
  if static_rank is not None:
    raise ValueError("Input %s must be a scalar" % name)

  # Unknown rank: defer the check to a runtime assertion on the dynamic rank.
  is_scalar = math_ops.equal(array_ops.rank(value), 0)
  rank_check = control_flow_ops.Assert(
      is_scalar,
      ["Input %s must be a scalar" % name],
      name="%sIsScalar" % name.capitalize())
  guarded = control_flow_ops.with_dependencies(
      [rank_check], value, name="%sDependencies" % name)
  guarded.set_shape([])
  return guarded
1153
1154
@tf_export("io.decode_json_example",
           v1=["decode_json_example", "io.decode_json_example"])
def decode_json_example(json_examples, name=None):
  r"""Convert JSON-encoded Example records to binary protocol buffer strings.

  Note: This is **not** a general purpose JSON parsing op.

  This op converts JSON-serialized `tf.train.Example` (maybe created with
  `json_format.MessageToJson`, following the
  [standard JSON mapping](
  https://developers.google.com/protocol-buffers/docs/proto3#json))
  to a binary-serialized `tf.train.Example` (equivalent to
  `Example.SerializeToString()`) suitable for conversion to tensors with
  `tf.io.parse_example`.

  Here is a `tf.train.Example` proto:

  >>> example = tf.train.Example(
  ...   features=tf.train.Features(
  ...       feature={
  ...           "a": tf.train.Feature(
  ...               int64_list=tf.train.Int64List(
  ...                   value=[1, 1, 3]))}))

  Here it is converted to JSON:

  >>> from google.protobuf import json_format
  >>> example_json = json_format.MessageToJson(example)
  >>> print(example_json)
  {
    "features": {
      "feature": {
        "a": {
          "int64List": {
            "value": [
              "1",
              "1",
              "3"
            ]
          }
        }
      }
    }
  }

  This op converts the above json string to a binary proto:

  >>> example_binary = tf.io.decode_json_example(example_json)
  >>> example_binary.numpy()
  b'\n\x0f\n\r\n\x01a\x12\x08\x1a\x06\x08\x01\x08\x01\x08\x03'

  The op works on string tensors of any shape:

  >>> tf.io.decode_json_example([
  ...     [example_json, example_json],
  ...     [example_json, example_json]]).shape.as_list()
  [2, 2]

  This resulting binary-string is equivalent to `Example.SerializeToString()`,
  and can be converted to Tensors using `tf.io.parse_example` and related
  functions:

  >>> tf.io.parse_example(
  ...   serialized=[example_binary.numpy(),
  ...              example.SerializeToString()],
  ...   features = {'a': tf.io.FixedLenFeature(shape=[3], dtype=tf.int64)})
  {'a': <tf.Tensor: shape=(2, 3), dtype=int64, numpy=
   array([[1, 1, 3],
          [1, 1, 3]])>}

  Args:
    json_examples: A string tensor containing json-serialized `tf.Example`
      protos.
    name: A name for the op.

  Returns:
    A string Tensor containing the binary-serialized `tf.Example` protos.

  Raises:
     `tf.errors.InvalidArgumentError`: If the JSON could not be converted to a
     `tf.Example`
  """
  # Thin wrapper: all of the work is done by the generated op.
  binary_examples = gen_parsing_ops.decode_json_example(
      json_examples, name=name)
  return binary_examples
1238