# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Parsing Ops."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import gen_parsing_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import parsing_config
# go/tf-wildcard-import
# pylint: disable=wildcard-import,undefined-variable
from tensorflow.python.ops.gen_parsing_ops import *
# pylint: enable=wildcard-import,undefined-variable
from tensorflow.python.util import deprecation
from tensorflow.python.util import dispatch
from tensorflow.python.util.tf_export import tf_export


ops.NotDifferentiable("DecodeRaw")
ops.NotDifferentiable("DecodePaddedRaw")
ops.NotDifferentiable("ParseTensor")
ops.NotDifferentiable("SerializeTensor")
ops.NotDifferentiable("StringToNumber")


VarLenFeature = parsing_config.VarLenFeature
RaggedFeature = parsing_config.RaggedFeature
SparseFeature = parsing_config.SparseFeature
FixedLenFeature = parsing_config.FixedLenFeature
FixedLenSequenceFeature = parsing_config.FixedLenSequenceFeature
# pylint: disable=protected-access
_ParseOpParams = parsing_config._ParseOpParams
_construct_tensors_for_composite_features = (
    parsing_config._construct_tensors_for_composite_features)
# pylint: enable=protected-access


# TODO(b/122887740) Switch files that use this private symbol to use new name.
_construct_sparse_tensors_for_sparse_features = \
    _construct_tensors_for_composite_features


def _prepend_none_dimension(features):
  """Returns a copy of features with adjusted FixedLenSequenceFeature shapes."""
  if features:
    modified_features = dict(features)  # Create a copy to modify
    for key, feature in features.items():
      if isinstance(feature, FixedLenSequenceFeature):
        if not feature.allow_missing:
          raise ValueError("Unsupported: FixedLenSequenceFeature requires "
                           "allow_missing to be True.")
        modified_features[key] = FixedLenSequenceFeature(
            [None] + list(feature.shape),
            feature.dtype,
            feature.allow_missing,
            feature.default_value)
    return modified_features
  else:
    return features


@tf_export("io.parse_example", v1=[])
@dispatch.add_dispatch_support
def parse_example_v2(serialized, features, example_names=None, name=None):
  # pylint: disable=line-too-long
  """Parses `Example` protos into a `dict` of tensors.

  Parses a number of serialized [`Example`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
  protos given in `serialized`. We refer to `serialized` as a batch with
  `batch_size` many entries of individual `Example` protos.

  `example_names` may contain descriptive names for the corresponding serialized
  protos. These may be useful for debugging purposes, but they have no effect on
  the output. If not `None`, `example_names` must be the same length as
  `serialized`.

  This op parses serialized examples into a dictionary mapping keys to `Tensor`,
  `SparseTensor`, and `RaggedTensor` objects. `features` is a dict from keys to
  `VarLenFeature`, `SparseFeature`, `RaggedFeature`, and `FixedLenFeature`
  objects. Each `VarLenFeature` and `SparseFeature` is mapped to a
  `SparseTensor`; each `FixedLenFeature` is mapped to a `Tensor`; and each
  `RaggedFeature` is mapped to a `RaggedTensor`.

  Each `VarLenFeature` maps to a `SparseTensor` of the specified type
  representing a ragged matrix. Its indices are `[batch, index]` where `batch`
  identifies the example in `serialized`, and `index` is the value's index in
  the list of values associated with that feature and example.

  Each `SparseFeature` maps to a `SparseTensor` of the specified type
  representing a Tensor of `dense_shape` `[batch_size] + SparseFeature.size`.
  Its `values` come from the feature in the examples with key `value_key`.
  A `values[i]` comes from a position `k` in the feature of an example at batch
  entry `batch`. This positional information is recorded in `indices[i]` as
  `[batch, index_0, index_1, ...]` where `index_j` is the `k`-th value of
  the feature in the example with key `SparseFeature.index_key[j]`.
  In other words, we split the indices (except the first index indicating the
  batch entry) of a `SparseTensor` by dimension into different features of the
  `Example`. Due to its complexity, a `VarLenFeature` should be preferred over a
  `SparseFeature` whenever possible.

  Each `FixedLenFeature` `df` maps to a `Tensor` of the specified type (or
  `tf.float32` if not specified) and shape `(serialized.size(),) + df.shape`.

  `FixedLenFeature` entries with a `default_value` are optional. With no default
  value, we will fail if that `Feature` is missing from any example in
  `serialized`.

  Each `FixedLenSequenceFeature` `df` maps to a `Tensor` of the specified type
  (or `tf.float32` if not specified) and shape
  `(serialized.size(), None) + df.shape`.
  All examples in `serialized` will be padded with `default_value` along the
  second dimension.

  Each `RaggedFeature` maps to a `RaggedTensor` of the specified type. It
  is formed by stacking the `RaggedTensor` for each example, where the
  `RaggedTensor` for each individual example is constructed using the tensors
  specified by `RaggedFeature.values_key` and `RaggedFeature.partition`. See
  the `tf.io.RaggedFeature` documentation for details and examples.

  Examples:

  For example, if one expects a `tf.float32` `VarLenFeature` `ft` and three
  serialized `Example`s are provided:

  ```
  serialized = [
    features
      { feature { key: "ft" value { float_list { value: [1.0, 2.0] } } } },
    features
      { feature {} },
    features
      { feature { key: "ft" value { float_list { value: [3.0] } } } }
  ]
  ```

  then the output will look like:

  ```python
  {"ft": SparseTensor(indices=[[0, 0], [0, 1], [2, 0]],
                      values=[1.0, 2.0, 3.0],
                      dense_shape=(3, 2)) }
  ```

  If instead a `FixedLenSequenceFeature` with `default_value = -1.0` and
  `shape=[]` is used then the output will look like:

  ```python
  {"ft": [[1.0, 2.0], [-1.0, -1.0], [3.0, -1.0]]}
  ```

  Given two `Example` input protos in `serialized`:

  ```
  [
    features {
      feature { key: "kw" value { bytes_list { value: [ "knit", "big" ] } } }
      feature { key: "gps" value { float_list { value: [] } } }
    },
    features {
      feature { key: "kw" value { bytes_list { value: [ "emmy" ] } } }
      feature { key: "dank" value { int64_list { value: [ 42 ] } } }
      feature { key: "gps" value { } }
    }
  ]
  ```

  And arguments

  ```
  example_names: ["input0", "input1"],
  features: {
      "kw": VarLenFeature(tf.string),
      "dank": VarLenFeature(tf.int64),
      "gps": VarLenFeature(tf.float32),
  }
  ```

  Then the output is a dictionary:

  ```python
  {
    "kw": SparseTensor(
        indices=[[0, 0], [0, 1], [1, 0]],
        values=["knit", "big", "emmy"],
        dense_shape=[2, 2]),
    "dank": SparseTensor(
        indices=[[1, 0]],
        values=[42],
        dense_shape=[2, 1]),
    "gps": SparseTensor(
        indices=[],
        values=[],
        dense_shape=[2, 0]),
  }
  ```

  For dense results in two serialized `Example`s:

  ```
  [
    features {
      feature { key: "age" value { int64_list { value: [ 0 ] } } }
      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
    },
    features {
      feature { key: "age" value { int64_list { value: [] } } }
      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
    }
  ]
  ```

  We can use arguments:

  ```
  example_names: ["input0", "input1"],
  features: {
      "age": FixedLenFeature([], dtype=tf.int64, default_value=-1),
      "gender": FixedLenFeature([], dtype=tf.string),
  }
  ```

  And the expected output is:

  ```python
  {
    "age": [0, -1],
    "gender": ["f", "f"],
  }
  ```

  An alternative to `VarLenFeature` to obtain a `SparseTensor` is
  `SparseFeature`.
  For example, given two `Example` input protos in
  `serialized`:

  ```
  [
    features {
      feature { key: "val" value { float_list { value: [ 0.5, -1.0 ] } } }
      feature { key: "ix" value { int64_list { value: [ 3, 20 ] } } }
    },
    features {
      feature { key: "val" value { float_list { value: [ 0.0 ] } } }
      feature { key: "ix" value { int64_list { value: [ 42 ] } } }
    }
  ]
  ```

  And arguments

  ```
  example_names: ["input0", "input1"],
  features: {
      "sparse": SparseFeature(
          index_key="ix", value_key="val", dtype=tf.float32, size=100),
  }
  ```

  Then the output is a dictionary:

  ```python
  {
    "sparse": SparseTensor(
        indices=[[0, 3], [0, 20], [1, 42]],
        values=[0.5, -1.0, 0.0],
        dense_shape=[2, 100]),
  }
  ```

  See the `tf.io.RaggedFeature` documentation for examples showing how
  `RaggedFeature` can be used to obtain `RaggedTensor`s.

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    features: A `dict` mapping feature keys to `FixedLenFeature`,
      `VarLenFeature`, `SparseFeature`, and `RaggedFeature` values.
    example_names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos in the batch.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping feature keys to `Tensor`, `SparseTensor`, and
    `RaggedTensor` values.

  Raises:
    ValueError: if any feature is invalid.
  """
  if not features:
    raise ValueError("Argument `features` cannot be None.")
  features = _prepend_none_dimension(features)
  params = _ParseOpParams.from_features(features, [
      VarLenFeature, SparseFeature, FixedLenFeature, FixedLenSequenceFeature,
      RaggedFeature
  ])

  outputs = _parse_example_raw(serialized, example_names, params, name=name)
  return _construct_tensors_for_composite_features(features, outputs)


@tf_export(v1=["io.parse_example", "parse_example"])
@dispatch.add_dispatch_support
def parse_example(serialized, features, name=None, example_names=None):
  return parse_example_v2(serialized, features, example_names, name)


parse_example.__doc__ = parse_example_v2.__doc__
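

# --------------------------------------------------------------------------
# Illustrative sketch (not part of the public API): an end-to-end use of
# `tf.io.parse_example`, assuming TF2 eager execution. The helper name
# `_example_parse_example_usage` and the feature key below are hypothetical
# and exist only for documentation purposes; the function is never called on
# import.
def _example_parse_example_usage():
  """Builds two serialized `Example` protos and parses them back."""
  import tensorflow as tf  # Local import; only needed for this sketch.

  def make_example(values):
    return tf.train.Example(
        features=tf.train.Features(
            feature={
                "ft": tf.train.Feature(
                    float_list=tf.train.FloatList(value=values))
            })).SerializeToString()

  serialized = [make_example([1.0, 2.0]), make_example([3.0])]
  parsed = tf.io.parse_example(
      serialized,
      features={"ft": tf.io.VarLenFeature(tf.float32)})
  # parsed["ft"] is a SparseTensor with dense_shape [2, 2].
  return parsed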


def _parse_example_raw(serialized, names, params, name):
  """Parses `Example` protos.

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos.
    params: A `ParseOpParams` containing the parameters for the parse op.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping keys to `Tensor`, `SparseTensor`, and `RaggedTensor`
    values.
  """
  if params.num_features == 0:
    raise ValueError("Must provide at least one feature key.")
  with ops.name_scope(name, "ParseExample", [serialized, names]):
    names = [] if names is None else names
    serialized = ops.convert_to_tensor(serialized, name="serialized")
    if params.ragged_keys and serialized.shape.ndims is None:
      raise ValueError("serialized must have statically-known rank to "
                       "parse ragged features.")
    outputs = gen_parsing_ops.parse_example_v2(
        serialized=serialized,
        names=names,
        sparse_keys=params.sparse_keys,
        dense_keys=params.dense_keys,
        ragged_keys=params.ragged_keys,
        dense_defaults=params.dense_defaults_vec,
        num_sparse=len(params.sparse_keys),
        sparse_types=params.sparse_types,
        ragged_value_types=params.ragged_value_types,
        ragged_split_types=params.ragged_split_types,
        dense_shapes=params.dense_shapes_as_proto,
        name=name)
    (sparse_indices, sparse_values, sparse_shapes, dense_values,
     ragged_values, ragged_row_splits) = outputs
    # pylint: disable=protected-access
    ragged_tensors = parsing_config._build_ragged_tensors(
        serialized.shape, ragged_values, ragged_row_splits)

    sparse_tensors = [
        sparse_tensor.SparseTensor(ix, val, shape) for (ix, val, shape)
        in zip(sparse_indices, sparse_values, sparse_shapes)]

    return dict(
        zip(params.sparse_keys + params.dense_keys + params.ragged_keys,
            sparse_tensors + dense_values + ragged_tensors))


@tf_export(v1=["io.parse_single_example", "parse_single_example"])
@dispatch.add_dispatch_support
def parse_single_example(serialized, features, name=None, example_names=None):
  """Parses a single `Example` proto.

  Similar to `parse_example`, except:

  For dense tensors, the returned `Tensor` is identical to the output of
  `parse_example`, except there is no batch dimension: the output shape is the
  same as the shape given in `dense_shape`.

  For `SparseTensor`s, the first (batch) column of the indices matrix is removed
  (the indices matrix is a column vector), the values vector is unchanged, and
  the first (`batch_size`) entry of the shape vector is removed (it is now a
  single element vector).

  One might see performance advantages by batching `Example` protos with
  `parse_example` instead of using this function directly.

  Args:
    serialized: A scalar string Tensor, a single serialized Example.
    features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values.
    name: A name for this operation (optional).
    example_names: (Optional) A scalar string Tensor, the associated name.

  Returns:
    A `dict` mapping feature keys to `Tensor` and `SparseTensor` values.

  Raises:
    ValueError: if any feature is invalid.
  """
  return parse_single_example_v2(serialized, features, example_names, name)


@tf_export("io.parse_single_example", v1=[])
@dispatch.add_dispatch_support
def parse_single_example_v2(
    serialized, features, example_names=None, name=None
    ):
  """Parses a single `Example` proto.

  Similar to `parse_example`, except:

  For dense tensors, the returned `Tensor` is identical to the output of
  `parse_example`, except there is no batch dimension: the output shape is the
  same as the shape given in `dense_shape`.

  For `SparseTensor`s, the first (batch) column of the indices matrix is removed
  (the indices matrix is a column vector), the values vector is unchanged, and
  the first (`batch_size`) entry of the shape vector is removed (it is now a
  single element vector).

  One might see performance advantages by batching `Example` protos with
  `parse_example` instead of using this function directly.

  Args:
    serialized: A scalar string Tensor, a single serialized Example.
    features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values.
    example_names: (Optional) A scalar string Tensor, the associated name.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping feature keys to `Tensor` and `SparseTensor` values.

  Raises:
    ValueError: if any feature is invalid.
  """
  if not features:
    raise ValueError("Invalid argument: features cannot be None.")
  with ops.name_scope(name, "ParseSingleExample", [serialized, example_names]):
    serialized = ops.convert_to_tensor(serialized, name="serialized")
    serialized = _assert_scalar(serialized, "serialized")
    return parse_example_v2(serialized, features, example_names, name)
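

# --------------------------------------------------------------------------
# Illustrative sketch (not part of the public API): parsing one serialized
# `Example` with `tf.io.parse_single_example`. The helper name and feature
# key are hypothetical; the function is never called on import.
def _example_parse_single_example_usage():
  """Parses a single `Example` proto; note the missing batch dimension."""
  import tensorflow as tf  # Local import; only needed for this sketch.

  serialized = tf.train.Example(
      features=tf.train.Features(
          feature={
              "age": tf.train.Feature(
                  int64_list=tf.train.Int64List(value=[7]))
          })).SerializeToString()
  parsed = tf.io.parse_single_example(
      serialized,
      features={"age": tf.io.FixedLenFeature([], tf.int64)})
  # parsed["age"] is a scalar Tensor (shape []), not a batch of size 1.
  return parsed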


@tf_export("io.parse_sequence_example")
@dispatch.add_dispatch_support
def parse_sequence_example(serialized,
                           context_features=None,
                           sequence_features=None,
                           example_names=None,
                           name=None):
  # pylint: disable=line-too-long
  """Parses a batch of `SequenceExample` protos.

  Parses a vector of serialized
  [`SequenceExample`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
  protos given in `serialized`.

  This op parses serialized sequence examples into a tuple of dictionaries,
  each mapping keys to `Tensor` and `SparseTensor` objects.
  The first dictionary contains mappings for keys appearing in
  `context_features`, and the second dictionary contains mappings for keys
  appearing in `sequence_features`.

  At least one of `context_features` and `sequence_features` must be provided
  and non-empty.

  The `context_features` keys are associated with a `SequenceExample` as a
  whole, independent of time / frame. In contrast, the `sequence_features` keys
  provide a way to access variable-length data within the `FeatureList` section
  of the `SequenceExample` proto. While the shapes of `context_features` values
  are fixed with respect to frame, the frame dimension (the first dimension)
  of `sequence_features` values may vary between `SequenceExample` protos,
  and even between `feature_list` keys within the same `SequenceExample`.

  `context_features` contains `VarLenFeature`, `RaggedFeature`, and
  `FixedLenFeature` objects. Each `VarLenFeature` is mapped to a
  `SparseTensor`; each `RaggedFeature` is mapped to a `RaggedTensor`; and each
  `FixedLenFeature` is mapped to a `Tensor`, of the specified type, shape, and
  default value.

  `sequence_features` contains `VarLenFeature`, `RaggedFeature`, and
  `FixedLenSequenceFeature` objects. Each `VarLenFeature` is mapped to a
  `SparseTensor`; each `RaggedFeature` is mapped to a `RaggedTensor`; and
  each `FixedLenSequenceFeature` is mapped to a `Tensor`, each of the specified
  type. The shape will be `(B, T) + df.dense_shape` for
  `FixedLenSequenceFeature` `df`, where `B` is the batch size, and `T` is the
  length of the associated `FeatureList` in the `SequenceExample`. For
  instance, `FixedLenSequenceFeature([])` yields a 2-D `Tensor` of scalars with
  static shape `[None, None]` and dynamic shape `[B, T]`, while
  `FixedLenSequenceFeature([k])` (for `int k >= 1`) yields a 3-D `Tensor`
  of static shape `[None, None, k]` and dynamic shape `[B, T, k]`.

  Like the input, the resulting output tensors have a batch dimension. This
  means that the original per-example shapes of `VarLenFeature`s and
  `FixedLenSequenceFeature`s can be lost. To handle that situation, this op also
  provides dicts of shape tensors as part of the output. There is one dict for
  the context features, and one for the feature_list features. Context features
  of type `FixedLenFeature` will not be present, since their shapes are already
  known by the caller. In situations where the input `FixedLenSequenceFeature`s
  are of different sequence lengths across examples, the shorter examples will
  be padded with default datatype values: 0 for numeric types, and the empty
  string for string types.

  Each `SparseTensor` corresponding to `sequence_features` represents a ragged
  vector. Its indices are `[time, index]`, where `time` is the `FeatureList`
  entry and `index` is the value's index in the list of values associated with
  that time.

  `FixedLenFeature` entries with a `default_value` and `FixedLenSequenceFeature`
  entries with `allow_missing=True` are optional; otherwise, we will fail if
  that `Feature` or `FeatureList` is missing from any example in `serialized`.

  `example_names` may contain descriptive names for the corresponding serialized
  protos. These may be useful for debugging purposes, but they have no effect on
  the output. If not `None`, `example_names` must be the same length as
  `serialized`.

  Args:
    serialized: A vector (1-D Tensor) of type string containing binary
      serialized `SequenceExample` protos.
    context_features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` or `RaggedFeature` values. These features are associated
      with a `SequenceExample` as a whole.
    sequence_features: A `dict` mapping feature keys to
      `FixedLenSequenceFeature` or `VarLenFeature` or `RaggedFeature` values.
      These features are associated with data within the `FeatureList` section
      of the `SequenceExample` proto.
    example_names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos.
    name: A name for this operation (optional).

  Returns:
    A tuple of three `dict`s, each mapping keys to `Tensor`s,
    `SparseTensor`s, and `RaggedTensor`s. The first dict contains the context
    key/values, the second dict contains the feature_list key/values, and the
    final dict contains the lengths of any dense feature_list features.

  Raises:
    ValueError: if any feature is invalid.
  """
  if not (context_features or sequence_features):
    raise ValueError("Both `context_features` and `sequence_features` "
                     "arguments are None, but at least one should have values.")
  context_params = _ParseOpParams.from_features(
      context_features, [VarLenFeature, FixedLenFeature, RaggedFeature])
  feature_list_params = _ParseOpParams.from_features(
      sequence_features,
      [VarLenFeature, FixedLenSequenceFeature, RaggedFeature])

  with ops.name_scope(name, "ParseSequenceExample",
                      [serialized, example_names]):
    outputs = _parse_sequence_example_raw(serialized, example_names,
                                          context_params, feature_list_params,
                                          name)
    context_output, feature_list_output, feature_list_lengths = outputs

    if context_params.ragged_keys:
      context_output = _construct_tensors_for_composite_features(
          context_features, context_output)
    if feature_list_params.ragged_keys:
      feature_list_output = _construct_tensors_for_composite_features(
          sequence_features, feature_list_output)

    return context_output, feature_list_output, feature_list_lengths
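

# --------------------------------------------------------------------------
# Illustrative sketch (not part of the public API): parsing a batch of
# `SequenceExample` protos with both context and sequence features. The helper
# name and feature keys are hypothetical; the function is never called on
# import.
def _example_parse_sequence_example_usage():
  """Builds one `SequenceExample` and parses a batch of size 1."""
  import tensorflow as tf  # Local import; only needed for this sketch.

  seq_example = tf.train.SequenceExample(
      context=tf.train.Features(
          feature={
              "id": tf.train.Feature(
                  bytes_list=tf.train.BytesList(value=[b"ex0"]))
          }),
      feature_lists=tf.train.FeatureLists(
          feature_list={
              "tokens": tf.train.FeatureList(feature=[
                  tf.train.Feature(
                      int64_list=tf.train.Int64List(value=[1, 2])),
                  tf.train.Feature(
                      int64_list=tf.train.Int64List(value=[3, 4])),
              ])
          }))
  context, sequences, lengths = tf.io.parse_sequence_example(
      serialized=[seq_example.SerializeToString()],
      context_features={"id": tf.io.FixedLenFeature([], tf.string)},
      sequence_features={
          "tokens": tf.io.FixedLenSequenceFeature([2], tf.int64)
      })
  # context["id"] has shape [1]; sequences["tokens"] has shape [1, 2, 2];
  # lengths["tokens"] is [2] (two FeatureList entries in the example).
  return context, sequences, lengths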


def _parse_sequence_example_raw(serialized,
                                debug_name,
                                context,
                                feature_list,
                                name=None):
  """Parses a vector of `SequenceExample` protos.

  Args:
    serialized: A vector (1-D Tensor) of type string, containing binary
      serialized `SequenceExample` protos.
    debug_name: A vector (1-D Tensor) of strings (optional), the names of the
      serialized protos.
    context: A `ParseOpParams` containing the parameters for the parse
      op for the context features.
    feature_list: A `ParseOpParams` containing the parameters for the
      parse op for the feature_list features.
    name: A name for this operation (optional).

  Returns:
    A tuple of three `dict`s, each mapping keys to `Tensor`s, `SparseTensor`s,
    and `RaggedTensor`s. The first dict contains the context key/values, the
    second dict contains the feature_list key/values, and the final dict
    contains the lengths of any dense feature_list features.

  Raises:
    TypeError: if feature_list.dense_defaults is not either None or a dict.
  """
  if context.num_features + feature_list.num_features == 0:
    raise ValueError("Must provide at least one feature key.")
  with ops.name_scope(name, "ParseSequenceExample", [serialized]):
    debug_name = [] if debug_name is None else debug_name

    # Internal
    feature_list_dense_missing_assumed_empty = []
    for k, v in feature_list.dense_defaults.items():
      if v is not None:
        raise ValueError("Value feature_list.dense_defaults[%s] must be None" %
                         k)
      feature_list_dense_missing_assumed_empty.append(k)

    has_ragged = context.ragged_keys or feature_list.ragged_keys
    serialized = ops.convert_to_tensor(serialized, name="serialized")
    if has_ragged and serialized.shape.ndims is None:
      raise ValueError("serialized must have statically-known rank to "
                       "parse ragged features.")
    feature_list_dense_missing_assumed_empty_vector = [
        key in feature_list_dense_missing_assumed_empty
        for key in feature_list.dense_keys
    ]
    outputs = gen_parsing_ops.parse_sequence_example_v2(
        # Inputs
        serialized=serialized,
        debug_name=debug_name,
        context_sparse_keys=context.sparse_keys,
        context_dense_keys=context.dense_keys,
        context_ragged_keys=context.ragged_keys,
        feature_list_sparse_keys=feature_list.sparse_keys,
        feature_list_dense_keys=feature_list.dense_keys,
        feature_list_ragged_keys=feature_list.ragged_keys,
        feature_list_dense_missing_assumed_empty=(
            feature_list_dense_missing_assumed_empty_vector),
        context_dense_defaults=context.dense_defaults_vec,
        # Attrs
        Ncontext_sparse=len(context.sparse_keys),
        Nfeature_list_sparse=len(feature_list.sparse_keys),
        Nfeature_list_dense=len(feature_list.dense_keys),
        context_sparse_types=context.sparse_types,
        context_ragged_value_types=context.ragged_value_types,
        context_ragged_split_types=context.ragged_split_types,
        feature_list_dense_types=feature_list.dense_types,
        feature_list_sparse_types=feature_list.sparse_types,
        feature_list_ragged_value_types=feature_list.ragged_value_types,
        feature_list_ragged_split_types=feature_list.ragged_split_types,
        context_dense_shapes=context.dense_shapes_as_proto,
        feature_list_dense_shapes=feature_list.dense_shapes,
        name=name)
    (context_sparse_indices, context_sparse_values, context_sparse_shapes,
     context_dense_values, context_ragged_values, context_ragged_row_splits,
     feature_list_sparse_indices, feature_list_sparse_values,
     feature_list_sparse_shapes, feature_list_dense_values,
     feature_list_dense_lengths, feature_list_ragged_values,
     feature_list_ragged_outer_splits,
     feature_list_ragged_inner_splits) = outputs
    # pylint: disable=protected-access
    context_ragged_tensors = parsing_config._build_ragged_tensors(
        serialized.shape, context_ragged_values, context_ragged_row_splits)
    feature_list_ragged_tensors = parsing_config._build_ragged_tensors(
        serialized.shape, feature_list_ragged_values,
        feature_list_ragged_outer_splits, feature_list_ragged_inner_splits)

    # pylint: disable=g-complex-comprehension
    context_sparse_tensors = [
        sparse_tensor.SparseTensor(ix, val, shape)
        for (ix, val,
             shape) in zip(context_sparse_indices, context_sparse_values,
                           context_sparse_shapes)
    ]

    feature_list_sparse_tensors = [
        sparse_tensor.SparseTensor(ix, val, shape)
        for (ix, val, shape
            ) in zip(feature_list_sparse_indices, feature_list_sparse_values,
                     feature_list_sparse_shapes)
    ]
    # pylint: enable=g-complex-comprehension

    context_output = dict(
        zip(
            context.sparse_keys + context.dense_keys + context.ragged_keys,
            context_sparse_tensors + context_dense_values +
            context_ragged_tensors))
    feature_list_output = dict(
        zip(
            feature_list.sparse_keys + feature_list.dense_keys +
            feature_list.ragged_keys, feature_list_sparse_tensors +
            feature_list_dense_values + feature_list_ragged_tensors))
    feature_list_lengths = dict(
        zip(feature_list.dense_keys, feature_list_dense_lengths))

    return (context_output, feature_list_output, feature_list_lengths)


@tf_export("io.parse_single_sequence_example",
           v1=["io.parse_single_sequence_example",
               "parse_single_sequence_example"])
@dispatch.add_dispatch_support
def parse_single_sequence_example(
    serialized, context_features=None, sequence_features=None,
    example_name=None, name=None):
  # pylint: disable=line-too-long
  """Parses a single `SequenceExample` proto.

  Parses a single serialized [`SequenceExample`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
  proto given in `serialized`.

  This op parses a serialized sequence example into a tuple of dictionaries,
  each mapping keys to `Tensor` and `SparseTensor` objects.
  The first dictionary contains mappings for keys appearing in
  `context_features`, and the second dictionary contains mappings for keys
  appearing in `sequence_features`.

  At least one of `context_features` and `sequence_features` must be provided
  and non-empty.

  The `context_features` keys are associated with a `SequenceExample` as a
  whole, independent of time / frame. In contrast, the `sequence_features` keys
  provide a way to access variable-length data within the `FeatureList` section
  of the `SequenceExample` proto. While the shapes of `context_features` values
  are fixed with respect to frame, the frame dimension (the first dimension)
  of `sequence_features` values may vary between `SequenceExample` protos,
  and even between `feature_list` keys within the same `SequenceExample`.

  `context_features` contains `VarLenFeature`, `RaggedFeature`, and
  `FixedLenFeature` objects. Each `VarLenFeature` is mapped to a `SparseTensor`;
  each `RaggedFeature` is mapped to a `RaggedTensor`; and each `FixedLenFeature`
  is mapped to a `Tensor`, of the specified type, shape, and default value.

  `sequence_features` contains `VarLenFeature`, `RaggedFeature`, and
  `FixedLenSequenceFeature` objects. Each `VarLenFeature` is mapped to a
  `SparseTensor`; each `RaggedFeature` is mapped to a `RaggedTensor`; and each
  `FixedLenSequenceFeature` is mapped to a `Tensor`, each of the specified type.
  The shape will be `(T,) + df.dense_shape` for `FixedLenSequenceFeature` `df`,
  where `T` is the length of the associated `FeatureList` in the
  `SequenceExample`. For instance, `FixedLenSequenceFeature([])` yields a
  1-D `Tensor` of scalars with static shape `[None]` and dynamic shape `[T]`,
  while `FixedLenSequenceFeature([k])` (for `int k >= 1`) yields a 2-D `Tensor`
  of static shape `[None, k]` and dynamic shape `[T, k]`.

  Each `SparseTensor` corresponding to `sequence_features` represents a ragged
  vector. Its indices are `[time, index]`, where `time` is the `FeatureList`
  entry and `index` is the value's index in the list of values associated with
  that time.

  `FixedLenFeature` entries with a `default_value` and `FixedLenSequenceFeature`
  entries with `allow_missing=True` are optional; otherwise, we will fail if
  that `Feature` or `FeatureList` is missing from any example in `serialized`.

  `example_name` may contain a descriptive name for the corresponding serialized
  proto. This may be useful for debugging purposes, but it has no effect on the
  output. If not `None`, `example_name` must be a scalar.

  Note that the batch version of this function, `tf.io.parse_sequence_example`,
  is written for better memory efficiency and will be faster on large
  `SequenceExample`s.

  Args:
    serialized: A scalar (0-D Tensor) of type string, a single binary
      serialized `SequenceExample` proto.
    context_features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` or `RaggedFeature` values. These features are associated
      with a `SequenceExample` as a whole.
    sequence_features: A `dict` mapping feature keys to
      `FixedLenSequenceFeature` or `VarLenFeature` or `RaggedFeature` values.
      These features are associated with data within the `FeatureList` section
      of the `SequenceExample` proto.
    example_name: A scalar (0-D Tensor) of strings (optional), the name of
      the serialized proto.
    name: A name for this operation (optional).

  Returns:
    A tuple of two `dict`s, each mapping keys to `Tensor`s, `SparseTensor`s,
    and `RaggedTensor`s.

    * The first dict contains the context key/values.
    * The second dict contains the feature_list key/values.

  Raises:
    ValueError: if any feature is invalid.
  """
  # pylint: enable=line-too-long
  if not (context_features or sequence_features):
    raise ValueError("Both context_features and sequence_features are None, but"
                     " at least one should have values.")
  context_params = _ParseOpParams.from_features(
      context_features, [VarLenFeature, FixedLenFeature, RaggedFeature])
  feature_list_params = _ParseOpParams.from_features(
      sequence_features,
      [VarLenFeature, FixedLenSequenceFeature, RaggedFeature])

  with ops.name_scope(name, "ParseSingleSequenceExample",
                      [serialized, example_name]):
    context_output, feature_list_output = (
        _parse_single_sequence_example_raw(serialized, context_params,
                                           feature_list_params, example_name,
                                           name))

    if context_params.ragged_keys:
      context_output = _construct_tensors_for_composite_features(
          context_features, context_output)
    if feature_list_params.ragged_keys:
      feature_list_output = _construct_tensors_for_composite_features(
          sequence_features, feature_list_output)

    return context_output, feature_list_output
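

# --------------------------------------------------------------------------
# Illustrative sketch (not part of the public API): the single-example variant
# returns two dicts (no length dict) and drops the batch dimension. The helper
# name and feature key are hypothetical; the function is never called on
# import.
def _example_parse_single_sequence_example_usage():
  """Parses one `SequenceExample` without a batch dimension."""
  import tensorflow as tf  # Local import; only needed for this sketch.

  seq_example = tf.train.SequenceExample(
      feature_lists=tf.train.FeatureLists(
          feature_list={
              "scores": tf.train.FeatureList(feature=[
                  tf.train.Feature(
                      float_list=tf.train.FloatList(value=[0.1])),
                  tf.train.Feature(
                      float_list=tf.train.FloatList(value=[0.9])),
              ])
          }))
  context, sequences = tf.io.parse_single_sequence_example(
      serialized=seq_example.SerializeToString(),
      sequence_features={
          "scores": tf.io.FixedLenSequenceFeature([], tf.float32)
      })
  # sequences["scores"] has shape [T] == [2]; context is an empty dict here
  # because no context_features were requested.
  return context, sequences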


def _parse_single_sequence_example_raw(serialized,
                                       context,
                                       feature_list,
                                       debug_name,
                                       name=None):
  """Parses a single `SequenceExample` proto.

  Args:
    serialized: A scalar (0-D Tensor) of type string, a single binary serialized
      `SequenceExample` proto.
    context: A `ParseOpParams` containing the parameters for the parse op for
      the context features.
    feature_list: A `ParseOpParams` containing the parameters for the parse op
      for the feature_list features.
    debug_name: A scalar (0-D Tensor) of strings (optional), the name of the
      serialized proto.
    name: A name for this operation (optional).

  Returns:
    A tuple of two `dict`s, each mapping keys to `Tensor`s and `SparseTensor`s.
    The first dict contains the context key/values.
    The second dict contains the feature_list key/values.

  Raises:
    TypeError: if feature_list.dense_defaults is not either None or a dict.
  """
  with ops.name_scope(name, "ParseSingleExample", [serialized, debug_name]):
    serialized = ops.convert_to_tensor(serialized, name="serialized")
    serialized = _assert_scalar(serialized, "serialized")
    return _parse_sequence_example_raw(serialized, debug_name, context,
                                       feature_list, name)[:2]


@tf_export("io.decode_raw", v1=[])
@dispatch.add_dispatch_support
def decode_raw(input_bytes,
               out_type,
               little_endian=True,
               fixed_length=None,
               name=None):
  r"""Convert raw bytes from input tensor into numeric tensors.

  Every component of the input tensor is interpreted as a sequence of bytes.
  These bytes are then decoded as numbers in the format specified by `out_type`.

  >>> tf.io.decode_raw(tf.constant("1"), tf.uint8)
  <tf.Tensor: shape=(1,), dtype=uint8, numpy=array([49], dtype=uint8)>
  >>> tf.io.decode_raw(tf.constant("1,2"), tf.uint8)
  <tf.Tensor: shape=(3,), dtype=uint8, numpy=array([49, 44, 50], dtype=uint8)>

  Note that the rank of the output tensor is always one more than the input one:

  >>> tf.io.decode_raw(tf.constant(["1","2"]), tf.uint8).shape
  TensorShape([2, 1])
  >>> tf.io.decode_raw(tf.constant([["1"],["2"]]), tf.uint8).shape
  TensorShape([2, 1, 1])

  This is because each byte in the input is converted to a new value on the
  output (if output type is `uint8` or `int8`, otherwise chunks of inputs get
  converted to a new value):

  >>> tf.io.decode_raw(tf.constant("123"), tf.uint8)
  <tf.Tensor: shape=(3,), dtype=uint8, numpy=array([49, 50, 51], dtype=uint8)>
  >>> tf.io.decode_raw(tf.constant("1234"), tf.uint8)
  <tf.Tensor: shape=(4,), dtype=uint8, numpy=array([49, 50, 51, 52], ...
  >>> # chunked output
  >>> tf.io.decode_raw(tf.constant("12"), tf.uint16)
  <tf.Tensor: shape=(1,), dtype=uint16, numpy=array([12849], dtype=uint16)>
  >>> tf.io.decode_raw(tf.constant("1234"), tf.uint16)
  <tf.Tensor: shape=(2,), dtype=uint16, numpy=array([12849, 13363], ...
  >>> # int64 output
  >>> tf.io.decode_raw(tf.constant("12345678"), tf.int64)
  <tf.Tensor: ... numpy=array([4050765991979987505])>
  >>> tf.io.decode_raw(tf.constant("1234567887654321"), tf.int64)
  <tf.Tensor: ... numpy=array([4050765991979987505, 3544952156018063160])>

  The operation allows specifying endianness via the `little_endian` parameter.

  >>> tf.io.decode_raw(tf.constant("\x0a\x0b"), tf.int16)
  <tf.Tensor: shape=(1,), dtype=int16, numpy=array([2826], dtype=int16)>
  >>> hex(2826)
  '0xb0a'
  >>> tf.io.decode_raw(tf.constant("\x0a\x0b"), tf.int16, little_endian=False)
  <tf.Tensor: shape=(1,), dtype=int16, numpy=array([2571], dtype=int16)>
  >>> hex(2571)
  '0xa0b'

  If the elements of `input_bytes` are of different length, you must specify
  `fixed_length`:

  >>> tf.io.decode_raw(tf.constant([["1"],["23"]]), tf.uint8, fixed_length=4)
  <tf.Tensor: shape=(2, 1, 4), dtype=uint8, numpy=
  array([[[49,  0,  0,  0]],
         [[50, 51,  0,  0]]], dtype=uint8)>

  If the `fixed_length` value is larger than the length of the `out_type` dtype,
  multiple values are generated:

  >>> tf.io.decode_raw(tf.constant(["1212"]), tf.uint16, fixed_length=4)
  <tf.Tensor: shape=(1, 2), dtype=uint16, numpy=array([[12849, 12849]], ...

  If the input value is larger than `fixed_length`, it is truncated:

  >>> x = ''.join([chr(1), chr(2), chr(3), chr(4)])
  >>> tf.io.decode_raw(x, tf.uint16, fixed_length=2)
  <tf.Tensor: shape=(1,), dtype=uint16, numpy=array([513], dtype=uint16)>
  >>> hex(513)
  '0x201'

  If `little_endian` and `fixed_length` are specified, truncation to the fixed
  length occurs before endianness conversion:

  >>> x = ''.join([chr(1), chr(2), chr(3), chr(4)])
  >>> tf.io.decode_raw(x, tf.uint16, fixed_length=2, little_endian=False)
  <tf.Tensor: shape=(1,), dtype=uint16, numpy=array([258], dtype=uint16)>
  >>> hex(258)
  '0x102'

  If input values all have the same length, then specifying `fixed_length`
  equal to the size of the strings should not change output:

  >>> x = ["12345678", "87654321"]
  >>> tf.io.decode_raw(x, tf.int16)
  <tf.Tensor: shape=(2, 4), dtype=int16, numpy=
  array([[12849, 13363, 13877, 14391],
         [14136, 13622, 13108, 12594]], dtype=int16)>
  >>> tf.io.decode_raw(x, tf.int16, fixed_length=len(x[0]))
  <tf.Tensor: shape=(2, 4), dtype=int16, numpy=
  array([[12849, 13363, 13877, 14391],
         [14136, 13622, 13108, 12594]], dtype=int16)>

  Args:
    input_bytes:
      Each element of the input Tensor is converted to an array of bytes.

      Currently, this must be a tensor of strings (bytes), although semantically
      the operation should support any input.
    out_type:
      `DType` of the output. Acceptable types are `half`, `float`, `double`,
      `int32`, `uint16`, `uint8`, `int16`, `int8`, `int64`.
    little_endian:
      Whether the `input_bytes` data is in little-endian format. Data will be
      converted into host byte order if necessary.
    fixed_length:
      If set, the first `fixed_length` bytes of each element will be converted.
      Data will be zero-padded or truncated to the specified length.

      `fixed_length` must be a multiple of the size of `out_type`.

      `fixed_length` must be specified if the elements of `input_bytes` are of
      variable length.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` object storing the decoded bytes.
  """
  if fixed_length is not None:
    return gen_parsing_ops.decode_padded_raw(
        input_bytes,
        fixed_length=fixed_length,
        out_type=out_type,
        little_endian=little_endian,
        name=name)
  else:
    return gen_parsing_ops.decode_raw(
        input_bytes, out_type, little_endian=little_endian, name=name)
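

# --------------------------------------------------------------------------
# Illustrative sketch (not part of the public API): a typical use of
# `tf.io.decode_raw` is recovering a numeric array that was stored as its raw
# byte string, e.g. via `numpy.ndarray.tobytes()`. The helper name is
# hypothetical; the function is never called on import.
def _example_decode_raw_usage():
  """Round-trips a float32 array through its raw little-endian bytes."""
  import numpy as np  # Local imports; only needed for this sketch.
  import tensorflow as tf

  original = np.array([1.5, -2.0, 3.25], dtype=np.float32)
  raw = original.tobytes()  # 12 bytes: 3 values * 4 bytes each.
  decoded = tf.io.decode_raw(raw, tf.float32)  # little_endian=True by default.
  # decoded is a float32 Tensor of shape [3] equal to `original`, assuming the
  # producer wrote little-endian data (as numpy does on little-endian hosts).
  return decoded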


@tf_export(v1=["decode_raw", "io.decode_raw"])
@dispatch.add_dispatch_support
@deprecation.deprecated_args(None,
                             "bytes is deprecated, use input_bytes instead",
                             "bytes")
def decode_raw_v1(
    input_bytes=None,
    out_type=None,
    little_endian=True,
    name=None,
    bytes=None  # pylint: disable=redefined-builtin
):
  """Convert raw byte strings into tensors.

  Args:
    input_bytes:
      Each element of the input Tensor is converted to an array of bytes.
    out_type:
      `DType` of the output. Acceptable types are `half`, `float`, `double`,
      `int32`, `uint16`, `uint8`, `int16`, `int8`, `int64`.
    little_endian:
      Whether the `input_bytes` data is in little-endian format. Data will be
      converted into host byte order if necessary.
    name: A name for the operation (optional).
    bytes: Deprecated parameter. Use `input_bytes` instead.

  Returns:
    A `Tensor` object storing the decoded bytes.
  """
  input_bytes = deprecation.deprecated_argument_lookup("input_bytes",
                                                       input_bytes, "bytes",
                                                       bytes)

  # out_type is a required positional argument in the original API, and had to
  # be changed to a keyword argument in order to facilitate the transition from
  # the reserved name `bytes` to `input_bytes`. Ensure it's still set.
  if out_type is None:
    raise ValueError(
        "decode_raw_v1() missing 1 positional argument: 'out_type'")

  return gen_parsing_ops.decode_raw(
      input_bytes, out_type, little_endian=little_endian, name=name)


# Swap `name` and `na_value` for backward compatibility.
@tf_export(v1=["io.decode_csv", "decode_csv"])
@dispatch.add_dispatch_support
@deprecation.deprecated_endpoints("decode_csv")
def decode_csv(records,
               record_defaults,
               field_delim=",",
               use_quote_delim=True,
               name=None,
               na_value="",
               select_cols=None):
  """Convert CSV records to tensors. Each column maps to one tensor.

  RFC 4180 format is expected for the CSV records.
  (https://tools.ietf.org/html/rfc4180)
  Note that we allow leading and trailing spaces with int or float fields.

  Args:
    records: A `Tensor` of type `string`.
      Each string is a record/row in the csv and all records should have
      the same format.
    record_defaults: A list of `Tensor` objects with specific types.
      Acceptable types are `float32`, `float64`, `int32`, `int64`, `string`.
      One tensor per column of the input record, with either a
      scalar default value for that column or an empty vector if the column is
      required.
    field_delim: An optional `string`. Defaults to `","`.
      Char delimiter to separate fields in a record.
    use_quote_delim: An optional `bool`. Defaults to `True`.
      If false, treats double quotation marks as regular
      characters inside of the string fields (ignoring RFC 4180, Section 2,
      Bullet 5).
    name: A name for the operation (optional).
    na_value: Additional string to recognize as NA/NaN.
    select_cols: Optional sorted list of column indices to select. If specified,
      only this subset of columns will be parsed and returned.

  Returns:
    A list of `Tensor` objects. Has the same type as `record_defaults`.
    Each tensor will have the same shape as records.

  Raises:
    ValueError: If any of the arguments is malformed.
  """
  return decode_csv_v2(
      records, record_defaults,
      field_delim, use_quote_delim,
      na_value, select_cols, name
  )


@tf_export("io.decode_csv", v1=[])
@dispatch.add_dispatch_support
def decode_csv_v2(records,
                  record_defaults,
                  field_delim=",",
                  use_quote_delim=True,
                  na_value="",
                  select_cols=None,
                  name=None):
  """Convert CSV records to tensors. Each column maps to one tensor.

  RFC 4180 format is expected for the CSV records.
  (https://tools.ietf.org/html/rfc4180)
  Note that we allow leading and trailing spaces with int or float fields.

  Args:
    records: A `Tensor` of type `string`.
      Each string is a record/row in the csv and all records should have
      the same format.
    record_defaults: A list of `Tensor` objects with specific types.
      Acceptable types are `float32`, `float64`, `int32`, `int64`, `string`.
      One tensor per column of the input record, with either a
      scalar default value for that column or an empty vector if the column is
      required.
    field_delim: An optional `string`. Defaults to `","`.
      Char delimiter to separate fields in a record.
    use_quote_delim: An optional `bool`. Defaults to `True`.
      If false, treats double quotation marks as regular
      characters inside of the string fields (ignoring RFC 4180, Section 2,
      Bullet 5).
    na_value: Additional string to recognize as NA/NaN.
    select_cols: Optional sorted list of column indices to select. If specified,
      only this subset of columns will be parsed and returned.
    name: A name for the operation (optional).

  Returns:
    A list of `Tensor` objects. Has the same type as `record_defaults`.
    Each tensor will have the same shape as records.

  Raises:
    ValueError: If any of the arguments is malformed.
  """
  if select_cols is not None and any(select_cols[i] >= select_cols[i + 1]
                                     for i in range(len(select_cols) - 1)):
    raise ValueError("select_cols is not strictly increasing.")
  if select_cols is not None and select_cols[0] < 0:
    raise ValueError("select_cols contains negative values.")
  if select_cols is not None and len(select_cols) != len(record_defaults):
    raise ValueError("Length of select_cols and record_defaults do not match.")
  return gen_parsing_ops.decode_csv(
      records=records,
      record_defaults=record_defaults,
      field_delim=field_delim,
      use_quote_delim=use_quote_delim,
      na_value=na_value,
      name=name,
      select_cols=select_cols,
  )
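

# --------------------------------------------------------------------------
# Illustrative sketch (not part of the public API): decoding CSV records with
# `tf.io.decode_csv`, where `record_defaults` fixes both the column dtypes and
# the values used for missing fields. The helper name is hypothetical; the
# function is never called on import.
def _example_decode_csv_usage():
  """Decodes two CSV rows into per-column tensors."""
  import tensorflow as tf  # Local import; only needed for this sketch.

  records = tf.constant(["1,2.5,hello", "4,,world"])
  # One default per column; a non-empty default makes that column optional.
  record_defaults = [
      tf.constant([0], dtype=tf.int32),       # column 0: int32, default 0
      tf.constant([-1.0], dtype=tf.float32),  # column 1: float32, default -1.0
      tf.constant(["?"], dtype=tf.string),    # column 2: string, default "?"
  ]
  col0, col1, col2 = tf.io.decode_csv(records, record_defaults)
  # col0 == [1, 4]; col1 == [2.5, -1.0] (missing field filled with default);
  # col2 == [b"hello", b"world"]. Each column tensor has the shape of `records`.
  return col0, col1, col2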


def _assert_scalar(value, name):
  """Asserts that `value` is scalar, and returns `value`."""
  value_rank = value.shape.rank
  if value_rank is None:
    check = control_flow_ops.Assert(
        math_ops.equal(array_ops.rank(value), 0),
        ["Input %s must be a scalar" % name],
        name="%sIsScalar" % name.capitalize())
    result = control_flow_ops.with_dependencies([check],
                                                value,
                                                name="%sDependencies" % name)
    result.set_shape([])
    return result
  elif value_rank == 0:
    return value
  else:
    raise ValueError("Input %s must be a scalar" % name)


@tf_export("io.decode_json_example",
           v1=["decode_json_example", "io.decode_json_example"])
def decode_json_example(json_examples, name=None):
  r"""Convert JSON-encoded Example records to binary protocol buffer strings.

  Note: This is **not** a general purpose JSON parsing op.

  This op converts JSON-serialized `tf.train.Example` (maybe created with
  `json_format.MessageToJson`, following the
  [standard JSON mapping](
  https://developers.google.com/protocol-buffers/docs/proto3#json))
  to a binary-serialized `tf.train.Example` (equivalent to
  `Example.SerializeToString()`) suitable for conversion to tensors with
  `tf.io.parse_example`.

  Here is a `tf.train.Example` proto:

  >>> example = tf.train.Example(
  ...   features=tf.train.Features(
  ...       feature={
  ...           "a": tf.train.Feature(
  ...               int64_list=tf.train.Int64List(
  ...                   value=[1, 1, 3]))}))

  Here it is converted to JSON:

  >>> from google.protobuf import json_format
  >>> example_json = json_format.MessageToJson(example)
  >>> print(example_json)
  {
    "features": {
      "feature": {
        "a": {
          "int64List": {
            "value": [
              "1",
              "1",
              "3"
            ]
          }
        }
      }
    }
  }

  This op converts the above json string to a binary proto:

  >>> example_binary = tf.io.decode_json_example(example_json)
  >>> example_binary.numpy()
  b'\n\x0f\n\r\n\x01a\x12\x08\x1a\x06\x08\x01\x08\x01\x08\x03'

  The op works on string tensors of any shape:

  >>> tf.io.decode_json_example([
  ...     [example_json, example_json],
  ...     [example_json, example_json]]).shape.as_list()
  [2, 2]

  This resulting binary-string is equivalent to `Example.SerializeToString()`,
  and can be converted to Tensors using `tf.io.parse_example` and related
  functions:

  >>> tf.io.parse_example(
  ...   serialized=[example_binary.numpy(),
  ...               example.SerializeToString()],
  ...   features = {'a': tf.io.FixedLenFeature(shape=[3], dtype=tf.int64)})
  {'a': <tf.Tensor: shape=(2, 3), dtype=int64, numpy=
  array([[1, 1, 3],
         [1, 1, 3]])>}

  Args:
    json_examples: A string tensor containing json-serialized `tf.Example`
      protos.
    name: A name for the op.

  Returns:
    A string Tensor containing the binary-serialized `tf.Example` protos.

  Raises:
    `tf.errors.InvalidArgumentError`: If the JSON could not be converted to a
      `tf.Example`.
  """
  return gen_parsing_ops.decode_json_example(json_examples, name=name)
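

# --------------------------------------------------------------------------
# Illustrative sketch (not part of the public API): `tf.io.decode_json_example`
# composes naturally with `tf.data` to turn JSON-encoded Example records into
# parsed tensors. The helper name and the "label" feature key are hypothetical;
# the function is never called on import.
def _example_decode_json_example_pipeline(json_lines):
  """Maps JSON-encoded Example strings to parsed feature dicts.

  Args:
    json_lines: An iterable of JSON strings, each a JSON-serialized
      `tf.train.Example` with an int64 feature named "label".

  Returns:
    A `tf.data.Dataset` of `{"label": <int64 scalar>}` dicts.
  """
  import tensorflow as tf  # Local import; only needed for this sketch.

  dataset = tf.data.Dataset.from_tensor_slices(list(json_lines))
  dataset = dataset.map(tf.io.decode_json_example)  # JSON -> binary proto.
  return dataset.map(
      lambda serialized: tf.io.parse_single_example(
          serialized, {"label": tf.io.FixedLenFeature([], tf.int64)}))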