# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tensorflow.ops.parsing_ops."""

import itertools

import numpy as np

from tensorflow.core.example import example_pb2
from tensorflow.core.example import feature_pb2
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors_impl
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import test_util
from tensorflow.python.ops import parsing_ops
from tensorflow.python.platform import test
from tensorflow.python.platform import tf_logging

# Helpers for creating Example objects
example = example_pb2.Example
feature = feature_pb2.Feature
features = lambda d: feature_pb2.Features(feature=d)
bytes_feature = lambda v: feature(bytes_list=feature_pb2.BytesList(value=v))
int64_feature = lambda v: feature(int64_list=feature_pb2.Int64List(value=v))
float_feature = lambda v: feature(float_list=feature_pb2.FloatList(value=v))
# Helpers for creating SequenceExample objects
feature_list = lambda l: feature_pb2.FeatureList(feature=l)
feature_lists = lambda d: feature_pb2.FeatureLists(feature_list=d)
sequence_example = example_pb2.SequenceExample


def empty_sparse(dtype, shape=None):
  """Returns the (indices, values, dense_shape) triple of an empty sparse value.

  Args:
    dtype: NumPy dtype of the (empty) values array.
    shape: Dense shape to report; defaults to `[0]` when None.

  Returns:
    A tuple of three NumPy arrays: int64 indices of shape `(0, len(shape))`,
    an empty values array of `dtype`, and `shape` as an int64 array.
  """
  if shape is None:
    shape = [0]
  return (np.empty(shape=(0, len(shape)), dtype=np.int64),
          np.array([], dtype=dtype), np.array(shape, dtype=np.int64))


def flatten(list_of_lists):
  """Flatten one level of nesting."""
  return itertools.chain.from_iterable(list_of_lists)


def flatten_values_tensors_or_sparse(tensors_list):
  """Flatten each SparseTensor object into 3 Tensors for session.run()."""
  return list(
      flatten([[v.indices, v.values, v.dense_shape] if isinstance(
          v, sparse_tensor.SparseTensor) else [v] for v in tensors_list]))


def _compare_output_to_expected(tester, dict_tensors, expected_tensors,
                                flat_output):
  """Asserts that evaluated parser outputs match the expected values.

  Args:
    tester: The test case whose assert methods are used.
    dict_tensors: Dict mapping feature key to the Tensor or SparseTensor
      returned by the parser.
    expected_tensors: Dict with the same keys as `dict_tensors`. A dense entry
      holds the expected array; a sparse entry holds a 3-tuple of
      (indices, values, shape).
    flat_output: Evaluated values, in the order produced by calling
      `flatten_values_tensors_or_sparse(dict_tensors.values())`.
  """
  tester.assertEqual(set(dict_tensors.keys()), set(expected_tensors.keys()))

  i = 0  # Index into the flattened output of session.run()
  for k, v in dict_tensors.items():
    expected_v = expected_tensors[k]
    tf_logging.info("Comparing key: %s", k)
    if isinstance(v, sparse_tensor.SparseTensor):
      # Three outputs for SparseTensor : indices, values, shape.
      # The key is included in both lists so a failure names the feature.
      tester.assertEqual([k, len(expected_v)], [k, 3])
      tester.assertAllEqual(expected_v[0], flat_output[i])
      tester.assertAllEqual(expected_v[1], flat_output[i + 1])
      tester.assertAllEqual(expected_v[2], flat_output[i + 2])
      i += 3
    else:
      # One output for standard Tensor.
      tester.assertAllEqual(expected_v, flat_output[i])
      i += 1


class ParseExampleTest(test.TestCase):
  """Tests `parse_single_example`, with and without `example_names`."""

  def _test(self, kwargs, expected_values=None, expected_err=None):
    """Parses one example and checks its values, shapes, or expected error.

    Args:
      kwargs: Keyword arguments forwarded to
        `parsing_ops.parse_single_example`; must contain a "features" entry.
      expected_values: Dict of expected outputs, in the format accepted by
        `_compare_output_to_expected`. Unused when `expected_err` is set.
      expected_err: Optional `(exception_type, predicate_or_regex)` pair. When
        given, building or running the parse must raise a matching error and
        no value or shape checks are performed.
    """
    with self.cached_session() as sess:
      if expected_err:
        with self.assertRaisesWithPredicateMatch(expected_err[0],
                                                 expected_err[1]):
          out = parsing_ops.parse_single_example(**kwargs)
          sess.run(flatten_values_tensors_or_sparse(out.values()))
        return

      # Returns dict w/ Tensors and SparseTensors.
      out = parsing_ops.parse_single_example(**kwargs)
      # Also include a test with the example names specified to retain
      # code coverage of the unfused version, and ensure that the two
      # versions produce the same results.
      out_with_example_name = parsing_ops.parse_single_example(
          example_names="name", **kwargs)
      for result_dict in [out, out_with_example_name]:
        result = flatten_values_tensors_or_sparse(result_dict.values())
        # Check values.
        tf_result = self.evaluate(result)
        _compare_output_to_expected(self, result_dict, expected_values,
                                    tf_result)

      # Check static shapes.
      for k, f in kwargs["features"].items():
        if isinstance(f, parsing_ops.FixedLenFeature) and f.shape is not None:
          self.assertEqual(tuple(out[k].get_shape().as_list()), f.shape)
        elif isinstance(f, parsing_ops.VarLenFeature):
          self.assertEqual(
              tuple(out[k].indices.get_shape().as_list()), (None, 1))
          self.assertEqual(tuple(out[k].values.get_shape().as_list()), (None,))
          self.assertEqual(
              tuple(out[k].dense_shape.get_shape().as_list()), (1,))

  @test_util.run_deprecated_v1
  def testEmptySerializedWithAllDefaults(self):
    """An empty serialized string yields the defaults of every dense feature."""
    sparse_name = "st_a"
    a_name = "a"
    b_name = "b"
    c_name = "c:has_a_tricky_name"
    a_default = [0, 42, 0]
    b_default = np.random.rand(3, 3).astype(bytes)
    c_default = np.random.rand(2).astype(np.float32)

    expected_st_a = (  # indices, values, shape
        np.empty((0, 1), dtype=np.int64),  # indices
        np.empty((0,), dtype=np.int64),  # sp_a is DT_INT64
        np.array([0], dtype=np.int64))  # max_elems = 0

    expected_output = {
        sparse_name: expected_st_a,
        a_name: np.array([a_default]),
        b_name: np.array(b_default),
        c_name: np.array(c_default),
    }

    self._test({
        "serialized": ops.convert_to_tensor(""),
        "features": {
            sparse_name:
                parsing_ops.VarLenFeature(dtypes.int64),
            a_name:
                parsing_ops.FixedLenFeature(
                    (1, 3), dtypes.int64, default_value=a_default),
            b_name:
                parsing_ops.FixedLenFeature(
                    (3, 3), dtypes.string, default_value=b_default),
            c_name:
                parsing_ops.FixedLenFeature(
                    (2,), dtypes.float32, default_value=c_default),
        }
    }, expected_output)

  def testEmptySerializedWithoutDefaultsShouldFail(self):
    """A dense feature with no default and no value raises an OpError."""
    input_features = {
        "st_a":
            parsing_ops.VarLenFeature(dtypes.int64),
        "a":
            parsing_ops.FixedLenFeature(
                (1, 3), dtypes.int64, default_value=[0, 42, 0]),
        "b":
            parsing_ops.FixedLenFeature(
                (3, 3),
                dtypes.string,
                default_value=np.random.rand(3, 3).astype(bytes)),
        # Feature "c" is missing a default, this gap will cause failure.
        "c":
            parsing_ops.FixedLenFeature(
                (2,), dtype=dtypes.float32),
    }

    # Edge case where the key is there but the feature value is empty
    original = example(features=features({"c": feature()}))
    self._test(
        {
            "serialized": original.SerializeToString(),
            "features": input_features,
        },
        expected_err=(errors_impl.OpError,
                      "Feature: c \\(data type: float\\) is required"))

    # Standard case of missing key and value.
    self._test(
        {
            "serialized": "",
            "features": input_features,
        },
        expected_err=(errors_impl.OpError,
                      "Feature: c \\(data type: float\\) is required"))

  def testDenseNotMatchingShapeShouldFail(self):
    """Two floats cannot fill a (1, 3) dense feature; expects an OpError."""
    original = example(features=features({
        "a": float_feature([-1, -1]),
    }))

    serialized = original.SerializeToString()

    self._test(
        {
            "serialized": ops.convert_to_tensor(serialized),
            "features": {
                "a": parsing_ops.FixedLenFeature((1, 3), dtypes.float32)
            }
        },
        # TODO(mrry): Consider matching the `io.parse_example()` error message.
        expected_err=(errors_impl.OpError, "Key: a."))

  def testDenseDefaultNoShapeShouldFail(self):
    """A FixedLenFeature whose shape is None is rejected with a ValueError."""
    original = example(features=features({
        "a": float_feature([1, 1, 3]),
    }))

    serialized = original.SerializeToString()

    self._test(
        {
            "serialized": ops.convert_to_tensor(serialized),
            "features": {
                "a": parsing_ops.FixedLenFeature(None, dtypes.float32)
            }
        },
        expected_err=(ValueError, "Missing shape for feature a"))

  @test_util.run_deprecated_v1
  def testSerializedContainingSparse(self):
    """VarLenFeatures parse to sparse values; absent/empty ones are empty."""
    original = [
        example(features=features({
            "st_c": float_feature([3, 4])
        })),
        example(features=features({
            "st_c": float_feature([]),  # empty float list
        })),
        example(features=features({
            "st_d": feature(),  # feature with nothing in it
        })),
        example(features=features({
            "st_c": float_feature([1, 2, -1]),
            "st_d": bytes_feature([b"hi"])
        }))
    ]

    expected_outputs = [{
        "st_c": (np.array([[0], [1]], dtype=np.int64),
                 np.array([3.0, 4.0], dtype=np.float32),
                 np.array([2], dtype=np.int64)),
        "st_d":
            empty_sparse(bytes)
    }, {
        "st_c": empty_sparse(np.float32),
        "st_d": empty_sparse(bytes)
    }, {
        "st_c": empty_sparse(np.float32),
        "st_d": empty_sparse(bytes)
    }, {
        "st_c": (np.array([[0], [1], [2]], dtype=np.int64),
                 np.array([1.0, 2.0, -1.0], dtype=np.float32),
                 np.array([3], dtype=np.int64)),
        "st_d": (np.array([[0]], dtype=np.int64), np.array(["hi"], dtype=bytes),
                 np.array([1], dtype=np.int64))
    }]

    for proto, expected_output in zip(original, expected_outputs):
      self._test({
          "serialized": ops.convert_to_tensor(proto.SerializeToString()),
          "features": {
              "st_c": parsing_ops.VarLenFeature(dtypes.float32),
              "st_d": parsing_ops.VarLenFeature(dtypes.string)
          },
      }, expected_output)

  def testSerializedContainingSparseFeature(self):
    """A SparseFeature pairs "idx" with "val"; unsorted indices get sorted."""
    original = [
        example(features=features({
            "val": float_feature([3, 4]),
            "idx": int64_feature([5, 10])
        })),
        example(features=features({
            "val": float_feature([]),  # empty float list
            "idx": int64_feature([])
        })),
        example(features=features({
            "val": feature(),  # feature with nothing in it
            # missing idx feature
        })),
        example(features=features({
            "val": float_feature([1, 2, -1]),
            "idx":
                int64_feature([0, 9, 3])  # unsorted
        }))
    ]

    expected_outputs = [{
        "sp": (np.array([[5], [10]], dtype=np.int64),
               np.array([3.0, 4.0], dtype=np.float32),
               np.array([13], dtype=np.int64))
    }, {
        "sp": empty_sparse(np.float32, shape=[13])
    }, {
        "sp": empty_sparse(np.float32, shape=[13])
    }, {
        "sp": (np.array([[0], [3], [9]], dtype=np.int64),
               np.array([1.0, -1.0, 2.0], dtype=np.float32),
               np.array([13], dtype=np.int64))
    }]

    for proto, expected_output in zip(original, expected_outputs):
      self._test({
          "serialized": ops.convert_to_tensor(proto.SerializeToString()),
          "features": {
              "sp":
                  parsing_ops.SparseFeature(["idx"], "val", dtypes.float32,
                                            [13])
          }
      }, expected_output)

  def testSerializedContainingSparseFeatureReuse(self):
    """Two SparseFeatures may share the same index key ("idx")."""
    original = [
        example(features=features({
            "val1": float_feature([3, 4]),
            "val2": float_feature([5, 6]),
            "idx": int64_feature([5, 10])
        })),
        example(features=features({
            "val1": float_feature([]),  # empty float list
            "idx": int64_feature([])
        })),
    ]

    expected_outputs = [{
        "sp1": (np.array([[5], [10]], dtype=np.int64),
                np.array([3.0, 4.0], dtype=np.float32),
                np.array([13], dtype=np.int64)),
        "sp2": (np.array([[5], [10]], dtype=np.int64),
                np.array([5.0, 6.0], dtype=np.float32),
                np.array([7], dtype=np.int64))
    }, {
        "sp1": empty_sparse(np.float32, shape=[13]),
        "sp2": empty_sparse(np.float32, shape=[7])
    }]

    for proto, expected_output in zip(original, expected_outputs):
      self._test({
          "serialized": ops.convert_to_tensor(proto.SerializeToString()),
          "features": {
              "sp1":
                  parsing_ops.SparseFeature("idx", "val1", dtypes.float32, 13),
              "sp2":
                  parsing_ops.SparseFeature(
                      "idx",
                      "val2",
                      dtypes.float32,
                      size=7,
                      already_sorted=True)
          }
      }, expected_output)

  def testSerializedContaining3DSparseFeature(self):
    """A SparseFeature with two index keys yields rank-2 sparse indices."""
    original = [
        example(features=features({
            "val": float_feature([3, 4]),
            "idx0": int64_feature([5, 10]),
            "idx1": int64_feature([0, 2]),
        })),
        example(features=features({
            "val": float_feature([]),  # empty float list
            "idx0": int64_feature([]),
            "idx1": int64_feature([]),
        })),
        example(features=features({
            "val": feature(),  # feature with nothing in it
            # missing idx feature
        })),
        example(features=features({
            "val": float_feature([1, 2, -1]),
            "idx0": int64_feature([0, 9, 3]),  # unsorted
            "idx1": int64_feature([1, 0, 2]),
        }))
    ]

    expected_outputs = [{
        "sp": (np.array([[5, 0], [10, 2]], dtype=np.int64),
               np.array([3.0, 4.0], dtype=np.float32),
               np.array([13, 3], dtype=np.int64))
    }, {
        "sp": empty_sparse(np.float32, shape=[13, 3])
    }, {
        "sp": empty_sparse(np.float32, shape=[13, 3])
    }, {
        "sp": (np.array([[0, 1], [3, 2], [9, 0]], dtype=np.int64),
               np.array([1.0, -1.0, 2.0], dtype=np.float32),
               np.array([13, 3], dtype=np.int64))
    }]

    for proto, expected_output in zip(original, expected_outputs):
      self._test({
          "serialized": ops.convert_to_tensor(proto.SerializeToString()),
          "features": {
              "sp":
                  parsing_ops.SparseFeature(["idx0", "idx1"], "val",
                                            dtypes.float32, [13, 3])
          }
      }, expected_output)

  def testSerializedContainingDense(self):
    """Dense features without defaults are reshaped to the declared shape."""
    aname = "a"
    bname = "b*has+a:tricky_name"
    original = [
        example(features=features({
            aname: float_feature([1, 1]),
            bname: bytes_feature([b"b0_str"]),
        })),
        example(features=features({
            aname: float_feature([-1, -1]),
            bname: bytes_feature([b""]),
        }))
    ]

    # pylint: disable=too-many-function-args
    expected_outputs = [
        {
            aname:
                np.array([1, 1], dtype=np.float32).reshape(1, 2, 1),
            bname:
                np.array(["b0_str"], dtype=bytes).reshape(
                    1, 1, 1, 1)
        },
        {
            aname:
                np.array([-1, -1], dtype=np.float32).reshape(1, 2, 1),
            bname:
                np.array([""], dtype=bytes).reshape(
                    1, 1, 1, 1)
        }
    ]
    # pylint: enable=too-many-function-args

    for proto, expected_output in zip(original, expected_outputs):
      # No defaults, values required
      self._test({
          "serialized": ops.convert_to_tensor(proto.SerializeToString()),
          "features": {
              aname:
                  parsing_ops.FixedLenFeature((1, 2, 1), dtype=dtypes.float32),
              bname:
                  parsing_ops.FixedLenFeature(
                      (1, 1, 1, 1), dtype=dtypes.string),
          }
      }, expected_output)

  # This test is identical as the previous one except
  # for the creation of 'serialized'.
  def testSerializedContainingDenseWithConcat(self):
    """Parses two concatenated serialized Examples.

    For keys present in both messages, the expected values are those of the
    second message.
    """
    aname = "a"
    bname = "b*has+a:tricky_name"
    # TODO(lew): Feature appearing twice should be an error in future.
    original = [
        (example(features=features({
            aname: float_feature([10, 10]),
        })), example(features=features({
            aname: float_feature([1, 1]),
            bname: bytes_feature([b"b0_str"]),
        }))),
        (
            example(features=features({
                bname: bytes_feature([b"b100"]),
            })),
            example(features=features({
                aname: float_feature([-1, -1]),
                bname: bytes_feature([b"b1"]),
            })),),
    ]

    # pylint: disable=too-many-function-args
    expected_outputs = [
        {
            aname:
                np.array([1, 1], dtype=np.float32).reshape(1, 2, 1),
            bname:
                np.array(["b0_str"], dtype=bytes).reshape(
                    1, 1, 1, 1)
        },
        {
            aname:
                np.array([-1, -1], dtype=np.float32).reshape(1, 2, 1),
            bname:
                np.array(["b1"], dtype=bytes).reshape(
                    1, 1, 1, 1)
        }
    ]
    # pylint: enable=too-many-function-args

    for (m, n), expected_output in zip(original, expected_outputs):
      # No defaults, values required
      self._test({
          "serialized":
              ops.convert_to_tensor(
                  m.SerializeToString() + n.SerializeToString()),
          "features": {
              aname:
                  parsing_ops.FixedLenFeature((1, 2, 1), dtype=dtypes.float32),
              bname:
                  parsing_ops.FixedLenFeature(
                      (1, 1, 1, 1), dtype=dtypes.string),
          }
      }, expected_output)

  def testSerializedContainingDenseScalar(self):
    """A scalar default fills a shape-(1,) dense feature that is missing."""
    original = [
        example(features=features({
            "a": float_feature([1]),
        })), example(features=features({}))
    ]

    expected_outputs = [{
        "a": np.array([1], dtype=np.float32)
    }, {
        "a": np.array([-1], dtype=np.float32)
    }]

    for proto, expected_output in zip(original, expected_outputs):
      self._test({
          "serialized": ops.convert_to_tensor(proto.SerializeToString()),
          "features": {
              "a":
                  parsing_ops.FixedLenFeature(
                      (1,), dtype=dtypes.float32, default_value=-1),
          }
      }, expected_output)

  def testSerializedContainingDenseWithDefaults(self):
    """Missing or empty dense features fall back to their default values."""
    original = [
        example(features=features({
            "a": float_feature([1, 1]),
        })),
        example(features=features({
            "b": bytes_feature([b"b1"]),
        })),
        example(features=features({
            "b": feature()
        })),
    ]

    # pylint: disable=too-many-function-args
    expected_outputs = [
        {
            "a":
                np.array([1, 1], dtype=np.float32).reshape(1, 2, 1),
            "b":
                np.array("tmp_str", dtype=bytes).reshape(
                    1, 1, 1, 1)
        },
        {
            "a":
                np.array([3, -3], dtype=np.float32).reshape(1, 2, 1),
            "b":
                np.array("b1", dtype=bytes).reshape(
                    1, 1, 1, 1)
        },
        {
            "a":
                np.array([3, -3], dtype=np.float32).reshape(1, 2, 1),
            "b":
                np.array("tmp_str", dtype=bytes).reshape(
                    1, 1, 1, 1)
        }
    ]
    # pylint: enable=too-many-function-args

    for proto, expected_output in zip(original, expected_outputs):
      self._test({
          "serialized": ops.convert_to_tensor(proto.SerializeToString()),
          "features": {
              "a":
                  parsing_ops.FixedLenFeature(
                      (1, 2, 1),
                      dtype=dtypes.float32,
                      default_value=[3.0, -3.0]),
              "b":
                  parsing_ops.FixedLenFeature(
                      (1, 1, 1, 1),
                      dtype=dtypes.string,
                      default_value="tmp_str"),
          }
      }, expected_output)

  @test_util.run_deprecated_v1
  def testSerializedContainingSparseAndSparseFeatureAndDenseWithNoDefault(self):
    """Mixes VarLen, SparseFeature and dense features in one spec."""
    original = [
        example(features=features({
            "c": float_feature([3, 4]),
            "val": bytes_feature([b"a", b"b"]),
            "idx": int64_feature([0, 3])
        })), example(features=features({
            "c": float_feature([1, 2]),
            "val": bytes_feature([b"c"]),
            "idx": int64_feature([7])
        }))
    ]

    a_default = np.array([[1, 2, 3]], dtype=np.int64)
    b_default = np.random.rand(3, 3).astype(bytes)

    expected_st_a = empty_sparse(np.int64)

    expected_outputs = [{
        "st_a":
            expected_st_a,
        "sp": (np.array([[0], [3]], dtype=np.int64),
               np.array(["a", "b"], dtype=bytes), np.array(
                   [13], dtype=np.int64)),
        "a":
            a_default,
        "b":
            b_default,
        "c":
            np.array([3, 4], dtype=np.float32)
    }, {
        "st_a":
            expected_st_a,
        "sp": (np.array([[7]], dtype=np.int64), np.array(["c"], dtype=bytes),
               np.array([13], dtype=np.int64)),
        "a":
            a_default,
        "b":
            b_default,
        "c":
            np.array([1, 2], dtype=np.float32)
    }]

    for proto, expected_output in zip(original, expected_outputs):
      self._test(
          {
              "serialized": ops.convert_to_tensor(proto.SerializeToString()),
              "features": {
                  "st_a":
                      parsing_ops.VarLenFeature(dtypes.int64),
                  "sp":
                      parsing_ops.SparseFeature("idx", "val", dtypes.string, 13
                                               ),
                  "a":
                      parsing_ops.FixedLenFeature(
                          (1, 3), dtypes.int64, default_value=a_default),
                  "b":
                      parsing_ops.FixedLenFeature(
                          (3, 3), dtypes.string, default_value=b_default),
                  # Feature "c" must be provided, since it has no default_value.
                  "c":
                      parsing_ops.FixedLenFeature((2,), dtypes.float32),
              }
          },
          expected_output)

  @test_util.run_deprecated_v1
  def testSerializedContainingSparseAndSparseFeatureWithReuse(self):
    """"idx" is read both as a VarLenFeature and as a SparseFeature index."""
    original = [
        example(features=features({
            "val": bytes_feature([b"a", b"b"]),
            "idx": int64_feature([0, 3])
        })), example(features=features({
            "val": bytes_feature([b"c", b"d"]),
            "idx": int64_feature([7, 1])
        }))
    ]

    expected_outputs = [{
        "idx": (np.array([[0], [1]], dtype=np.int64),
                np.array([0, 3], dtype=np.int64), np.array([2],
                                                           dtype=np.int64)),
        "sp": (np.array([[0], [3]], dtype=np.int64),
               np.array(["a", "b"], dtype=bytes), np.array(
                   [13], dtype=np.int64))
    },
                        {
                            "idx": (np.array([[0], [1]], dtype=np.int64),
                                    np.array([7, 1], dtype=np.int64),
                                    np.array([2], dtype=np.int64)),
                            "sp": (np.array([[1], [7]], dtype=np.int64),
                                   np.array(["d", "c"], dtype=bytes),
                                   np.array([13], dtype=np.int64))
                        }]

    for proto, expected_output in zip(original, expected_outputs):
      self._test({
          "serialized": ops.convert_to_tensor(proto.SerializeToString()),
          "features": {
              "idx":
                  parsing_ops.VarLenFeature(dtypes.int64),
              "sp":
                  parsing_ops.SparseFeature(["idx"], "val", dtypes.string, [13]
                                           ),
          }
      }, expected_output)

  @test_util.run_deprecated_v1
  def testSerializedContainingVarLenDense(self):
    """Exercises FixedLenSequenceFeature, plus several invalid feature specs."""
    aname = "a"
    bname = "b"
    cname = "c"
    dname = "d"
    original = [
        example(features=features({
            cname: int64_feature([2]),
        })),
        example(features=features({
            aname: float_feature([1, 1]),
            bname: bytes_feature([b"b0_str", b"b1_str"]),
        })),
        example(features=features({
            aname: float_feature([-1, -1, 2, 2]),
            bname: bytes_feature([b"b1"]),
        })),
        example(features=features({
            aname: float_feature([]),
            cname: int64_feature([3]),
        })),
    ]

    # pylint: disable=too-many-function-args
    expected_outputs = [
        {
            aname: np.empty(shape=(0, 2, 1), dtype=np.int64),
            bname: np.empty(shape=(0, 1, 1, 1), dtype=bytes),
            cname: np.array([2], dtype=np.int64),
            dname: np.empty(shape=(0,), dtype=bytes)
        },
        {
            aname:
                np.array([[[1], [1]]], dtype=np.float32),
            bname:
                np.array(["b0_str", "b1_str"], dtype=bytes).reshape(2, 1, 1, 1),
            cname:
                np.empty(shape=(0,), dtype=np.int64),
            dname:
                np.empty(shape=(0,), dtype=bytes)
        },
        {
            aname: np.array([[[-1], [-1]], [[2], [2]]], dtype=np.float32),
            bname: np.array(["b1"], dtype=bytes).reshape(1, 1, 1, 1),
            cname: np.empty(shape=(0,), dtype=np.int64),
            dname: np.empty(shape=(0,), dtype=bytes)
        },
        {
            aname: np.empty(shape=(0, 2, 1), dtype=np.int64),
            bname: np.empty(shape=(0, 1, 1, 1), dtype=bytes),
            cname: np.array([3], dtype=np.int64),
            dname: np.empty(shape=(0,), dtype=bytes)
        },
    ]
    # pylint: enable=too-many-function-args

    # The same spec is used by both loops below, so it is built only once.
    varlen_features = {
        aname:
            parsing_ops.FixedLenSequenceFeature(
                (2, 1), dtype=dtypes.float32, allow_missing=True),
        bname:
            parsing_ops.FixedLenSequenceFeature(
                (1, 1, 1), dtype=dtypes.string, allow_missing=True),
        cname:
            parsing_ops.FixedLenSequenceFeature(
                shape=[], dtype=dtypes.int64, allow_missing=True),
        dname:
            parsing_ops.FixedLenSequenceFeature(
                shape=[], dtype=dtypes.string, allow_missing=True),
    }

    for proto, expected_output in zip(original, expected_outputs):
      self._test({
          "serialized": ops.convert_to_tensor(proto.SerializeToString()),
          "features": varlen_features,
      }, expected_output)

    # Test with padding values.
    # NOTE(mrry): Since we parse a single example at a time, the fixed-length
    # sequences will not be padded, and the padding value will be ignored.
    # NOTE(review): no `default_value` is passed here, so this loop currently
    # repeats the one above with an identical spec -- confirm whether explicit
    # padding values were intended.
    for proto, expected_output in zip(original, expected_outputs):
      self._test({
          "serialized": ops.convert_to_tensor(proto.SerializeToString()),
          "features": varlen_features,
      }, expected_output)

    # Change number of required values so the inputs are not a
    # multiple of this size.
    self._test(
        {
            "serialized":
                ops.convert_to_tensor(original[2].SerializeToString()),
            "features": {
                aname:
                    parsing_ops.FixedLenSequenceFeature(
                        (2, 1), dtype=dtypes.float32, allow_missing=True),
                bname:
                    parsing_ops.FixedLenSequenceFeature(
                        (2, 1, 1), dtype=dtypes.string, allow_missing=True),
            }
        },
        # TODO(mrry): Consider matching the `io.parse_example()` error message.
        expected_err=(errors_impl.OpError, "Key: b."))

    # An empty list is not accepted as a default value.
    self._test(
        {
            "serialized": ops.convert_to_tensor(""),
            "features": {
                aname:
                    parsing_ops.FixedLenSequenceFeature(
                        (2, 1),
                        dtype=dtypes.float32,
                        allow_missing=True,
                        default_value=[]),
                bname:
                    parsing_ops.FixedLenSequenceFeature(
                        (2, 1, 1), dtype=dtypes.string, allow_missing=True),
            }
        },
        expected_err=(ValueError,
                      "Cannot reshape a tensor with 0 elements to shape"))

    # A FixedLenFeature may not have an unknown first dimension.
    self._test(
        {
            "serialized": ops.convert_to_tensor(""),
            "features": {
                aname:
                    parsing_ops.FixedLenFeature(
                        (None, 2, 1), dtype=dtypes.float32),
                bname:
                    parsing_ops.FixedLenSequenceFeature(
                        (2, 1, 1), dtype=dtypes.string, allow_missing=True),
            }
        },
        expected_err=(ValueError,
                      "First dimension of shape for feature a unknown. "
                      "Consider using FixedLenSequenceFeature."))

    # Nor may any other dimension of a FixedLenFeature be unknown.
    self._test(
        {
            "serialized": ops.convert_to_tensor(""),
            "features": {
                cname:
                    parsing_ops.FixedLenFeature(
                        (1, None), dtype=dtypes.int64, default_value=[[1]]),
            }
        },
        expected_err=(ValueError,
                      "All dimensions of shape for feature c need to be known "
                      r"but received \(1, None\)."))

    # FixedLenSequenceFeature with allow_missing=False is rejected.
    self._test(
        {
            "serialized": ops.convert_to_tensor(""),
            "features": {
                aname:
                    parsing_ops.FixedLenSequenceFeature(
                        (2, 1), dtype=dtypes.float32, allow_missing=True),
                bname:
                    parsing_ops.FixedLenSequenceFeature(
                        (1, 1, 1), dtype=dtypes.string, allow_missing=True),
                cname:
                    parsing_ops.FixedLenSequenceFeature(
                        shape=[], dtype=dtypes.int64, allow_missing=False),
                dname:
                    parsing_ops.FixedLenSequenceFeature(
                        shape=[], dtype=dtypes.string, allow_missing=True),
            }
        },
        expected_err=(ValueError,
                      "Unsupported: FixedLenSequenceFeature requires "
                      "allow_missing to be True."))


class ParseSingleExampleTest(test.TestCase):
  """Tests `parse_single_example` on individual serialized Examples."""

  def _test(self, kwargs, expected_values=None, expected_err=None):
    """Parses one example and checks its values, shapes, or expected error.

    Args:
      kwargs: Keyword arguments forwarded to
        `parsing_ops.parse_single_example`; must contain a "features" entry.
      expected_values: Dict of expected outputs, in the format accepted by
        `_compare_output_to_expected`. Unused when `expected_err` is set.
      expected_err: Optional `(exception_type, predicate_or_regex)` pair. When
        given, building or running the parse must raise a matching error and
        no value or shape checks are performed.
    """
    with self.cached_session() as sess:
      if expected_err:
        with self.assertRaisesWithPredicateMatch(expected_err[0],
                                                 expected_err[1]):
          out = parsing_ops.parse_single_example(**kwargs)
          sess.run(flatten_values_tensors_or_sparse(out.values()))
        return

      # Returns dict w/ Tensors and SparseTensors.
      out = parsing_ops.parse_single_example(**kwargs)
      # Check values.
      tf_result = sess.run(flatten_values_tensors_or_sparse(out.values()))
      _compare_output_to_expected(self, out, expected_values, tf_result)

      # Check shapes.
      for k, f in kwargs["features"].items():
        if isinstance(f, parsing_ops.FixedLenFeature) and f.shape is not None:
          # as_shape() is used because tests in this class also pass a bare
          # int as the feature shape.
          self.assertEqual(tuple(out[k].get_shape()),
                           tensor_shape.as_shape(f.shape))
        elif isinstance(f, parsing_ops.VarLenFeature):
          self.assertEqual(
              tuple(out[k].indices.get_shape().as_list()), (None, 1))
          self.assertEqual(tuple(out[k].values.get_shape().as_list()), (None,))
          self.assertEqual(
              tuple(out[k].dense_shape.get_shape().as_list()), (1,))

  @test_util.run_deprecated_v1
  def testSingleExampleWithSparseAndSparseFeatureAndDense(self):
    """Parses one Example through every feature kind in a single spec."""
    original = example(features=features({
        "c": float_feature([3, 4]),
        "d": float_feature([0.0, 1.0]),
        "val": bytes_feature([b"a", b"b"]),
        "idx": int64_feature([0, 3]),
        "st_a": float_feature([3.0, 4.0])
    }))

    serialized = original.SerializeToString()

    expected_st_a = (
        np.array(
            [[0], [1]], dtype=np.int64),  # indices
        np.array(
            [3.0, 4.0], dtype=np.float32),  # values
        np.array(
            [2], dtype=np.int64))  # shape: max_values = 2

    expected_sp = (  # indices, values, shape
        np.array(
            [[0], [3]], dtype=np.int64), np.array(
                ["a", "b"], dtype="|S"), np.array(
                    [13], dtype=np.int64))  # max_values = 13

    a_default = [1, 2, 3]
    b_default = np.random.rand(3, 3).astype(bytes)
    expected_output = {
        "st_a": expected_st_a,
        "sp": expected_sp,
        "a": [a_default],
        "b": b_default,
        "c": np.array([3, 4], dtype=np.float32),
        "d": np.array([0.0, 1.0], dtype=np.float32),
    }

    self._test(
        {
            "serialized":
                ops.convert_to_tensor(serialized),
            "features": {
                "st_a":
                    parsing_ops.VarLenFeature(dtypes.float32),
                "sp":
                    parsing_ops.SparseFeature(
                        ["idx"], "val", dtypes.string, [13]),
                "a":
                    parsing_ops.FixedLenFeature(
                        (1, 3), dtypes.int64, default_value=a_default),
                "b":
                    parsing_ops.FixedLenFeature(
                        (3, 3), dtypes.string, default_value=b_default),
                # Feature "c" must be provided, since it has no default_value.
                "c":
                    parsing_ops.FixedLenFeature(2, dtypes.float32),
                "d":
                    parsing_ops.FixedLenSequenceFeature([],
                                                        dtypes.float32,
                                                        allow_missing=True)
            }
        },
        expected_output)

  def testExampleLongerThanSpec(self):
    """Two values for a length-1 FixedLenFeature raise an OpError."""
    serialized = example(
        features=features({
            "a": bytes_feature([b"a", b"b"]),
        })).SerializeToString()
    self._test(
        {
            "serialized": ops.convert_to_tensor(serialized),
            "features": {
                "a": parsing_ops.FixedLenFeature(1, dtypes.string)
            }
        },
        expected_err=(errors_impl.OpError, "Can't parse serialized Example"))


if __name__ == "__main__":
  test.main()