• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Tests for tensorflow.ops.parsing_ops."""
16
17import itertools
18
19import numpy as np
20
21from tensorflow.core.example import example_pb2
22from tensorflow.core.example import feature_pb2
23from tensorflow.python.framework import dtypes
24from tensorflow.python.framework import errors_impl
25from tensorflow.python.framework import ops
26from tensorflow.python.framework import sparse_tensor
27from tensorflow.python.framework import tensor_shape
28from tensorflow.python.framework import test_util
29from tensorflow.python.ops import parsing_ops
30from tensorflow.python.platform import test
31from tensorflow.python.platform import tf_logging
32
# Shorthand constructors for building `Example` protos.
example = example_pb2.Example
feature = feature_pb2.Feature


def features(d):
  """Returns a `Features` proto whose `feature` field is set from `d`."""
  return feature_pb2.Features(feature=d)


def bytes_feature(v):
  """Returns a `Feature` wrapping a `BytesList` with values `v`."""
  return feature(bytes_list=feature_pb2.BytesList(value=v))


def int64_feature(v):
  """Returns a `Feature` wrapping an `Int64List` with values `v`."""
  return feature(int64_list=feature_pb2.Int64List(value=v))


def float_feature(v):
  """Returns a `Feature` wrapping a `FloatList` with values `v`."""
  return feature(float_list=feature_pb2.FloatList(value=v))


# Shorthand constructors for building `SequenceExample` protos.
def feature_list(l):
  """Returns a `FeatureList` proto whose `feature` field is set from `l`."""
  return feature_pb2.FeatureList(feature=l)


def feature_lists(d):
  """Returns a `FeatureLists` proto whose `feature_list` field is `d`."""
  return feature_pb2.FeatureLists(feature_list=d)


sequence_example = example_pb2.SequenceExample
44
45
def empty_sparse(dtype, shape=None):
  """Builds the numpy triple describing a sparse tensor with no entries.

  Args:
    dtype: Numpy-compatible dtype of the (empty) values array.
    shape: Dense shape of the sparse tensor; `[0]` is used when omitted.

  Returns:
    An `(indices, values, dense_shape)` tuple of numpy arrays. `indices` is
    int64 with shape `(0, len(shape))`, `values` is an empty array of `dtype`,
    and `dense_shape` is `shape` as an int64 array.
  """
  dense_shape = [0] if shape is None else shape
  rank = len(dense_shape)
  indices = np.empty(shape=(0, rank), dtype=np.int64)
  values = np.array([], dtype=dtype)
  return indices, values, np.array(dense_shape, dtype=np.int64)
51
52
def flatten(list_of_lists):
  """Lazily chains the inner iterables of `list_of_lists` together.

  Only one level of nesting is removed; deeper nesting is left untouched.

  Args:
    list_of_lists: An iterable of iterables.

  Returns:
    An iterator over the items of each inner iterable, in order.
  """
  chain_one_level = itertools.chain.from_iterable
  return chain_one_level(list_of_lists)
56
57
def flatten_values_tensors_or_sparse(tensors_list):
  """Expands every SparseTensor into its three component tensors.

  Args:
    tensors_list: An iterable of Tensors and/or SparseTensors.

  Returns:
    A flat list suitable for session.run(): each SparseTensor contributes
    its indices, values and dense_shape (in that order); any other element is
    kept as is.
  """
  flat = []
  for item in tensors_list:
    if isinstance(item, sparse_tensor.SparseTensor):
      flat.extend([item.indices, item.values, item.dense_shape])
    else:
      flat.append(item)
  return flat
63
64
def _compare_output_to_expected(tester, dict_tensors, expected_tensors,
                                flat_output):
  """Checks flattened session output against per-key expected values.

  Args:
    tester: Test case object providing `assertEqual` and `assertAllEqual`.
    dict_tensors: Dict of parsed Tensors / SparseTensors; its iteration order
      defines how `flat_output` is laid out.
    expected_tensors: Dict with the same keys. A SparseTensor entry expects a
      3-element (indices, values, shape) sequence; any other entry expects a
      single value.
    flat_output: Sequence of evaluated results, three slots per SparseTensor
      and one slot per other tensor.
  """
  tester.assertEqual(set(dict_tensors.keys()), set(expected_tensors.keys()))

  cursor = 0  # Position in the flattened output of session.run()
  for key, tensor in dict_tensors.items():
    expected = expected_tensors[key]
    tf_logging.info("Comparing key: %s", key)
    if not isinstance(tensor, sparse_tensor.SparseTensor):
      # A standard Tensor occupies a single output slot.
      tester.assertAllEqual(expected, flat_output[cursor])
      cursor += 1
      continue
    # A SparseTensor occupies three slots: indices, values, shape. The key is
    # included in the comparison so that a failure names the feature.
    tester.assertEqual([key, len(expected)], [key, 3])
    for offset in range(3):
      tester.assertAllEqual(expected[offset], flat_output[cursor + offset])
    cursor += 3
84
85
86class ParseExampleTest(test.TestCase):
87
88  def _test(self, kwargs, expected_values=None, expected_err=None):
89    with self.cached_session() as sess:
90      if expected_err:
91        with self.assertRaisesWithPredicateMatch(expected_err[0],
92                                                 expected_err[1]):
93          out = parsing_ops.parse_single_example(**kwargs)
94          sess.run(flatten_values_tensors_or_sparse(out.values()))
95        return
96      else:
97        # Returns dict w/ Tensors and SparseTensors.
98        out = parsing_ops.parse_single_example(**kwargs)
99        # Also include a test with the example names specified to retain
100        # code coverage of the unfused version, and ensure that the two
101        # versions produce the same results.
102        out_with_example_name = parsing_ops.parse_single_example(
103            example_names="name", **kwargs)
104        for result_dict in [out, out_with_example_name]:
105          result = flatten_values_tensors_or_sparse(result_dict.values())
106          # Check values.
107          tf_result = self.evaluate(result)
108          _compare_output_to_expected(self, result_dict, expected_values,
109                                      tf_result)
110
111      for k, f in kwargs["features"].items():
112        if isinstance(f, parsing_ops.FixedLenFeature) and f.shape is not None:
113          self.assertEqual(tuple(out[k].get_shape().as_list()), f.shape)
114        elif isinstance(f, parsing_ops.VarLenFeature):
115          self.assertEqual(
116              tuple(out[k].indices.get_shape().as_list()), (None, 1))
117          self.assertEqual(tuple(out[k].values.get_shape().as_list()), (None,))
118          self.assertEqual(
119              tuple(out[k].dense_shape.get_shape().as_list()), (1,))
120
121  @test_util.run_deprecated_v1
122  def testEmptySerializedWithAllDefaults(self):
123    sparse_name = "st_a"
124    a_name = "a"
125    b_name = "b"
126    c_name = "c:has_a_tricky_name"
127    a_default = [0, 42, 0]
128    b_default = np.random.rand(3, 3).astype(bytes)
129    c_default = np.random.rand(2).astype(np.float32)
130
131    expected_st_a = (  # indices, values, shape
132        np.empty((0, 1), dtype=np.int64),  # indices
133        np.empty((0,), dtype=np.int64),  # sp_a is DT_INT64
134        np.array([0], dtype=np.int64))  # max_elems = 0
135
136    expected_output = {
137        sparse_name: expected_st_a,
138        a_name: np.array([a_default]),
139        b_name: np.array(b_default),
140        c_name: np.array(c_default),
141    }
142
143    self._test({
144        "serialized": ops.convert_to_tensor(""),
145        "features": {
146            sparse_name:
147                parsing_ops.VarLenFeature(dtypes.int64),
148            a_name:
149                parsing_ops.FixedLenFeature(
150                    (1, 3), dtypes.int64, default_value=a_default),
151            b_name:
152                parsing_ops.FixedLenFeature(
153                    (3, 3), dtypes.string, default_value=b_default),
154            c_name:
155                parsing_ops.FixedLenFeature(
156                    (2,), dtypes.float32, default_value=c_default),
157        }
158    }, expected_output)
159
160  def testEmptySerializedWithoutDefaultsShouldFail(self):
161    input_features = {
162        "st_a":
163            parsing_ops.VarLenFeature(dtypes.int64),
164        "a":
165            parsing_ops.FixedLenFeature(
166                (1, 3), dtypes.int64, default_value=[0, 42, 0]),
167        "b":
168            parsing_ops.FixedLenFeature(
169                (3, 3),
170                dtypes.string,
171                default_value=np.random.rand(3, 3).astype(bytes)),
172        # Feature "c" is missing a default, this gap will cause failure.
173        "c":
174            parsing_ops.FixedLenFeature(
175                (2,), dtype=dtypes.float32),
176    }
177
178    # Edge case where the key is there but the feature value is empty
179    original = example(features=features({"c": feature()}))
180    self._test(
181        {
182            "serialized": original.SerializeToString(),
183            "features": input_features,
184        },
185        expected_err=(errors_impl.OpError,
186                      "Feature: c \\(data type: float\\) is required"))
187
188    # Standard case of missing key and value.
189    self._test(
190        {
191            "serialized": "",
192            "features": input_features,
193        },
194        expected_err=(errors_impl.OpError,
195                      "Feature: c \\(data type: float\\) is required"))
196
197  def testDenseNotMatchingShapeShouldFail(self):
198    original = example(features=features({
199        "a": float_feature([-1, -1]),
200    }))
201
202    serialized = original.SerializeToString()
203
204    self._test(
205        {
206            "serialized": ops.convert_to_tensor(serialized),
207            "features": {
208                "a": parsing_ops.FixedLenFeature((1, 3), dtypes.float32)
209            }
210        },
211        # TODO(mrry): Consider matching the `io.parse_example()` error message.
212        expected_err=(errors_impl.OpError, "Key: a."))
213
214  def testDenseDefaultNoShapeShouldFail(self):
215    original = example(features=features({
216        "a": float_feature([1, 1, 3]),
217    }))
218
219    serialized = original.SerializeToString()
220
221    self._test(
222        {
223            "serialized": ops.convert_to_tensor(serialized),
224            "features": {
225                "a": parsing_ops.FixedLenFeature(None, dtypes.float32)
226            }
227        },
228        expected_err=(ValueError, "Missing shape for feature a"))
229
230  @test_util.run_deprecated_v1
231  def testSerializedContainingSparse(self):
232    original = [
233        example(features=features({
234            "st_c": float_feature([3, 4])
235        })),
236        example(features=features({
237            "st_c": float_feature([]),  # empty float list
238        })),
239        example(features=features({
240            "st_d": feature(),  # feature with nothing in it
241        })),
242        example(features=features({
243            "st_c": float_feature([1, 2, -1]),
244            "st_d": bytes_feature([b"hi"])
245        }))
246    ]
247
248    expected_outputs = [{
249        "st_c": (np.array([[0], [1]], dtype=np.int64),
250                 np.array([3.0, 4.0], dtype=np.float32),
251                 np.array([2], dtype=np.int64)),
252        "st_d":
253            empty_sparse(bytes)
254    }, {
255        "st_c": empty_sparse(np.float32),
256        "st_d": empty_sparse(bytes)
257    }, {
258        "st_c": empty_sparse(np.float32),
259        "st_d": empty_sparse(bytes)
260    }, {
261        "st_c": (np.array([[0], [1], [2]], dtype=np.int64),
262                 np.array([1.0, 2.0, -1.0], dtype=np.float32),
263                 np.array([3], dtype=np.int64)),
264        "st_d": (np.array([[0]], dtype=np.int64), np.array(["hi"], dtype=bytes),
265                 np.array([1], dtype=np.int64))
266    }]
267
268    for proto, expected_output in zip(original, expected_outputs):
269      self._test({
270          "serialized": ops.convert_to_tensor(proto.SerializeToString()),
271          "features": {
272              "st_c": parsing_ops.VarLenFeature(dtypes.float32),
273              "st_d": parsing_ops.VarLenFeature(dtypes.string)
274          },
275      }, expected_output)
276
277  def testSerializedContainingSparseFeature(self):
278    original = [
279        example(features=features({
280            "val": float_feature([3, 4]),
281            "idx": int64_feature([5, 10])
282        })),
283        example(features=features({
284            "val": float_feature([]),  # empty float list
285            "idx": int64_feature([])
286        })),
287        example(features=features({
288            "val": feature(),  # feature with nothing in it
289            # missing idx feature
290        })),
291        example(features=features({
292            "val": float_feature([1, 2, -1]),
293            "idx":
294                int64_feature([0, 9, 3])  # unsorted
295        }))
296    ]
297
298    expected_outputs = [{
299        "sp": (np.array([[5], [10]], dtype=np.int64),
300               np.array([3.0, 4.0], dtype=np.float32),
301               np.array([13], dtype=np.int64))
302    }, {
303        "sp": empty_sparse(np.float32, shape=[13])
304    }, {
305        "sp": empty_sparse(np.float32, shape=[13])
306    }, {
307        "sp": (np.array([[0], [3], [9]], dtype=np.int64),
308               np.array([1.0, -1.0, 2.0], dtype=np.float32),
309               np.array([13], dtype=np.int64))
310    }]
311
312    for proto, expected_output in zip(original, expected_outputs):
313      self._test({
314          "serialized": ops.convert_to_tensor(proto.SerializeToString()),
315          "features": {
316              "sp":
317                  parsing_ops.SparseFeature(["idx"], "val", dtypes.float32,
318                                            [13])
319          }
320      }, expected_output)
321
322  def testSerializedContainingSparseFeatureReuse(self):
323    original = [
324        example(features=features({
325            "val1": float_feature([3, 4]),
326            "val2": float_feature([5, 6]),
327            "idx": int64_feature([5, 10])
328        })),
329        example(features=features({
330            "val1": float_feature([]),  # empty float list
331            "idx": int64_feature([])
332        })),
333    ]
334
335    expected_outputs = [{
336        "sp1": (np.array([[5], [10]], dtype=np.int64),
337                np.array([3.0, 4.0], dtype=np.float32),
338                np.array([13], dtype=np.int64)),
339        "sp2": (np.array([[5], [10]], dtype=np.int64),
340                np.array([5.0, 6.0], dtype=np.float32),
341                np.array([7], dtype=np.int64))
342    }, {
343        "sp1": empty_sparse(np.float32, shape=[13]),
344        "sp2": empty_sparse(np.float32, shape=[7])
345    }]
346
347    for proto, expected_output in zip(original, expected_outputs):
348      self._test({
349          "serialized": ops.convert_to_tensor(proto.SerializeToString()),
350          "features": {
351              "sp1":
352                  parsing_ops.SparseFeature("idx", "val1", dtypes.float32, 13),
353              "sp2":
354                  parsing_ops.SparseFeature(
355                      "idx",
356                      "val2",
357                      dtypes.float32,
358                      size=7,
359                      already_sorted=True)
360          }
361      }, expected_output)
362
363  def testSerializedContaining3DSparseFeature(self):
364    original = [
365        example(features=features({
366            "val": float_feature([3, 4]),
367            "idx0": int64_feature([5, 10]),
368            "idx1": int64_feature([0, 2]),
369        })),
370        example(features=features({
371            "val": float_feature([]),  # empty float list
372            "idx0": int64_feature([]),
373            "idx1": int64_feature([]),
374        })),
375        example(features=features({
376            "val": feature(),  # feature with nothing in it
377            # missing idx feature
378        })),
379        example(features=features({
380            "val": float_feature([1, 2, -1]),
381            "idx0": int64_feature([0, 9, 3]),  # unsorted
382            "idx1": int64_feature([1, 0, 2]),
383        }))
384    ]
385
386    expected_outputs = [{
387        "sp": (np.array([[5, 0], [10, 2]], dtype=np.int64),
388               np.array([3.0, 4.0], dtype=np.float32),
389               np.array([13, 3], dtype=np.int64))
390    }, {
391        "sp": empty_sparse(np.float32, shape=[13, 3])
392    }, {
393        "sp": empty_sparse(np.float32, shape=[13, 3])
394    }, {
395        "sp": (np.array([[0, 1], [3, 2], [9, 0]], dtype=np.int64),
396               np.array([1.0, -1.0, 2.0], dtype=np.float32),
397               np.array([13, 3], dtype=np.int64))
398    }]
399
400    for proto, expected_output in zip(original, expected_outputs):
401      self._test({
402          "serialized": ops.convert_to_tensor(proto.SerializeToString()),
403          "features": {
404              "sp":
405                  parsing_ops.SparseFeature(["idx0", "idx1"], "val",
406                                            dtypes.float32, [13, 3])
407          }
408      }, expected_output)
409
410  def testSerializedContainingDense(self):
411    aname = "a"
412    bname = "b*has+a:tricky_name"
413    original = [
414        example(features=features({
415            aname: float_feature([1, 1]),
416            bname: bytes_feature([b"b0_str"]),
417        })), example(features=features({
418            aname: float_feature([-1, -1]),
419            bname: bytes_feature([b""]),
420        }))
421    ]
422
423    # pylint: disable=too-many-function-args
424    expected_outputs = [
425        {
426            aname:
427                np.array([1, 1], dtype=np.float32).reshape(1, 2, 1),
428            bname:
429                np.array(["b0_str"], dtype=bytes).reshape(
430                    1, 1, 1, 1)
431        },
432        {
433            aname:
434                np.array([-1, -1], dtype=np.float32).reshape(1, 2, 1),
435            bname:
436                np.array([""], dtype=bytes).reshape(
437                    1, 1, 1, 1)
438        }
439    ]
440    # pylint: enable=too-many-function-args
441
442    for proto, expected_output in zip(original, expected_outputs):
443      # No defaults, values required
444      self._test({
445          "serialized": ops.convert_to_tensor(proto.SerializeToString()),
446          "features": {
447              aname:
448                  parsing_ops.FixedLenFeature((1, 2, 1), dtype=dtypes.float32),
449              bname:
450                  parsing_ops.FixedLenFeature(
451                      (1, 1, 1, 1), dtype=dtypes.string),
452          }
453      }, expected_output)
454
  # This test is identical to the previous one except
  # for the creation of 'serialized'.
457  def testSerializedContainingDenseWithConcat(self):
458    aname = "a"
459    bname = "b*has+a:tricky_name"
460    # TODO(lew): Feature appearing twice should be an error in future.
461    original = [
462        (example(features=features({
463            aname: float_feature([10, 10]),
464        })), example(features=features({
465            aname: float_feature([1, 1]),
466            bname: bytes_feature([b"b0_str"]),
467        }))),
468        (
469            example(features=features({
470                bname: bytes_feature([b"b100"]),
471            })),
472            example(features=features({
473                aname: float_feature([-1, -1]),
474                bname: bytes_feature([b"b1"]),
475            })),),
476    ]
477
478    # pylint: disable=too-many-function-args
479    expected_outputs = [
480        {
481            aname:
482                np.array([1, 1], dtype=np.float32).reshape(1, 2, 1),
483            bname:
484                np.array(["b0_str"], dtype=bytes).reshape(
485                    1, 1, 1, 1)
486        },
487        {
488            aname:
489                np.array([-1, -1], dtype=np.float32).reshape(1, 2, 1),
490            bname:
491                np.array(["b1"], dtype=bytes).reshape(
492                    1, 1, 1, 1)
493        }
494    ]
495    # pylint: enable=too-many-function-args
496
497    for (m, n), expected_output in zip(original, expected_outputs):
498      # No defaults, values required
499      self._test({
500          "serialized":
501              ops.convert_to_tensor(
502                  m.SerializeToString() + n.SerializeToString()),
503          "features": {
504              aname:
505                  parsing_ops.FixedLenFeature((1, 2, 1), dtype=dtypes.float32),
506              bname:
507                  parsing_ops.FixedLenFeature(
508                      (1, 1, 1, 1), dtype=dtypes.string),
509          }
510      }, expected_output)
511
512  def testSerializedContainingDenseScalar(self):
513    original = [
514        example(features=features({
515            "a": float_feature([1]),
516        })), example(features=features({}))
517    ]
518
519    expected_outputs = [{
520        "a": np.array([1], dtype=np.float32)
521    }, {
522        "a": np.array([-1], dtype=np.float32)
523    }]
524
525    for proto, expected_output in zip(original, expected_outputs):
526      self._test({
527          "serialized": ops.convert_to_tensor(proto.SerializeToString()),
528          "features": {
529              "a":
530                  parsing_ops.FixedLenFeature(
531                      (1,), dtype=dtypes.float32, default_value=-1),
532          }
533      }, expected_output)
534
535  def testSerializedContainingDenseWithDefaults(self):
536    original = [
537        example(features=features({
538            "a": float_feature([1, 1]),
539        })),
540        example(features=features({
541            "b": bytes_feature([b"b1"]),
542        })),
543        example(features=features({
544            "b": feature()
545        })),
546    ]
547
548    # pylint: disable=too-many-function-args
549    expected_outputs = [
550        {
551            "a":
552                np.array([1, 1], dtype=np.float32).reshape(1, 2, 1),
553            "b":
554                np.array("tmp_str", dtype=bytes).reshape(
555                    1, 1, 1, 1)
556        },
557        {
558            "a":
559                np.array([3, -3], dtype=np.float32).reshape(1, 2, 1),
560            "b":
561                np.array("b1", dtype=bytes).reshape(
562                    1, 1, 1, 1)
563        },
564        {
565            "a":
566                np.array([3, -3], dtype=np.float32).reshape(1, 2, 1),
567            "b":
568                np.array("tmp_str", dtype=bytes).reshape(
569                    1, 1, 1, 1)
570        }
571    ]
572    # pylint: enable=too-many-function-args
573
574    for proto, expected_output in zip(original, expected_outputs):
575      self._test({
576          "serialized": ops.convert_to_tensor(proto.SerializeToString()),
577          "features": {
578              "a":
579                  parsing_ops.FixedLenFeature(
580                      (1, 2, 1),
581                      dtype=dtypes.float32,
582                      default_value=[3.0, -3.0]),
583              "b":
584                  parsing_ops.FixedLenFeature(
585                      (1, 1, 1, 1),
586                      dtype=dtypes.string,
587                      default_value="tmp_str"),
588          }
589      }, expected_output)
590
591  @test_util.run_deprecated_v1
592  def testSerializedContainingSparseAndSparseFeatureAndDenseWithNoDefault(self):
593    original = [
594        example(features=features({
595            "c": float_feature([3, 4]),
596            "val": bytes_feature([b"a", b"b"]),
597            "idx": int64_feature([0, 3])
598        })), example(features=features({
599            "c": float_feature([1, 2]),
600            "val": bytes_feature([b"c"]),
601            "idx": int64_feature([7])
602        }))
603    ]
604
605    a_default = np.array([[1, 2, 3]], dtype=np.int64)
606    b_default = np.random.rand(3, 3).astype(bytes)
607
608    expected_st_a = empty_sparse(np.int64)
609
610    expected_outputs = [{
611        "st_a":
612            expected_st_a,
613        "sp": (np.array([[0], [3]], dtype=np.int64),
614               np.array(["a", "b"], dtype=bytes), np.array(
615                   [13], dtype=np.int64)),
616        "a":
617            a_default,
618        "b":
619            b_default,
620        "c":
621            np.array([3, 4], dtype=np.float32)
622    }, {
623        "st_a":
624            expected_st_a,
625        "sp": (np.array([[7]], dtype=np.int64), np.array(["c"], dtype=bytes),
626               np.array([13], dtype=np.int64)),
627        "a":
628            a_default,
629        "b":
630            b_default,
631        "c":
632            np.array([1, 2], dtype=np.float32)
633    }]
634
635    for proto, expected_output in zip(original, expected_outputs):
636      self._test(
637          {
638              "serialized": ops.convert_to_tensor(proto.SerializeToString()),
639              "features": {
640                  "st_a":
641                      parsing_ops.VarLenFeature(dtypes.int64),
642                  "sp":
643                      parsing_ops.SparseFeature("idx", "val", dtypes.string, 13
644                                               ),
645                  "a":
646                      parsing_ops.FixedLenFeature(
647                          (1, 3), dtypes.int64, default_value=a_default),
648                  "b":
649                      parsing_ops.FixedLenFeature(
650                          (3, 3), dtypes.string, default_value=b_default),
651                  # Feature "c" must be provided, since it has no default_value.
652                  "c":
653                      parsing_ops.FixedLenFeature((2,), dtypes.float32),
654              }
655          },
656          expected_output)
657
658  @test_util.run_deprecated_v1
659  def testSerializedContainingSparseAndSparseFeatureWithReuse(self):
660    original = [
661        example(features=features({
662            "val": bytes_feature([b"a", b"b"]),
663            "idx": int64_feature([0, 3])
664        })), example(features=features({
665            "val": bytes_feature([b"c", b"d"]),
666            "idx": int64_feature([7, 1])
667        }))
668    ]
669
670    expected_outputs = [{
671        "idx": (np.array([[0], [1]], dtype=np.int64),
672                np.array([0, 3], dtype=np.int64), np.array([2],
673                                                           dtype=np.int64)),
674        "sp": (np.array([[0], [3]], dtype=np.int64),
675               np.array(["a", "b"], dtype=bytes), np.array(
676                   [13], dtype=np.int64))
677    },
678                        {
679                            "idx": (np.array([[0], [1]], dtype=np.int64),
680                                    np.array([7, 1], dtype=np.int64),
681                                    np.array([2], dtype=np.int64)),
682                            "sp": (np.array([[1], [7]], dtype=np.int64),
683                                   np.array(["d", "c"], dtype=bytes),
684                                   np.array([13], dtype=np.int64))
685                        }]
686
687    for proto, expected_output in zip(original, expected_outputs):
688      self._test({
689          "serialized": ops.convert_to_tensor(proto.SerializeToString()),
690          "features": {
691              "idx":
692                  parsing_ops.VarLenFeature(dtypes.int64),
693              "sp":
694                  parsing_ops.SparseFeature(["idx"], "val", dtypes.string, [13]
695                                           ),
696          }
697      }, expected_output)
698
699  @test_util.run_deprecated_v1
700  def testSerializedContainingVarLenDense(self):
701    aname = "a"
702    bname = "b"
703    cname = "c"
704    dname = "d"
705    original = [
706        example(features=features({
707            cname: int64_feature([2]),
708        })),
709        example(features=features({
710            aname: float_feature([1, 1]),
711            bname: bytes_feature([b"b0_str", b"b1_str"]),
712        })),
713        example(features=features({
714            aname: float_feature([-1, -1, 2, 2]),
715            bname: bytes_feature([b"b1"]),
716        })),
717        example(features=features({
718            aname: float_feature([]),
719            cname: int64_feature([3]),
720        })),
721    ]
722
723    # pylint: disable=too-many-function-args
724    expected_outputs = [
725        {
726            aname: np.empty(shape=(0, 2, 1), dtype=np.int64),
727            bname: np.empty(shape=(0, 1, 1, 1), dtype=bytes),
728            cname: np.array([2], dtype=np.int64),
729            dname: np.empty(shape=(0,), dtype=bytes)
730        },
731        {
732            aname:
733                np.array([[[1], [1]]], dtype=np.float32),
734            bname:
735                np.array(["b0_str", "b1_str"], dtype=bytes).reshape(2, 1, 1, 1),
736            cname:
737                np.empty(shape=(0,), dtype=np.int64),
738            dname:
739                np.empty(shape=(0,), dtype=bytes)
740        },
741        {
742            aname: np.array([[[-1], [-1]], [[2], [2]]], dtype=np.float32),
743            bname: np.array(["b1"], dtype=bytes).reshape(1, 1, 1, 1),
744            cname: np.empty(shape=(0,), dtype=np.int64),
745            dname: np.empty(shape=(0,), dtype=bytes)
746        },
747        {
748            aname: np.empty(shape=(0, 2, 1), dtype=np.int64),
749            bname: np.empty(shape=(0, 1, 1, 1), dtype=bytes),
750            cname: np.array([3], dtype=np.int64),
751            dname: np.empty(shape=(0,), dtype=bytes)
752        },
753    ]
754    # pylint: enable=too-many-function-args
755
756    for proto, expected_output in zip(original, expected_outputs):
757      self._test({
758          "serialized": ops.convert_to_tensor(proto.SerializeToString()),
759          "features": {
760              aname:
761                  parsing_ops.FixedLenSequenceFeature(
762                      (2, 1), dtype=dtypes.float32, allow_missing=True),
763              bname:
764                  parsing_ops.FixedLenSequenceFeature(
765                      (1, 1, 1), dtype=dtypes.string, allow_missing=True),
766              cname:
767                  parsing_ops.FixedLenSequenceFeature(
768                      shape=[], dtype=dtypes.int64, allow_missing=True),
769              dname:
770                  parsing_ops.FixedLenSequenceFeature(
771                      shape=[], dtype=dtypes.string, allow_missing=True),
772          }
773      }, expected_output)
774
775    # Test with padding values.
776    # NOTE(mrry): Since we parse a single example at a time, the fixed-length
777    # sequences will not be padded, and the padding value will be ignored.
778    for proto, expected_output in zip(original, expected_outputs):
779      self._test({
780          "serialized": ops.convert_to_tensor(proto.SerializeToString()),
781          "features": {
782              aname:
783                  parsing_ops.FixedLenSequenceFeature(
784                      (2, 1), dtype=dtypes.float32, allow_missing=True),
785              bname:
786                  parsing_ops.FixedLenSequenceFeature(
787                      (1, 1, 1), dtype=dtypes.string, allow_missing=True),
788              cname:
789                  parsing_ops.FixedLenSequenceFeature(
790                      shape=[], dtype=dtypes.int64, allow_missing=True),
791              dname:
792                  parsing_ops.FixedLenSequenceFeature(
793                      shape=[], dtype=dtypes.string, allow_missing=True),
794          }
795      }, expected_output)
796
797    # Change number of required values so the inputs are not a
798    # multiple of this size.
799    self._test(
800        {
801            "serialized":
802                ops.convert_to_tensor(original[2].SerializeToString()),
803            "features": {
804                aname:
805                    parsing_ops.FixedLenSequenceFeature(
806                        (2, 1), dtype=dtypes.float32, allow_missing=True),
807                bname:
808                    parsing_ops.FixedLenSequenceFeature(
809                        (2, 1, 1), dtype=dtypes.string, allow_missing=True),
810            }
811        },
812        # TODO(mrry): Consider matching the `io.parse_example()` error message.
813        expected_err=(errors_impl.OpError, "Key: b."))
814
815    self._test(
816        {
817            "serialized": ops.convert_to_tensor(""),
818            "features": {
819                aname:
820                    parsing_ops.FixedLenSequenceFeature(
821                        (2, 1),
822                        dtype=dtypes.float32,
823                        allow_missing=True,
824                        default_value=[]),
825                bname:
826                    parsing_ops.FixedLenSequenceFeature(
827                        (2, 1, 1), dtype=dtypes.string, allow_missing=True),
828            }
829        },
830        expected_err=(ValueError,
831                      "Cannot reshape a tensor with 0 elements to shape"))
832
833    self._test(
834        {
835            "serialized": ops.convert_to_tensor(""),
836            "features": {
837                aname:
838                    parsing_ops.FixedLenFeature(
839                        (None, 2, 1), dtype=dtypes.float32),
840                bname:
841                    parsing_ops.FixedLenSequenceFeature(
842                        (2, 1, 1), dtype=dtypes.string, allow_missing=True),
843            }
844        },
845        expected_err=(ValueError,
846                      "First dimension of shape for feature a unknown. "
847                      "Consider using FixedLenSequenceFeature."))
848
849    self._test(
850        {
851            "serialized": ops.convert_to_tensor(""),
852            "features": {
853                cname:
854                    parsing_ops.FixedLenFeature(
855                        (1, None), dtype=dtypes.int64, default_value=[[1]]),
856            }
857        },
858        expected_err=(ValueError,
859                      "All dimensions of shape for feature c need to be known "
860                      r"but received \(1, None\)."))
861
862    self._test(
863        {
864            "serialized": ops.convert_to_tensor(""),
865            "features": {
866                aname:
867                    parsing_ops.FixedLenSequenceFeature(
868                        (2, 1), dtype=dtypes.float32, allow_missing=True),
869                bname:
870                    parsing_ops.FixedLenSequenceFeature(
871                        (1, 1, 1), dtype=dtypes.string, allow_missing=True),
872                cname:
873                    parsing_ops.FixedLenSequenceFeature(
874                        shape=[], dtype=dtypes.int64, allow_missing=False),
875                dname:
876                    parsing_ops.FixedLenSequenceFeature(
877                        shape=[], dtype=dtypes.string, allow_missing=True),
878            }
879        },
880        expected_err=(ValueError,
881                      "Unsupported: FixedLenSequenceFeature requires "
882                      "allow_missing to be True."))
883
884
class ParseSingleExampleTest(test.TestCase):
  """Exercises `parsing_ops.parse_single_example` on one serialized Example."""

  def _test(self, kwargs, expected_values=None, expected_err=None):
    """Runs `parse_single_example(**kwargs)` and validates the outcome.

    Args:
      kwargs: Keyword arguments forwarded to
        `parsing_ops.parse_single_example`; must contain a "features" entry,
        which is also used for the static shape checks.
      expected_values: Expected outputs, handed to
        `_compare_output_to_expected` together with the evaluated results.
      expected_err: Optional pair whose two items are passed, in order, to
        `assertRaisesWithPredicateMatch`. When truthy, only the error is
        checked and no values or shapes are compared.
    """
    # Static shapes every VarLenFeature output component must report.
    sparse_component_shapes = (
        ("indices", (None, 1)),
        ("values", (None,)),
        ("dense_shape", (1,)),
    )

    with self.cached_session() as session:
      if expected_err:
        error_type = expected_err[0]
        error_predicate = expected_err[1]
        # Either building the op or running it may raise, so both happen
        # inside the assertion context.
        with self.assertRaisesWithPredicateMatch(error_type, error_predicate):
          parsed = parsing_ops.parse_single_example(**kwargs)
          session.run(flatten_values_tensors_or_sparse(parsed.values()))
        return

      # `parsed` is a dict holding Tensors and SparseTensors.
      parsed = parsing_ops.parse_single_example(**kwargs)

      # Value check.
      fetched = session.run(flatten_values_tensors_or_sparse(parsed.values()))
      _compare_output_to_expected(self, parsed, expected_values, fetched)

      # Static shape check, one feature spec at a time.
      for key, spec in kwargs["features"].items():
        is_fixed_len = isinstance(spec, parsing_ops.FixedLenFeature)
        if is_fixed_len and spec.shape is not None:
          actual_shape = tuple(parsed[key].get_shape())
          self.assertEqual(actual_shape, tensor_shape.as_shape(spec.shape))
          continue
        if not isinstance(spec, parsing_ops.VarLenFeature):
          continue
        for attr_name, static_shape in sparse_component_shapes:
          component = getattr(parsed[key], attr_name)
          self.assertEqual(
              tuple(component.get_shape().as_list()), static_shape)

  @test_util.run_deprecated_v1
  def testSingleExampleWithSparseAndSparseFeatureAndDense(self):
    """Parses one Example against VarLen, Sparse and dense feature specs."""
    proto = example(
        features=features({
            "c": float_feature([3, 4]),
            "d": float_feature([0.0, 1.0]),
            "val": bytes_feature([b"a", b"b"]),
            "idx": int64_feature([0, 3]),
            "st_a": float_feature([3.0, 4.0])
        }))
    serialized = proto.SerializeToString()

    # Each sparse expectation is an (indices, values, shape) triple.
    st_a_indices = np.array([[0], [1]], dtype=np.int64)
    st_a_values = np.array([3.0, 4.0], dtype=np.float32)
    st_a_shape = np.array([2], dtype=np.int64)  # max_values = 2
    expected_st_a = (st_a_indices, st_a_values, st_a_shape)

    sp_indices = np.array([[0], [3]], dtype=np.int64)
    sp_values = np.array(["a", "b"], dtype="|S")
    sp_shape = np.array([13], dtype=np.int64)  # max_values = 13
    expected_sp = (sp_indices, sp_values, sp_shape)

    # "a" and "b" are not present in the Example above, so the expected
    # output for them is built from these defaults.
    a_default = [1, 2, 3]
    b_default = np.random.rand(3, 3).astype(bytes)

    expected_output = {
        "st_a": expected_st_a,
        "sp": expected_sp,
        "a": [a_default],
        "b": b_default,
        "c": np.array([3, 4], dtype=np.float32),
        "d": np.array([0.0, 1.0], dtype=np.float32),
    }

    serialized_tensor = ops.convert_to_tensor(serialized)
    feature_spec = {
        "st_a":
            parsing_ops.VarLenFeature(dtypes.float32),
        "sp":
            parsing_ops.SparseFeature(["idx"], "val", dtypes.string, [13]),
        "a":
            parsing_ops.FixedLenFeature(
                (1, 3), dtypes.int64, default_value=a_default),
        "b":
            parsing_ops.FixedLenFeature(
                (3, 3), dtypes.string, default_value=b_default),
        # "c" has no default_value, so the Example has to supply it.
        "c":
            parsing_ops.FixedLenFeature(2, dtypes.float32),
        "d":
            parsing_ops.FixedLenSequenceFeature(
                [], dtypes.float32, allow_missing=True)
    }

    self._test(
        {
            "serialized": serialized_tensor,
            "features": feature_spec
        }, expected_output)

  def testExampleLongerThanSpec(self):
    """Expects an OpError when "a" carries two values for a length-1 spec."""
    proto = example(features=features({"a": bytes_feature([b"a", b"b"])}))
    serialized = proto.SerializeToString()

    parse_kwargs = {
        "serialized": ops.convert_to_tensor(serialized),
        "features": {
            "a": parsing_ops.FixedLenFeature(1, dtypes.string)
        }
    }
    self._test(
        parse_kwargs,
        expected_err=(errors_impl.OpError, "Can't parse serialized Example"))
991
992
# Invoke the TensorFlow test entry point (`test.main`) only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
  test.main()
995