# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ===================================================================
"""Tests for python.tpu.feature_column."""

import copy

from absl.testing import parameterized
from keras.feature_column import dense_features as df_lib
from keras.feature_column import sequence_feature_column as sfc_lib
from tensorflow.python.client import session
from tensorflow.python.feature_column import feature_column_lib as fc_lib
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.framework import test_util
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import lookup_ops
from tensorflow.python.ops import parsing_ops
from tensorflow.python.ops import variable_scope
from tensorflow.python.ops import variables as variables_lib
from tensorflow.python.platform import test
from tensorflow.python.tpu import feature_column_v2 as tpu_fc
from tensorflow.python.tpu import tpu
from tensorflow.python.tpu import tpu_function


def _initialized_session():
  """Returns a Session with global variables and lookup tables initialized."""
  sess = session.Session()
  sess.run(variables_lib.global_variables_initializer())
  sess.run(lookup_ops.tables_initializer())
  return sess


class _TestStateManager(fc_lib.StateManager):
  """Minimal StateManager that caches variables per feature column."""

  def __init__(self, trainable=True):
    # Maps feature_column -> {variable name -> variable}.
    self._all_variables = {}
    self._trainable = trainable

  def create_variable(self,
                      feature_column,
                      name,
                      shape,
                      dtype=None,
                      trainable=True,
                      use_resource=True,
                      initializer=None):
    """Creates (or returns the cached) variable for `feature_column`."""
    if feature_column not in self._all_variables:
      self._all_variables[feature_column] = {}
    var_dict = self._all_variables[feature_column]
    if name in var_dict:
      return var_dict[name]
    else:
      var = variable_scope.get_variable(
          name=name,
          shape=shape,
          dtype=dtype,
          trainable=self._trainable and trainable,
          use_resource=use_resource,
          initializer=initializer)
      var_dict[name] = var
      return var

  def get_variable(self, feature_column, name):
    return self._all_variables[feature_column][name]


class EmbeddingColumnTestV2(test.TestCase, parameterized.TestCase):

  def test_defaults(self):
    categorical_column = fc_lib.categorical_column_with_identity(
        key='aaa', num_buckets=3)
    embedding_dimension = 2
    embedding_column = tpu_fc.embedding_column_v2(
        categorical_column, dimension=embedding_dimension)
    # Can't test default initializer as it's a random function.
    self.assertIs(categorical_column, embedding_column.categorical_column)
    self.assertEqual(embedding_dimension, embedding_column.dimension)
    self.assertEqual('mean', embedding_column.combiner)
    self.assertEqual('aaa_embedding', embedding_column.name)
    self.assertEqual((embedding_dimension,), embedding_column.variable_shape)

  def test_all_constructor_args(self):
    categorical_column = fc_lib.categorical_column_with_identity(
        key='aaa', num_buckets=3)
    embedding_dimension = 2
    embedding_column = tpu_fc.embedding_column_v2(
        categorical_column,
        dimension=embedding_dimension,
        combiner='my_combiner',
        initializer=lambda: 'my_initializer')
    self.assertIs(categorical_column, embedding_column.categorical_column)
    self.assertEqual(embedding_dimension, embedding_column.dimension)
    self.assertEqual('my_combiner', embedding_column.combiner)
    self.assertEqual('my_initializer', embedding_column.initializer())
    self.assertEqual('aaa_embedding', embedding_column.name)
    self.assertEqual((embedding_dimension,), embedding_column.variable_shape)
    self.assertEqual({
        'aaa': parsing_ops.VarLenFeature(dtypes.int64)
    }, embedding_column._parse_example_spec)

  @parameterized.named_parameters(
      {
          'testcase_name': 'use_safe_embedding_lookup',
          'use_safe_embedding_lookup': True,
      }, {
          'testcase_name': 'dont_use_safe_embedding_lookup',
          'use_safe_embedding_lookup': False,
      })
  @test_util.deprecated_graph_mode_only
  def test_feature_layer_cpu(self, use_safe_embedding_lookup):
    # Inputs.
    vocabulary_size = 3
    sparse_input = sparse_tensor.SparseTensorValue(
        # example 0, ids [2]
        # example 1, ids [0, 1]
        # example 2, ids []
        # example 3, ids [1]
        indices=((0, 0), (1, 0), (1, 1), (3, 0)),
        values=(2, 0, 1, 1),
        dense_shape=(4, 2))

    # Embedding variable.
    embedding_dimension = 2
    embedding_values = (
        (1., 2.),  # id 0
        (3., 5.),  # id 1
        (7., 11.)  # id 2
    )

    def _initializer(shape, dtype, partition_info=None):
      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
      self.assertEqual(dtypes.float32, dtype)
      self.assertIsNone(partition_info)
      return embedding_values

    # Expected lookup result, using combiner='mean'.
    expected_lookups = (
        # example 0, ids [2], embedding = [7, 11]
        (7., 11.),
        # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
        (2., 3.5),
        # example 2, ids [], embedding = [0, 0]
        (0., 0.),
        # example 3, ids [1], embedding = [3, 5]
        (3., 5.),
    )
    expected_lookups_sequence = (
        # example 0, ids [2], embedding = [[7, 11], [0, 0]]
        ((7., 11.), (0., 0.),),
        # example 1, ids [0, 1], embedding = [[1, 2], [3, 5]]
        ((1., 2.), (3., 5.),),
        # example 2, ids [], embedding = [[0, 0], [0, 0]]
        ((0., 0.), (0., 0.),),
        # example 3, ids [1], embedding = [[3, 5], [0, 0]]
        ((3., 5.), (0., 0.),),
    )

    # Build columns.
    categorical_column = fc_lib.categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    sequence_categorical_column = (
        fc_lib.sequence_categorical_column_with_identity(
            key='bbb', num_buckets=vocabulary_size))
    embedding_column = tpu_fc.embedding_column_v2(
        categorical_column,
        dimension=embedding_dimension,
        initializer=_initializer,
        use_safe_embedding_lookup=use_safe_embedding_lookup)
    sequence_embedding_column = tpu_fc.embedding_column_v2(
        sequence_categorical_column,
        dimension=embedding_dimension,
        initializer=_initializer,
        max_sequence_length=2,
        use_safe_embedding_lookup=use_safe_embedding_lookup)

    # Provide sparse input and get dense result.
    features = {'aaa': sparse_input, 'bbb': sparse_input}
    dense_features = df_lib.DenseFeatures([embedding_column])
    sequence_features = sfc_lib.SequenceFeatures([sequence_embedding_column])
    embedding_lookup = dense_features(features)
    sequence_embedding_lookup = sequence_features(features)

    # Assert expected embedding variable and lookups.
    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
    # NOTE: assertCountEqual replaces the removed Python-2-only
    # assertItemsEqual; it checks the same set-with-multiplicity property.
    self.assertCountEqual(
        ('dense_features/aaa_embedding/embedding_weights:0',
         'sequence_features/bbb_embedding/embedding_weights:0',),
        tuple([v.name for v in global_vars]))
    with _initialized_session():
      self.assertAllEqual(embedding_values, global_vars[0])
      self.assertAllEqual(expected_lookups, embedding_lookup)
      self.assertAllEqual(expected_lookups_sequence,
                          sequence_embedding_lookup[0].eval())
      # The graph will still have SparseFillEmptyRows due to sequence being
      # a Rank3 embedding lookup.
      if use_safe_embedding_lookup:
        self.assertEqual(2, [
            x.type for x in ops.get_default_graph().get_operations()
        ].count('SparseFillEmptyRows'))
      else:
        self.assertEqual(1, [
            x.type for x in ops.get_default_graph().get_operations()
        ].count('SparseFillEmptyRows'))

  def test_deepcopy(self):
    categorical_column = fc_lib.categorical_column_with_identity(
        key='aaa', num_buckets=3)
    embedding_column = tpu_fc.embedding_column_v2(
        categorical_column, dimension=2)
    embedding_column_copy = copy.deepcopy(embedding_column)
    self.assertEqual(embedding_column.dimension,
                     embedding_column_copy.dimension)
    self.assertEqual(embedding_column._max_sequence_length,
                     embedding_column_copy._max_sequence_length)

  def test_with_scope_validation(self):
    categorical_column = fc_lib.categorical_column_with_identity(
        key='aaa', num_buckets=3)
    embedding_dimension = 2
    initializer = init_ops.truncated_normal_initializer(mean=0.0, stddev=.5)
    embedding_column = tpu_fc._TPUEmbeddingColumnV2(
        categorical_column=categorical_column,
        dimension=embedding_dimension,
        combiner='mean',
        initializer=initializer,
        max_sequence_length=0,
        learning_rate_fn=None,
        use_safe_embedding_lookup=True,
        bypass_scope_validation=False)
    self.assertIs(categorical_column, embedding_column.categorical_column)
    self.assertEqual(embedding_dimension, embedding_column.dimension)
    state_manager = _TestStateManager()
    with tpu_function.tpu_shard_context(1):
      with variable_scope.variable_scope('tower1/scope1'):
        embedding_column.create_state(state_manager)
      with variable_scope.variable_scope('tower2/scope2'):
        # With default scope validation, the same column cannot be used in a
        # new variable scope.
        with self.assertRaisesRegex(ValueError,
                                    'the variable scope name is different'):
          embedding_column.create_state(state_manager)

  def test_bypass_scope_validation(self):
    categorical_column = fc_lib.categorical_column_with_identity(
        key='aaa', num_buckets=3)
    embedding_dimension = 2
    initializer = init_ops.truncated_normal_initializer(mean=0.0, stddev=.5)
    embedding_column = tpu_fc._TPUEmbeddingColumnV2(
        categorical_column=categorical_column,
        dimension=embedding_dimension,
        combiner='mean',
        initializer=initializer,
        max_sequence_length=0,
        learning_rate_fn=None,
        use_safe_embedding_lookup=True,
        bypass_scope_validation=True)
    self.assertIs(categorical_column, embedding_column.categorical_column)
    self.assertEqual(embedding_dimension, embedding_column.dimension)
    state_manager = _TestStateManager()
    with tpu_function.tpu_shard_context(1):
      with variable_scope.variable_scope('tower1/scope1'):
        embedding_column.create_state(state_manager)
      with variable_scope.variable_scope('tower2/scope2'):
        embedding_column.create_state(state_manager)

  def test_deepcopy_with_bypass_scope_validation(self):
    categorical_column = fc_lib.categorical_column_with_identity(
        key='aaa', num_buckets=3)
    embedding_dimension = 2
    initializer = init_ops.truncated_normal_initializer(mean=0.0, stddev=.5)
    embedding_column = tpu_fc._TPUEmbeddingColumnV2(
        categorical_column=categorical_column,
        dimension=embedding_dimension,
        combiner='mean',
        initializer=initializer,
        max_sequence_length=0,
        use_safe_embedding_lookup=False,
        bypass_scope_validation=True)
    embedding_column_copy = copy.deepcopy(embedding_column)
    self.assertEqual(embedding_dimension, embedding_column_copy.dimension)
    self.assertEqual(embedding_column._max_sequence_length,
                     embedding_column_copy._max_sequence_length)
    self.assertTrue(embedding_column_copy._bypass_scope_validation)
    self.assertFalse(embedding_column_copy.use_safe_embedding_lookup)


class SharedEmbeddingColumnTestV2(test.TestCase, parameterized.TestCase):

  @test_util.deprecated_graph_mode_only
  def test_defaults(self):
    vocabulary_size = 3
    categorical_column_a = fc_lib.categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    categorical_column_b = fc_lib.categorical_column_with_identity(
        key='bbb', num_buckets=vocabulary_size)
    embedding_dimension = 2
    embedding_column_b, embedding_column_a = tpu_fc.shared_embedding_columns_v2(
        [categorical_column_b, categorical_column_a],
        dimension=embedding_dimension)
    self.assertIs(categorical_column_a, embedding_column_a.categorical_column)
    self.assertIs(categorical_column_b, embedding_column_b.categorical_column)
    self.assertEqual((vocabulary_size, embedding_dimension),
                     embedding_column_a.get_embedding_table_size())
    self.assertEqual((vocabulary_size, embedding_dimension),
                     embedding_column_a.get_embedding_table_size())
    self.assertEqual('mean', embedding_column_a.combiner)
    self.assertEqual('mean', embedding_column_b.combiner)
    self.assertIsNotNone(embedding_column_a.get_initializer())
    self.assertIsNotNone(embedding_column_b.get_initializer())
    self.assertEqual('aaa_bbb_shared_embedding',
                     embedding_column_a.get_embedding_var_name())
    self.assertEqual('aaa_bbb_shared_embedding',
                     embedding_column_b.get_embedding_var_name())
    self.assertEqual('aaa_shared_embedding', embedding_column_a.name)
    self.assertEqual('bbb_shared_embedding', embedding_column_b.name)
    self.assertEqual((embedding_dimension,),
                     embedding_column_a.variable_shape)
    self.assertEqual((embedding_dimension,),
                     embedding_column_b.variable_shape)

  @test_util.deprecated_graph_mode_only
  def test_all_constructor_args(self):
    vocabulary_size = 3
    categorical_column_a = fc_lib.categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    categorical_column_b = fc_lib.categorical_column_with_identity(
        key='bbb', num_buckets=vocabulary_size)
    embedding_dimension = 2
    embedding_column_a, embedding_column_b = tpu_fc.shared_embedding_columns_v2(
        [categorical_column_a, categorical_column_b],
        dimension=embedding_dimension,
        combiner='my_combiner',
        initializer=lambda: 'my_initializer',
        shared_embedding_collection_name='var_scope_name')
    self.assertIs(categorical_column_a, embedding_column_a.categorical_column)
    self.assertIs(categorical_column_b, embedding_column_b.categorical_column)
    self.assertEqual((vocabulary_size, embedding_dimension),
                     embedding_column_a.get_embedding_table_size())
    self.assertEqual((vocabulary_size, embedding_dimension),
                     embedding_column_a.get_embedding_table_size())
    self.assertEqual('my_combiner', embedding_column_a.combiner)
    self.assertEqual('my_combiner', embedding_column_b.combiner)
    self.assertEqual('my_initializer', embedding_column_a.get_initializer()())
    self.assertEqual('my_initializer', embedding_column_b.get_initializer()())
    self.assertEqual('var_scope_name',
                     embedding_column_a.get_embedding_var_name())
    self.assertEqual('var_scope_name',
                     embedding_column_b.get_embedding_var_name())
    self.assertEqual('aaa_shared_embedding', embedding_column_a.name)
    self.assertEqual('bbb_shared_embedding', embedding_column_b.name)
    self.assertEqual((embedding_dimension,),
                     embedding_column_a.variable_shape)
    self.assertEqual((embedding_dimension,),
                     embedding_column_b.variable_shape)

  @parameterized.named_parameters(
      {
          'testcase_name': 'use_safe_embedding_lookup',
          'use_safe_embedding_lookup': True
      }, {
          'testcase_name': 'dont_use_safe_embedding_lookup',
          'use_safe_embedding_lookup': False
      })
  @test_util.deprecated_graph_mode_only
  def test_feature_layer_cpu(self, use_safe_embedding_lookup):
    # Inputs.
    vocabulary_size = 3
    input_a = sparse_tensor.SparseTensorValue(
        # example 0, ids [2]
        # example 1, ids [0, 1]
        indices=((0, 0), (1, 0), (1, 1)),
        values=(2, 0, 1),
        dense_shape=(2, 2))
    input_b = sparse_tensor.SparseTensorValue(
        # example 0, ids [2]
        # example 1, ids [0, 1]
        # example 2, ids []
        indices=((0, 0), (1, 0), (1, 1)),
        values=(2, 0, 1),
        dense_shape=(3, 2))
    input_features = {'aaa': input_a, 'bbb': input_b}

    # Embedding variable.
    embedding_dimension = 2
    embedding_values = (
        (1., 2.),  # id 0
        (3., 5.),  # id 1
        (7., 11.)  # id 2
    )

    def _initializer(shape, dtype, partition_info=None):
      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
      self.assertEqual(dtypes.float32, dtype)
      self.assertIsNone(partition_info)
      return embedding_values

    # Expected lookup result, using combiner='mean'.
    expected_lookups_a = (
        # example 0:
        (7., 11.),  # ids [2], embedding = [7, 11]
        # example 1:
        (2., 3.5),  # ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
    )
    expected_lookups_b = (
        # example 0:
        ((7., 11.), (0., 0.),),  # ids [2], embedding = [[7, 11], [0, 0]]
        # example 1:
        ((1., 2.), (3., 5.),),  # ids [0, 1], embedding = [[1, 2], [3, 5]]
        # example 2:
        ((0., 0.), (0., 0.),),  # ids [], embedding = [[0, 0], [0, 0]]
    )

    # Build columns.
    categorical_column_a = fc_lib.categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    categorical_column_b = fc_lib.sequence_categorical_column_with_identity(
        key='bbb', num_buckets=vocabulary_size)
    embedding_column_a, embedding_column_b = tpu_fc.shared_embedding_columns_v2(
        [categorical_column_a, categorical_column_b],
        dimension=embedding_dimension,
        initializer=_initializer,
        max_sequence_lengths=[0, 2],
        use_safe_embedding_lookup=use_safe_embedding_lookup)

    # Provide sparse input and get dense result.
    dense_features = df_lib.DenseFeatures([embedding_column_a])
    sequence_features = sfc_lib.SequenceFeatures([embedding_column_b])
    embedding_lookup_a = dense_features(input_features)
    embedding_lookup_b = sequence_features(input_features)

    # Assert expected embedding variable and lookups.
    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
    # NOTE: assertCountEqual replaces the removed Python-2-only
    # assertItemsEqual; it checks the same set-with-multiplicity property.
    self.assertCountEqual(
        ('aaa_bbb_shared_embedding:0',),
        tuple([v.name for v in global_vars]))
    embedding_var = global_vars[0]
    with _initialized_session():
      self.assertAllEqual(embedding_values, embedding_var)
      self.assertAllEqual(expected_lookups_a, embedding_lookup_a)
      self.assertAllEqual(expected_lookups_b,
                          embedding_lookup_b[0].eval())
      # The graph will still have SparseFillEmptyRows due to sequence being
      # a Rank3 embedding lookup.
      if use_safe_embedding_lookup:
        self.assertEqual(2, [
            x.type for x in ops.get_default_graph().get_operations()
        ].count('SparseFillEmptyRows'))
      else:
        self.assertEqual(1, [
            x.type for x in ops.get_default_graph().get_operations()
        ].count('SparseFillEmptyRows'))

  def test_deepcopy(self):
    vocabulary_size = 3
    categorical_column_a = fc_lib.categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    categorical_column_b = fc_lib.categorical_column_with_identity(
        key='bbb', num_buckets=vocabulary_size)
    embedding_dimension = 2
    columns = tpu_fc.shared_embedding_columns_v2(
        [categorical_column_b, categorical_column_a],
        dimension=embedding_dimension)
    columns_copy = copy.deepcopy(columns)
    self.assertEqual(
        [column._shared_embedding_collection_name for column in columns],
        [column._shared_embedding_collection_name for column in columns_copy])


class DeviceSpecificEmbeddingColumnTestV2(test.TestCase,
                                          parameterized.TestCase):

  @parameterized.named_parameters(
      {
          'testcase_name': 'invalid_shared',
          'shared': True,
      }, {
          'testcase_name': 'invalid_not_shared',
          'shared': False,
      })
  @test_util.deprecated_graph_mode_only
  def test_invalid_cases(self, shared):
    # Inputs.
    input_sparse_tensor = sparse_tensor.SparseTensorValue(
        indices=((0, 0), (1, 0), (1, 1), (1, 4)),
        values=(2, 0, 1, 3),
        dense_shape=(2, 5))
    input_features = {'inp': input_sparse_tensor}

    # Build columns.
    categorical_column_input = fc_lib.categorical_column_with_identity(
        key='inp', num_buckets=3)

    # Training on TPU with cpu embedding lookups is not supported.
    if shared:
      embedding_column = tpu_fc.shared_embedding_columns_v2(
          [categorical_column_input],
          dimension=2,
          embedding_lookup_device='cpu',
          tensor_core_shape=[None, 3])
    else:
      embedding_column = tpu_fc.embedding_column_v2(
          categorical_column_input,
          dimension=2,
          embedding_lookup_device='cpu',
          tensor_core_shape=[None, 3])
    dense_features = df_lib.DenseFeatures(embedding_column)
    with self.assertRaisesRegex(
        ValueError,
        r'.*embedding_lookup_device=\"cpu\" during training is not'):
      dense_features(input_features)

    # Inference on with TPU Embedding Hardware is not supported.
    if shared:
      embedding_column = tpu_fc.shared_embedding_columns_v2(
          [categorical_column_input],
          dimension=2,
          embedding_lookup_device='tpu_embedding_core',
          tensor_core_shape=[None, 3])
    else:
      embedding_column = tpu_fc.embedding_column_v2(
          categorical_column_input,
          dimension=2,
          embedding_lookup_device='tpu_embedding_core',
          tensor_core_shape=[None, 3])
    context = tpu._TPUInferenceContext('tpu_inference')
    context.Enter()
    dense_features = df_lib.DenseFeatures(embedding_column)
    with self.assertRaisesRegex(
        ValueError,
        r'Using embedding_lookup_device=tpu_embedding_core during inference is '
    ):
      dense_features(input_features)
    context.Exit()

  @parameterized.named_parameters(
      {
          'testcase_name': 'combiner_mean_shared',
          'shared': True,
          'combiner': 'mean'
      }, {
          'testcase_name': 'combiner_sum_shared',
          'shared': True,
          'combiner': 'sum'
      }, {
          'testcase_name': 'combiner_sqrtn_shared',
          'shared': True,
          'combiner': 'sqrtn'
      }, {
          'testcase_name': 'combiner_mean_not_shared',
          'shared': False,
          'combiner': 'mean'
      }, {
          'testcase_name': 'combiner_sum_not_shared',
          'shared': False,
          'combiner': 'sum'
      }, {
          'testcase_name': 'combiner_sqrtn_not_shared',
          'shared': False,
          'combiner': 'sqrtn'
      })
  @test_util.deprecated_graph_mode_only
  def test_dense_embedding_lookup(self, shared, combiner):
    # Inputs.
    vocabulary_size = 3
    input_sparse_tensor = sparse_tensor.SparseTensorValue(
        # example 0, ids [2]
        # example 1, ids [0, 1, 3]
        indices=((0, 0), (1, 0), (1, 1), (1, 4)),
        values=(2, 0, 1, 3),
        dense_shape=(2, 5))
    input_features = {'inp': input_sparse_tensor}

    # Embedding variable.
    embedding_dimension = 2
    embedding_values = (
        (1., 2.),  # id 0
        (3., 5.),  # id 1
        (7., 11.),  # id 2
        (13., 17.)  # id 3
    )

    def _initializer(shape, dtype, partition_info=None):
      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
      self.assertEqual(dtypes.float32, dtype)
      self.assertIsNone(partition_info)
      return embedding_values

    # Build columns.
    categorical_column_input = fc_lib.categorical_column_with_identity(
        key='inp', num_buckets=vocabulary_size)

    # Set tensor_core_shape to be [None, 3] to ensure some padding and
    # dynamic batch size.
    if shared:
      embedding_column = tpu_fc.shared_embedding_columns_v2(
          [categorical_column_input],
          dimension=embedding_dimension,
          initializer=_initializer,
          combiner=combiner,
          embedding_lookup_device='tpu_tensor_core',
          tensor_core_shape=[None, 3])
    else:
      embedding_column = tpu_fc.embedding_column_v2(
          categorical_column_input,
          dimension=embedding_dimension,
          initializer=_initializer,
          combiner=combiner,
          embedding_lookup_device='tpu_tensor_core',
          tensor_core_shape=[None, 3])

    # Run in TPUContexts so that we hit the intended densification case.
    context = tpu._TPUInferenceContext('tpu_inference')
    context.Enter()
    with tpu_function.tpu_shard_context(1):
      dense_features = df_lib.DenseFeatures(embedding_column)
      # Sqrtn combiner not supported for now.
      if combiner == 'sqrtn':
        with self.assertRaisesRegex(
            ValueError, 'Dense TPU Embedding does not support combiner'):
          embedding_lookup = dense_features(input_features)
        return
      if combiner == 'mean':
        expected_lookups = (
            # example 0:
            (7., 11.),  # ids [2], embedding = [7, 11]
            # example 1:
            (2., 3.5),  # ids [0, 1], embedding = mean([1, 2] + [3, 5]) =
            # [2, 3.5]
        )
      elif combiner == 'sum':
        expected_lookups = (
            # example 0:
            (7., 11.),  # ids [2], embedding = [7, 11]
            # example 1:
            (4., 7),  # ids [0, 1], embedding = sum([1, 2] + [3, 5]) = [4, 7]
        )

      embedding_lookup = dense_features(input_features)

      # Assert expected embedding variable and lookups.
      global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
      if shared:
        self.assertCountEqual(('inp_shared_embedding:0',),
                              tuple([v.name for v in global_vars]))
      else:
        self.assertCountEqual(
            ('dense_features/inp_embedding/embedding_weights:0',),
            tuple([v.name for v in global_vars]))
      embedding_var = global_vars[0]
      with _initialized_session():
        self.assertAllEqual(embedding_values, embedding_var)
        eval_res = embedding_lookup.eval()
        self.assertAllEqual(expected_lookups, eval_res)
      context.Exit()

  @test_util.deprecated_graph_mode_only
  def test_empty_row(self):
    # Inputs.
    vocabulary_size = 3
    input_sparse_tensor = sparse_tensor.SparseTensorValue(
        # example 0, ids []
        # example 1, ids [0, 1, 3]
        indices=((1, 0), (1, 1), (1, 4)),
        values=(0, 1, 3),
        dense_shape=(2, 5))
    input_features = {'inp': input_sparse_tensor}

    # Embedding variable.
    embedding_dimension = 2
    embedding_values = (
        (1., 2.),  # id 0
        (3., 5.),  # id 1
        (7., 11.),  # id 2
        (13., 17.)  # id 3
    )

    def _initializer(shape, dtype, partition_info=None):
      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
      self.assertEqual(dtypes.float32, dtype)
      self.assertIsNone(partition_info)
      return embedding_values

    # Build columns.
    categorical_column_input = fc_lib.categorical_column_with_identity(
        key='inp', num_buckets=vocabulary_size)

    # Set tensor_core_shape to be [None, 3] to ensure some padding and
    # dynamic batch size.
    embedding_column = tpu_fc.embedding_column_v2(
        categorical_column_input,
        dimension=embedding_dimension,
        initializer=_initializer,
        combiner='mean',
        embedding_lookup_device='tpu_tensor_core',
        tensor_core_shape=[None, 3])

    # Run in TPUContexts so that we hit the intended densification case.
    context = tpu._TPUInferenceContext('tpu_inference')
    context.Enter()
    with tpu_function.tpu_shard_context(1):
      dense_features = df_lib.DenseFeatures(embedding_column)
      expected_lookups = (
          # example 0:
          (0., 0.),  # ids [], embedding = [0, 0]
          # example 1:
          (2., 3.5),  # ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
      )

      embedding_lookup = dense_features(input_features)

      # Assert expected embedding variable and lookups.
      global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
      self.assertCountEqual(
          ('dense_features/inp_embedding/embedding_weights:0',),
          tuple([v.name for v in global_vars]))
      embedding_var = global_vars[0]
      with _initialized_session():
        self.assertAllEqual(embedding_values, embedding_var)
        eval_res = embedding_lookup.eval()
        self.assertAllEqual(expected_lookups, eval_res)
      context.Exit()

  @test_util.deprecated_graph_mode_only
  def test_error_dense_shape_invalid(self):
    categorical_column_input = fc_lib.categorical_column_with_identity(
        key='inp', num_buckets=5)
    with self.assertRaisesRegex(ValueError,
                                'tensor_core_shape must be size 2'):
      tpu_fc.shared_embedding_columns_v2([categorical_column_input],
                                         dimension=20,
                                         tensor_core_shape=[None, 20, 15])


if __name__ == '__main__':
  test.main()