# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests specific to Feature Columns integration."""

import numpy as np

from tensorflow.python import keras
from tensorflow.python.data.ops import dataset_ops
from tensorflow.python.feature_column import feature_column_lib as fc
from tensorflow.python.keras import keras_parameterized
from tensorflow.python.keras import metrics as metrics_module
from tensorflow.python.keras import testing_utils
from tensorflow.python.keras.feature_column import dense_features as df
from tensorflow.python.keras.utils import np_utils
from tensorflow.python.platform import test


class TestDNNModel(keras.models.Model):
  """Subclassed model: a `DenseFeatures` layer followed by one `Dense` layer.

  Args:
    feature_columns: Feature columns handed to the `DenseFeatures` layer.
    units: Number of units of the `Dense` layer.
    name: Optional model name, forwarded to the base `Model`.
    **kwargs: Extra keyword arguments forwarded to the base `Model`.
  """

  def __init__(self, feature_columns, units, name=None, **kwargs):
    super(TestDNNModel, self).__init__(name=name, **kwargs)
    self._input_layer = df.DenseFeatures(feature_columns, name='input_layer')
    self._dense_layer = keras.layers.Dense(units, name='dense_layer')

  def call(self, features):
    """Runs `features` through the feature layer and then the dense layer."""
    net = self._input_layer(features)
    net = self._dense_layer(net)
    return net


class FeatureColumnsIntegrationTest(keras_parameterized.TestCase):
  """Integration tests for Keras models fed through `DenseFeatures`.

  Most Sequential model API tests are covered in `training_test.py`.
  """

  @keras_parameterized.run_all_keras_modes
  def test_sequential_model(self):
    """Fit/evaluate/predict a Sequential model on a dict of NumPy arrays."""
    columns = [fc.numeric_column('a')]
    model = keras.models.Sequential([
        df.DenseFeatures(columns),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(20, activation='softmax')
    ])
    model.compile(
        optimizer='rmsprop',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
        run_eagerly=testing_utils.should_run_eagerly())

    x = {'a': np.random.random((10, 1))}
    y = np.random.randint(20, size=(10, 1))
    y = np_utils.to_categorical(y, num_classes=20)
    # `fit` is called twice on purpose: the second call exercises an
    # already-built model.
    model.fit(x, y, epochs=1, batch_size=5)
    model.fit(x, y, epochs=1, batch_size=5)
    model.evaluate(x, y, batch_size=5)
    model.predict(x, batch_size=5)

  @keras_parameterized.run_all_keras_modes
  def test_sequential_model_with_ds_input(self):
    """Fit/evaluate/predict a Sequential model on a zipped, batched Dataset."""
    columns = [fc.numeric_column('a')]
    model = keras.models.Sequential([
        df.DenseFeatures(columns),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(20, activation='softmax')
    ])
    model.compile(
        optimizer='rmsprop',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
        run_eagerly=testing_utils.should_run_eagerly())

    y = np.random.randint(20, size=(100, 1))
    y = np_utils.to_categorical(y, num_classes=20)
    x = {'a': np.random.random((100, 1))}
    # Features and labels are sliced separately and zipped into
    # (features, labels) pairs before batching.
    ds1 = dataset_ops.Dataset.from_tensor_slices(x)
    ds2 = dataset_ops.Dataset.from_tensor_slices(y)
    ds = dataset_ops.Dataset.zip((ds1, ds2)).batch(5)
    model.fit(ds, steps_per_epoch=1)
    model.fit(ds, steps_per_epoch=1)
    model.evaluate(ds, steps=1)
    model.predict(ds, steps=1)

  @keras_parameterized.run_all_keras_modes(always_skip_v1=True)
  def test_sequential_model_with_crossed_column(self):
    """Train a Sequential model on a bucketized column and a crossed column."""
    feature_columns = []
    age_buckets = fc.bucketized_column(
        fc.numeric_column('age'),
        boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])
    feature_columns.append(age_buckets)

    # indicator cols
    thal = fc.categorical_column_with_vocabulary_list(
        'thal', ['fixed', 'normal', 'reversible'])

    crossed_feature = fc.crossed_column([age_buckets, thal],
                                        hash_bucket_size=1000)
    crossed_feature = fc.indicator_column(crossed_feature)
    feature_columns.append(crossed_feature)

    feature_layer = df.DenseFeatures(feature_columns)

    model = keras.models.Sequential([
        feature_layer,
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(1, activation='sigmoid')
    ])

    age_data = np.random.randint(10, 100, size=100)
    thal_data = np.random.choice(['fixed', 'normal', 'reversible'], size=100)
    inp_x = {'age': age_data, 'thal': thal_data}
    # `high` is exclusive in `np.random.randint`, so 2 is needed to draw both
    # binary labels (0 and 1); `randint(0, 1)` would only ever yield 0.
    inp_y = np.random.randint(0, 2, size=100)
    ds = dataset_ops.Dataset.from_tensor_slices((inp_x, inp_y)).batch(5)
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'],)
    model.fit(ds, epochs=1)
    model.fit(ds, epochs=1)
    model.evaluate(ds)
    model.predict(ds)

  @keras_parameterized.run_all_keras_modes
  def test_subclassed_model_with_feature_columns(self):
    """Fit/evaluate/predict `TestDNNModel` on a dict of NumPy arrays."""
    col_a = fc.numeric_column('a')
    col_b = fc.numeric_column('b')

    dnn_model = TestDNNModel([col_a, col_b], 20)

    dnn_model.compile(
        optimizer='rmsprop',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
        run_eagerly=testing_utils.should_run_eagerly())

    x = {'a': np.random.random((10, 1)), 'b': np.random.random((10, 1))}
    y = np.random.randint(20, size=(10, 1))
    y = np_utils.to_categorical(y, num_classes=20)
    dnn_model.fit(x=x, y=y, epochs=1, batch_size=5)
    dnn_model.fit(x=x, y=y, epochs=1, batch_size=5)
    dnn_model.evaluate(x=x, y=y, batch_size=5)
    dnn_model.predict(x=x, batch_size=5)

  @keras_parameterized.run_all_keras_modes
  def test_subclassed_model_with_feature_columns_with_ds_input(self):
    """Fit/evaluate/predict `TestDNNModel` on a zipped, batched Dataset."""
    col_a = fc.numeric_column('a')
    col_b = fc.numeric_column('b')

    dnn_model = TestDNNModel([col_a, col_b], 20)

    dnn_model.compile(
        optimizer='rmsprop',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
        run_eagerly=testing_utils.should_run_eagerly())

    y = np.random.randint(20, size=(100, 1))
    y = np_utils.to_categorical(y, num_classes=20)
    x = {'a': np.random.random((100, 1)), 'b': np.random.random((100, 1))}
    ds1 = dataset_ops.Dataset.from_tensor_slices(x)
    ds2 = dataset_ops.Dataset.from_tensor_slices(y)
    ds = dataset_ops.Dataset.zip((ds1, ds2)).batch(5)
    dnn_model.fit(ds, steps_per_epoch=1)
    dnn_model.fit(ds, steps_per_epoch=1)
    dnn_model.evaluate(ds, steps=1)
    dnn_model.predict(ds, steps=1)

  # TODO(kaftan) seems to throw an error when enabled.
  @keras_parameterized.run_all_keras_modes
  def DISABLED_test_function_model_feature_layer_input(self):
    """Disabled: functional model using a `DenseFeatures` layer as its input.

    The `DISABLED_` prefix keeps the test runner from collecting this method.
    """
    col_a = fc.numeric_column('a')
    col_b = fc.numeric_column('b')

    feature_layer = df.DenseFeatures([col_a, col_b], name='fc')
    dense = keras.layers.Dense(4)

    # This seems problematic.... We probably need something for DenseFeatures
    # the way Input is for InputLayer.
    output = dense(feature_layer)

    model = keras.models.Model([feature_layer], [output])

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]
    model.compile(
        optimizer,
        loss,
        metrics=[metrics_module.CategoricalAccuracy(), 'mae'],
        loss_weights=loss_weights)

    data = ({'a': np.arange(10), 'b': np.arange(10)}, np.arange(10, 20))
    model.fit(*data, epochs=1)

  # TODO(kaftan) seems to throw an error when enabled.
  @keras_parameterized.run_all_keras_modes
  def DISABLED_test_function_model_multiple_feature_layer_inputs(self):
    """Disabled: functional model with two `DenseFeatures` layers as inputs.

    Feeds the same model with list-shaped and dict-shaped inputs, both with
    exactly the needed feature keys and with extra ("bloated") keys.
    """
    col_a = fc.numeric_column('a')
    col_b = fc.numeric_column('b')
    col_c = fc.numeric_column('c')

    fc1 = df.DenseFeatures([col_a, col_b], name='fc1')
    fc2 = df.DenseFeatures([col_b, col_c], name='fc2')
    dense = keras.layers.Dense(4)

    # This seems problematic.... We probably need something for DenseFeatures
    # the way Input is for InputLayer.
    output = dense(fc1) + dense(fc2)

    model = keras.models.Model([fc1, fc2], [output])

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]
    model.compile(
        optimizer,
        loss,
        metrics=[metrics_module.CategoricalAccuracy(), 'mae'],
        loss_weights=loss_weights)

    # NOTE(review): the labels below (`np.arange(10, 100)`) have 90 entries
    # while every feature array has 10 -- presumably these must match before
    # this test can be enabled; confirm the intended label shape.
    data_list = ([{
        'a': np.arange(10),
        'b': np.arange(10)
    }, {
        'b': np.arange(10),
        'c': np.arange(10)
    }], np.arange(10, 100))
    model.fit(*data_list, epochs=1)

    data_bloated_list = ([{
        'a': np.arange(10),
        'b': np.arange(10),
        'c': np.arange(10)
    }, {
        'a': np.arange(10),
        'b': np.arange(10),
        'c': np.arange(10)
    }], np.arange(10, 100))
    model.fit(*data_bloated_list, epochs=1)

    data_dict = ({
        'fc1': {
            'a': np.arange(10),
            'b': np.arange(10)
        },
        'fc2': {
            'b': np.arange(10),
            'c': np.arange(10)
        }
    }, np.arange(10, 100))
    model.fit(*data_dict, epochs=1)

    data_bloated_dict = ({
        'fc1': {
            'a': np.arange(10),
            'b': np.arange(10),
            'c': np.arange(10)
        },
        'fc2': {
            'a': np.arange(10),
            'b': np.arange(10),
            'c': np.arange(10)
        }
    }, np.arange(10, 100))
    model.fit(*data_bloated_dict, epochs=1)

  @keras_parameterized.run_all_keras_modes
  def test_string_input(self):
    """Fit a Sequential model whose features include a string column."""
    x = {'age': np.random.random((1024, 1)),
         'cabin': np.array(['a'] * 1024)}
    y = np.random.randint(2, size=(1024, 1))
    ds1 = dataset_ops.Dataset.from_tensor_slices(x)
    ds2 = dataset_ops.Dataset.from_tensor_slices(y)
    dataset = dataset_ops.Dataset.zip((ds1, ds2)).batch(4)
    # The string feature is hashed into 10 buckets and wrapped in an indicator
    # column so that `DenseFeatures` can consume it.
    categorical_cols = [fc.categorical_column_with_hash_bucket('cabin', 10)]
    feature_cols = ([fc.numeric_column('age')]
                    + [fc.indicator_column(cc) for cc in categorical_cols])
    layers = [df.DenseFeatures(feature_cols),
              keras.layers.Dense(128),
              keras.layers.Dense(1)]

    model = keras.models.Sequential(layers)
    model.compile(optimizer='sgd',
                  loss=keras.losses.BinaryCrossentropy())
    model.fit(dataset)


if __name__ == '__main__':
  test.main()