# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests specific to Feature Columns integration."""
16
17import numpy as np
18
19from tensorflow.python import keras
20from tensorflow.python.data.ops import dataset_ops
21from tensorflow.python.feature_column import feature_column_lib as fc
22from tensorflow.python.keras import keras_parameterized
23from tensorflow.python.keras import metrics as metrics_module
24from tensorflow.python.keras import testing_utils
25from tensorflow.python.keras.feature_column import dense_features as df
26from tensorflow.python.keras.utils import np_utils
27from tensorflow.python.platform import test
28
29
class TestDNNModel(keras.models.Model):
  """Subclassed Keras model: a `DenseFeatures` layer followed by one `Dense`.

  Used by the tests in this file to exercise feature columns inside a
  subclassed (rather than Sequential) model.
  """

  def __init__(self, feature_columns, units, name=None, **kwargs):
    """Builds the two layers of the model.

    Args:
      feature_columns: Feature columns handed to `df.DenseFeatures`.
      units: Number of units of the `Dense` layer.
      name: Optional model name, forwarded to the base `Model`.
      **kwargs: Extra keyword arguments forwarded to the base `Model`.
    """
    super(TestDNNModel, self).__init__(name=name, **kwargs)
    self._input_layer = df.DenseFeatures(feature_columns, name='input_layer')
    self._dense_layer = keras.layers.Dense(units, name='dense_layer')

  def call(self, features):
    """Runs `features` through the feature layer, then the dense layer."""
    net = self._input_layer(features)
    net = self._dense_layer(net)
    return net
41
42
class FeatureColumnsIntegrationTest(keras_parameterized.TestCase):
  """Integration tests for feature columns used inside Keras models.

  Most Sequential model API tests are covered in `training_test.py`.
  """

  @keras_parameterized.run_all_keras_modes
  def test_sequential_model(self):
    """Trains/evaluates/predicts a Sequential model fed a dict of arrays."""
    columns = [fc.numeric_column('a')]
    model = keras.models.Sequential([
        df.DenseFeatures(columns),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(20, activation='softmax')
    ])
    model.compile(
        optimizer='rmsprop',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
        run_eagerly=testing_utils.should_run_eagerly())

    x = {'a': np.random.random((10, 1))}
    y = np.random.randint(20, size=(10, 1))
    y = np_utils.to_categorical(y, num_classes=20)
    # `fit` is called twice on purpose: the second call runs on an already
    # built/trained model.
    model.fit(x, y, epochs=1, batch_size=5)
    model.fit(x, y, epochs=1, batch_size=5)
    model.evaluate(x, y, batch_size=5)
    model.predict(x, batch_size=5)

  @keras_parameterized.run_all_keras_modes
  def test_sequential_model_with_ds_input(self):
    """Same as `test_sequential_model`, but fed from a batched `Dataset`."""
    columns = [fc.numeric_column('a')]
    model = keras.models.Sequential([
        df.DenseFeatures(columns),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(20, activation='softmax')
    ])
    model.compile(
        optimizer='rmsprop',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
        run_eagerly=testing_utils.should_run_eagerly())

    y = np.random.randint(20, size=(100, 1))
    y = np_utils.to_categorical(y, num_classes=20)
    x = {'a': np.random.random((100, 1))}
    # Zip the feature dict and the labels into (features, label) elements.
    ds1 = dataset_ops.Dataset.from_tensor_slices(x)
    ds2 = dataset_ops.Dataset.from_tensor_slices(y)
    ds = dataset_ops.Dataset.zip((ds1, ds2)).batch(5)
    model.fit(ds, steps_per_epoch=1)
    model.fit(ds, steps_per_epoch=1)
    model.evaluate(ds, steps=1)
    model.predict(ds, steps=1)

  @keras_parameterized.run_all_keras_modes(always_skip_v1=True)
  def test_sequential_model_with_crossed_column(self):
    """Runs a Sequential model over a bucketized and a crossed column."""
    feature_columns = []
    age_buckets = fc.bucketized_column(
        fc.numeric_column('age'),
        boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])
    feature_columns.append(age_buckets)

    # indicator cols
    thal = fc.categorical_column_with_vocabulary_list(
        'thal', ['fixed', 'normal', 'reversible'])

    crossed_feature = fc.crossed_column([age_buckets, thal],
                                        hash_bucket_size=1000)
    crossed_feature = fc.indicator_column(crossed_feature)
    feature_columns.append(crossed_feature)

    feature_layer = df.DenseFeatures(feature_columns)

    model = keras.models.Sequential([
        feature_layer,
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(1, activation='sigmoid')
    ])

    age_data = np.random.randint(10, 100, size=100)
    thal_data = np.random.choice(['fixed', 'normal', 'reversible'], size=100)
    inp_x = {'age': age_data, 'thal': thal_data}
    # `randint`'s upper bound is exclusive: (0, 1) would only ever produce 0,
    # so use (0, 2) to draw both binary classes.
    inp_y = np.random.randint(0, 2, size=100)
    ds = dataset_ops.Dataset.from_tensor_slices((inp_x, inp_y)).batch(5)
    # NOTE(review): unlike the sibling tests, `run_eagerly` is not forwarded
    # to `compile` here -- confirm whether that is intentional.
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'])
    model.fit(ds, epochs=1)
    model.fit(ds, epochs=1)
    model.evaluate(ds)
    model.predict(ds)

  @keras_parameterized.run_all_keras_modes
  def test_subclassed_model_with_feature_columns(self):
    """Trains/evaluates/predicts `TestDNNModel` fed a dict of arrays."""
    col_a = fc.numeric_column('a')
    col_b = fc.numeric_column('b')

    dnn_model = TestDNNModel([col_a, col_b], 20)

    dnn_model.compile(
        optimizer='rmsprop',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
        run_eagerly=testing_utils.should_run_eagerly())

    x = {'a': np.random.random((10, 1)), 'b': np.random.random((10, 1))}
    y = np.random.randint(20, size=(10, 1))
    y = np_utils.to_categorical(y, num_classes=20)
    dnn_model.fit(x=x, y=y, epochs=1, batch_size=5)
    dnn_model.fit(x=x, y=y, epochs=1, batch_size=5)
    dnn_model.evaluate(x=x, y=y, batch_size=5)
    dnn_model.predict(x=x, batch_size=5)

  @keras_parameterized.run_all_keras_modes
  def test_subclassed_model_with_feature_columns_with_ds_input(self):
    """Same as the subclassed-model test, but fed from a batched `Dataset`."""
    col_a = fc.numeric_column('a')
    col_b = fc.numeric_column('b')

    dnn_model = TestDNNModel([col_a, col_b], 20)

    dnn_model.compile(
        optimizer='rmsprop',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
        run_eagerly=testing_utils.should_run_eagerly())

    y = np.random.randint(20, size=(100, 1))
    y = np_utils.to_categorical(y, num_classes=20)
    x = {'a': np.random.random((100, 1)), 'b': np.random.random((100, 1))}
    # Zip the feature dict and the labels into (features, label) elements.
    ds1 = dataset_ops.Dataset.from_tensor_slices(x)
    ds2 = dataset_ops.Dataset.from_tensor_slices(y)
    ds = dataset_ops.Dataset.zip((ds1, ds2)).batch(5)
    dnn_model.fit(ds, steps_per_epoch=1)
    dnn_model.fit(ds, steps_per_epoch=1)
    dnn_model.evaluate(ds, steps=1)
    dnn_model.predict(ds, steps=1)

  # TODO(kaftan) seems to throw an error when enabled.
  # The `DISABLED_` prefix means the method name does not start with `test`.
  @keras_parameterized.run_all_keras_modes
  def DISABLED_test_function_model_feature_layer_input(self):
    """Disabled: functional model using a `DenseFeatures` layer as its input."""
    col_a = fc.numeric_column('a')
    col_b = fc.numeric_column('b')

    feature_layer = df.DenseFeatures([col_a, col_b], name='fc')
    dense = keras.layers.Dense(4)

    # This seems problematic.... We probably need something for DenseFeatures
    # the way Input is for InputLayer.
    output = dense(feature_layer)

    model = keras.models.Model([feature_layer], [output])

    optimizer = 'rmsprop'
    loss = 'mse'
    # NOTE(review): two loss weights are given but the model is built with a
    # single output -- confirm before re-enabling.
    loss_weights = [1., 0.5]
    model.compile(
        optimizer,
        loss,
        metrics=[metrics_module.CategoricalAccuracy(), 'mae'],
        loss_weights=loss_weights)

    data = ({'a': np.arange(10), 'b': np.arange(10)}, np.arange(10, 20))
    model.fit(*data, epochs=1)

  # TODO(kaftan) seems to throw an error when enabled.
  # The `DISABLED_` prefix means the method name does not start with `test`.
  @keras_parameterized.run_all_keras_modes
  def DISABLED_test_function_model_multiple_feature_layer_inputs(self):
    """Disabled: functional model with two `DenseFeatures` layers as inputs.

    Fits the same model with four input layouts: a list of exact feature
    dicts, a list of dicts carrying extra keys, a dict keyed by layer name,
    and a dict keyed by layer name carrying extra keys.
    """
    col_a = fc.numeric_column('a')
    col_b = fc.numeric_column('b')
    col_c = fc.numeric_column('c')

    fc1 = df.DenseFeatures([col_a, col_b], name='fc1')
    fc2 = df.DenseFeatures([col_b, col_c], name='fc2')
    dense = keras.layers.Dense(4)

    # This seems problematic.... We probably need something for DenseFeatures
    # the way Input is for InputLayer.
    output = dense(fc1) + dense(fc2)

    model = keras.models.Model([fc1, fc2], [output])

    optimizer = 'rmsprop'
    loss = 'mse'
    # NOTE(review): two loss weights are given but the model is built with a
    # single output -- confirm before re-enabling.
    loss_weights = [1., 0.5]
    model.compile(
        optimizer,
        loss,
        metrics=[metrics_module.CategoricalAccuracy(), 'mae'],
        loss_weights=loss_weights)

    # NOTE(review): every label array below is `np.arange(10, 100)` (90
    # values) while each feature array has 10 values -- confirm the intended
    # label size before re-enabling.
    data_list = ([{
        'a': np.arange(10),
        'b': np.arange(10)
    }, {
        'b': np.arange(10),
        'c': np.arange(10)
    }], np.arange(10, 100))
    model.fit(*data_list, epochs=1)

    data_bloated_list = ([{
        'a': np.arange(10),
        'b': np.arange(10),
        'c': np.arange(10)
    }, {
        'a': np.arange(10),
        'b': np.arange(10),
        'c': np.arange(10)
    }], np.arange(10, 100))
    model.fit(*data_bloated_list, epochs=1)

    data_dict = ({
        'fc1': {
            'a': np.arange(10),
            'b': np.arange(10)
        },
        'fc2': {
            'b': np.arange(10),
            'c': np.arange(10)
        }
    }, np.arange(10, 100))
    model.fit(*data_dict, epochs=1)

    data_bloated_dict = ({
        'fc1': {
            'a': np.arange(10),
            'b': np.arange(10),
            'c': np.arange(10)
        },
        'fc2': {
            'a': np.arange(10),
            'b': np.arange(10),
            'c': np.arange(10)
        }
    }, np.arange(10, 100))
    model.fit(*data_bloated_dict, epochs=1)

  @keras_parameterized.run_all_keras_modes
  def test_string_input(self):
    """Fits a Sequential model whose dataset includes a string feature."""
    x = {'age': np.random.random((1024, 1)),
         'cabin': np.array(['a'] * 1024)}
    y = np.random.randint(2, size=(1024, 1))
    ds1 = dataset_ops.Dataset.from_tensor_slices(x)
    ds2 = dataset_ops.Dataset.from_tensor_slices(y)
    dataset = dataset_ops.Dataset.zip((ds1, ds2)).batch(4)
    categorical_cols = [fc.categorical_column_with_hash_bucket('cabin', 10)]
    feature_cols = ([fc.numeric_column('age')]
                    + [fc.indicator_column(cc) for cc in categorical_cols])
    layers = [df.DenseFeatures(feature_cols),
              keras.layers.Dense(128),
              keras.layers.Dense(1)]

    model = keras.models.Sequential(layers)
    # NOTE(review): unlike most sibling tests, `run_eagerly` is not forwarded
    # to `compile` here -- confirm whether that is intentional.
    model.compile(optimizer='sgd',
                  loss=keras.losses.BinaryCrossentropy())
    model.fit(dataset)
297
298
# Run the test suite when this file is executed as a script.
if __name__ == '__main__':
  test.main()
301