1# Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================== 15 16"""Methods to allow pandas.DataFrame (deprecated). 17 18This module and all its submodules are deprecated. See 19[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) 20for migration instructions. 21""" 22 23from __future__ import absolute_import 24from __future__ import division 25from __future__ import print_function 26 27from tensorflow.python.estimator.inputs.pandas_io import pandas_input_fn as core_pandas_input_fn 28from tensorflow.python.util.deprecation import deprecated 29 30try: 31 # pylint: disable=g-import-not-at-top 32 import pandas as pd 33 HAS_PANDAS = True 34except IOError: 35 # Pandas writes a temporary file during import. If it fails, don't use pandas. 36 HAS_PANDAS = False 37except ImportError: 38 HAS_PANDAS = False 39 40PANDAS_DTYPES = { 41 'int8': 'int', 42 'int16': 'int', 43 'int32': 'int', 44 'int64': 'int', 45 'uint8': 'int', 46 'uint16': 'int', 47 'uint32': 'int', 48 'uint64': 'int', 49 'float16': 'float', 50 'float32': 'float', 51 'float64': 'float', 52 'bool': 'i' 53} 54 55 56@deprecated(None, 'Please use tf.compat.v1.estimator.inputs.pandas_input_fn') 57def pandas_input_fn(x, 58 y=None, 59 batch_size=128, 60 num_epochs=1, 61 shuffle=True, 62 queue_capacity=1000, 63 num_threads=1, 64 target_column='target'): 65 """This input_fn diffs from the core version with default `shuffle`.""" 66 return core_pandas_input_fn(x=x, 67 y=y, 68 batch_size=batch_size, 69 shuffle=shuffle, 70 num_epochs=num_epochs, 71 queue_capacity=queue_capacity, 72 num_threads=num_threads, 73 target_column=target_column) 74 75 76@deprecated(None, 'Please access pandas data directly.') 77def extract_pandas_data(data): 78 """Extract data from pandas.DataFrame for predictors. 79 80 Given a DataFrame, will extract the values and cast them to float. The 81 DataFrame is expected to contain values of type int, float or bool. 82 83 Args: 84 data: `pandas.DataFrame` containing the data to be extracted. 85 86 Returns: 87 A numpy `ndarray` of the DataFrame's values as floats. 88 89 Raises: 90 ValueError: if data contains types other than int, float or bool. 91 """ 92 if not isinstance(data, pd.DataFrame): 93 return data 94 95 bad_data = [column for column in data 96 if data[column].dtype.name not in PANDAS_DTYPES] 97 98 if not bad_data: 99 return data.values.astype('float') 100 else: 101 error_report = [("'" + str(column) + "' type='" + 102 data[column].dtype.name + "'") for column in bad_data] 103 raise ValueError('Data types for extracting pandas data must be int, ' 104 'float, or bool. Found: ' + ', '.join(error_report)) 105 106 107@deprecated(None, 'Please access pandas data directly.') 108def extract_pandas_matrix(data): 109 """Extracts numpy matrix from pandas DataFrame. 110 111 Args: 112 data: `pandas.DataFrame` containing the data to be extracted. 113 114 Returns: 115 A numpy `ndarray` of the DataFrame's values. 116 """ 117 if not isinstance(data, pd.DataFrame): 118 return data 119 120 return data.as_matrix() 121 122 123@deprecated(None, 'Please access pandas data directly.') 124def extract_pandas_labels(labels): 125 """Extract data from pandas.DataFrame for labels. 126 127 Args: 128 labels: `pandas.DataFrame` or `pandas.Series` containing one column of 129 labels to be extracted. 130 131 Returns: 132 A numpy `ndarray` of labels from the DataFrame. 133 134 Raises: 135 ValueError: if more than one column is found or type is not int, float or 136 bool. 137 """ 138 if isinstance(labels, 139 pd.DataFrame): # pandas.Series also belongs to DataFrame 140 if len(labels.columns) > 1: 141 raise ValueError('Only one column for labels is allowed.') 142 143 bad_data = [column for column in labels 144 if labels[column].dtype.name not in PANDAS_DTYPES] 145 if not bad_data: 146 return labels.values 147 else: 148 error_report = ["'" + str(column) + "' type=" 149 + str(labels[column].dtype.name) for column in bad_data] 150 raise ValueError('Data types for extracting labels must be int, ' 151 'float, or bool. Found: ' + ', '.join(error_report)) 152 else: 153 return labels 154