1# Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================== 15"""Various high level TF models (deprecated). 16 17This module and all its submodules are deprecated. See 18[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) 19for migration instructions. 20""" 21 22from __future__ import absolute_import 23from __future__ import division 24from __future__ import print_function 25 26import functools 27 28from tensorflow.contrib import rnn as contrib_rnn 29from tensorflow.contrib.learn.python.learn.ops import losses_ops 30from tensorflow.python.framework import dtypes 31from tensorflow.python.framework import ops 32from tensorflow.python.ops import array_ops as array_ops_ 33from tensorflow.python.ops import init_ops 34from tensorflow.python.ops import variable_scope as vs 35from tensorflow.python.summary import summary 36from tensorflow.python.util.deprecation import deprecated 37 38 39@deprecated(None, 'Consider using a tf.estimator.LinearRegressor') 40def linear_regression_zero_init(x, y): 41 """Linear regression subgraph with zero-value initial weights and bias. 42 43 Args: 44 x: tensor or placeholder for input features. 45 y: tensor or placeholder for labels. 46 47 Returns: 48 Predictions and loss tensors. 49 """ 50 return linear_regression(x, y, init_mean=0.0, init_stddev=0.0) 51 52 53@deprecated(None, 'Consider using a class from tf.estimator.LinearClassifier') 54def logistic_regression_zero_init(x, y): 55 """Logistic regression subgraph with zero-value initial weights and bias. 56 57 Args: 58 x: tensor or placeholder for input features. 59 y: tensor or placeholder for labels. 60 61 Returns: 62 Predictions and loss tensors. 63 """ 64 return logistic_regression(x, y, init_mean=0.0, init_stddev=0.0) 65 66 67@deprecated(None, 'Consider using a class from tf.estimator.') 68def linear_regression(x, y, init_mean=None, init_stddev=1.0): 69 """Creates linear regression TensorFlow subgraph. 70 71 Args: 72 x: tensor or placeholder for input features. 73 y: tensor or placeholder for labels. 74 init_mean: the mean value to use for initialization. 75 init_stddev: the standard deviation to use for initialization. 76 77 Returns: 78 Predictions and loss tensors. 79 80 Side effects: 81 The variables linear_regression.weights and linear_regression.bias are 82 initialized as follows. If init_mean is not None, then initialization 83 will be done using a random normal initializer with the given init_mean 84 and init_stddv. (These may be set to 0.0 each if a zero initialization 85 is desirable for convex use cases.) If init_mean is None, then the 86 uniform_unit_scaling_initialzer will be used. 87 """ 88 with vs.variable_scope('linear_regression'): 89 scope_name = vs.get_variable_scope().name 90 summary.histogram('%s.x' % scope_name, x) 91 summary.histogram('%s.y' % scope_name, y) 92 dtype = x.dtype.base_dtype 93 y_shape = y.get_shape() 94 if len(y_shape) == 1: 95 output_shape = 1 96 else: 97 output_shape = y_shape[1] 98 # Set up the requested initialization. 99 if init_mean is None: 100 weights = vs.get_variable( 101 'weights', [x.get_shape()[1], output_shape], dtype=dtype) 102 bias = vs.get_variable('bias', [output_shape], dtype=dtype) 103 else: 104 weights = vs.get_variable( 105 'weights', [x.get_shape()[1], output_shape], 106 initializer=init_ops.random_normal_initializer( 107 init_mean, init_stddev, dtype=dtype), 108 dtype=dtype) 109 bias = vs.get_variable( 110 'bias', [output_shape], 111 initializer=init_ops.random_normal_initializer( 112 init_mean, init_stddev, dtype=dtype), 113 dtype=dtype) 114 summary.histogram('%s.weights' % scope_name, weights) 115 summary.histogram('%s.bias' % scope_name, bias) 116 return losses_ops.mean_squared_error_regressor(x, y, weights, bias) 117 118 119@deprecated(None, 'Consider using a class from tf.estimator.') 120def logistic_regression(x, 121 y, 122 class_weight=None, 123 init_mean=None, 124 init_stddev=1.0): 125 """Creates logistic regression TensorFlow subgraph. 126 127 Args: 128 x: tensor or placeholder for input features, 129 shape should be [batch_size, n_features]. 130 y: tensor or placeholder for labels (one-hot), 131 shape should be [batch_size, n_classes]. 132 class_weight: tensor, [n_classes], where for each class 133 it has weight of the class. If not provided 134 will check if graph contains tensor `class_weight:0`. 135 If that is not provided either all ones are used. 136 init_mean: the mean value to use for initialization. 137 init_stddev: the standard deviation to use for initialization. 138 139 Returns: 140 Predictions and loss tensors. 141 142 Side effects: 143 The variables linear_regression.weights and linear_regression.bias are 144 initialized as follows. If init_mean is not None, then initialization 145 will be done using a random normal initializer with the given init_mean 146 and init_stddv. (These may be set to 0.0 each if a zero initialization 147 is desirable for convex use cases.) If init_mean is None, then the 148 uniform_unit_scaling_initialzer will be used. 149 """ 150 with vs.variable_scope('logistic_regression'): 151 scope_name = vs.get_variable_scope().name 152 summary.histogram('%s.x' % scope_name, x) 153 summary.histogram('%s.y' % scope_name, y) 154 dtype = x.dtype.base_dtype 155 # Set up the requested initialization. 156 if init_mean is None: 157 weights = vs.get_variable( 158 'weights', [x.get_shape()[1], y.get_shape()[-1]], dtype=dtype) 159 bias = vs.get_variable('bias', [y.get_shape()[-1]], dtype=dtype) 160 else: 161 weights = vs.get_variable( 162 'weights', [x.get_shape()[1], y.get_shape()[-1]], 163 initializer=init_ops.random_normal_initializer( 164 init_mean, init_stddev, dtype=dtype), 165 dtype=dtype) 166 bias = vs.get_variable( 167 'bias', [y.get_shape()[-1]], 168 initializer=init_ops.random_normal_initializer( 169 init_mean, init_stddev, dtype=dtype), 170 dtype=dtype) 171 summary.histogram('%s.weights' % scope_name, weights) 172 summary.histogram('%s.bias' % scope_name, bias) 173 # If no class weight provided, try to retrieve one from pre-defined 174 # tensor name in the graph. 175 if not class_weight: 176 try: 177 class_weight = ops.get_default_graph().get_tensor_by_name( 178 'class_weight:0') 179 except KeyError: 180 pass 181 182 return losses_ops.softmax_classifier( 183 x, y, weights, bias, class_weight=class_weight) 184 185 186## This will be in TensorFlow 0.7. 187## TODO(ilblackdragon): Clean this up when it's released 188def _reverse_seq(input_seq, lengths): 189 """Reverse a list of Tensors up to specified lengths. 190 191 Args: 192 input_seq: Sequence of seq_len tensors of dimension (batch_size, depth) 193 lengths: A tensor of dimension batch_size, containing lengths for each 194 sequence in the batch. If "None" is specified, simply 195 reverses the list. 196 197 Returns: 198 time-reversed sequence 199 """ 200 if lengths is None: 201 return list(reversed(input_seq)) 202 203 for input_ in input_seq: 204 input_.set_shape(input_.get_shape().with_rank(2)) 205 206 # Join into (time, batch_size, depth) 207 s_joined = array_ops_.pack(input_seq) 208 209 # Reverse along dimension 0 210 s_reversed = array_ops_.reverse_sequence(s_joined, lengths, 0, 1) 211 # Split again into list 212 result = array_ops_.unpack(s_reversed) 213 return result 214 215 216@deprecated(None, 'Please consider `tf.nn.bidirectional_dynamic_rnn`.') 217def bidirectional_rnn(cell_fw, 218 cell_bw, 219 inputs, 220 initial_state_fw=None, 221 initial_state_bw=None, 222 dtype=None, 223 sequence_length=None, 224 scope=None): 225 """Creates a bidirectional recurrent neural network. 226 227 Similar to the unidirectional case (rnn) but takes input and builds 228 independent forward and backward RNNs with the final forward and backward 229 outputs depth-concatenated, such that the output will have the format 230 [time][batch][cell_fw.output_size + cell_bw.output_size]. The input_size of 231 forward and backward cell must match. The initial state for both directions 232 is zero by default (but can be set optionally) and no intermediate states 233 are ever returned -- the network is fully unrolled for the given (passed in) 234 length(s) of the sequence(s) or completely unrolled if length(s) is not 235 given. 236 Args: 237 cell_fw: An instance of RNNCell, to be used for forward direction. 238 cell_bw: An instance of RNNCell, to be used for backward direction. 239 inputs: A length T list of inputs, each a tensor of shape 240 [batch_size, cell.input_size]. 241 initial_state_fw: (optional) An initial state for the forward RNN. 242 This must be a tensor of appropriate type and shape 243 [batch_size x cell.state_size]. 244 initial_state_bw: (optional) Same as for initial_state_fw. 245 dtype: (optional) The data type for the initial state. Required if 246 either of the initial states are not provided. 247 sequence_length: (optional) An int64 vector (tensor) of size 248 [batch_size], 249 containing the actual lengths for each of the sequences. 250 scope: VariableScope for the created subgraph; defaults to "BiRNN" 251 252 Returns: 253 A pair (outputs, state) where: 254 outputs is a length T list of outputs (one for each input), which 255 are depth-concatenated forward and backward outputs 256 state is the concatenated final state of the forward and backward RNN 257 258 Raises: 259 TypeError: If "cell_fw" or "cell_bw" is not an instance of RNNCell. 260 ValueError: If inputs is None or an empty list. 261 """ 262 263 if not isinstance(cell_fw, contrib_rnn.RNNCell): 264 raise TypeError('cell_fw must be an instance of RNNCell') 265 if not isinstance(cell_bw, contrib_rnn.RNNCell): 266 raise TypeError('cell_bw must be an instance of RNNCell') 267 if not isinstance(inputs, list): 268 raise TypeError('inputs must be a list') 269 if not inputs: 270 raise ValueError('inputs must not be empty') 271 272 name = scope or 'BiRNN' 273 # Forward direction 274 with vs.variable_scope(name + '_FW'): 275 output_fw, state_fw = contrib_rnn.static_rnn(cell_fw, inputs, 276 initial_state_fw, dtype, 277 sequence_length) 278 279 # Backward direction 280 with vs.variable_scope(name + '_BW'): 281 tmp, state_bw = contrib_rnn.static_rnn( 282 cell_bw, 283 _reverse_seq(inputs, sequence_length), initial_state_bw, dtype, 284 sequence_length) 285 output_bw = _reverse_seq(tmp, sequence_length) 286 # Concat each of the forward/backward outputs 287 outputs = [ 288 array_ops_.concat([fw, bw], 1) for fw, bw in zip(output_fw, output_bw) 289 ] 290 291 return outputs, array_ops_.concat([state_fw, state_bw], 1) 292 293 294# End of TensorFlow 0.7 295 296 297@deprecated(None, 'Please consider tensorflow/tensor2tensor.') 298def get_rnn_model(rnn_size, cell_type, num_layers, input_op_fn, bidirectional, 299 target_predictor_fn, sequence_length, initial_state, 300 attn_length, attn_size, attn_vec_size): 301 """Returns a function that creates a RNN TensorFlow subgraph. 302 303 Args: 304 rnn_size: The size for rnn cell, e.g. size of your word embeddings. 305 cell_type: The type of rnn cell, including rnn, gru, and lstm. 306 num_layers: The number of layers of the rnn model. 307 input_op_fn: Function that will transform the input tensor, such as 308 creating word embeddings, byte list, etc. This takes 309 an argument `x` for input and returns transformed `x`. 310 bidirectional: boolean, Whether this is a bidirectional rnn. 311 target_predictor_fn: Function that will predict target from input 312 features. This can be logistic regression, 313 linear regression or any other model, 314 that takes `x`, `y` and returns predictions and loss 315 tensors. 316 sequence_length: If sequence_length is provided, dynamic calculation is 317 performed. This saves computational time when unrolling past max sequence 318 length. Required for bidirectional RNNs. 319 initial_state: An initial state for the RNN. This must be a tensor of 320 appropriate type and shape [batch_size x cell.state_size]. 321 attn_length: integer, the size of attention vector attached to rnn cells. 322 attn_size: integer, the size of an attention window attached to rnn cells. 323 attn_vec_size: integer, the number of convolutional features calculated on 324 attention state and the size of the hidden layer built from base cell 325 state. 326 327 Returns: 328 A function that creates the subgraph. 329 """ 330 331 def rnn_estimator(x, y): 332 """RNN estimator with target predictor function on top.""" 333 x = input_op_fn(x) 334 if cell_type == 'rnn': 335 cell_fn = contrib_rnn.BasicRNNCell 336 elif cell_type == 'gru': 337 cell_fn = contrib_rnn.GRUCell 338 elif cell_type == 'lstm': 339 cell_fn = functools.partial( 340 contrib_rnn.BasicLSTMCell, state_is_tuple=False) 341 else: 342 raise ValueError('cell_type {} is not supported. '.format(cell_type)) 343 # TODO(ipolosukhin): state_is_tuple=False is deprecated 344 if bidirectional: 345 # forward direction cell 346 fw_cell = lambda: cell_fn(rnn_size) 347 bw_cell = lambda: cell_fn(rnn_size) 348 # attach attention cells if specified 349 if attn_length is not None: 350 def attn_fw_cell(): 351 return contrib_rnn.AttentionCellWrapper( 352 fw_cell(), 353 attn_length=attn_length, 354 attn_size=attn_size, 355 attn_vec_size=attn_vec_size, 356 state_is_tuple=False) 357 358 def attn_bw_cell(): 359 return contrib_rnn.AttentionCellWrapper( 360 bw_cell(), 361 attn_length=attn_length, 362 attn_size=attn_size, 363 attn_vec_size=attn_vec_size, 364 state_is_tuple=False) 365 else: 366 attn_fw_cell = fw_cell 367 attn_bw_cell = bw_cell 368 369 rnn_fw_cell = contrib_rnn.MultiRNNCell( 370 [attn_fw_cell() for _ in range(num_layers)], state_is_tuple=False) 371 # backward direction cell 372 rnn_bw_cell = contrib_rnn.MultiRNNCell( 373 [attn_bw_cell() for _ in range(num_layers)], state_is_tuple=False) 374 # pylint: disable=unexpected-keyword-arg, no-value-for-parameter 375 _, encoding = bidirectional_rnn( 376 rnn_fw_cell, 377 rnn_bw_cell, 378 x, 379 dtype=dtypes.float32, 380 sequence_length=sequence_length, 381 initial_state_fw=initial_state, 382 initial_state_bw=initial_state) 383 else: 384 rnn_cell = lambda: cell_fn(rnn_size) 385 386 if attn_length is not None: 387 def attn_rnn_cell(): 388 return contrib_rnn.AttentionCellWrapper( 389 rnn_cell(), 390 attn_length=attn_length, 391 attn_size=attn_size, 392 attn_vec_size=attn_vec_size, 393 state_is_tuple=False) 394 else: 395 attn_rnn_cell = rnn_cell 396 397 cell = contrib_rnn.MultiRNNCell( 398 [attn_rnn_cell() for _ in range(num_layers)], state_is_tuple=False) 399 _, encoding = contrib_rnn.static_rnn( 400 cell, 401 x, 402 dtype=dtypes.float32, 403 sequence_length=sequence_length, 404 initial_state=initial_state) 405 return target_predictor_fn(encoding, y) 406 407 return rnn_estimator 408