# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Various high level TF models (deprecated).

This module and all its submodules are deprecated. See
[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md)
for migration instructions.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import functools

from tensorflow.contrib import rnn as contrib_rnn
from tensorflow.contrib.learn.python.learn.ops import losses_ops
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops as array_ops_
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.summary import summary
from tensorflow.python.util.deprecation import deprecated


@deprecated(None, 'Consider using a tf.estimator.LinearRegressor')
def linear_regression_zero_init(x, y):
  """Linear regression subgraph with zero-value initial weights and bias.

  Args:
    x: tensor or placeholder for input features.
    y: tensor or placeholder for labels.

  Returns:
    Predictions and loss tensors.
  """
  return linear_regression(x, y, init_mean=0.0, init_stddev=0.0)


@deprecated(None, 'Consider using tf.estimator.LinearClassifier')
def logistic_regression_zero_init(x, y):
  """Logistic regression subgraph with zero-value initial weights and bias.

  Args:
    x: tensor or placeholder for input features.
    y: tensor or placeholder for labels.

  Returns:
    Predictions and loss tensors.
  """
  return logistic_regression(x, y, init_mean=0.0, init_stddev=0.0)


@deprecated(None, 'Consider using a class from tf.estimator.')
def linear_regression(x, y, init_mean=None, init_stddev=1.0):
  """Creates linear regression TensorFlow subgraph.

  Args:
    x: tensor or placeholder for input features.
    y: tensor or placeholder for labels.
    init_mean: the mean value to use for initialization.
    init_stddev: the standard deviation to use for initialization.

  Returns:
    Predictions and loss tensors.

  Side effects:
    The variables linear_regression.weights and linear_regression.bias are
    initialized as follows.  If init_mean is not None, then initialization
    will be done using a random normal initializer with the given init_mean
    and init_stddev.  (These may be set to 0.0 each if a zero initialization
    is desirable for convex use cases.)  If init_mean is None, then the
    uniform_unit_scaling_initializer will be used.
  """
  with vs.variable_scope('linear_regression'):
    scope_name = vs.get_variable_scope().name
    summary.histogram('%s.x' % scope_name, x)
    summary.histogram('%s.y' % scope_name, y)
    dtype = x.dtype.base_dtype
    y_shape = y.get_shape()
    if len(y_shape) == 1:
      output_shape = 1
    else:
      output_shape = y_shape[1]
    # Set up the requested initialization.
    if init_mean is None:
      weights = vs.get_variable(
          'weights', [x.get_shape()[1], output_shape], dtype=dtype)
      bias = vs.get_variable('bias', [output_shape], dtype=dtype)
    else:
      weights = vs.get_variable(
          'weights', [x.get_shape()[1], output_shape],
          initializer=init_ops.random_normal_initializer(
              init_mean, init_stddev, dtype=dtype),
          dtype=dtype)
      bias = vs.get_variable(
          'bias', [output_shape],
          initializer=init_ops.random_normal_initializer(
              init_mean, init_stddev, dtype=dtype),
          dtype=dtype)
    summary.histogram('%s.weights' % scope_name, weights)
    summary.histogram('%s.bias' % scope_name, bias)
    return losses_ops.mean_squared_error_regressor(x, y, weights, bias)


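# Example usage (an illustrative sketch, not part of this module's API;
# assumes TF 1.x graph mode with `import numpy as np` and
# `import tensorflow as tf`, and hypothetical feed arrays `x_data`/`y_data`):
#
#   x = tf.placeholder(tf.float32, [None, 3])
#   y = tf.placeholder(tf.float32, [None, 1])
#   predictions, loss = linear_regression(x, y)
#   # For the convex, zero-initialized variant use
#   # linear_regression_zero_init(x, y) instead.
#   with tf.Session() as sess:
#     sess.run(tf.global_variables_initializer())
#     x_data = np.random.rand(8, 3).astype(np.float32)
#     y_data = np.random.rand(8, 1).astype(np.float32)
#     print(sess.run(loss, feed_dict={x: x_data, y: y_data}))

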
@deprecated(None, 'Consider using a class from tf.estimator.')
def logistic_regression(x,
                        y,
                        class_weight=None,
                        init_mean=None,
                        init_stddev=1.0):
  """Creates logistic regression TensorFlow subgraph.

  Args:
    x: tensor or placeholder for input features,
       shape should be [batch_size, n_features].
    y: tensor or placeholder for labels (one-hot),
       shape should be [batch_size, n_classes].
    class_weight: tensor of shape [n_classes], holding a weight for each
                  class. If not provided, the graph is checked for a tensor
                  named `class_weight:0`; if that is absent as well, all
                  ones are used.
    init_mean: the mean value to use for initialization.
    init_stddev: the standard deviation to use for initialization.

  Returns:
    Predictions and loss tensors.

  Side effects:
    The variables logistic_regression.weights and logistic_regression.bias
    are initialized as follows.  If init_mean is not None, then
    initialization will be done using a random normal initializer with the
    given init_mean and init_stddev.  (These may be set to 0.0 each if a
    zero initialization is desirable for convex use cases.)  If init_mean
    is None, then the uniform_unit_scaling_initializer will be used.
  """
  with vs.variable_scope('logistic_regression'):
    scope_name = vs.get_variable_scope().name
    summary.histogram('%s.x' % scope_name, x)
    summary.histogram('%s.y' % scope_name, y)
    dtype = x.dtype.base_dtype
    # Set up the requested initialization.
    if init_mean is None:
      weights = vs.get_variable(
          'weights', [x.get_shape()[1], y.get_shape()[-1]], dtype=dtype)
      bias = vs.get_variable('bias', [y.get_shape()[-1]], dtype=dtype)
    else:
      weights = vs.get_variable(
          'weights', [x.get_shape()[1], y.get_shape()[-1]],
          initializer=init_ops.random_normal_initializer(
              init_mean, init_stddev, dtype=dtype),
          dtype=dtype)
      bias = vs.get_variable(
          'bias', [y.get_shape()[-1]],
          initializer=init_ops.random_normal_initializer(
              init_mean, init_stddev, dtype=dtype),
          dtype=dtype)
    summary.histogram('%s.weights' % scope_name, weights)
    summary.histogram('%s.bias' % scope_name, bias)
    # If no class weight was provided, try to retrieve one from the
    # pre-defined tensor name in the graph.  (Compare with `is None` rather
    # than `not`: evaluating a tf.Tensor as a Python bool raises TypeError.)
    if class_weight is None:
      try:
        class_weight = ops.get_default_graph().get_tensor_by_name(
            'class_weight:0')
      except KeyError:
        pass

    return losses_ops.softmax_classifier(
        x, y, weights, bias, class_weight=class_weight)


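# Example usage (an illustrative sketch; n_features=4 and n_classes=3 are
# arbitrary):
#
#   x = tf.placeholder(tf.float32, [None, 4])
#   y = tf.placeholder(tf.float32, [None, 3])  # one-hot labels
#   predictions, loss = logistic_regression(x, y)
#
# To exercise the `class_weight:0` fallback instead of passing the argument
# explicitly, create a suitably named tensor in the same graph beforehand:
#
#   class_weight = tf.placeholder(tf.float32, [3], name='class_weight')

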
## This will be in TensorFlow 0.7.
## TODO(ilblackdragon): Clean this up when it's released
def _reverse_seq(input_seq, lengths):
  """Reverse a list of Tensors up to specified lengths.

  Args:
    input_seq: Sequence of seq_len tensors of dimension (batch_size, depth)
    lengths:   A tensor of dimension batch_size, containing lengths for each
               sequence in the batch. If None, the list is simply reversed.

  Returns:
    The time-reversed sequence.
  """
  if lengths is None:
    return list(reversed(input_seq))

  for input_ in input_seq:
    input_.set_shape(input_.get_shape().with_rank(2))

  # Join into (time, batch_size, depth)
  s_joined = array_ops_.stack(input_seq)

  # Reverse along dimension 0
  s_reversed = array_ops_.reverse_sequence(s_joined, lengths, 0, 1)
  # Split again into list
  result = array_ops_.unstack(s_reversed)
  return result


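# Example (an illustrative sketch): for a batch of two sequences of maximum
# length 3 with actual lengths [2, 3], only the first `lengths[i]` steps of
# sequence i are reversed; the padded tail stays in place.
#
#   seq = [tf.constant([[1.], [10.]]),   # t=0
#          tf.constant([[2.], [20.]]),   # t=1
#          tf.constant([[3.], [30.]])]   # t=2 (padding for sequence 0)
#   rev = _reverse_seq(seq, tf.constant([2, 3], dtype=tf.int64))
#   # Sequence 0 becomes [2., 1., 3.]; sequence 1 becomes [30., 20., 10.].

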
@deprecated(None, 'Please consider `tf.nn.bidirectional_dynamic_rnn`.')
def bidirectional_rnn(cell_fw,
                      cell_bw,
                      inputs,
                      initial_state_fw=None,
                      initial_state_bw=None,
                      dtype=None,
                      sequence_length=None,
                      scope=None):
  """Creates a bidirectional recurrent neural network.

  Similar to the unidirectional case (rnn) but takes input and builds
  independent forward and backward RNNs with the final forward and backward
  outputs depth-concatenated, such that the output will have the format
  [time][batch][cell_fw.output_size + cell_bw.output_size]. The input_size
  of the forward and backward cells must match. The initial state for both
  directions is zero by default (but can be set optionally) and no
  intermediate states are ever returned -- the network is fully unrolled for
  the given (passed in) length(s) of the sequence(s) or completely unrolled
  if length(s) is not given.

  Args:
    cell_fw: An instance of RNNCell, to be used for the forward direction.
    cell_bw: An instance of RNNCell, to be used for the backward direction.
    inputs: A length T list of inputs, each a tensor of shape
      [batch_size, cell.input_size].
    initial_state_fw: (optional) An initial state for the forward RNN.
      This must be a tensor of appropriate type and shape
      [batch_size x cell.state_size].
    initial_state_bw: (optional) Same as for initial_state_fw.
    dtype: (optional) The data type for the initial state.  Required if
      either of the initial states is not provided.
    sequence_length: (optional) An int64 vector (tensor) of size
      [batch_size], containing the actual lengths for each of the sequences.
    scope: VariableScope for the created subgraph; defaults to "BiRNN".

  Returns:
    A pair (outputs, state) where:
      outputs is a length T list of outputs (one for each input), which
        are depth-concatenated forward and backward outputs.
      state is the concatenated final state of the forward and backward
        RNNs.

  Raises:
    TypeError: If "cell_fw" or "cell_bw" is not an instance of RNNCell.
    ValueError: If inputs is None or an empty list.
  """

  if not isinstance(cell_fw, contrib_rnn.RNNCell):
    raise TypeError('cell_fw must be an instance of RNNCell')
  if not isinstance(cell_bw, contrib_rnn.RNNCell):
    raise TypeError('cell_bw must be an instance of RNNCell')
  if not isinstance(inputs, list):
    raise TypeError('inputs must be a list')
  if not inputs:
    raise ValueError('inputs must not be empty')

  name = scope or 'BiRNN'
  # Forward direction
  with vs.variable_scope(name + '_FW'):
    output_fw, state_fw = contrib_rnn.static_rnn(cell_fw, inputs,
                                                 initial_state_fw, dtype,
                                                 sequence_length)

  # Backward direction: run over the reversed inputs, then reverse the
  # outputs back into the original time order.
  with vs.variable_scope(name + '_BW'):
    tmp, state_bw = contrib_rnn.static_rnn(
        cell_bw,
        _reverse_seq(inputs, sequence_length), initial_state_bw, dtype,
        sequence_length)
  output_bw = _reverse_seq(tmp, sequence_length)
  # Concat each of the forward/backward outputs
  outputs = [
      array_ops_.concat([fw, bw], 1) for fw, bw in zip(output_fw, output_bw)
  ]

  return outputs, array_ops_.concat([state_fw, state_bw], 1)


# End of TensorFlow 0.7


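# Example usage of bidirectional_rnn (an illustrative sketch; the cell and
# input sizes are arbitrary):
#
#   cell_fw = contrib_rnn.GRUCell(8)
#   cell_bw = contrib_rnn.GRUCell(8)
#   inputs = [tf.placeholder(tf.float32, [None, 5]) for _ in range(10)]
#   outputs, state = bidirectional_rnn(cell_fw, cell_bw, inputs,
#                                      dtype=tf.float32)
#   # Each element of `outputs` has shape [batch_size, 16] (8 forward + 8
#   # backward), and `state` concatenates the two final GRU states.

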
@deprecated(None, 'Please consider tensorflow/tensor2tensor.')
def get_rnn_model(rnn_size, cell_type, num_layers, input_op_fn, bidirectional,
                  target_predictor_fn, sequence_length, initial_state,
                  attn_length, attn_size, attn_vec_size):
  """Returns a function that creates an RNN TensorFlow subgraph.

  Args:
    rnn_size: The size for rnn cell, e.g. size of your word embeddings.
    cell_type: The type of rnn cell, including rnn, gru, and lstm.
    num_layers: The number of layers of the rnn model.
    input_op_fn: Function that will transform the input tensor, such as
                 creating word embeddings, byte list, etc. This takes
                 an argument `x` for input and returns transformed `x`.
    bidirectional: boolean, whether this is a bidirectional rnn.
    target_predictor_fn: Function that will predict target from input
                         features. This can be logistic regression,
                         linear regression or any other model that takes
                         `x`, `y` and returns predictions and loss
                         tensors.
    sequence_length: If sequence_length is provided, dynamic calculation is
      performed. This saves computational time when unrolling past max sequence
      length. Required for bidirectional RNNs.
    initial_state: An initial state for the RNN. This must be a tensor of
      appropriate type and shape [batch_size x cell.state_size].
    attn_length: integer, the size of attention vector attached to rnn cells.
    attn_size: integer, the size of an attention window attached to rnn cells.
    attn_vec_size: integer, the number of convolutional features calculated on
      attention state and the size of the hidden layer built from base cell
      state.

  Returns:
    A function that creates the subgraph.
  """

  def rnn_estimator(x, y):
    """RNN estimator with target predictor function on top."""
    x = input_op_fn(x)
    if cell_type == 'rnn':
      cell_fn = contrib_rnn.BasicRNNCell
    elif cell_type == 'gru':
      cell_fn = contrib_rnn.GRUCell
    elif cell_type == 'lstm':
      cell_fn = functools.partial(
          contrib_rnn.BasicLSTMCell, state_is_tuple=False)
    else:
      raise ValueError('cell_type {} is not supported.'.format(cell_type))
    # TODO(ipolosukhin): state_is_tuple=False is deprecated
    if bidirectional:
      # forward direction cell
      fw_cell = lambda: cell_fn(rnn_size)
      bw_cell = lambda: cell_fn(rnn_size)
      # attach attention cells if specified
      if attn_length is not None:

        def attn_fw_cell():
          return contrib_rnn.AttentionCellWrapper(
              fw_cell(),
              attn_length=attn_length,
              attn_size=attn_size,
              attn_vec_size=attn_vec_size,
              state_is_tuple=False)

        def attn_bw_cell():
          return contrib_rnn.AttentionCellWrapper(
              bw_cell(),
              attn_length=attn_length,
              attn_size=attn_size,
              attn_vec_size=attn_vec_size,
              state_is_tuple=False)
      else:
        attn_fw_cell = fw_cell
        attn_bw_cell = bw_cell

      rnn_fw_cell = contrib_rnn.MultiRNNCell(
          [attn_fw_cell() for _ in range(num_layers)], state_is_tuple=False)
      # backward direction cell
      rnn_bw_cell = contrib_rnn.MultiRNNCell(
          [attn_bw_cell() for _ in range(num_layers)], state_is_tuple=False)
      # pylint: disable=unexpected-keyword-arg, no-value-for-parameter
      _, encoding = bidirectional_rnn(
          rnn_fw_cell,
          rnn_bw_cell,
          x,
          dtype=dtypes.float32,
          sequence_length=sequence_length,
          initial_state_fw=initial_state,
          initial_state_bw=initial_state)
    else:
      rnn_cell = lambda: cell_fn(rnn_size)

      if attn_length is not None:

        def attn_rnn_cell():
          return contrib_rnn.AttentionCellWrapper(
              rnn_cell(),
              attn_length=attn_length,
              attn_size=attn_size,
              attn_vec_size=attn_vec_size,
              state_is_tuple=False)
      else:
        attn_rnn_cell = rnn_cell

      cell = contrib_rnn.MultiRNNCell(
          [attn_rnn_cell() for _ in range(num_layers)], state_is_tuple=False)
      _, encoding = contrib_rnn.static_rnn(
          cell,
          x,
          dtype=dtypes.float32,
          sequence_length=sequence_length,
          initial_state=initial_state)
    return target_predictor_fn(encoding, y)

  return rnn_estimator


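# Example usage (an illustrative sketch; the sizes are arbitrary, and
# `input_op_fn` simply unstacks a [batch, time, depth] tensor into the
# list-of-steps format that the static RNN expects):
#
#   x = tf.placeholder(tf.float32, [None, 10, 5])  # [batch, time, depth]
#   y = tf.placeholder(tf.float32, [None, 3])      # one-hot labels
#   model_fn = get_rnn_model(
#       rnn_size=8,
#       cell_type='gru',
#       num_layers=1,
#       input_op_fn=lambda t: tf.unstack(t, axis=1),
#       bidirectional=False,
#       target_predictor_fn=logistic_regression,
#       sequence_length=None,
#       initial_state=None,
#       attn_length=None,
#       attn_size=None,
#       attn_vec_size=None)
#   predictions, loss = model_fn(x, y)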