# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

16"""Adagrad for TensorFlow."""
17from tensorflow.python.framework import ops
18from tensorflow.python.ops import array_ops
19from tensorflow.python.ops import gen_array_ops
20from tensorflow.python.ops import init_ops
21from tensorflow.python.ops import math_ops
22from tensorflow.python.training import optimizer
23from tensorflow.python.training import training_ops
24from tensorflow.python.util.tf_export import tf_export
25
26
@tf_export(v1=["train.AdagradOptimizer"])
class AdagradOptimizer(optimizer.Optimizer):
  """Optimizer that implements the Adagrad algorithm.

  References:
    Adaptive Subgradient Methods for Online Learning and Stochastic Optimization
      :[Duchi et al., 2011](http://jmlr.org/papers/v12/duchi11a.html)
      ([pdf](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf))

  @compatibility(TF2)
  `tf.compat.v1.train.AdagradOptimizer` is compatible with eager mode and
  `tf.function`.
  When eager execution is enabled, `learning_rate` can be a callable that
  takes no arguments and returns the actual value to use. This can be
  useful for changing the learning rate across different invocations of
  optimizer functions.

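  For example, a minimal sketch (added here for illustration; the names
  `lr_value` and `lr_fn` are hypothetical) of supplying the learning rate as
  a zero-argument callable:

  ```python
  lr_value = 0.1

  def lr_fn():
    # Re-read each time the optimizer converts the learning rate to a tensor.
    return lr_value

  optimizer = tf.compat.v1.train.AdagradOptimizer(learning_rate=lr_fn)
  ```
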
  To switch to native TF2 style, use [`tf.keras.optimizers.Adagrad`]
  (https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Adagrad)
  instead. Note that due to implementation differences,
  `tf.keras.optimizers.Adagrad` and `tf.compat.v1.train.AdagradOptimizer`
  may differ slightly in floating point numerics even though the formula
  used for the variable updates still matches.

  #### Structural mapping to native TF2

  Before:

  ```python
  optimizer = tf.compat.v1.train.AdagradOptimizer(
    learning_rate=learning_rate,
    initial_accumulator_value=initial_accumulator_value)
  ```

  After:

  ```python
  optimizer = tf.keras.optimizers.Adagrad(
    learning_rate=learning_rate,
    initial_accumulator_value=initial_accumulator_value,
    epsilon=1e-07)
  ```

  #### How to map arguments
  | TF1 Arg Name       | TF2 Arg Name    | Note                            |
  | ------------------ | --------------- | ------------------------------- |
  | `learning_rate`    | `learning_rate` | Be careful when setting a       |
  : : : `learning_rate` tensor value computed from the global step. In TF1 :
  : : : this usually implied a dynamic learning rate that was recomputed   :
  : : : at each step. In TF2 (eager + function) it is treated as a scalar  :
  : : : value computed only once, not as a symbolic placeholder recomputed :
  : : : each time.                                                         :
  | `initial_accumulator_value` | `initial_accumulator_value` | This       |
  : : : argument can be zero in TF2, which is not accepted in TF1; see the :
  : : : sketch below.                                                      :
  | - | `epsilon`      | `epsilon` is configurable in TF2. The default     |
  : : : value changed from 1e-8 to 1e-7.                                   :
  | `use_locking`      | -               | Not applicable in TF2.          |

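  As noted in the table above, a brief sketch (with hypothetical values) of
  the `initial_accumulator_value` difference:

  ```python
  # Accepted in TF2: a zero starting accumulator.
  optimizer = tf.keras.optimizers.Adagrad(
      learning_rate=0.001, initial_accumulator_value=0.0)

  # Rejected in TF1: the call below raises ValueError because
  # `initial_accumulator_value` must be positive.
  # tf.compat.v1.train.AdagradOptimizer(
  #     0.001, initial_accumulator_value=0.0)
  ```
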
  #### Before & after usage example
  Before:

  ```python
  x = tf.Variable([1,2,3], dtype=tf.float32)
  grad = tf.constant([0.1, 0.2, 0.3])
  optimizer = tf.compat.v1.train.AdagradOptimizer(learning_rate=0.001)
  optimizer.apply_gradients(zip([grad], [x]))
  ```

  After:

  ```python
  x = tf.Variable([1,2,3], dtype=tf.float32)
  grad = tf.constant([0.1, 0.2, 0.3])
  optimizer = tf.keras.optimizers.Adagrad(learning_rate=0.001)
  optimizer.apply_gradients(zip([grad], [x]))
  ```

  @end_compatibility
  """

  def __init__(self, learning_rate, initial_accumulator_value=0.1,
               use_locking=False, name="Adagrad"):
    """Construct a new Adagrad optimizer.

    Args:
      learning_rate: A `Tensor` or a floating point value.  The learning rate.
      initial_accumulator_value: A floating point value.
        Starting value for the accumulators, must be positive.
      use_locking: If `True` use locks for update operations.
      name: Optional name prefix for the operations created when applying
        gradients.  Defaults to "Adagrad".

    Raises:
      ValueError: If the `initial_accumulator_value` is invalid.

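    For example, a brief sketch added for illustration (the variable name
    `opt` is hypothetical):

    ```python
    opt = tf.compat.v1.train.AdagradOptimizer(0.01)  # accumulators start at 0.1
    # The following would raise ValueError, since the starting value
    # must be strictly positive:
    # tf.compat.v1.train.AdagradOptimizer(0.01, initial_accumulator_value=0.0)
    ```
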
124    """
125    if initial_accumulator_value <= 0.0:
126      raise ValueError("initial_accumulator_value must be positive: %s" %
127                       initial_accumulator_value)
128    super(AdagradOptimizer, self).__init__(use_locking, name)
129    self._learning_rate = learning_rate
130    self._initial_accumulator_value = initial_accumulator_value
131    # Created in Initialize.
132    self._learning_rate_tensor = None
133
  def _create_slots(self, var_list):
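    # Create one "accumulator" slot per variable, initialized to
    # `initial_accumulator_value`. It holds the running sum of squared
    # gradients that Adagrad uses to scale each update.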
    for v in var_list:
      dtype = v.dtype.base_dtype
      if v.get_shape().is_fully_defined():
        init = init_ops.constant_initializer(self._initial_accumulator_value,
                                             dtype=dtype)
      else:
        init = self._init_constant_op(v, dtype)
      self._get_or_make_slot_with_initializer(v, init, v.get_shape(), dtype,
                                              "accumulator", self._name)

  def _init_constant_op(self, v, dtype):
    def init():
      # Use a Tensor instead of initializer if variable does not have
      # static shape.
      init_constant = gen_array_ops.fill(array_ops.shape(v),
                                         self._initial_accumulator_value)
      return math_ops.cast(init_constant, dtype)
    return init

  def _prepare(self):
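    # Resolve a possibly callable `learning_rate` to a concrete value and
    # convert it to a tensor for the apply ops below.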
    learning_rate = self._call_if_callable(self._learning_rate)
    self._learning_rate_tensor = ops.convert_to_tensor(
        learning_rate, name="learning_rate")

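  # The methods below delegate the actual Adagrad update to the registered
  # training kernels, covering the dense/sparse and ref/resource variable
  # combinations.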
  def _apply_dense(self, grad, var):
    acc = self.get_slot(var, "accumulator")
    return training_ops.apply_adagrad(
        var,
        acc,
        math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype),
        grad,
        use_locking=self._use_locking)

  def _resource_apply_dense(self, grad, var):
    acc = self.get_slot(var, "accumulator")
    return training_ops.resource_apply_adagrad(
        var.handle,
        acc.handle,
        math_ops.cast(self._learning_rate_tensor, grad.dtype.base_dtype),
        grad,
        use_locking=self._use_locking)

  def _apply_sparse(self, grad, var):
    acc = self.get_slot(var, "accumulator")
    return training_ops.sparse_apply_adagrad(
        var,
        acc,
        math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype),
        grad.values,
        grad.indices,
        use_locking=self._use_locking)

  def _resource_apply_sparse(self, grad, var, indices):
    acc = self.get_slot(var, "accumulator")
    return training_ops.resource_sparse_apply_adagrad(
        var.handle,
        acc.handle,
        math_ops.cast(self._learning_rate_tensor, grad.dtype),
        grad,
        indices,
        use_locking=self._use_locking)