# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""ProximalAdagrad for TensorFlow."""
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import ops
from tensorflow.python.ops import math_ops
from tensorflow.python.training import optimizer
from tensorflow.python.training import training_ops
from tensorflow.python.util.tf_export import tf_export


@tf_export(v1=["train.ProximalAdagradOptimizer"])
class ProximalAdagradOptimizer(optimizer.Optimizer):
  # pylint: disable=line-too-long
  """Optimizer that implements the Proximal Adagrad algorithm.

  References:
    Adaptive Subgradient Methods for Online Learning and Stochastic Optimization:
      [Duchi et al., 2011](http://jmlr.org/papers/v12/duchi11a.html)
      ([pdf](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf))
    Efficient Learning using Forward-Backward Splitting:
      [Duchi et al., 2009](http://papers.nips.cc/paper/3793-efficient-learning-using-forward-backward-splitting)
      ([pdf](http://papers.nips.cc/paper/3793-efficient-learning-using-forward-backward-splitting.pdf))
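
  Proximal Adagrad combines Adagrad's per-coordinate accumulated-squared-gradient
  scaling with a proximal step for the optional l1/l2 regularizers (see the
  references above). A minimal TF1-style (graph mode) usage sketch; `w` and
  `loss` are illustrative placeholders, not part of this module:

  ```python
  import tensorflow as tf

  w = tf.compat.v1.get_variable("w", shape=[3])
  loss = tf.reduce_sum(tf.square(w))
  opt = tf.compat.v1.train.ProximalAdagradOptimizer(
      learning_rate=0.1,
      l1_regularization_strength=0.001,
      l2_regularization_strength=0.001)
  train_op = opt.minimize(loss)
  ```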
37  """
38
39  def __init__(self, learning_rate, initial_accumulator_value=0.1,
40               l1_regularization_strength=0.0, l2_regularization_strength=0.0,
41               use_locking=False, name="ProximalAdagrad"):
42    """Construct a new ProximalAdagrad optimizer.
43
44    Args:
45      learning_rate: A `Tensor` or a floating point value.  The learning rate.
46      initial_accumulator_value: A floating point value.
47        Starting value for the accumulators, must be positive.
48      l1_regularization_strength: A float value, must be greater than or
49        equal to zero.
50      l2_regularization_strength: A float value, must be greater than or
51        equal to zero.
52      use_locking: If `True` use locks for update operations.
      name: Optional name prefix for the operations created when applying
        gradients.  Defaults to "ProximalAdagrad".

    Raises:
      ValueError: If the `initial_accumulator_value` is invalid.
    """
    if initial_accumulator_value <= 0.0:
      raise ValueError("initial_accumulator_value must be positive: %s" %
                       initial_accumulator_value)
    super(ProximalAdagradOptimizer, self).__init__(use_locking, name)
    self._learning_rate = learning_rate
    self._initial_accumulator_value = initial_accumulator_value
    self._l1_regularization_strength = l1_regularization_strength
    self._l2_regularization_strength = l2_regularization_strength
    # Created in _prepare().
    self._l1_regularization_strength_tensor = None
    self._l2_regularization_strength_tensor = None
    self._learning_rate_tensor = None

  def _create_slots(self, var_list):
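    # One "accumulator" slot per variable, holding the running sum of squared
    # gradients; its initial value is created colocated with the variable.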
    for v in var_list:
      with ops.colocate_with(v):
        val = constant_op.constant(self._initial_accumulator_value,
                                   shape=v.get_shape(),
                                   dtype=v.dtype.base_dtype)
      self._get_or_make_slot(v, val, "accumulator", self._name)

  def _prepare(self):
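    # Materialize the hyperparameters as tensors once, so the apply ops below
    # can consume them directly.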
    self._learning_rate_tensor = ops.convert_to_tensor(self._learning_rate,
                                                       name="learning_rate")
    self._l1_regularization_strength_tensor = ops.convert_to_tensor(
        self._l1_regularization_strength,
        name="l1_regularization_strength")
    self._l2_regularization_strength_tensor = ops.convert_to_tensor(
        self._l2_regularization_strength,
        name="l2_regularization_strength")

  def _apply_dense(self, grad, var):
    acc = self.get_slot(var, "accumulator")
    return training_ops.apply_proximal_adagrad(
        var, acc, self._learning_rate_tensor,
        self._l1_regularization_strength_tensor,
        self._l2_regularization_strength_tensor,
        grad, use_locking=self._use_locking)

  def _resource_apply_dense(self, grad, var):
    acc = self.get_slot(var, "accumulator")
    return training_ops.resource_apply_proximal_adagrad(
        var.handle, acc.handle, self._learning_rate_tensor,
        self._l1_regularization_strength_tensor,
        self._l2_regularization_strength_tensor,
        grad, use_locking=self._use_locking)

  def _apply_sparse(self, grad, var):
    acc = self.get_slot(var, "accumulator")
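    # `grad` arrives as an IndexedSlices; its values and indices are forwarded
    # to the sparse kernel.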
    return training_ops.sparse_apply_proximal_adagrad(
        var, acc, self._learning_rate_tensor,
        self._l1_regularization_strength_tensor,
        self._l2_regularization_strength_tensor,
        grad.values, grad.indices,
        use_locking=self._use_locking)

  def _resource_apply_sparse(self, grad, var, indices):
    acc = self.get_slot(var, "accumulator")
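    # The resource sparse kernel requires the scalar hyperparameters to match
    # the gradient dtype, hence the casts below.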
    return training_ops.resource_sparse_apply_proximal_adagrad(
        var.handle, acc.handle,
        math_ops.cast(self._learning_rate_tensor, grad.dtype),
        math_ops.cast(self._l1_regularization_strength_tensor, grad.dtype),
        math_ops.cast(self._l2_regularization_strength_tensor, grad.dtype),
        grad, indices,
        use_locking=self._use_locking)