# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

16"""ProximalAdagrad for TensorFlow."""
17from __future__ import absolute_import
18from __future__ import division
19from __future__ import print_function
20
21from tensorflow.python.framework import constant_op
22from tensorflow.python.framework import ops
23from tensorflow.python.ops import math_ops
24from tensorflow.python.training import optimizer
25from tensorflow.python.training import training_ops
26from tensorflow.python.util.tf_export import tf_export
27
28
@tf_export(v1=["train.ProximalAdagradOptimizer"])
class ProximalAdagradOptimizer(optimizer.Optimizer):
  # pylint: disable=line-too-long
  """Optimizer that implements the Proximal Adagrad algorithm.

  See this [paper](http://papers.nips.cc/paper/3793-efficient-learning-using-forward-backward-splitting.pdf).
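
  A minimal usage sketch (the weights and loss below are placeholders, not
  part of this module; under TF2 this class is exported as
  `tf.compat.v1.train.ProximalAdagradOptimizer`):

  ```python
  import tensorflow.compat.v1 as tf

  tf.disable_eager_execution()

  w = tf.Variable([0.5, -0.3], name="w")      # hypothetical weights
  loss = tf.reduce_sum(tf.square(w))          # hypothetical loss

  opt = tf.train.ProximalAdagradOptimizer(
      learning_rate=0.1,
      initial_accumulator_value=0.1,
      l1_regularization_strength=0.001,
      l2_regularization_strength=0.001)
  train_op = opt.minimize(loss)

  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(10):
      sess.run(train_op)
  ```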
35  """
36
  def __init__(self, learning_rate, initial_accumulator_value=0.1,
               l1_regularization_strength=0.0, l2_regularization_strength=0.0,
               use_locking=False, name="ProximalAdagrad"):
    """Construct a new ProximalAdagrad optimizer.

    Args:
      learning_rate: A `Tensor` or a floating point value.  The learning rate.
      initial_accumulator_value: A floating point value.
        Starting value for the accumulators, must be positive.
      l1_regularization_strength: A float value, must be greater than or
        equal to zero.
      l2_regularization_strength: A float value, must be greater than or
        equal to zero.
      use_locking: If `True` use locks for update operations.
      name: Optional name prefix for the operations created when applying
        gradients.  Defaults to "ProximalAdagrad".

    Raises:
      ValueError: If `initial_accumulator_value` is not positive.
    """
    if initial_accumulator_value <= 0.0:
      raise ValueError("initial_accumulator_value must be positive: %s" %
                       initial_accumulator_value)
    super(ProximalAdagradOptimizer, self).__init__(use_locking, name)
    self._learning_rate = learning_rate
    self._initial_accumulator_value = initial_accumulator_value
    self._l1_regularization_strength = l1_regularization_strength
    self._l2_regularization_strength = l2_regularization_strength
    # Tensor versions of the hyperparameters, created in _prepare().
    self._l1_regularization_strength_tensor = None
    self._l2_regularization_strength_tensor = None
    self._learning_rate_tensor = None

  def _create_slots(self, var_list):
    # Create one "accumulator" slot per variable, colocated with the variable
    # and initialized to `initial_accumulator_value`.
    for v in var_list:
      with ops.colocate_with(v):
        val = constant_op.constant(self._initial_accumulator_value,
                                   shape=v.get_shape(),
                                   dtype=v.dtype.base_dtype)
      self._get_or_make_slot(v, val, "accumulator", self._name)

  def _prepare(self):
    # Convert the Python-number hyperparameters to tensors for the update ops.
    self._learning_rate_tensor = ops.convert_to_tensor(self._learning_rate,
                                                       name="learning_rate")
    self._l1_regularization_strength_tensor = ops.convert_to_tensor(
        self._l1_regularization_strength,
        name="l1_regularization_strength")
    self._l2_regularization_strength_tensor = ops.convert_to_tensor(
        self._l2_regularization_strength,
        name="l2_regularization_strength")

  def _apply_dense(self, grad, var):
    acc = self.get_slot(var, "accumulator")
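    # The apply_proximal_adagrad kernel performs a FOBOS-style update with an
    # Adagrad learning rate; roughly (a sketch, not the literal kernel code):
    #   accum += grad * grad
    #   adjusted_lr = lr / sqrt(accum)
    #   prox = var - adjusted_lr * grad
    #   var = sign(prox) * max(|prox| - adjusted_lr * l1, 0)
    #         / (1 + adjusted_lr * l2)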
    return training_ops.apply_proximal_adagrad(
        var, acc, self._learning_rate_tensor,
        self._l1_regularization_strength_tensor,
        self._l2_regularization_strength_tensor,
        grad, use_locking=self._use_locking)

  def _resource_apply_dense(self, grad, var):
    acc = self.get_slot(var, "accumulator")
    return training_ops.resource_apply_proximal_adagrad(
        var.handle, acc.handle, self._learning_rate_tensor,
        self._l1_regularization_strength_tensor,
        self._l2_regularization_strength_tensor,
        grad, use_locking=self._use_locking)

  def _apply_sparse(self, grad, var):
    acc = self.get_slot(var, "accumulator")
    return training_ops.sparse_apply_proximal_adagrad(
        var, acc, self._learning_rate_tensor,
        self._l1_regularization_strength_tensor,
        self._l2_regularization_strength_tensor,
        grad.values, grad.indices,
        use_locking=self._use_locking)

  def _resource_apply_sparse(self, grad, var, indices):
    acc = self.get_slot(var, "accumulator")
    return training_ops.resource_sparse_apply_proximal_adagrad(
        var.handle, acc.handle,
        math_ops.cast(self._learning_rate_tensor, grad.dtype),
        math_ops.cast(self._l1_regularization_strength_tensor, grad.dtype),
        math_ops.cast(self._l2_regularization_strength_tensor, grad.dtype),
        grad, indices,
        use_locking=self._use_locking)