# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""ProximalAdagrad for TensorFlow."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.python.framework import constant_op
from tensorflow.python.framework import ops
from tensorflow.python.ops import math_ops
from tensorflow.python.training import optimizer
from tensorflow.python.training import training_ops
from tensorflow.python.util.tf_export import tf_export


@tf_export(v1=["train.ProximalAdagradOptimizer"])
class ProximalAdagradOptimizer(optimizer.Optimizer):
  # pylint: disable=line-too-long
  """Optimizer that implements the Proximal Adagrad algorithm.

  See this [paper](http://papers.nips.cc/paper/3793-efficient-learning-using-forward-backward-splitting.pdf).
  """

  def __init__(self, learning_rate, initial_accumulator_value=0.1,
               l1_regularization_strength=0.0, l2_regularization_strength=0.0,
               use_locking=False, name="ProximalAdagrad"):
    """Construct a new ProximalAdagrad optimizer.

    Args:
      learning_rate: A `Tensor` or a floating point value. The learning rate.
      initial_accumulator_value: A floating point value.
        Starting value for the accumulators, must be positive.
      l1_regularization_strength: A float value, must be greater than or
        equal to zero.
      l2_regularization_strength: A float value, must be greater than or
        equal to zero.
      use_locking: If `True` use locks for update operations.
      name: Optional name prefix for the operations created when applying
        gradients. Defaults to "ProximalAdagrad".

    Raises:
      ValueError: If the `initial_accumulator_value` is invalid.
    """
    if initial_accumulator_value <= 0.0:
      raise ValueError("initial_accumulator_value must be positive: %s" %
                       initial_accumulator_value)
    super(ProximalAdagradOptimizer, self).__init__(use_locking, name)
    self._learning_rate = learning_rate
    self._initial_accumulator_value = initial_accumulator_value
    self._l1_regularization_strength = l1_regularization_strength
    self._l2_regularization_strength = l2_regularization_strength
    # Created in _prepare().
    self._l1_regularization_strength_tensor = None
    self._l2_regularization_strength_tensor = None
    self._learning_rate_tensor = None
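
  # The `_apply_*` methods below delegate to fused kernels in `training_ops`.
  # As a sketch (following the pseudo-code documented for the underlying
  # `ApplyProximalAdagrad` raw op, not restated in this file), the dense
  # update is, with `lr`, `l1`, and `l2` the scalar hyperparameters:
  #
  #   accum += grad * grad
  #   prox_var = var - (lr / sqrt(accum)) * grad
  #   var = sign(prox_var) * max(|prox_var| - (lr / sqrt(accum)) * l1, 0)
  #         / (1 + (lr / sqrt(accum)) * l2)
  #
  # i.e. an Adagrad step followed by the proximal operator of the L1/L2
  # regularizer (soft-thresholding plus shrinkage).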

  def _create_slots(self, var_list):
    for v in var_list:
      with ops.colocate_with(v):
        val = constant_op.constant(self._initial_accumulator_value,
                                   shape=v.get_shape(),
                                   dtype=v.dtype.base_dtype)
      self._get_or_make_slot(v, val, "accumulator", self._name)

  def _prepare(self):
    self._learning_rate_tensor = ops.convert_to_tensor(self._learning_rate,
                                                       name="learning_rate")
    self._l1_regularization_strength_tensor = ops.convert_to_tensor(
        self._l1_regularization_strength,
        name="l1_regularization_strength")
    self._l2_regularization_strength_tensor = ops.convert_to_tensor(
        self._l2_regularization_strength,
        name="l2_regularization_strength")

  def _apply_dense(self, grad, var):
    acc = self.get_slot(var, "accumulator")
    return training_ops.apply_proximal_adagrad(
        var, acc, self._learning_rate_tensor,
        self._l1_regularization_strength_tensor,
        self._l2_regularization_strength_tensor,
        grad, use_locking=self._use_locking)

  def _resource_apply_dense(self, grad, var):
    acc = self.get_slot(var, "accumulator")
    return training_ops.resource_apply_proximal_adagrad(
        var.handle, acc.handle, self._learning_rate_tensor,
        self._l1_regularization_strength_tensor,
        self._l2_regularization_strength_tensor,
        grad, use_locking=self._use_locking)

  def _apply_sparse(self, grad, var):
    acc = self.get_slot(var, "accumulator")
    return training_ops.sparse_apply_proximal_adagrad(
        var, acc, self._learning_rate_tensor,
        self._l1_regularization_strength_tensor,
        self._l2_regularization_strength_tensor,
        grad.values, grad.indices,
        use_locking=self._use_locking)

  def _resource_apply_sparse(self, grad, var, indices):
    acc = self.get_slot(var, "accumulator")
    return training_ops.resource_sparse_apply_proximal_adagrad(
        var.handle, acc.handle,
        math_ops.cast(self._learning_rate_tensor, grad.dtype),
        math_ops.cast(self._l1_regularization_strength_tensor, grad.dtype),
        math_ops.cast(self._l2_regularization_strength_tensor, grad.dtype),
        grad, indices,
        use_locking=self._use_locking)
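

# A minimal usage sketch (not part of this module). It assumes TF1-style
# graph execution; the variable `w`, the quadratic loss, and the
# hyperparameter values are illustrative assumptions, not defaults:
#
#   import tensorflow.compat.v1 as tf
#   tf.disable_eager_execution()
#
#   w = tf.Variable([4.0, -3.0], name="w")
#   loss = tf.reduce_sum(tf.square(w))  # simple convex objective
#   opt = tf.train.ProximalAdagradOptimizer(
#       learning_rate=0.1, l1_regularization_strength=0.001)
#   train_op = opt.minimize(loss)
#
#   with tf.Session() as sess:
#     sess.run(tf.global_variables_initializer())
#     for _ in range(100):
#       sess.run(train_op)  # L1 proximal step drives weights toward zero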