# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Adagrad for TensorFlow."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import gen_array_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.training import optimizer
from tensorflow.python.training import training_ops
from tensorflow.python.util.tf_export import tf_export


@tf_export(v1=["train.AdagradOptimizer"])
class AdagradOptimizer(optimizer.Optimizer):
  """Optimizer that implements the Adagrad algorithm.

  References:
    Adaptive Subgradient Methods for Online Learning and Stochastic
    Optimization: [Duchi et al., 2011](http://jmlr.org/papers/v12/duchi11a.html)
    ([pdf](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf))
  """

  def __init__(self, learning_rate, initial_accumulator_value=0.1,
               use_locking=False, name="Adagrad"):
    """Construct a new Adagrad optimizer.

    Args:
      learning_rate: A `Tensor` or a floating point value. The learning rate.
      initial_accumulator_value: A floating point value.
        Starting value for the accumulators, must be positive.
      use_locking: If `True` use locks for update operations.
      name: Optional name prefix for the operations created when applying
        gradients. Defaults to "Adagrad".

    Raises:
      ValueError: If the `initial_accumulator_value` is invalid.

    @compatibility(eager)
    When eager execution is enabled, `learning_rate` can be a callable that
    takes no arguments and returns the actual value to use. This can be useful
    for changing these values across different invocations of optimizer
    functions.
    @end_compatibility
    """
    if initial_accumulator_value <= 0.0:
      raise ValueError("initial_accumulator_value must be positive: %s" %
                       initial_accumulator_value)
    super(AdagradOptimizer, self).__init__(use_locking, name)
    self._learning_rate = learning_rate
    self._initial_accumulator_value = initial_accumulator_value
    # Created in Initialize.
    self._learning_rate_tensor = None

  def _create_slots(self, var_list):
    for v in var_list:
      dtype = v.dtype.base_dtype
      if v.get_shape().is_fully_defined():
        init = init_ops.constant_initializer(self._initial_accumulator_value,
                                             dtype=dtype)
      else:
        init = self._init_constant_op(v, dtype)
      self._get_or_make_slot_with_initializer(v, init, v.get_shape(), dtype,
                                              "accumulator", self._name)

  def _init_constant_op(self, v, dtype):
    def init():
      # Use a Tensor instead of initializer if variable does not have
      # static shape.
      init_constant = gen_array_ops.fill(array_ops.shape(v),
                                         self._initial_accumulator_value)
      return math_ops.cast(init_constant, dtype)
    return init

  def _prepare(self):
    learning_rate = self._call_if_callable(self._learning_rate)
    self._learning_rate_tensor = ops.convert_to_tensor(
        learning_rate, name="learning_rate")

  def _apply_dense(self, grad, var):
    acc = self.get_slot(var, "accumulator")
    return training_ops.apply_adagrad(
        var,
        acc,
        math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype),
        grad,
        use_locking=self._use_locking)

  def _resource_apply_dense(self, grad, var):
    acc = self.get_slot(var, "accumulator")
    return training_ops.resource_apply_adagrad(
        var.handle,
        acc.handle,
        math_ops.cast(self._learning_rate_tensor, grad.dtype.base_dtype),
        grad,
        use_locking=self._use_locking)

  def _apply_sparse(self, grad, var):
    acc = self.get_slot(var, "accumulator")
    return training_ops.sparse_apply_adagrad(
        var,
        acc,
        math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype),
        grad.values,
        grad.indices,
        use_locking=self._use_locking)

  def _resource_apply_sparse(self, grad, var, indices):
    acc = self.get_slot(var, "accumulator")
    return training_ops.resource_sparse_apply_adagrad(
        var.handle,
        acc.handle,
        math_ops.cast(self._learning_rate_tensor, grad.dtype),
        grad,
        indices,
        use_locking=self._use_locking)
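

# ------------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the library): a minimal
# TF 1.x-style graph example that drives this optimizer through `minimize`.
# The variable `w` and the quadratic loss below are hypothetical; the class
# itself is exported as `tf.compat.v1.train.AdagradOptimizer` (see the
# `tf_export` decorator above). The underlying kernels apply the standard
# Adagrad update, roughly: accum += grad ** 2; var -= lr * grad / sqrt(accum).
if __name__ == "__main__":
  import tensorflow.compat.v1 as tf

  tf.disable_eager_execution()

  w = tf.Variable([1.0, 2.0], name="w")      # hypothetical parameter
  loss = tf.reduce_sum(tf.square(w))         # simple quadratic loss
  opt = tf.train.AdagradOptimizer(learning_rate=0.1,
                                  initial_accumulator_value=0.1)
  train_op = opt.minimize(loss)              # creates the "accumulator" slots

  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(100):
      sess.run(train_op)                     # drives w toward zero
    print(sess.run(w))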