# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for Ftrl optimizer."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

from tensorflow.compiler.tests.xla_test import XLATestCase
from tensorflow.python.framework import constant_op
from tensorflow.python.ops import resource_variable_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
from tensorflow.python.training import adagrad
from tensorflow.python.training import ftrl
from tensorflow.python.training import gradient_descent


class FtrlOptimizerTest(XLATestCase):

  def initVariableAndGradient(self, dtype):
    var0 = resource_variable_ops.ResourceVariable([0.0, 0.0], dtype=dtype)
    var1 = resource_variable_ops.ResourceVariable([0.0, 0.0], dtype=dtype)
    grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
    grads1 = constant_op.constant([0.02, 0.04], dtype=dtype)

    return var0, var1, grads0, grads1

  def equivAdagradTest_FtrlPart(self, steps, dtype):
    var0, var1, grads0, grads1 = self.initVariableAndGradient(dtype)
    opt = ftrl.FtrlOptimizer(
        3.0,
        learning_rate_power=-0.5,  # using Adagrad learning rate
        initial_accumulator_value=0.1,
        l1_regularization_strength=0.0,
        l2_regularization_strength=0.0)
    ftrl_update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
    variables.global_variables_initializer().run()
    # Fetch params to validate initial values
    self.assertAllClose([0.0, 0.0], var0.eval())
    self.assertAllClose([0.0, 0.0], var1.eval())

    # Run Ftrl for a few steps
    for _ in range(steps):
      ftrl_update.run()

    return var0.eval(), var1.eval()

  def equivAdagradTest_AdagradPart(self, steps, dtype):
    var0, var1, grads0, grads1 = self.initVariableAndGradient(dtype)
    opt = adagrad.AdagradOptimizer(3.0, initial_accumulator_value=0.1)
    adagrad_update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
    variables.global_variables_initializer().run()
    # Fetch params to validate initial values
    self.assertAllClose([0.0, 0.0], var0.eval())
    self.assertAllClose([0.0, 0.0], var1.eval())

    # Run Adagrad for a few steps
    for _ in range(steps):
      adagrad_update.run()

    return var0.eval(), var1.eval()

  def equivGradientDescentTest_FtrlPart(self, steps, dtype):
    var0, var1, grads0, grads1 = self.initVariableAndGradient(dtype)
    opt = ftrl.FtrlOptimizer(
        3.0,
        learning_rate_power=-0.0,  # using Fixed learning rate
        initial_accumulator_value=0.1,
        l1_regularization_strength=0.0,
        l2_regularization_strength=0.0)
    ftrl_update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
    variables.global_variables_initializer().run()
    # Fetch params to validate initial values
    self.assertAllClose([0.0, 0.0], var0.eval())
    self.assertAllClose([0.0, 0.0], var1.eval())

    # Run Ftrl for a few steps
    for _ in range(steps):
      ftrl_update.run()

    return var0.eval(), var1.eval()

  def equivGradientDescentTest_GradientDescentPart(self, steps, dtype):
    var0, var1, grads0, grads1 = self.initVariableAndGradient(dtype)
    opt = gradient_descent.GradientDescentOptimizer(3.0, name="sgd")
    sgd_update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
    variables.global_variables_initializer().run()
    # Fetch params to validate initial values
    self.assertAllClose([0.0, 0.0], var0.eval())
    self.assertAllClose([0.0, 0.0], var1.eval())

    # Run GradientDescent for a few steps
    for _ in range(steps):
      sgd_update.run()

    return var0.eval(), var1.eval()

  def testFtrlwithoutRegularization(self):
    for dtype in self.float_types:
      with self.test_session(), self.test_scope():
        var0 = resource_variable_ops.ResourceVariable([0.0, 0.0], dtype=dtype)
        var1 = resource_variable_ops.ResourceVariable([0.0, 0.0], dtype=dtype)
        grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
        grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)
        opt = ftrl.FtrlOptimizer(
            3.0,
            initial_accumulator_value=0.1,
            l1_regularization_strength=0.0,
            l2_regularization_strength=0.0)
        ftrl_update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        variables.global_variables_initializer().run()
        # Fetch params to validate initial values
        self.assertAllClose([0.0, 0.0], var0.eval())
        self.assertAllClose([0.0, 0.0], var1.eval())

        # Run 3 steps FTRL
        for _ in range(3):
          ftrl_update.run()

        # Validate updated params
        self.assertAllCloseAccordingToType(
            np.array([-2.60260963, -4.29698515]), var0.eval(), float_rtol=1e-5)
        self.assertAllCloseAccordingToType(
            np.array([-0.28432083, -0.56694895]), var1.eval(), float_rtol=1e-5)

  def testFtrlwithoutRegularization2(self):
    for dtype in self.float_types:
      with self.test_session(), self.test_scope():
        var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
        var1 = resource_variable_ops.ResourceVariable([4.0, 3.0], dtype=dtype)
        grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
        grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)
        opt = ftrl.FtrlOptimizer(
            3.0,
            initial_accumulator_value=0.1,
            l1_regularization_strength=0.0,
            l2_regularization_strength=0.0)
        ftrl_update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        variables.global_variables_initializer().run()
        # Fetch params to validate initial values
        self.assertAllClose([1.0, 2.0], var0.eval())
        self.assertAllClose([4.0, 3.0], var1.eval())

        # Run 3 steps FTRL
        for _ in range(3):
          ftrl_update.run()

        # Validate updated params
        self.assertAllCloseAccordingToType(
            np.array([-2.55607247, -3.98729396]), var0.eval(), 1e-5, 1e-5)
        self.assertAllCloseAccordingToType(
            np.array([-0.28232238, -0.56096673]), var1.eval(), 1e-5, 1e-5)

  def testFtrlWithL1(self):
    for dtype in self.float_types:
      with self.test_session(), self.test_scope():
        var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
        var1 = resource_variable_ops.ResourceVariable([4.0, 3.0], dtype=dtype)
        grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
        grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)
        opt = ftrl.FtrlOptimizer(
            3.0,
            initial_accumulator_value=0.1,
            l1_regularization_strength=0.001,
            l2_regularization_strength=0.0)
        ftrl_update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        variables.global_variables_initializer().run()
        # Fetch params to validate initial values
        self.assertAllClose([1.0, 2.0], var0.eval())
        self.assertAllClose([4.0, 3.0], var1.eval())

        # Run 10 steps FTRL
        for _ in range(10):
          ftrl_update.run()

        # Validate updated params
        self.assertAllCloseAccordingToType(
            np.array([-7.66718769, -10.91273689]), var0.eval(), rtol=1e-4)
        self.assertAllCloseAccordingToType(
            np.array([-0.93460727, -1.86147261]), var1.eval(), rtol=1e-4)

  def testFtrlWithL1_L2(self):
    for dtype in self.float_types:
      with self.test_session(), self.test_scope():
        var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
        var1 = resource_variable_ops.ResourceVariable([4.0, 3.0], dtype=dtype)
        grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
        grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)
        opt = ftrl.FtrlOptimizer(
            3.0,
            initial_accumulator_value=0.1,
            l1_regularization_strength=0.001,
            l2_regularization_strength=2.0)
        ftrl_update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        variables.global_variables_initializer().run()
        # Fetch params to validate initial values
        self.assertAllClose([1.0, 2.0], var0.eval())
        self.assertAllClose([4.0, 3.0], var1.eval())

        # Run 10 steps FTRL
        for _ in range(10):
          ftrl_update.run()

        # Validate updated params
        self.assertAllCloseAccordingToType(
            np.array([-0.24059935, -0.46829352]), var0.eval(), rtol=1e-5)
        self.assertAllCloseAccordingToType(
            np.array([-0.02406147, -0.04830509]), var1.eval(), rtol=1e-5)

  def testFtrlWithL1_L2_L2Shrinkage(self):
    """Test the new FTRL op with support for l2 shrinkage.

    The addition of this parameter, which places a constant pressure on the
    weights towards the origin, causes the gradient descent trajectory to
    differ. The weights will tend to have smaller magnitudes with this
    parameter set.
    """
    for dtype in self.float_types:
      with self.test_session(), self.test_scope():
        var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
        var1 = resource_variable_ops.ResourceVariable([4.0, 3.0], dtype=dtype)
        grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
        grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)
        opt = ftrl.FtrlOptimizer(
            3.0,
            initial_accumulator_value=0.1,
            l1_regularization_strength=0.001,
            l2_regularization_strength=2.0,
            l2_shrinkage_regularization_strength=0.1)
        ftrl_update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        variables.global_variables_initializer().run()
        # Fetch params to validate initial values
        self.assertAllCloseAccordingToType([1.0, 2.0], var0.eval())
        self.assertAllCloseAccordingToType([4.0, 3.0], var1.eval())

        # Run 10 steps FTRL
        for _ in range(10):
          ftrl_update.run()

        # Validate updated params
        self.assertAllCloseAccordingToType(
            np.array([-0.21931979, -0.40642974]), var0.eval(), rtol=1e-4)
        self.assertAllCloseAccordingToType(
            np.array([-0.0282721, -0.07188385]), var1.eval(), rtol=1e-4)

  # When the variables are initialized with zeros, FTRL-Proximal has two
  # properties:
  # 1. Without L1 and L2 regularization but with a fixed learning rate,
  #    FTRL-Proximal is identical to GradientDescent.
  # 2. Without L1 and L2 regularization but with an adaptive learning rate,
  #    FTRL-Proximal is identical to Adagrad.
  # Based on these two properties, we verify that our implementation of
  # FTRL-Proximal performs the same updates as Adagrad or GradientDescent.
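  # The helper below is a minimal, self-contained sketch of a single
  # FTRL-Proximal step with l1 = l2 = 0, added only to document the two
  # properties above. It reflects our reading of the update rule rather than
  # the kernel under test, is not called by any test, and its name and
  # signature are illustrative only.
  def _ftrlUpdateSketch(self, w, accum, linear, grad, lr, lr_power):
    new_accum = accum + grad * grad
    # sigma corrects `linear` so that the closed-form solution for w below
    # stays consistent with the incremental updates applied so far.
    sigma = (new_accum**(-lr_power) - accum**(-lr_power)) / lr
    new_linear = linear + grad - sigma * w
    # With lr_power == 0 every accumulator power is 1, so sigma == 0, linear
    # is just the running gradient sum, and w == -lr * sum(grad): plain
    # gradient descent. With lr_power == -0.5 the step is scaled by
    # 1 / sqrt(new_accum), which matches the Adagrad schedule.
    new_w = -lr * new_linear / new_accum**(-lr_power)
    return new_w, new_accum, new_linear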
  def testEquivAdagradwithoutRegularization(self):
    steps = 5
    for dtype in self.float_types:
      with self.test_session(), self.test_scope():
        val0, val1 = self.equivAdagradTest_FtrlPart(steps, dtype)
      with self.test_session(), self.test_scope():
        val2, val3 = self.equivAdagradTest_AdagradPart(steps, dtype)

      self.assertAllCloseAccordingToType(val0, val2, rtol=1e-4)
      self.assertAllCloseAccordingToType(val1, val3, rtol=1e-4)

  def testEquivGradientDescentwithoutRegularization(self):
    steps = 5
    for dtype in self.float_types:
      with self.test_session(), self.test_scope():
        val0, val1 = self.equivGradientDescentTest_FtrlPart(steps, dtype)
      with self.test_session(), self.test_scope():
        val2, val3 = self.equivGradientDescentTest_GradientDescentPart(
            steps, dtype)

      self.assertAllCloseAccordingToType(val0, val2, rtol=1e-5)
      self.assertAllCloseAccordingToType(val1, val3, rtol=1e-5)


if __name__ == "__main__":
  test.main()