# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for Adam."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

from tensorflow.compiler.tests import xla_test
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import resource_variable_ops
from tensorflow.python.ops import variable_scope
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
from tensorflow.python.training import adam


def adam_update_numpy(param,
                      g_t,
                      t,
                      m,
                      v,
                      alpha=0.001,
                      beta1=0.9,
                      beta2=0.999,
                      epsilon=1e-8):
  """Performs one Adam update step in NumPy, mirroring adam.AdamOptimizer."""
  # Bias-corrected step size: folds the (1 - beta^t) corrections into alpha.
  alpha_t = alpha * np.sqrt(1 - beta2**t) / (1 - beta1**t)

  # First and second moment estimates (exponential moving averages).
  m_t = beta1 * m + (1 - beta1) * g_t
  v_t = beta2 * v + (1 - beta2) * g_t * g_t

  param_t = param - alpha_t * m_t / (np.sqrt(v_t) + epsilon)
  return param_t, m_t, v_t


class AdamOptimizerTest(xla_test.XLATestCase):

  def testBasic(self):
    for dtype in self.float_types | self.complex_types:
      # TODO: test fails for float16 due to excessive precision requirements.
      if dtype in [np.float16, dtypes.bfloat16.as_numpy_dtype]:
        continue
      with self.session(), self.test_scope():
        variable_scope.get_variable_scope().set_use_resource(True)

        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 2.0], dtype=dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype)

        var0 = resource_variable_ops.ResourceVariable(var0_np)
        var1 = resource_variable_ops.ResourceVariable(var1_np)
        grads0 = array_ops.placeholder(dtype)
        grads1 = array_ops.placeholder(dtype)
        opt = adam.AdamOptimizer()
        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        variables.global_variables_initializer().run()

        # Fetch params to validate initial values.
        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 4.0], self.evaluate(var1))

        beta1_power, beta2_power = opt._get_beta_accumulators()

        # Run 3 steps of Adam.
        for t in range(1, 4):
          self.assertAllCloseAccordingToType(0.9**t,
                                             self.evaluate(beta1_power))
          self.assertAllCloseAccordingToType(0.999**t,
                                             self.evaluate(beta2_power))
          update.run(feed_dict={grads0: grads0_np, grads1: grads1_np})

          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)

          # Validate updated params.
          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

  def testTensorLearningRate(self):
    for dtype in self.float_types | self.complex_types:
      # TODO: test fails for float16 due to excessive precision requirements.
      if dtype in [np.float16, dtypes.bfloat16.as_numpy_dtype]:
        continue
      with self.session(), self.test_scope():
        variable_scope.get_variable_scope().set_use_resource(True)

        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 2.0], dtype=dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype)

        var0 = resource_variable_ops.ResourceVariable(var0_np)
        var1 = resource_variable_ops.ResourceVariable(var1_np)
        grads0 = array_ops.placeholder(dtype)
        grads1 = array_ops.placeholder(dtype)
        opt = adam.AdamOptimizer(constant_op.constant(0.001))
        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        variables.global_variables_initializer().run()

        # Fetch params to validate initial values.
        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 4.0], self.evaluate(var1))

        beta1_power, beta2_power = opt._get_beta_accumulators()

        # Run 3 steps of Adam.
        for t in range(1, 4):
          self.assertAllCloseAccordingToType(0.9**t,
                                             self.evaluate(beta1_power))
          self.assertAllCloseAccordingToType(0.999**t,
                                             self.evaluate(beta2_power))
          update.run(feed_dict={grads0: grads0_np, grads1: grads1_np})

          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)

          # Validate updated params.
          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

  def testSharing(self):
    for dtype in self.float_types | self.complex_types:
      # TODO: test fails for float16 due to excessive precision requirements.
      if dtype in [np.float16, dtypes.bfloat16.as_numpy_dtype]:
        continue
      with self.session(), self.test_scope():
        variable_scope.get_variable_scope().set_use_resource(True)

        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 2.0], dtype=dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype)

        var0 = resource_variable_ops.ResourceVariable(var0_np)
        var1 = resource_variable_ops.ResourceVariable(var1_np)
        grads0 = array_ops.placeholder(dtype)
        grads1 = array_ops.placeholder(dtype)
        opt = adam.AdamOptimizer()
        update1 = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        update2 = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        variables.global_variables_initializer().run()

        beta1_power, beta2_power = opt._get_beta_accumulators()

        # Fetch params to validate initial values.
        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 4.0], self.evaluate(var1))

        # Run 3 steps of intertwined Adam1 and Adam2.
        for t in range(1, 4):
          self.assertAllCloseAccordingToType(0.9**t,
                                             self.evaluate(beta1_power))
          self.assertAllCloseAccordingToType(0.999**t,
                                             self.evaluate(beta2_power))
          if t % 2 == 0:
            update1.run(feed_dict={grads0: grads0_np, grads1: grads1_np})
          else:
            update2.run(feed_dict={grads0: grads0_np, grads1: grads1_np})

          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)

          # Validate updated params.
          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))


if __name__ == "__main__":
  test.main()