# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import functools

from absl.testing import parameterized
import numpy as np

from tensorflow.python import pywrap_tfe
from tensorflow.python.eager import backprop
from tensorflow.python.eager import context
from tensorflow.python.eager import def_function
from tensorflow.python.eager import function
from tensorflow.python.eager import tape as tape_lib
from tensorflow.python.eager import test
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors_impl
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
from tensorflow.python.framework import test_util
from tensorflow.python.framework.memory_checker import MemoryChecker
from tensorflow.python.layers.pooling import max_pooling3d
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import custom_gradient
from tensorflow.python.ops import embedding_ops
from tensorflow.python.ops import functional_ops
from tensorflow.python.ops import gradient_checker_v2
from tensorflow.python.ops import gradients
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import nn_grad  # pylint: disable=unused-import
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import resource_variable_ops
from tensorflow.python.ops import sparse_ops
from tensorflow.python.ops import variables
from tensorflow.python.training import training


class BackpropTest(test.TestCase, parameterized.TestCase):

  @test_util.run_in_graph_and_eager_modes
  def testAggregateGradients(self):

    def fn(x):
      ind1 = constant_op.constant(np.array([0, 1]))
      ind2 = constant_op.constant(np.array([2, 3]))
      ind3 = constant_op.constant(np.array([1, 3]))
      g1 = embedding_ops.embedding_lookup(x, ind1)
      g2 = embedding_ops.embedding_lookup(x, ind2)
      g3 = embedding_ops.embedding_lookup(x, ind3)
      return g1 * g2 * g3

    var_np = np.random.rand(4, 2).astype(np.float32)
    var = constant_op.constant(var_np)
    grad = backprop.gradients_function(fn, [0])(var)[0]
    grad = self.evaluate(ops.convert_to_tensor(grad))

    if not context.executing_eagerly():
      tf_var = array_ops.constant(var_np, dtypes.float32)
      tf_ind1 = array_ops.constant([0, 1])
      tf_ind2 = array_ops.constant([2, 3])
      tf_ind3 = array_ops.constant([1, 3])
      tf_g1 = embedding_ops.embedding_lookup(tf_var, tf_ind1)
      tf_g2 = embedding_ops.embedding_lookup(tf_var, tf_ind2)
      tf_g3 = embedding_ops.embedding_lookup(tf_var, tf_ind3)
      tf_y = tf_g1 * tf_g2 * tf_g3
      tf_grad = gradients.gradients(tf_y, [tf_var])[0]

      tf_dense_grad = math_ops.unsorted_segment_sum(tf_grad.values,
                                                    tf_grad.indices,
                                                    tf_grad.dense_shape[0])

      self.assertAllClose(grad, self.evaluate(tf_dense_grad))

  @test_util.run_in_graph_and_eager_modes
  def testAggregateGradientsWithTensor(self):

    def fn(x):
      ind1 = constant_op.constant(np.array([0, 1]))
      # A mixture of IndexedSlices and dense tensor to aggregate.
      g1 = embedding_ops.embedding_lookup(x, ind1)
      g2 = math_ops.reduce_sum(x * constant_op.constant(2.0))
      return g1 * g2

    var_np = np.random.rand(4, 2).astype(np.float32)
    var = constant_op.constant(var_np)
    grad = backprop.gradients_function(fn, [0])(var)[0]
    grad = self.evaluate(ops.convert_to_tensor(grad))

    if not context.executing_eagerly():
      tf_var = array_ops.constant(var_np, dtypes.float32)
      tf_ind1 = array_ops.constant([0, 1])
      tf_g1 = embedding_ops.embedding_lookup(tf_var, tf_ind1)
      tf_g2 = math_ops.reduce_sum(tf_var * 2.0, axis=(0, 1))
      tf_y = tf_g1 * tf_g2
      tf_grad = gradients.gradients(tf_y, [tf_var])[0]

      self.assertAllClose(grad, tf_grad)

  def testImplicitGradWithResourceVariable(self):
    x = resource_variable_ops.ResourceVariable(
        initial_value=constant_op.constant(1.0), name='x')

    def fn():
      b = constant_op.constant(2.0)
      c = math_ops.add(x.value(), b)
      return math_ops.add(c, constant_op.constant(3.0))

    grads_and_vars = backprop.implicit_grad(fn)()
    self.assertAllEqual(grads_and_vars[0][0], 1.0)
    self.assertAllEqual(id(grads_and_vars[0][1]), id(x))

  @parameterized.named_parameters([('Function', def_function.function),
                                   ('NoFunction', lambda f: f)])
  def testNoOpBehaviorConsistent(self, decorator):

    @decorator
    def f(x):
      # Test all different types of no-ops
      x1 = array_ops.identity(x)
      x2 = math_ops.add_v2(x, 0)
      x3 = math_ops.subtract(x, 0)
      x4 = math_ops.multiply(x, 1)
      with backprop.GradientTape() as t:
        t.watch(x)
        t.watch(x1)
        t.watch(x2)
        t.watch(x3)
        t.watch(x4)
        y1 = x * 2.
        y2 = x1 * 3.
        y3 = x2 * 3.
        y4 = x3 * 3.
        y5 = x4 * 3.
        loss = y1 + y2 + y3 + y4 + y5
      return t.gradient(loss, [x, x1, x2, x3, x4])

    self.assertAllClose([2., 3., 3., 3., 3.], f(constant_op.constant(10.)))

  def testResourceHandleOutputWithoutHandleData(self):
    # This is a bit of a weird thing to test since we try to maintain handle
    # data. But users do create their own resources, and those often do not have
    # any handle data.
    h = resource_variable_ops.var_handle_op(
        shape=[], dtype=dtypes.float32, shared_name='abc')

    with backprop.GradientTape() as tape:
      x = constant_op.constant(1.)
      tape.watch(x)
      tape.watch(h)
      y, h = array_ops.identity_n([x, h])

    self.assertAllClose(1., tape.gradient(y, x))

  def testGradientInsideLoop(self):
    with ops.Graph().as_default():
      v = resource_variable_ops.ResourceVariable(1.0)

      def body(_):
        _ = v + 1.0  # This reads the variable inside the loop context
        with backprop.GradientTape() as t:
          result = v * 2
        self.assertIsNotNone(t.gradient(result, v))
        return 1.0

      control_flow_ops.while_loop(lambda i: False, body, [1.0])

  def testWhereGradient(self):
    # Note: where is special because only some of its arguments are of
    # differentiable dtypes.

    def f(x):
      return array_ops.where(x < 10, x, x * x)

    g = backprop.gradients_function(f)

    self.assertAllEqual(g(5.)[0], 1.0)
    self.assertAllEqual(g(50.)[0], 100.0)

  def testTwoTargets(self):
    with backprop.GradientTape() as t:
      x = constant_op.constant(3.0)
      y = constant_op.constant(2.0)
      t.watch([x, y])
      xx = 2 * x
      yy = 3 * y
    dx, dy = t.gradient([xx, yy], [x, y])
    self.assertAllEqual(dx, 2.0)
    self.assertAllEqual(dy, 3.0)

  def testCustomGradientEmptyError(self):

    @custom_gradient.custom_gradient
    def identity(x):

      def grad(_):
        return []  # This return value is wrong!

      return x, grad

    x = variables.Variable(1.0)
    with backprop.GradientTape() as t:
      y = identity(x)
    with self.assertRaises(ValueError):
      t.gradient(y, [x])

  def testOutputGradUsedInComputation(self):
    with backprop.GradientTape() as t:
      x = constant_op.constant(3.0)
      y = constant_op.constant(2.0)
      t.watch([x, y])
      loss = x * y
    dx, = t.gradient([loss, x], [x], output_gradients=[1.0, 2.0])
    self.assertAllEqual(dx, 4.0)

  def testDy(self):

    def f(x):
      return x

    grad_fn = backprop.gradients_function(f)
    self.assertAllEqual(2., grad_fn(1., dy=2.)[0])

  def testGradientInteger(self):

    def f(x):
      return x + x

    int_tensor = constant_op.constant(1)
    self.assertEqual(backprop.gradients_function(f)(int_tensor)[0], None)

  def testErrors(self):

    @custom_gradient.custom_gradient
    def f(x):

      def grad(_):
        raise RuntimeError('x')

      return x, grad

    # TODO(apassos) raise the right error here
    with self.assertRaises(RuntimeError):
      backprop.gradients_function(f)(constant_op.constant(1.0))

  def testGradientsFunctionInCustomGradient(self):

    @custom_gradient.custom_gradient
    def f(x):
      (y,) = backprop.gradients_function(lambda x: x * x)(x)

      def grad(dy):
        return [2 * dy]

      return y, grad

    self.assertAllEqual(f(1.0), 2.0)

  def testImplicitGradOverEmbeddingLookup(self):
    batch_size = 8
    embedding_size = 512
    vocab_size = 1000
    lrn_rate = 0.1
    random_init = random_ops.random_uniform([vocab_size, embedding_size])

    x = array_ops.ones((batch_size), dtypes.int64)
    embedding = resource_variable_ops.ResourceVariable(
        initial_value=random_init, dtype=dtypes.float32, name='embedding')

    def f():
      embedded_x = embedding_ops.embedding_lookup(embedding, x)
      return constant_op.constant(1.0, dtypes.float32) - embedded_x

    grad = backprop.implicit_grad(f)()[0][0]
    opt = training.GradientDescentOptimizer(lrn_rate)

    with ops.Graph().as_default(), self.cached_session():
      tf_x = array_ops.ones((batch_size), dtypes.int64)
      # TODO(ashankar,apassos): Change to ResourceVariable.
      tf_embedding = variables.Variable(
          random_init.numpy(), name='tf_embedding')
      tf_embedded_x = embedding_ops.embedding_lookup(tf_embedding, tf_x)
      tf_y = 1.0 - tf_embedded_x
      tf_grad = gradients.gradients(tf_y, [tf_embedding])[0]
      tf_opt = training.GradientDescentOptimizer(0.1)
      tf_embedding.initializer.run()

      self.assertAllClose(tf_grad.indices, grad.indices)
      self.assertAllClose(tf_grad.values, grad.values)

      tf_opt.apply_gradients([(tf_grad, tf_embedding)]).run()
      expected = self.evaluate(tf_embedding)
      opt.apply_gradients([(grad, embedding)])
      self.assertAllClose(expected, embedding.read_value())

  def testImplicitGradOrdering(self):
    v0 = resource_variable_ops.ResourceVariable(1.0)
    v1 = resource_variable_ops.ResourceVariable(2.0)

    def f():
      x = v1 * v1
      y = v0 * v0
      return x + y

    grads = backprop.implicit_grad(f)()
    ordered_variables = [x[1] for x in grads]
    self.assertIs(ordered_variables[0], v0)
    self.assertIs(ordered_variables[1], v1)

  def testTapeNoOpGradient(self):
    x = constant_op.constant(3.0)
    with backprop.GradientTape() as t:
      t.watch(x)
      y = x
    self.assertEqual(t.gradient(y, x).numpy(), 1.0)

  def testTapeIdentityGradientIsIdentity(self):
    x = constant_op.constant(3.0)
    with backprop.GradientTape() as t:
      t.watch(x)
      y = array_ops.identity(x)
    self.assertEqual(t.gradient(y, x).numpy(), 1.0)

  def testFunctionIndexedSlicesGradient(self):

    @def_function.function
    def f(x):
      return x + 1

    with backprop.GradientTape() as t:
      x = constant_op.constant([1.0])
      t.watch(x)
      y = f(x)
      y = array_ops.gather(y, [0])
    self.assertAllEqual(t.gradient(y, x), [1.0])

  def testTapeGradientMultiTargetOneIsSource(self):
    x = constant_op.constant(2.0)
    with backprop.GradientTape() as t:
      t.watch(x)
      y = x * x
    self.assertEqual(t.gradient([x, y], x).numpy(), 5.0)

  def testTapeNoOpGradientWithMultiTargetAllSource(self):
    x = constant_op.constant(3.0)
    with backprop.GradientTape() as t:
      t.watch(x)
      y = x
    self.assertEqual(t.gradient([y, y], x).numpy(), 2.0)

  def testTapeNoOpGradientWithMultiTargetMultiSource(self):
    x = constant_op.constant(3.0)
    y = constant_op.constant(5.0)
    with backprop.GradientTape() as t:
      t.watch(x)
      t.watch(y)
      z = y * y
    self.assertAllEqual(t.gradient([x, y, z], [x, y]), [1.0, 11.0])

  def testTapeGradientStringTarget(self):
    s = constant_op.constant('unknown', dtype=dtypes.string)
    x = constant_op.constant(3.0)

    with backprop.GradientTape() as t:
      t.watch(x)
      t.watch(s)
    grads = t.gradient(s, x)
    self.assertEqual(grads, None)

  def testTapeNoOpGradientStringSourceAndTarget(self):
    s = constant_op.constant('unknown', dtype=dtypes.string)

    with backprop.GradientTape() as t:
      t.watch(s)
    grads = t.gradient(s, s)
    self.assertEqual(grads, None)

  def testTapeNoOpGradientWithMultiTargetMultiSourceIncludeString(self):
    x = constant_op.constant(3.0)
    y = constant_op.constant(5.0)
    s = constant_op.constant('unknown', dtype=dtypes.string)

    with backprop.GradientTape() as t:
      t.watch(x)
      t.watch(y)
      t.watch(s)
      z = y * y
    grads = t.gradient([x, y, z, s], [x, y, s])
    self.assertAllEqual(grads[:2], [1.0, 11.0])
    self.assertEqual(grads[2], None)

  def testTapeNoOpOnVariableIsIdentity(self):
    v0 = resource_variable_ops.ResourceVariable(1.0)
    with backprop.GradientTape() as t:
      y = v0.read_value()
    self.assertEqual(t.gradient(y, v0).numpy(), 1.0)

  @test_util.assert_no_new_tensors
  @test_util.assert_no_garbage_created
  def testTapeNoOpGradient2By2(self):
    a_2_by_2 = constant_op.constant(2.0, shape=[2, 2])
    with backprop.GradientTape(persistent=True) as tape:
      tape.watch(a_2_by_2)
    dy_dy = tape.gradient(a_2_by_2, [a_2_by_2])[0]
    self.assertAllEqual(dy_dy.numpy(),
                        constant_op.constant(1.0, shape=[2, 2]).numpy())

  @test_util.assert_no_new_pyobjects_executing_eagerly
  def testTapeNoOpGradientMultiTarget2By2(self):
    a_2_by_2 = constant_op.constant(2.0, shape=[2, 2])
    with backprop.GradientTape(persistent=True) as tape:
      tape.watch(a_2_by_2)
    dy_dy = tape.gradient([a_2_by_2, a_2_by_2], [a_2_by_2])[0]
    self.assertAllEqual(dy_dy.numpy(),
                        constant_op.constant(2.0, shape=[2, 2]).numpy())

  def testTapeStopRecording(self):
    with backprop.GradientTape() as t:
      x = resource_variable_ops.ResourceVariable(1.0)
      with t.stop_recording():
        y = x * x
    self.assertEqual(t.gradient(y, x), None)

  def testTapeStopStartRecording(self):
    with backprop.GradientTape(persistent=True) as t:
      x = resource_variable_ops.ResourceVariable(1.0)
      x2 = x * 2  # This should be differentiated through.
      with t.stop_recording():
        y = x2 * x2
      z = x2 * x2
    self.assertEqual(t.gradient(y, x2), None)

    # If the x*2 was not differentiated through, this would be 2.0, not 4.0
    self.assertEqual(t.gradient(z, x2).numpy(), 4.0)

  def testTapeReset(self):
    with backprop.GradientTape() as t:
      v = resource_variable_ops.ResourceVariable(1.0)
      loss = v * v
      t.reset()
      loss += v * v
    self.assertAllEqual(t.gradient(loss, v), 2.0)

  def testPythonMax(self):
    x = [
        resource_variable_ops.ResourceVariable(2.),
        resource_variable_ops.ResourceVariable(3.),
        resource_variable_ops.ResourceVariable(5.)
    ]
    with backprop.GradientTape() as t:
      f = max(x)
    grad = t.gradient(f, x)
    self.assertAllEqual(self.evaluate(f), 5.)
    self.assertAllEqual(self.evaluate(grad), [None, None, 1.0])

  def testAutomaticWatchedVariables(self):
    with backprop.GradientTape() as t:
      self.assertEqual(0, len(t.watched_variables()))
      v = resource_variable_ops.ResourceVariable(1.0)
      loss = v * v
      self.assertAllEqual([v], t.watched_variables())

      t.reset()
      self.assertEqual(0, len(t.watched_variables()))
      loss += v * v
      self.assertAllEqual([v], t.watched_variables())

  def testExplicitWatchedVariables(self):
    with backprop.GradientTape() as t:
      self.assertEqual(0, len(t.watched_variables()))
      v = resource_variable_ops.ResourceVariable(1.0)
      t.watch(v)
      self.assertAllEqual([v], t.watched_variables())

      t.reset()
      self.assertEqual(0, len(t.watched_variables()))
      t.watch(v)
      self.assertAllEqual([v], t.watched_variables())

  @test_util.assert_no_new_tensors
  def testGradientNone(self):

    def loss(x, l):
      return math_ops.reduce_mean(
          nn_ops.softmax_cross_entropy_with_logits(logits=x, labels=l),
          constant_op.constant([0]))

    logits = constant_op.constant([[0.0, 0.0]])
    labels = constant_op.constant([[1.0, 0.0]])
    # softmax_cross_entropy_with_logits returns two outputs and in this case the
    # gradient wrt the second is None.
    g, = backprop.gradients_function(loss, [0])(logits, labels)
    self.assertAllEqual(g.numpy(), [[-0.5, 0.5]])

  @test_util.run_in_graph_and_eager_modes
  def testGradientWithinTapeBlock(self):
    v1 = resource_variable_ops.ResourceVariable(1.)
    self.evaluate(v1.initializer)
    with backprop.GradientTape() as t:
      loss = 2 * v1
      grad = t.gradient(loss, v1)
    self.assertAllEqual(self.evaluate(grad), 2.0)

    with backprop.GradientTape(persistent=True) as t:
      loss = 2 * v1
      grad = t.gradient(loss, v1)
    self.assertAllEqual(self.evaluate(grad), 2.0)

  @test_util.run_in_graph_and_eager_modes
  def testNestedSelfContexts(self):
    v1 = resource_variable_ops.ResourceVariable(1.)
    self.evaluate(v1.initializer)
    with backprop.GradientTape() as t:
      with self.assertRaises(ValueError):
        with t:
          pass

  @test_util.assert_no_new_tensors
  def testSecondGrad(self):

    def first(x):
      l = constant_op.constant([[0.0]])
      x = nn_ops.softmax_cross_entropy_with_logits(labels=l, logits=x)
      x = math_ops.reduce_sum(x, constant_op.constant([0]))
      return x

    def second(x):
      grad = backprop.gradients_function(first, [0])(x)[0]
      return math_ops.reduce_sum(grad, constant_op.constant([0]))

    f = constant_op.constant([[0.1]])
    grad = backprop.gradients_function(second, [0])(f)[0]
    self.assertAllEqual([[0.0]], grad)

  @test_util.run_in_graph_and_eager_modes
  def testWatchingIsTapeLocal(self):
    x1 = resource_variable_ops.ResourceVariable(2.0, trainable=False)
    x2 = resource_variable_ops.ResourceVariable(2.0, trainable=False)

    with backprop.GradientTape() as tape1:
      with backprop.GradientTape() as tape2:
        tape1.watch(x1)
        tape2.watch([x1, x2])
        y = x1**3
        z = x2**2
        dy, dz = tape2.gradient([y, z], [x1, x2])
      d2y, d2z = tape1.gradient([dy, dz], [x1, x2])

    self.evaluate([x1.initializer, x2.initializer])
    self.assertEqual(self.evaluate(d2y), 12.0)
    self.assertIsNone(d2z)

  @test_util.assert_no_new_tensors
  def testMakeVJP(self):

    def f(x):
      return x * x

    wrapped_fn = backprop.make_vjp(f, persistent=False)
    result, vjp = wrapped_fn(constant_op.constant(3.0))
    self.assertAllEqual(result, 9.0)
    self.assertAllEqual(vjp(2.0)[0], 12.0)

  def testPersistentMakeVJP(self):

    def f(x):
      return x * x

    wrapped_fn = backprop.make_vjp(f, persistent=True)
    _, vjp = wrapped_fn(constant_op.constant(3.0))
    vjp_result1 = vjp(2.0)[0]
    vjp_result2 = vjp(2.0)[0]
    self.assertAllEqual(vjp_result1, vjp_result2, 12.0)

  @test_util.assert_no_new_tensors
  def testGradGrad(self):

    def sq(x):
      return x * x

    def grad(x):
      value = backprop.gradients_function(sq, [0])(x)[0]
      return value

    gradgrad = backprop.gradients_function(grad, [0])

    self.assertAllEqual(gradgrad(constant_op.constant(3.0))[0], 2.0)

  @test_util.assert_no_new_tensors
  def testGradGradExp(self):

    def grad(x):
      value = backprop.gradients_function(math_ops.exp, [0])(x)[0]
      return value

    gradgrad = backprop.gradients_function(grad, [0])

    self.assertAllEqual(gradgrad(constant_op.constant(0.0))[0], 1.0)

  @test_util.assert_no_new_tensors
  def testStopGradient(self):
    grad = backprop.gradients_function(
        lambda x: array_ops.stop_gradient(math_ops.argmax(x)))
    self.assertAllEqual(grad([0.0])[0], None)

  @test_util.assert_no_new_tensors
  def testArgmax(self):

    def argmax(x):
      i = math_ops.argmax(x)
      return array_ops.stop_gradient(i)

    grad = backprop.gradients_function(argmax)
    self.assertAllEqual(grad([0.0])[0], None)

  @test_util.run_gpu_only
  @test_util.assert_no_new_tensors
  def testGPU(self):

    def fn(x):
      with context.device('/gpu:0'):
        b = constant_op.constant(2.0)
        c = math_ops.add(x.gpu(), b)
        # TODO(apassos): remove cpu below by making TensorVSpace aware
        # of devices.
        return math_ops.add(c, constant_op.constant(3.0)).cpu()

    grad = backprop.gradients_function(fn, [0])(constant_op.constant(1.0))[0]
    self.assertAllEqual(grad, 1.0)

  @test_util.run_gpu_only
  @test_util.assert_no_new_tensors
  def testGPUImplicitGrad(self):
    with context.device('gpu:0'):
      v = resource_variable_ops.ResourceVariable(
          constant_op.constant(1.0), name='v')

    def f():
      with context.device('gpu:0'):
        return v.read_value()

    self.assertEqual(backprop.implicit_grad(f)()[0][0].cpu().numpy(), 1.0)

  @test_util.assert_no_new_tensors
  def testCPU(self):

    def fn(x):
      b = constant_op.constant(2.0)
      c = math_ops.add(x, b)
      return math_ops.add(c, constant_op.constant(3.0))

    grad = backprop.gradients_function(fn, [0])(constant_op.constant(1.0))[0]
    self.assertAllEqual(grad, 1.0)

  @test_util.run_gpu_only
  @test_util.assert_no_new_tensors
  def testTensorCopyGPU2CPU2GPU(self):

    def f(a, b):
      return a.cpu() + b.cpu()

    with context.device('/gpu:0'):
      a = constant_op.constant(1.0)
      b = constant_op.constant(2.0)

    grad = backprop.gradients_function(f, [0])(a, b)[0]
    self.assertAllEqual(grad, 1.0)

  @test_util.assert_no_new_tensors
  def testEmptyParams(self):

    def fn(a, b):
      return a * b

    x = constant_op.constant(1.0)
    y = constant_op.constant(2.0)
    dx, dy = backprop.gradients_function(fn)(x, y)
    self.assertAllEqual(dx, y.numpy())
    self.assertAllEqual(dy, x.numpy())

  @test_util.assert_no_new_tensors
  def testUnconnectedNone(self):
    v = resource_variable_ops.ResourceVariable(1.0, name='testUnconnectedNone')

    def f():
      v.read_value()
      return constant_op.constant(1.0)

    self.assertEqual(backprop.implicit_grad(f)()[0][0], None)

  @test_util.assert_no_new_tensors
  def testGradientTapeReEnterContext(self):
    g = backprop.GradientTape()
    with g:
      x = constant_op.constant(3.0)
      g.watch(x)
      y = 2 * x
    with g:
      z = 2 * y
    grad = g.gradient(target=z, sources=[x])
    self.assertEqual(self.evaluate(grad), [4.0])

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testGradientTapeRepeatedSource(self):
    with backprop.GradientTape(persistent=False) as g:
      x = constant_op.constant(3.0)
      g.watch(x)
      y = 2 * x
    grad = g.gradient(target=y, sources=[x, x])
    self.assertEqual(self.evaluate(grad), [2.0, 2.0])

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testPersistentGradientTapeRepeatedSource(self):
    with backprop.GradientTape(persistent=True) as g:
      x = constant_op.constant(3.0)
      y = constant_op.constant(5.0)
      g.watch(x)
      g.watch(y)
      z = x * x + x * y
    grad = g.gradient(target=z, sources=[x, x])
    self.assertEqual(self.evaluate(grad), [11.0, 11.0])
    grad = g.gradient(target=z, sources=[y, x])
    self.assertEqual(self.evaluate(grad), [3.0, 11.0])

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testGradientTapeStructure(self):
    with backprop.GradientTape(persistent=True) as g:
      # Using different constant values because constant tensors are
      # cached, leading to a different gradient than what one might expect.
      x1 = constant_op.constant(3.0)
      x2 = constant_op.constant(3.1)
      x3 = constant_op.constant(3.2)
      g.watch(x1)
      g.watch(x2)
      g.watch(x3)
      y = x1 + 2 * x2 + 3 * x3
    self.assertEqual(self.evaluate(g.gradient(y, x1)), [1.0])
    self.assertEqual(self.evaluate(g.gradient(y, (x1,))), (1.0,))
    self.assertEqual(self.evaluate(g.gradient(y, (x1, x2))), (1.0, 2.0))
    self.assertEqual(
        self.evaluate(g.gradient(y, [(x1, x2), (x2, x3)])), [(1.0, 2.0),
                                                             (2.0, 3.0)])
    self.assertEqual(
        self.evaluate(g.gradient(y, (x1, x2, [x1, x3]))),
        (1.0, 2.0, [1.0, 3.0]))
    self.assertEqual(
        self.evaluate(g.gradient(y, [x1, {
            'x2': x2,
            'x3': x3
        }])), [1.0, {
            'x2': 2.0,
            'x3': 3.0
        }])

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testGradientTape(self):
    with backprop.GradientTape() as g:
      x = constant_op.constant(3.0)
      g.watch(x)
      y = x * x
      with backprop.GradientTape() as gg:
        gg.watch(y)
        z = 2 * y
      inner_grad = gg.gradient(z, [y])[0]
      self.assertEqual(self.evaluate(inner_grad), 2.0)
      y += inner_grad
    grad = g.gradient(y, [x])[0]
    self.assertEqual(self.evaluate(grad), 6.0)

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testGradientTapeCalledOnConstantTarget(self):
    with backprop.GradientTape() as g:
      x = variables.Variable([3.0])
      y = variables.Variable([2.0])
    grad = g.gradient(x, y)
    self.assertAllEqual(grad, None)

  @test_util.run_in_graph_and_eager_modes
  @test_util.run_v1_only('b/120545219')
  def testGradientTapeWithCond(self):
    x = constant_op.constant(3.0)

    def true_fn():
      return x

    def false_fn():
      return x * x

    with backprop.GradientTape() as g:
      g.watch(x)
      y = control_flow_ops.cond(x < x, true_fn, false_fn)

    if not context.executing_eagerly():
      with self.assertRaisesRegex(NotImplementedError, 'tf.gradients'):
        dy = g.gradient(y, [x])[0]
    else:
      dy = g.gradient(y, [x])[0]
      self.assertEqual(self.evaluate(dy), 6.0)

  @test_util.run_in_graph_and_eager_modes
  @test_util.run_v1_only('b/120545219')
  def testGradientTapeWithWhileLoop(self):
    i = constant_op.constant(1)
    x = constant_op.constant(2.)

    def cond(i, _):
      return i < 3

    def body(i, x):
      return i + 1, x * 2

    with backprop.GradientTape() as g:
      g.watch([x])
      _, y = control_flow_ops.while_loop(cond, body, [i, x])

    if not context.executing_eagerly():
      with self.assertRaisesRegex(NotImplementedError, 'tf.gradients'):
        dy = g.gradient(y, [x])[0]
    else:
      dy = g.gradient(y, [x])[0]
      self.assertEqual(self.evaluate(dy), 4.0)

  @test_util.assert_no_new_tensors
  def testGradientTapeGradientCalledMultipleTimes(self):
    with backprop.GradientTape() as g:
      x = constant_op.constant(3.0)
      g.watch(x)
      y = x * x
      z = y * y
    g.gradient(z, [x])
    with self.assertRaisesRegex(
        RuntimeError, 'A non-persistent GradientTape can only'):
      g.gradient(y, [x])

  @test_util.assert_no_new_tensors
  def testGradientTapeJacobianCalledMultipleTimes(self):
    with backprop.GradientTape() as g:
      x = constant_op.constant(3.0)
      g.watch(x)
      y = x * x
      z = y * y
    g.jacobian(z, [x])
    with self.assertRaisesRegex(
        RuntimeError, 'A non-persistent GradientTape can only'):
      g.jacobian(y, [x])

  @test_util.assert_no_new_tensors
  def testJacobianInsideGradientTapeScope(self):
    with backprop.GradientTape() as g:
      x = constant_op.constant(3.0)
      g.watch(x)
      y = x * x
      z = y * y
      self.assertAllClose(4. * 3. ** 3., g.jacobian(z, x))

  @test_util.assert_no_new_tensors
  def testBatchJacobianInsideGradientTapeScope(self):
    with backprop.GradientTape(persistent=True) as g:
      x = constant_op.constant([[3.0]])
      g.watch(x)
      y = x * x
      z = y * y
      self.assertAllClose([[[4. * 3. ** 3.]]], g.batch_jacobian(z, x))

  def testBatchJacobianParallelIterations(self):
    @def_function.function
    def f(persistent):
      with backprop.GradientTape(persistent=persistent) as t:
        x = constant_op.constant([[3.0]])
        t.watch(x)
        y = x * x
        z = array_ops.tile(y * y, [1, 16])
      return t.batch_jacobian(z, x, parallel_iterations=8)
    with self.assertRaisesRegex(RuntimeError,
                                'persistent=True.*parallel_iterations'):
      f(persistent=False)
    self.assertAllClose([[[4. * 3. ** 3.]] * 16], f(persistent=True))

  @test_util.assert_no_new_tensors
  def testGradientTapeBatchJacobianCalledMultipleTimes(self):
    with backprop.GradientTape() as g:
      x = constant_op.constant([[3.0]])
      g.watch(x)
      y = x * x
      z = y * y
    g.batch_jacobian(z, x)
    with self.assertRaisesRegex(
        RuntimeError, 'A non-persistent GradientTape can only'):
      g.batch_jacobian(y, [x])

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  @test_util.run_v1_only('b/120545219')
  def testPersistentTape(self):
    with backprop.GradientTape(persistent=True) as g:
      x = constant_op.constant(3.0)
      g.watch(x)
      y = x * x
      z = y * y
    dz_dx = g.gradient(z, [x])[0]
    self.assertEqual(self.evaluate(dz_dx), 4 * 3 * 3 * 3)
    dy_dx = g.gradient(y, [x])[0]
    self.assertEqual(self.evaluate(dy_dx), 2 * 3)
    del g

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testHigherOrderGradient(self):
    with backprop.GradientTape(persistent=True) as g:
      x = constant_op.constant(3.0)
      g.watch(x)
      y = x**3  # y := x^3
      dy_dx = g.gradient(y, x)  # dy/dx := 3x^2
      d2y_dx2 = g.gradient(dy_dx, x)  # d2y/dx2 := 6x
      d3y_dx3 = g.gradient(d2y_dx2, x)  # d3y/dx3 := 6
    x = 3
    self.assertEqual(self.evaluate(y), x**3)
    self.assertEqual(self.evaluate(dy_dx), 3 * x**2)
    self.assertEqual(self.evaluate(d2y_dx2), 6 * x)
    self.assertEqual(self.evaluate(d3y_dx3), 6)
    del g

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testPersistentNestedTape(self):
    with backprop.GradientTape(persistent=True) as g:
      x = constant_op.constant(3.0)
      g.watch(x)
      y = x * x
      with backprop.GradientTape(persistent=True) as gg:
        gg.watch(y)
        z = 2 * y
      for _ in range(2):
        inner_grad = gg.gradient(z, [y])[0]
        self.assertEqual(self.evaluate(inner_grad), 2.0)
      y += inner_grad
      del gg
    grad = g.gradient(y, [x])[0]
    self.assertEqual(self.evaluate(grad), 6.0)
    grad = g.gradient(z, [x])[0]
    self.assertEqual(self.evaluate(grad), 12.0)
    del g

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testGradientTapeVariable(self):
    v = resource_variable_ops.ResourceVariable(1.0, name='v')
    self.evaluate(v.initializer)
    with backprop.GradientTape() as g:
      y = v * v
    grad = g.gradient(y, [v])[0]
    self.assertAllEqual(self.evaluate(grad), 2.0)

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testNestedGradients(self):
    x = constant_op.constant(3.0)
    with backprop.GradientTape() as g:
      g.watch(x)
      y = x * x
      z = y * y
    dz_dx, dz_dy = g.gradient(z, [x, y])
    self.assertEqual(self.evaluate(dz_dx), 108.0)
    self.assertEqual(self.evaluate(dz_dy), 18.0)

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testUnconnectedGradientsDefault(self):
    x = constant_op.constant(1.0)
    y = constant_op.constant(3.0)
    with backprop.GradientTape() as g:
      g.watch([x, y])
      z = y * 2
    dz_dx = g.gradient(z, x)
    self.assertEqual(dz_dx, None)

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testUnconnectedGradientsZeros(self):
    x = constant_op.constant(1.0, shape=[2, 2])
    y = constant_op.constant(3.0)
    with backprop.GradientTape() as g:
      g.watch([x, y])
      z = y * 2
    dz_dx = g.gradient(z, x, unconnected_gradients='zero')
    self.assertAllEqual([[0.0, 0.0], [0.0, 0.0]], self.evaluate(dz_dx))

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testUnconnectedGradientsVariablesZeros(self):
    x = resource_variable_ops.ResourceVariable(
        constant_op.constant(1., shape=[2, 2]))
    self.evaluate(x.initializer)
    y = resource_variable_ops.ResourceVariable(constant_op.constant(3.))
    self.evaluate(y.initializer)
    with backprop.GradientTape() as g:
      g.watch([x, y])
      z = y * 2
    dz_dx = g.gradient(z, x, unconnected_gradients='zero')
    self.assertAllEqual([[0.0, 0.0], [0.0, 0.0]], self.evaluate(dz_dx))

  @test_util.run_in_graph_and_eager_modes
  def testUnknownUnconnectedGradientsValueGiven(self):
    x = constant_op.constant(1.0)
    y = constant_op.constant(1.0)
    with backprop.GradientTape() as g:
      g.watch([x, y])
      z = y * 2
    with self.assertRaisesRegex(
        ValueError, "Unknown value for unconnected_gradients: 'nonsense'"):
      g.gradient(z, x, unconnected_gradients='nonsense')

  @test_util.run_in_graph_and_eager_modes
  def testUnconnectedGradientsNestedDefunZeros(self):

    @function.defun
    def f(x):
      return x * x

    @function.defun
    def h(y):
      z = f(y)
      return array_ops.stop_gradient(z)

    x = constant_op.constant(1.0)
    with backprop.GradientTape() as g:
      g.watch(x)
      k = x + 2.
      y = h(k)

    dy_dx = g.gradient(y, x, unconnected_gradients='zero')
    self.assertEqual(0.0, self.evaluate(dy_dx))

  def testInvalidRecordOperationMessage(self):
    y = constant_op.constant(2.)
    x = constant_op.constant(1.)
    with backprop.GradientTape() as g:
      g.watch(x)
      tape_lib.record_operation('InvalidBackprop', [y], [x], lambda dy: [])
    with self.assertRaisesRegex(errors_impl.InternalError,
                                'InvalidBackprop.*too few gradients'):
      g.gradient(y, x)

  @test_util.assert_no_new_tensors
  def testEmptyParamsForValueAndGradFunction(self):

    def fn(a, b):
      return a * b

    val_and_grads_fn = backprop.val_and_grad_function(fn)

    x = 2.0
    y = 3.0
    val, (dx, dy) = val_and_grads_fn(x, y)
    self.assertAllClose(val, x * y)
    self.assertAllEqual(dx, y)
    self.assertAllEqual(dy, x)

  @test_util.assert_no_new_tensors
  def testNonEmptyParamsForValueAndGradFunction(self):

    def fn(a, b):
      return a * b

    val_and_grad_fn = backprop.val_and_grad_function(fn, params=[1])

    x = 2.0
    y = 3.0
    val, grads = val_and_grad_fn(x, y)
    self.assertAllClose(val, x * y)
    self.assertEqual(1, len(grads))
    self.assertAllEqual(grads[0], x)

  @test_util.run_gpu_only
  @test_util.assert_no_new_tensors
  def testTensorCopyCPU2GPU2CPU(self):
    # forward: a (cpu->gpu) -> add (gpu) -> c (gpu->cpu) -> add (cpu) -> e (cpu)
    # back: e (cpu) -> add (cpu) -> c (cpu->gpu) -> add (gpu) -> grad (gpu->cpu)
    def f(a, b):
      with context.device('/gpu:0'):
        c = math_ops.add(a.gpu(0), b.gpu(0))
      return math_ops.add(c.cpu(), constant_op.constant(3.0))

    with context.device('/cpu:0'):
      a = constant_op.constant(1.0)
      b = constant_op.constant(2.0)

    grad = backprop.gradients_function(f, [0])(a, b)[0]
    self.assertAllEqual(grad, 1.0)

  def testGetAttrType(self):
    typ = backprop.op_attr_type('Add', 'T')
    self.assertEqual(typ, int(pywrap_tfe.TF_ATTR_TYPE))

  def testGetAttrList(self):
    typ = backprop.op_attr_type('MaxPool', 'ksize')
    self.assertEqual(typ, [int(pywrap_tfe.TF_ATTR_INT)])

  def testMakeAttrType(self):
    self.assertEqual(dtypes.float32,
                     backprop.make_attr(int(pywrap_tfe.TF_ATTR_TYPE), 1))

  def testMakeAttrTypeList(self):
    self.assertEqual([dtypes.float32],
                     backprop.make_attr([int(pywrap_tfe.TF_ATTR_TYPE)], [1]))

  def testMulType(self):

    def mul(x):
      return math_ops._mul_dispatch(x, x)  # pylint: disable=protected-access

    self.assertAllEqual(backprop.gradients_function(mul)(3.0)[0].numpy(), 6.0)

  def testMakeAttrShape(self):
    for s in ([], None, [1, 2, 3], [None, None], [1, None, 3]):
      expected = tensor_shape.TensorShape(s).as_proto()
      actual = backprop.make_attr(int(pywrap_tfe.TF_ATTR_SHAPE), s)
      self.assertEqual(
          expected,
          actual,
          msg=('For shape %r, expected %r != %r actual' %
               (s, expected, actual)))

  def testMakeAttrShapeList(self):
    shape_list = [[], None, [1, 2, 3], [None, None], [1, None, 3]]
    self.assertEqual(
        [tensor_shape.TensorShape(s).as_proto() for s in shape_list],
        backprop.make_attr([int(pywrap_tfe.TF_ATTR_SHAPE)], shape_list))

  def testArgsGradientFunction(self):

    def f(*args):
      return args[0] * args[0]

    grad = backprop.gradients_function(f)
    self.assertAllEqual(grad(1.0)[0], 2.0)

  def testPartial(self):

    def f(x, y):
      return x * y

    part = functools.partial(f, constant_op.constant(2.0))
    self.assertAllEqual(
        backprop.gradients_function(part)(constant_op.constant(1.0))[0], 2.0)

  def testReturnSameThing(self):

    def f(x):
      return x, 2 * x

    self.assertAllEqual(backprop.gradients_function(f)(1.0)[0], 3.0)

  @test_util.assert_no_new_tensors
  def testExceptionSafety(self):

    def f(unused_x):
      raise ValueError()

    try:
      backprop.gradients_function(f)(1.0)
    except ValueError:
      pass

    def real_f(x):
      return x * x

    self.assertAllEqual(backprop.gradients_function(real_f)(1.0)[0], 2.0)

  @test_util.assert_no_new_tensors
  def testMultiValueConvertToTensor(self):
    x = resource_variable_ops.ResourceVariable(
        initial_value=array_ops.constant([1.0]), name='x')

    def fn():
      a = math_ops.add(x.value(), 1.0)
      # Make sure convert_to_tensor works correctly with list of TensorNodes.
      b = array_ops.stack([a, a], axis=0)
      return math_ops.reduce_mean(b)

    grad = backprop.implicit_grad(fn)()[0][0]
    self.assertAllEqual([1.0], grad)

  def testOutput(self):

    def multiout(x):
      return x + 2, x * x

    x = constant_op.constant([0.0, 1.0, 2.0])

    grad = backprop.gradients_function(multiout)(x)[0]
    self.assertAllEqual([1.0, 3.0, 5.0], grad)

  def testMultiValuePreservesIfNotDiffedAgainst(self):

    def tfe_conv2d(timage, tkernel, conv2dstrides):
      return nn_ops.conv2d(timage, tkernel, conv2dstrides, 'SAME')

    i = constant_op.constant([[[[1.0]]]])
    k = constant_op.constant([[[[2.0]]]])
    s = [1, 1, 1, 1]

    grad = backprop.gradients_function(tfe_conv2d, params=(0,))(i, k, s)[0]
    self.assertAllEqual([[[[2.0]]]], grad)

  def testSameObjectForMultipleArguments(self):

    def f(x, y):
      return math_ops.multiply(x, y)

    g = backprop.gradients_function(f)

    def np_g(x, y):
      dx, dy = g(x, y)
      return [dx.numpy(), dy.numpy()]

    x = constant_op.constant(1.)
    self.assertAllEqual([1., 1.], np_g(x, x))
    x = 1.
    self.assertAllEqual([1., 1.], np_g(x, x))
    x = constant_op.constant([[1.]])
    self.assertAllEqual([[[1.]], [[1.]]], np_g(x, x))
    x = [[1.]]
    self.assertAllEqual([[[1.]], [[1.]]], np_g(x, x))

    v = resource_variable_ops.ResourceVariable(
        initial_value=1., name='testSameObjectForMultipleArguments.Variable')
    self.assertAllEqual([1., 1.], np_g(v, v))

  @test_util.assert_no_new_tensors
  def testImplicitGradientsCustomGradientAndCachedVariableValue(self):

    @custom_gradient.custom_gradient
    def my_square(x):
      result = math_ops.square(x)

      def grad(dr):
        return 2 * dr * x + 1

      return result, grad

    x = resource_variable_ops.ResourceVariable(
        initial_value=3., name='X.' + self.id())

    def f():
      return my_square(x)

    g = backprop.implicit_grad(f)

    grads_and_vars = g()
    self.assertEqual(1, len(grads_and_vars))
    grad, var = grads_and_vars[0]
    self.assertAllEqual(7, grad)
    self.assertAllEqual(x, var)

  def testJacobianCustomGradient(self):

    class MyCallable(object):

      def __init__(self):
        self.a = variables.Variable(1.)
        self.b = variables.Variable(2.)
        self.c = variables.Variable(3.)

      def __call__(self, x):
        return self.a * x * x + self.b * x + self.c

    @def_function.function
    def call(c, x):

      @custom_gradient.custom_gradient
      def _call():
        y = c(x)

        def grad(dy, variables=None):  # pylint: disable=redefined-outer-name
          with backprop.GradientTape(persistent=True) as g:
            g.watch(variables)
            y = c(x)
          grad_vars = [
              2 * math_ops.reduce_sum(dy * g.jacobian(y, v)) for v in variables
          ]
          del g
          return (), grad_vars

        return y, grad

      return _call()

    c = MyCallable()
    x = constant_op.constant([1., 2., 3.])
    with backprop.GradientTape(persistent=True) as g:
      g.watch([c.a, c.b, c.c])
      y = call(c, x)
    self.assertAllEqual(g.gradient(y, x), None)

  @test_util.assert_no_new_tensors
  def testCustomGradient(self):

    @custom_gradient.custom_gradient
    def my_mul(x, y):
      result = x * y

      def grad(dr):
        return [dr * y, dr * x]

      return result, grad

    lr = 0.25
    x = resource_variable_ops.ResourceVariable(2., name='x')

    def loss(x):
      return my_mul(2., x.read_value())

    loss_grads_fn = backprop.implicit_val_and_grad(loss)

    losses = []
    for _ in range(5):
      loss, grads_and_vars = loss_grads_fn(x)
      losses.append(loss.numpy())
      for (grad, var) in grads_and_vars:
        var.assign_sub(lr * grad)
    self.assertAllEqual(losses, [4.0, 3., 2., 1., 0.])

  @test_util.assert_no_new_tensors
  def testCustomGradientIdentity(self):

    @custom_gradient.custom_gradient
    def my_identity(x):

      def grad(dresult):
        return [2 * dresult]

      return x, grad

    self.assertAllEqual(backprop.gradients_function(my_identity)(1.0)[0], 2.0)

  def testDifferentiatingFunctionThatReturnsNone(self):

    def fn(x, y):
      result = x * y  # pylint: disable=unused-variable

    x = constant_op.constant(1)
    y = constant_op.constant(2)

    loss_grads_fn = backprop.implicit_val_and_grad(fn)
    with self.assertRaisesRegex(
        ValueError, 'Cannot differentiate a function that returns None; '
        'did you forget to return a value from fn?'):
      loss_grads_fn(x, y)

    val_and_grads_fn = backprop.val_and_grad_function(fn)
    with self.assertRaisesRegex(
        ValueError, 'Cannot differentiate a function that returns None; '
        'did you forget to return a value from fn?'):
      val_and_grads_fn(x, y)

  def testZerosCacheDoesntLeakAcrossGraphs(self):
    with ops.Graph().as_default():

      def get_grad():
        with ops.Graph().as_default(), self.cached_session():
          t = constant_op.constant(1, dtype=dtypes.float32, shape=(10, 4))
          x = constant_op.constant(2, dtype=dtypes.float32, shape=(10, 4))
          with backprop.GradientTape() as tape:
            tape.watch(x)
            x1, _ = array_ops.split(x, num_or_size_splits=2, axis=1)
            y1 = x1**2
            y = array_ops.concat([y1, t], axis=1)
          return self.evaluate(tape.gradient(y, x))

      grad1 = get_grad()
      grad2 = get_grad()

      self.assertAllEqual(grad1, grad2)

  @test_util.run_in_graph_and_eager_modes
  def testSelectivelyWatchVariables(self):
    x1 = resource_variable_ops.ResourceVariable(1.0)
    x2 = resource_variable_ops.ResourceVariable(1.0)
    with backprop.GradientTape(watch_accessed_variables=False) as tape:
      tape.watch(x2)
      y = x1**2
      z = x2**3
    self.assertTupleEqual(tape.watched_variables(), (x2,))
    dy, dz = tape.gradient([y, z], [x1, x2])
    self.evaluate([x1.initializer, x2.initializer])
    self.assertIsNone(dy)
    self.assertEqual(self.evaluate(dz), 3.0)

  @test_util.run_in_graph_and_eager_modes
  def testDifferentiatingScalarCache(self):
    # In the following test, if x2 = x1 (i.e. the objects are the exact same),
    # then y is essentially 2*x1, and dy/dx1 = 2.
    # When we had a pure scalar cache in eager, this would be the case. This
    # test prevents us from going back to that case.
    with backprop.GradientTape(persistent=False) as g:
      x1 = constant_op.constant(3.0)
      x2 = constant_op.constant(3.0)
      g.watch(x1)
      g.watch(x2)
      y = x1 + x2
    grad = g.gradient(target=y, sources=[x1])
    self.assertEqual(self.evaluate(grad), [1.0])

  def testVariablesAndConstantsProduceTheSameGradients(self):

    # In the following test, differentiating [y, z] against [a, b] gives:
    # (dy/da + dz/da, dy/db + dz/db).
    # If a and b are the same constant, dz/da will not be 0 (which it should
    # be).
    # This is solved by using variable since doing a read_value on a tensor will
    # produce a new tensor and corresponding TensorHandle, and not reuse the
    # same tensor (which would happen if we are using a cache and reusing
    # EagerTensor objects).
    def get_grads(a, b):
      with backprop.GradientTape() as tape:
        tape.watch([a, b])
        y = a**3
        z = b**2
      return tape.gradient([y, z], [a, b])

    gradients_constants = get_grads(
        constant_op.constant(2.0), constant_op.constant(2.0))
    gradients_variables = get_grads(
        resource_variable_ops.ResourceVariable(2.0),
        resource_variable_ops.ResourceVariable(2.0))
    self.assertAllEqual(gradients_constants, gradients_variables)

  def testUnknownShapes(self):
    with ops.Graph().as_default():
      with backprop.GradientTape() as tape:
        a = array_ops.placeholder(dtype=dtypes.float32, shape=None)
        tape.watch(a)
        b = a**3

      db_da = tape.gradient(b, a)

      with self.cached_session() as sess:
        self.assertEqual((8.0, 12.0), sess.run((b, db_da), feed_dict={a: 2.0}))

  @test_util.run_in_graph_and_eager_modes
  def testCustomGradientInEagerAndGraph(self):

    @custom_gradient.custom_gradient
    def f(x):
      y = x * x

      def grad(dy):
        return [4 * dy]

      return y, grad

    with backprop.GradientTape() as t:
      c = constant_op.constant(1.0)
      t.watch(c)
      g = f(c)
    self.assertAllEqual(self.evaluate(t.gradient(g, c)), 4.0)

  def testOverrideSecondOrderWithCustomGradient(self):

    @custom_gradient.custom_gradient
    def f(x):

      def first_order_grad(dz):

        @custom_gradient.custom_gradient
        def first_order_custom(unused_x):

          def h(ddz):
            return -2.1 * ddz

          return -1.1, h

        return dz * first_order_custom(x)

      return x + 10., first_order_grad

    c = constant_op.constant(1.)
    with backprop.GradientTape() as outer:
      outer.watch(c)
      with backprop.GradientTape() as inner:
        inner.watch(c)
        d = f(c)**4.
      dd = inner.gradient(d, c)
    self.assertAllClose(4. * f(c)**3. * -1.1, dd)
    self.assertAllClose(3. * 4. * f(c)**2. * -1.1 * -1.1 + 4. * f(c)**3. * -2.1,
                        outer.gradient(dd, c))

  @test_util.run_in_graph_and_eager_modes
  def testCustomGradientForwardprop(self):

    @custom_gradient.custom_gradient
    def f(x):
      z = 2. * tensor_util.constant_value(x)

      def g(dz):

        @custom_gradient.custom_gradient
        def first_order(unused_x, unused_dz):

          def second_order_and_transpose(unused_ddz):
            return 2.2, 3.1

          return 2.1, second_order_and_transpose

        return first_order(x, dz)

      return z, g

    with backprop.GradientTape(persistent=True) as t:
      with backprop.GradientTape() as tt:
        c = constant_op.constant(1.)
        t.watch(c)
        tt.watch(c)
        output_grad = array_ops.ones([])
        t.watch(output_grad)
        output = f(c)
        self.assertAllClose(2., output)
      gc = tt.gradient(output, c, output_gradients=output_grad)
      self.assertAllClose(2.1, gc)
    ggc = t.gradient(gc, c)
    self.assertAllClose(2.2, ggc)
    # Note that executed eagerly this kind of transpose is not efficient. But
    # from a tf.function we could prune out the first-order gradient
    # computation.
    transpose = t.gradient(gc, output_grad)
    self.assertAllClose(3.1, transpose)

  @test_util.run_in_graph_and_eager_modes
  def testMaxPooling3DGradient(self):

    def forward(a):
      r = max_pooling3d(a, pool_size=pool_size, strides=strides, padding='SAME')
      return r

    input_sizes = [1, 3, 2, 4, 1]
    pool_size = (2, 2, 1)
    strides = (1, 1, 1)

    total_size = np.prod(input_sizes)
    x = np.arange(1, total_size + 1, dtype=np.float32)
    aa = constant_op.constant(x, shape=input_sizes, dtype=dtypes.float32)
    da = backprop.gradients_function(forward)(aa)

    if not context.executing_eagerly():
      tf_aa = constant_op.constant(x, shape=input_sizes, dtype=dtypes.float32)
      tf_max = max_pooling3d(
          tf_aa, pool_size=pool_size, strides=strides, padding='SAME')
      tf_da = gradients.gradients(tf_max, [tf_aa])
      self.assertAllEqual(da[0], tf_da[0])

  @test_util.run_in_graph_and_eager_modes
  def testWatchBadThing(self):
    g = backprop.GradientTape()
    with self.assertRaisesRegex(ValueError, 'ndarray'):
      g.watch(np.array(1.))

  def testWatchComposite(self):
    """Test that tape.watch expands composites and watches component Tensors."""
    with backprop.GradientTape() as t:
      values = constant_op.constant([1.0, 2.0], dtypes.float32)
      s = sparse_tensor.SparseTensor(
          indices=[[0, 0], [1, 2]], values=values, dense_shape=[3, 4])
      t.watch(s)
      z = sparse_ops.sparse_reduce_sum_v2(s)
    result = t.gradient(z, values)
    self.assertAllEqual(result, [1.0, 1.0])

  def testWatchedVariablesAfterNonPersistentGradientCall(self):
    with backprop.GradientTape(persistent=False) as tape:
      x = resource_variable_ops.ResourceVariable(1.0)
      tape.watch(x)
    tape.gradient(x, x)
    self.assertEqual((x,), tape.watched_variables())

  def testWatchedVariablesOnlyHasVariablesFromLastTape(self):
    with backprop.GradientTape(persistent=False) as tape:
      x = resource_variable_ops.ResourceVariable(1.0)
      tape.watch(x)
    with backprop.GradientTape(persistent=False) as tape:
      z = resource_variable_ops.ResourceVariable(2.0)
      tape.watch(z)
    tape.gradient(z, z)
    self.assertEqual((z,), tape.watched_variables())

  def testWatchedVariablesRespectReset(self):
    with backprop.GradientTape(persistent=False) as tape:
      x = resource_variable_ops.ResourceVariable(1.0)
      tape.watch(x)
      self.assertEqual((x,), tape.watched_variables())
      tape.reset()
      z = resource_variable_ops.ResourceVariable(2.0)
      tape.watch(z)
      self.assertEqual((z,), tape.watched_variables())
    tape.gradient(z, z)
    self.assertEqual((z,), tape.watched_variables())

  def testNameScope(self):

    def fn(x):
      with ops.name_scope('my_scope'):
        a = math_ops.cos(x)
        b = math_ops.cos(x)
        return math_ops.add(a, b)

    @function.defun
    def grad_fn(x):
      return backprop.gradients_function(fn)(x)

    grad_ops = grad_fn.get_concrete_function(
        constant_op.constant(1.0)).graph.get_operations()
    num_sin_ops_found = 0
    for op in grad_ops:
      if op.type == 'Sin':
        num_sin_ops_found += 1
        self.assertIn('gradient_tape/my_scope/', op.name)
    self.assertEqual(num_sin_ops_found, 2)

  @test_util.assert_no_new_pyobjects_executing_eagerly
  def testRecomputeGradWithDifferentShape(self):

    @custom_gradient.recompute_grad
    def outer(x):
      return [x[0] + 1, x[1] + 1]

    x = [
        variables.Variable([1.0, 2.0], name='a'),
        variables.Variable(1.0, name='b')
    ]
    with backprop.GradientTape():
      y = outer(x)
      self.assertAllEqual(y[0], [2.0, 3.0])
      self.assertAllEqual(y[1], 2.0)

    @custom_gradient.recompute_grad
    def outer_dict(x):
      for key in x.keys():
        x[key] = x[key] + 1
      return x

    x = {x[0].ref(): x[0], x[1].ref(): x[1]}
    with backprop.GradientTape():
      y = outer_dict(x)
      y = list(y.values())
      self.assertAllEqual(y[0], [2.0, 3.0])
      self.assertAllEqual(y[1], 2.0)

  @test_util.assert_no_new_pyobjects_executing_eagerly
  def testRecomputeGradWithNestedFunctionAndWhileLoop(self):

    @custom_gradient.recompute_grad
    @def_function.function
    def outer(x):

      @def_function.function
      def middle(y):

        @def_function.function
        def inner(z):
          return z + 1

        i = constant_op.constant(0.0)
        c = lambda y, i: i < 10.
        b = lambda y, i: (inner(y), i + 1.0)
        y, i = control_flow_ops.while_loop(c, b, [y, i])

        return y

      return middle(x)

    with MemoryChecker() as memory_checker:
      for _ in range(5):
        x = variables.Variable(1.0, name='x')
        with backprop.GradientTape():
          y = outer(x)
          self.assertAllEqual(y, 11.0)

    memory_checker.report()
    memory_checker.assert_no_leak_if_all_possibly_except_one()


class JacobianTest(test.TestCase):

  def _jacobian(self, experimental_use_pfor):
    persistent = context.executing_eagerly and not experimental_use_pfor
    with backprop.GradientTape(persistent=persistent) as g:
      x = constant_op.constant([1., 2.])
      y = constant_op.constant([3., 4.])
      g.watch(x)
      g.watch(y)
      z = x * x * y
    jacobian = g.jacobian(
        z, [x, y], experimental_use_pfor=experimental_use_pfor)
    answer = [array_ops.diag(2 * x * y), array_ops.diag(x * x)]
    return jacobian, answer

  @test_util.run_v1_only('b/120545219')
  def testPfor(self):
    jacobian, answer = self._jacobian(experimental_use_pfor=True)
    for j, a in zip(jacobian, answer):
      self.assertAllEqual(a, j)

  @test_util.run_v1_only('b/120545219')
  def testWhileLoop(self):
    jacobian, answer = self._jacobian(experimental_use_pfor=False)
    for j, a in zip(jacobian, answer):
      self.assertAllEqual(a, j)

  @test_util.run_v1_only('b/120545219')
  def testPforDefun(self):

    @function.defun
    def _f():
      return self._jacobian(experimental_use_pfor=True)

    jacobian, answer = _f()
    for j, a in zip(jacobian, answer):
      self.assertAllEqual(a, j)

  @test_util.run_v1_only('b/120545219')
  def testWhileLoopDefun(self):

    @function.defun
    def _f():
      return self._jacobian(experimental_use_pfor=False)

    jacobian, answer = _f()
    for j, a in zip(jacobian, answer):
      self.assertAllEqual(a, j)

  @test_util.run_v1_only('b/120545219')
  def testPersistentTape(self):
    if not context.executing_eagerly():
      return
    with backprop.GradientTape() as g:
      x = constant_op.constant([1.0, 2.0])
      g.watch(x)
      y = x * x
    with self.assertRaisesRegex(RuntimeError, 'persistent'):
      g.jacobian(y, x, experimental_use_pfor=False)

  @test_util.run_v1_only('b/120545219')
  def test_parallel_iterations(self):
    with backprop.GradientTape(persistent=True) as g:
      x = constant_op.constant([[1., 2], [3, 4]])
      g.watch(x)
      y = math_ops.matmul(x, x)
    self.assertAllClose(
        g.jacobian(y, x, parallel_iterations=2),
        g.jacobian(y, x, parallel_iterations=3))

  @test_util.run_in_graph_and_eager_modes
  def test_nested_jacobian(self):
    if context.executing_eagerly():
      # TODO(agarwal): b/128842926
      self.skipTest('Conversion of function calls not implemented yet.')
    x = array_ops.ones((10, 2))
    with backprop.GradientTape(persistent=False) as g:
      g.watch(x)
      with backprop.GradientTape(persistent=False) as gg:
        gg.watch(x)
        y = math_ops.reduce_sum(math_ops.square(x))
      dy_x = gg.jacobian(y, x)
    dy_xx = g.batch_jacobian(dy_x, x)
    dy_xx_answer = [[[2., 0], [0, 2.]]] * 10
    self.assertAllClose(dy_xx_answer, self.evaluate(dy_xx))

  def test_nested_batch_jacobian_foldl(self):
    def _grad(f):
      def _grad_function(primal):
        with backprop.GradientTape() as tape:
          tape.watch(primal)
          primal_out = f(primal)
        return tape.batch_jacobian(primal_out, primal)
class JacobianTest(test.TestCase):

  def _jacobian(self, experimental_use_pfor):
    persistent = context.executing_eagerly and not experimental_use_pfor
    with backprop.GradientTape(persistent=persistent) as g:
      x = constant_op.constant([1., 2.])
      y = constant_op.constant([3., 4.])
      g.watch(x)
      g.watch(y)
      z = x * x * y
    jacobian = g.jacobian(
        z, [x, y], experimental_use_pfor=experimental_use_pfor)
    answer = [array_ops.diag(2 * x * y), array_ops.diag(x * x)]
    return jacobian, answer

  @test_util.run_v1_only('b/120545219')
  def testPfor(self):
    jacobian, answer = self._jacobian(experimental_use_pfor=True)
    for j, a in zip(jacobian, answer):
      self.assertAllEqual(a, j)

  @test_util.run_v1_only('b/120545219')
  def testWhileLoop(self):
    jacobian, answer = self._jacobian(experimental_use_pfor=False)
    for j, a in zip(jacobian, answer):
      self.assertAllEqual(a, j)

  @test_util.run_v1_only('b/120545219')
  def testPforDefun(self):

    @function.defun
    def _f():
      return self._jacobian(experimental_use_pfor=True)

    jacobian, answer = _f()
    for j, a in zip(jacobian, answer):
      self.assertAllEqual(a, j)

  @test_util.run_v1_only('b/120545219')
  def testWhileLoopDefun(self):

    @function.defun
    def _f():
      return self._jacobian(experimental_use_pfor=False)

    jacobian, answer = _f()
    for j, a in zip(jacobian, answer):
      self.assertAllEqual(a, j)

  @test_util.run_v1_only('b/120545219')
  def testPersistentTape(self):
    if not context.executing_eagerly():
      return
    with backprop.GradientTape() as g:
      x = constant_op.constant([1.0, 2.0])
      g.watch(x)
      y = x * x
    with self.assertRaisesRegex(RuntimeError, 'persistent'):
      g.jacobian(y, x, experimental_use_pfor=False)

  @test_util.run_v1_only('b/120545219')
  def test_parallel_iterations(self):
    with backprop.GradientTape(persistent=True) as g:
      x = constant_op.constant([[1., 2], [3, 4]])
      g.watch(x)
      y = math_ops.matmul(x, x)
    self.assertAllClose(
        g.jacobian(y, x, parallel_iterations=2),
        g.jacobian(y, x, parallel_iterations=3))

  @test_util.run_in_graph_and_eager_modes
  def test_nested_jacobian(self):
    if context.executing_eagerly():
      # TODO(agarwal): b/128842926
      self.skipTest('Conversion of function calls not implemented yet.')
    x = array_ops.ones((10, 2))
    with backprop.GradientTape(persistent=False) as g:
      g.watch(x)
      with backprop.GradientTape(persistent=False) as gg:
        gg.watch(x)
        y = math_ops.reduce_sum(math_ops.square(x))
      dy_x = gg.jacobian(y, x)
    dy_xx = g.batch_jacobian(dy_x, x)
    dy_xx_answer = [[[2., 0], [0, 2.]]] * 10
    self.assertAllClose(dy_xx_answer, self.evaluate(dy_xx))

  def test_nested_batch_jacobian_foldl(self):

    def _grad(f):

      def _grad_function(primal):
        with backprop.GradientTape() as tape:
          tape.watch(primal)
          primal_out = f(primal)
        return tape.batch_jacobian(primal_out, primal)

      return _grad_function

    def _func(x):
      return array_ops.reshape(
          functional_ops.foldl_v2(lambda a, b: math_ops.cos(a + b),
                                  array_ops.transpose(x)),
          [1, 1])

    f = _func
    x = constant_op.constant([[1., 2.]])
    for _ in range(2):
      theoretical, numerical = gradient_checker_v2.compute_gradient(f, [x])
      self.assertAllClose(theoretical, numerical, rtol=1e-3)
      f = _grad(f)
      expected_flat = array_ops.reshape(numerical, [-1])
      self.assertAllClose(expected_flat,
                          array_ops.reshape(f(x), [-1]),
                          rtol=1e-3)
      self.assertAllClose(expected_flat,
                          array_ops.reshape(def_function.function(f)(x), [-1]),
                          rtol=1e-3)

  def test_grad_jacobian_conv(self):

    def _inner(x):
      kernel = array_ops.ones([3, 3, 1, 9])
      with backprop.GradientTape() as tape:
        tape.watch(x)
        y = nn_ops.conv2d(x, kernel, strides=(1, 1), padding='SAME',
                          data_format='NHWC')
        reduced = math_ops.reduce_sum(y ** 2., axis=[2, 3])
      return math_ops.reduce_sum(tape.batch_jacobian(reduced, x))

    theoretical, numerical = gradient_checker_v2.compute_gradient(
        def_function.function(_inner), [array_ops.ones([10, 4, 4, 1])])
    self.assertAllClose(numerical, theoretical, rtol=1e-1)

    @def_function.function
    def _outer():
      with backprop.GradientTape() as tape:
        x = array_ops.ones([10, 4, 4, 1])
        tape.watch(x)
        y = _inner(x)
      return tape.gradient(y, x)

    self.assertAllClose(array_ops.reshape(numerical, [-1]),
                        array_ops.reshape(_outer(), [-1]), rtol=1e-1)

  @test_util.run_in_graph_and_eager_modes
  def test_indexed_slices(self):
    with backprop.GradientTape(persistent=True) as g:
      inp = random_ops.random_uniform([3, 2])
      g.watch(inp)
      output = nn.embedding_lookup(inp, [0, 2])
    self.assertAllClose(
        g.jacobian(output, inp, experimental_use_pfor=True),
        g.jacobian(output, inp, experimental_use_pfor=False))

  def test_foldl_partial_function(self):
    x = array_ops.zeros([3])
    with backprop.GradientTape(persistent=True) as tape:
      tape.watch(x)
      result = def_function.function(
          functools.partial(functional_ops.foldl_v2, lambda a, b: a + b))(x)
    self.assertAllClose([1., 1., 1.],
                        tape.jacobian(result, x, experimental_use_pfor=True))
    self.assertAllClose([1., 1., 1.],
                        tape.jacobian(result, x, experimental_use_pfor=False))

    # Non-persistent tapes take a different function gradient path, but also
    # work with pfor=True.
    x = array_ops.zeros([3])
    with backprop.GradientTape() as tape:
      tape.watch(x)
      result = def_function.function(
          functools.partial(functional_ops.foldl_v2, lambda a, b: a + b))(x)
    self.assertAllClose([1., 1., 1.],
                        tape.jacobian(result, x, experimental_use_pfor=True))

  def test_foldl_pure_function(self):

    @def_function.function
    def compute_jacobian(use_pfor):
      x = array_ops.zeros([3])
      with backprop.GradientTape(persistent=True) as tape:
        tape.watch(x)
        result = functools.partial(functional_ops.foldl_v2,
                                   lambda a, b: a + b)(x)
      return tape.jacobian(result, x, experimental_use_pfor=use_pfor)

    self.assertAllClose(compute_jacobian(use_pfor=True),
                        compute_jacobian(use_pfor=False))

  def test_cond_func_grad_jacobian(self):

    @def_function.function
    def f(x):
      y = control_flow_ops.cond(x > 0., lambda: x**3., lambda: x**2.)
      return y

    with backprop.GradientTape(persistent=True) as tape:
      x = constant_op.constant(1.)
      tape.watch(x)
      y = f(x)
      grad = tape.gradient(y, x)
    self.assertAllClose(3., grad)
    jacobian = tape.jacobian(grad, x, experimental_use_pfor=False)
    self.assertAllClose(6., jacobian)
    jacobian_pfor = tape.jacobian(grad, x, experimental_use_pfor=True)
    self.assertAllClose(6., jacobian_pfor)


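# Note: batch_jacobian treats the first dimension as a batch dimension and
# computes per-example Jacobians: for an output of shape [b, y_dims] and a
# source of shape [b, x_dims] the result has shape [b, y_dims, x_dims],
# skipping the cross-example blocks a full jacobian call would compute.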
@test_util.run_all_in_graph_and_eager_modes
class BatchJacobianTest(test.TestCase, parameterized.TestCase):

  def _batch_jacobian(self, experimental_use_pfor):
    persistent = context.executing_eagerly and not experimental_use_pfor
    with backprop.GradientTape(persistent=persistent) as g:
      x = constant_op.constant([[1., 2.], [3., 4.]])
      y = constant_op.constant([[3., 4.], [5., 6.]])
      g.watch(x)
      z = x * x * y
    batch_jacobian = g.batch_jacobian(
        z, x, experimental_use_pfor=experimental_use_pfor)
    answer = array_ops.stack(
        [array_ops.diag(2 * x[0] * y[0]),
         array_ops.diag(2 * x[1] * y[1])])
    return batch_jacobian, answer

  def testPfor(self):
    batch_jacobian, answer = self._batch_jacobian(experimental_use_pfor=True)
    self.assertAllEqual(answer, batch_jacobian)

  def testWhileLoop(self):
    batch_jacobian, answer = self._batch_jacobian(experimental_use_pfor=False)
    self.assertAllEqual(answer, batch_jacobian)

  def testPforDefun(self):

    @function.defun
    def _f():
      return self._batch_jacobian(experimental_use_pfor=True)

    batch_jacobian, answer = _f()
    self.assertAllEqual(answer, batch_jacobian)

  def testWhileLoopDefun(self):

    @function.defun
    def _f():
      return self._batch_jacobian(experimental_use_pfor=False)

    batch_jacobian, answer = _f()
    self.assertAllEqual(answer, batch_jacobian)

  def testPersistentTape(self):
    if not context.executing_eagerly():
      return
    with backprop.GradientTape() as g:
      x = constant_op.constant([[1.0, 2.0]])
      g.watch(x)
      y = x * x
    with self.assertRaisesRegex(RuntimeError, 'persistent'):
      g.batch_jacobian(y, x, experimental_use_pfor=False)

  def testBadShape(self):
    x = random_ops.random_uniform([2, 3])
    with backprop.GradientTape() as g:
      y = array_ops.concat([x, x], axis=0)
    with self.assertRaisesRegex(ValueError, 'Need first dimension'):
      g.batch_jacobian(y, x)

  def testBadInputRank(self):
    x = random_ops.random_uniform([2])
    with backprop.GradientTape() as g:
      y = random_ops.random_uniform([2, 2])
    with self.assertRaisesRegex(ValueError, 'must have rank at least 2'):
      g.batch_jacobian(y, x)

  def testBadOutputRank(self):
    x = random_ops.random_uniform([2, 2])
    with backprop.GradientTape() as g:
      y = random_ops.random_uniform([2])
    with self.assertRaisesRegex(ValueError, 'must have rank at least 2'):
      g.batch_jacobian(y, x)

  def test_parallel_iterations(self):
    with backprop.GradientTape(persistent=True) as g:
      x = constant_op.constant([[1., 2], [3, 4]])
      g.watch(x)
      w = constant_op.constant([[1., 2, 3, 4], [5, 6, 7, 8]])
      y = math_ops.matmul(x, w)
    self.assertAllClose(
        g.batch_jacobian(y, x, parallel_iterations=2),
        g.batch_jacobian(y, x, parallel_iterations=3))

  @parameterized.parameters((True, True), (True, False), (False, True),
                            (False, False))
  def test_degenerate_shape(self, use_function, use_pfor):

    def f(x):
      with backprop.GradientTape(persistent=True) as tape:
        tape.watch(x)
        y = x**2
      return tape.batch_jacobian(y, x, experimental_use_pfor=use_pfor)

    if use_function:
      f = def_function.function(f)
    self.assertAllEqual([1, 0, 0], array_ops.shape(f(array_ops.zeros([1, 0]))))

  @parameterized.parameters((True,), (False,))
  def test_zeros_type_correct(self, use_pfor):
    for dtype in [dtypes.float32, dtypes.float64]:

      @def_function.function
      def f(x):
        del x
        return constant_op.constant([[1.]], dtype=dtype)  # pylint: disable=cell-var-from-loop

      with backprop.GradientTape(persistent=True) as tape:
        x = constant_op.constant([[2.]], dtype=dtype)
        tape.watch(x)
        y = f(x)
      jac = tape.batch_jacobian(y, x, experimental_use_pfor=use_pfor)
      self.assertEqual(dtype, jac.dtype)
      self.assertAllClose([[[0.]]], jac)

      with backprop.GradientTape(persistent=True) as tape:
        x = constant_op.constant([[2.]], dtype=dtype)
        tape.watch(x)
        y = f(x)
      jac = tape.batch_jacobian(y, x, unconnected_gradients='zero',
                                experimental_use_pfor=use_pfor)
      self.assertEqual(dtype, jac.dtype)
      self.assertAllClose([[[0.]]], jac)


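# Note: backprop.aggregate_indexed_slices_gradients sums a list of gradients
# that may mix IndexedSlices, dense Tensors, and Nones; Nones are skipped and
# an empty list aggregates to None, as the tests below verify.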
class AggregateIndexedSlicesGradientsTest(test_util.TensorFlowTestCase):

  def _assert_indexed_slices_equal(self, left, right):
    self.assertAllEqual(
        self.evaluate(ops.convert_to_tensor(left)),
        self.evaluate(ops.convert_to_tensor(right)))

  def testNoGradients(self):
    self.assertIsNone(backprop.aggregate_indexed_slices_gradients([]))

  def testOneGradient(self):
    t = math_ops._as_indexed_slices(
        constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
    result = backprop.aggregate_indexed_slices_gradients([t])
    self._assert_indexed_slices_equal(t, result)

  def testMultipleGradients(self):
    t0 = math_ops._as_indexed_slices(
        constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
    t1 = math_ops._as_indexed_slices(
        constant_op.constant([[0., 0.], [5, 6], [7., 8.]]))
    total = constant_op.constant([[1., 2.], [5, 6], [10., 12.]])
    result = backprop.aggregate_indexed_slices_gradients([t0, t1])
    self._assert_indexed_slices_equal(total, result)

  def testMultipleGradientsWithNones(self):
    t0 = math_ops._as_indexed_slices(
        constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
    t1 = math_ops._as_indexed_slices(
        constant_op.constant([[0., 0.], [5, 6], [7., 8.]]))
    t3 = None
    total = constant_op.constant([[1., 2.], [5, 6], [10., 12.]])
    result = backprop.aggregate_indexed_slices_gradients([t0, t1, t3])
    self._assert_indexed_slices_equal(total, result)

  def testMixedTensorAndIndexedSlices(self):
    t0 = math_ops._as_indexed_slices(
        constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
    t1 = constant_op.constant([[0., 0.], [5, 6], [7., 8.]])
    total = constant_op.constant([[1., 2.], [5, 6], [10., 12.]])
    result = backprop.aggregate_indexed_slices_gradients([t0, t1])
    self._assert_indexed_slices_equal(total, result)


if __name__ == '__main__':
  test.main()