# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import functools

from absl.testing import parameterized
import numpy as np

from tensorflow.python import pywrap_tfe
from tensorflow.python.eager import backprop
from tensorflow.python.eager import context
from tensorflow.python.eager import def_function
from tensorflow.python.eager import function
from tensorflow.python.eager import tape as tape_lib
from tensorflow.python.eager import test
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors_impl
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
from tensorflow.python.framework import test_util
from tensorflow.python.framework.memory_checker import MemoryChecker
from tensorflow.python.layers.pooling import max_pooling3d
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import custom_gradient
from tensorflow.python.ops import embedding_ops
from tensorflow.python.ops import functional_ops
from tensorflow.python.ops import gradient_checker_v2
from tensorflow.python.ops import gradients
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import nn_grad  # pylint: disable=unused-import
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import resource_variable_ops
from tensorflow.python.ops import sparse_ops
from tensorflow.python.ops import variables
from tensorflow.python.training import training


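# The tests below exercise TensorFlow's eager backpropagation APIs:
# backprop.GradientTape, backprop.gradients_function, backprop.make_vjp and
# backprop.implicit_grad, in both eager and graph modes where applicable.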
class BackpropTest(test.TestCase, parameterized.TestCase):

  @test_util.run_in_graph_and_eager_modes
  def testAggregateGradients(self):

    def fn(x):
      ind1 = constant_op.constant(np.array([0, 1]))
      ind2 = constant_op.constant(np.array([2, 3]))
      ind3 = constant_op.constant(np.array([1, 3]))
      g1 = embedding_ops.embedding_lookup(x, ind1)
      g2 = embedding_ops.embedding_lookup(x, ind2)
      g3 = embedding_ops.embedding_lookup(x, ind3)
      return g1 * g2 * g3

    var_np = np.random.rand(4, 2).astype(np.float32)
    var = constant_op.constant(var_np)
    grad = backprop.gradients_function(fn, [0])(var)[0]
    grad = self.evaluate(ops.convert_to_tensor(grad))

    if not context.executing_eagerly():
      tf_var = array_ops.constant(var_np, dtypes.float32)
      tf_ind1 = array_ops.constant([0, 1])
      tf_ind2 = array_ops.constant([2, 3])
      tf_ind3 = array_ops.constant([1, 3])
      tf_g1 = embedding_ops.embedding_lookup(tf_var, tf_ind1)
      tf_g2 = embedding_ops.embedding_lookup(tf_var, tf_ind2)
      tf_g3 = embedding_ops.embedding_lookup(tf_var, tf_ind3)
      tf_y = tf_g1 * tf_g2 * tf_g3
      tf_grad = gradients.gradients(tf_y, [tf_var])[0]

      tf_dense_grad = math_ops.unsorted_segment_sum(tf_grad.values,
                                                    tf_grad.indices,
                                                    tf_grad.dense_shape[0])

      self.assertAllClose(grad, self.evaluate(tf_dense_grad))

  @test_util.run_in_graph_and_eager_modes
  def testAggregateGradientsWithTensor(self):

    def fn(x):
      ind1 = constant_op.constant(np.array([0, 1]))
      # A mixture of IndexedSlices and dense tensor to aggregate.
      g1 = embedding_ops.embedding_lookup(x, ind1)
      g2 = math_ops.reduce_sum(x * constant_op.constant(2.0))
      return g1 * g2

    var_np = np.random.rand(4, 2).astype(np.float32)
    var = constant_op.constant(var_np)
    grad = backprop.gradients_function(fn, [0])(var)[0]
    grad = self.evaluate(ops.convert_to_tensor(grad))

    if not context.executing_eagerly():
      tf_var = array_ops.constant(var_np, dtypes.float32)
      tf_ind1 = array_ops.constant([0, 1])
      tf_g1 = embedding_ops.embedding_lookup(tf_var, tf_ind1)
      tf_g2 = math_ops.reduce_sum(tf_var * 2.0, axis=(0, 1))
      tf_y = tf_g1 * tf_g2
      tf_grad = gradients.gradients(tf_y, [tf_var])[0]

      self.assertAllClose(grad, tf_grad)

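  # backprop.implicit_grad differentiates fn with respect to every trainable
  # variable the function reads and returns (gradient, variable) pairs; the
  # test below checks both the gradient value and the variable identity.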
  def testImplicitGradWithResourceVariable(self):
    x = resource_variable_ops.ResourceVariable(
        initial_value=constant_op.constant(1.0), name='x')

    def fn():
      b = constant_op.constant(2.0)
      c = math_ops.add(x.value(), b)
      return math_ops.add(c, constant_op.constant(3.0))

    grads_and_vars = backprop.implicit_grad(fn)()
    self.assertAllEqual(grads_and_vars[0][0], 1.0)
    self.assertAllEqual(id(grads_and_vars[0][1]), id(x))

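  # Identity-like ops (identity, x + 0, x - 0, x * 1) should behave the same
  # whether or not the computation is wrapped in a tf.function: each watched
  # intermediate still receives its own gradient.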
  @parameterized.named_parameters([('Function', def_function.function),
                                   ('NoFunction', lambda f: f)])
  def testNoOpBehaviorConsistent(self, decorator):

    @decorator
    def f(x):
      # Test all different types of no-ops
      x1 = array_ops.identity(x)
      x2 = math_ops.add_v2(x, 0)
      x3 = math_ops.subtract(x, 0)
      x4 = math_ops.multiply(x, 1)
      with backprop.GradientTape() as t:
        t.watch(x)
        t.watch(x1)
        t.watch(x2)
        t.watch(x3)
        t.watch(x4)
        y1 = x * 2.
        y2 = x1 * 3.
        y3 = x2 * 3.
        y4 = x3 * 3.
        y5 = x4 * 3.
        loss = y1 + y2 + y3 + y4 + y5
      return t.gradient(loss, [x, x1, x2, x3, x4])

    self.assertAllClose([2., 3., 3., 3., 3.], f(constant_op.constant(10.)))

  def testResourceHandleOutputWithoutHandleData(self):
    # This is a bit of a weird thing to test since we try to maintain handle
    # data. But users do create their own resources, and those often do not have
    # any handle data.
    h = resource_variable_ops.var_handle_op(
        shape=[], dtype=dtypes.float32, shared_name='abc')

    with backprop.GradientTape() as tape:
      x = constant_op.constant(1.)
      tape.watch(x)
      tape.watch(h)
      y, h = array_ops.identity_n([x, h])

    self.assertAllClose(1., tape.gradient(y, x))

  def testGradientInsideLoop(self):
    with ops.Graph().as_default():
      v = resource_variable_ops.ResourceVariable(1.0)

      def body(_):
        _ = v + 1.0  # This reads the variable inside the loop context
        with backprop.GradientTape() as t:
          result = v * 2
        self.assertIsNotNone(t.gradient(result, v))
        return 1.0

      control_flow_ops.while_loop(lambda i: False, body, [1.0])

  def testWhereGradient(self):
    # Note: where is special because only some of its arguments are of
    # differentiable dtypes.

    def f(x):
      return array_ops.where(x < 10, x, x * x)

    g = backprop.gradients_function(f)

    self.assertAllEqual(g(5.)[0], 1.0)
    self.assertAllEqual(g(50.)[0], 100.0)

  def testTwoTargets(self):
    with backprop.GradientTape() as t:
      x = constant_op.constant(3.0)
      y = constant_op.constant(2.0)
      t.watch([x, y])
      xx = 2 * x
      yy = 3 * y
    dx, dy = t.gradient([xx, yy], [x, y])
    self.assertAllEqual(dx, 2.0)
    self.assertAllEqual(dy, 3.0)

  def testCustomGradientEmptyError(self):

    @custom_gradient.custom_gradient
    def identity(x):

      def grad(_):
        return []  # This return value is wrong!

      return x, grad

    x = variables.Variable(1.0)
    with backprop.GradientTape() as t:
      y = identity(x)
    with self.assertRaises(ValueError):
      t.gradient(y, [x])

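  # With output_gradients, each target's gradient is scaled by the supplied
  # cotangent before summing: here d(loss)/dx * 1.0 + d(x)/dx * 2.0
  # = y * 1.0 + 1.0 * 2.0 = 2.0 + 2.0 = 4.0.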
  def testOutputGradUsedInComputation(self):
    with backprop.GradientTape() as t:
      x = constant_op.constant(3.0)
      y = constant_op.constant(2.0)
      t.watch([x, y])
      loss = x * y
    dx, = t.gradient([loss, x], [x], output_gradients=[1.0, 2.0])
    self.assertAllEqual(dx, 4.0)

  def testDy(self):

    def f(x):
      return x

    grad_fn = backprop.gradients_function(f)
    self.assertAllEqual(2., grad_fn(1., dy=2.)[0])

  def testGradientInteger(self):

    def f(x):
      return x + x

    int_tensor = constant_op.constant(1)
    self.assertEqual(backprop.gradients_function(f)(int_tensor)[0], None)

  def testErrors(self):

    @custom_gradient.custom_gradient
    def f(x):

      def grad(_):
        raise RuntimeError('x')

      return x, grad

    # TODO(apassos) raise the right error here
    with self.assertRaises(RuntimeError):
      backprop.gradients_function(f)(constant_op.constant(1.0))

  def testGradientsFunctionInCustomGradient(self):

    @custom_gradient.custom_gradient
    def f(x):
      (y,) = backprop.gradients_function(lambda x: x * x)(x)

      def grad(dy):
        return [2 * dy]

      return y, grad

    self.assertAllEqual(f(1.0), 2.0)

  def testImplicitGradOverEmbeddingLookup(self):
    batch_size = 8
    embedding_size = 512
    vocab_size = 1000
    lrn_rate = 0.1
    random_init = random_ops.random_uniform([vocab_size, embedding_size])

    x = array_ops.ones((batch_size), dtypes.int64)
    embedding = resource_variable_ops.ResourceVariable(
        initial_value=random_init, dtype=dtypes.float32, name='embedding')

    def f():
      embedded_x = embedding_ops.embedding_lookup(embedding, x)
      return constant_op.constant(1.0, dtypes.float32) - embedded_x

    grad = backprop.implicit_grad(f)()[0][0]
    opt = training.GradientDescentOptimizer(lrn_rate)

    with ops.Graph().as_default(), self.cached_session():
      tf_x = array_ops.ones((batch_size), dtypes.int64)
      # TODO(ashankar,apassos): Change to ResourceVariable.
      tf_embedding = variables.Variable(
          random_init.numpy(), name='tf_embedding')
      tf_embedded_x = embedding_ops.embedding_lookup(tf_embedding, tf_x)
      tf_y = 1.0 - tf_embedded_x
      tf_grad = gradients.gradients(tf_y, [tf_embedding])[0]
      tf_opt = training.GradientDescentOptimizer(0.1)
      tf_embedding.initializer.run()

      self.assertAllClose(tf_grad.indices, grad.indices)
      self.assertAllClose(tf_grad.values, grad.values)

      tf_opt.apply_gradients([(tf_grad, tf_embedding)]).run()
      expected = self.evaluate(tf_embedding)
    opt.apply_gradients([(grad, embedding)])
    self.assertAllClose(expected, embedding.read_value())

  def testImplicitGradOrdering(self):
    v0 = resource_variable_ops.ResourceVariable(1.0)
    v1 = resource_variable_ops.ResourceVariable(2.0)

    def f():
      x = v1 * v1
      y = v0 * v0
      return x + y

    grads = backprop.implicit_grad(f)()
    ordered_variables = [x[1] for x in grads]
    self.assertIs(ordered_variables[0], v0)
    self.assertIs(ordered_variables[1], v1)

  def testTapeNoOpGradient(self):
    x = constant_op.constant(3.0)
    with backprop.GradientTape() as t:
      t.watch(x)
      y = x
    self.assertEqual(t.gradient(y, x).numpy(), 1.0)

  def testTapeIdentityGradientIsIdentity(self):
    x = constant_op.constant(3.0)
    with backprop.GradientTape() as t:
      t.watch(x)
      y = array_ops.identity(x)
    self.assertEqual(t.gradient(y, x).numpy(), 1.0)

  def testFunctionIndexedSlicesGradient(self):

    @def_function.function
    def f(x):
      return x + 1

    with backprop.GradientTape() as t:
      x = constant_op.constant([1.0])
      t.watch(x)
      y = f(x)
      y = array_ops.gather(y, [0])
    self.assertAllEqual(t.gradient(y, x), [1.0])

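  # When a source also appears in the target list its own gradient (1.0) is
  # added to the gradient through the other targets: d(x)/dx + d(x*x)/dx
  # = 1 + 2 * 2 = 5 at x = 2.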
  def testTapeGradientMultiTargetOneIsSource(self):
    x = constant_op.constant(2.0)
    with backprop.GradientTape() as t:
      t.watch(x)
      y = x * x
    self.assertEqual(t.gradient([x, y], x).numpy(), 5.0)

  def testTapeNoOpGradientWithMultiTargetAllSource(self):
    x = constant_op.constant(3.0)
    with backprop.GradientTape() as t:
      t.watch(x)
      y = x
    self.assertEqual(t.gradient([y, y], x).numpy(), 2.0)

  def testTapeNoOpGradientWithMultiTargetMultiSource(self):
    x = constant_op.constant(3.0)
    y = constant_op.constant(5.0)
    with backprop.GradientTape() as t:
      t.watch(x)
      t.watch(y)
      z = y * y
    self.assertAllEqual(t.gradient([x, y, z], [x, y]), [1.0, 11.0])

  def testTapeGradientStringTarget(self):
    s = constant_op.constant('unknown', dtype=dtypes.string)
    x = constant_op.constant(3.0)

    with backprop.GradientTape() as t:
      t.watch(x)
      t.watch(s)
    grads = t.gradient(s, x)
    self.assertEqual(grads, None)

  def testTapeNoOpGradientStringSourceAndTarget(self):
    s = constant_op.constant('unknown', dtype=dtypes.string)

    with backprop.GradientTape() as t:
      t.watch(s)
    grads = t.gradient(s, s)
    self.assertEqual(grads, None)

  def testTapeNoOpGradientWithMultiTargetMultiSourceIncludeString(self):
    x = constant_op.constant(3.0)
    y = constant_op.constant(5.0)
    s = constant_op.constant('unknown', dtype=dtypes.string)

    with backprop.GradientTape() as t:
      t.watch(x)
      t.watch(y)
      t.watch(s)
      z = y * y
    grads = t.gradient([x, y, z, s], [x, y, s])
    self.assertAllEqual(grads[:2], [1.0, 11.0])
    self.assertEqual(grads[2], None)

  def testTapeNoOpOnVariableIsIdentity(self):
    v0 = resource_variable_ops.ResourceVariable(1.0)
    with backprop.GradientTape() as t:
      y = v0.read_value()
    self.assertEqual(t.gradient(y, v0).numpy(), 1.0)

  @test_util.assert_no_new_tensors
  @test_util.assert_no_garbage_created
  def testTapeNoOpGradient2By2(self):
    a_2_by_2 = constant_op.constant(2.0, shape=[2, 2])
    with backprop.GradientTape(persistent=True) as tape:
      tape.watch(a_2_by_2)
    dy_dy = tape.gradient(a_2_by_2, [a_2_by_2])[0]
    self.assertAllEqual(dy_dy.numpy(),
                        constant_op.constant(1.0, shape=[2, 2]).numpy())

  @test_util.assert_no_new_pyobjects_executing_eagerly
  def testTapeNoOpGradientMultiTarget2By2(self):
    a_2_by_2 = constant_op.constant(2.0, shape=[2, 2])
    with backprop.GradientTape(persistent=True) as tape:
      tape.watch(a_2_by_2)
    dy_dy = tape.gradient([a_2_by_2, a_2_by_2], [a_2_by_2])[0]
    self.assertAllEqual(dy_dy.numpy(),
                        constant_op.constant(2.0, shape=[2, 2]).numpy())

  def testTapeStopRecording(self):
    with backprop.GradientTape() as t:
      x = resource_variable_ops.ResourceVariable(1.0)
      with t.stop_recording():
        y = x * x
    self.assertEqual(t.gradient(y, x), None)

  def testTapeStopStartRecording(self):
    with backprop.GradientTape(persistent=True) as t:
      x = resource_variable_ops.ResourceVariable(1.0)
      x2 = x * 2  # This should be differentiated through.
      with t.stop_recording():
        y = x2 * x2
      z = x2 * x2
    self.assertEqual(t.gradient(y, x2), None)

    # If the x*2 was not differentiated through, this would be 2.0, not 4.0
    self.assertEqual(t.gradient(z, x2).numpy(), 4.0)

  def testTapeReset(self):
    with backprop.GradientTape() as t:
      v = resource_variable_ops.ResourceVariable(1.0)
      loss = v * v
      t.reset()
      loss += v * v
    self.assertAllEqual(t.gradient(loss, v), 2.0)

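  # Python's built-in max() compares the variables eagerly and returns just
  # one of them, so only the selected variable is connected to the result;
  # the other sources get a None gradient.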
  def testPythonMax(self):
    x = [
        resource_variable_ops.ResourceVariable(2.),
        resource_variable_ops.ResourceVariable(3.),
        resource_variable_ops.ResourceVariable(5.)
    ]
    with backprop.GradientTape() as t:
      f = max(x)
    grad = t.gradient(f, x)
    self.assertAllEqual(self.evaluate(f), 5.)
    self.assertAllEqual(self.evaluate(grad), [None, None, 1.0])

  def testAutomaticWatchedVariables(self):
    with backprop.GradientTape() as t:
      self.assertEqual(0, len(t.watched_variables()))
      v = resource_variable_ops.ResourceVariable(1.0)
      loss = v * v
      self.assertAllEqual([v], t.watched_variables())

      t.reset()
      self.assertEqual(0, len(t.watched_variables()))
      loss += v * v
      self.assertAllEqual([v], t.watched_variables())

  def testExplicitWatchedVariables(self):
    with backprop.GradientTape() as t:
      self.assertEqual(0, len(t.watched_variables()))
      v = resource_variable_ops.ResourceVariable(1.0)
      t.watch(v)
      self.assertAllEqual([v], t.watched_variables())

      t.reset()
      self.assertEqual(0, len(t.watched_variables()))
      t.watch(v)
      self.assertAllEqual([v], t.watched_variables())

  @test_util.assert_no_new_tensors
  def testGradientNone(self):

    def loss(x, l):
      return math_ops.reduce_mean(
          nn_ops.softmax_cross_entropy_with_logits(logits=x, labels=l),
          constant_op.constant([0]))

    logits = constant_op.constant([[0.0, 0.0]])
    labels = constant_op.constant([[1.0, 0.0]])
    # softmax_cross_entropy_with_logits returns two outputs and in this case the
    # gradient wrt the second is None.
    g, = backprop.gradients_function(loss, [0])(logits, labels)
    self.assertAllEqual(g.numpy(), [[-0.5, 0.5]])

  @test_util.run_in_graph_and_eager_modes
  def testGradientWithinTapeBlock(self):
    v1 = resource_variable_ops.ResourceVariable(1.)
    self.evaluate(v1.initializer)
    with backprop.GradientTape() as t:
      loss = 2 * v1
      grad = t.gradient(loss, v1)
    self.assertAllEqual(self.evaluate(grad), 2.0)

    with backprop.GradientTape(persistent=True) as t:
      loss = 2 * v1
      grad = t.gradient(loss, v1)
    self.assertAllEqual(self.evaluate(grad), 2.0)

  @test_util.run_in_graph_and_eager_modes
  def testNestedSelfContexts(self):
    v1 = resource_variable_ops.ResourceVariable(1.)
    self.evaluate(v1.initializer)
    with backprop.GradientTape() as t:
      with self.assertRaises(ValueError):
        with t:
          pass

  @test_util.assert_no_new_tensors
  def testSecondGrad(self):

    def first(x):
      l = constant_op.constant([[0.0]])
      x = nn_ops.softmax_cross_entropy_with_logits(labels=l, logits=x)
      x = math_ops.reduce_sum(x, constant_op.constant([0]))
      return x

    def second(x):
      grad = backprop.gradients_function(first, [0])(x)[0]
      return math_ops.reduce_sum(grad, constant_op.constant([0]))

    f = constant_op.constant([[0.1]])
    grad = backprop.gradients_function(second, [0])(f)[0]
    self.assertAllEqual([[0.0]], grad)

  @test_util.run_in_graph_and_eager_modes
  def testWatchingIsTapeLocal(self):
    x1 = resource_variable_ops.ResourceVariable(2.0, trainable=False)
    x2 = resource_variable_ops.ResourceVariable(2.0, trainable=False)

    with backprop.GradientTape() as tape1:
      with backprop.GradientTape() as tape2:
        tape1.watch(x1)
        tape2.watch([x1, x2])
        y = x1**3
        z = x2**2
        dy, dz = tape2.gradient([y, z], [x1, x2])
      d2y, d2z = tape1.gradient([dy, dz], [x1, x2])

    self.evaluate([x1.initializer, x2.initializer])
    self.assertEqual(self.evaluate(d2y), 12.0)
    self.assertIsNone(d2z)

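  # make_vjp returns f(x) together with a vector-Jacobian-product function;
  # for f(x) = x * x at x = 3, vjp(2.0) = 2.0 * df/dx = 2.0 * 6.0 = 12.0.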
  @test_util.assert_no_new_tensors
  def testMakeVJP(self):

    def f(x):
      return x * x

    wrapped_fn = backprop.make_vjp(f, persistent=False)
    result, vjp = wrapped_fn(constant_op.constant(3.0))
    self.assertAllEqual(result, 9.0)
    self.assertAllEqual(vjp(2.0)[0], 12.0)

  def testPersistentMakeVJP(self):

    def f(x):
      return x * x

    wrapped_fn = backprop.make_vjp(f, persistent=True)
    _, vjp = wrapped_fn(constant_op.constant(3.0))
    vjp_result1 = vjp(2.0)[0]
    vjp_result2 = vjp(2.0)[0]
    self.assertAllEqual(vjp_result1, vjp_result2, 12.0)

  @test_util.assert_no_new_tensors
  def testGradGrad(self):

    def sq(x):
      return x * x

    def grad(x):
      value = backprop.gradients_function(sq, [0])(x)[0]
      return value

    gradgrad = backprop.gradients_function(grad, [0])

    self.assertAllEqual(gradgrad(constant_op.constant(3.0))[0], 2.0)

  @test_util.assert_no_new_tensors
  def testGradGradExp(self):

    def grad(x):
      value = backprop.gradients_function(math_ops.exp, [0])(x)[0]
      return value

    gradgrad = backprop.gradients_function(grad, [0])

    self.assertAllEqual(gradgrad(constant_op.constant(0.0))[0], 1.0)

  @test_util.assert_no_new_tensors
  def testStopGradient(self):
    grad = backprop.gradients_function(
        lambda x: array_ops.stop_gradient(math_ops.argmax(x)))
    self.assertAllEqual(grad([0.0])[0], None)

  @test_util.assert_no_new_tensors
  def testArgmax(self):

    def argmax(x):
      i = math_ops.argmax(x)
      return array_ops.stop_gradient(i)

    grad = backprop.gradients_function(argmax)
    self.assertAllEqual(grad([0.0])[0], None)

  @test_util.run_gpu_only
  @test_util.assert_no_new_tensors
  def testGPU(self):

    def fn(x):
      with context.device('/gpu:0'):
        b = constant_op.constant(2.0)
        c = math_ops.add(x.gpu(), b)
        # TODO(apassos): remove cpu below by making TensorVSpace aware
        # of devices.
        return math_ops.add(c, constant_op.constant(3.0)).cpu()

    grad = backprop.gradients_function(fn, [0])(constant_op.constant(1.0))[0]
    self.assertAllEqual(grad, 1.0)

  @test_util.run_gpu_only
  @test_util.assert_no_new_tensors
  def testGPUImplicitGrad(self):
    with context.device('gpu:0'):
      v = resource_variable_ops.ResourceVariable(
          constant_op.constant(1.0), name='v')

    def f():
      with context.device('gpu:0'):
        return v.read_value()

    self.assertEqual(backprop.implicit_grad(f)()[0][0].cpu().numpy(), 1.0)

  @test_util.assert_no_new_tensors
  def testCPU(self):

    def fn(x):
      b = constant_op.constant(2.0)
      c = math_ops.add(x, b)
      return math_ops.add(c, constant_op.constant(3.0))

    grad = backprop.gradients_function(fn, [0])(constant_op.constant(1.0))[0]
    self.assertAllEqual(grad, 1.0)

  @test_util.run_gpu_only
  @test_util.assert_no_new_tensors
  def testTensorCopyGPU2CPU2GPU(self):

    def f(a, b):
      return a.cpu() + b.cpu()

    with context.device('/gpu:0'):
      a = constant_op.constant(1.0)
      b = constant_op.constant(2.0)

    grad = backprop.gradients_function(f, [0])(a, b)[0]
    self.assertAllEqual(grad, 1.0)

  @test_util.assert_no_new_tensors
  def testEmptyParams(self):

    def fn(a, b):
      return a * b

    x = constant_op.constant(1.0)
    y = constant_op.constant(2.0)
    dx, dy = backprop.gradients_function(fn)(x, y)
    self.assertAllEqual(dx, y.numpy())
    self.assertAllEqual(dy, x.numpy())

  @test_util.assert_no_new_tensors
  def testUnconnectedNone(self):
    v = resource_variable_ops.ResourceVariable(1.0, name='testUnconnectedNone')

    def f():
      v.read_value()
      return constant_op.constant(1.0)

    self.assertEqual(backprop.implicit_grad(f)()[0][0], None)

  @test_util.assert_no_new_tensors
  def testGradientTapeReEnterContext(self):
    g = backprop.GradientTape()
    with g:
      x = constant_op.constant(3.0)
      g.watch(x)
      y = 2 * x
    with g:
      z = 2 * y
    grad = g.gradient(target=z, sources=[x])
    self.assertEqual(self.evaluate(grad), [4.0])

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testGradientTapeRepeatedSource(self):
    with backprop.GradientTape(persistent=False) as g:
      x = constant_op.constant(3.0)
      g.watch(x)
      y = 2 * x
    grad = g.gradient(target=y, sources=[x, x])
    self.assertEqual(self.evaluate(grad), [2.0, 2.0])

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testPersistentGradientTapeRepeatedSource(self):
    with backprop.GradientTape(persistent=True) as g:
      x = constant_op.constant(3.0)
      y = constant_op.constant(5.0)
      g.watch(x)
      g.watch(y)
      z = x * x + x * y
    grad = g.gradient(target=z, sources=[x, x])
    self.assertEqual(self.evaluate(grad), [11.0, 11.0])
    grad = g.gradient(target=z, sources=[y, x])
    self.assertEqual(self.evaluate(grad), [3.0, 11.0])

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testGradientTapeStructure(self):
    with backprop.GradientTape(persistent=True) as g:
      # Using different constant values because constant tensors are
      # cached, leading to a different gradient than what one might expect.
      x1 = constant_op.constant(3.0)
      x2 = constant_op.constant(3.1)
      x3 = constant_op.constant(3.2)
      g.watch(x1)
      g.watch(x2)
      g.watch(x3)
      y = x1 + 2 * x2 + 3 * x3
    self.assertEqual(self.evaluate(g.gradient(y, x1)), [1.0])
    self.assertEqual(self.evaluate(g.gradient(y, (x1,))), (1.0,))
    self.assertEqual(self.evaluate(g.gradient(y, (x1, x2))), (1.0, 2.0))
    self.assertEqual(
        self.evaluate(g.gradient(y, [(x1, x2), (x2, x3)])), [(1.0, 2.0),
                                                             (2.0, 3.0)])
    self.assertEqual(
        self.evaluate(g.gradient(y, (x1, x2, [x1, x3]))),
        (1.0, 2.0, [1.0, 3.0]))
    self.assertEqual(
        self.evaluate(g.gradient(y, [x1, {
            'x2': x2,
            'x3': x3
        }])), [1.0, {
            'x2': 2.0,
            'x3': 3.0
        }])

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testGradientTape(self):
    with backprop.GradientTape() as g:
      x = constant_op.constant(3.0)
      g.watch(x)
      y = x * x
      with backprop.GradientTape() as gg:
        gg.watch(y)
        z = 2 * y
      inner_grad = gg.gradient(z, [y])[0]
      self.assertEqual(self.evaluate(inner_grad), 2.0)
      y += inner_grad
    grad = g.gradient(y, [x])[0]
    self.assertEqual(self.evaluate(grad), 6.0)

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testGradientTapeCalledOnConstantTarget(self):
    with backprop.GradientTape() as g:
      x = variables.Variable([3.0])
      y = variables.Variable([2.0])
    grad = g.gradient(x, y)
    self.assertAllEqual(grad, None)

  @test_util.run_in_graph_and_eager_modes
  @test_util.run_v1_only('b/120545219')
  def testGradientTapeWithCond(self):
    x = constant_op.constant(3.0)

    def true_fn():
      return x

    def false_fn():
      return x * x

    with backprop.GradientTape() as g:
      g.watch(x)
      y = control_flow_ops.cond(x < x, true_fn, false_fn)

    if not context.executing_eagerly():
      with self.assertRaisesRegex(NotImplementedError, 'tf.gradients'):
        dy = g.gradient(y, [x])[0]
    else:
      dy = g.gradient(y, [x])[0]
      self.assertEqual(self.evaluate(dy), 6.0)

  @test_util.run_in_graph_and_eager_modes
  @test_util.run_v1_only('b/120545219')
  def testGradientTapeWithWhileLoop(self):
    i = constant_op.constant(1)
    x = constant_op.constant(2.)

    def cond(i, _):
      return i < 3

    def body(i, x):
      return i + 1, x * 2

    with backprop.GradientTape() as g:
      g.watch([x])
      _, y = control_flow_ops.while_loop(cond, body, [i, x])

    if not context.executing_eagerly():
      with self.assertRaisesRegex(NotImplementedError, 'tf.gradients'):
        dy = g.gradient(y, [x])[0]
    else:
      dy = g.gradient(y, [x])[0]
      self.assertEqual(self.evaluate(dy), 4.0)

  @test_util.assert_no_new_tensors
  def testGradientTapeGradientCalledMultipleTimes(self):
    with backprop.GradientTape() as g:
      x = constant_op.constant(3.0)
      g.watch(x)
      y = x * x
      z = y * y
    g.gradient(z, [x])
    with self.assertRaisesRegex(
        RuntimeError, 'A non-persistent GradientTape can only'):
      g.gradient(y, [x])

  @test_util.assert_no_new_tensors
  def testGradientTapeJacobianCalledMultipleTimes(self):
    with backprop.GradientTape() as g:
      x = constant_op.constant(3.0)
      g.watch(x)
      y = x * x
      z = y * y
    g.jacobian(z, [x])
    with self.assertRaisesRegex(
        RuntimeError, 'A non-persistent GradientTape can only'):
      g.jacobian(y, [x])

  @test_util.assert_no_new_tensors
  def testJacobianInsideGradientTapeScope(self):
    with backprop.GradientTape() as g:
      x = constant_op.constant(3.0)
      g.watch(x)
      y = x * x
      z = y * y
      self.assertAllClose(4. * 3. ** 3., g.jacobian(z, x))

  @test_util.assert_no_new_tensors
  def testBatchJacobianInsideGradientTapeScope(self):
    with backprop.GradientTape(persistent=True) as g:
      x = constant_op.constant([[3.0]])
      g.watch(x)
      y = x * x
      z = y * y
      self.assertAllClose([[[4. * 3. ** 3.]]], g.batch_jacobian(z, x))

  def testBatchJacobianParallelIterations(self):
    @def_function.function
    def f(persistent):
      with backprop.GradientTape(persistent=persistent) as t:
        x = constant_op.constant([[3.0]])
        t.watch(x)
        y = x * x
        z = array_ops.tile(y * y, [1, 16])
      return t.batch_jacobian(z, x, parallel_iterations=8)
    with self.assertRaisesRegex(RuntimeError,
                                'persistent=True.*parallel_iterations'):
      f(persistent=False)
    self.assertAllClose([[[4. * 3. ** 3.]] * 16], f(persistent=True))

  @test_util.assert_no_new_tensors
  def testGradientTapeBatchJacobianCalledMultipleTimes(self):
    with backprop.GradientTape() as g:
      x = constant_op.constant([[3.0]])
      g.watch(x)
      y = x * x
      z = y * y
    g.batch_jacobian(z, x)
    with self.assertRaisesRegex(
        RuntimeError, 'A non-persistent GradientTape can only'):
      g.batch_jacobian(y, [x])

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  @test_util.run_v1_only('b/120545219')
  def testPersistentTape(self):
    with backprop.GradientTape(persistent=True) as g:
      x = constant_op.constant(3.0)
      g.watch(x)
      y = x * x
      z = y * y
    dz_dx = g.gradient(z, [x])[0]
    self.assertEqual(self.evaluate(dz_dx), 4 * 3 * 3 * 3)
    dy_dx = g.gradient(y, [x])[0]
    self.assertEqual(self.evaluate(dy_dx), 2 * 3)
    del g

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testHigherOrderGradient(self):
    with backprop.GradientTape(persistent=True) as g:
      x = constant_op.constant(3.0)
      g.watch(x)
      y = x**3  # y       := x^3
      dy_dx = g.gradient(y, x)  # dy/dx   := 3x^2
      d2y_dx2 = g.gradient(dy_dx, x)  # d2y/dx2 := 6x
    d3y_dx3 = g.gradient(d2y_dx2, x)  # d3y/dx3 := 6
    x = 3
    self.assertEqual(self.evaluate(y), x**3)
    self.assertEqual(self.evaluate(dy_dx), 3 * x**2)
    self.assertEqual(self.evaluate(d2y_dx2), 6 * x)
    self.assertEqual(self.evaluate(d3y_dx3), 6)
    del g

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testPersistentNestedTape(self):
    with backprop.GradientTape(persistent=True) as g:
      x = constant_op.constant(3.0)
      g.watch(x)
      y = x * x
      with backprop.GradientTape(persistent=True) as gg:
        gg.watch(y)
        z = 2 * y
      for _ in range(2):
        inner_grad = gg.gradient(z, [y])[0]
        self.assertEqual(self.evaluate(inner_grad), 2.0)
      y += inner_grad
      del gg
    grad = g.gradient(y, [x])[0]
    self.assertEqual(self.evaluate(grad), 6.0)
    grad = g.gradient(z, [x])[0]
    self.assertEqual(self.evaluate(grad), 12.0)
    del g

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testGradientTapeVariable(self):
    v = resource_variable_ops.ResourceVariable(1.0, name='v')
    self.evaluate(v.initializer)
    with backprop.GradientTape() as g:
      y = v * v
    grad = g.gradient(y, [v])[0]
    self.assertAllEqual(self.evaluate(grad), 2.0)

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testNestedGradients(self):
    x = constant_op.constant(3.0)
    with backprop.GradientTape() as g:
      g.watch(x)
      y = x * x
      z = y * y
    dz_dx, dz_dy = g.gradient(z, [x, y])
    self.assertEqual(self.evaluate(dz_dx), 108.0)
    self.assertEqual(self.evaluate(dz_dy), 18.0)

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testUnconnectedGradientsDefault(self):
    x = constant_op.constant(1.0)
    y = constant_op.constant(3.0)
    with backprop.GradientTape() as g:
      g.watch([x, y])
      z = y * 2
    dz_dx = g.gradient(z, x)
    self.assertEqual(dz_dx, None)

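  # With unconnected_gradients='zero', sources that are not connected to the
  # target get a zero tensor shaped like the source instead of None.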
  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testUnconnectedGradientsZeros(self):
    x = constant_op.constant(1.0, shape=[2, 2])
    y = constant_op.constant(3.0)
    with backprop.GradientTape() as g:
      g.watch([x, y])
      z = y * 2
    dz_dx = g.gradient(z, x, unconnected_gradients='zero')
    self.assertAllEqual([[0.0, 0.0], [0.0, 0.0]], self.evaluate(dz_dx))

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testUnconnectedGradientsVariablesZeros(self):
    x = resource_variable_ops.ResourceVariable(
        constant_op.constant(1., shape=[2, 2]))
    self.evaluate(x.initializer)
    y = resource_variable_ops.ResourceVariable(constant_op.constant(3.))
    self.evaluate(y.initializer)
    with backprop.GradientTape() as g:
      g.watch([x, y])
      z = y * 2
    dz_dx = g.gradient(z, x, unconnected_gradients='zero')
    self.assertAllEqual([[0.0, 0.0], [0.0, 0.0]], self.evaluate(dz_dx))

  @test_util.run_in_graph_and_eager_modes
  def testUnknownUnconnectedGradientsValueGiven(self):
    x = constant_op.constant(1.0)
    y = constant_op.constant(1.0)
    with backprop.GradientTape() as g:
      g.watch([x, y])
      z = y * 2
    with self.assertRaisesRegex(
        ValueError, "Unknown value for unconnected_gradients: 'nonsense'"):
      g.gradient(z, x, unconnected_gradients='nonsense')

  @test_util.run_in_graph_and_eager_modes
  def testUnconnectedGradientsNestedDefunZeros(self):

    @function.defun
    def f(x):
      return x * x

    @function.defun
    def h(y):
      z = f(y)
      return array_ops.stop_gradient(z)

    x = constant_op.constant(1.0)
    with backprop.GradientTape() as g:
      g.watch(x)
      k = x + 2.
      y = h(k)

    dy_dx = g.gradient(y, x, unconnected_gradients='zero')
    self.assertEqual(0.0, self.evaluate(dy_dx))

  def testInvalidRecordOperationMessage(self):
    y = constant_op.constant(2.)
    x = constant_op.constant(1.)
    with backprop.GradientTape() as g:
      g.watch(x)
      tape_lib.record_operation('InvalidBackprop', [y], [x], lambda dy: [])
    with self.assertRaisesRegex(errors_impl.InternalError,
                                'InvalidBackprop.*too few gradients'):
      g.gradient(y, x)

  @test_util.assert_no_new_tensors
  def testEmptyParamsForValueAndGradFunction(self):

    def fn(a, b):
      return a * b

    val_and_grads_fn = backprop.val_and_grad_function(fn)

    x = 2.0
    y = 3.0
    val, (dx, dy) = val_and_grads_fn(x, y)
    self.assertAllClose(val, x * y)
    self.assertAllEqual(dx, y)
    self.assertAllEqual(dy, x)

  @test_util.assert_no_new_tensors
  def testNonEmptyParamsForValueAndGradFunction(self):

    def fn(a, b):
      return a * b

    val_and_grad_fn = backprop.val_and_grad_function(fn, params=[1])

    x = 2.0
    y = 3.0
    val, grads = val_and_grad_fn(x, y)
    self.assertAllClose(val, x * y)
    self.assertEqual(1, len(grads))
    self.assertAllEqual(grads[0], x)

  @test_util.run_gpu_only
  @test_util.assert_no_new_tensors
  def testTensorCopyCPU2GPU2CPU(self):
    # forward: a (cpu->gpu) -> add (gpu) -> c (gpu->cpu) -> add (cpu) -> e (cpu)
    # back: e (cpu) -> add (cpu) -> c (cpu->gpu) -> add (gpu) -> grad (gpu->cpu)
    def f(a, b):
      with context.device('/gpu:0'):
        c = math_ops.add(a.gpu(0), b.gpu(0))
      return math_ops.add(c.cpu(), constant_op.constant(3.0))

    with context.device('/cpu:0'):
      a = constant_op.constant(1.0)
      b = constant_op.constant(2.0)

    grad = backprop.gradients_function(f, [0])(a, b)[0]
    self.assertAllEqual(grad, 1.0)

  def testGetAttrType(self):
    typ = backprop.op_attr_type('Add', 'T')
    self.assertEqual(typ, int(pywrap_tfe.TF_ATTR_TYPE))

  def testGetAttrList(self):
    typ = backprop.op_attr_type('MaxPool', 'ksize')
    self.assertEqual(typ, [int(pywrap_tfe.TF_ATTR_INT)])

  def testMakeAttrType(self):
    self.assertEqual(dtypes.float32,
                     backprop.make_attr(int(pywrap_tfe.TF_ATTR_TYPE), 1))

  def testMakeAttrTypeList(self):
    self.assertEqual([dtypes.float32],
                     backprop.make_attr([int(pywrap_tfe.TF_ATTR_TYPE)], [1]))

  def testMulType(self):

    def mul(x):
      return math_ops._mul_dispatch(x, x)  # pylint: disable=protected-access

    self.assertAllEqual(backprop.gradients_function(mul)(3.0)[0].numpy(), 6.0)

  def testMakeAttrShape(self):
    for s in ([], None, [1, 2, 3], [None, None], [1, None, 3]):
      expected = tensor_shape.TensorShape(s).as_proto()
      actual = backprop.make_attr(int(pywrap_tfe.TF_ATTR_SHAPE), s)
      self.assertEqual(
          expected,
          actual,
          msg=('For shape %r, expected %r != %r actual' %
               (s, expected, actual)))

  def testMakeAttrShapeList(self):
    shape_list = [[], None, [1, 2, 3], [None, None], [1, None, 3]]
    self.assertEqual(
        [tensor_shape.TensorShape(s).as_proto() for s in shape_list],
        backprop.make_attr([int(pywrap_tfe.TF_ATTR_SHAPE)], shape_list))

  def testArgsGradientFunction(self):

    def f(*args):
      return args[0] * args[0]

    grad = backprop.gradients_function(f)
    self.assertAllEqual(grad(1.0)[0], 2.0)

  def testPartial(self):

    def f(x, y):
      return x * y

    part = functools.partial(f, constant_op.constant(2.0))
    self.assertAllEqual(
        backprop.gradients_function(part)(constant_op.constant(1.0))[0], 2.0)

  def testReturnSameThing(self):

    def f(x):
      return x, 2 * x

    self.assertAllEqual(backprop.gradients_function(f)(1.0)[0], 3.0)

  @test_util.assert_no_new_tensors
  def testExceptionSafety(self):

    def f(unused_x):
      raise ValueError()

    try:
      backprop.gradients_function(f)(1.0)
    except ValueError:
      pass

    def real_f(x):
      return x * x

    self.assertAllEqual(backprop.gradients_function(real_f)(1.0)[0], 2.0)

  @test_util.assert_no_new_tensors
  def testMultiValueConvertToTensor(self):
    x = resource_variable_ops.ResourceVariable(
        initial_value=array_ops.constant([1.0]), name='x')

    def fn():
      a = math_ops.add(x.value(), 1.0)
      # Make sure convert_to_tensor works correctly with list of TensorNodes.
      b = array_ops.stack([a, a], axis=0)
      return math_ops.reduce_mean(b)

    grad = backprop.implicit_grad(fn)()[0][0]
    self.assertAllEqual([1.0], grad)

  def testOutput(self):

    def multiout(x):
      return x + 2, x * x

    x = constant_op.constant([0.0, 1.0, 2.0])

    grad = backprop.gradients_function(multiout)(x)[0]
    self.assertAllEqual([1.0, 3.0, 5.0], grad)

  def testMultiValuePreservesIfNotDiffedAgainst(self):

    def tfe_conv2d(timage, tkernel, conv2dstrides):
      return nn_ops.conv2d(timage, tkernel, conv2dstrides, 'SAME')

    i = constant_op.constant([[[[1.0]]]])
    k = constant_op.constant([[[[2.0]]]])
    s = [1, 1, 1, 1]

    grad = backprop.gradients_function(tfe_conv2d, params=(0,))(i, k, s)[0]
    self.assertAllEqual([[[[2.0]]]], grad)

  def testSameObjectForMultipleArguments(self):

    def f(x, y):
      return math_ops.multiply(x, y)

    g = backprop.gradients_function(f)

    def np_g(x, y):
      dx, dy = g(x, y)
      return [dx.numpy(), dy.numpy()]

    x = constant_op.constant(1.)
    self.assertAllEqual([1., 1.], np_g(x, x))
    x = 1.
    self.assertAllEqual([1., 1.], np_g(x, x))
    x = constant_op.constant([[1.]])
    self.assertAllEqual([[[1.]], [[1.]]], np_g(x, x))
    x = [[1.]]
    self.assertAllEqual([[[1.]], [[1.]]], np_g(x, x))

    v = resource_variable_ops.ResourceVariable(
        initial_value=1., name='testSameObjectForMultipleArguments.Variable')
    self.assertAllEqual([1., 1.], np_g(v, v))

  @test_util.assert_no_new_tensors
  def testImplicitGradientsCustomGradientAndCachedVariableValue(self):

    @custom_gradient.custom_gradient
    def my_square(x):
      result = math_ops.square(x)

      def grad(dr):
        return 2 * dr * x + 1

      return result, grad

    x = resource_variable_ops.ResourceVariable(
        initial_value=3., name='X.' + self.id())

    def f():
      return my_square(x)

    g = backprop.implicit_grad(f)

    grads_and_vars = g()
    self.assertEqual(1, len(grads_and_vars))
    grad, var = grads_and_vars[0]
    self.assertAllEqual(7, grad)
    self.assertAllEqual(x, var)

  def testJacobianCustomGradient(self):

    class MyCallable(object):

      def __init__(self):
        self.a = variables.Variable(1.)
        self.b = variables.Variable(2.)
        self.c = variables.Variable(3.)

      def __call__(self, x):
        return self.a * x * x + self.b * x + self.c

    @def_function.function
    def call(c, x):

      @custom_gradient.custom_gradient
      def _call():
        y = c(x)

        def grad(dy, variables=None):  # pylint: disable=redefined-outer-name
          with backprop.GradientTape(persistent=True) as g:
            g.watch(variables)
            y = c(x)
          grad_vars = [
              2 * math_ops.reduce_sum(dy * g.jacobian(y, v)) for v in variables
          ]
          del g
          return (), grad_vars

        return y, grad

      return _call()

    c = MyCallable()
    x = constant_op.constant([1., 2., 3.])
    with backprop.GradientTape(persistent=True) as g:
      g.watch([c.a, c.b, c.c])
      y = call(c, x)
    self.assertAllEqual(g.gradient(y, x), None)

  @test_util.assert_no_new_tensors
  def testCustomGradient(self):

    @custom_gradient.custom_gradient
    def my_mul(x, y):
      result = x * y

      def grad(dr):
        return [dr * y, dr * x]

      return result, grad

    lr = 0.25
    x = resource_variable_ops.ResourceVariable(2., name='x')

    def loss(x):
      return my_mul(2., x.read_value())

    loss_grads_fn = backprop.implicit_val_and_grad(loss)

    losses = []
    for _ in range(5):
      loss, grads_and_vars = loss_grads_fn(x)
      losses.append(loss.numpy())
      for (grad, var) in grads_and_vars:
        var.assign_sub(lr * grad)
    self.assertAllEqual(losses, [4.0, 3., 2., 1., 0.])

  @test_util.assert_no_new_tensors
  def testCustomGradientIdentity(self):

    @custom_gradient.custom_gradient
    def my_identity(x):

      def grad(dresult):
        return [2 * dresult]

      return x, grad

    self.assertAllEqual(backprop.gradients_function(my_identity)(1.0)[0], 2.0)

  def testDifferentiatingFunctionThatReturnsNone(self):

    def fn(x, y):
      result = x * y  # pylint: disable=unused-variable

    x = constant_op.constant(1)
    y = constant_op.constant(2)

    loss_grads_fn = backprop.implicit_val_and_grad(fn)
    with self.assertRaisesRegex(
        ValueError, 'Cannot differentiate a function that returns None; '
        'did you forget to return a value from fn?'):
      loss_grads_fn(x, y)

    val_and_grads_fn = backprop.val_and_grad_function(fn)
    with self.assertRaisesRegex(
        ValueError, 'Cannot differentiate a function that returns None; '
        'did you forget to return a value from fn?'):
      val_and_grads_fn(x, y)

  def testZerosCacheDoesntLeakAcrossGraphs(self):
    with ops.Graph().as_default():

      def get_grad():
        with ops.Graph().as_default(), self.cached_session():
          t = constant_op.constant(1, dtype=dtypes.float32, shape=(10, 4))
          x = constant_op.constant(2, dtype=dtypes.float32, shape=(10, 4))
          with backprop.GradientTape() as tape:
            tape.watch(x)
            x1, _ = array_ops.split(x, num_or_size_splits=2, axis=1)
            y1 = x1**2
            y = array_ops.concat([y1, t], axis=1)
          return self.evaluate(tape.gradient(y, x))

      grad1 = get_grad()
      grad2 = get_grad()

      self.assertAllEqual(grad1, grad2)

  @test_util.run_in_graph_and_eager_modes
  def testSelectivelyWatchVariables(self):
    x1 = resource_variable_ops.ResourceVariable(1.0)
    x2 = resource_variable_ops.ResourceVariable(1.0)
    with backprop.GradientTape(watch_accessed_variables=False) as tape:
      tape.watch(x2)
      y = x1**2
      z = x2**3
    self.assertTupleEqual(tape.watched_variables(), (x2,))
    dy, dz = tape.gradient([y, z], [x1, x2])
    self.evaluate([x1.initializer, x2.initializer])
    self.assertIsNone(dy)
    self.assertEqual(self.evaluate(dz), 3.0)

  @test_util.run_in_graph_and_eager_modes
  def testDifferentiatingScalarCache(self):
    # In the following test, if x2 = x1 (i.e. the objects are the exact same),
    # then y is essentially 2*x1, and dy/dx1 = 2.
    # When we had a pure scalar cache in eager, this would be the case. This
    # test prevents us from going back to that case.
    with backprop.GradientTape(persistent=False) as g:
      x1 = constant_op.constant(3.0)
      x2 = constant_op.constant(3.0)
      g.watch(x1)
      g.watch(x2)
      y = x1 + x2
    grad = g.gradient(target=y, sources=[x1])
    self.assertEqual(self.evaluate(grad), [1.0])

  def testVariablesAndConstantsProduceTheSameGradients(self):

    # In the following test, differentiating [y, z] against [a, b] gives:
    # (dy/da + dz/da, dy/db + dz/db).
    # If a and b are the same constant, dz/da will not be 0 (which it should
    # be).
    # This is solved by using variables, since a read_value on a variable
    # produces a new tensor with its own TensorHandle rather than reusing the
    # same tensor (which would happen if we were using a cache and reusing
    # EagerTensor objects).
    def get_grads(a, b):
      with backprop.GradientTape() as tape:
        tape.watch([a, b])
        y = a**3
        z = b**2
      return tape.gradient([y, z], [a, b])

    gradients_constants = get_grads(
        constant_op.constant(2.0), constant_op.constant(2.0))
    gradients_variables = get_grads(
        resource_variable_ops.ResourceVariable(2.0),
        resource_variable_ops.ResourceVariable(2.0))
    self.assertAllEqual(gradients_constants, gradients_variables)

  def testUnknownShapes(self):
    with ops.Graph().as_default():
      with backprop.GradientTape() as tape:
        a = array_ops.placeholder(dtype=dtypes.float32, shape=None)
        tape.watch(a)
        b = a**3

      db_da = tape.gradient(b, a)

      with self.cached_session() as sess:
        self.assertEqual((8.0, 12.0), sess.run((b, db_da), feed_dict={a: 2.0}))

  @test_util.run_in_graph_and_eager_modes
  def testCustomGradientInEagerAndGraph(self):

    @custom_gradient.custom_gradient
    def f(x):
      y = x * x

      def grad(dy):
        return [4 * dy]

      return y, grad

    with backprop.GradientTape() as t:
      c = constant_op.constant(1.0)
      t.watch(c)
      g = f(c)
    self.assertAllEqual(self.evaluate(t.gradient(g, c)), 4.0)

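  # Here f's custom gradient is itself wrapped in a custom_gradient, so both
  # the first-order gradient (-1.1 per unit of upstream gradient) and the
  # second-order gradient (-2.1) are overridden; the assertions below follow
  # from the chain rule applied to d = f(c)**4 with those overridden values.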
  def testOverrideSecondOrderWithCustomGradient(self):

    @custom_gradient.custom_gradient
    def f(x):

      def first_order_grad(dz):

        @custom_gradient.custom_gradient
        def first_order_custom(unused_x):

          def h(ddz):
            return -2.1 * ddz

          return -1.1, h

        return dz * first_order_custom(x)

      return x + 10., first_order_grad

    c = constant_op.constant(1.)
    with backprop.GradientTape() as outer:
      outer.watch(c)
      with backprop.GradientTape() as inner:
        inner.watch(c)
        d = f(c)**4.
      dd = inner.gradient(d, c)
      self.assertAllClose(4. * f(c)**3. * -1.1, dd)
    self.assertAllClose(3. * 4. * f(c)**2. * -1.1 * -1.1 + 4. * f(c)**3. * -2.1,
                        outer.gradient(dd, c))

  @test_util.run_in_graph_and_eager_modes
  def testCustomGradientForwardprop(self):

    @custom_gradient.custom_gradient
    def f(x):
      z = 2. * tensor_util.constant_value(x)

      def g(dz):

        @custom_gradient.custom_gradient
        def first_order(unused_x, unused_dz):

          def second_order_and_transpose(unused_ddz):
            return 2.2, 3.1

          return 2.1, second_order_and_transpose

        return first_order(x, dz)

      return z, g

    with backprop.GradientTape(persistent=True) as t:
      with backprop.GradientTape() as tt:
        c = constant_op.constant(1.)
        t.watch(c)
        tt.watch(c)
        output_grad = array_ops.ones([])
        t.watch(output_grad)
        output = f(c)
        self.assertAllClose(2., output)
      gc = tt.gradient(output, c, output_gradients=output_grad)
      self.assertAllClose(2.1, gc)
    ggc = t.gradient(gc, c)
    self.assertAllClose(2.2, ggc)
    # Note that when executed eagerly this kind of transpose is not efficient.
    # But from a tf.function we could prune out the first-order gradient
    # computation.
1551    transpose = t.gradient(gc, output_grad)
1552    self.assertAllClose(3.1, transpose)
1553
1554  @test_util.run_in_graph_and_eager_modes
1555  def testMaxPooling3DGradient(self):
1556
1557    def forward(a):
1558      r = max_pooling3d(a, pool_size=pool_size, strides=strides, padding='SAME')
1559      return r
1560
1561    input_sizes = [1, 3, 2, 4, 1]
1562    pool_size = (2, 2, 1)
1563    strides = (1, 1, 1)
1564
1565    total_size = np.prod(input_sizes)
1566    x = np.arange(1, total_size + 1, dtype=np.float32)
1567    aa = constant_op.constant(x, shape=input_sizes, dtype=dtypes.float32)
1568    da = backprop.gradients_function(forward)(aa)
1569
1570    if not context.executing_eagerly():
1571      tf_aa = constant_op.constant(x, shape=input_sizes, dtype=dtypes.float32)
1572      tf_max = max_pooling3d(
1573          tf_aa, pool_size=pool_size, strides=strides, padding='SAME')
1574      tf_da = gradients.gradients(tf_max, [tf_aa])
1575      self.assertAllEqual(da[0], tf_da[0])
1576
1577  @test_util.run_in_graph_and_eager_modes
1578  def testWatchBadThing(self):
1579    g = backprop.GradientTape()
1580    with self.assertRaisesRegex(ValueError, 'ndarray'):
1581      g.watch(np.array(1.))
1582
1583  def testWatchComposite(self):
1584    """Test that tape.watch expands composites and watches component Tensors."""
1585    with backprop.GradientTape() as t:
1586      values = constant_op.constant([1.0, 2.0], dtypes.float32)
1587      s = sparse_tensor.SparseTensor(
1588          indices=[[0, 0], [1, 2]], values=values, dense_shape=[3, 4])
1589      t.watch(s)
1590      z = sparse_ops.sparse_reduce_sum_v2(s)
1591    result = t.gradient(z, values)
1592    self.assertAllEqual(result, [1.0, 1.0])
1593
1594  def testWatchedVariablesAfterNonPersistentGradientCall(self):
    with backprop.GradientTape(persistent=False) as tape:
      x = resource_variable_ops.ResourceVariable(1.0)
      tape.watch(x)
    tape.gradient(x, x)
    self.assertEqual((x,), tape.watched_variables())

  def testWatchedVariablesOnlyHasVariablesFromLastTape(self):
    with backprop.GradientTape(persistent=False) as tape:
      x = resource_variable_ops.ResourceVariable(1.0)
      tape.watch(x)
    with backprop.GradientTape(persistent=False) as tape:
      z = resource_variable_ops.ResourceVariable(2.0)
      tape.watch(z)
    tape.gradient(z, z)
    self.assertEqual((z,), tape.watched_variables())

  def testWatchedVariablesRespectReset(self):
    with backprop.GradientTape(persistent=False) as tape:
      x = resource_variable_ops.ResourceVariable(1.0)
      tape.watch(x)
      self.assertEqual((x,), tape.watched_variables())
      tape.reset()
      z = resource_variable_ops.ResourceVariable(2.0)
      tape.watch(z)
      self.assertEqual((z,), tape.watched_variables())
    tape.gradient(z, z)
    self.assertEqual((z,), tape.watched_variables())

  def testNameScope(self):
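    # Ops generated for the gradient should live under a
    # 'gradient_tape/my_scope/' name scope; each cos() in fn contributes one
    # Sin op to the gradient graph.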

    def fn(x):
      with ops.name_scope('my_scope'):
        a = math_ops.cos(x)
        b = math_ops.cos(x)
        return math_ops.add(a, b)

    @function.defun
    def grad_fn(x):
      return backprop.gradients_function(fn)(x)

    grad_ops = grad_fn.get_concrete_function(
        constant_op.constant(1.0)).graph.get_operations()
    num_sin_ops_found = 0
    for op in grad_ops:
      if op.type == 'Sin':
        num_sin_ops_found += 1
        self.assertIn('gradient_tape/my_scope/', op.name)
    self.assertEqual(num_sin_ops_found, 2)

  @test_util.assert_no_new_pyobjects_executing_eagerly
  def testRecomputeGradWithDifferentShape(self):

    @custom_gradient.recompute_grad
    def outer(x):
      return [x[0] + 1, x[1] + 1]

    x = [
        variables.Variable([1.0, 2.0], name='a'),
        variables.Variable(1.0, name='b')
    ]
    with backprop.GradientTape():
      y = outer(x)
      self.assertAllEqual(y[0], [2.0, 3.0])
      self.assertAllEqual(y[1], 2.0)

    @custom_gradient.recompute_grad
    def outer_dict(x):
      for key in x.keys():
        x[key] = x[key] + 1
      return x

    x = {x[0].ref(): x[0], x[1].ref(): x[1]}
    with backprop.GradientTape():
      y = outer_dict(x)
      y = list(y.values())
      self.assertAllEqual(y[0], [2.0, 3.0])
      self.assertAllEqual(y[1], 2.0)

  @test_util.assert_no_new_pyobjects_executing_eagerly
  def testRecomputeGradWithNestedFunctionAndWhileLoop(self):
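    # Repeatedly tracing a recompute_grad-wrapped nested function containing
    # a while_loop under a GradientTape should not leak Python objects; the
    # MemoryChecker below enforces this.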

    @custom_gradient.recompute_grad
    @def_function.function
    def outer(x):

      @def_function.function
      def middle(y):

        @def_function.function
        def inner(z):
          return z + 1

        i = constant_op.constant(0.0)
        c = lambda y, i: i < 10.
        b = lambda y, i: (inner(y), i + 1.0)
        y, i = control_flow_ops.while_loop(c, b, [y, i])

        return y

      return middle(x)

    with MemoryChecker() as memory_checker:
      for _ in range(5):
        x = variables.Variable(1.0, name='x')
        with backprop.GradientTape():
          y = outer(x)
          self.assertAllEqual(y, 11.0)

    memory_checker.report()
    memory_checker.assert_no_leak_if_all_possibly_except_one()


class JacobianTest(test.TestCase):
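  """Tests for GradientTape.jacobian (pfor and while_loop backends)."""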

  def _jacobian(self, experimental_use_pfor):
    persistent = context.executing_eagerly() and not experimental_use_pfor
    with backprop.GradientTape(persistent=persistent) as g:
      x = constant_op.constant([1., 2.])
      y = constant_op.constant([3., 4.])
      g.watch(x)
      g.watch(y)
      z = x * x * y
    jacobian = g.jacobian(
        z, [x, y], experimental_use_pfor=experimental_use_pfor)
    answer = [array_ops.diag(2 * x * y), array_ops.diag(x * x)]
    return jacobian, answer

  @test_util.run_v1_only('b/120545219')
  def testPfor(self):
    jacobian, answer = self._jacobian(experimental_use_pfor=True)
    for j, a in zip(jacobian, answer):
      self.assertAllEqual(a, j)

  @test_util.run_v1_only('b/120545219')
  def testWhileLoop(self):
    jacobian, answer = self._jacobian(experimental_use_pfor=False)
    for j, a in zip(jacobian, answer):
      self.assertAllEqual(a, j)

  @test_util.run_v1_only('b/120545219')
  def testPforDefun(self):

    @function.defun
    def _f():
      return self._jacobian(experimental_use_pfor=True)

    jacobian, answer = _f()
    for j, a in zip(jacobian, answer):
      self.assertAllEqual(a, j)

  @test_util.run_v1_only('b/120545219')
  def testWhileLoopDefun(self):

    @function.defun
    def _f():
      return self._jacobian(experimental_use_pfor=False)

    jacobian, answer = _f()
    for j, a in zip(jacobian, answer):
      self.assertAllEqual(a, j)

  @test_util.run_v1_only('b/120545219')
  def testPersistentTape(self):
    if not context.executing_eagerly():
      return
    with backprop.GradientTape() as g:
      x = constant_op.constant([1.0, 2.0])
      g.watch(x)
      y = x * x
    with self.assertRaisesRegex(RuntimeError, 'persistent'):
      g.jacobian(y, x, experimental_use_pfor=False)

  @test_util.run_v1_only('b/120545219')
  def test_parallel_iterations(self):
    with backprop.GradientTape(persistent=True) as g:
      x = constant_op.constant([[1., 2], [3, 4]])
      g.watch(x)
      y = math_ops.matmul(x, x)
    self.assertAllClose(
        g.jacobian(y, x, parallel_iterations=2),
        g.jacobian(y, x, parallel_iterations=3))

  @test_util.run_in_graph_and_eager_modes
  def test_nested_jacobian(self):
    if context.executing_eagerly():
      # TODO(agarwal): b/128842926
      self.skipTest('Conversion of function calls not implemented yet.')
    x = array_ops.ones((10, 2))
    with backprop.GradientTape(persistent=False) as g:
      g.watch(x)
      with backprop.GradientTape(persistent=False) as gg:
        gg.watch(x)
        y = math_ops.reduce_sum(math_ops.square(x))
      dy_x = gg.jacobian(y, x)
    dy_xx = g.batch_jacobian(dy_x, x)
    dy_xx_answer = [[[2., 0], [0, 2.]]] * 10
    self.assertAllClose(dy_xx_answer, self.evaluate(dy_xx))

  def test_nested_batch_jacobian_foldl(self):
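    # Repeatedly differentiates a foldl-based function with batch_jacobian,
    # checking each level against gradient_checker_v2's numerical result,
    # with and without def_function.function wrapping.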
    def _grad(f):
      def _grad_function(primal):
        with backprop.GradientTape() as tape:
          tape.watch(primal)
          primal_out = f(primal)
        return tape.batch_jacobian(primal_out, primal)
      return _grad_function

    def _func(x):
      return array_ops.reshape(
          functional_ops.foldl_v2(lambda a, b: math_ops.cos(a + b),
                                  array_ops.transpose(x)),
          [1, 1])

    f = _func
    x = constant_op.constant([[1., 2.]])
    for _ in range(2):
      theoretical, numerical = gradient_checker_v2.compute_gradient(f, [x])
      self.assertAllClose(theoretical, numerical, rtol=1e-3)
      f = _grad(f)
      expected_flat = array_ops.reshape(numerical, [-1])
      self.assertAllClose(expected_flat,
                          array_ops.reshape(f(x), [-1]),
                          rtol=1e-3)
      self.assertAllClose(expected_flat,
                          array_ops.reshape(def_function.function(f)(x), [-1]),
                          rtol=1e-3)

  def test_grad_jacobian_conv(self):
    def _inner(x):
      kernel = array_ops.ones([3, 3, 1, 9])
      with backprop.GradientTape() as tape:
        tape.watch(x)
        y = nn_ops.conv2d(x, kernel, strides=(1, 1), padding='SAME',
                          data_format='NHWC')
        reduced = math_ops.reduce_sum(y ** 2., axis=[2, 3])
      return math_ops.reduce_sum(tape.batch_jacobian(reduced, x))

    theoretical, numerical = gradient_checker_v2.compute_gradient(
        def_function.function(_inner), [array_ops.ones([10, 4, 4, 1])])
    self.assertAllClose(numerical, theoretical, rtol=1e-1)

    @def_function.function
    def _outer():
      with backprop.GradientTape() as tape:
        x = array_ops.ones([10, 4, 4, 1])
        tape.watch(x)
        y = _inner(x)
      return tape.gradient(y, x)

    self.assertAllClose(array_ops.reshape(numerical, [-1]),
                        array_ops.reshape(_outer(), [-1]), rtol=1e-1)

  @test_util.run_in_graph_and_eager_modes
  def test_indexed_slices(self):
    with backprop.GradientTape(persistent=True) as g:
      inp = random_ops.random_uniform([3, 2])
      g.watch(inp)
      output = nn.embedding_lookup(inp, [0, 2])
    self.assertAllClose(
        g.jacobian(output, inp, experimental_use_pfor=True),
        g.jacobian(output, inp, experimental_use_pfor=False))

  def test_foldl_partial_function(self):
    x = array_ops.zeros([3])
    with backprop.GradientTape(persistent=True) as tape:
      tape.watch(x)
      result = def_function.function(
          functools.partial(functional_ops.foldl_v2, lambda a, b: a + b))(
              x)
    self.assertAllClose([1., 1., 1.],
                        tape.jacobian(result, x, experimental_use_pfor=True))
    self.assertAllClose([1., 1., 1.],
                        tape.jacobian(result, x, experimental_use_pfor=False))

    # Non-persistent tapes take a different function gradient path, but also
    # work with pfor=True.
    x = array_ops.zeros([3])
    with backprop.GradientTape() as tape:
      tape.watch(x)
      result = def_function.function(
          functools.partial(functional_ops.foldl_v2, lambda a, b: a + b))(
              x)
    self.assertAllClose([1., 1., 1.],
                        tape.jacobian(result, x, experimental_use_pfor=True))

  def test_foldl_pure_function(self):

    @def_function.function
    def compute_jacobian(use_pfor):
      x = array_ops.zeros([3])
      with backprop.GradientTape(persistent=True) as tape:
        tape.watch(x)
        result = functools.partial(functional_ops.foldl_v2, lambda a, b: a + b)(
            x)
      return tape.jacobian(result, x, experimental_use_pfor=use_pfor)

    self.assertAllClose(compute_jacobian(use_pfor=True),
                        compute_jacobian(use_pfor=False))

  def test_cond_func_grad_jacobian(self):

    @def_function.function
    def f(x):
      y = control_flow_ops.cond(x > 0., lambda: x**3., lambda: x**2.)
      return y

    with backprop.GradientTape(persistent=True) as tape:
      x = constant_op.constant(1.)
      tape.watch(x)
      y = f(x)
      grad = tape.gradient(y, x)
    self.assertAllClose(3., grad)
    jacobian = tape.jacobian(grad, x, experimental_use_pfor=False)
    self.assertAllClose(6., jacobian)
    jacobian_pfor = tape.jacobian(grad, x, experimental_use_pfor=True)
    self.assertAllClose(6., jacobian_pfor)


@test_util.run_all_in_graph_and_eager_modes
class BatchJacobianTest(test.TestCase, parameterized.TestCase):
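  """Tests for GradientTape.batch_jacobian (pfor and while_loop backends)."""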

  def _batch_jacobian(self, experimental_use_pfor):
    persistent = context.executing_eagerly() and not experimental_use_pfor
    with backprop.GradientTape(persistent=persistent) as g:
      x = constant_op.constant([[1., 2.], [3., 4.]])
      y = constant_op.constant([[3., 4.], [5., 6.]])
      g.watch(x)
      z = x * x * y
    batch_jacobian = g.batch_jacobian(
        z, x, experimental_use_pfor=experimental_use_pfor)
    answer = array_ops.stack(
        [array_ops.diag(2 * x[0] * y[0]),
         array_ops.diag(2 * x[1] * y[1])])
    return batch_jacobian, answer

  def testPfor(self):
    batch_jacobian, answer = self._batch_jacobian(experimental_use_pfor=True)
    self.assertAllEqual(answer, batch_jacobian)

  def testWhileLoop(self):
    batch_jacobian, answer = self._batch_jacobian(experimental_use_pfor=False)
    self.assertAllEqual(answer, batch_jacobian)

  def testPforDefun(self):

    @function.defun
    def _f():
      return self._batch_jacobian(experimental_use_pfor=True)

    batch_jacobian, answer = _f()
    self.assertAllEqual(answer, batch_jacobian)

  def testWhileLoopDefun(self):

    @function.defun
    def _f():
      return self._batch_jacobian(experimental_use_pfor=False)

    batch_jacobian, answer = _f()
    self.assertAllEqual(answer, batch_jacobian)

  def testPersistentTape(self):
    if not context.executing_eagerly():
      return
    with backprop.GradientTape() as g:
      x = constant_op.constant([[1.0, 2.0]])
      g.watch(x)
      y = x * x
    with self.assertRaisesRegex(RuntimeError, 'persistent'):
      g.batch_jacobian(y, x, experimental_use_pfor=False)

  def testBadShape(self):
    x = random_ops.random_uniform([2, 3])
    with backprop.GradientTape() as g:
      y = array_ops.concat([x, x], axis=0)
    with self.assertRaisesRegex(ValueError, 'Need first dimension'):
      g.batch_jacobian(y, x)

  def testBadInputRank(self):
    x = random_ops.random_uniform([2])
    with backprop.GradientTape() as g:
      y = random_ops.random_uniform([2, 2])
    with self.assertRaisesRegex(ValueError, 'must have rank at least 2'):
      g.batch_jacobian(y, x)

  def testBadOutputRank(self):
    x = random_ops.random_uniform([2, 2])
    with backprop.GradientTape() as g:
      y = random_ops.random_uniform([2])
    with self.assertRaisesRegex(ValueError, 'must have rank at least 2'):
      g.batch_jacobian(y, x)

  def test_parallel_iterations(self):
    with backprop.GradientTape(persistent=True) as g:
      x = constant_op.constant([[1., 2], [3, 4]])
      g.watch(x)
      w = constant_op.constant([[1., 2, 3, 4], [5, 6, 7, 8]])
      y = math_ops.matmul(x, w)
    self.assertAllClose(
        g.batch_jacobian(y, x, parallel_iterations=2),
        g.batch_jacobian(y, x, parallel_iterations=3))

  @parameterized.parameters((True, True), (True, False), (False, True),
                            (False, False))
  def test_degenerate_shape(self, use_function, use_pfor):

    def f(x):
      with backprop.GradientTape(persistent=True) as tape:
        tape.watch(x)
        y = x**2
      return tape.batch_jacobian(y, x, experimental_use_pfor=use_pfor)

    if use_function:
      f = def_function.function(f)
    self.assertAllEqual([1, 0, 0], array_ops.shape(f(array_ops.zeros([1, 0]))))

  @parameterized.parameters((True,), (False,))
  def test_zeros_type_correct(self, use_pfor):
    for dtype in [dtypes.float32, dtypes.float64]:
      @def_function.function
      def f(x):
        del x
        return constant_op.constant([[1.]], dtype=dtype)  # pylint: disable=cell-var-from-loop

      with backprop.GradientTape(persistent=True) as tape:
        x = constant_op.constant([[2.]], dtype=dtype)
        tape.watch(x)
        y = f(x)
      jac = tape.batch_jacobian(y, x, experimental_use_pfor=use_pfor)
      self.assertEqual(dtype, jac.dtype)
      self.assertAllClose([[[0.]]], jac)

      with backprop.GradientTape(persistent=True) as tape:
        x = constant_op.constant([[2.]], dtype=dtype)
        tape.watch(x)
        y = f(x)
      jac = tape.batch_jacobian(y, x, unconnected_gradients='zero',
                                experimental_use_pfor=use_pfor)
      self.assertEqual(dtype, jac.dtype)
      self.assertAllClose([[[0.]]], jac)


class AggregateIndexedSlicesGradientsTest(test_util.TensorFlowTestCase):
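  """Tests for backprop.aggregate_indexed_slices_gradients."""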

  def _assert_indexed_slices_equal(self, left, right):
    self.assertAllEqual(
        self.evaluate(ops.convert_to_tensor(left)),
        self.evaluate(ops.convert_to_tensor(right)))

  def testNoGradients(self):
    self.assertIsNone(backprop.aggregate_indexed_slices_gradients([]))

  def testOneGradient(self):
    t = math_ops._as_indexed_slices(
        constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
    result = backprop.aggregate_indexed_slices_gradients([t])
    self._assert_indexed_slices_equal(t, result)

  def testMultipleGradients(self):
    t0 = math_ops._as_indexed_slices(
        constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
    t1 = math_ops._as_indexed_slices(
        constant_op.constant([[0., 0.], [5, 6], [7., 8.]]))
    total = constant_op.constant([[1., 2.], [5, 6], [10., 12.]])
    result = backprop.aggregate_indexed_slices_gradients([t0, t1])
    self._assert_indexed_slices_equal(total, result)

  def testMultipleGradientsWithNones(self):
    t0 = math_ops._as_indexed_slices(
        constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
    t1 = math_ops._as_indexed_slices(
        constant_op.constant([[0., 0.], [5, 6], [7., 8.]]))
    t3 = None
    total = constant_op.constant([[1., 2.], [5, 6], [10., 12.]])
    result = backprop.aggregate_indexed_slices_gradients([t0, t1, t3])
    self._assert_indexed_slices_equal(total, result)

  def testMixedTensorAndIndexedSlices(self):
    t0 = math_ops._as_indexed_slices(
        constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
    t1 = constant_op.constant([[0., 0.], [5, 6], [7., 8.]])
    total = constant_op.constant([[1., 2.], [5, 6], [10., 12.]])
    result = backprop.aggregate_indexed_slices_gradients([t0, t1])
    self._assert_indexed_slices_equal(total, result)


if __name__ == '__main__':
  test.main()