# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tensorflow.ops.tf.Cholesky."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
from six.moves import xrange  # pylint: disable=redefined-builtin

from tensorflow.python.client import session
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes as dtypes_lib
from tensorflow.python.framework import errors_impl
from tensorflow.python.framework import ops
from tensorflow.python.framework import test_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import gen_linalg_ops
from tensorflow.python.ops import gradient_checker
from tensorflow.python.ops import gradients_impl
from tensorflow.python.ops import linalg_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import variables
from tensorflow.python.ops.linalg import linalg
from tensorflow.python.platform import benchmark
from tensorflow.python.platform import test
from tensorflow.python.platform import tf_logging


# Different gradient implementations for benchmark purposes.
def SpecializedGrad(l, grad):
  return gen_linalg_ops.cholesky_grad(l, grad)


def _GradWithInverseL(l, l_inverse, grad):
  middle = math_ops.matmul(l, grad, adjoint_a=True)
  middle = array_ops.matrix_set_diag(middle,
                                     0.5 * array_ops.matrix_diag_part(middle))
  middle = array_ops.matrix_band_part(middle, -1, 0)
  grad_a = math_ops.matmul(
      math_ops.matmul(l_inverse, middle, adjoint_a=True), l_inverse)
  grad_a += math_ops.conj(array_ops.matrix_transpose(grad_a))
  return grad_a * 0.5


def TriAngSolveCompositeGrad(l, grad):
  # Gradient is l^{-H} @ ((l^{H} @ grad) * (tril(ones)-1/2*eye)) @ l^{-1}.

  # Compute middle = (l^{H} @ grad) * (tril(ones)-1/2*eye).
  middle = math_ops.matmul(l, grad, adjoint_a=True)
  middle = array_ops.matrix_set_diag(middle,
                                     0.5 * array_ops.matrix_diag_part(middle))
  middle = array_ops.matrix_band_part(middle, -1, 0)

  # Compute z = l^{-H} @ middle.
  l_inverse_middle = linalg_ops.matrix_triangular_solve(l, middle, adjoint=True)

  # We need to compute z @ l^{-1}. With matrix_triangular_solve we actually
  # compute its adjoint, l^{-H} @ z^{H}. Since we symmetrize the result by
  # adding its adjoint below, we can omit the conjugate transpose here.
  z_h = math_ops.conj(array_ops.matrix_transpose(l_inverse_middle))
  grad_a = linalg_ops.matrix_triangular_solve(l, z_h, adjoint=True)
  grad_a += linalg.adjoint(grad_a)
  return grad_a * 0.5


def MatrixInverseCompositeGrad(l, grad):
  l_inverse = linalg_ops.matrix_inverse(l)
  return _GradWithInverseL(l, l_inverse, grad)


def TriAngInvCompositeGrad(l, grad):
  num_rows = array_ops.shape(l)[-1]
  batch_shape = array_ops.shape(l)[:-2]
  # Invert the triangular factor by solving l @ l_inverse = eye; for
  # triangular l this is cheaper than a general matrix inverse.
  l_inverse = linalg_ops.matrix_triangular_solve(l,
                                                 linalg_ops.eye(
                                                     num_rows,
                                                     batch_shape=batch_shape,
                                                     dtype=l.dtype))
  return _GradWithInverseL(l, l_inverse, grad)
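

# All three composite gradients above implement the same closed form,
# grad_a = 1/2 * (P + P^H) with P = l^{-H} @ middle @ l^{-1}. The sketch below
# restates it in plain NumPy for a single (non-batched) matrix; it is
# illustrative only (the name is ours, and it is not exercised by the tests).
def _NumpyCholeskyGradSketch(l, grad):
  """Reference Cholesky gradient: `l` is the factor, `grad` is dLoss/dl."""
  n = l.shape[-1]
  # middle = (l^{H} @ grad) * (tril(ones) - 1/2 * eye).
  middle = np.tril(np.matmul(np.conj(l.T), grad))
  middle[np.diag_indices(n)] *= 0.5
  # grad_a = l^{-H} @ middle @ l^{-1}, then symmetrize.
  l_inv = np.linalg.inv(l)
  grad_a = np.matmul(np.matmul(np.conj(l_inv.T), middle), l_inv)
  return 0.5 * (grad_a + np.conj(grad_a.T))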


class CholeskyOpTest(test.TestCase):

  def _verifyCholeskyBase(self, sess, x, chol, verification):
    chol_np, verification_np = self.evaluate([chol, verification])
    self.assertAllClose(x, verification_np)
    self.assertShapeEqual(x, chol)
    # Check that the Cholesky factor is lower triangular and has a positive
    # diagonal.
    if chol_np.shape[-1] > 0:
      chol_reshaped = np.reshape(chol_np, (-1, chol_np.shape[-2],
                                           chol_np.shape[-1]))
      for chol_matrix in chol_reshaped:
        self.assertAllClose(chol_matrix, np.tril(chol_matrix))
        self.assertTrue((np.diag(chol_matrix) > 0.0).all())

  def _verifyCholesky(self, x):
    # Verify that L @ L^H == x. Together with the structure checks above, this
    # pins down the result: the Cholesky factor of a positive-definite matrix
    # is unique.
    with self.cached_session(use_gpu=True) as sess:
      chol = linalg_ops.cholesky(x)
      verification = math_ops.matmul(chol, chol, adjoint_b=True)
      self._verifyCholeskyBase(sess, x, chol, verification)

  def testBasic(self):
    data = np.array([[4., -1., 2.], [-1., 6., 0], [2., 0., 5.]])
    for dtype in (np.float32, np.float64):
      self._verifyCholesky(data.astype(dtype))
    for dtype in (np.complex64, np.complex128):
      complex_data = np.tril(1j * data, -1).astype(dtype)
      complex_data += np.triu(-1j * data, 1).astype(dtype)
      complex_data += data
      self._verifyCholesky(complex_data)

  def testBatch(self):
    simple_array = np.array([[[1., 0.], [0., 5.]]])  # shape (1, 2, 2)
    self._verifyCholesky(simple_array)
    self._verifyCholesky(np.vstack((simple_array, simple_array)))
    odd_sized_array = np.array([[[4., -1., 2.], [-1., 6., 0], [2., 0., 5.]]])
    self._verifyCholesky(np.vstack((odd_sized_array, odd_sized_array)))

    # Generate random positive-definite matrices (A^T @ A is almost surely
    # positive definite for a random A).
    matrices = np.random.rand(10, 5, 5)
    for i in xrange(10):
      matrices[i] = np.dot(matrices[i].T, matrices[i])
    self._verifyCholesky(matrices)

    # Generate random complex-valued positive-definite matrices.
    matrices = np.random.rand(10, 5, 5) + 1j * np.random.rand(10, 5, 5)
    for i in xrange(10):
      matrices[i] = np.dot(matrices[i].T.conj(), matrices[i])
    self._verifyCholesky(matrices)

  @test_util.run_deprecated_v1
  def testNonSquareMatrix(self):
    with self.assertRaises(ValueError):
      linalg_ops.cholesky(np.array([[1., 2., 3.], [3., 4., 5.]]))
    with self.assertRaises(ValueError):
      linalg_ops.cholesky(
          np.array([[[1., 2., 3.], [3., 4., 5.]],
                    [[1., 2., 3.], [3., 4., 5.]]]))

  @test_util.run_v1_only("b/120545219")
  def testWrongDimensions(self):
    tensor3 = constant_op.constant([1., 2.])
    with self.assertRaises(ValueError):
      linalg_ops.cholesky(tensor3)
    with self.assertRaises(ValueError):
      linalg_ops.cholesky(tensor3)

  # The invalid Cholesky call below raises an error with TF Classic and just
  # returns NaNs with XLA.
  @test_util.disable_xla("b/123337890")
  def testNotInvertibleCPU(self):
    # Cholesky requires a Hermitian positive-definite input.
    with self.session(use_gpu=True):
      with self.assertRaisesRegexp(
          errors_impl.InvalidArgumentError,
          "Cholesky decomposition was not successful. The"
          " input might not be valid."):
        # The symmetric matrix below is not positive definite (it has a
        # negative eigenvalue), so the decomposition must fail.
        self._verifyCholesky(
            np.array([[1., -1., 0.], [-1., 1., -1.], [0., -1., 1.]]))

  def testEmpty(self):
    self._verifyCholesky(np.empty([0, 2, 2]))
    self._verifyCholesky(np.empty([2, 0, 0]))

  @test_util.run_deprecated_v1
  def testConcurrentExecutesWithoutError(self):
    with self.session(use_gpu=True) as sess:
      matrix1 = random_ops.random_normal([5, 5], seed=42)
      matrix2 = random_ops.random_normal([5, 5], seed=42)
      matrix1 = math_ops.matmul(matrix1, matrix1, adjoint_a=True)
      matrix2 = math_ops.matmul(matrix2, matrix2, adjoint_a=True)
      c1 = linalg_ops.cholesky(matrix1)
      c2 = linalg_ops.cholesky(matrix2)
      c1_val, c2_val = self.evaluate([c1, c2])
      self.assertAllClose(c1_val, c2_val)


class CholeskyGradTest(test.TestCase):
  _backprop_block_size = 32

  def getShapes(self, shapeList):
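    # Pair each size n with a wider dimension floor(1.2 * n): the finite-
    # difference tests below draw an n x m sample X and form the n x n Gram
    # matrix X @ X^H / n, which is almost surely positive definite.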
    return ((elem, int(np.floor(1.2 * elem))) for elem in shapeList)

  @test_util.run_deprecated_v1
  def testSmallMatrices(self):
    np.random.seed(0)
    shapes = self.getShapes([1, 2, 10])
    self.runFiniteDifferences(
        shapes, dtypes=(dtypes_lib.float32, dtypes_lib.float64))

  @test_util.run_deprecated_v1
  def testSmallMatricesComplex(self):
    np.random.seed(0)
    shapes = self.getShapes([1, 2, 10])
    self.runFiniteDifferences(
        shapes, dtypes=(dtypes_lib.complex64, dtypes_lib.complex128))

  @test_util.run_deprecated_v1
  def testOneBlockMatrices(self):
    np.random.seed(0)
    shapes = self.getShapes([self._backprop_block_size + 1])
    self.runFiniteDifferences(
        shapes,
        dtypes=(dtypes_lib.float32, dtypes_lib.float64),
        scalarTest=True)

  @test_util.run_deprecated_v1
  def testTwoBlockMatrixFloat(self):
    np.random.seed(0)
    shapes = self.getShapes([2 * self._backprop_block_size + 1])
    self.runFiniteDifferences(
        shapes, dtypes=(dtypes_lib.float32,), scalarTest=True)

  @test_util.run_deprecated_v1
  def testTwoBlockMatrixDouble(self):
    np.random.seed(0)
    shapes = self.getShapes([2 * self._backprop_block_size + 1])
    self.runFiniteDifferences(
        shapes, dtypes=(dtypes_lib.float64,), scalarTest=True)

  @test_util.run_v1_only("b/120545219")
  def testTwoBlockMatrixComplexFloat(self):
    np.random.seed(0)
    shapes = self.getShapes([2 * self._backprop_block_size + 1])
    self.runFiniteDifferences(
        shapes, dtypes=(dtypes_lib.complex64,), scalarTest=True)

  @test_util.run_deprecated_v1
  def testTwoBlockMatrixComplexDouble(self):
    np.random.seed(0)
    shapes = self.getShapes([2 * self._backprop_block_size + 1])
    self.runFiniteDifferences(
        shapes, dtypes=(dtypes_lib.complex128,), scalarTest=True)

  def testAgainstSpecialized(self):
    np.random.seed(0)
    data = np.random.randn(33, 33).astype(np.float32)
    data = np.matmul(data, data.T)
    grad_data = np.random.randn(*data.shape).astype(np.float32)

    with ops.Graph().as_default(), self.session(use_gpu=False) as s:
      x = constant_op.constant(data, dtypes_lib.float32)
      chol = linalg_ops.cholesky(x)
      composite_grad = gradients_impl.gradients(chol, x, grad_data)[0]
      specialized_grad = SpecializedGrad(chol, grad_data)
      reference, actual = s.run([specialized_grad, composite_grad])
    self.assertAllClose(reference, actual)

  def runFiniteDifferences(self,
                           shapes,
                           dtypes=(dtypes_lib.float32, dtypes_lib.float64,
                                   dtypes_lib.complex64, dtypes_lib.complex128),
                           scalarTest=False):
    with self.session(use_gpu=True):
      for shape in shapes:
        for batch in False, True:
          for dtype in dtypes:
            if not scalarTest:
              data = np.random.randn(shape[0], shape[1])
              if dtype.is_complex:
                data = data.astype(np.complex64)
                data += 1j * np.random.randn(shape[0], shape[1])
              x = constant_op.constant(data, dtype)
              tensor = math_ops.matmul(
                  x, math_ops.conj(array_ops.transpose(x))) / shape[0]
            else:
              # This is designed to be a faster test for larger matrices.
              data = np.random.randn()
              if dtype.is_complex:
                data = np.complex64(data)
                data += 1j * np.random.randn()
              x = constant_op.constant(data, dtype)
              R = constant_op.constant(
                  np.random.randn(shape[0], shape[1]), dtype)
              e = math_ops.multiply(R, x)
              tensor = math_ops.matmul(
                  e, math_ops.conj(array_ops.transpose(e))) / shape[0]

            # Inner-most matrices in tensor are positive definite.
            if batch:
              tensor = array_ops.tile(
                  array_ops.expand_dims(tensor, 0), [4, 1, 1])
            y = linalg_ops.cholesky(tensor)
            if scalarTest:
              y = math_ops.reduce_mean(y)
            error = gradient_checker.compute_gradient_error(
                x, x._shape_as_list(), y, y._shape_as_list())
            tf_logging.info("error = %f", error)
            if dtype == dtypes_lib.float64:
              self.assertLess(error, 1e-5)
            elif dtype == dtypes_lib.complex128:
              self.assertLess(error, 5e-5)
            else:
              self.assertLess(error, 5e-3)


class CholeskyBenchmark(test.Benchmark):

  shapes = [
      (4, 4),
      (10, 10),
      (16, 16),
      (101, 101),
      (256, 256),
      (1000, 1000),
      (1024, 1024),
      (2048, 2048),
      (513, 2, 2),
      (513, 8, 8),
      (513, 256, 256),
      (4, 513, 2, 2),
  ]

  def _GenerateMatrix(self, shape):
    batch_shape = shape[:-2]
    shape = shape[-2:]
    assert shape[0] == shape[1]
    n = shape[0]
    matrix = np.ones(shape).astype(np.float32) / (
        2.0 * n) + np.diag(np.ones(n).astype(np.float32))
    return np.tile(matrix, batch_shape + (1, 1))
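
  # Each generated matrix is symmetric with diagonal entries 1 + 1 / (2n),
  # while the off-diagonal entries in any row sum to (n - 1) / (2n) < 1. It is
  # therefore strictly diagonally dominant with a positive diagonal, hence
  # positive definite, so the Cholesky benchmarks below cannot fail. A quick
  # illustrative check (not part of the benchmark):
  #
  #   m = CholeskyBenchmark()._GenerateMatrix((4, 4))
  #   assert (np.linalg.eigvalsh(m) > 0).all()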

  def benchmarkCholeskyOp(self):
    for shape in self.shapes:
      with ops.Graph().as_default(), \
          session.Session(config=benchmark.benchmark_config()) as sess, \
          ops.device("/cpu:0"):
        matrix = variables.Variable(self._GenerateMatrix(shape))
        l = linalg_ops.cholesky(matrix)
        variables.global_variables_initializer().run()
        self.run_op_benchmark(
            sess,
            control_flow_ops.group(l),
            min_iters=25,
            name="cholesky_cpu_{shape}".format(shape=shape))

      if test.is_gpu_available(True):
        with ops.Graph().as_default(), \
            session.Session(config=benchmark.benchmark_config()) as sess, \
            ops.device("/device:GPU:0"):
          matrix = variables.Variable(self._GenerateMatrix(shape))
          l = linalg_ops.cholesky(matrix)
          variables.global_variables_initializer().run()
          self.run_op_benchmark(
              sess,
              control_flow_ops.group(l),
              min_iters=25,
              name="cholesky_gpu_{shape}".format(shape=shape))

  def benchmarkGradVariants(self):

    def _BenchmarkGrad(grad_fn, name, device):
      for shape in self.shapes:
        matrix = self._GenerateMatrix(shape)
        with ops.Graph().as_default(), \
            session.Session(config=benchmark.benchmark_config()) as sess, \
            ops.device(device):
          l = variables.Variable(np.linalg.cholesky(matrix))
          grad_matrix = variables.Variable(
              np.random.randn(*matrix.shape).astype(np.float32))
          grad = grad_fn(l, grad_matrix)
          variables.global_variables_initializer().run()
          self.run_op_benchmark(
              sess,
              control_flow_ops.group(grad),
              min_iters=25,
              name="{name}_{dev}_{shape}".format(
                  name=name, dev=grad.device, shape=shape))

    if test.is_gpu_available(True):
      _BenchmarkGrad(MatrixInverseCompositeGrad, "composite_matrix_inverse",
                     "/device:GPU:0")
      _BenchmarkGrad(TriAngInvCompositeGrad, "composite_tri_ang_inverse",
                     "/device:GPU:0")
      _BenchmarkGrad(TriAngSolveCompositeGrad, "composite_triangular_solve",
                     "/device:GPU:0")

    _BenchmarkGrad(MatrixInverseCompositeGrad, "composite_matrix_inverse",
                   "/cpu:0")
    _BenchmarkGrad(TriAngInvCompositeGrad, "composite_tri_ang_inverse",
                   "/cpu:0")
    _BenchmarkGrad(TriAngSolveCompositeGrad, "composite_triangular_solve",
                   "/cpu:0")
    _BenchmarkGrad(SpecializedGrad, "specialized", "/cpu:0")


if __name__ == "__main__":
  test.main()