# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tensorflow.ops.tf.Cholesky."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
from six.moves import xrange  # pylint: disable=redefined-builtin

from tensorflow.python.client import session
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes as dtypes_lib
from tensorflow.python.framework import errors_impl
from tensorflow.python.framework import ops
from tensorflow.python.framework import test_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import gen_linalg_ops
from tensorflow.python.ops import gradient_checker
from tensorflow.python.ops import gradients_impl
from tensorflow.python.ops import linalg_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import variables
from tensorflow.python.ops.linalg import linalg
from tensorflow.python.platform import benchmark
from tensorflow.python.platform import test
from tensorflow.python.platform import tf_logging


# Different gradient implementations for benchmark purposes.
def SpecializedGrad(l, grad):
  return gen_linalg_ops.cholesky_grad(l, grad)


def _GradWithInverseL(l, l_inverse, grad):
  middle = math_ops.matmul(l, grad, adjoint_a=True)
  middle = array_ops.matrix_set_diag(middle,
                                     0.5 * array_ops.matrix_diag_part(middle))
  middle = array_ops.matrix_band_part(middle, -1, 0)
  grad_a = math_ops.matmul(
      math_ops.matmul(l_inverse, middle, adjoint_a=True), l_inverse)
  grad_a += math_ops.conj(array_ops.matrix_transpose(grad_a))
  return grad_a * 0.5
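

# For orientation, the identity that _GradWithInverseL and the composite
# variants below all implement, written out in plain NumPy. This is an
# illustrative sketch for the real-valued, unbatched case only;
# _NumpyCholeskyGradSketch is a name introduced here and is not used by the
# tests or benchmarks.
def _NumpyCholeskyGradSketch(l, grad):
  """Reference Cholesky gradient: 0.5 * (M + M^T) with M = L^{-T} P L^{-1}."""
  # P = Phi(L^T @ grad): keep the lower triangle and halve the diagonal.
  middle = np.tril(np.dot(l.T, grad))
  middle[np.diag_indices_from(middle)] *= 0.5
  l_inverse = np.linalg.inv(l)
  grad_a = np.dot(np.dot(l_inverse.T, middle), l_inverse)
  # Symmetrize, mirroring the grad_a += grad_a^H then * 0.5 step above.
  return 0.5 * (grad_a + grad_a.T)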


def TriAngSolveCompositeGrad(l, grad):
  # Gradient is l^{-H} @ ((l^{H} @ grad) * (tril(ones)-1/2*eye)) @ l^{-1}

  # Compute ((l^{H} @ grad) * (tril(ones)-1/2*eye)) = middle
  middle = math_ops.matmul(l, grad, adjoint_a=True)
  middle = array_ops.matrix_set_diag(middle,
                                     0.5 * array_ops.matrix_diag_part(middle))
  middle = array_ops.matrix_band_part(middle, -1, 0)

  # Compute l^{-H} @ middle = z
  l_inverse_middle = linalg_ops.matrix_triangular_solve(l, middle, adjoint=True)

  # We need to compute z @ l^{-1}. With matrix_triangular_solve we
  # actually compute l^{-H} @ z^{H} = grad. Since we later add grad^{H}
  # we can omit the conjugate transpose here.
  z_h = math_ops.conj(array_ops.matrix_transpose(l_inverse_middle))
  grad_a = linalg_ops.matrix_triangular_solve(l, z_h, adjoint=True)
  grad_a += linalg.adjoint(grad_a)
  return grad_a * 0.5


def MatrixInverseCompositeGrad(l, grad):
  l_inverse = linalg_ops.matrix_inverse(l)
  return _GradWithInverseL(l, l_inverse, grad)


def TriAngInvCompositeGrad(l, grad):
  num_rows = array_ops.shape(l)[-1]
  batch_shape = array_ops.shape(l)[:-2]
  l_inverse = linalg_ops.matrix_triangular_solve(l,
                                                 linalg_ops.eye(
                                                     num_rows,
                                                     batch_shape=batch_shape,
                                                     dtype=l.dtype))
  return _GradWithInverseL(l, l_inverse, grad)


class CholeskyOpTest(test.TestCase):

  def _verifyCholeskyBase(self, sess, x, chol, verification):
    chol_np, verification_np = self.evaluate([chol, verification])
    self.assertAllClose(x, verification_np)
    self.assertShapeEqual(x, chol)
    # Check that the Cholesky factor is lower triangular and has positive
    # diagonal elements.
    if chol_np.shape[-1] > 0:
      chol_reshaped = np.reshape(chol_np, (-1, chol_np.shape[-2],
                                           chol_np.shape[-1]))
      for chol_matrix in chol_reshaped:
        self.assertAllClose(chol_matrix, np.tril(chol_matrix))
        self.assertTrue((np.diag(chol_matrix) > 0.0).all())

  def _verifyCholesky(self, x):
    # Verify that L @ L^H == x.
    with self.cached_session(use_gpu=True) as sess:
      chol = linalg_ops.cholesky(x)
      verification = math_ops.matmul(chol, chol, adjoint_b=True)
      self._verifyCholeskyBase(sess, x, chol, verification)
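
  # Note: for a Hermitian positive-definite x, the lower-triangular factor
  # with a positive real diagonal is unique, so the reconstruction,
  # triangularity, and positive-diagonal checks above pin down the expected
  # result exactly.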

  def testBasic(self):
    data = np.array([[4., -1., 2.], [-1., 6., 0], [2., 0., 5.]])
    for dtype in (np.float32, np.float64):
      self._verifyCholesky(data.astype(dtype))
    for dtype in (np.complex64, np.complex128):
      complex_data = np.tril(1j * data, -1).astype(dtype)
      complex_data += np.triu(-1j * data, 1).astype(dtype)
      complex_data += data
      self._verifyCholesky(complex_data)

  def testBatch(self):
    simple_array = np.array([[[1., 0.], [0., 5.]]])  # shape (1, 2, 2)
    self._verifyCholesky(simple_array)
    self._verifyCholesky(np.vstack((simple_array, simple_array)))
    odd_sized_array = np.array([[[4., -1., 2.], [-1., 6., 0], [2., 0., 5.]]])
    self._verifyCholesky(np.vstack((odd_sized_array, odd_sized_array)))

    # Generate random positive-definite matrices.
    matrices = np.random.rand(10, 5, 5)
    for i in xrange(10):
      matrices[i] = np.dot(matrices[i].T, matrices[i])
    self._verifyCholesky(matrices)

    # Generate random complex-valued positive-definite matrices.
    matrices = np.random.rand(10, 5, 5) + 1j * np.random.rand(10, 5, 5)
    for i in xrange(10):
      matrices[i] = np.dot(matrices[i].T.conj(), matrices[i])
    self._verifyCholesky(matrices)

  @test_util.run_deprecated_v1
  def testNonSquareMatrix(self):
    with self.assertRaises(ValueError):
      linalg_ops.cholesky(np.array([[1., 2., 3.], [3., 4., 5.]]))
    with self.assertRaises(ValueError):
      linalg_ops.cholesky(
          np.array([[[1., 2., 3.], [3., 4., 5.]],
                    [[1., 2., 3.], [3., 4., 5.]]]))

  @test_util.run_v1_only("b/120545219")
  def testWrongDimensions(self):
    tensor3 = constant_op.constant([1., 2.])
    with self.assertRaises(ValueError):
      linalg_ops.cholesky(tensor3)
    with self.assertRaises(ValueError):
      linalg_ops.cholesky(tensor3)

  # The below invalid Cholesky call returns an error with TF Classic and just
  # returns NaNs with XLA.
  @test_util.disable_xla("b/123337890")
  def testNotInvertibleCPU(self):
    # The input should be invertible.
    with self.session(use_gpu=True):
      with self.assertRaisesRegexp(
          errors_impl.InvalidArgumentError,
          "Cholesky decomposition was not successful. The"
          " input might not be valid."):
        # All rows of the matrix below add to zero.
        self._verifyCholesky(
            np.array([[1., -1., 0.], [-1., 1., -1.], [0., -1., 1.]]))

  def testEmpty(self):
    self._verifyCholesky(np.empty([0, 2, 2]))
    self._verifyCholesky(np.empty([2, 0, 0]))

  @test_util.run_deprecated_v1
  def testConcurrentExecutesWithoutError(self):
    with self.session(use_gpu=True) as sess:
      matrix1 = random_ops.random_normal([5, 5], seed=42)
      matrix2 = random_ops.random_normal([5, 5], seed=42)
      matrix1 = math_ops.matmul(matrix1, matrix1, adjoint_a=True)
      matrix2 = math_ops.matmul(matrix2, matrix2, adjoint_a=True)
      c1 = linalg_ops.cholesky(matrix1)
      c2 = linalg_ops.cholesky(matrix2)
      c1_val, c2_val = self.evaluate([c1, c2])
      self.assertAllClose(c1_val, c2_val)


class CholeskyGradTest(test.TestCase):
  _backprop_block_size = 32

  def getShapes(self, shapeList):
    return ((elem, int(np.floor(1.2 * elem))) for elem in shapeList)

  @test_util.run_deprecated_v1
  def testSmallMatrices(self):
    np.random.seed(0)
    shapes = self.getShapes([1, 2, 10])
    self.runFiniteDifferences(
        shapes, dtypes=(dtypes_lib.float32, dtypes_lib.float64))

  @test_util.run_deprecated_v1
  def testSmallMatricesComplex(self):
    np.random.seed(0)
    shapes = self.getShapes([1, 2, 10])
    self.runFiniteDifferences(
        shapes, dtypes=(dtypes_lib.complex64, dtypes_lib.complex128))

  @test_util.run_deprecated_v1
  def testOneBlockMatrices(self):
    np.random.seed(0)
    shapes = self.getShapes([self._backprop_block_size + 1])
    self.runFiniteDifferences(
        shapes,
        dtypes=(dtypes_lib.float32, dtypes_lib.float64),
        scalarTest=True)

  @test_util.run_deprecated_v1
  def testTwoBlockMatrixFloat(self):
    np.random.seed(0)
    shapes = self.getShapes([2 * self._backprop_block_size + 1])
    self.runFiniteDifferences(
        shapes, dtypes=(dtypes_lib.float32,), scalarTest=True)

  @test_util.run_deprecated_v1
  def testTwoBlockMatrixDouble(self):
    np.random.seed(0)
    shapes = self.getShapes([2 * self._backprop_block_size + 1])
    self.runFiniteDifferences(
        shapes, dtypes=(dtypes_lib.float64,), scalarTest=True)

  @test_util.run_v1_only("b/120545219")
  def testTwoBlockMatrixComplexFloat(self):
    np.random.seed(0)
    shapes = self.getShapes([2 * self._backprop_block_size + 1])
    self.runFiniteDifferences(
        shapes, dtypes=(dtypes_lib.complex64,), scalarTest=True)

  @test_util.run_deprecated_v1
  def testTwoBlockMatrixComplexDouble(self):
    np.random.seed(0)
    shapes = self.getShapes([2 * self._backprop_block_size + 1])
    self.runFiniteDifferences(
        shapes, dtypes=(dtypes_lib.complex128,), scalarTest=True)

  def testAgainstSpecialized(self):
    np.random.seed(0)
    data = np.random.randn(33, 33).astype(np.float32)
    data = np.matmul(data, data.T)
    grad_data = np.random.randn(*data.shape).astype(np.float32)

    with ops.Graph().as_default(), self.session(use_gpu=False) as s:
      x = constant_op.constant(data, dtypes_lib.float32)
      chol = linalg_ops.cholesky(x)
      composite_grad = gradients_impl.gradients(chol, x, grad_data)[0]
      specialized_grad = SpecializedGrad(chol, grad_data)
      reference, actual = s.run([specialized_grad, composite_grad])
      self.assertAllClose(reference, actual)
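
  # runFiniteDifferences compares the analytic Cholesky gradient against
  # numeric finite differences. With scalarTest=True the input is a fixed
  # random matrix scaled by a single scalar x, so the check differentiates
  # with respect to one variable instead of a full matrix; this keeps the
  # block-sized cases above tractable.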

  def runFiniteDifferences(self,
                           shapes,
                           dtypes=(dtypes_lib.float32, dtypes_lib.float64,
                                   dtypes_lib.complex64, dtypes_lib.complex128),
                           scalarTest=False):
    with self.session(use_gpu=True):
      for shape in shapes:
        for batch in False, True:
          for dtype in dtypes:
            if not scalarTest:
              data = np.random.randn(shape[0], shape[1])
              if dtype.is_complex:
                data = data.astype(np.complex64)
                data += 1j * np.random.randn(shape[0], shape[1])
              x = constant_op.constant(data, dtype)
              tensor = math_ops.matmul(
                  x, math_ops.conj(array_ops.transpose(x))) / shape[0]
            else:
              # This is designed to be a faster test for larger matrices.
              data = np.random.randn()
              if dtype.is_complex:
                data = np.complex64(data)
                data += 1j * np.random.randn()
              x = constant_op.constant(data, dtype)
              R = constant_op.constant(
                  np.random.randn(shape[0], shape[1]), dtype)
              e = math_ops.multiply(R, x)
              tensor = math_ops.matmul(
                  e, math_ops.conj(array_ops.transpose(e))) / shape[0]

            # Innermost matrices in tensor are positive definite.
            if batch:
              tensor = array_ops.tile(
                  array_ops.expand_dims(tensor, 0), [4, 1, 1])
            y = linalg_ops.cholesky(tensor)
            if scalarTest:
              y = math_ops.reduce_mean(y)
            error = gradient_checker.compute_gradient_error(
                x, x._shape_as_list(), y, y._shape_as_list())
            tf_logging.info("error = %f", error)
            if dtype == dtypes_lib.float64:
              self.assertLess(error, 1e-5)
            elif dtype == dtypes_lib.complex128:
              self.assertLess(error, 5e-5)
            else:
              self.assertLess(error, 5e-3)


class CholeskyBenchmark(test.Benchmark):

  shapes = [
      (4, 4),
      (10, 10),
      (16, 16),
      (101, 101),
      (256, 256),
      (1000, 1000),
      (1024, 1024),
      (2048, 2048),
      (513, 2, 2),
      (513, 8, 8),
      (513, 256, 256),
      (4, 513, 2, 2),
  ]

  def _GenerateMatrix(self, shape):
    batch_shape = shape[:-2]
    shape = shape[-2:]
    assert shape[0] == shape[1]
    n = shape[0]
    matrix = np.ones(shape).astype(np.float32) / (
        2.0 * n) + np.diag(np.ones(n).astype(np.float32))
    return np.tile(matrix, batch_shape + (1, 1))
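
  # _GenerateMatrix returns ones/(2n) + eye(n): symmetric and strictly
  # diagonally dominant, since each diagonal entry 1 + 1/(2n) exceeds the
  # off-diagonal row sum (n - 1)/(2n). The result is positive definite, so
  # cholesky succeeds on every benchmark input.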

  def benchmarkCholeskyOp(self):
    for shape in self.shapes:
      with ops.Graph().as_default(), \
          session.Session(config=benchmark.benchmark_config()) as sess, \
          ops.device("/cpu:0"):
        matrix = variables.Variable(self._GenerateMatrix(shape))
        l = linalg_ops.cholesky(matrix)
        variables.global_variables_initializer().run()
        self.run_op_benchmark(
            sess,
            control_flow_ops.group(l),
            min_iters=25,
            name="cholesky_cpu_{shape}".format(shape=shape))

      if test.is_gpu_available(True):
        with ops.Graph().as_default(), \
            session.Session(config=benchmark.benchmark_config()) as sess, \
            ops.device("/device:GPU:0"):
          matrix = variables.Variable(self._GenerateMatrix(shape))
          l = linalg_ops.cholesky(matrix)
          variables.global_variables_initializer().run()
          self.run_op_benchmark(
              sess,
              control_flow_ops.group(l),
              min_iters=25,
              name="cholesky_gpu_{shape}".format(shape=shape))

  def benchmarkGradVariants(self):

    def _BenchmarkGrad(grad_fn, name, device):
      for shape in self.shapes:
        matrix = self._GenerateMatrix(shape)
        with ops.Graph().as_default(), \
            session.Session(config=benchmark.benchmark_config()) as sess, \
            ops.device(device):
          l = variables.Variable(np.linalg.cholesky(matrix))
          grad_matrix = variables.Variable(
              np.random.randn(*matrix.shape).astype(np.float32))
          grad = grad_fn(l, grad_matrix)
          variables.global_variables_initializer().run()
          self.run_op_benchmark(
              sess,
              control_flow_ops.group(grad),
              min_iters=25,
              name="{name}_{dev}_{shape}".format(
                  name=name, dev=grad.device, shape=shape))

    if test.is_gpu_available(True):
      _BenchmarkGrad(MatrixInverseCompositeGrad, "composite_matrix_inverse",
                     "/device:GPU:0")
      _BenchmarkGrad(TriAngInvCompositeGrad, "composite_tri_ang_inverse",
                     "/device:GPU:0")
      _BenchmarkGrad(TriAngSolveCompositeGrad, "composite_triangular_solve",
                     "/device:GPU:0")

    _BenchmarkGrad(MatrixInverseCompositeGrad, "composite_matrix_inverse",
                   "/cpu:0")
    _BenchmarkGrad(TriAngInvCompositeGrad, "composite_tri_ang_inverse",
                   "/cpu:0")
    _BenchmarkGrad(TriAngSolveCompositeGrad, "composite_triangular_solve",
                   "/cpu:0")
    _BenchmarkGrad(SpecializedGrad, "specialized", "/cpu:0")


if __name__ == "__main__":
  test.main()