1# Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================== 15"""Operations for linear algebra.""" 16 17from __future__ import absolute_import 18from __future__ import division 19from __future__ import print_function 20 21import numpy as np 22 23from tensorflow.python.framework import dtypes 24from tensorflow.python.framework import ops 25from tensorflow.python.ops import array_ops 26from tensorflow.python.ops import control_flow_ops 27from tensorflow.python.ops import gen_array_ops 28from tensorflow.python.ops import gen_linalg_ops 29from tensorflow.python.ops import linalg_ops_impl 30from tensorflow.python.ops import map_fn 31from tensorflow.python.ops import math_ops 32# pylint: disable=wildcard-import 33from tensorflow.python.ops.gen_linalg_ops import * 34# pylint: enable=wildcard-import 35from tensorflow.python.util import deprecation 36from tensorflow.python.util import dispatch 37from tensorflow.python.util.tf_export import tf_export 38 39# Names below are lower_case. 40# pylint: disable=invalid-name 41 42 43def _RegularizedGramianCholesky(matrix, l2_regularizer, first_kind): 44 r"""Computes Cholesky factorization of regularized gramian matrix. 45 46 Below we will use the following notation for each pair of matrix and 47 right-hand sides in the batch: 48 49 `matrix`=\\(A \in \Re^{m \times n}\\), 50 `output`=\\(C \in \Re^{\min(m, n) \times \min(m,n)}\\), 51 `l2_regularizer`=\\(\lambda\\). 52 53 If `first_kind` is True, returns the Cholesky factorization \\(L\\) such that 54 \\(L L^H = A^H A + \lambda I\\). 55 If `first_kind` is False, returns the Cholesky factorization \\(L\\) such that 56 \\(L L^H = A A^H + \lambda I\\). 57 58 Args: 59 matrix: `Tensor` of shape `[..., M, N]`. 60 l2_regularizer: 0-D `double` `Tensor`. Ignored if `fast=False`. 61 first_kind: bool. Controls what gramian matrix to factor. 62 Returns: 63 output: `Tensor` of shape `[..., min(M,N), min(M,N)]` whose inner-most 2 64 dimensions contain the Cholesky factors \\(L\\) described above. 65 """ 66 67 gramian = math_ops.matmul( 68 matrix, matrix, adjoint_a=first_kind, adjoint_b=not first_kind) 69 if isinstance(l2_regularizer, ops.Tensor) or l2_regularizer != 0: 70 matrix_shape = array_ops.shape(matrix) 71 batch_shape = matrix_shape[:-2] 72 if first_kind: 73 small_dim = matrix_shape[-1] 74 else: 75 small_dim = matrix_shape[-2] 76 identity = eye(small_dim, batch_shape=batch_shape, dtype=matrix.dtype) 77 small_dim_static = matrix.shape[-1 if first_kind else -2] 78 identity.set_shape( 79 matrix.shape[:-2].concatenate([small_dim_static, small_dim_static])) 80 gramian += l2_regularizer * identity 81 return gen_linalg_ops.cholesky(gramian) 82 83 84@tf_export( 85 'linalg.triangular_solve', 86 v1=['linalg.triangular_solve', 'matrix_triangular_solve']) 87@dispatch.add_dispatch_support 88def matrix_triangular_solve(matrix, rhs, lower=True, adjoint=False, name=None): 89 """Solve systems of linear equations with upper or lower triangular matrices. 90 91 `matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions form 92 square matrices. If `lower` is `True` then the strictly upper triangular part 93 of each inner-most matrix is assumed to be zero and not accessed. If `lower` 94 is `False` then the strictly lower triangular part of each inner-most matrix 95 is assumed to be zero and not accessed. `rhs` is a tensor of shape 96 `[..., M, N]`. 97 98 The output is a tensor of shape `[..., M, N]`. If `adjoint` is `True` then the 99 innermost matrices in output satisfy matrix equations ` 100 sum_k matrix[..., i, k] * output[..., k, j] = rhs[..., i, j]`. 101 If `adjoint` is `False` then the 102 innermost matrices in output satisfy matrix equations 103 `sum_k adjoint(matrix[..., i, k]) * output[..., k, j] = rhs[..., i, j]`. 104 105 Example: 106 107 >>> a = tf.constant([[3, 0, 0, 0], 108 ... [2, 1, 0, 0], 109 ... [1, 0, 1, 0], 110 ... [1, 1, 1, 1]], dtype=tf.float32) 111 112 >>> b = tf.constant([[4], [2], [4], [2]], dtype=tf.float32) 113 >>> x = tf.linalg.triangular_solve(a, b, lower=True) 114 >>> x 115 <tf.Tensor: shape=(4, 1), dtype=float32, numpy= 116 array([[ 1.3333334 ], 117 [-0.66666675], 118 [ 2.6666665 ], 119 [-1.3333331 ]], dtype=float32)> 120 >>> tf.matmul(a, x) 121 <tf.Tensor: shape=(4, 1), dtype=float32, numpy= 122 array([[4.], 123 [2.], 124 [4.], 125 [2.]], dtype=float32)> 126 127 Args: 128 matrix: A `Tensor`. Must be one of the following types: `float64`, 129 `float32`, `half`, `complex64`, `complex128`. Shape is `[..., M, M]`. 130 rhs: A `Tensor`. Must have the same type as `matrix`. Shape is `[..., M, 131 N]`. 132 lower: An optional `bool`. Defaults to `True`. Boolean indicating whether 133 the innermost matrices in matrix are lower or upper triangular. 134 adjoint: An optional `bool`. Defaults to `False`. Boolean indicating whether 135 to solve with matrix or its (block-wise) adjoint. 136 name: A name for the operation (optional). 137 138 Returns: 139 A `Tensor`. Has the same type as matrix, and shape is `[..., M, N]`. 140 141 """ 142 with ops.name_scope(name, 'triangular_solve', [matrix, rhs]): 143 return gen_linalg_ops.matrix_triangular_solve( 144 matrix, rhs, lower=lower, adjoint=adjoint) 145 146 147@tf_export( 148 'linalg.cholesky_solve', v1=['linalg.cholesky_solve', 'cholesky_solve']) 149@dispatch.add_dispatch_support 150@deprecation.deprecated_endpoints('cholesky_solve') 151def cholesky_solve(chol, rhs, name=None): 152 """Solves systems of linear eqns `A X = RHS`, given Cholesky factorizations. 153 154 Specifically, returns `X` from `A X = RHS`, where `A = L L^T`, `L` is the 155 `chol` arg and `RHS` is the `rhs` arg. 156 157 ```python 158 # Solve 10 separate 2x2 linear systems: 159 A = ... # shape 10 x 2 x 2 160 RHS = ... # shape 10 x 2 x 1 161 chol = tf.linalg.cholesky(A) # shape 10 x 2 x 2 162 X = tf.linalg.cholesky_solve(chol, RHS) # shape 10 x 2 x 1 163 # tf.matmul(A, X) ~ RHS 164 X[3, :, 0] # Solution to the linear system A[3, :, :] x = RHS[3, :, 0] 165 166 # Solve five linear systems (K = 5) for every member of the length 10 batch. 167 A = ... # shape 10 x 2 x 2 168 RHS = ... # shape 10 x 2 x 5 169 ... 170 X[3, :, 2] # Solution to the linear system A[3, :, :] x = RHS[3, :, 2] 171 ``` 172 173 Args: 174 chol: A `Tensor`. Must be `float32` or `float64`, shape is `[..., M, M]`. 175 Cholesky factorization of `A`, e.g. `chol = tf.linalg.cholesky(A)`. 176 For that reason, only the lower triangular parts (including the diagonal) 177 of the last two dimensions of `chol` are used. The strictly upper part is 178 assumed to be zero and not accessed. 179 rhs: A `Tensor`, same type as `chol`, shape is `[..., M, K]`. 180 name: A name to give this `Op`. Defaults to `cholesky_solve`. 181 182 Returns: 183 Solution to `A x = rhs`, shape `[..., M, K]`. 184 """ 185 # To solve C C^* x = rhs, we 186 # 1. Solve C y = rhs for y, thus y = C^* x 187 # 2. Solve C^* x = y for x 188 with ops.name_scope(name, 'cholesky_solve', [chol, rhs]): 189 y = gen_linalg_ops.matrix_triangular_solve( 190 chol, rhs, adjoint=False, lower=True) 191 x = gen_linalg_ops.matrix_triangular_solve( 192 chol, y, adjoint=True, lower=True) 193 return x 194 195 196@tf_export('eye', 'linalg.eye') 197@dispatch.add_dispatch_support 198def eye(num_rows, 199 num_columns=None, 200 batch_shape=None, 201 dtype=dtypes.float32, 202 name=None): 203 """Construct an identity matrix, or a batch of matrices. 204 205 See also `tf.ones`, `tf.zeros`, `tf.fill`, `tf.one_hot`. 206 207 ```python 208 # Construct one identity matrix. 209 tf.eye(2) 210 ==> [[1., 0.], 211 [0., 1.]] 212 213 # Construct a batch of 3 identity matrices, each 2 x 2. 214 # batch_identity[i, :, :] is a 2 x 2 identity matrix, i = 0, 1, 2. 215 batch_identity = tf.eye(2, batch_shape=[3]) 216 217 # Construct one 2 x 3 "identity" matrix 218 tf.eye(2, num_columns=3) 219 ==> [[ 1., 0., 0.], 220 [ 0., 1., 0.]] 221 ``` 222 223 Args: 224 num_rows: Non-negative `int32` scalar `Tensor` giving the number of rows 225 in each batch matrix. 226 num_columns: Optional non-negative `int32` scalar `Tensor` giving the number 227 of columns in each batch matrix. Defaults to `num_rows`. 228 batch_shape: A list or tuple of Python integers or a 1-D `int32` `Tensor`. 229 If provided, the returned `Tensor` will have leading batch dimensions of 230 this shape. 231 dtype: The type of an element in the resulting `Tensor` 232 name: A name for this `Op`. Defaults to "eye". 233 234 Returns: 235 A `Tensor` of shape `batch_shape + [num_rows, num_columns]` 236 """ 237 return linalg_ops_impl.eye(num_rows, 238 num_columns=num_columns, 239 batch_shape=batch_shape, 240 dtype=dtype, 241 name=name) 242 243 244@tf_export('linalg.lstsq', v1=['linalg.lstsq', 'matrix_solve_ls']) 245@dispatch.add_dispatch_support 246@deprecation.deprecated_endpoints('matrix_solve_ls') 247def matrix_solve_ls(matrix, rhs, l2_regularizer=0.0, fast=True, name=None): 248 r"""Solves one or more linear least-squares problems. 249 250 `matrix` is a tensor of shape `[..., M, N]` whose inner-most 2 dimensions 251 form `M`-by-`N` matrices. Rhs is a tensor of shape `[..., M, K]` whose 252 inner-most 2 dimensions form `M`-by-`K` matrices. The computed output is a 253 `Tensor` of shape `[..., N, K]` whose inner-most 2 dimensions form `M`-by-`K` 254 matrices that solve the equations 255 `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]` in the least squares 256 sense. 257 258 Below we will use the following notation for each pair of matrix and 259 right-hand sides in the batch: 260 261 `matrix`=\\(A \in \Re^{m \times n}\\), 262 `rhs`=\\(B \in \Re^{m \times k}\\), 263 `output`=\\(X \in \Re^{n \times k}\\), 264 `l2_regularizer`=\\(\lambda\\). 265 266 If `fast` is `True`, then the solution is computed by solving the normal 267 equations using Cholesky decomposition. Specifically, if \\(m \ge n\\) then 268 \\(X = (A^T A + \lambda I)^{-1} A^T B\\), which solves the least-squares 269 problem \\(X = \mathrm{argmin}_{Z \in \Re^{n \times k}} ||A Z - B||_F^2 + 270 \lambda ||Z||_F^2\\). If \\(m \lt n\\) then `output` is computed as 271 \\(X = A^T (A A^T + \lambda I)^{-1} B\\), which (for \\(\lambda = 0\\)) is 272 the minimum-norm solution to the under-determined linear system, i.e. 273 \\(X = \mathrm{argmin}_{Z \in \Re^{n \times k}} ||Z||_F^2 \\), subject to 274 \\(A Z = B\\). Notice that the fast path is only numerically stable when 275 \\(A\\) is numerically full rank and has a condition number 276 \\(\mathrm{cond}(A) \lt \frac{1}{\sqrt{\epsilon_{mach}}}\\) or\\(\lambda\\) 277 is sufficiently large. 278 279 If `fast` is `False` an algorithm based on the numerically robust complete 280 orthogonal decomposition is used. This computes the minimum-norm 281 least-squares solution, even when \\(A\\) is rank deficient. This path is 282 typically 6-7 times slower than the fast path. If `fast` is `False` then 283 `l2_regularizer` is ignored. 284 285 Args: 286 matrix: `Tensor` of shape `[..., M, N]`. 287 rhs: `Tensor` of shape `[..., M, K]`. 288 l2_regularizer: 0-D `double` `Tensor`. Ignored if `fast=False`. 289 fast: bool. Defaults to `True`. 290 name: string, optional name of the operation. 291 292 Returns: 293 output: `Tensor` of shape `[..., N, K]` whose inner-most 2 dimensions form 294 `M`-by-`K` matrices that solve the equations 295 `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]` in the least 296 squares sense. 297 298 Raises: 299 NotImplementedError: linalg.lstsq is currently disabled for complex128 300 and l2_regularizer != 0 due to poor accuracy. 301 """ 302 303 # pylint: disable=long-lambda 304 def _use_composite_impl(fast, tensor_shape): 305 """Determines whether to use the composite or specialized CPU kernel. 306 307 When the total size of the tensor is larger than the cache size and the 308 batch size is large compared to the smallest matrix dimension, then the 309 composite implementation is inefficient since it has to read the entire 310 tensor from memory multiple times. In this case we fall back to the 311 original CPU kernel, which does all the computational steps on each 312 matrix separately. 313 314 Only fast mode is supported by the composite impl, so `False` is returned 315 if `fast` is `False`. 316 317 Args: 318 fast: bool indicating if fast mode in the solver was requested. 319 tensor_shape: The shape of the tensor. 320 321 Returns: 322 True if the composite impl should be used. False otherwise. 323 """ 324 if fast is False: 325 return False 326 batch_shape = tensor_shape[:-2] 327 matrix_shape = tensor_shape[-2:] 328 if not tensor_shape.is_fully_defined(): 329 return True 330 tensor_size = tensor_shape.num_elements() * matrix.dtype.size 331 is_io_bound = batch_shape.num_elements() > np.min(matrix_shape) 332 L2_CACHE_SIZE_GUESSTIMATE = 256000 333 if tensor_size > L2_CACHE_SIZE_GUESSTIMATE and is_io_bound: 334 return False 335 else: 336 return True 337 338 def _overdetermined(matrix, rhs, l2_regularizer): 339 """Computes (A^H*A + l2_regularizer)^{-1} * A^H * rhs.""" 340 chol = _RegularizedGramianCholesky( 341 matrix, l2_regularizer=l2_regularizer, first_kind=True) 342 return cholesky_solve(chol, math_ops.matmul(matrix, rhs, adjoint_a=True)) 343 344 def _underdetermined(matrix, rhs, l2_regularizer): 345 """Computes A^H * (A*A^H + l2_regularizer)^{-1} * rhs.""" 346 chol = _RegularizedGramianCholesky( 347 matrix, l2_regularizer=l2_regularizer, first_kind=False) 348 return math_ops.matmul(matrix, cholesky_solve(chol, rhs), adjoint_a=True) 349 350 def _composite_impl(matrix, rhs, l2_regularizer): 351 """Composite implementation of matrix_solve_ls that supports GPU.""" 352 with ops.name_scope(name, 'matrix_solve_ls', [matrix, rhs, l2_regularizer]): 353 matrix_shape = matrix.get_shape()[-2:] 354 if matrix_shape.is_fully_defined(): 355 if matrix_shape[-2] >= matrix_shape[-1]: 356 return _overdetermined(matrix, rhs, l2_regularizer) 357 else: 358 return _underdetermined(matrix, rhs, l2_regularizer) 359 else: 360 # We have to defer determining the shape to runtime and use 361 # conditional execution of the appropriate graph. 362 matrix_shape = array_ops.shape(matrix)[-2:] 363 return control_flow_ops.cond( 364 matrix_shape[-2] >= matrix_shape[-1], 365 lambda: _overdetermined(matrix, rhs, l2_regularizer), 366 lambda: _underdetermined(matrix, rhs, l2_regularizer)) 367 368 matrix = ops.convert_to_tensor(matrix, name='matrix') 369 if matrix.dtype == dtypes.complex128 and l2_regularizer != 0: 370 # TODO(rmlarsen): Investigate and fix accuracy bug. 371 raise NotImplementedError('matrix_solve_ls is currently disabled for ' 372 'complex128 and l2_regularizer != 0 due to ' 373 'poor accuracy.') 374 tensor_shape = matrix.get_shape() 375 if _use_composite_impl(fast, tensor_shape): 376 return _composite_impl(matrix, rhs, l2_regularizer) 377 else: 378 return gen_linalg_ops.matrix_solve_ls( 379 matrix, rhs, l2_regularizer, fast=fast, name=name) 380 381 382@tf_export('linalg.eig', 'eig', v1=[]) 383@dispatch.add_dispatch_support 384def eig(tensor, name=None): 385 """Computes the eigen decomposition of a batch of matrices. 386 387 The eigenvalues 388 and eigenvectors for a non-Hermitian matrix in general are complex. The 389 eigenvectors are not guaranteed to be linearly independent. 390 391 Computes the eigenvalues and right eigenvectors of the innermost 392 N-by-N matrices in `tensor` such that 393 `tensor[...,:,:] * v[..., :,i] = e[..., i] * v[...,:,i]`, for i=0...N-1. 394 395 Args: 396 tensor: `Tensor` of shape `[..., N, N]`. Only the lower triangular part of 397 each inner inner matrix is referenced. 398 name: string, optional name of the operation. 399 400 Returns: 401 e: Eigenvalues. Shape is `[..., N]`. Sorted in non-decreasing order. 402 v: Eigenvectors. Shape is `[..., N, N]`. The columns of the inner most 403 matrices contain eigenvectors of the corresponding matrices in `tensor` 404 """ 405 if tensor.dtype == dtypes.float32 or tensor.dtype == dtypes.complex64: 406 out_dtype = dtypes.complex64 407 elif tensor.dtype == dtypes.float64 or tensor.dtype == dtypes.complex128: 408 out_dtype = dtypes.complex128 409 e, v = gen_linalg_ops.eig(tensor, Tout=out_dtype, compute_v=True, name=name) 410 return e, v 411 412 413@tf_export('linalg.eigvals', 'eigvals', v1=[]) 414@dispatch.add_dispatch_support 415def eigvals(tensor, name=None): 416 """Computes the eigenvalues of one or more matrices. 417 418 Note: If your program backpropagates through this function, you should replace 419 it with a call to tf.linalg.eig (possibly ignoring the second output) to 420 avoid computing the eigen decomposition twice. This is because the 421 eigenvectors are used to compute the gradient w.r.t. the eigenvalues. See 422 _SelfAdjointEigV2Grad in linalg_grad.py. 423 424 Args: 425 tensor: `Tensor` of shape `[..., N, N]`. 426 name: string, optional name of the operation. 427 428 Returns: 429 e: Eigenvalues. Shape is `[..., N]`. The vector `e[..., :]` contains the `N` 430 eigenvalues of `tensor[..., :, :]`. 431 """ 432 if tensor.dtype == dtypes.float32 or tensor.dtype == dtypes.complex64: 433 out_dtype = dtypes.complex64 434 elif tensor.dtype == dtypes.float64 or tensor.dtype == dtypes.complex128: 435 out_dtype = dtypes.complex128 436 e, _ = gen_linalg_ops.eig(tensor, Tout=out_dtype, compute_v=False, name=name) 437 return e 438 439 440@tf_export('linalg.eigh', v1=['linalg.eigh', 'self_adjoint_eig']) 441@dispatch.add_dispatch_support 442@deprecation.deprecated_endpoints('self_adjoint_eig') 443def self_adjoint_eig(tensor, name=None): 444 """Computes the eigen decomposition of a batch of self-adjoint matrices. 445 446 Computes the eigenvalues and eigenvectors of the innermost N-by-N matrices 447 in `tensor` such that 448 `tensor[...,:,:] * v[..., :,i] = e[..., i] * v[...,:,i]`, for i=0...N-1. 449 450 Args: 451 tensor: `Tensor` of shape `[..., N, N]`. Only the lower triangular part of 452 each inner inner matrix is referenced. 453 name: string, optional name of the operation. 454 455 Returns: 456 e: Eigenvalues. Shape is `[..., N]`. Sorted in non-decreasing order. 457 v: Eigenvectors. Shape is `[..., N, N]`. The columns of the inner most 458 matrices contain eigenvectors of the corresponding matrices in `tensor` 459 """ 460 e, v = gen_linalg_ops.self_adjoint_eig_v2(tensor, compute_v=True, name=name) 461 return e, v 462 463 464@tf_export('linalg.eigvalsh', v1=['linalg.eigvalsh', 'self_adjoint_eigvals']) 465@dispatch.add_dispatch_support 466@deprecation.deprecated_endpoints('self_adjoint_eigvals') 467def self_adjoint_eigvals(tensor, name=None): 468 """Computes the eigenvalues of one or more self-adjoint matrices. 469 470 Note: If your program backpropagates through this function, you should replace 471 it with a call to tf.linalg.eigh (possibly ignoring the second output) to 472 avoid computing the eigen decomposition twice. This is because the 473 eigenvectors are used to compute the gradient w.r.t. the eigenvalues. See 474 _SelfAdjointEigV2Grad in linalg_grad.py. 475 476 Args: 477 tensor: `Tensor` of shape `[..., N, N]`. 478 name: string, optional name of the operation. 479 480 Returns: 481 e: Eigenvalues. Shape is `[..., N]`. The vector `e[..., :]` contains the `N` 482 eigenvalues of `tensor[..., :, :]`. 483 """ 484 e, _ = gen_linalg_ops.self_adjoint_eig_v2(tensor, compute_v=False, name=name) 485 return e 486 487 488@tf_export('linalg.svd', v1=['linalg.svd', 'svd']) 489@dispatch.add_dispatch_support 490@deprecation.deprecated_endpoints('svd') 491def svd(tensor, full_matrices=False, compute_uv=True, name=None): 492 r"""Computes the singular value decompositions of one or more matrices. 493 494 Computes the SVD of each inner matrix in `tensor` such that 495 `tensor[..., :, :] = u[..., :, :] * diag(s[..., :, :]) * 496 transpose(conj(v[..., :, :]))` 497 498 ```python 499 # a is a tensor. 500 # s is a tensor of singular values. 501 # u is a tensor of left singular vectors. 502 # v is a tensor of right singular vectors. 503 s, u, v = svd(a) 504 s = svd(a, compute_uv=False) 505 ``` 506 507 Args: 508 tensor: `Tensor` of shape `[..., M, N]`. Let `P` be the minimum of `M` and 509 `N`. 510 full_matrices: If true, compute full-sized `u` and `v`. If false 511 (the default), compute only the leading `P` singular vectors. 512 Ignored if `compute_uv` is `False`. 513 compute_uv: If `True` then left and right singular vectors will be 514 computed and returned in `u` and `v`, respectively. Otherwise, only the 515 singular values will be computed, which can be significantly faster. 516 name: string, optional name of the operation. 517 518 Returns: 519 s: Singular values. Shape is `[..., P]`. The values are sorted in reverse 520 order of magnitude, so s[..., 0] is the largest value, s[..., 1] is the 521 second largest, etc. 522 u: Left singular vectors. If `full_matrices` is `False` (default) then 523 shape is `[..., M, P]`; if `full_matrices` is `True` then shape is 524 `[..., M, M]`. Not returned if `compute_uv` is `False`. 525 v: Right singular vectors. If `full_matrices` is `False` (default) then 526 shape is `[..., N, P]`. If `full_matrices` is `True` then shape is 527 `[..., N, N]`. Not returned if `compute_uv` is `False`. 528 529 @compatibility(numpy) 530 Mostly equivalent to numpy.linalg.svd, except that 531 * The order of output arguments here is `s`, `u`, `v` when `compute_uv` is 532 `True`, as opposed to `u`, `s`, `v` for numpy.linalg.svd. 533 * full_matrices is `False` by default as opposed to `True` for 534 numpy.linalg.svd. 535 * tf.linalg.svd uses the standard definition of the SVD 536 \\(A = U \Sigma V^H\\), such that the left singular vectors of `a` are 537 the columns of `u`, while the right singular vectors of `a` are the 538 columns of `v`. On the other hand, numpy.linalg.svd returns the adjoint 539 \\(V^H\\) as the third output argument. 540 ```python 541 import tensorflow as tf 542 import numpy as np 543 s, u, v = tf.linalg.svd(a) 544 tf_a_approx = tf.matmul(u, tf.matmul(tf.linalg.diag(s), v, adjoint_b=True)) 545 u, s, v_adj = np.linalg.svd(a, full_matrices=False) 546 np_a_approx = np.dot(u, np.dot(np.diag(s), v_adj)) 547 # tf_a_approx and np_a_approx should be numerically close. 548 ``` 549 @end_compatibility 550 """ 551 s, u, v = gen_linalg_ops.svd( 552 tensor, compute_uv=compute_uv, full_matrices=full_matrices, name=name) 553 if compute_uv: 554 return math_ops.real(s), u, v 555 else: 556 return math_ops.real(s) 557 558 559# pylint: disable=redefined-builtin 560@tf_export('norm', 'linalg.norm', v1=[]) 561@dispatch.add_dispatch_support 562def norm_v2(tensor, 563 ord='euclidean', 564 axis=None, 565 keepdims=None, 566 name=None): 567 r"""Computes the norm of vectors, matrices, and tensors. 568 569 This function can compute several different vector norms (the 1-norm, the 570 Euclidean or 2-norm, the inf-norm, and in general the p-norm for p > 0) and 571 matrix norms (Frobenius, 1-norm, 2-norm and inf-norm). 572 573 Args: 574 tensor: `Tensor` of types `float32`, `float64`, `complex64`, `complex128` 575 ord: Order of the norm. Supported values are `'fro'`, `'euclidean'`, 576 `1`, `2`, `np.inf` and any positive real number yielding the corresponding 577 p-norm. Default is `'euclidean'` which is equivalent to Frobenius norm if 578 `tensor` is a matrix and equivalent to 2-norm for vectors. 579 Some restrictions apply: 580 a) The Frobenius norm `'fro'` is not defined for vectors, 581 b) If axis is a 2-tuple (matrix norm), only `'euclidean'`, '`fro'`, `1`, 582 `2`, `np.inf` are supported. 583 See the description of `axis` on how to compute norms for a batch of 584 vectors or matrices stored in a tensor. 585 axis: If `axis` is `None` (the default), the input is considered a vector 586 and a single vector norm is computed over the entire set of values in the 587 tensor, i.e. `norm(tensor, ord=ord)` is equivalent to 588 `norm(reshape(tensor, [-1]), ord=ord)`. 589 If `axis` is a Python integer, the input is considered a batch of vectors, 590 and `axis` determines the axis in `tensor` over which to compute vector 591 norms. 592 If `axis` is a 2-tuple of Python integers it is considered a batch of 593 matrices and `axis` determines the axes in `tensor` over which to compute 594 a matrix norm. 595 Negative indices are supported. Example: If you are passing a tensor that 596 can be either a matrix or a batch of matrices at runtime, pass 597 `axis=[-2,-1]` instead of `axis=None` to make sure that matrix norms are 598 computed. 599 keepdims: If True, the axis indicated in `axis` are kept with size 1. 600 Otherwise, the dimensions in `axis` are removed from the output shape. 601 name: The name of the op. 602 603 Returns: 604 output: A `Tensor` of the same type as tensor, containing the vector or 605 matrix norms. If `keepdims` is True then the rank of output is equal to 606 the rank of `tensor`. Otherwise, if `axis` is none the output is a scalar, 607 if `axis` is an integer, the rank of `output` is one less than the rank 608 of `tensor`, if `axis` is a 2-tuple the rank of `output` is two less 609 than the rank of `tensor`. 610 611 Raises: 612 ValueError: If `ord` or `axis` is invalid. 613 614 @compatibility(numpy) 615 Mostly equivalent to numpy.linalg.norm. 616 Not supported: ord <= 0, 2-norm for matrices, nuclear norm. 617 Other differences: 618 a) If axis is `None`, treats the flattened `tensor` as a vector 619 regardless of rank. 620 b) Explicitly supports 'euclidean' norm as the default, including for 621 higher order tensors. 622 @end_compatibility 623 """ 624 return norm(tensor=tensor, 625 ord=ord, 626 axis=axis, 627 keepdims=keepdims, 628 name=name) 629 630 631# pylint: disable=redefined-builtin 632@tf_export(v1=['norm', 'linalg.norm']) 633@dispatch.add_dispatch_support 634@deprecation.deprecated_args( 635 None, 'keep_dims is deprecated, use keepdims instead', 'keep_dims') 636def norm(tensor, 637 ord='euclidean', 638 axis=None, 639 keepdims=None, 640 name=None, 641 keep_dims=None): 642 r"""Computes the norm of vectors, matrices, and tensors. 643 644 This function can compute several different vector norms (the 1-norm, the 645 Euclidean or 2-norm, the inf-norm, and in general the p-norm for p > 0) and 646 matrix norms (Frobenius, 1-norm, 2-norm and inf-norm). 647 648 Args: 649 tensor: `Tensor` of types `float32`, `float64`, `complex64`, `complex128` 650 ord: Order of the norm. Supported values are 'fro', 'euclidean', 651 `1`, `2`, `np.inf` and any positive real number yielding the corresponding 652 p-norm. Default is 'euclidean' which is equivalent to Frobenius norm if 653 `tensor` is a matrix and equivalent to 2-norm for vectors. 654 Some restrictions apply: 655 a) The Frobenius norm `fro` is not defined for vectors, 656 b) If axis is a 2-tuple (matrix norm), only 'euclidean', 'fro', `1`, 657 `2`, `np.inf` are supported. 658 See the description of `axis` on how to compute norms for a batch of 659 vectors or matrices stored in a tensor. 660 axis: If `axis` is `None` (the default), the input is considered a vector 661 and a single vector norm is computed over the entire set of values in the 662 tensor, i.e. `norm(tensor, ord=ord)` is equivalent to 663 `norm(reshape(tensor, [-1]), ord=ord)`. 664 If `axis` is a Python integer, the input is considered a batch of vectors, 665 and `axis` determines the axis in `tensor` over which to compute vector 666 norms. 667 If `axis` is a 2-tuple of Python integers it is considered a batch of 668 matrices and `axis` determines the axes in `tensor` over which to compute 669 a matrix norm. 670 Negative indices are supported. Example: If you are passing a tensor that 671 can be either a matrix or a batch of matrices at runtime, pass 672 `axis=[-2,-1]` instead of `axis=None` to make sure that matrix norms are 673 computed. 674 keepdims: If True, the axis indicated in `axis` are kept with size 1. 675 Otherwise, the dimensions in `axis` are removed from the output shape. 676 name: The name of the op. 677 keep_dims: Deprecated alias for `keepdims`. 678 679 Returns: 680 output: A `Tensor` of the same type as tensor, containing the vector or 681 matrix norms. If `keepdims` is True then the rank of output is equal to 682 the rank of `tensor`. Otherwise, if `axis` is none the output is a scalar, 683 if `axis` is an integer, the rank of `output` is one less than the rank 684 of `tensor`, if `axis` is a 2-tuple the rank of `output` is two less 685 than the rank of `tensor`. 686 687 Raises: 688 ValueError: If `ord` or `axis` is invalid. 689 690 @compatibility(numpy) 691 Mostly equivalent to numpy.linalg.norm. 692 Not supported: ord <= 0, 2-norm for matrices, nuclear norm. 693 Other differences: 694 a) If axis is `None`, treats the flattened `tensor` as a vector 695 regardless of rank. 696 b) Explicitly supports 'euclidean' norm as the default, including for 697 higher order tensors. 698 @end_compatibility 699 """ 700 keepdims = deprecation.deprecated_argument_lookup('keepdims', keepdims, 701 'keep_dims', keep_dims) 702 if keepdims is None: 703 keepdims = False 704 705 is_matrix_norm = ((isinstance(axis, tuple) or isinstance(axis, list)) and 706 len(axis) == 2) 707 if is_matrix_norm: 708 axis = tuple(axis) 709 if (not isinstance(axis[0], int) or not isinstance(axis[1], int) or 710 axis[0] == axis[1]): 711 raise ValueError( 712 "'axis' must be None, an integer, or a tuple of 2 unique integers") 713 supported_matrix_norms = ['euclidean', 'fro', 1, 2, np.inf] 714 if ord not in supported_matrix_norms: 715 raise ValueError("'ord' must be a supported matrix norm in %s, got %s" % 716 (supported_matrix_norms, ord)) 717 else: 718 if not (isinstance(axis, int) or axis is None): 719 raise ValueError( 720 "'axis' must be None, an integer, or a tuple of 2 unique integers") 721 722 supported_vector_norms = ['euclidean', 1, 2, np.inf] 723 if (not np.isreal(ord) or ord <= 0) and ord not in supported_vector_norms: 724 raise ValueError("'ord' must be a supported vector norm, got %s" % ord) 725 if axis is not None: 726 axis = (axis,) 727 728 with ops.name_scope(name, 'norm', [tensor]): 729 tensor = ops.convert_to_tensor(tensor) 730 731 if ord in ['fro', 'euclidean', 2, 2.0]: 732 if is_matrix_norm and ord in [2, 2.0]: 733 rank = array_ops.rank(tensor) 734 positive_axis = map_fn.map_fn( 735 lambda i: control_flow_ops.cond(i >= 0, lambda: i, lambda: i + rank), 736 ops.convert_to_tensor(axis)) 737 axes = math_ops.range(rank) 738 perm_before = array_ops.concat([ 739 gen_array_ops.list_diff(axes, positive_axis, dtypes.int32)[0], 740 positive_axis 741 ], 742 axis=0) 743 perm_after = map_fn.map_fn( 744 lambda i: math_ops.cast( 745 array_ops.squeeze( 746 array_ops.where_v2(math_ops.equal(perm_before, i))), 747 dtype=dtypes.int32), axes) 748 permed = array_ops.transpose(tensor, perm=perm_before) 749 matrix_2_norm = array_ops.expand_dims( 750 math_ops.reduce_max( 751 math_ops.abs(gen_linalg_ops.svd(permed, compute_uv=False)[0]), 752 axis=-1, 753 keepdims=True), 754 axis=-1) 755 result = array_ops.transpose(matrix_2_norm, perm=perm_after) 756 else: 757 result = math_ops.sqrt( 758 math_ops.reduce_sum( 759 tensor * math_ops.conj(tensor), axis, keepdims=True)) 760 # TODO(rmlarsen): Replace with the following, once gradients are defined 761 # result = math_ops.reduce_euclidean_norm(tensor, axis, keepdims=True) 762 else: 763 result = math_ops.abs(tensor) 764 if ord == 1: 765 sum_axis = None if axis is None else axis[0] 766 result = math_ops.reduce_sum(result, sum_axis, keepdims=True) 767 if is_matrix_norm: 768 result = math_ops.reduce_max(result, axis[-1], keepdims=True) 769 elif ord == np.inf: 770 if is_matrix_norm: 771 result = math_ops.reduce_sum(result, axis[1], keepdims=True) 772 max_axis = None if axis is None else axis[0] 773 result = math_ops.reduce_max(result, max_axis, keepdims=True) 774 else: 775 # General p-norms (positive p only) 776 result = math_ops.pow( 777 math_ops.reduce_sum(math_ops.pow(result, ord), axis, keepdims=True), 778 1.0 / ord) 779 if not keepdims: 780 result = array_ops.squeeze(result, axis) 781 return result 782 783 784# pylint: enable=invalid-name,redefined-builtin 785