1# Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================== 15"""Tests for sparse_ops.sparse_tensor_dense_matmul.""" 16 17from __future__ import absolute_import 18from __future__ import division 19from __future__ import print_function 20 21import sys 22import time 23 24import numpy as np 25 26from tensorflow.core.protobuf import config_pb2 27from tensorflow.python.client import session 28from tensorflow.python.framework import constant_op 29from tensorflow.python.framework import dtypes 30from tensorflow.python.framework import ops 31from tensorflow.python.framework import sparse_tensor 32from tensorflow.python.framework import tensor_shape 33from tensorflow.python.framework import test_util 34from tensorflow.python.ops import array_ops 35from tensorflow.python.ops import control_flow_ops 36from tensorflow.python.ops import math_ops 37from tensorflow.python.ops import sparse_ops 38from tensorflow.python.platform import app 39from tensorflow.python.platform import test 40 41 42def _maybe_complex(x): 43 if x.dtype.kind == "c": # complex 44 return (x + 1j * x) / 2 45 return x 46 47 48class SparseTensorDenseMatMulTest(test.TestCase): 49 50 def _testMatmul(self, 51 x, 52 y, 53 adjoint_a=False, 54 adjoint_b=False, 55 indices_dtype=np.int64): 56 x_mat = np.matrix(x) 57 if adjoint_a: 58 x_mat = x_mat.H 59 y_mat = np.matrix(y) 60 if adjoint_b: 61 y_mat = y_mat.H 62 63 np_ans = x_mat * y_mat 64 65 x_indices = np.vstack(np.where(x)).astype(indices_dtype).T 66 x_values = x[np.where(x)] 67 x_shape = x.shape 68 69 with self.cached_session(use_gpu=True): 70 sp_x_value = sparse_tensor.SparseTensorValue( 71 indices=x_indices, values=x_values, dense_shape=x_shape) 72 tf_value_ans = sparse_ops.sparse_tensor_dense_matmul( 73 sp_x_value, y, adjoint_a=adjoint_a, adjoint_b=adjoint_b) 74 tf_tensor_ans = sparse_ops.sparse_tensor_dense_matmul( 75 sparse_tensor.SparseTensor.from_value(sp_x_value), 76 y, 77 adjoint_a=adjoint_a, 78 adjoint_b=adjoint_b) 79 80 # Ensure that the RHS shape is known at least. 81 self.assertEqual(tf_value_ans.get_shape()[1], np_ans.shape[1]) 82 self.assertEqual(tf_tensor_ans.get_shape()[1], np_ans.shape[1]) 83 84 for out in (self.evaluate(tf_value_ans), self.evaluate(tf_tensor_ans)): 85 if x.dtype == np.float32: 86 self.assertAllClose(np_ans, out, rtol=1e-4, atol=1e-4) 87 elif x.dtype == np.float64: 88 self.assertAllClose(np_ans, out, rtol=1e-6, atol=1e-6) 89 else: 90 self.assertAllClose(np_ans, out, rtol=1e-4, atol=1e-4) 91 92 def _testBasic(self, value_dtype, indices_dtype=np.int64): 93 x = _maybe_complex(np.random.rand(10, 10).astype(value_dtype)) 94 x[np.abs(x) < 0.5] = 0 # Make it sparse 95 96 y = _maybe_complex(np.random.randn(10, 20).astype(value_dtype)) 97 98 self._testMatmul(x, y, indices_dtype=indices_dtype) 99 100 @test_util.run_deprecated_v1 101 def testBasic(self): 102 np.random.seed(127) # Repeatable results 103 self._testBasic(np.int32) 104 self._testBasic(np.float32) 105 self._testBasic(np.float64) 106 self._testBasic(np.complex64) 107 self._testBasic(np.complex128) 108 self._testBasic(np.int32, indices_dtype=np.int32) 109 self._testBasic(np.float32, indices_dtype=np.int32) 110 111 @test_util.run_deprecated_v1 112 def testShapeInference(self): 113 x = np.random.rand(10, 10) 114 x[np.abs(x) < 0.5] = 0 # Make it sparse 115 y = np.random.randn(10, 20) 116 x_indices = np.vstack(np.where(x)).astype(np.int64).T 117 x_values = x[np.where(x)] 118 x_shape = x.shape 119 x_st = sparse_tensor.SparseTensor(x_indices, x_values, x_shape) 120 result = sparse_ops.sparse_tensor_dense_matmul(x_st, y) 121 self.assertEqual(result.get_shape(), (10, 20)) 122 123 x_shape_unknown = array_ops.placeholder(dtype=dtypes.int64, shape=None) 124 x_st_shape_unknown = sparse_tensor.SparseTensor(x_indices, x_values, 125 x_shape_unknown) 126 result_left_shape_unknown = sparse_ops.sparse_tensor_dense_matmul( 127 x_st_shape_unknown, y) 128 self.assertEqual(result_left_shape_unknown.get_shape().as_list(), 129 [None, 20]) 130 131 x_shape_inconsistent = [10, 15] 132 x_st_shape_inconsistent = sparse_tensor.SparseTensor(x_indices, x_values, 133 x_shape_inconsistent) 134 with self.assertRaisesRegexp(ValueError, "Dimensions must be equal"): 135 sparse_ops.sparse_tensor_dense_matmul(x_st_shape_inconsistent, y) 136 137 @test_util.deprecated_graph_mode_only 138 def testInvalidIndicesForSparseTensorDenseMatmul(self): 139 # Note: use_gpu=False because nice errors are only returned from CPU kernel. 140 with self.session(use_gpu=False): 141 indices = np.matrix([[1, 10]]).astype(np.int64) 142 values = np.array([10]).astype(np.float32) 143 shape = [3, 2] 144 sparse_t = sparse_tensor.SparseTensor(indices, values, shape) 145 146 # Test multiplying by both a small and large dense matrix, to hit 147 # both cases in the kernel. 148 dense_t = np.matrix([[1] * 5, [2] * 5], dtype=np.float32) 149 with self.assertRaisesOpError( 150 "k .10. from index.0,1. out of bounds .>=2."): 151 self.evaluate(sparse_ops.sparse_tensor_dense_matmul(sparse_t, dense_t)) 152 dense_t = np.matrix([[1] * 500, [2] * 500], dtype=np.float32) 153 with self.assertRaisesOpError( 154 "k .10. from index.0,1. out of bounds .>=2."): 155 self.evaluate(sparse_ops.sparse_tensor_dense_matmul(sparse_t, dense_t)) 156 157 # Repeat with adjoint_a, to get a different error. 158 dense_t = np.matrix([[1] * 5, [2] * 5, [3] * 5], dtype=np.float32) 159 with self.assertRaisesOpError( 160 "m .10. from index.0,1. out of bounds .>=2."): 161 self.evaluate( 162 sparse_ops.sparse_tensor_dense_matmul( 163 sparse_t, dense_t, adjoint_a=True)) 164 dense_t = np.matrix([[1] * 500, [2] * 500, [3] * 500], dtype=np.float32) 165 with self.assertRaisesOpError( 166 "m .10. from index.0,1. out of bounds .>=2."): 167 self.evaluate( 168 sparse_ops.sparse_tensor_dense_matmul( 169 sparse_t, dense_t, adjoint_a=True)) 170 171 def testInvalidIndicesForSparseTensorDenseMatmulOnGPU(self): 172 # Note: use_gpu=False because nice errors are only returned from CPU kerne 173 if not test.is_gpu_available(): 174 return 175 with self.session(use_gpu=True): 176 indices = np.array([[1, 10]]).astype(np.int64) 177 values = np.array([10]).astype(np.float32) 178 shape = [3, 2] 179 sparse_t = sparse_tensor.SparseTensor(indices, values, shape) 180 181 # Test multiplying by both a small and large dense matrix, to hit 182 # both cases in the kernel. 183 dense_t = np.matrix([[1] * 5, [2] * 5], dtype=np.float32) 184 expected_t = np.array([[0] * 5, [np.nan] * 5, [0] * 5], dtype=np.float32) 185 self.assertAllClose(expected_t, 186 sparse_ops.sparse_tensor_dense_matmul( 187 sparse_t, dense_t)) 188 dense_t = np.matrix([[1] * 500, [2] * 500], dtype=np.float32) 189 expected_t = np.array( 190 [[0] * 500, [np.nan] * 500, [0] * 500], dtype=np.float32) 191 self.assertAllClose(expected_t, 192 sparse_ops.sparse_tensor_dense_matmul( 193 sparse_t, dense_t)) 194 195 # Repeat with adjoint_a, now the error is that the sparse index 196 # is OOO w.r.t. the output. The GPU kernel can't do much here, 197 # so it just doesn't accumulate. 198 199 dense_t = np.matrix([[1] * 5, [2] * 5, [3] * 5], dtype=np.float32) 200 expected_t = np.array([[0] * 5, [0] * 5], dtype=np.float32) 201 self.assertAllClose(expected_t, 202 sparse_ops.sparse_tensor_dense_matmul( 203 sparse_t, dense_t, adjoint_a=True)) 204 205 dense_t = np.matrix([[1] * 500, [2] * 500, [3] * 500], dtype=np.float32) 206 expected_t = np.array([[0] * 500, [0] * 500], dtype=np.float32) 207 self.assertAllClose(expected_t, 208 sparse_ops.sparse_tensor_dense_matmul( 209 sparse_t, dense_t, adjoint_a=True)) 210 211 # Tests setting one dimension to be a high value. 212 def _testLarge(self, np_dtype): 213 r1 = np.random.randint(6000, 20000) 214 r2 = np.random.randint(1, 10) 215 r3 = np.random.randint(1, 10) 216 217 for m, k, n in [(r1, r2, r3), 218 (r2, r1, r3), 219 (r2, r3, r1)]: 220 x = _maybe_complex(np.random.rand(m, k).astype(np_dtype)) 221 x[np.abs(x) < 0.8] = 0 222 223 y = _maybe_complex(np.random.randn(k, n).astype(np_dtype)) 224 225 self._testMatmul(x, y, adjoint_a=False, adjoint_b=False) 226 self._testMatmul(x.transpose(), y, adjoint_a=True, adjoint_b=False) 227 self._testMatmul(x, y.transpose(), adjoint_a=False, adjoint_b=True) 228 self._testMatmul( 229 x.transpose(), y.transpose(), adjoint_a=True, adjoint_b=True) 230 231 np.random.seed(127) # Repeatable results 232 self._testLarge(np.float32) 233 self._testLarge(np.float64) 234 self._testLarge(np.complex64) 235 self._testLarge(np.complex128) 236 237 # Tests random sized matrices. 238 @test_util.run_deprecated_v1 239 def testFloatRandom(self): 240 np.random.seed(127) # Repeatable results 241 for _ in range(8): 242 for adjoint_a in [True, False]: 243 for adjoint_b in [True, False]: 244 for thresh in [0.0, 0.2, 0.8, 1.0]: 245 n, k, m = np.random.randint(1, 100, size=3) 246 x = np.random.rand(n, k).astype(np.float32) 247 x[x < thresh] = 0 # Make it sparse 248 y = np.random.randn(k, m).astype(np.float32) 249 x = x.transpose() if adjoint_a else x 250 y = y.transpose() if adjoint_b else y 251 self._testMatmul(x, y, adjoint_a, adjoint_b) 252 253 254def _sparse_tensor_dense_vs_dense_matmul_benchmark_dense(x, y, adjoint_a, 255 adjoint_b): 256 257 def body(t, prev): 258 with ops.control_dependencies([prev]): 259 return (t + 1, math_ops.matmul( 260 x, 261 y, 262 transpose_a=adjoint_a, 263 transpose_b=adjoint_b, 264 a_is_sparse=True, 265 b_is_sparse=False)) 266 267 t0 = constant_op.constant(0) 268 v0 = constant_op.constant(0.0) 269 270 def _timeit(iterations, _): 271 (_, final) = control_flow_ops.while_loop( 272 lambda t, _: t < iterations, 273 body, (t0, v0), 274 parallel_iterations=1, 275 back_prop=False, 276 shape_invariants=(tensor_shape.TensorShape(()), 277 tensor_shape.TensorShape(None))) 278 return [final] 279 280 return _timeit 281 282 283def _sparse_tensor_dense_vs_dense_matmul_benchmark_sparse(x_ind, x_val, x_shape, 284 y, adjoint_a, 285 adjoint_b): 286 sp_x = sparse_tensor.SparseTensor( 287 indices=x_ind, values=x_val, dense_shape=x_shape) 288 289 def body(t, prev): 290 with ops.control_dependencies([prev]): 291 return (t + 1, sparse_ops.sparse_tensor_dense_matmul( 292 sp_x, y, adjoint_a=adjoint_a, adjoint_b=adjoint_b)) 293 294 t0 = constant_op.constant(0) 295 v0 = constant_op.constant(0.0) 296 297 def _timeit(iterations, _): 298 (_, final) = control_flow_ops.while_loop( 299 lambda t, _: t < iterations, 300 body, (t0, v0), 301 parallel_iterations=1, 302 back_prop=False, 303 shape_invariants=(tensor_shape.TensorShape(()), 304 tensor_shape.TensorShape(None))) 305 return [final] 306 307 return _timeit 308 309 310def sparse_tensor_dense_vs_dense_matmul_benchmark(thresh, 311 m, 312 k, 313 n, 314 adjoint_a, 315 adjoint_b, 316 use_gpu, 317 skip_dense=False): 318 config = config_pb2.ConfigProto() 319 config.allow_soft_placement = True 320 321 # Configurable for benchmarking: 322 # config.intra_op_parallelism_threads = 100 323 # config.gpu_options.per_process_gpu_memory_fraction = 0.3 324 325 np.random.seed([6, 117]) # Reproducibility 326 x = np.random.rand(m, k).astype(np.float32) 327 x[x < thresh] = 0 328 y = np.random.randn(k, n).astype(np.float32) 329 if adjoint_a: 330 x = x.T 331 if adjoint_b: 332 y = y.T 333 334 def _timer(sess, ops_fn, iterations): 335 # Warm in 336 sess.run(ops_fn(10, sess)) 337 338 # Timing run 339 start = time.time() 340 sess.run(ops_fn(iterations, sess)) 341 end = time.time() 342 343 return (end - start) / (1.0 * iterations) # Average runtime per iteration 344 345 # Using regular matmul, marking one of the matrices as dense. 346 if skip_dense: 347 delta_dense = float("nan") 348 else: 349 with session.Session(config=config, graph=ops.Graph()) as sess: 350 if not use_gpu: 351 with ops.device("/cpu:0"): 352 x_t = constant_op.constant(x) 353 y_t = constant_op.constant(y) 354 ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_dense( 355 x_t, y_t, adjoint_a, adjoint_b) 356 else: 357 with ops.device("/device:GPU:0"): 358 x_t = constant_op.constant(x) 359 y_t = constant_op.constant(y) 360 ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_dense( 361 x_t, y_t, adjoint_a, adjoint_b) 362 delta_dense = _timer(sess, ops_fn, 200) 363 364 # Using sparse_tensor_dense_matmul. 365 with session.Session("", config=config, graph=ops.Graph()) as sess: 366 if not use_gpu: 367 with ops.device("/cpu:0"): 368 x_ind = constant_op.constant(np.vstack(np.where(x)).astype(np.int64).T) 369 x_val = constant_op.constant(x[np.where(x)]) 370 x_shape = constant_op.constant(np.array(x.shape).astype(np.int64)) 371 y_t = constant_op.constant(y) 372 ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_sparse( 373 x_ind, x_val, x_shape, y_t, adjoint_a, adjoint_b) 374 else: 375 with ops.device("/device:GPU:0"): 376 x_ind = constant_op.constant(np.vstack(np.where(x)).astype(np.int64).T) 377 x_val = constant_op.constant(x[np.where(x)]) 378 x_shape = constant_op.constant(np.array(x.shape).astype(np.int64)) 379 y_t = constant_op.constant(y) 380 ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_sparse( 381 x_ind, x_val, x_shape, y_t, adjoint_a, adjoint_b) 382 delta_sparse = _timer(sess, ops_fn, 200) 383 384 print("%g \t %d \t %s \t %d \t %d \t %g \t %g \t %g" % 385 (1 - thresh, n, use_gpu, m, k, delta_dense, delta_sparse, 386 delta_sparse / delta_dense)) 387 388 389def main(_): 390 print("DenseDense MatMul (w/ Sparse Flag) vs. SparseTensorDense MatMul") 391 print("Matrix sizes:") 392 print(" A sparse [m, k] with % nonzero values between 1% and 80%") 393 print(" B dense [k, n]") 394 print("") 395 print("% nnz \t n \t gpu \t m \t k \t dt(dense) \t dt(sparse) " 396 "\t dt(sparse)/dt(dense)") 397 398 for thresh in (0.99, 0.8, 0.5, 0.2): 399 for n in (50, 100): 400 for use_gpu in (True, False): 401 for m in (100, 1000): 402 for k in (100, 1000): 403 sparse_tensor_dense_vs_dense_matmul_benchmark( 404 thresh, m, k, n, False, False, use_gpu=use_gpu) 405 406 # Enable for large scale benchmarks, these ones take a long time to run. 407 # 408 # for use_gpu in (True, False): 409 # sparse_tensor_dense_vs_dense_matmul_benchmark( 410 # thresh=0.99, m=1000000, k=1000, n=100, adjoint_a=False, 411 # adjoint_b=False, use_gpu=use_gpu, skip_dense=True) 412 413 414if __name__ == "__main__": 415 if "--benchmarks" in sys.argv: 416 sys.argv.remove("--benchmarks") 417 app.run() 418 else: 419 test.main() 420