1# Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================== 15"""Tests for sparse_ops.sparse_tensor_dense_matmul.""" 16 17from __future__ import absolute_import 18from __future__ import division 19from __future__ import print_function 20 21import sys 22import time 23 24from absl import app 25import numpy as np 26 27from tensorflow.core.protobuf import config_pb2 28from tensorflow.python.client import session 29from tensorflow.python.framework import constant_op 30from tensorflow.python.framework import dtypes 31from tensorflow.python.framework import ops 32from tensorflow.python.framework import sparse_tensor 33from tensorflow.python.framework import tensor_shape 34from tensorflow.python.framework import test_util 35from tensorflow.python.ops import array_ops 36from tensorflow.python.ops import control_flow_ops 37from tensorflow.python.ops import math_ops 38from tensorflow.python.ops import sparse_ops 39from tensorflow.python.platform import test 40 41 42def _maybe_complex(x): 43 if x.dtype.kind == "c": # complex 44 return x + 1j * x 45 return x 46 47 48class SparseTensorDenseMatMulTest(test.TestCase): 49 50 def _testMatmul(self, 51 x, 52 y, 53 adjoint_a=False, 54 adjoint_b=False, 55 indices_dtype=np.int64): 56 x_mat = np.array(x) 57 if adjoint_a: 58 x_mat = x_mat.T.conj() 59 y_mat = np.array(y) 60 if adjoint_b: 61 y_mat = y_mat.T.conj() 62 63 np_ans = x_mat.dot(y_mat) 64 65 x_indices = np.vstack(np.where(x)).astype(indices_dtype).T 66 x_values = x[np.where(x)] 67 x_shape = x.shape 68 69 with self.cached_session(): 70 sp_x_value = sparse_tensor.SparseTensorValue( 71 indices=x_indices, values=x_values, dense_shape=x_shape) 72 tf_value_ans = sparse_ops.sparse_tensor_dense_matmul( 73 sp_x_value, y, adjoint_a=adjoint_a, adjoint_b=adjoint_b) 74 tf_tensor_ans = sparse_ops.sparse_tensor_dense_matmul( 75 sparse_tensor.SparseTensor.from_value(sp_x_value), 76 y, 77 adjoint_a=adjoint_a, 78 adjoint_b=adjoint_b) 79 80 # Ensure that the RHS shape is known at least. 81 self.assertEqual(tf_value_ans.get_shape()[1], np_ans.shape[1]) 82 self.assertEqual(tf_tensor_ans.get_shape()[1], np_ans.shape[1]) 83 84 for out in (self.evaluate(tf_value_ans), self.evaluate(tf_tensor_ans)): 85 if x.dtype == np.float32: 86 self.assertAllClose(np_ans, out, rtol=1e-4, atol=1e-4) 87 elif x.dtype == np.float64: 88 self.assertAllClose(np_ans, out, rtol=1e-6, atol=1e-6) 89 elif x.dtype == np.float16: 90 self.assertAllClose(np_ans, out, rtol=1e-3, atol=1e-3) 91 else: 92 self.assertAllClose(np_ans, out, rtol=1e-3, atol=1e-3) 93 94 def _testBasic(self, value_dtype, indices_dtype=np.int64): 95 x = _maybe_complex(np.random.rand(10, 10).astype(value_dtype)) 96 x[np.abs(x) < 0.5] = 0 # Make it sparse 97 98 y = _maybe_complex(np.random.randn(10, 20).astype(value_dtype)) 99 100 self._testMatmul(x, y, indices_dtype=indices_dtype) 101 102 def testBasic(self): 103 np.random.seed(127) # Repeatable results 104 self._testBasic(np.int32) 105 self._testBasic(np.float16) 106 self._testBasic(np.float32) 107 self._testBasic(np.float64) 108 self._testBasic(np.complex64) 109 self._testBasic(np.complex128) 110 self._testBasic(np.int32, indices_dtype=np.int32) 111 self._testBasic(np.float32, indices_dtype=np.int32) 112 113 def testShapeInference(self): 114 x = np.random.rand(10, 10) 115 x[np.abs(x) < 0.5] = 0 # Make it sparse 116 y = np.random.randn(10, 20) 117 x_indices = np.vstack(np.where(x)).astype(np.int64).T 118 x_values = x[np.where(x)] 119 x_shape = x.shape 120 121 with ops.Graph().as_default(): 122 x_st = sparse_tensor.SparseTensor(x_indices, x_values, x_shape) 123 result = sparse_ops.sparse_tensor_dense_matmul(x_st, y) 124 self.assertEqual(result.get_shape(), (10, 20)) 125 126 x_shape_unknown = array_ops.placeholder(dtype=dtypes.int64, shape=None) 127 x_st_shape_unknown = sparse_tensor.SparseTensor(x_indices, x_values, 128 x_shape_unknown) 129 result_left_shape_unknown = sparse_ops.sparse_tensor_dense_matmul( 130 x_st_shape_unknown, y) 131 self.assertEqual(result_left_shape_unknown.get_shape().as_list(), 132 [None, 20]) 133 134 x_shape_inconsistent = [10, 15] 135 x_st_shape_inconsistent = sparse_tensor.SparseTensor( 136 x_indices, x_values, x_shape_inconsistent) 137 with self.assertRaisesRegex(ValueError, "Dimensions must be equal"): 138 sparse_ops.sparse_tensor_dense_matmul(x_st_shape_inconsistent, y) 139 140 @test_util.run_in_graph_and_eager_modes(use_gpu=False) 141 def testInvalidIndicesForSparseTensorDenseMatmul(self): 142 # TODO(b/169813429): Make GPU kernel return nice errors too. 143 indices = np.array([[1, 10]]).astype(np.int64) 144 values = np.array([10]).astype(np.float32) 145 shape = [3, 2] 146 sparse_t = sparse_tensor.SparseTensor(indices, values, shape) 147 148 # Test multiplying by both a small and large dense matrix, to hit 149 # both cases in the kernel. 150 dense_t = np.array([[1] * 5, [2] * 5], dtype=np.float32) 151 with self.assertRaisesOpError("k .10. from index.0,1. out of bounds .>=2."): 152 self.evaluate(sparse_ops.sparse_tensor_dense_matmul(sparse_t, dense_t)) 153 dense_t = np.array([[1] * 500, [2] * 500], dtype=np.float32) 154 with self.assertRaisesOpError("k .10. from index.0,1. out of bounds .>=2."): 155 self.evaluate(sparse_ops.sparse_tensor_dense_matmul(sparse_t, dense_t)) 156 157 # Repeat with adjoint_a, to get a different error. 158 dense_t = np.array([[1] * 5, [2] * 5, [3] * 5], dtype=np.float32) 159 with self.assertRaisesOpError("m .10. from index.0,1. out of bounds .>=2."): 160 self.evaluate( 161 sparse_ops.sparse_tensor_dense_matmul( 162 sparse_t, dense_t, adjoint_a=True)) 163 dense_t = np.array([[1] * 500, [2] * 500, [3] * 500], dtype=np.float32) 164 with self.assertRaisesOpError("m .10. from index.0,1. out of bounds .>=2."): 165 self.evaluate( 166 sparse_ops.sparse_tensor_dense_matmul( 167 sparse_t, dense_t, adjoint_a=True)) 168 169 def testUnorderedIndicesForSparseTensorDenseMatmul(self): 170 indices = np.array([(2, 1), (0, 0)]).astype(np.int64) 171 values = np.array([10, 11]).astype(np.float32) 172 shape = [3, 2] 173 sparse_t = sparse_tensor.SparseTensor(indices, values, shape) 174 175 dense_t = np.array([[1] * 500, [2] * 500], dtype=np.float32) 176 expected_t = np.array([[11] * 500, [0] * 500, [20] * 500], dtype=np.float32) 177 178 self.assertAllClose( 179 expected_t, sparse_ops.sparse_tensor_dense_matmul(sparse_t, dense_t)) 180 181 @test_util.run_gpu_only 182 def testInvalidIndicesForSparseTensorDenseMatmulOnGPU(self): 183 indices = np.array([[1, 10]]).astype(np.int64) 184 values = np.array([10]).astype(np.float32) 185 shape = [3, 2] 186 sparse_t = sparse_tensor.SparseTensor(indices, values, shape) 187 188 # Test multiplying by both a small and large dense matrix, to hit 189 # both cases in the kernel. 190 dense_t = np.array([[1] * 5, [2] * 5], dtype=np.float32) 191 expected_t = np.array([[0] * 5, [np.nan] * 5, [0] * 5], dtype=np.float32) 192 self.assertAllClose( 193 expected_t, sparse_ops.sparse_tensor_dense_matmul(sparse_t, dense_t)) 194 dense_t = np.array([[1] * 500, [2] * 500], dtype=np.float32) 195 expected_t = np.array([[0] * 500, [np.nan] * 500, [0] * 500], 196 dtype=np.float32) 197 self.assertAllClose( 198 expected_t, sparse_ops.sparse_tensor_dense_matmul(sparse_t, dense_t)) 199 200 # Repeat with adjoint_a, now the error is that the sparse index 201 # is OOO w.r.t. the output. The GPU kernel can't do much here, 202 # so it just doesn't accumulate. 203 204 dense_t = np.array([[1] * 5, [2] * 5, [3] * 5], dtype=np.float32) 205 expected_t = np.array([[0] * 5, [0] * 5], dtype=np.float32) 206 self.assertAllClose( 207 expected_t, 208 sparse_ops.sparse_tensor_dense_matmul( 209 sparse_t, dense_t, adjoint_a=True)) 210 211 dense_t = np.array([[1] * 500, [2] * 500, [3] * 500], dtype=np.float32) 212 expected_t = np.array([[0] * 500, [0] * 500], dtype=np.float32) 213 self.assertAllClose( 214 expected_t, 215 sparse_ops.sparse_tensor_dense_matmul( 216 sparse_t, dense_t, adjoint_a=True)) 217 218 def _testLarge(self, np_dtype): 219 r1 = np.random.randint(6000, 20000) 220 r2 = np.random.randint(1, 10) 221 r3 = np.random.randint(1, 10) 222 223 for m, k, n in [(r1, r2, r3), 224 (r2, r1, r3), 225 (r2, r3, r1)]: 226 x = _maybe_complex(np.random.rand(m, k).astype(np_dtype)) 227 x[np.abs(x) < 0.8] = 0 228 229 y = _maybe_complex(np.random.randn(k, n).astype(np_dtype)) 230 231 self._testMatmul(x, y, adjoint_a=False, adjoint_b=False) 232 self._testMatmul(x.transpose(), y, adjoint_a=True, adjoint_b=False) 233 self._testMatmul(x, y.transpose(), adjoint_a=False, adjoint_b=True) 234 self._testMatmul( 235 x.transpose(), y.transpose(), adjoint_a=True, adjoint_b=True) 236 237 # Tests setting one dimension to be a high value. 238 def testLarge(self): 239 np.random.seed(127) # Repeatable results 240 self._testLarge(np.float32) 241 self._testLarge(np.float64) 242 self._testLarge(np.complex64) 243 self._testLarge(np.complex128) 244 245 # Tests random sized matrices. 246 def testFloatRandom(self): 247 np.random.seed(127) # Repeatable results 248 for _ in range(8): 249 for adjoint_a in [True, False]: 250 for adjoint_b in [True, False]: 251 for thresh in [0.0, 0.2, 0.8, 1.0]: 252 n, k, m = np.random.randint(1, 100, size=3) 253 x = np.random.rand(n, k).astype(np.float32) 254 x[x < thresh] = 0 # Make it sparse 255 y = np.random.randn(k, m).astype(np.float32) 256 x = x.transpose() if adjoint_a else x 257 y = y.transpose() if adjoint_b else y 258 self._testMatmul(x, y, adjoint_a, adjoint_b) 259 260 261def _sparse_tensor_dense_vs_dense_matmul_benchmark_dense(x, y, adjoint_a, 262 adjoint_b): 263 264 def body(t, prev): 265 with ops.control_dependencies([prev]): 266 return (t + 1, math_ops.matmul( 267 x, 268 y, 269 transpose_a=adjoint_a, 270 transpose_b=adjoint_b, 271 a_is_sparse=True, 272 b_is_sparse=False)) 273 274 t0 = constant_op.constant(0) 275 v0 = constant_op.constant(0.0) 276 277 def _timeit(iterations, _): 278 (_, final) = control_flow_ops.while_loop( 279 lambda t, _: t < iterations, 280 body, (t0, v0), 281 parallel_iterations=1, 282 back_prop=False, 283 shape_invariants=(tensor_shape.TensorShape(()), 284 tensor_shape.TensorShape(None))) 285 return [final] 286 287 return _timeit 288 289 290def _sparse_tensor_dense_vs_dense_matmul_benchmark_sparse(x_ind, x_val, x_shape, 291 y, adjoint_a, 292 adjoint_b): 293 sp_x = sparse_tensor.SparseTensor( 294 indices=x_ind, values=x_val, dense_shape=x_shape) 295 296 def body(t, prev): 297 with ops.control_dependencies([prev]): 298 return (t + 1, sparse_ops.sparse_tensor_dense_matmul( 299 sp_x, y, adjoint_a=adjoint_a, adjoint_b=adjoint_b)) 300 301 t0 = constant_op.constant(0) 302 v0 = constant_op.constant(0.0) 303 304 def _timeit(iterations, _): 305 (_, final) = control_flow_ops.while_loop( 306 lambda t, _: t < iterations, 307 body, (t0, v0), 308 parallel_iterations=1, 309 back_prop=False, 310 shape_invariants=(tensor_shape.TensorShape(()), 311 tensor_shape.TensorShape(None))) 312 return [final] 313 314 return _timeit 315 316 317def sparse_tensor_dense_vs_dense_matmul_benchmark(thresh, 318 m, 319 k, 320 n, 321 adjoint_a, 322 adjoint_b, 323 use_gpu, 324 skip_dense=False): 325 config = config_pb2.ConfigProto() 326 config.allow_soft_placement = True 327 328 # Configurable for benchmarking: 329 # config.intra_op_parallelism_threads = 100 330 # config.gpu_options.per_process_gpu_memory_fraction = 0.3 331 332 np.random.seed([6, 117]) # Reproducibility 333 x = np.random.rand(m, k).astype(np.float32) 334 x[x < thresh] = 0 335 y = np.random.randn(k, n).astype(np.float32) 336 if adjoint_a: 337 x = x.T 338 if adjoint_b: 339 y = y.T 340 341 def _timer(sess, ops_fn, iterations): 342 # Warm in 343 sess.run(ops_fn(10, sess)) 344 345 # Timing run 346 start = time.time() 347 sess.run(ops_fn(iterations, sess)) 348 end = time.time() 349 350 return (end - start) / (1.0 * iterations) # Average runtime per iteration 351 352 # Using regular matmul, marking one of the matrices as dense. 353 if skip_dense: 354 delta_dense = float("nan") 355 else: 356 with session.Session(config=config, graph=ops.Graph()) as sess: 357 if not use_gpu: 358 with ops.device("/cpu:0"): 359 x_t = constant_op.constant(x) 360 y_t = constant_op.constant(y) 361 ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_dense( 362 x_t, y_t, adjoint_a, adjoint_b) 363 else: 364 with ops.device("/device:GPU:0"): 365 x_t = constant_op.constant(x) 366 y_t = constant_op.constant(y) 367 ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_dense( 368 x_t, y_t, adjoint_a, adjoint_b) 369 delta_dense = _timer(sess, ops_fn, 200) 370 371 # Using sparse_tensor_dense_matmul. 372 with session.Session("", config=config, graph=ops.Graph()) as sess: 373 if not use_gpu: 374 with ops.device("/cpu:0"): 375 x_ind = constant_op.constant(np.vstack(np.where(x)).astype(np.int64).T) 376 x_val = constant_op.constant(x[np.where(x)]) 377 x_shape = constant_op.constant(np.array(x.shape).astype(np.int64)) 378 y_t = constant_op.constant(y) 379 ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_sparse( 380 x_ind, x_val, x_shape, y_t, adjoint_a, adjoint_b) 381 else: 382 with ops.device("/device:GPU:0"): 383 x_ind = constant_op.constant(np.vstack(np.where(x)).astype(np.int64).T) 384 x_val = constant_op.constant(x[np.where(x)]) 385 x_shape = constant_op.constant(np.array(x.shape).astype(np.int64)) 386 y_t = constant_op.constant(y) 387 ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_sparse( 388 x_ind, x_val, x_shape, y_t, adjoint_a, adjoint_b) 389 delta_sparse = _timer(sess, ops_fn, 200) 390 391 print("%g \t %d \t %s \t %d \t %d \t %g \t %g \t %g" % 392 (1 - thresh, n, use_gpu, m, k, delta_dense, delta_sparse, 393 delta_sparse / delta_dense)) 394 395 396def main(_): 397 print("DenseDense MatMul (w/ Sparse Flag) vs. SparseTensorDense MatMul") 398 print("Matrix sizes:") 399 print(" A sparse [m, k] with % nonzero values between 1% and 80%") 400 print(" B dense [k, n]") 401 print("") 402 print("% nnz \t n \t gpu \t m \t k \t dt(dense) \t dt(sparse) " 403 "\t dt(sparse)/dt(dense)") 404 405 for thresh in (0.99, 0.8, 0.5, 0.2): 406 for n in (50, 100): 407 for use_gpu in (True, False): 408 for m in (100, 1000): 409 for k in (100, 1000): 410 sparse_tensor_dense_vs_dense_matmul_benchmark( 411 thresh, m, k, n, False, False, use_gpu=use_gpu) 412 413 # Enable for large scale benchmarks, these ones take a long time to run. 414 # 415 # for use_gpu in (True, False): 416 # sparse_tensor_dense_vs_dense_matmul_benchmark( 417 # thresh=0.99, m=1000000, k=1000, n=100, adjoint_a=False, 418 # adjoint_b=False, use_gpu=use_gpu, skip_dense=True) 419 420 421if __name__ == "__main__": 422 if "--benchmarks" in sys.argv: 423 sys.argv.remove("--benchmarks") 424 app.run() # pylint: disable=no-value-for-parameter 425 else: 426 test.main() 427