1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #include <vector> 17 18 #define EIGEN_USE_THREADS 19 20 #include "third_party/eigen3/Eigen/Core" 21 #include "third_party/eigen3/Eigen/SparseCholesky" 22 #include "third_party/eigen3/Eigen/SparseCore" 23 #include "third_party/eigen3/Eigen/OrderingMethods" 24 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" 25 #include "tensorflow/core/framework/allocator.h" 26 #include "tensorflow/core/framework/op.h" 27 #include "tensorflow/core/framework/op_kernel.h" 28 #include "tensorflow/core/framework/variant_op_registry.h" 29 #include "tensorflow/core/kernels/sparse/kernels.h" 30 #include "tensorflow/core/kernels/sparse/sparse_matrix.h" 31 #include "tensorflow/core/util/work_sharder.h" 32 33 namespace tensorflow { 34 35 // Op to compute the Approximate Minimum Degree (AMD) ordering for a sparse 36 // matrix. 37 // 38 // Accepts a CSRSparseMatrix which may represent a single sparse matrix (rank 2) 39 // or a batch of sparse matrices (rank 3). Each component must be a square 40 // matrix. The input is assumed to be symmetric; only the lower triangular part 41 // of each component matrix is read. The numeric values of the sparse matrix 42 // does not affect the returned AMD ordering; only the sparsity pattern does. 43 // 44 // For each component sparse matrix A, the corresponding output Tensor 45 // represents the AMD ordering of A's rows and columns. The ordering is returned 46 // as a 1D Tensor (per batch) containing the list of indices, i.e. it contains 47 // each of the integers {0, .. N-1} exactly once; where N is the number of rows 48 // of the sparse matrix. The ith element represents the index of the row that 49 // the ith row should map to. 50 51 // If P represents the permutation matrix corresponding to the indices, then the 52 // matrix: 53 // P^{-1} * A * P 54 // would have a sparse Cholesky decomposition with fewer structural non-zero 55 // elements than the sparse Cholesky decomposition of A itself. 56 class CSROrderingAMDCPUOp : public OpKernel { 57 using SparseMatrix = Eigen::SparseMatrix<int, Eigen::RowMajor>; 58 using Indices = 59 Eigen::Matrix<int, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>; 60 using IndicesMap = Eigen::Map<Indices>; 61 using ConstIndicesMap = Eigen::Map<const Indices>; 62 63 public: CSROrderingAMDCPUOp(OpKernelConstruction * c)64 explicit CSROrderingAMDCPUOp(OpKernelConstruction* c) : OpKernel(c) {} 65 Compute(OpKernelContext * ctx)66 void Compute(OpKernelContext* ctx) final { 67 // Extract the input CSRSparseMatrix. 68 const CSRSparseMatrix* input_matrix; 69 OP_REQUIRES_OK(ctx, ExtractVariantFromInput(ctx, 0, &input_matrix)); 70 71 const Tensor& dense_shape = input_matrix->dense_shape(); 72 const int rank = dense_shape.dim_size(0); 73 OP_REQUIRES(ctx, rank == 2 || rank == 3, 74 errors::InvalidArgument("sparse matrix must have rank 2 or 3; ", 75 "but dense_shape has size ", rank)); 76 77 auto dense_shape_vec = dense_shape.vec<int64>(); 78 const int64 num_rows = dense_shape_vec((rank == 2) ? 0 : 1); 79 const int64 num_cols = dense_shape_vec((rank == 2) ? 1 : 2); 80 81 OP_REQUIRES(ctx, num_rows == num_cols, 82 errors::InvalidArgument("sparse matrix must be square; got: ", 83 num_rows, " != ", num_cols)); 84 85 // Allocate the output permutation indices. 86 const int batch_size = input_matrix->batch_size(); 87 TensorShape permutation_indices_shape = 88 (rank == 2) ? TensorShape{num_rows} : TensorShape{batch_size, num_rows}; 89 Tensor permutation_indices(cpu_allocator(), DT_INT32, 90 permutation_indices_shape); 91 ctx->set_output(0, permutation_indices); 92 93 // Parallelize AMD computation across batches using a threadpool. 94 auto worker_threads = *(ctx->device()->tensorflow_cpu_worker_threads()); 95 const int64 amd_cost_per_batch = 96 10 * num_rows * (input_matrix->total_nnz() / batch_size); 97 Shard( 98 worker_threads.num_threads, worker_threads.workers, batch_size, 99 amd_cost_per_batch, [&](int64 batch_begin, int64 batch_end) { 100 for (int64 batch_index = batch_begin; batch_index < batch_end; 101 ++batch_index) { 102 // Define an Eigen SparseMatrix Map to operate on the 103 // CSRSparseMatrix component without copying the data. 104 // The values doesn't matter for computing the ordering, hence we 105 // reuse the column pointers as dummy values. 106 Eigen::Map<const SparseMatrix> sparse_matrix( 107 num_rows, num_rows, input_matrix->nnz(batch_index), 108 input_matrix->row_pointers_vec(batch_index).data(), 109 input_matrix->col_indices_vec(batch_index).data(), 110 input_matrix->col_indices_vec(batch_index).data()); 111 Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic, int> 112 permutation_matrix; 113 // Compute the AMD ordering. 114 Eigen::AMDOrdering<int> amd_ordering; 115 amd_ordering(sparse_matrix.template selfadjointView<Eigen::Lower>(), 116 permutation_matrix); 117 // Define an Eigen Map over the allocated output Tensor so that it 118 // can be mutated in place. 119 IndicesMap permutation_map( 120 permutation_indices.flat<int>().data() + batch_index * num_rows, 121 num_rows, 1); 122 permutation_map = permutation_matrix.indices(); 123 } 124 }); 125 } 126 }; 127 128 REGISTER_KERNEL_BUILDER(Name("SparseMatrixOrderingAMD").Device(DEVICE_CPU), 129 CSROrderingAMDCPUOp); 130 131 } // namespace tensorflow 132