• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include <vector>
17 
18 #define EIGEN_USE_THREADS
19 
20 #include "third_party/eigen3/Eigen/Core"
21 #include "third_party/eigen3/Eigen/SparseCholesky"
22 #include "third_party/eigen3/Eigen/SparseCore"
23 #include "third_party/eigen3/Eigen/OrderingMethods"
24 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
25 #include "tensorflow/core/framework/allocator.h"
26 #include "tensorflow/core/framework/op.h"
27 #include "tensorflow/core/framework/op_kernel.h"
28 #include "tensorflow/core/framework/variant_op_registry.h"
29 #include "tensorflow/core/kernels/sparse/kernels.h"
30 #include "tensorflow/core/kernels/sparse/sparse_matrix.h"
31 #include "tensorflow/core/util/work_sharder.h"
32 
33 namespace tensorflow {
34 
35 // Op to compute the Approximate Minimum Degree (AMD) ordering for a sparse
36 // matrix.
37 //
38 // Accepts a CSRSparseMatrix which may represent a single sparse matrix (rank 2)
39 // or a batch of sparse matrices (rank 3). Each component must be a square
40 // matrix. The input is assumed to be symmetric; only the lower triangular part
41 // of each component matrix is read. The numeric values of the sparse matrix
42 // does not affect the returned AMD ordering; only the sparsity pattern does.
43 //
44 // For each component sparse matrix A, the corresponding output Tensor
45 // represents the AMD ordering of A's rows and columns. The ordering is returned
46 // as a 1D Tensor (per batch) containing the list of indices, i.e. it contains
47 // each of the integers {0, .. N-1} exactly once; where N is the number of rows
48 // of the sparse matrix. The ith element represents the index of the row that
49 // the ith row should map to.
50 
51 // If P represents the permutation matrix corresponding to the indices, then the
52 // matrix:
53 //   P^{-1} * A * P
54 // would have a sparse Cholesky decomposition with fewer structural non-zero
55 // elements than the sparse Cholesky decomposition of A itself.
56 class CSROrderingAMDCPUOp : public OpKernel {
57   using SparseMatrix = Eigen::SparseMatrix<int, Eigen::RowMajor>;
58   using Indices =
59       Eigen::Matrix<int, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
60   using IndicesMap = Eigen::Map<Indices>;
61   using ConstIndicesMap = Eigen::Map<const Indices>;
62 
63  public:
CSROrderingAMDCPUOp(OpKernelConstruction * c)64   explicit CSROrderingAMDCPUOp(OpKernelConstruction* c) : OpKernel(c) {}
65 
Compute(OpKernelContext * ctx)66   void Compute(OpKernelContext* ctx) final {
67     // Extract the input CSRSparseMatrix.
68     const CSRSparseMatrix* input_matrix;
69     OP_REQUIRES_OK(ctx, ExtractVariantFromInput(ctx, 0, &input_matrix));
70 
71     const Tensor& dense_shape = input_matrix->dense_shape();
72     const int rank = dense_shape.dim_size(0);
73     OP_REQUIRES(ctx, rank == 2 || rank == 3,
74                 errors::InvalidArgument("sparse matrix must have rank 2 or 3; ",
75                                         "but dense_shape has size ", rank));
76 
77     auto dense_shape_vec = dense_shape.vec<int64>();
78     const int64 num_rows = dense_shape_vec((rank == 2) ? 0 : 1);
79     const int64 num_cols = dense_shape_vec((rank == 2) ? 1 : 2);
80 
81     OP_REQUIRES(ctx, num_rows == num_cols,
82                 errors::InvalidArgument("sparse matrix must be square; got: ",
83                                         num_rows, " != ", num_cols));
84 
85     // Allocate the output permutation indices.
86     const int batch_size = input_matrix->batch_size();
87     TensorShape permutation_indices_shape =
88         (rank == 2) ? TensorShape{num_rows} : TensorShape{batch_size, num_rows};
89     Tensor permutation_indices(cpu_allocator(), DT_INT32,
90                                permutation_indices_shape);
91     ctx->set_output(0, permutation_indices);
92 
93     // Parallelize AMD computation across batches using a threadpool.
94     auto worker_threads = *(ctx->device()->tensorflow_cpu_worker_threads());
95     const int64 amd_cost_per_batch =
96         10 * num_rows * (input_matrix->total_nnz() / batch_size);
97     Shard(
98         worker_threads.num_threads, worker_threads.workers, batch_size,
99         amd_cost_per_batch, [&](int64 batch_begin, int64 batch_end) {
100           for (int64 batch_index = batch_begin; batch_index < batch_end;
101                ++batch_index) {
102             // Define an Eigen SparseMatrix Map to operate on the
103             // CSRSparseMatrix component without copying the data.
104             // The values doesn't matter for computing the ordering, hence we
105             // reuse the column pointers as dummy values.
106             Eigen::Map<const SparseMatrix> sparse_matrix(
107                 num_rows, num_rows, input_matrix->nnz(batch_index),
108                 input_matrix->row_pointers_vec(batch_index).data(),
109                 input_matrix->col_indices_vec(batch_index).data(),
110                 input_matrix->col_indices_vec(batch_index).data());
111             Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic, int>
112                 permutation_matrix;
113             // Compute the AMD ordering.
114             Eigen::AMDOrdering<int> amd_ordering;
115             amd_ordering(sparse_matrix.template selfadjointView<Eigen::Lower>(),
116                          permutation_matrix);
117             // Define an Eigen Map over the allocated output Tensor so that it
118             // can be mutated in place.
119             IndicesMap permutation_map(
120                 permutation_indices.flat<int>().data() + batch_index * num_rows,
121                 num_rows, 1);
122             permutation_map = permutation_matrix.indices();
123           }
124         });
125   }
126 };
127 
128 REGISTER_KERNEL_BUILDER(Name("SparseMatrixOrderingAMD").Device(DEVICE_CPU),
129                         CSROrderingAMDCPUOp);
130 
131 }  // namespace tensorflow
132