// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H
#define EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H

namespace Eigen {

namespace internal {

// Core sparse*sparse kernel: computes res = lhs * rhs, one outer vector of
// res at a time, accumulating each column of the result into a dense scratch
// of size rows (= inner size of the result):
//   - mask[i]    : whether inner index i is already present in the current column
//   - values[i]  : accumulated value for inner index i
//   - indices[k] : the k-th inner index touched (unsorted), k < nnz
// Both operands are traversed through their evaluators, so lhs and rhs may be
// expressions; the caller is responsible for passing operands whose storage
// order matches the traversal (see the selectors below, which "fake" the
// storage order by swapping/transposing operands).
//
// \param sortedInsertion if true, the entries of each outer vector are
//        inserted in increasing inner-index order (sorting the touched
//        indices, or sweeping the whole scratch when dense enough), producing
//        a valid sorted result directly. If false, entries are inserted
//        unordered and the caller must sort afterwards (e.g. via a transpose
//        round-trip).
template<typename Lhs, typename Rhs, typename ResultType>
static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res, bool sortedInsertion = false)
{
  typedef typename remove_all<Lhs>::type::Scalar Scalar;

  // make sure to call innerSize/outerSize since we fake the storage order.
  Index rows = lhs.innerSize();
  Index cols = rhs.outerSize();
  eigen_assert(lhs.outerSize() == rhs.innerSize());

  // Dense scratch buffers, allocated on the stack when small enough.
  ei_declare_aligned_stack_constructed_variable(bool, mask, rows, 0);
  ei_declare_aligned_stack_constructed_variable(Scalar, values, rows, 0);
  ei_declare_aligned_stack_constructed_variable(Index, indices, rows, 0);

  std::memset(mask,0,sizeof(bool)*rows);

  evaluator<Lhs> lhsEval(lhs);
  evaluator<Rhs> rhsEval(rhs);

  // estimate the number of non zero entries
  // given a rhs column containing Y non zeros, we assume that the respective Y columns
  // of the lhs differ on average by one non zero, thus the number of non zeros for
  // the product of a rhs column with the lhs is X+Y where X is the average number of non zero
  // per column of the lhs.
  // Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs)
  Index estimated_nnz_prod = lhsEval.nonZerosEstimate() + rhsEval.nonZerosEstimate();

  res.setZero();
  res.reserve(Index(estimated_nnz_prod));
  // we compute each column of the result, one after the other
  for (Index j=0; j<cols; ++j)
  {

    res.startVec(j);
    Index nnz = 0;
    // Accumulate column j of the product: for each non zero rhs(k,j),
    // scatter y * lhs(:,k) into the dense scratch.
    for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)
    {
      Scalar y = rhsIt.value();
      Index k = rhsIt.index();
      for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt)
      {
        Index i = lhsIt.index();
        Scalar x = lhsIt.value();
        if(!mask[i])
        {
          // First contribution to row i of this column: record the index.
          mask[i] = true;
          values[i] = x * y;
          indices[nnz] = i;
          ++nnz;
        }
        else
          values[i] += x * y;
      }
    }
    if(!sortedInsertion)
    {
      // unordered insertion
      for(Index k=0; k<nnz; ++k)
      {
        Index i = indices[k];
        res.insertBackByOuterInnerUnordered(j,i) = values[i];
        mask[i] = false;  // reset the scratch for the next column
      }
    }
    else
    {
      // alternative ordered insertion code:
      // Thresholds comparing the cost of sorting nnz indices (nnz*log2(nnz))
      // against a linear sweep of the whole scratch (~rows, with a 1.39
      // fudge factor for the relative cost of the two loops).
      const Index t200 = rows/11; // 11 == (log2(200)*1.39)
      const Index t = (rows*100)/139;

      // FIXME reserve nnz non zeros
      // FIXME implement faster sorting algorithms for very small nnz
      // if the result is sparse enough => use a quick sort
      // otherwise => loop through the entire vector
      // To avoid performing an expensive log2 when the
      // result is clearly very sparse we use a linear bound up to 200.
      if((nnz<200 && nnz<t200) || nnz * numext::log2(int(nnz)) < t)
      {
        // sparse path: sort the touched indices, then insert in order
        if(nnz>1) std::sort(indices,indices+nnz);
        for(Index k=0; k<nnz; ++k)
        {
          Index i = indices[k];
          res.insertBackByOuterInner(j,i) = values[i];
          mask[i] = false;  // reset the scratch for the next column
        }
      }
      else
      {
        // dense path: sweep the whole scratch, which is already in order
        for(Index i=0; i<rows; ++i)
        {
          if(mask[i])
          {
            mask[i] = false;
            res.insertBackByOuterInner(j,i) = values[i];
          }
        }
      }
    }
  }
  res.finalize();
}


} // end namespace internal

namespace internal {

// Dispatches the conservative sparse*sparse product on the storage orders of
// the two operands and of the result. Each specialization converts operands
// (via temporaries) and/or swaps them so that the kernel above can be applied;
// when operands are swapped, the kernel effectively computes the transposed
// product (res^T = rhs^T * lhs^T) thanks to the "faked" storage order.
template<typename Lhs, typename Rhs, typename ResultType,
  int LhsStorageOrder = (traits<Lhs>::Flags&RowMajorBit) ? RowMajor : ColMajor,
  int RhsStorageOrder = (traits<Rhs>::Flags&RowMajorBit) ? RowMajor : ColMajor,
  int ResStorageOrder = (traits<ResultType>::Flags&RowMajorBit) ? RowMajor : ColMajor>
struct conservative_sparse_sparse_product_selector;

// col-major * col-major -> col-major: no operand conversion needed, but the
// kernel inserts each column's entries unordered, so either insert sorted
// in-place or sort via a col-major -> row-major -> col-major round-trip.
template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor,ColMajor>
{
  typedef typename remove_all<Lhs>::type LhsCleaned;
  typedef typename LhsCleaned::Scalar Scalar;

  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix;
    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrixAux;
    typedef typename sparse_eval<ColMajorMatrixAux,ResultType::RowsAtCompileTime,ResultType::ColsAtCompileTime,ColMajorMatrixAux::Flags>::type ColMajorMatrix;

    // If the result is tall and thin (in the extreme case a column vector)
    // then it is faster to sort the coefficients inplace instead of transposing twice.
    // FIXME, the following heuristic is probably not very good.
    if(lhs.rows()>rhs.cols())
    {
      ColMajorMatrix resCol(lhs.rows(),rhs.cols());
      // perform sorted insertion
      internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol, true);
      res = resCol.markAsRValue();
    }
    else
    {
      ColMajorMatrixAux resCol(lhs.rows(),rhs.cols());
      // resort to transpose to sort the entries
      internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrixAux>(lhs, rhs, resCol, false);
      RowMajorMatrix resRow(resCol);
      res = resRow.markAsRValue();
    }
  }
};

// row-major * col-major -> col-major: convert rhs to row-major and compute the
// transposed product into a row-major temporary; assigning it to the
// col-major result performs the final transpose/sort.
template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,ColMajor,ColMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix;
    RowMajorMatrix rhsRow = rhs;
    RowMajorMatrix resRow(lhs.rows(), rhs.cols());
    internal::conservative_sparse_sparse_product_impl<RowMajorMatrix,Lhs,RowMajorMatrix>(rhsRow, lhs, resRow);
    res = resRow;
  }
};

// col-major * row-major -> col-major: convert lhs to row-major and compute the
// transposed product (operands swapped) into a row-major temporary.
template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,RowMajor,ColMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix;
    RowMajorMatrix lhsRow = lhs;
    RowMajorMatrix resRow(lhs.rows(), rhs.cols());
    internal::conservative_sparse_sparse_product_impl<Rhs,RowMajorMatrix,RowMajorMatrix>(rhs, lhsRow, resRow);
    res = resRow;
  }
};

// row-major * row-major -> col-major: no conversion needed; swapping the
// operands computes the transposed product directly into a row-major
// temporary, and the final assignment transposes/sorts it.
template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,RowMajor,ColMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix;
    RowMajorMatrix resRow(lhs.rows(), rhs.cols());
    internal::conservative_sparse_sparse_product_impl<Rhs,Lhs,RowMajorMatrix>(rhs, lhs, resRow);
    res = resRow;
  }
};


// col-major * col-major -> row-major: compute into a col-major temporary and
// let the final assignment transpose/sort into the row-major result.
template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor,RowMajor>
{
  typedef typename traits<typename remove_all<Lhs>::type>::Scalar Scalar;

  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;
    ColMajorMatrix resCol(lhs.rows(), rhs.cols());
    internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol);
    res = resCol;
  }
};

// row-major * col-major -> row-major: convert lhs to col-major first.
template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,ColMajor,RowMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;
    ColMajorMatrix lhsCol = lhs;
    ColMajorMatrix resCol(lhs.rows(), rhs.cols());
    internal::conservative_sparse_sparse_product_impl<ColMajorMatrix,Rhs,ColMajorMatrix>(lhsCol, rhs, resCol);
    res = resCol;
  }
};

// col-major * row-major -> row-major: convert rhs to col-major first.
template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,RowMajor,RowMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;
    ColMajorMatrix rhsCol = rhs;
    ColMajorMatrix resCol(lhs.rows(), rhs.cols());
    internal::conservative_sparse_sparse_product_impl<Lhs,ColMajorMatrix,ColMajorMatrix>(lhs, rhsCol, resCol);
    res = resCol;
  }
};

// row-major * row-major -> row-major: swap the operands to compute the
// transposed product into a row-major temporary (entries unordered), then
// sort via a round-trip through a col-major copy.
template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,RowMajor,RowMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix;
    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;
    RowMajorMatrix resRow(lhs.rows(),rhs.cols());
    internal::conservative_sparse_sparse_product_impl<Rhs,Lhs,RowMajorMatrix>(rhs, lhs, resRow);
    // sort the non zeros:
    ColMajorMatrix resCol(resRow);
    res = resCol;
  }
};

} // end namespace internal


namespace internal {

// Accumulates the sparse*sparse product directly into a dense result:
// res += lhs * rhs (coefficient-wise accumulation via coeffRef, so res is
// expected to be writable and already sized; existing coefficients are
// added to, not overwritten).
template<typename Lhs, typename Rhs, typename ResultType>
static void sparse_sparse_to_dense_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
  typedef typename remove_all<Lhs>::type::Scalar Scalar;
  Index cols = rhs.outerSize();
  eigen_assert(lhs.outerSize() == rhs.innerSize());

  evaluator<Lhs> lhsEval(lhs);
  evaluator<Rhs> rhsEval(rhs);

  for (Index j=0; j<cols; ++j)
  {
    // res(:,j) += sum_k rhs(k,j) * lhs(:,k)
    for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)
    {
      Scalar y = rhsIt.value();
      Index k = rhsIt.index();
      for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt)
      {
        Index i = lhsIt.index();
        Scalar x = lhsIt.value();
        res.coeffRef(i,j) += x * y;
      }
    }
  }
}


} // end namespace internal

namespace internal {

// Dispatches the sparse*sparse-to-dense product on the operands' storage
// orders: operands are converted to col-major as needed, and the all-row-major
// case writes through a transposed view of the result instead.
template<typename Lhs, typename Rhs, typename ResultType,
  int LhsStorageOrder = (traits<Lhs>::Flags&RowMajorBit) ? RowMajor : ColMajor,
  int RhsStorageOrder = (traits<Rhs>::Flags&RowMajorBit) ? RowMajor : ColMajor>
struct sparse_sparse_to_dense_product_selector;

// col-major * col-major: apply the kernel directly.
template<typename Lhs, typename Rhs, typename ResultType>
struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    internal::sparse_sparse_to_dense_product_impl<Lhs,Rhs,ResultType>(lhs, rhs, res);
  }
};

// row-major * col-major: convert lhs to col-major first.
template<typename Lhs, typename Rhs, typename ResultType>
struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,RowMajor,ColMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;
    ColMajorMatrix lhsCol(lhs);
    internal::sparse_sparse_to_dense_product_impl<ColMajorMatrix,Rhs,ResultType>(lhsCol, rhs, res);
  }
};

// col-major * row-major: convert rhs to col-major first.
template<typename Lhs, typename Rhs, typename ResultType>
struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,ColMajor,RowMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;
    ColMajorMatrix rhsCol(rhs);
    internal::sparse_sparse_to_dense_product_impl<Lhs,ColMajorMatrix,ResultType>(lhs, rhsCol, res);
  }
};

// row-major * row-major: swap the operands and accumulate into a transposed
// view of res, so that res^T += rhs^T * lhs^T, i.e. res += lhs * rhs.
template<typename Lhs, typename Rhs, typename ResultType>
struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,RowMajor,RowMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    Transpose<ResultType> trRes(res);
    internal::sparse_sparse_to_dense_product_impl<Rhs,Lhs,Transpose<ResultType> >(rhs, lhs, trRes);
  }
};


} // end namespace internal

} // end namespace Eigen

#endif // EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H