1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package androidx.renderscript; 18 19 import android.support.annotation.IntDef; 20 import java.lang.annotation.Retention; 21 import java.lang.annotation.RetentionPolicy; 22 23 /** 24 * 25 * ScriptIntrinsicBLAS class provides high performance RenderScript APIs to BLAS. 26 * 27 * The BLAS (Basic Linear Algebra Subprograms) are routines that provide standard 28 * building blocks for performing basic vector and matrix operations. 29 * 30 * For detailed description of BLAS, please refer to http://www.netlib.org/blas/ 31 * 32 * @deprecated Renderscript has been deprecated in API level 31. Please refer to the <a 33 * href="https://developer.android.com/guide/topics/renderscript/migration-guide">migration 34 * guide</a> for the proposed alternatives. 35 **/ 36 @Deprecated 37 public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { 38 private Allocation mLUT; 39 private static final int INTRINSIC_API_LEVEL = 23; 40 ScriptIntrinsicBLAS(long id, RenderScript rs)41 private ScriptIntrinsicBLAS(long id, RenderScript rs) { 42 super(id, rs); 43 } 44 45 private static final int RsBlas_sdsdot = 1; 46 private static final int RsBlas_dsdot = 2; 47 private static final int RsBlas_sdot = 3; 48 private static final int RsBlas_ddot = 4; 49 private static final int RsBlas_cdotu_sub = 5; 50 private static final int RsBlas_cdotc_sub = 6; 51 private static final int RsBlas_zdotu_sub = 7; 52 private static final int RsBlas_zdotc_sub = 8; 53 private static final int RsBlas_snrm2 = 9; 54 private static final int RsBlas_sasum = 10; 55 private static final int RsBlas_dnrm2 = 11; 56 private static final int RsBlas_dasum = 12; 57 private static final int RsBlas_scnrm2 = 13; 58 private static final int RsBlas_scasum = 14; 59 private static final int RsBlas_dznrm2 = 15; 60 private static final int RsBlas_dzasum = 16; 61 private static final int RsBlas_isamax = 17; 62 private static final int RsBlas_idamax = 18; 63 private static final int RsBlas_icamax = 19; 64 private static final int RsBlas_izamax = 20; 65 private static final int RsBlas_sswap = 21; 66 private static final int RsBlas_scopy = 22; 67 private static final int RsBlas_saxpy = 23; 68 private static final int RsBlas_dswap = 24; 69 private static final int RsBlas_dcopy = 25; 70 private static final int RsBlas_daxpy = 26; 71 private static final int RsBlas_cswap = 27; 72 private static final int RsBlas_ccopy = 28; 73 private static final int RsBlas_caxpy = 29; 74 private static final int RsBlas_zswap = 30; 75 private static final int RsBlas_zcopy = 31; 76 private static final int RsBlas_zaxpy = 32; 77 private static final int RsBlas_srotg = 33; 78 private static final int RsBlas_srotmg = 34; 79 private static final int RsBlas_srot = 35; 80 private static final int RsBlas_srotm = 36; 81 private static final int RsBlas_drotg = 37; 82 private static final int RsBlas_drotmg = 38; 83 private static final int RsBlas_drot = 39; 84 private static final int RsBlas_drotm = 40; 85 private static final int RsBlas_sscal = 41; 86 private static final int RsBlas_dscal = 42; 87 private static final int RsBlas_cscal = 43; 88 private static final int RsBlas_zscal = 44; 89 private static final int RsBlas_csscal = 45; 90 private static final int RsBlas_zdscal = 46; 91 private static final int RsBlas_sgemv = 47; 92 private static final int RsBlas_sgbmv = 48; 93 private static final int RsBlas_strmv = 49; 94 private static final int RsBlas_stbmv = 50; 95 private static final int RsBlas_stpmv = 51; 96 private static final int RsBlas_strsv = 52; 97 private static final int RsBlas_stbsv = 53; 98 private static final int RsBlas_stpsv = 54; 99 private static final int RsBlas_dgemv = 55; 100 private static final int RsBlas_dgbmv = 56; 101 private static final int RsBlas_dtrmv = 57; 102 private static final int RsBlas_dtbmv = 58; 103 private static final int RsBlas_dtpmv = 59; 104 private static final int RsBlas_dtrsv = 60; 105 private static final int RsBlas_dtbsv = 61; 106 private static final int RsBlas_dtpsv = 62; 107 private static final int RsBlas_cgemv = 63; 108 private static final int RsBlas_cgbmv = 64; 109 private static final int RsBlas_ctrmv = 65; 110 private static final int RsBlas_ctbmv = 66; 111 private static final int RsBlas_ctpmv = 67; 112 private static final int RsBlas_ctrsv = 68; 113 private static final int RsBlas_ctbsv = 69; 114 private static final int RsBlas_ctpsv = 70; 115 private static final int RsBlas_zgemv = 71; 116 private static final int RsBlas_zgbmv = 72; 117 private static final int RsBlas_ztrmv = 73; 118 private static final int RsBlas_ztbmv = 74; 119 private static final int RsBlas_ztpmv = 75; 120 private static final int RsBlas_ztrsv = 76; 121 private static final int RsBlas_ztbsv = 77; 122 private static final int RsBlas_ztpsv = 78; 123 private static final int RsBlas_ssymv = 79; 124 private static final int RsBlas_ssbmv = 80; 125 private static final int RsBlas_sspmv = 81; 126 private static final int RsBlas_sger = 82; 127 private static final int RsBlas_ssyr = 83; 128 private static final int RsBlas_sspr = 84; 129 private static final int RsBlas_ssyr2 = 85; 130 private static final int RsBlas_sspr2 = 86; 131 private static final int RsBlas_dsymv = 87; 132 private static final int RsBlas_dsbmv = 88; 133 private static final int RsBlas_dspmv = 89; 134 private static final int RsBlas_dger = 90; 135 private static final int RsBlas_dsyr = 91; 136 private static final int RsBlas_dspr = 92; 137 private static final int RsBlas_dsyr2 = 93; 138 private static final int RsBlas_dspr2 = 94; 139 private static final int RsBlas_chemv = 95; 140 private static final int RsBlas_chbmv = 96; 141 private static final int RsBlas_chpmv = 97; 142 private static final int RsBlas_cgeru = 98; 143 private static final int RsBlas_cgerc = 99; 144 private static final int RsBlas_cher = 100; 145 private static final int RsBlas_chpr = 101; 146 private static final int RsBlas_cher2 = 102; 147 private static final int RsBlas_chpr2 = 103; 148 private static final int RsBlas_zhemv = 104; 149 private static final int RsBlas_zhbmv = 105; 150 private static final int RsBlas_zhpmv = 106; 151 private static final int RsBlas_zgeru = 107; 152 private static final int RsBlas_zgerc = 108; 153 private static final int RsBlas_zher = 109; 154 private static final int RsBlas_zhpr = 110; 155 private static final int RsBlas_zher2 = 111; 156 private static final int RsBlas_zhpr2 = 112; 157 private static final int RsBlas_sgemm = 113; 158 private static final int RsBlas_ssymm = 114; 159 private static final int RsBlas_ssyrk = 115; 160 private static final int RsBlas_ssyr2k = 116; 161 private static final int RsBlas_strmm = 117; 162 private static final int RsBlas_strsm = 118; 163 private static final int RsBlas_dgemm = 119; 164 private static final int RsBlas_dsymm = 120; 165 private static final int RsBlas_dsyrk = 121; 166 private static final int RsBlas_dsyr2k = 122; 167 private static final int RsBlas_dtrmm = 123; 168 private static final int RsBlas_dtrsm = 124; 169 private static final int RsBlas_cgemm = 125; 170 private static final int RsBlas_csymm = 126; 171 private static final int RsBlas_csyrk = 127; 172 private static final int RsBlas_csyr2k = 128; 173 private static final int RsBlas_ctrmm = 129; 174 private static final int RsBlas_ctrsm = 130; 175 private static final int RsBlas_zgemm = 131; 176 private static final int RsBlas_zsymm = 132; 177 private static final int RsBlas_zsyrk = 133; 178 private static final int RsBlas_zsyr2k = 134; 179 private static final int RsBlas_ztrmm = 135; 180 private static final int RsBlas_ztrsm = 136; 181 private static final int RsBlas_chemm = 137; 182 private static final int RsBlas_cherk = 138; 183 private static final int RsBlas_cher2k = 139; 184 private static final int RsBlas_zhemm = 140; 185 private static final int RsBlas_zherk = 141; 186 private static final int RsBlas_zher2k = 142; 187 188 // BLAS extensions start here 189 private static final int RsBlas_bnnm = 1000; 190 191 /** 192 * Create an intrinsic to access BLAS subroutines. 193 * 194 * @param rs The RenderScript context 195 * @return ScriptIntrinsicBLAS 196 */ create(RenderScript rs)197 public static ScriptIntrinsicBLAS create(RenderScript rs) { 198 long id; 199 boolean mUseIncSupp = rs.isUseNative() && 200 android.os.Build.VERSION.SDK_INT < INTRINSIC_API_LEVEL; 201 202 id = rs.nScriptIntrinsicCreate(13, Element.U32(rs).getID(rs), mUseIncSupp); 203 ScriptIntrinsicBLAS si = new ScriptIntrinsicBLAS(id, rs); 204 si.setIncSupp(mUseIncSupp); 205 return si; 206 } 207 208 /** 209 * @hide 210 */ 211 @IntDef({NO_TRANSPOSE, TRANSPOSE, CONJ_TRANSPOSE}) 212 @Retention(RetentionPolicy.SOURCE) 213 public @interface Transpose {} 214 215 /** 216 * @hide 217 */ 218 @IntDef({UPPER, LOWER}) 219 @Retention(RetentionPolicy.SOURCE) 220 public @interface Uplo {} 221 222 /** 223 * @hide 224 */ 225 @IntDef({NON_UNIT, UNIT}) 226 @Retention(RetentionPolicy.SOURCE) 227 public @interface Diag {} 228 229 /** 230 * @hide 231 */ 232 @IntDef({LEFT, RIGHT}) 233 @Retention(RetentionPolicy.SOURCE) 234 public @interface Side {} 235 236 public static final int NO_TRANSPOSE = 111; 237 public static final int TRANSPOSE = 112; 238 public static final int CONJ_TRANSPOSE = 113; 239 240 public static final int UPPER = 121; 241 public static final int LOWER = 122; 242 243 public static final int NON_UNIT = 131; 244 public static final int UNIT = 132; 245 246 public static final int LEFT = 141; 247 public static final int RIGHT = 142; 248 249 static void validateSide(@Side int Side) { 250 if (Side != LEFT && Side != RIGHT) { 251 throw new RSRuntimeException("Invalid side passed to BLAS"); 252 } 253 } 254 255 static void validateTranspose(@Transpose int Trans) { 256 if (Trans != NO_TRANSPOSE && Trans != TRANSPOSE && 257 Trans != CONJ_TRANSPOSE) { 258 throw new RSRuntimeException("Invalid transpose passed to BLAS"); 259 } 260 } 261 262 static void validateConjTranspose(@Transpose int Trans) { 263 if (Trans != NO_TRANSPOSE && 264 Trans != CONJ_TRANSPOSE) { 265 throw new RSRuntimeException("Invalid transpose passed to BLAS"); 266 } 267 } 268 269 static void validateDiag(@Diag int Diag) { 270 if (Diag != NON_UNIT && Diag != UNIT) { 271 throw new RSRuntimeException("Invalid diag passed to BLAS"); 272 } 273 } 274 275 static void validateUplo(@Uplo int Uplo) { 276 if (Uplo != UPPER && Uplo != LOWER) { 277 throw new RSRuntimeException("Invalid uplo passed to BLAS"); 278 } 279 } 280 281 282 /** 283 * Level 2 BLAS 284 */ 285 286 static void validateGEMV(Element e, int TransA, Allocation A, Allocation X, int incX, Allocation Y, int incY) { 287 validateTranspose(TransA); 288 int M = A.getType().getY(); 289 int N = A.getType().getX(); 290 if (!A.getType().getElement().isCompatible(e) || 291 !X.getType().getElement().isCompatible(e) || 292 !Y.getType().getElement().isCompatible(e)) { 293 throw new RSRuntimeException("Called BLAS with wrong Element type"); 294 } 295 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 296 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 297 } 298 299 if (incX <= 0 || incY <= 0) { 300 throw new RSRuntimeException("Vector increments must be greater than 0"); 301 } 302 int expectedXDim = -1, expectedYDim = -1; 303 if (TransA == NO_TRANSPOSE) { 304 expectedXDim = 1 + (N - 1) * incX; 305 expectedYDim = 1 + (M - 1) * incY; 306 } else { 307 expectedXDim = 1 + (M - 1) * incX; 308 expectedYDim = 1 + (N - 1) * incY; 309 } 310 if (X.getType().getX() != expectedXDim || 311 Y.getType().getX() != expectedYDim) { 312 throw new RSRuntimeException("Incorrect vector dimensions for GEMV"); 313 } 314 } 315 316 /** 317 * SGEMV performs one of the matrix-vector operations 318 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y 319 * 320 * Details: http://www.netlib.org/lapack/explore-html/db/d58/sgemv_8f.html 321 * 322 * @param TransA The type of transpose applied to matrix A. 323 * @param alpha The scalar alpha. 324 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 325 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 326 * @param incX The increment for the elements of vector x, must be larger than zero. 327 * @param beta The scalar beta. 328 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 329 * @param incY The increment for the elements of vector y, must be larger than zero. 330 */ SGEMV(@ranspose int TransA, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY)331 public void SGEMV(@Transpose int TransA, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { 332 validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY); 333 int M = A.getType().getY(); 334 int N = A.getType().getX(); 335 336 boolean mUseIncSupp = isIncSupp(); 337 long aID = A.getID(mRS); 338 long xID = X.getID(mRS); 339 long yID = Y.getID(mRS); 340 if (mUseIncSupp) { 341 aID = getDummyAlloc(A); 342 xID = getDummyAlloc(X); 343 yID = getDummyAlloc(Y); 344 } 345 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 346 } 347 348 /** 349 * DGEMV performs one of the matrix-vector operations 350 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y 351 * 352 * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dgemv_8f.html 353 * 354 * @param TransA The type of transpose applied to matrix A. 355 * @param alpha The scalar alpha. 356 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 357 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 358 * @param incX The increment for the elements of vector x, must be larger than zero. 359 * @param beta The scalar beta. 360 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 361 * @param incY The increment for the elements of vector y, must be larger than zero. 362 */ DGEMV(@ranspose int TransA, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY)363 public void DGEMV(@Transpose int TransA, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { 364 validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY); 365 int M = A.getType().getY(); 366 int N = A.getType().getX(); 367 368 boolean mUseIncSupp = isIncSupp(); 369 long aID = A.getID(mRS); 370 long xID = X.getID(mRS); 371 long yID = Y.getID(mRS); 372 if (mUseIncSupp) { 373 aID = getDummyAlloc(A); 374 xID = getDummyAlloc(X); 375 yID = getDummyAlloc(Y); 376 } 377 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 378 } 379 380 /** 381 * CGEMV performs one of the matrix-vector operations 382 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y 383 * 384 * Details: http://www.netlib.org/lapack/explore-html/d4/d8a/cgemv_8f.html 385 * 386 * @param TransA The type of transpose applied to matrix A. 387 * @param alpha The scalar alpha. 388 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 389 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 390 * @param incX The increment for the elements of vector x, must be larger than zero. 391 * @param beta The scalar beta. 392 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 393 * @param incY The increment for the elements of vector y, must be larger than zero. 394 */ CGEMV(@ranspose int TransA, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY)395 public void CGEMV(@Transpose int TransA, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 396 validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY); 397 int M = A.getType().getY(); 398 int N = A.getType().getX(); 399 400 boolean mUseIncSupp = isIncSupp(); 401 long aID = A.getID(mRS); 402 long xID = X.getID(mRS); 403 long yID = Y.getID(mRS); 404 if (mUseIncSupp) { 405 aID = getDummyAlloc(A); 406 xID = getDummyAlloc(X); 407 yID = getDummyAlloc(Y); 408 } 409 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 410 } 411 412 /** 413 * ZGEMV performs one of the matrix-vector operations 414 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y 415 * 416 * Details: http://www.netlib.org/lapack/explore-html/db/d40/zgemv_8f.html 417 * 418 * @param TransA The type of transpose applied to matrix A. 419 * @param alpha The scalar alpha. 420 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 421 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 422 * @param incX The increment for the elements of vector x, must be larger than zero. 423 * @param beta The scalar beta. 424 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 425 * @param incY The increment for the elements of vector y, must be larger than zero. 426 */ ZGEMV(@ranspose int TransA, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY)427 public void ZGEMV(@Transpose int TransA, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 428 validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY); 429 int M = A.getType().getY(); 430 int N = A.getType().getX(); 431 432 boolean mUseIncSupp = isIncSupp(); 433 long aID = A.getID(mRS); 434 long xID = X.getID(mRS); 435 long yID = Y.getID(mRS); 436 if (mUseIncSupp) { 437 aID = getDummyAlloc(A); 438 xID = getDummyAlloc(X); 439 yID = getDummyAlloc(Y); 440 } 441 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 442 } 443 444 /** 445 * SGBMV performs one of the matrix-vector operations 446 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y 447 * 448 * Details: http://www.netlib.org/lapack/explore-html/d6/d46/sgbmv_8f.html 449 * 450 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), 451 * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an 452 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. 453 * for i in range(0, m): 454 * for j in range(max(0, i-kl), min(i+ku+1, n)): 455 * b[i, j-i+kl] = a[i, j] 456 * 457 * @param TransA The type of transpose applied to matrix A. 458 * @param KL The number of sub-diagonals of the matrix A. 459 * @param KU The number of super-diagonals of the matrix A. 460 * @param alpha The scalar alpha. 461 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32}. 462 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 463 * @param incX The increment for the elements of vector x, must be larger than zero. 464 * @param beta The scalar beta. 465 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 466 * @param incY The increment for the elements of vector y, must be larger than zero. 467 */ SGBMV(@ranspose int TransA, int KL, int KU, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY)468 public void SGBMV(@Transpose int TransA, int KL, int KU, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { 469 // GBMV has the same validation requirements as GEMV + KL and KU >= 0 470 validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY); 471 if (KL < 0 || KU < 0) { 472 throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); 473 } 474 int M = A.getType().getY(); 475 int N = A.getType().getX(); 476 477 boolean mUseIncSupp = isIncSupp(); 478 long aID = A.getID(mRS); 479 long xID = X.getID(mRS); 480 long yID = Y.getID(mRS); 481 if (mUseIncSupp) { 482 aID = getDummyAlloc(A); 483 xID = getDummyAlloc(X); 484 yID = getDummyAlloc(Y); 485 } 486 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, KL, KU, mUseIncSupp); 487 } 488 489 /** 490 * DGBMV performs one of the matrix-vector operations 491 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y 492 * 493 * Details: http://www.netlib.org/lapack/explore-html/d2/d3f/dgbmv_8f.html 494 * 495 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), 496 * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an 497 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. 498 * for i in range(0, m): 499 * for j in range(max(0, i-kl), min(i+ku+1, n)): 500 * b[i, j-i+kl] = a[i, j] 501 * 502 * @param TransA The type of transpose applied to matrix A. 503 * @param KL The number of sub-diagonals of the matrix A. 504 * @param KU The number of super-diagonals of the matrix A. 505 * @param alpha The scalar alpha. 506 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64}. 507 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 508 * @param incX The increment for the elements of vector x, must be larger than zero. 509 * @param beta The scalar beta. 510 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 511 * @param incY The increment for the elements of vector y, must be larger than zero. 512 */ DGBMV(@ranspose int TransA, int KL, int KU, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY)513 public void DGBMV(@Transpose int TransA, int KL, int KU, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { 514 // GBMV has the same validation requirements as GEMV + KL and KU >= 0 515 validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY); 516 if (KL < 0 || KU < 0) { 517 throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); 518 } 519 int M = A.getType().getY(); 520 int N = A.getType().getX(); 521 522 boolean mUseIncSupp = isIncSupp(); 523 long aID = A.getID(mRS); 524 long xID = X.getID(mRS); 525 long yID = Y.getID(mRS); 526 if (mUseIncSupp) { 527 aID = getDummyAlloc(A); 528 xID = getDummyAlloc(X); 529 yID = getDummyAlloc(Y); 530 } 531 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, KL, KU, mUseIncSupp); 532 } 533 534 /** 535 * CGBMV performs one of the matrix-vector operations 536 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y 537 * 538 * Details: http://www.netlib.org/lapack/explore-html/d0/d75/cgbmv_8f.html 539 * 540 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), 541 * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an 542 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. 543 * for i in range(0, m): 544 * for j in range(max(0, i-kl), min(i+ku+1, n)): 545 * b[i, j-i+kl] = a[i, j] 546 * 547 * @param TransA The type of transpose applied to matrix A. 548 * @param KL The number of sub-diagonals of the matrix A. 549 * @param KU The number of super-diagonals of the matrix A. 550 * @param alpha The scalar alpha. 551 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32_2}. 552 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 553 * @param incX The increment for the elements of vector x, must be larger than zero. 554 * @param beta The scalar beta. 555 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 556 * @param incY The increment for the elements of vector y, must be larger than zero. 557 */ CGBMV(@ranspose int TransA, int KL, int KU, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY)558 public void CGBMV(@Transpose int TransA, int KL, int KU, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 559 // GBMV has the same validation requirements as GEMV + KL and KU >= 0 560 validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY); 561 if (KL < 0 || KU < 0) { 562 throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); 563 } 564 int M = A.getType().getY(); 565 int N = A.getType().getX(); 566 567 boolean mUseIncSupp = isIncSupp(); 568 long aID = A.getID(mRS); 569 long xID = X.getID(mRS); 570 long yID = Y.getID(mRS); 571 if (mUseIncSupp) { 572 aID = getDummyAlloc(A); 573 xID = getDummyAlloc(X); 574 yID = getDummyAlloc(Y); 575 } 576 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, KL, KU, mUseIncSupp); 577 } 578 579 /** 580 * ZGBMV performs one of the matrix-vector operations 581 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y 582 * 583 * Details: http://www.netlib.org/lapack/explore-html/d9/d46/zgbmv_8f.html 584 * 585 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), 586 * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an 587 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. 588 * for i in range(0, m): 589 * for j in range(max(0, i-kl), min(i+ku+1, n)): 590 * b[i, j-i+kl] = a[i, j] 591 * 592 * @param TransA The type of transpose applied to matrix A. 593 * @param KL The number of sub-diagonals of the matrix A. 594 * @param KU The number of super-diagonals of the matrix A. 595 * @param alpha The scalar alpha. 596 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64_2}. 597 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 598 * @param incX The increment for the elements of vector x, must be larger than zero. 599 * @param beta The scalar beta. 600 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 601 * @param incY The increment for the elements of vector y, must be larger than zero. 602 */ ZGBMV(@ranspose int TransA, int KL, int KU, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY)603 public void ZGBMV(@Transpose int TransA, int KL, int KU, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 604 // GBMV has the same validation requirements as GEMV + KL and KU >= 0 605 validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY); 606 if (KL < 0 || KU < 0) { 607 throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); 608 } 609 int M = A.getType().getY(); 610 int N = A.getType().getX(); 611 612 boolean mUseIncSupp = isIncSupp(); 613 long aID = A.getID(mRS); 614 long xID = X.getID(mRS); 615 long yID = Y.getID(mRS); 616 if (mUseIncSupp) { 617 aID = getDummyAlloc(A); 618 xID = getDummyAlloc(X); 619 yID = getDummyAlloc(Y); 620 } 621 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, KL, KU, mUseIncSupp); 622 } 623 validateTRMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)624 static void validateTRMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 625 validateTranspose(TransA); 626 validateUplo(Uplo); 627 validateDiag(Diag); 628 int N = A.getType().getY(); 629 if (A.getType().getX() != N) { 630 throw new RSRuntimeException("A must be a square matrix for TRMV"); 631 } 632 if (!A.getType().getElement().isCompatible(e) || 633 !X.getType().getElement().isCompatible(e)) { 634 throw new RSRuntimeException("Called BLAS with wrong Element type"); 635 } 636 if (X.getType().getY() > 1) { 637 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 638 } 639 640 if (incX <= 0) { 641 throw new RSRuntimeException("Vector increments must be greater than 0"); 642 } 643 int expectedXDim = 1 + (N - 1) * incX; 644 if (X.getType().getX() != expectedXDim) { 645 throw new RSRuntimeException("Incorrect vector dimensions for TRMV"); 646 } 647 } 648 validateTPMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)649 static int validateTPMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 650 validateTranspose(TransA); 651 validateUplo(Uplo); 652 validateDiag(Diag); 653 if (!Ap.getType().getElement().isCompatible(e) || 654 !X.getType().getElement().isCompatible(e)) { 655 throw new RSRuntimeException("Called BLAS with wrong Element type"); 656 } 657 if (X.getType().getY() > 1) { 658 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 659 } 660 661 if (Ap.getType().getY() > 1) { 662 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); 663 } 664 665 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 666 //is it really doing anything? 667 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 668 throw new RSRuntimeException("Invalid dimension for Ap"); 669 } 670 if (incX <= 0) { 671 throw new RSRuntimeException("Vector increments must be greater than 0"); 672 } 673 int expectedXDim = 1 + (N - 1) * incX; 674 if (X.getType().getX() != expectedXDim) { 675 throw new RSRuntimeException("Incorrect vector dimensions for TPMV"); 676 } 677 678 return N; 679 } 680 681 /** 682 * STRMV performs one of the matrix-vector operations 683 * x := A*x or x := A**T*x 684 * 685 * Details: http://www.netlib.org/lapack/explore-html/de/d45/strmv_8f.html 686 * 687 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 688 * @param TransA The type of transpose applied to matrix A. 689 * @param Diag Specifies whether or not A is unit triangular. 690 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 691 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 692 * @param incX The increment for the elements of vector x, must be larger than zero. 693 */ STRMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)694 public void STRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 695 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); 696 int N = A.getType().getY(); 697 698 boolean mUseIncSupp = isIncSupp(); 699 long aID = A.getID(mRS); 700 long xID = X.getID(mRS); 701 if (mUseIncSupp) { 702 aID = getDummyAlloc(A); 703 xID = getDummyAlloc(X); 704 } 705 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 706 } 707 708 /** 709 * DTRMV performs one of the matrix-vector operations 710 * x := A*x or x := A**T*x 711 * 712 * Details: http://www.netlib.org/lapack/explore-html/dc/d7e/dtrmv_8f.html 713 * 714 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 715 * @param TransA The type of transpose applied to matrix A. 716 * @param Diag Specifies whether or not A is unit triangular. 717 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 718 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 719 * @param incX The increment for the elements of vector x, must be larger than zero. 720 */ DTRMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)721 public void DTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 722 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); 723 int N = A.getType().getY(); 724 725 boolean mUseIncSupp = isIncSupp(); 726 long aID = A.getID(mRS); 727 long xID = X.getID(mRS); 728 if (mUseIncSupp) { 729 aID = getDummyAlloc(A); 730 xID = getDummyAlloc(X); 731 } 732 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 733 } 734 735 /** 736 * CTRMV performs one of the matrix-vector operations 737 * x := A*x or x := A**T*x or x := A**H*x 738 * 739 * Details: http://www.netlib.org/lapack/explore-html/df/d78/ctrmv_8f.html 740 * 741 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 742 * @param TransA The type of transpose applied to matrix A. 743 * @param Diag Specifies whether or not A is unit triangular. 744 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 745 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 746 * @param incX The increment for the elements of vector x, must be larger than zero. 747 */ CTRMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)748 public void CTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 749 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); 750 int N = A.getType().getY(); 751 752 boolean mUseIncSupp = isIncSupp(); 753 long aID = A.getID(mRS); 754 long xID = X.getID(mRS); 755 if (mUseIncSupp) { 756 aID = getDummyAlloc(A); 757 xID = getDummyAlloc(X); 758 } 759 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 760 } 761 762 /** 763 * ZTRMV performs one of the matrix-vector operations 764 * x := A*x or x := A**T*x or x := A**H*x 765 * 766 * Details: http://www.netlib.org/lapack/explore-html/d0/dd1/ztrmv_8f.html 767 * 768 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 769 * @param TransA The type of transpose applied to matrix A. 770 * @param Diag Specifies whether or not A is unit triangular. 771 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 772 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 773 * @param incX The increment for the elements of vector x, must be larger than zero. 774 */ ZTRMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)775 public void ZTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 776 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); 777 int N = A.getType().getY(); 778 779 boolean mUseIncSupp = isIncSupp(); 780 long aID = A.getID(mRS); 781 long xID = X.getID(mRS); 782 if (mUseIncSupp) { 783 aID = getDummyAlloc(A); 784 xID = getDummyAlloc(X); 785 } 786 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 787 } 788 789 /** 790 * STBMV performs one of the matrix-vector operations 791 * x := A*x or x := A**T*x 792 * 793 * Details: http://www.netlib.org/lapack/explore-html/d6/d7d/stbmv_8f.html 794 * 795 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 796 * but only the region N*(K+1) will be referenced. The following subroutine can is an 797 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 798 * for i in range(0, n): 799 * for j in range(i, min(i+k+1, n)): 800 * b[i, j-i] = a[i, j] 801 * 802 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 803 * @param TransA The type of transpose applied to matrix A. 804 * @param Diag Specifies whether or not A is unit triangular. 805 * @param K The number of off-diagonals of the matrix A 806 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 807 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 808 * @param incX The increment for the elements of vector x, must be larger than zero. 809 */ STBMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)810 public void STBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 811 // TBMV has the same requirements as TRMV + K >= 0 812 if (K < 0) { 813 throw new RSRuntimeException("K must be greater than or equal to 0"); 814 } 815 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); 816 int N = A.getType().getY(); 817 818 boolean mUseIncSupp = isIncSupp(); 819 long aID = A.getID(mRS); 820 long xID = X.getID(mRS); 821 if (mUseIncSupp) { 822 aID = getDummyAlloc(A); 823 xID = getDummyAlloc(X); 824 } 825 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 826 } 827 828 /** 829 * DTBMV performs one of the matrix-vector operations 830 * x := A*x or x := A**T*x 831 * 832 * Details: http://www.netlib.org/lapack/explore-html/df/d29/dtbmv_8f.html 833 * 834 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 835 * but only the region N*(K+1) will be referenced. The following subroutine can is an 836 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 837 * for i in range(0, n): 838 * for j in range(i, min(i+k+1, n)): 839 * b[i, j-i] = a[i, j] 840 * 841 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 842 * @param TransA The type of transpose applied to matrix A. 843 * @param Diag Specifies whether or not A is unit triangular. 844 * @param K The number of off-diagonals of the matrix A 845 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 846 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 847 * @param incX The increment for the elements of vector x, must be larger than zero. 848 */ DTBMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)849 public void DTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 850 // TBMV has the same requirements as TRMV + K >= 0 851 if (K < 0) { 852 throw new RSRuntimeException("K must be greater than or equal to 0"); 853 } 854 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); 855 int N = A.getType().getY(); 856 857 boolean mUseIncSupp = isIncSupp(); 858 long aID = A.getID(mRS); 859 long xID = X.getID(mRS); 860 if (mUseIncSupp) { 861 aID = getDummyAlloc(A); 862 xID = getDummyAlloc(X); 863 } 864 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 865 } 866 867 /** 868 * CTBMV performs one of the matrix-vector operations 869 * x := A*x or x := A**T*x or x := A**H*x 870 * 871 * Details: http://www.netlib.org/lapack/explore-html/d3/dcd/ctbmv_8f.html 872 * 873 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 874 * but only the region N*(K+1) will be referenced. The following subroutine can is an 875 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 876 * for i in range(0, n): 877 * for j in range(i, min(i+k+1, n)): 878 * b[i, j-i] = a[i, j] 879 * 880 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 881 * @param TransA The type of transpose applied to matrix A. 882 * @param Diag Specifies whether or not A is unit triangular. 883 * @param K The number of off-diagonals of the matrix A 884 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 885 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 886 * @param incX The increment for the elements of vector x, must be larger than zero. 887 */ CTBMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)888 public void CTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 889 // TBMV has the same requirements as TRMV + K >= 0 890 if (K < 0) { 891 throw new RSRuntimeException("K must be greater than or equal to 0"); 892 } 893 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); 894 int N = A.getType().getY(); 895 896 boolean mUseIncSupp = isIncSupp(); 897 long aID = A.getID(mRS); 898 long xID = X.getID(mRS); 899 if (mUseIncSupp) { 900 aID = getDummyAlloc(A); 901 xID = getDummyAlloc(X); 902 } 903 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 904 } 905 906 /** 907 * ZTBMV performs one of the matrix-vector operations 908 * x := A*x or x := A**T*x or x := A**H*x 909 * 910 * Details: http://www.netlib.org/lapack/explore-html/d3/d39/ztbmv_8f.html 911 * 912 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 913 * but only the region N*(K+1) will be referenced. The following subroutine can is an 914 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 915 * for i in range(0, n): 916 * for j in range(i, min(i+k+1, n)): 917 * b[i, j-i] = a[i, j] 918 * 919 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 920 * @param TransA The type of transpose applied to matrix A. 921 * @param Diag Specifies whether or not A is unit triangular. 922 * @param K The number of off-diagonals of the matrix A 923 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 924 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 925 * @param incX The increment for the elements of vector x, must be larger than zero. 926 */ ZTBMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)927 public void ZTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 928 // TBMV has the same requirements as TRMV + K >= 0 929 if (K < 0) { 930 throw new RSRuntimeException("K must be greater than or equal to 0"); 931 } 932 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); 933 int N = A.getType().getY(); 934 935 boolean mUseIncSupp = isIncSupp(); 936 long aID = A.getID(mRS); 937 long xID = X.getID(mRS); 938 if (mUseIncSupp) { 939 aID = getDummyAlloc(A); 940 xID = getDummyAlloc(X); 941 } 942 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 943 } 944 945 /** 946 * STPMV performs one of the matrix-vector operations 947 * x := A*x or x := A**T*x 948 * 949 * Details: http://www.netlib.org/lapack/explore-html/db/db1/stpmv_8f.html 950 * 951 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 952 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 953 * 'a' to packed matrix 'b'. 954 * k = 0 955 * for i in range(0, n): 956 * for j in range(i, n): 957 * b[k++] = a[i, j] 958 * 959 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 960 * @param TransA The type of transpose applied to matrix A. 961 * @param Diag Specifies whether or not A is unit triangular. 962 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}. 963 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 964 * @param incX The increment for the elements of vector x, must be larger than zero. 965 */ STPMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)966 public void STPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 967 int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX); 968 969 boolean mUseIncSupp = isIncSupp(); 970 long apID = Ap.getID(mRS); 971 long xID = X.getID(mRS); 972 if (mUseIncSupp) { 973 apID = getDummyAlloc(Ap); 974 xID = getDummyAlloc(X); 975 } 976 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 977 } 978 979 /** 980 * DTPMV performs one of the matrix-vector operations 981 * x := A*x or x := A**T*x 982 * 983 * Details: http://www.netlib.org/lapack/explore-html/dc/dcd/dtpmv_8f.html 984 * 985 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 986 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 987 * 'a' to packed matrix 'b'. 988 * k = 0 989 * for i in range(0, n): 990 * for j in range(i, n): 991 * b[k++] = a[i, j] 992 * 993 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 994 * @param TransA The type of transpose applied to matrix A. 995 * @param Diag Specifies whether or not A is unit triangular. 996 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}. 997 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 998 * @param incX The increment for the elements of vector x, must be larger than zero. 999 */ DTPMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1000 public void DTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1001 int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX); 1002 1003 boolean mUseIncSupp = isIncSupp(); 1004 long apID = Ap.getID(mRS); 1005 long xID = X.getID(mRS); 1006 if (mUseIncSupp) { 1007 apID = getDummyAlloc(Ap); 1008 xID = getDummyAlloc(X); 1009 } 1010 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1011 } 1012 1013 /** 1014 * CTPMV performs one of the matrix-vector operations 1015 * x := A*x or x := A**T*x or x := A**H*x 1016 * 1017 * Details: http://www.netlib.org/lapack/explore-html/d4/dbb/ctpmv_8f.html 1018 * 1019 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1020 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1021 * 'a' to packed matrix 'b'. 1022 * k = 0 1023 * for i in range(0, n): 1024 * for j in range(i, n): 1025 * b[k++] = a[i, j] 1026 * 1027 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1028 * @param TransA The type of transpose applied to matrix A. 1029 * @param Diag Specifies whether or not A is unit triangular. 1030 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}. 1031 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1032 * @param incX The increment for the elements of vector x, must be larger than zero. 1033 */ CTPMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1034 public void CTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1035 int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX); 1036 1037 boolean mUseIncSupp = isIncSupp(); 1038 long apID = Ap.getID(mRS); 1039 long xID = X.getID(mRS); 1040 if (mUseIncSupp) { 1041 apID = getDummyAlloc(Ap); 1042 xID = getDummyAlloc(X); 1043 } 1044 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1045 } 1046 1047 /** 1048 * ZTPMV performs one of the matrix-vector operations 1049 * x := A*x or x := A**T*x or x := A**H*x 1050 * 1051 * Details: http://www.netlib.org/lapack/explore-html/d2/d9e/ztpmv_8f.html 1052 * 1053 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1054 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1055 * 'a' to packed matrix 'b'. 1056 * k = 0 1057 * for i in range(0, n): 1058 * for j in range(i, n): 1059 * b[k++] = a[i, j] 1060 * 1061 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1062 * @param TransA The type of transpose applied to matrix A. 1063 * @param Diag Specifies whether or not A is unit triangular. 1064 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}. 1065 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 1066 * @param incX The increment for the elements of vector x, must be larger than zero. 1067 */ ZTPMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1068 public void ZTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1069 int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX); 1070 1071 boolean mUseIncSupp = isIncSupp(); 1072 long apID = Ap.getID(mRS); 1073 long xID = X.getID(mRS); 1074 if (mUseIncSupp) { 1075 apID = getDummyAlloc(Ap); 1076 xID = getDummyAlloc(X); 1077 } 1078 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1079 } 1080 1081 /** 1082 * STRSV solves one of the systems of equations 1083 * A*x = b or A**T*x = b 1084 * 1085 * Details: http://www.netlib.org/lapack/explore-html/d0/d2a/strsv_8f.html 1086 * 1087 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1088 * @param TransA The type of transpose applied to matrix A. 1089 * @param Diag Specifies whether or not A is unit triangular. 1090 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1091 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1092 * @param incX The increment for the elements of vector x, must be larger than zero. 1093 */ STRSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)1094 public void STRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 1095 // TRSV is the same as TRMV 1096 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); 1097 int N = A.getType().getY(); 1098 1099 boolean mUseIncSupp = isIncSupp(); 1100 long aID = A.getID(mRS); 1101 long xID = X.getID(mRS); 1102 if (mUseIncSupp) { 1103 aID = getDummyAlloc(A); 1104 xID = getDummyAlloc(X); 1105 } 1106 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1107 1108 } 1109 1110 /** 1111 * DTRSV solves one of the systems of equations 1112 * A*x = b or A**T*x = b 1113 * 1114 * Details: http://www.netlib.org/lapack/explore-html/d6/d96/dtrsv_8f.html 1115 * 1116 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1117 * @param TransA The type of transpose applied to matrix A. 1118 * @param Diag Specifies whether or not A is unit triangular. 1119 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 1120 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1121 * @param incX The increment for the elements of vector x, must be larger than zero. 1122 */ DTRSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)1123 public void DTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 1124 // TRSV is the same as TRMV 1125 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); 1126 int N = A.getType().getY(); 1127 1128 boolean mUseIncSupp = isIncSupp(); 1129 long aID = A.getID(mRS); 1130 long xID = X.getID(mRS); 1131 if (mUseIncSupp) { 1132 aID = getDummyAlloc(A); 1133 xID = getDummyAlloc(X); 1134 } 1135 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1136 1137 } 1138 1139 /** 1140 * CTRSV solves one of the systems of equations 1141 * A*x = b or A**T*x = b or A**H*x = b 1142 * 1143 * Details: http://www.netlib.org/lapack/explore-html/d4/dc8/ctrsv_8f.html 1144 * 1145 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1146 * @param TransA The type of transpose applied to matrix A. 1147 * @param Diag Specifies whether or not A is unit triangular. 1148 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1149 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1150 * @param incX The increment for the elements of vector x, must be larger than zero. 1151 */ CTRSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)1152 public void CTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 1153 // TRSV is the same as TRMV 1154 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); 1155 int N = A.getType().getY(); 1156 1157 boolean mUseIncSupp = isIncSupp(); 1158 long aID = A.getID(mRS); 1159 long xID = X.getID(mRS); 1160 if (mUseIncSupp) { 1161 aID = getDummyAlloc(A); 1162 xID = getDummyAlloc(X); 1163 } 1164 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1165 1166 } 1167 1168 /** 1169 * ZTRSV solves one of the systems of equations 1170 * A*x = b or A**T*x = b or A**H*x = b 1171 * 1172 * Details: http://www.netlib.org/lapack/explore-html/d1/d2f/ztrsv_8f.html 1173 * 1174 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1175 * @param TransA The type of transpose applied to matrix A. 1176 * @param Diag Specifies whether or not A is unit triangular. 1177 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 1178 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 1179 * @param incX The increment for the elements of vector x, must be larger than zero. 1180 */ ZTRSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)1181 public void ZTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 1182 // TRSV is the same as TRMV 1183 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); 1184 int N = A.getType().getY(); 1185 1186 boolean mUseIncSupp = isIncSupp(); 1187 long aID = A.getID(mRS); 1188 long xID = X.getID(mRS); 1189 if (mUseIncSupp) { 1190 aID = getDummyAlloc(A); 1191 xID = getDummyAlloc(X); 1192 } 1193 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1194 1195 } 1196 1197 /** 1198 * STBSV solves one of the systems of equations 1199 * A*x = b or A**T*x = b 1200 * 1201 * Details: http://www.netlib.org/lapack/explore-html/d0/d1f/stbsv_8f.html 1202 * 1203 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1204 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1205 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1206 * for i in range(0, n): 1207 * for j in range(i, min(i+k+1, n)): 1208 * b[i, j-i] = a[i, j] 1209 * 1210 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1211 * @param TransA The type of transpose applied to matrix A. 1212 * @param Diag Specifies whether or not A is unit triangular. 1213 * @param K The number of off-diagonals of the matrix A 1214 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1215 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1216 * @param incX The increment for the elements of vector x, must be larger than zero. 1217 */ STBSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)1218 public void STBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 1219 // TBSV is the same as TRMV + K >= 0 1220 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); 1221 int N = A.getType().getY(); 1222 if (K < 0) { 1223 throw new RSRuntimeException("Number of diagonals must be positive"); 1224 } 1225 1226 boolean mUseIncSupp = isIncSupp(); 1227 long aID = A.getID(mRS); 1228 long xID = X.getID(mRS); 1229 if (mUseIncSupp) { 1230 aID = getDummyAlloc(A); 1231 xID = getDummyAlloc(X); 1232 } 1233 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1234 } 1235 1236 /** 1237 * DTBSV solves one of the systems of equations 1238 * A*x = b or A**T*x = b 1239 * 1240 * Details: http://www.netlib.org/lapack/explore-html/d4/dcf/dtbsv_8f.html 1241 * 1242 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1243 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1244 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1245 * for i in range(0, n): 1246 * for j in range(i, min(i+k+1, n)): 1247 * b[i, j-i] = a[i, j] 1248 * 1249 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1250 * @param TransA The type of transpose applied to matrix A. 1251 * @param Diag Specifies whether or not A is unit triangular. 1252 * @param K The number of off-diagonals of the matrix A 1253 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 1254 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1255 * @param incX The increment for the elements of vector x, must be larger than zero. 1256 */ DTBSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)1257 public void DTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 1258 // TBSV is the same as TRMV + K >= 0 1259 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); 1260 int N = A.getType().getY(); 1261 if (K < 0) { 1262 throw new RSRuntimeException("Number of diagonals must be positive"); 1263 } 1264 1265 boolean mUseIncSupp = isIncSupp(); 1266 long aID = A.getID(mRS); 1267 long xID = X.getID(mRS); 1268 if (mUseIncSupp) { 1269 aID = getDummyAlloc(A); 1270 xID = getDummyAlloc(X); 1271 } 1272 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1273 } 1274 1275 /** 1276 * CTBSV solves one of the systems of equations 1277 * A*x = b or A**T*x = b or A**H*x = b 1278 * 1279 * Details: http://www.netlib.org/lapack/explore-html/d9/d5f/ctbsv_8f.html 1280 * 1281 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1282 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1283 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1284 * for i in range(0, n): 1285 * for j in range(i, min(i+k+1, n)): 1286 * b[i, j-i] = a[i, j] 1287 * 1288 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1289 * @param TransA The type of transpose applied to matrix A. 1290 * @param Diag Specifies whether or not A is unit triangular. 1291 * @param K The number of off-diagonals of the matrix A 1292 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1293 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1294 * @param incX The increment for the elements of vector x, must be larger than zero. 1295 */ CTBSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)1296 public void CTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 1297 // TBSV is the same as TRMV + K >= 0 1298 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); 1299 int N = A.getType().getY(); 1300 if (K < 0) { 1301 throw new RSRuntimeException("Number of diagonals must be positive"); 1302 } 1303 1304 boolean mUseIncSupp = isIncSupp(); 1305 long aID = A.getID(mRS); 1306 long xID = X.getID(mRS); 1307 if (mUseIncSupp) { 1308 aID = getDummyAlloc(A); 1309 xID = getDummyAlloc(X); 1310 } 1311 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1312 } 1313 1314 /** 1315 * ZTBSV solves one of the systems of equations 1316 * A*x = b or A**T*x = b or A**H*x = b 1317 * 1318 * Details: http://www.netlib.org/lapack/explore-html/d4/d5a/ztbsv_8f.html 1319 * 1320 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1321 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1322 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1323 * for i in range(0, n): 1324 * for j in range(i, min(i+k+1, n)): 1325 * b[i, j-i] = a[i, j] 1326 * 1327 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1328 * @param TransA The type of transpose applied to matrix A. 1329 * @param Diag Specifies whether or not A is unit triangular. 1330 * @param K The number of off-diagonals of the matrix A 1331 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 1332 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 1333 * @param incX The increment for the elements of vector x, must be larger than zero. 1334 */ ZTBSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)1335 public void ZTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 1336 // TBSV is the same as TRMV + K >= 0 1337 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); 1338 int N = A.getType().getY(); 1339 if (K < 0) { 1340 throw new RSRuntimeException("Number of diagonals must be positive"); 1341 } 1342 1343 boolean mUseIncSupp = isIncSupp(); 1344 long aID = A.getID(mRS); 1345 long xID = X.getID(mRS); 1346 if (mUseIncSupp) { 1347 aID = getDummyAlloc(A); 1348 xID = getDummyAlloc(X); 1349 } 1350 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1351 } 1352 1353 /** 1354 * STPSV solves one of the systems of equations 1355 * A*x = b or A**T*x = b 1356 * 1357 * Details: http://www.netlib.org/lapack/explore-html/d0/d7c/stpsv_8f.html 1358 * 1359 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1360 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1361 * 'a' to packed matrix 'b'. 1362 * k = 0 1363 * for i in range(0, n): 1364 * for j in range(i, n): 1365 * b[k++] = a[i, j] 1366 * 1367 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1368 * @param TransA The type of transpose applied to matrix A. 1369 * @param Diag Specifies whether or not A is unit triangular. 1370 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}. 1371 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1372 * @param incX The increment for the elements of vector x, must be larger than zero. 1373 */ STPSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1374 public void STPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1375 // TPSV is same as TPMV 1376 int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX); 1377 1378 boolean mUseIncSupp = isIncSupp(); 1379 long apID = Ap.getID(mRS); 1380 long xID = X.getID(mRS); 1381 if (mUseIncSupp) { 1382 apID = getDummyAlloc(Ap); 1383 xID = getDummyAlloc(X); 1384 } 1385 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1386 } 1387 1388 /** 1389 * DTPSV solves one of the systems of equations 1390 * A*x = b or A**T*x = b 1391 * 1392 * Details: http://www.netlib.org/lapack/explore-html/d9/d84/dtpsv_8f.html 1393 * 1394 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1395 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1396 * 'a' to packed matrix 'b'. 1397 * k = 0 1398 * for i in range(0, n): 1399 * for j in range(i, n): 1400 * b[k++] = a[i, j] 1401 * 1402 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1403 * @param TransA The type of transpose applied to matrix A. 1404 * @param Diag Specifies whether or not A is unit triangular. 1405 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}. 1406 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1407 * @param incX The increment for the elements of vector x, must be larger than zero. 1408 */ DTPSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1409 public void DTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1410 // TPSV is same as TPMV 1411 int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX); 1412 1413 boolean mUseIncSupp = isIncSupp(); 1414 long apID = Ap.getID(mRS); 1415 long xID = X.getID(mRS); 1416 if (mUseIncSupp) { 1417 apID = getDummyAlloc(Ap); 1418 xID = getDummyAlloc(X); 1419 } 1420 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1421 } 1422 1423 /** 1424 * CTPSV solves one of the systems of equations 1425 * A*x = b or A**T*x = b or A**H*x = b 1426 * 1427 * Details: http://www.netlib.org/lapack/explore-html/d8/d56/ctpsv_8f.html 1428 * 1429 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1430 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1431 * 'a' to packed matrix 'b'. 1432 * k = 0 1433 * for i in range(0, n): 1434 * for j in range(i, n): 1435 * b[k++] = a[i, j] 1436 * 1437 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1438 * @param TransA The type of transpose applied to matrix A. 1439 * @param Diag Specifies whether or not A is unit triangular. 1440 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}. 1441 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1442 * @param incX The increment for the elements of vector x, must be larger than zero. 1443 */ CTPSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1444 public void CTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1445 // TPSV is same as TPMV 1446 int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX); 1447 1448 boolean mUseIncSupp = isIncSupp(); 1449 long apID = Ap.getID(mRS); 1450 long xID = X.getID(mRS); 1451 if (mUseIncSupp) { 1452 apID = getDummyAlloc(Ap); 1453 xID = getDummyAlloc(X); 1454 } 1455 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1456 } 1457 1458 /** 1459 * ZTPSV solves one of the systems of equations 1460 * A*x = b or A**T*x = b or A**H*x = b 1461 * 1462 * Details: http://www.netlib.org/lapack/explore-html/da/d57/ztpsv_8f.html 1463 * 1464 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1465 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1466 * 'a' to packed matrix 'b'. 1467 * k = 0 1468 * for i in range(0, n): 1469 * for j in range(i, n): 1470 * b[k++] = a[i, j] 1471 * 1472 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1473 * @param TransA The type of transpose applied to matrix A. 1474 * @param Diag Specifies whether or not A is unit triangular. 1475 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}. 1476 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 1477 * @param incX The increment for the elements of vector x, must be larger than zero. 1478 */ ZTPSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1479 public void ZTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1480 // TPSV is same as TPMV 1481 int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX); 1482 1483 boolean mUseIncSupp = isIncSupp(); 1484 long apID = Ap.getID(mRS); 1485 long xID = X.getID(mRS); 1486 if (mUseIncSupp) { 1487 apID = getDummyAlloc(Ap); 1488 xID = getDummyAlloc(X); 1489 } 1490 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1491 } 1492 1493 /** 1494 * Level 2, S and D only 1495 */ validateSYMV(Element e, @Uplo int Uplo, Allocation A, Allocation X, Allocation Y, int incX, int incY)1496 static int validateSYMV(Element e, @Uplo int Uplo, Allocation A, Allocation X, Allocation Y, int incX, int incY) { 1497 validateUplo(Uplo); 1498 int N = A.getType().getY(); 1499 if (A.getType().getX() != N) { 1500 throw new RSRuntimeException("A must be a square matrix for SYMV"); 1501 } 1502 if (!A.getType().getElement().isCompatible(e) || 1503 !X.getType().getElement().isCompatible(e) || 1504 !Y.getType().getElement().isCompatible(e) ) { 1505 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1506 } 1507 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1508 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1509 } 1510 1511 if (incX <= 0 || incY <= 0) { 1512 throw new RSRuntimeException("Vector increments must be greater than 0"); 1513 } 1514 int expectedXDim = 1 + (N - 1) * incX; 1515 if (X.getType().getX() != expectedXDim) { 1516 throw new RSRuntimeException("Incorrect vector dimensions for SYMV"); 1517 } 1518 int expectedYDim = 1 + (N - 1) * incY; 1519 if (Y.getType().getX() != expectedYDim) { 1520 throw new RSRuntimeException("Incorrect vector dimensions for SYMV"); 1521 } 1522 return N; 1523 } validateSPMV(Element e, @Uplo int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY)1524 static int validateSPMV(Element e, @Uplo int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY) { 1525 validateUplo(Uplo); 1526 if (!Ap.getType().getElement().isCompatible(e) || 1527 !X.getType().getElement().isCompatible(e) || 1528 !Y.getType().getElement().isCompatible(e)) { 1529 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1530 } 1531 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1532 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1533 } 1534 1535 if (Ap.getType().getY() > 1) { 1536 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); 1537 } 1538 1539 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 1540 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 1541 throw new RSRuntimeException("Invalid dimension for Ap"); 1542 } 1543 if (incX <= 0 || incY <= 0) { 1544 throw new RSRuntimeException("Vector increments must be greater than 0"); 1545 } 1546 int expectedXDim = 1 + (N - 1) * incX; 1547 if (X.getType().getX() != expectedXDim) { 1548 throw new RSRuntimeException("Incorrect vector dimensions for SPMV"); 1549 } 1550 int expectedYDim = 1 + (N - 1) * incY; 1551 if (Y.getType().getX() != expectedYDim) { 1552 throw new RSRuntimeException("Incorrect vector dimensions for SPMV"); 1553 } 1554 1555 return N; 1556 } validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A)1557 static void validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1558 if (!A.getType().getElement().isCompatible(e) || 1559 !X.getType().getElement().isCompatible(e) || 1560 !Y.getType().getElement().isCompatible(e) ) { 1561 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1562 } 1563 1564 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1565 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1566 } 1567 1568 int M = A.getType().getY(); 1569 int N = A.getType().getX(); 1570 1571 if (N < 1 || M < 1) { 1572 throw new RSRuntimeException("M and N must be 1 or greater for GER"); 1573 } 1574 if (incX <= 0 || incY <= 0) { 1575 throw new RSRuntimeException("Vector increments must be greater than 0"); 1576 } 1577 int expectedXDim = 1 + (M - 1) * incX; 1578 if (X.getType().getX() != expectedXDim) { 1579 throw new RSRuntimeException("Incorrect vector dimensions for GER"); 1580 } 1581 int expectedYDim = 1 + (N - 1) * incY; 1582 if (Y.getType().getX() != expectedYDim) { 1583 throw new RSRuntimeException("Incorrect vector dimensions for GER"); 1584 } 1585 1586 1587 } validateSYR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation A)1588 static int validateSYR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation A) { 1589 validateUplo(Uplo); 1590 if (!A.getType().getElement().isCompatible(e) || 1591 !X.getType().getElement().isCompatible(e)) { 1592 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1593 } 1594 1595 int N = A.getType().getX(); 1596 1597 if (X.getType().getY() > 1) { 1598 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1599 } 1600 if (N != A.getType().getY()) { 1601 throw new RSRuntimeException("A must be a symmetric matrix"); 1602 } 1603 if (incX <= 0) { 1604 throw new RSRuntimeException("Vector increments must be greater than 0"); 1605 } 1606 int expectedXDim = 1 + (N - 1) * incX; 1607 if (X.getType().getX() != expectedXDim) { 1608 throw new RSRuntimeException("Incorrect vector dimensions for SYR"); 1609 } 1610 return N; 1611 } validateSPR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Ap)1612 static int validateSPR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Ap) { 1613 validateUplo(Uplo); 1614 if (!Ap.getType().getElement().isCompatible(e) || 1615 !X.getType().getElement().isCompatible(e)) { 1616 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1617 } 1618 if (X.getType().getY() > 1) { 1619 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1620 } 1621 1622 if (Ap.getType().getY() > 1) { 1623 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); 1624 } 1625 1626 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 1627 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 1628 throw new RSRuntimeException("Invalid dimension for Ap"); 1629 } 1630 if (incX <= 0) { 1631 throw new RSRuntimeException("Vector increments must be greater than 0"); 1632 } 1633 int expectedXDim = 1 + (N - 1) * incX; 1634 if (X.getType().getX() != expectedXDim) { 1635 throw new RSRuntimeException("Incorrect vector dimensions for SPR"); 1636 } 1637 1638 return N; 1639 } 1640 validateSYR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A)1641 static int validateSYR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1642 validateUplo(Uplo); 1643 if (!A.getType().getElement().isCompatible(e) || 1644 !X.getType().getElement().isCompatible(e) || 1645 !Y.getType().getElement().isCompatible(e)) { 1646 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1647 } 1648 1649 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1650 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1651 } 1652 1653 int N = A.getType().getX(); 1654 1655 if (N != A.getType().getY()) { 1656 throw new RSRuntimeException("A must be a symmetric matrix"); 1657 } 1658 if (incX <= 0 || incY <= 0) { 1659 throw new RSRuntimeException("Vector increments must be greater than 0"); 1660 } 1661 int expectedXDim = 1 + (N - 1) * incX; 1662 int expectedYDim = 1 + (N - 1) * incY; 1663 if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) { 1664 throw new RSRuntimeException("Incorrect vector dimensions for SYR"); 1665 } 1666 return N; 1667 1668 } validateSPR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)1669 static int validateSPR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 1670 validateUplo(Uplo); 1671 if (!Ap.getType().getElement().isCompatible(e) || 1672 !X.getType().getElement().isCompatible(e) || 1673 !Y.getType().getElement().isCompatible(e)) { 1674 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1675 } 1676 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1677 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1678 } 1679 1680 if (Ap.getType().getY() > 1) { 1681 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); 1682 } 1683 1684 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 1685 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 1686 throw new RSRuntimeException("Invalid dimension for Ap"); 1687 } 1688 if (incX <= 0 || incY <= 0) { 1689 throw new RSRuntimeException("Vector increments must be greater than 0"); 1690 } 1691 int expectedXDim = 1 + (N - 1) * incX; 1692 int expectedYDim = 1 + (N - 1) * incY; 1693 if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) { 1694 throw new RSRuntimeException("Incorrect vector dimensions for SPR2"); 1695 } 1696 1697 return N; 1698 } 1699 1700 /** 1701 * SSYMV performs the matrix-vector operation 1702 * y := alpha*A*x + beta*y 1703 * 1704 * Details: http://www.netlib.org/lapack/explore-html/d2/d94/ssymv_8f.html 1705 * 1706 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1707 * @param alpha The scalar alpha. 1708 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1709 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1710 * @param incX The increment for the elements of vector x, must be larger than zero. 1711 * @param beta The scalar beta. 1712 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1713 * @param incY The increment for the elements of vector y, must be larger than zero. 1714 */ SSYMV(@plo int Uplo, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY)1715 public void SSYMV(@Uplo int Uplo, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { 1716 int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY); 1717 1718 boolean mUseIncSupp = isIncSupp(); 1719 long aID = A.getID(mRS); 1720 long xID = X.getID(mRS); 1721 long yID = Y.getID(mRS); 1722 if (mUseIncSupp) { 1723 aID = getDummyAlloc(A); 1724 xID = getDummyAlloc(X); 1725 yID = getDummyAlloc(Y); 1726 } 1727 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 1728 } 1729 1730 /** 1731 * SSBMV performs the matrix-vector operation 1732 * y := alpha*A*x + beta*y 1733 * 1734 * Details: http://www.netlib.org/lapack/explore-html/d3/da1/ssbmv_8f.html 1735 * 1736 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1737 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1738 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1739 * for i in range(0, n): 1740 * for j in range(i, min(i+k+1, n)): 1741 * b[i, j-i] = a[i, j] 1742 * 1743 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. 1744 * @param K The number of off-diagonals of the matrix A 1745 * @param alpha The scalar alpha. 1746 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1747 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1748 * @param incX The increment for the elements of vector x, must be larger than zero. 1749 * @param beta The scalar beta. 1750 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1751 * @param incY The increment for the elements of vector y, must be larger than zero. 1752 */ SSBMV(@plo int Uplo, int K, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY)1753 public void SSBMV(@Uplo int Uplo, int K, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { 1754 // SBMV is the same as SYMV + K >= 0 1755 if (K < 0) { 1756 throw new RSRuntimeException("K must be greater than or equal to 0"); 1757 } 1758 int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY); 1759 1760 boolean mUseIncSupp = isIncSupp(); 1761 long aID = A.getID(mRS); 1762 long xID = X.getID(mRS); 1763 long yID = Y.getID(mRS); 1764 if (mUseIncSupp) { 1765 aID = getDummyAlloc(A); 1766 xID = getDummyAlloc(X); 1767 yID = getDummyAlloc(Y); 1768 } 1769 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 1770 } 1771 1772 /** 1773 * SSPMV performs the matrix-vector operation 1774 * y := alpha*A*x + beta*y 1775 * 1776 * Details: http://www.netlib.org/lapack/explore-html/d8/d68/sspmv_8f.html 1777 * 1778 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1779 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1780 * 'a' to packed matrix 'b'. 1781 * k = 0 1782 * for i in range(0, n): 1783 * for j in range(i, n): 1784 * b[k++] = a[i, j] 1785 * 1786 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. 1787 * @param alpha The scalar alpha. 1788 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}. 1789 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1790 * @param incX The increment for the elements of vector x, must be larger than zero. 1791 * @param beta The scalar beta. 1792 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1793 * @param incY The increment for the elements of vector y, must be larger than zero. 1794 */ SSPMV(@plo int Uplo, float alpha, Allocation Ap, Allocation X, int incX, float beta, Allocation Y, int incY)1795 public void SSPMV(@Uplo int Uplo, float alpha, Allocation Ap, Allocation X, int incX, float beta, Allocation Y, int incY) { 1796 int N = validateSPMV(Element.F32(mRS), Uplo, Ap, X, incX, Y, incY); 1797 1798 boolean mUseIncSupp = isIncSupp(); 1799 long apID = Ap.getID(mRS); 1800 long xID = X.getID(mRS); 1801 long yID = Y.getID(mRS); 1802 if (mUseIncSupp) { 1803 apID = getDummyAlloc(Ap); 1804 xID = getDummyAlloc(X); 1805 yID = getDummyAlloc(Y); 1806 } 1807 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, apID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 1808 } 1809 1810 /** 1811 * SGER performs the rank 1 operation 1812 * A := alpha*x*y**T + A 1813 * 1814 * Details: http://www.netlib.org/lapack/explore-html/db/d5c/sger_8f.html 1815 * 1816 * @param alpha The scalar alpha. 1817 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1818 * @param incX The increment for the elements of vector x, must be larger than zero. 1819 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1820 * @param incY The increment for the elements of vector y, must be larger than zero. 1821 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1822 */ SGER(float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)1823 public void SGER(float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1824 int M = A.getType().getY(); 1825 int N = A.getType().getX(); 1826 validateGER(Element.F32(mRS), X, incX, Y, incY, A); 1827 1828 boolean mUseIncSupp = isIncSupp(); 1829 long aID = A.getID(mRS); 1830 long xID = X.getID(mRS); 1831 long yID = Y.getID(mRS); 1832 if (mUseIncSupp) { 1833 aID = getDummyAlloc(A); 1834 xID = getDummyAlloc(X); 1835 yID = getDummyAlloc(Y); 1836 } 1837 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sger, 0, 0, 0, 0, 0, M, N, 0, alpha, xID, yID, 0.f, aID, incX, incY, 0, 0, mUseIncSupp); 1838 } 1839 1840 /** 1841 * SSYR performs the rank 1 operation 1842 * A := alpha*x*x**T + A 1843 * 1844 * Details: http://www.netlib.org/lapack/explore-html/d6/dac/ssyr_8f.html 1845 * 1846 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1847 * @param alpha The scalar alpha. 1848 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1849 * @param incX The increment for the elements of vector x, must be larger than zero. 1850 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1851 */ SSYR(@plo int Uplo, float alpha, Allocation X, int incX, Allocation A)1852 public void SSYR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) { 1853 int N = validateSYR(Element.F32(mRS), Uplo, X, incX, A); 1854 1855 boolean mUseIncSupp = isIncSupp(); 1856 long aID = A.getID(mRS); 1857 long xID = X.getID(mRS); 1858 if (mUseIncSupp) { 1859 aID = getDummyAlloc(A); 1860 xID = getDummyAlloc(X); 1861 } 1862 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, aID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp); 1863 } 1864 1865 /** 1866 * SSPR performs the rank 1 operation 1867 * A := alpha*x*x**T + A 1868 * 1869 * Details: http://www.netlib.org/lapack/explore-html/d2/d9b/sspr_8f.html 1870 * 1871 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1872 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1873 * 'a' to packed matrix 'b'. 1874 * k = 0 1875 * for i in range(0, n): 1876 * for j in range(i, n): 1877 * b[k++] = a[i, j] 1878 * 1879 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 1880 * @param alpha The scalar alpha. 1881 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1882 * @param incX The increment for the elements of vector x, must be larger than zero. 1883 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}. 1884 */ SSPR(@plo int Uplo, float alpha, Allocation X, int incX, Allocation Ap)1885 public void SSPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) { 1886 int N = validateSPR(Element.F32(mRS), Uplo, X, incX, Ap); 1887 1888 boolean mUseIncSupp = isIncSupp(); 1889 long apID = Ap.getID(mRS); 1890 long xID = X.getID(mRS); 1891 if (mUseIncSupp) { 1892 apID = getDummyAlloc(Ap); 1893 xID = getDummyAlloc(X); 1894 } 1895 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, apID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp); 1896 } 1897 1898 /** 1899 * SSYR2 performs the symmetric rank 2 operation 1900 * A := alpha*x*y**T + alpha*y*x**T + A 1901 * 1902 * Details: http://www.netlib.org/lapack/explore-html/db/d99/ssyr2_8f.html 1903 * 1904 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1905 * @param alpha The scalar alpha. 1906 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1907 * @param incX The increment for the elements of vector x, must be larger than zero. 1908 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1909 * @param incY The increment for the elements of vector y, must be larger than zero. 1910 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1911 */ SSYR2(@plo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)1912 public void SSYR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1913 int N = validateSYR2(Element.F32(mRS), Uplo, X, incX, Y, incY, A); 1914 1915 boolean mUseIncSupp = isIncSupp(); 1916 long aID = A.getID(mRS); 1917 long xID = X.getID(mRS); 1918 long yID = Y.getID(mRS); 1919 if (mUseIncSupp) { 1920 aID = getDummyAlloc(A); 1921 xID = getDummyAlloc(X); 1922 yID = getDummyAlloc(Y); 1923 } 1924 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, aID, incX, incY, 0, 0, mUseIncSupp); 1925 } 1926 1927 /** 1928 * SSPR2 performs the symmetric rank 2 operation 1929 * A := alpha*x*y**T + alpha*y*x**T + A 1930 * 1931 * Details: http://www.netlib.org/lapack/explore-html/db/d3e/sspr2_8f.html 1932 * 1933 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1934 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1935 * 'a' to packed matrix 'b'. 1936 * k = 0 1937 * for i in range(0, n): 1938 * for j in range(i, n): 1939 * b[k++] = a[i, j] 1940 * 1941 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 1942 * @param alpha The scalar alpha. 1943 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1944 * @param incX The increment for the elements of vector x, must be larger than zero. 1945 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1946 * @param incY The increment for the elements of vector y, must be larger than zero. 1947 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}. 1948 */ SSPR2(@plo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)1949 public void SSPR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 1950 int N = validateSPR2(Element.F32(mRS), Uplo, X, incX, Y, incY, Ap); 1951 1952 boolean mUseIncSupp = isIncSupp(); 1953 long apID = Ap.getID(mRS); 1954 long xID = X.getID(mRS); 1955 long yID = Y.getID(mRS); 1956 if (mUseIncSupp) { 1957 apID = getDummyAlloc(Ap); 1958 xID = getDummyAlloc(X); 1959 yID = getDummyAlloc(Y); 1960 } 1961 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, apID, incX, incY, 0, 0, mUseIncSupp); 1962 } 1963 1964 /** 1965 * DSYMV performs the matrix-vector operation 1966 * y := alpha*A*x + beta*y 1967 * 1968 * Details: http://www.netlib.org/lapack/explore-html/d8/dbe/dsymv_8f.html 1969 * 1970 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1971 * @param alpha The scalar alpha. 1972 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 1973 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1974 * @param incX The increment for the elements of vector x, must be larger than zero. 1975 * @param beta The scalar beta. 1976 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 1977 * @param incY The increment for the elements of vector y, must be larger than zero. 1978 */ DSYMV(@plo int Uplo, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY)1979 public void DSYMV(@Uplo int Uplo, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { 1980 int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY); 1981 1982 boolean mUseIncSupp = isIncSupp(); 1983 long aID = A.getID(mRS); 1984 long xID = X.getID(mRS); 1985 long yID = Y.getID(mRS); 1986 if (mUseIncSupp) { 1987 aID = getDummyAlloc(A); 1988 xID = getDummyAlloc(X); 1989 yID = getDummyAlloc(Y); 1990 } 1991 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 1992 } 1993 1994 /** 1995 * DSBMV performs the matrix-vector operation 1996 * y := alpha*A*x + beta*y 1997 * 1998 * Details: http://www.netlib.org/lapack/explore-html/d8/d1e/dsbmv_8f.html 1999 * 2000 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 2001 * but only the region N*(K+1) will be referenced. The following subroutine can is an 2002 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 2003 * for i in range(0, n): 2004 * for j in range(i, min(i+k+1, n)): 2005 * b[i, j-i] = a[i, j] 2006 * 2007 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. 2008 * @param K The number of off-diagonals of the matrix A 2009 * @param alpha The scalar alpha. 2010 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2011 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2012 * @param incX The increment for the elements of vector x, must be larger than zero. 2013 * @param beta The scalar beta. 2014 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 2015 * @param incY The increment for the elements of vector y, must be larger than zero. 2016 */ DSBMV(@plo int Uplo, int K, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY)2017 public void DSBMV(@Uplo int Uplo, int K, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { 2018 // SBMV is the same as SYMV + K >= 0 2019 if (K < 0) { 2020 throw new RSRuntimeException("K must be greater than or equal to 0"); 2021 } 2022 int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY); 2023 2024 boolean mUseIncSupp = isIncSupp(); 2025 long aID = A.getID(mRS); 2026 long xID = X.getID(mRS); 2027 long yID = Y.getID(mRS); 2028 if (mUseIncSupp) { 2029 aID = getDummyAlloc(A); 2030 xID = getDummyAlloc(X); 2031 yID = getDummyAlloc(Y); 2032 } 2033 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 2034 } 2035 2036 /** 2037 * DSPMV performs the matrix-vector operation 2038 * y := alpha*A*x + beta*y 2039 * 2040 * Details: http://www.netlib.org/lapack/explore-html/d4/d85/dspmv_8f.html 2041 * 2042 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2043 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2044 * 'a' to packed matrix 'b'. 2045 * k = 0 2046 * for i in range(0, n): 2047 * for j in range(i, n): 2048 * b[k++] = a[i, j] 2049 * 2050 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. 2051 * @param alpha The scalar alpha. 2052 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}. 2053 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2054 * @param incX The increment for the elements of vector x, must be larger than zero. 2055 * @param beta The scalar beta. 2056 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 2057 * @param incY The increment for the elements of vector y, must be larger than zero. 2058 */ DSPMV(@plo int Uplo, double alpha, Allocation Ap, Allocation X, int incX, double beta, Allocation Y, int incY)2059 public void DSPMV(@Uplo int Uplo, double alpha, Allocation Ap, Allocation X, int incX, double beta, Allocation Y, int incY) { 2060 int N = validateSPMV(Element.F64(mRS), Uplo, Ap, X, incX, Y, incY); 2061 2062 boolean mUseIncSupp = isIncSupp(); 2063 long apID = Ap.getID(mRS); 2064 long xID = X.getID(mRS); 2065 long yID = Y.getID(mRS); 2066 if (mUseIncSupp) { 2067 apID = getDummyAlloc(Ap); 2068 xID = getDummyAlloc(X); 2069 yID = getDummyAlloc(Y); 2070 } 2071 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, apID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 2072 } 2073 2074 /** 2075 * DGER performs the rank 1 operation 2076 * A := alpha*x*y**T + A 2077 * 2078 * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dger_8f.html 2079 * 2080 * @param alpha The scalar alpha. 2081 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2082 * @param incX The increment for the elements of vector x, must be larger than zero. 2083 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 2084 * @param incY The increment for the elements of vector y, must be larger than zero. 2085 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2086 */ DGER(double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2087 public void DGER(double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2088 int M = A.getType().getY(); 2089 int N = A.getType().getX(); 2090 validateGER(Element.F64(mRS), X, incX, Y, incY, A); 2091 2092 boolean mUseIncSupp = isIncSupp(); 2093 long aID = A.getID(mRS); 2094 long xID = X.getID(mRS); 2095 long yID = Y.getID(mRS); 2096 if (mUseIncSupp) { 2097 aID = getDummyAlloc(A); 2098 xID = getDummyAlloc(X); 2099 yID = getDummyAlloc(Y); 2100 } 2101 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dger, 0, 0, 0, 0, 0, M, N, 0, alpha, xID, yID, 0.f, aID, incX, incY, 0, 0, mUseIncSupp); 2102 } 2103 2104 /** 2105 * DSYR performs the rank 1 operation 2106 * A := alpha*x*x**T + A 2107 * 2108 * Details: http://www.netlib.org/lapack/explore-html/d3/d60/dsyr_8f.html 2109 * 2110 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2111 * @param alpha The scalar alpha. 2112 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2113 * @param incX The increment for the elements of vector x, must be larger than zero. 2114 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2115 */ DSYR(@plo int Uplo, double alpha, Allocation X, int incX, Allocation A)2116 public void DSYR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) { 2117 int N = validateSYR(Element.F64(mRS), Uplo, X, incX, A); 2118 2119 boolean mUseIncSupp = isIncSupp(); 2120 long aID = A.getID(mRS); 2121 long xID = X.getID(mRS); 2122 if (mUseIncSupp) { 2123 aID = getDummyAlloc(A); 2124 xID = getDummyAlloc(X); 2125 } 2126 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, aID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp); 2127 } 2128 2129 /** 2130 * DSPR performs the rank 1 operation 2131 * A := alpha*x*x**T + A 2132 * 2133 * Details: http://www.netlib.org/lapack/explore-html/dd/dba/dspr_8f.html 2134 * 2135 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2136 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2137 * 'a' to packed matrix 'b'. 2138 * k = 0 2139 * for i in range(0, n): 2140 * for j in range(i, n): 2141 * b[k++] = a[i, j] 2142 * 2143 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2144 * @param alpha The scalar alpha. 2145 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2146 * @param incX The increment for the elements of vector x, must be larger than zero. 2147 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}. 2148 */ DSPR(@plo int Uplo, double alpha, Allocation X, int incX, Allocation Ap)2149 public void DSPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) { 2150 int N = validateSPR(Element.F64(mRS), Uplo, X, incX, Ap); 2151 2152 boolean mUseIncSupp = isIncSupp(); 2153 long apID = Ap.getID(mRS); 2154 long xID = X.getID(mRS); 2155 if (mUseIncSupp) { 2156 apID = getDummyAlloc(Ap); 2157 xID = getDummyAlloc(X); 2158 } 2159 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, apID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp); 2160 } 2161 2162 /** 2163 * DSYR2 performs the symmetric rank 2 operation 2164 * A := alpha*x*y**T + alpha*y*x**T + A 2165 * 2166 * Details: http://www.netlib.org/lapack/explore-html/de/d41/dsyr2_8f.html 2167 * 2168 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2169 * @param alpha The scalar alpha. 2170 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2171 * @param incX The increment for the elements of vector x, must be larger than zero. 2172 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 2173 * @param incY The increment for the elements of vector y, must be larger than zero. 2174 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2175 */ DSYR2(@plo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2176 public void DSYR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2177 int N = validateSYR2(Element.F64(mRS), Uplo, X, incX, Y, incY, A); 2178 2179 boolean mUseIncSupp = isIncSupp(); 2180 long aID = A.getID(mRS); 2181 long xID = X.getID(mRS); 2182 long yID = Y.getID(mRS); 2183 if (mUseIncSupp) { 2184 aID = getDummyAlloc(A); 2185 xID = getDummyAlloc(X); 2186 yID = getDummyAlloc(Y); 2187 } 2188 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2189 } 2190 2191 /** 2192 * DSPR2 performs the symmetric rank 2 operation 2193 * A := alpha*x*y**T + alpha*y*x**T + A 2194 * 2195 * Details: http://www.netlib.org/lapack/explore-html/dd/d9e/dspr2_8f.html 2196 * 2197 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2198 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2199 * 'a' to packed matrix 'b'. 2200 * k = 0 2201 * for i in range(0, n): 2202 * for j in range(i, n): 2203 * b[k++] = a[i, j] 2204 * 2205 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2206 * @param alpha The scalar alpha. 2207 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2208 * @param incX The increment for the elements of vector x, must be larger than zero. 2209 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 2210 * @param incY The increment for the elements of vector y, must be larger than zero. 2211 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}. 2212 */ DSPR2(@plo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)2213 public void DSPR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 2214 int N = validateSPR2(Element.F64(mRS), Uplo, X, incX, Y, incY, Ap); 2215 2216 boolean mUseIncSupp = isIncSupp(); 2217 long apID = Ap.getID(mRS); 2218 long xID = X.getID(mRS); 2219 long yID = Y.getID(mRS); 2220 if (mUseIncSupp) { 2221 apID = getDummyAlloc(Ap); 2222 xID = getDummyAlloc(X); 2223 yID = getDummyAlloc(Y); 2224 } 2225 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, apID, incX, incY, 0, 0, mUseIncSupp); 2226 } 2227 2228 2229 /** 2230 * Level 2, C and Z only 2231 */ 2232 validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A)2233 static void validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2234 if (!A.getType().getElement().isCompatible(e) || 2235 !X.getType().getElement().isCompatible(e) || 2236 !Y.getType().getElement().isCompatible(e)) { 2237 throw new RSRuntimeException("Called BLAS with wrong Element type"); 2238 } 2239 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 2240 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 2241 } 2242 2243 int M = A.getType().getY(); 2244 int N = A.getType().getX(); 2245 if (incX <= 0 || incY <= 0) { 2246 throw new RSRuntimeException("Vector increments must be greater than 0"); 2247 } 2248 int expectedXDim = 1 + (M - 1) * incX; 2249 if (X.getType().getX() != expectedXDim) { 2250 throw new RSRuntimeException("Incorrect vector dimensions for GERU"); 2251 } 2252 int expectedYDim = 1 + (N - 1) * incY; 2253 if (Y.getType().getX() != expectedYDim) { 2254 throw new RSRuntimeException("Incorrect vector dimensions for GERU"); 2255 } 2256 2257 } 2258 2259 /** 2260 * CHEMV performs the matrix-vector operation 2261 * y := alpha*A*x + beta*y 2262 * 2263 * Details: http://www.netlib.org/lapack/explore-html/d7/d51/chemv_8f.html 2264 * 2265 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2266 * @param alpha The scalar alpha. 2267 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2268 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2269 * @param incX The increment for the elements of vector x, must be larger than zero. 2270 * @param beta The scalar beta. 2271 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2272 * @param incY The increment for the elements of vector y, must be larger than zero. 2273 */ CHEMV(@plo int Uplo, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY)2274 public void CHEMV(@Uplo int Uplo, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 2275 // HEMV is the same as SYR2 validation-wise 2276 int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A); 2277 2278 boolean mUseIncSupp = isIncSupp(); 2279 long aID = A.getID(mRS); 2280 long xID = X.getID(mRS); 2281 long yID = Y.getID(mRS); 2282 if (mUseIncSupp) { 2283 aID = getDummyAlloc(A); 2284 xID = getDummyAlloc(X); 2285 yID = getDummyAlloc(Y); 2286 } 2287 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 2288 } 2289 2290 /** 2291 * CHBMV performs the matrix-vector operation 2292 * y := alpha*A*x + beta*y 2293 * 2294 * Details: http://www.netlib.org/lapack/explore-html/db/dc2/chbmv_8f.html 2295 * 2296 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 2297 * but only the region N*(K+1) will be referenced. The following subroutine can is an 2298 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 2299 * for i in range(0, n): 2300 * for j in range(i, min(i+k+1, n)): 2301 * b[i, j-i] = a[i, j] 2302 * 2303 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. 2304 * @param K The number of off-diagonals of the matrix A 2305 * @param alpha The scalar alpha. 2306 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2307 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2308 * @param incX The increment for the elements of vector x, must be larger than zero. 2309 * @param beta The scalar beta. 2310 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2311 * @param incY The increment for the elements of vector y, must be larger than zero. 2312 */ CHBMV(@plo int Uplo, int K, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY)2313 public void CHBMV(@Uplo int Uplo, int K, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 2314 // HBMV is the same as SYR2 validation-wise 2315 int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A); 2316 if (K < 0) { 2317 throw new RSRuntimeException("K must be 0 or greater for HBMV"); 2318 } 2319 2320 boolean mUseIncSupp = isIncSupp(); 2321 long aID = A.getID(mRS); 2322 long xID = X.getID(mRS); 2323 long yID = Y.getID(mRS); 2324 if (mUseIncSupp) { 2325 aID = getDummyAlloc(A); 2326 xID = getDummyAlloc(X); 2327 yID = getDummyAlloc(Y); 2328 } 2329 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 2330 } 2331 2332 /** 2333 * CHPMV performs the matrix-vector operation 2334 * y := alpha*A*x + beta*y 2335 * 2336 * Details: http://www.netlib.org/lapack/explore-html/d2/d06/chpmv_8f.html 2337 * 2338 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2339 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2340 * 'a' to packed matrix 'b'. 2341 * k = 0 2342 * for i in range(0, n): 2343 * for j in range(i, n): 2344 * b[k++] = a[i, j] 2345 * 2346 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. 2347 * @param alpha The scalar alpha. 2348 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2349 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2350 * @param incX The increment for the elements of vector x, must be larger than zero. 2351 * @param beta The scalar beta. 2352 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2353 * @param incY The increment for the elements of vector y, must be larger than zero. 2354 */ CHPMV(@plo int Uplo, Float2 alpha, Allocation Ap, Allocation X, int incX, Float2 beta, Allocation Y, int incY)2355 public void CHPMV(@Uplo int Uplo, Float2 alpha, Allocation Ap, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 2356 // HPMV is the same as SPR2 2357 int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap); 2358 2359 boolean mUseIncSupp = isIncSupp(); 2360 long apID = Ap.getID(mRS); 2361 long xID = X.getID(mRS); 2362 long yID = Y.getID(mRS); 2363 if (mUseIncSupp) { 2364 apID = getDummyAlloc(Ap); 2365 xID = getDummyAlloc(X); 2366 yID = getDummyAlloc(Y); 2367 } 2368 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, apID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 2369 } 2370 2371 /** 2372 * CGERU performs the rank 1 operation 2373 * A := alpha*x*y**T + A 2374 * 2375 * Details: http://www.netlib.org/lapack/explore-html/db/d5f/cgeru_8f.html 2376 * 2377 * @param alpha The scalar alpha. 2378 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2379 * @param incX The increment for the elements of vector x, must be larger than zero. 2380 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2381 * @param incY The increment for the elements of vector y, must be larger than zero. 2382 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2383 */ CGERU(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2384 public void CGERU(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2385 validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A); 2386 int M = A.getType().getY(); 2387 int N = A.getType().getX(); 2388 2389 boolean mUseIncSupp = isIncSupp(); 2390 long aID = A.getID(mRS); 2391 long xID = X.getID(mRS); 2392 long yID = Y.getID(mRS); 2393 if (mUseIncSupp) { 2394 aID = getDummyAlloc(A); 2395 xID = getDummyAlloc(X); 2396 yID = getDummyAlloc(Y); 2397 } 2398 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2399 } 2400 2401 /** 2402 * CGERC performs the rank 1 operation 2403 * A := alpha*x*y**H + A 2404 * 2405 * Details: http://www.netlib.org/lapack/explore-html/dd/d84/cgerc_8f.html 2406 * 2407 * @param alpha The scalar alpha. 2408 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2409 * @param incX The increment for the elements of vector x, must be larger than zero. 2410 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2411 * @param incY The increment for the elements of vector y, must be larger than zero. 2412 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2413 */ CGERC(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2414 public void CGERC(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2415 // same as GERU 2416 validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A); 2417 int M = A.getType().getY(); 2418 int N = A.getType().getX(); 2419 2420 boolean mUseIncSupp = isIncSupp(); 2421 long aID = A.getID(mRS); 2422 long xID = X.getID(mRS); 2423 long yID = Y.getID(mRS); 2424 if (mUseIncSupp) { 2425 aID = getDummyAlloc(A); 2426 xID = getDummyAlloc(X); 2427 yID = getDummyAlloc(Y); 2428 } 2429 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2430 } 2431 2432 /** 2433 * CHER performs the rank 1 operation 2434 * A := alpha*x*x**H + A 2435 * 2436 * Details: http://www.netlib.org/lapack/explore-html/d3/d6d/cher_8f.html 2437 * 2438 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2439 * @param alpha The scalar alpha. 2440 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2441 * @param incX The increment for the elements of vector x, must be larger than zero. 2442 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2443 */ CHER(@plo int Uplo, float alpha, Allocation X, int incX, Allocation A)2444 public void CHER(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) { 2445 // same as SYR 2446 int N = validateSYR(Element.F32_2(mRS), Uplo, X, incX, A); 2447 2448 boolean mUseIncSupp = isIncSupp(); 2449 long aID = A.getID(mRS); 2450 long xID = X.getID(mRS); 2451 if (mUseIncSupp) { 2452 aID = getDummyAlloc(A); 2453 xID = getDummyAlloc(X); 2454 } 2455 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, aID, incX, 0, 0, 0, mUseIncSupp); 2456 } 2457 2458 /** 2459 * CHPR performs the rank 1 operation 2460 * A := alpha*x*x**H + A 2461 * 2462 * Details: http://www.netlib.org/lapack/explore-html/db/dcd/chpr_8f.html 2463 * 2464 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2465 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2466 * 'a' to packed matrix 'b'. 2467 * k = 0 2468 * for i in range(0, n): 2469 * for j in range(i, n): 2470 * b[k++] = a[i, j] 2471 * 2472 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2473 * @param alpha The scalar alpha. 2474 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2475 * @param incX The increment for the elements of vector x, must be larger than zero. 2476 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2477 */ CHPR(@plo int Uplo, float alpha, Allocation X, int incX, Allocation Ap)2478 public void CHPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) { 2479 // equivalent to SPR for validation 2480 int N = validateSPR(Element.F32_2(mRS), Uplo, X, incX, Ap); 2481 2482 boolean mUseIncSupp = isIncSupp(); 2483 long apID = Ap.getID(mRS); 2484 long xID = X.getID(mRS); 2485 if (mUseIncSupp) { 2486 apID = getDummyAlloc(Ap); 2487 xID = getDummyAlloc(X); 2488 } 2489 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, apID, incX, 0, 0, 0, mUseIncSupp); 2490 } 2491 2492 /** 2493 * CHER2 performs the symmetric rank 2 operation 2494 * A := alpha*x*y**H + alpha*y*x**H + A 2495 * 2496 * Details: http://www.netlib.org/lapack/explore-html/db/d87/cher2_8f.html 2497 * 2498 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2499 * @param alpha The scalar alpha. 2500 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2501 * @param incX The increment for the elements of vector x, must be larger than zero. 2502 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2503 * @param incY The increment for the elements of vector y, must be larger than zero. 2504 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2505 */ CHER2(@plo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2506 public void CHER2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2507 // same as SYR2 2508 int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A); 2509 2510 boolean mUseIncSupp = isIncSupp(); 2511 long aID = A.getID(mRS); 2512 long xID = X.getID(mRS); 2513 long yID = Y.getID(mRS); 2514 if (mUseIncSupp) { 2515 aID = getDummyAlloc(A); 2516 xID = getDummyAlloc(X); 2517 yID = getDummyAlloc(Y); 2518 } 2519 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2520 } 2521 2522 /** 2523 * CHPR2 performs the symmetric rank 2 operation 2524 * A := alpha*x*y**H + alpha*y*x**H + A 2525 * 2526 * Details: http://www.netlib.org/lapack/explore-html/d6/d44/chpr2_8f.html 2527 * 2528 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2529 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2530 * 'a' to packed matrix 'b'. 2531 * k = 0 2532 * for i in range(0, n): 2533 * for j in range(i, n): 2534 * b[k++] = a[i, j] 2535 * 2536 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2537 * @param alpha The scalar alpha. 2538 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2539 * @param incX The increment for the elements of vector x, must be larger than zero. 2540 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2541 * @param incY The increment for the elements of vector y, must be larger than zero. 2542 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2543 */ CHPR2(@plo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)2544 public void CHPR2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 2545 // same as SPR2 2546 int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap); 2547 2548 boolean mUseIncSupp = isIncSupp(); 2549 long apID = Ap.getID(mRS); 2550 long xID = X.getID(mRS); 2551 long yID = Y.getID(mRS); 2552 if (mUseIncSupp) { 2553 apID = getDummyAlloc(Ap); 2554 xID = getDummyAlloc(X); 2555 yID = getDummyAlloc(Y); 2556 } 2557 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, apID, incX, incY, 0, 0, mUseIncSupp); 2558 } 2559 2560 /** 2561 * ZHEMV performs the matrix-vector operation 2562 * y := alpha*A*x + beta*y 2563 * 2564 * Details: http://www.netlib.org/lapack/explore-html/d0/ddd/zhemv_8f.html 2565 * 2566 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2567 * @param alpha The scalar alpha. 2568 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2569 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2570 * @param incX The increment for the elements of vector x, must be larger than zero. 2571 * @param beta The scalar beta. 2572 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2573 * @param incY The increment for the elements of vector y, must be larger than zero. 2574 */ ZHEMV(@plo int Uplo, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY)2575 public void ZHEMV(@Uplo int Uplo, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 2576 // HEMV is the same as SYR2 validation-wise 2577 int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A); 2578 2579 boolean mUseIncSupp = isIncSupp(); 2580 long aID = A.getID(mRS); 2581 long xID = X.getID(mRS); 2582 long yID = Y.getID(mRS); 2583 if (mUseIncSupp) { 2584 aID = getDummyAlloc(A); 2585 xID = getDummyAlloc(X); 2586 yID = getDummyAlloc(Y); 2587 } 2588 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 2589 } 2590 2591 /** 2592 * ZHBMV performs the matrix-vector operation 2593 * y := alpha*A*x + beta*y 2594 * 2595 * Details: http://www.netlib.org/lapack/explore-html/d3/d1a/zhbmv_8f.html 2596 * 2597 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 2598 * but only the region N*(K+1) will be referenced. The following subroutine can is an 2599 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 2600 * for i in range(0, n): 2601 * for j in range(i, min(i+k+1, n)): 2602 * b[i, j-i] = a[i, j] 2603 * 2604 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. 2605 * @param K The number of off-diagonals of the matrix A 2606 * @param alpha The scalar alpha. 2607 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2608 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2609 * @param incX The increment for the elements of vector x, must be larger than zero. 2610 * @param beta The scalar beta. 2611 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2612 * @param incY The increment for the elements of vector y, must be larger than zero. 2613 */ ZHBMV(@plo int Uplo, int K, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY)2614 public void ZHBMV(@Uplo int Uplo, int K, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 2615 // HBMV is the same as SYR2 validation-wise 2616 int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A); 2617 if (K < 0) { 2618 throw new RSRuntimeException("K must be 0 or greater for HBMV"); 2619 } 2620 2621 boolean mUseIncSupp = isIncSupp(); 2622 long aID = A.getID(mRS); 2623 long xID = X.getID(mRS); 2624 long yID = Y.getID(mRS); 2625 if (mUseIncSupp) { 2626 aID = getDummyAlloc(A); 2627 xID = getDummyAlloc(X); 2628 yID = getDummyAlloc(Y); 2629 } 2630 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 2631 } 2632 2633 /** 2634 * ZHPMV performs the matrix-vector operation 2635 * y := alpha*A*x + beta*y 2636 * 2637 * Details: http://www.netlib.org/lapack/explore-html/d0/d60/zhpmv_8f.html 2638 * 2639 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2640 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2641 * 'a' to packed matrix 'b'. 2642 * k = 0 2643 * for i in range(0, n): 2644 * for j in range(i, n): 2645 * b[k++] = a[i, j] 2646 * 2647 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. 2648 * @param alpha The scalar alpha. 2649 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2650 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2651 * @param incX The increment for the elements of vector x, must be larger than zero. 2652 * @param beta The scalar beta. 2653 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2654 * @param incY The increment for the elements of vector y, must be larger than zero. 2655 */ ZHPMV(@plo int Uplo, Double2 alpha, Allocation Ap, Allocation X, int incX, Double2 beta, Allocation Y, int incY)2656 public void ZHPMV(@Uplo int Uplo, Double2 alpha, Allocation Ap, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 2657 // HPMV is the same as SPR2 2658 int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap); 2659 2660 boolean mUseIncSupp = isIncSupp(); 2661 long apID = Ap.getID(mRS); 2662 long xID = X.getID(mRS); 2663 long yID = Y.getID(mRS); 2664 if (mUseIncSupp) { 2665 apID = getDummyAlloc(Ap); 2666 xID = getDummyAlloc(X); 2667 yID = getDummyAlloc(Y); 2668 } 2669 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, apID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 2670 } 2671 2672 /** 2673 * ZGERU performs the rank 1 operation 2674 * A := alpha*x*y**T + A 2675 * 2676 * Details: http://www.netlib.org/lapack/explore-html/d7/d12/zgeru_8f.html 2677 * 2678 * @param alpha The scalar alpha. 2679 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2680 * @param incX The increment for the elements of vector x, must be larger than zero. 2681 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2682 * @param incY The increment for the elements of vector y, must be larger than zero. 2683 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2684 */ ZGERU(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2685 public void ZGERU(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2686 validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A); 2687 int M = A.getType().getY(); 2688 int N = A.getType().getX(); 2689 2690 boolean mUseIncSupp = isIncSupp(); 2691 long aID = A.getID(mRS); 2692 long xID = X.getID(mRS); 2693 long yID = Y.getID(mRS); 2694 if (mUseIncSupp) { 2695 aID = getDummyAlloc(A); 2696 xID = getDummyAlloc(X); 2697 yID = getDummyAlloc(Y); 2698 } 2699 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2700 } 2701 2702 /** 2703 * ZGERC performs the rank 1 operation 2704 * A := alpha*x*y**H + A 2705 * 2706 * Details: http://www.netlib.org/lapack/explore-html/d3/dad/zgerc_8f.html 2707 * 2708 * @param alpha The scalar alpha. 2709 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2710 * @param incX The increment for the elements of vector x, must be larger than zero. 2711 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2712 * @param incY The increment for the elements of vector y, must be larger than zero. 2713 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2714 */ ZGERC(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2715 public void ZGERC(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2716 // same as GERU 2717 validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A); 2718 int M = A.getType().getY(); 2719 int N = A.getType().getX(); 2720 2721 boolean mUseIncSupp = isIncSupp(); 2722 long aID = A.getID(mRS); 2723 long xID = X.getID(mRS); 2724 long yID = Y.getID(mRS); 2725 if (mUseIncSupp) { 2726 aID = getDummyAlloc(A); 2727 xID = getDummyAlloc(X); 2728 yID = getDummyAlloc(Y); 2729 } 2730 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2731 } 2732 2733 /** 2734 * ZHER performs the rank 1 operation 2735 * A := alpha*x*x**H + A 2736 * 2737 * Details: http://www.netlib.org/lapack/explore-html/de/d0e/zher_8f.html 2738 * 2739 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2740 * @param alpha The scalar alpha. 2741 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2742 * @param incX The increment for the elements of vector x, must be larger than zero. 2743 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2744 */ ZHER(@plo int Uplo, double alpha, Allocation X, int incX, Allocation A)2745 public void ZHER(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) { 2746 // same as SYR 2747 int N = validateSYR(Element.F64_2(mRS), Uplo, X, incX, A); 2748 2749 boolean mUseIncSupp = isIncSupp(); 2750 long aID = A.getID(mRS); 2751 long xID = X.getID(mRS); 2752 if (mUseIncSupp) { 2753 aID = getDummyAlloc(A); 2754 xID = getDummyAlloc(X); 2755 } 2756 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, aID, incX, 0, 0, 0, mUseIncSupp); 2757 } 2758 2759 /** 2760 * ZHPR performs the rank 1 operation 2761 * A := alpha*x*x**H + A 2762 * 2763 * Details: http://www.netlib.org/lapack/explore-html/de/de1/zhpr_8f.html 2764 * 2765 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2766 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2767 * 'a' to packed matrix 'b'. 2768 * k = 0 2769 * for i in range(0, n): 2770 * for j in range(i, n): 2771 * b[k++] = a[i, j] 2772 * 2773 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2774 * @param alpha The scalar alpha. 2775 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2776 * @param incX The increment for the elements of vector x, must be larger than zero. 2777 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2778 */ ZHPR(@plo int Uplo, double alpha, Allocation X, int incX, Allocation Ap)2779 public void ZHPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) { 2780 // equivalent to SPR for validation 2781 int N = validateSPR(Element.F64_2(mRS), Uplo, X, incX, Ap); 2782 2783 boolean mUseIncSupp = isIncSupp(); 2784 long apID = Ap.getID(mRS); 2785 long xID = X.getID(mRS); 2786 if (mUseIncSupp) { 2787 apID = getDummyAlloc(Ap); 2788 xID = getDummyAlloc(X); 2789 } 2790 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, apID, incX, 0, 0, 0, mUseIncSupp); 2791 } 2792 2793 /** 2794 * ZHER2 performs the symmetric rank 2 operation 2795 * A := alpha*x*y**H + alpha*y*x**H + A 2796 * 2797 * Details: http://www.netlib.org/lapack/explore-html/da/d8a/zher2_8f.html 2798 * 2799 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2800 * @param alpha The scalar alpha. 2801 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2802 * @param incX The increment for the elements of vector x, must be larger than zero. 2803 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2804 * @param incY The increment for the elements of vector y, must be larger than zero. 2805 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2806 */ ZHER2(@plo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2807 public void ZHER2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2808 // same as SYR2 2809 int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A); 2810 2811 boolean mUseIncSupp = isIncSupp(); 2812 long aID = A.getID(mRS); 2813 long xID = X.getID(mRS); 2814 long yID = Y.getID(mRS); 2815 if (mUseIncSupp) { 2816 aID = getDummyAlloc(A); 2817 xID = getDummyAlloc(X); 2818 yID = getDummyAlloc(Y); 2819 } 2820 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2821 } 2822 2823 /** 2824 * ZHPR2 performs the symmetric rank 2 operation 2825 * A := alpha*x*y**H + alpha*y*x**H + A 2826 * 2827 * Details: http://www.netlib.org/lapack/explore-html/d5/d52/zhpr2_8f.html 2828 * 2829 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2830 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2831 * 'a' to packed matrix 'b'. 2832 * k = 0 2833 * for i in range(0, n): 2834 * for j in range(i, n): 2835 * b[k++] = a[i, j] 2836 * 2837 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2838 * @param alpha The scalar alpha. 2839 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2840 * @param incX The increment for the elements of vector x, must be larger than zero. 2841 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2842 * @param incY The increment for the elements of vector y, must be larger than zero. 2843 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2844 */ ZHPR2(@plo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)2845 public void ZHPR2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 2846 // same as SPR2 2847 int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap); 2848 2849 boolean mUseIncSupp = isIncSupp(); 2850 long apID = Ap.getID(mRS); 2851 long xID = X.getID(mRS); 2852 long yID = Y.getID(mRS); 2853 if (mUseIncSupp) { 2854 apID = getDummyAlloc(Ap); 2855 xID = getDummyAlloc(X); 2856 yID = getDummyAlloc(Y); 2857 } 2858 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, apID, incX, incY, 0, 0, mUseIncSupp); 2859 } 2860 2861 2862 /** 2863 * Level 3 BLAS 2864 */ 2865 validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C)2866 static void validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C) { 2867 int aM = -1, aN = -1, bM = -1, bN = -1, cM = -1, cN = -1; 2868 if ((A != null && !A.getType().getElement().isCompatible(e)) || 2869 (B != null && !B.getType().getElement().isCompatible(e)) || 2870 (C != null && !C.getType().getElement().isCompatible(e))) { 2871 throw new RSRuntimeException("Called BLAS with wrong Element type"); 2872 } 2873 if (C == null) { 2874 //since matrix C is used to store the result, it cannot be null. 2875 throw new RSRuntimeException("Allocation C cannot be null"); 2876 } 2877 cM = C.getType().getY(); 2878 cN = C.getType().getX(); 2879 2880 if (Side == RIGHT) { 2881 if ((A == null && B != null) || (A != null && B == null)) { 2882 throw new RSRuntimeException("Provided Matrix A without Matrix B, or vice versa"); 2883 } 2884 if (B != null) { 2885 bM = A.getType().getY(); 2886 bN = A.getType().getX(); 2887 } 2888 if (A != null) { 2889 aM = B.getType().getY(); 2890 aN = B.getType().getX(); 2891 } 2892 } else { 2893 if (A != null) { 2894 if (TransA == TRANSPOSE || TransA == CONJ_TRANSPOSE) { 2895 aN = A.getType().getY(); 2896 aM = A.getType().getX(); 2897 } else { 2898 aM = A.getType().getY(); 2899 aN = A.getType().getX(); 2900 } 2901 } 2902 if (B != null) { 2903 if (TransB == TRANSPOSE || TransB == CONJ_TRANSPOSE) { 2904 bN = B.getType().getY(); 2905 bM = B.getType().getX(); 2906 } else { 2907 bM = B.getType().getY(); 2908 bN = B.getType().getX(); 2909 } 2910 } 2911 } 2912 if (A != null && B != null && C != null) { 2913 if (aN != bM || aM != cM || bN != cN) { 2914 throw new RSRuntimeException("Called BLAS with invalid dimensions"); 2915 } 2916 } else if (A != null && C != null) { 2917 // A and C only, for SYRK 2918 if (cM != cN) { 2919 throw new RSRuntimeException("Matrix C is not symmetric"); 2920 } 2921 if (aM != cM) { 2922 throw new RSRuntimeException("Called BLAS with invalid dimensions"); 2923 } 2924 } else if (A != null && B != null) { 2925 // A and B only 2926 if (aN != bM) { 2927 throw new RSRuntimeException("Called BLAS with invalid dimensions"); 2928 } 2929 } 2930 2931 } 2932 2933 /** 2934 * SGEMM performs one of the matrix-matrix operations 2935 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T 2936 * 2937 * Details: http://www.netlib.org/lapack/explore-html/d4/de2/sgemm_8f.html 2938 * 2939 * @param TransA The type of transpose applied to matrix A. 2940 * @param TransB The type of transpose applied to matrix B. 2941 * @param alpha The scalar alpha. 2942 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 2943 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 2944 * @param beta The scalar beta. 2945 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 2946 */ SGEMM(@ranspose int TransA, @Transpose int TransB, float alpha, Allocation A, Allocation B, float beta, Allocation C)2947 public void SGEMM(@Transpose int TransA, @Transpose int TransB, float alpha, Allocation A, 2948 Allocation B, float beta, Allocation C) { 2949 validateTranspose(TransA); 2950 validateTranspose(TransB); 2951 validateL3(Element.F32(mRS), TransA, TransB, 0, A, B, C); 2952 2953 int M = -1, N = -1, K = -1; 2954 if (TransA != NO_TRANSPOSE) { 2955 M = A.getType().getX(); 2956 K = A.getType().getY(); 2957 } else { 2958 M = A.getType().getY(); 2959 K = A.getType().getX(); 2960 } 2961 if (TransB != NO_TRANSPOSE) { 2962 N = B.getType().getY(); 2963 } else { 2964 N = B.getType().getX(); 2965 } 2966 2967 boolean mUseIncSupp = isIncSupp(); 2968 long aID = A.getID(mRS); 2969 long bID = B.getID(mRS); 2970 long cID = C.getID(mRS); 2971 if (mUseIncSupp) { 2972 aID = getDummyAlloc(A); 2973 bID = getDummyAlloc(B); 2974 cID = getDummyAlloc(C); 2975 } 2976 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha, aID, bID, 2977 beta, cID, 0, 0, 0, 0, mUseIncSupp); 2978 } 2979 2980 /** 2981 * DGEMM performs one of the matrix-matrix operations 2982 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T 2983 * 2984 * Details: http://www.netlib.org/lapack/explore-html/d7/d2b/dgemm_8f.html 2985 * 2986 * @param TransA The type of transpose applied to matrix A. 2987 * @param TransB The type of transpose applied to matrix B. 2988 * @param alpha The scalar alpha. 2989 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2990 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 2991 * @param beta The scalar beta. 2992 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 2993 */ DGEMM(@ranspose int TransA, @Transpose int TransB, double alpha, Allocation A, Allocation B, double beta, Allocation C)2994 public void DGEMM(@Transpose int TransA, @Transpose int TransB, double alpha, Allocation A, 2995 Allocation B, double beta, Allocation C) { 2996 validateTranspose(TransA); 2997 validateTranspose(TransB); 2998 validateL3(Element.F64(mRS), TransA, TransB, 0, A, B, C); 2999 int M = -1, N = -1, K = -1; 3000 if (TransA != NO_TRANSPOSE) { 3001 M = A.getType().getX(); 3002 K = A.getType().getY(); 3003 } else { 3004 M = A.getType().getY(); 3005 K = A.getType().getX(); 3006 } 3007 if (TransB != NO_TRANSPOSE) { 3008 N = B.getType().getY(); 3009 } else { 3010 N = B.getType().getX(); 3011 } 3012 3013 boolean mUseIncSupp = isIncSupp(); 3014 long aID = A.getID(mRS); 3015 long bID = B.getID(mRS); 3016 long cID = C.getID(mRS); 3017 if (mUseIncSupp) { 3018 aID = getDummyAlloc(A); 3019 bID = getDummyAlloc(B); 3020 cID = getDummyAlloc(C); 3021 } 3022 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha, aID, bID, 3023 beta, cID, 0, 0, 0, 0, mUseIncSupp); 3024 } 3025 3026 /** 3027 * CGEMM performs one of the matrix-matrix operations 3028 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T or op(X) = X**H 3029 * 3030 * Details: http://www.netlib.org/lapack/explore-html/d6/d5b/cgemm_8f.html 3031 * 3032 * @param TransA The type of transpose applied to matrix A. 3033 * @param TransB The type of transpose applied to matrix B. 3034 * @param alpha The scalar alpha. 3035 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3036 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3037 * @param beta The scalar beta. 3038 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 3039 */ CGEMM(@ranspose int TransA, @Transpose int TransB, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C)3040 public void CGEMM(@Transpose int TransA, @Transpose int TransB, Float2 alpha, Allocation A, 3041 Allocation B, Float2 beta, Allocation C) { 3042 validateTranspose(TransA); 3043 validateTranspose(TransB); 3044 validateL3(Element.F32_2(mRS), TransA, TransB, 0, A, B, C); 3045 int M = -1, N = -1, K = -1; 3046 if (TransA != NO_TRANSPOSE) { 3047 M = A.getType().getX(); 3048 K = A.getType().getY(); 3049 } else { 3050 M = A.getType().getY(); 3051 K = A.getType().getX(); 3052 } 3053 if (TransB != NO_TRANSPOSE) { 3054 N = B.getType().getY(); 3055 } else { 3056 N = B.getType().getX(); 3057 } 3058 3059 boolean mUseIncSupp = isIncSupp(); 3060 long aID = A.getID(mRS); 3061 long bID = B.getID(mRS); 3062 long cID = C.getID(mRS); 3063 if (mUseIncSupp) { 3064 aID = getDummyAlloc(A); 3065 bID = getDummyAlloc(B); 3066 cID = getDummyAlloc(C); 3067 } 3068 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha.x, alpha.y, aID, bID, 3069 beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3070 } 3071 3072 /** 3073 * ZGEMM performs one of the matrix-matrix operations 3074 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T or op(X) = X**H 3075 * 3076 * Details: http://www.netlib.org/lapack/explore-html/d7/d76/zgemm_8f.html 3077 * 3078 * @param TransA The type of transpose applied to matrix A. 3079 * @param TransB The type of transpose applied to matrix B. 3080 * @param alpha The scalar alpha. 3081 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2 3082 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2 3083 * @param beta The scalar beta. 3084 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2 3085 */ ZGEMM(@ranspose int TransA, @Transpose int TransB, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C)3086 public void ZGEMM(@Transpose int TransA, @Transpose int TransB, Double2 alpha, Allocation A, 3087 Allocation B, Double2 beta, Allocation C) { 3088 validateTranspose(TransA); 3089 validateTranspose(TransB); 3090 validateL3(Element.F64_2(mRS), TransA, TransB, 0, A, B, C); 3091 int M = -1, N = -1, K = -1; 3092 if (TransA != NO_TRANSPOSE) { 3093 M = A.getType().getX(); 3094 K = A.getType().getY(); 3095 } else { 3096 M = A.getType().getY(); 3097 K = A.getType().getX(); 3098 } 3099 if (TransB != NO_TRANSPOSE) { 3100 N = B.getType().getY(); 3101 } else { 3102 N = B.getType().getX(); 3103 } 3104 3105 boolean mUseIncSupp = isIncSupp(); 3106 long aID = A.getID(mRS); 3107 long bID = B.getID(mRS); 3108 long cID = C.getID(mRS); 3109 if (mUseIncSupp) { 3110 aID = getDummyAlloc(A); 3111 bID = getDummyAlloc(B); 3112 cID = getDummyAlloc(C); 3113 } 3114 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha.x, alpha.y, aID, bID, 3115 beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3116 } 3117 3118 /** 3119 * SSYMM performs one of the matrix-matrix operations 3120 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3121 * 3122 * Details: http://www.netlib.org/lapack/explore-html/d7/d42/ssymm_8f.html 3123 * 3124 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3125 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3126 * @param alpha The scalar alpha. 3127 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 3128 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 3129 * @param beta The scalar beta. 3130 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 3131 */ SSYMM(@ide int Side, @Uplo int Uplo, float alpha, Allocation A, Allocation B, float beta, Allocation C)3132 public void SSYMM(@Side int Side, @Uplo int Uplo, float alpha, Allocation A, 3133 Allocation B, float beta, Allocation C) { 3134 validateSide(Side); 3135 validateUplo(Uplo); 3136 //For SYMM, Matrix A should be symmetric 3137 if (A.getType().getX() != A.getType().getY()) { 3138 throw new RSRuntimeException("Matrix A is not symmetric"); 3139 } 3140 validateL3(Element.F32(mRS), 0, 0, Side, A, B, C); 3141 3142 boolean mUseIncSupp = isIncSupp(); 3143 long aID = A.getID(mRS); 3144 long bID = B.getID(mRS); 3145 long cID = C.getID(mRS); 3146 if (mUseIncSupp) { 3147 aID = getDummyAlloc(A); 3148 bID = getDummyAlloc(B); 3149 cID = getDummyAlloc(C); 3150 } 3151 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, aID, bID, 3152 beta, cID, 0, 0, 0, 0, mUseIncSupp); 3153 } 3154 3155 /** 3156 * DSYMM performs one of the matrix-matrix operations 3157 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3158 * 3159 * Details: http://www.netlib.org/lapack/explore-html/d8/db0/dsymm_8f.html 3160 * 3161 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3162 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3163 * @param alpha The scalar alpha. 3164 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 3165 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 3166 * @param beta The scalar beta. 3167 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 3168 */ DSYMM(@ide int Side, @Uplo int Uplo, double alpha, Allocation A, Allocation B, double beta, Allocation C)3169 public void DSYMM(@Side int Side, @Uplo int Uplo, double alpha, Allocation A, 3170 Allocation B, double beta, Allocation C) { 3171 validateSide(Side); 3172 validateUplo(Uplo); 3173 if (A.getType().getX() != A.getType().getY()) { 3174 throw new RSRuntimeException("Matrix A is not symmetric"); 3175 } 3176 validateL3(Element.F64(mRS), 0, 0, Side, A, B, C); 3177 3178 boolean mUseIncSupp = isIncSupp(); 3179 long aID = A.getID(mRS); 3180 long bID = B.getID(mRS); 3181 long cID = C.getID(mRS); 3182 if (mUseIncSupp) { 3183 aID = getDummyAlloc(A); 3184 bID = getDummyAlloc(B); 3185 cID = getDummyAlloc(C); 3186 } 3187 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, aID, bID, 3188 beta, cID, 0, 0, 0, 0, mUseIncSupp); 3189 } 3190 3191 /** 3192 * CSYMM performs one of the matrix-matrix operations 3193 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3194 * 3195 * Details: http://www.netlib.org/lapack/explore-html/db/d59/csymm_8f.html 3196 * 3197 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3198 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3199 * @param alpha The scalar alpha. 3200 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3201 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3202 * @param beta The scalar beta. 3203 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 3204 */ CSYMM(@ide int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C)3205 public void CSYMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A, 3206 Allocation B, Float2 beta, Allocation C) { 3207 validateSide(Side); 3208 validateUplo(Uplo); 3209 if (A.getType().getX() != A.getType().getY()) { 3210 throw new RSRuntimeException("Matrix A is not symmetric"); 3211 } 3212 validateL3(Element.F32_2(mRS), 0, 0, Side, A, B, C); 3213 3214 boolean mUseIncSupp = isIncSupp(); 3215 long aID = A.getID(mRS); 3216 long bID = B.getID(mRS); 3217 long cID = C.getID(mRS); 3218 if (mUseIncSupp) { 3219 aID = getDummyAlloc(A); 3220 bID = getDummyAlloc(B); 3221 cID = getDummyAlloc(C); 3222 } 3223 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, aID, bID, 3224 beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3225 } 3226 3227 /** 3228 * ZSYMM performs one of the matrix-matrix operations 3229 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3230 * 3231 * Details: http://www.netlib.org/lapack/explore-html/df/d51/zsymm_8f.html 3232 * 3233 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3234 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3235 * @param alpha The scalar alpha. 3236 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3237 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 3238 * @param beta The scalar beta. 3239 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 3240 */ ZSYMM(@ide int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C)3241 public void ZSYMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A, 3242 Allocation B, Double2 beta, Allocation C) { 3243 validateSide(Side); 3244 validateUplo(Uplo); 3245 if (A.getType().getX() != A.getType().getY()) { 3246 throw new RSRuntimeException("Matrix A is not symmetric"); 3247 } 3248 validateL3(Element.F64_2(mRS), 0, 0, Side, A, B, C); 3249 3250 boolean mUseIncSupp = isIncSupp(); 3251 long aID = A.getID(mRS); 3252 long bID = B.getID(mRS); 3253 long cID = C.getID(mRS); 3254 if (mUseIncSupp) { 3255 aID = getDummyAlloc(A); 3256 bID = getDummyAlloc(B); 3257 cID = getDummyAlloc(C); 3258 } 3259 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, aID, bID, 3260 beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3261 } 3262 3263 /** 3264 * SSYRK performs one of the symmetric rank k operations 3265 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C 3266 * 3267 * Details: http://www.netlib.org/lapack/explore-html/d0/d40/ssyrk_8f.html 3268 * 3269 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3270 * @param Trans The type of transpose applied to the operation. 3271 * @param alpha The scalar alpha. 3272 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 3273 * @param beta The scalar beta. 3274 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 3275 */ SSYRK(@plo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C)3276 public void SSYRK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) { 3277 validateTranspose(Trans); 3278 validateUplo(Uplo); 3279 validateL3(Element.F32(mRS), Trans, 0, 0, A, null, C); 3280 int K = -1; 3281 if (Trans != NO_TRANSPOSE) { 3282 K = A.getType().getY(); 3283 } else { 3284 K = A.getType().getX(); 3285 } 3286 3287 boolean mUseIncSupp = isIncSupp(); 3288 long aID = A.getID(mRS); 3289 long cID = C.getID(mRS); 3290 if (mUseIncSupp) { 3291 aID = getDummyAlloc(A); 3292 cID = getDummyAlloc(C); 3293 } 3294 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, 0, beta, cID, 0, 0, 0, 0, mUseIncSupp); 3295 } 3296 3297 /** 3298 * DSYRK performs one of the symmetric rank k operations 3299 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C 3300 * 3301 * Details: http://www.netlib.org/lapack/explore-html/dc/d05/dsyrk_8f.html 3302 * 3303 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3304 * @param Trans The type of transpose applied to the operation. 3305 * @param alpha The scalar alpha. 3306 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 3307 * @param beta The scalar beta. 3308 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 3309 */ DSYRK(@plo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C)3310 public void DSYRK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) { 3311 validateTranspose(Trans); 3312 validateUplo(Uplo); 3313 validateL3(Element.F64(mRS), Trans, 0, 0, A, null, C); 3314 int K = -1; 3315 if (Trans != NO_TRANSPOSE) { 3316 K = A.getType().getY(); 3317 } else { 3318 K = A.getType().getX(); 3319 } 3320 3321 boolean mUseIncSupp = isIncSupp(); 3322 long aID = A.getID(mRS); 3323 long cID = C.getID(mRS); 3324 if (mUseIncSupp) { 3325 aID = getDummyAlloc(A); 3326 cID = getDummyAlloc(C); 3327 } 3328 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, 0, beta, cID, 0, 0, 0, 0, mUseIncSupp); 3329 } 3330 3331 /** 3332 * CSYRK performs one of the symmetric rank k operations 3333 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C 3334 * 3335 * Details: http://www.netlib.org/lapack/explore-html/d3/d6a/csyrk_8f.html 3336 * 3337 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3338 * @param Trans The type of transpose applied to the operation. 3339 * @param alpha The scalar alpha. 3340 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3341 * @param beta The scalar beta. 3342 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 3343 */ CSYRK(@plo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Float2 beta, Allocation C)3344 public void CSYRK(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Float2 beta, Allocation C) { 3345 validateTranspose(Trans); 3346 validateUplo(Uplo); 3347 validateL3(Element.F32_2(mRS), Trans, 0, 0, A, null, C); 3348 int K = -1; 3349 if (Trans != NO_TRANSPOSE) { 3350 K = A.getType().getY(); 3351 } else { 3352 K = A.getType().getX(); 3353 } 3354 3355 boolean mUseIncSupp = isIncSupp(); 3356 long aID = A.getID(mRS); 3357 long cID = C.getID(mRS); 3358 if (mUseIncSupp) { 3359 aID = getDummyAlloc(A); 3360 cID = getDummyAlloc(C); 3361 } 3362 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, 0, beta.x, beta.y, 3363 C.getID(mRS), 0, 0, 0, 0, mUseIncSupp); 3364 } 3365 3366 /** 3367 * ZSYRK performs one of the symmetric rank k operations 3368 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C 3369 * 3370 * Details: http://www.netlib.org/lapack/explore-html/de/d54/zsyrk_8f.html 3371 * 3372 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3373 * @param Trans The type of transpose applied to the operation. 3374 * @param alpha The scalar alpha. 3375 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3376 * @param beta The scalar beta. 3377 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 3378 */ ZSYRK(@plo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Double2 beta, Allocation C)3379 public void ZSYRK(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Double2 beta, Allocation C) { 3380 validateTranspose(Trans); 3381 validateUplo(Uplo); 3382 validateL3(Element.F64_2(mRS), Trans, 0, 0, A, null, C); 3383 int K = -1; 3384 if (Trans != NO_TRANSPOSE) { 3385 K = A.getType().getY(); 3386 } else { 3387 K = A.getType().getX(); 3388 } 3389 3390 boolean mUseIncSupp = isIncSupp(); 3391 long aID = A.getID(mRS); 3392 long cID = C.getID(mRS); 3393 if (mUseIncSupp) { 3394 aID = getDummyAlloc(A); 3395 cID = getDummyAlloc(C); 3396 } 3397 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, 0, beta.x, beta.y, 3398 C.getID(mRS), 0, 0, 0, 0, mUseIncSupp); 3399 } 3400 validateSYR2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C)3401 static void validateSYR2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) { 3402 validateTranspose(Trans); 3403 if (!A.getType().getElement().isCompatible(e) || 3404 !B.getType().getElement().isCompatible(e) || 3405 !C.getType().getElement().isCompatible(e)) { 3406 throw new RSRuntimeException("Called BLAS with wrong Element type"); 3407 } 3408 int Cdim = -1; 3409 // A is n x k if no transpose, k x n if transpose 3410 // C is n x n 3411 if (Trans == TRANSPOSE) { 3412 // check columns versus C 3413 Cdim = A.getType().getX(); 3414 } else { 3415 // check rows versus C 3416 Cdim = A.getType().getY(); 3417 } 3418 if (C.getType().getX() != Cdim || C.getType().getY() != Cdim) { 3419 throw new RSRuntimeException("Invalid symmetric matrix in SYR2K"); 3420 } 3421 // A dims == B dims 3422 if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) { 3423 throw new RSRuntimeException("Invalid A and B in SYR2K"); 3424 } 3425 } 3426 3427 /** 3428 * SSYR2K performs one of the symmetric rank 2k operations 3429 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C 3430 * 3431 * Details: http://www.netlib.org/lapack/explore-html/df/d3d/ssyr2k_8f.html 3432 * 3433 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3434 * @param Trans The type of transpose applied to the operation. 3435 * @param alpha The scalar alpha. 3436 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 3437 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 3438 * @param beta The scalar beta. 3439 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 3440 */ SSYR2K(@plo int Uplo, @Transpose int Trans, float alpha, Allocation A, Allocation B, float beta, Allocation C)3441 public void SSYR2K(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, Allocation B, float beta, Allocation C) { 3442 validateUplo(Uplo); 3443 validateSYR2K(Element.F32(mRS), Trans, A, B, C); 3444 int K = -1; 3445 if (Trans != NO_TRANSPOSE) { 3446 K = A.getType().getY(); 3447 } else { 3448 K = A.getType().getX(); 3449 } 3450 3451 boolean mUseIncSupp = isIncSupp(); 3452 long aID = A.getID(mRS); 3453 long bID = B.getID(mRS); 3454 long cID = C.getID(mRS); 3455 if (mUseIncSupp) { 3456 aID = getDummyAlloc(A); 3457 bID = getDummyAlloc(B); 3458 cID = getDummyAlloc(C); 3459 } 3460 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, bID, beta, cID, 0, 0, 0, 0, mUseIncSupp); 3461 } 3462 3463 /** 3464 * DSYR2K performs one of the symmetric rank 2k operations 3465 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C 3466 * 3467 * Details: http://www.netlib.org/lapack/explore-html/d1/dec/dsyr2k_8f.html 3468 * 3469 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3470 * @param Trans The type of transpose applied to the operation. 3471 * @param alpha The scalar alpha. 3472 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 3473 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 3474 * @param beta The scalar beta. 3475 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 3476 */ DSYR2K(@plo int Uplo, @Transpose int Trans, double alpha, Allocation A, Allocation B, double beta, Allocation C)3477 public void DSYR2K(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, Allocation B, double beta, Allocation C) { 3478 validateUplo(Uplo); 3479 validateSYR2K(Element.F64(mRS), Trans, A, B, C); 3480 int K = -1; 3481 if (Trans != NO_TRANSPOSE) { 3482 K = A.getType().getY(); 3483 } else { 3484 K = A.getType().getX(); 3485 } 3486 3487 boolean mUseIncSupp = isIncSupp(); 3488 long aID = A.getID(mRS); 3489 long bID = B.getID(mRS); 3490 long cID = C.getID(mRS); 3491 if (mUseIncSupp) { 3492 aID = getDummyAlloc(A); 3493 bID = getDummyAlloc(B); 3494 cID = getDummyAlloc(C); 3495 } 3496 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, bID, beta, cID, 0, 0, 0, 0, mUseIncSupp); 3497 } 3498 3499 /** 3500 * CSYR2K performs one of the symmetric rank 2k operations 3501 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C 3502 * 3503 * Details: http://www.netlib.org/lapack/explore-html/de/d7e/csyr2k_8f.html 3504 * 3505 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3506 * @param Trans The type of transpose applied to the operation. 3507 * @param alpha The scalar alpha. 3508 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3509 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3510 * @param beta The scalar beta. 3511 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 3512 */ CSYR2K(@plo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C)3513 public void CSYR2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) { 3514 validateUplo(Uplo); 3515 validateSYR2K(Element.F32_2(mRS), Trans, A, B, C); 3516 int K = -1; 3517 if (Trans != NO_TRANSPOSE) { 3518 K = A.getType().getY(); 3519 } else { 3520 K = A.getType().getX(); 3521 } 3522 3523 boolean mUseIncSupp = isIncSupp(); 3524 long aID = A.getID(mRS); 3525 long bID = B.getID(mRS); 3526 long cID = C.getID(mRS); 3527 if (mUseIncSupp) { 3528 aID = getDummyAlloc(A); 3529 bID = getDummyAlloc(B); 3530 cID = getDummyAlloc(C); 3531 } 3532 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3533 } 3534 3535 /** 3536 * ZSYR2K performs one of the symmetric rank 2k operations 3537 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C 3538 * 3539 * Details: http://www.netlib.org/lapack/explore-html/df/d20/zsyr2k_8f.html 3540 * 3541 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3542 * @param Trans The type of transpose applied to the operation. 3543 * @param alpha The scalar alpha. 3544 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3545 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 3546 * @param beta The scalar beta. 3547 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 3548 */ ZSYR2K(@plo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C)3549 public void ZSYR2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) { 3550 validateUplo(Uplo); 3551 validateSYR2K(Element.F64_2(mRS), Trans, A, B, C); 3552 int K = -1; 3553 if (Trans != NO_TRANSPOSE) { 3554 K = A.getType().getY(); 3555 } else { 3556 K = A.getType().getX(); 3557 } 3558 3559 boolean mUseIncSupp = isIncSupp(); 3560 long aID = A.getID(mRS); 3561 long bID = B.getID(mRS); 3562 long cID = C.getID(mRS); 3563 if (mUseIncSupp) { 3564 aID = getDummyAlloc(A); 3565 bID = getDummyAlloc(B); 3566 cID = getDummyAlloc(C); 3567 } 3568 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3569 } 3570 validateTRMM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B)3571 static void validateTRMM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) { 3572 validateSide(Side); 3573 validateTranspose(TransA); 3574 int aM = -1, aN = -1, bM = -1, bN = -1; 3575 if (!A.getType().getElement().isCompatible(e) || 3576 !B.getType().getElement().isCompatible(e)) { 3577 throw new RSRuntimeException("Called BLAS with wrong Element type"); 3578 } 3579 3580 aM = A.getType().getY(); 3581 aN = A.getType().getX(); 3582 if (aM != aN) { 3583 throw new RSRuntimeException("Called TRMM with a non-symmetric matrix A"); 3584 } 3585 3586 bM = B.getType().getY(); 3587 bN = B.getType().getX(); 3588 if (Side == LEFT) { 3589 if (aN != bM) { 3590 throw new RSRuntimeException("Called TRMM with invalid matrices"); 3591 } 3592 } else { 3593 if (bN != aM) { 3594 throw new RSRuntimeException("Called TRMM with invalid matrices"); 3595 } 3596 } 3597 } 3598 3599 /** 3600 * STRMM performs one of the matrix-matrix operations 3601 * B := alpha*op(A)*B or B := alpha*B*op(A) 3602 * op(A) is one of op(A) = A or op(A) = A**T 3603 * 3604 * Details: http://www.netlib.org/lapack/explore-html/df/d01/strmm_8f.html 3605 * 3606 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3607 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3608 * @param TransA The type of transpose applied to matrix A. 3609 * @param Diag Specifies whether or not A is unit triangular. 3610 * @param alpha The scalar alpha. 3611 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 3612 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 3613 */ STRMM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B)3614 public void STRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) { 3615 validateUplo(Uplo); 3616 validateDiag(Diag); 3617 validateTRMM(Element.F32(mRS), Side, TransA, A, B); 3618 3619 boolean mUseIncSupp = isIncSupp(); 3620 long aID = A.getID(mRS); 3621 long bID = B.getID(mRS); 3622 if (mUseIncSupp) { 3623 aID = getDummyAlloc(A); 3624 bID = getDummyAlloc(B); 3625 } 3626 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3627 alpha, aID, bID, 0.f, 0, 0, 0, 0, 0, mUseIncSupp); 3628 } 3629 3630 /** 3631 * DTRMM performs one of the matrix-matrix operations 3632 * B := alpha*op(A)*B or B := alpha*B*op(A) 3633 * op(A) is one of op(A) = A or op(A) = A**T 3634 * 3635 * Details: http://www.netlib.org/lapack/explore-html/dd/d19/dtrmm_8f.html 3636 * 3637 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3638 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3639 * @param TransA The type of transpose applied to matrix A. 3640 * @param Diag Specifies whether or not A is unit triangular. 3641 * @param alpha The scalar alpha. 3642 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 3643 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 3644 */ DTRMM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B)3645 public void DTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) { 3646 validateUplo(Uplo); 3647 validateDiag(Diag); 3648 validateTRMM(Element.F64(mRS), Side, TransA, A, B); 3649 3650 boolean mUseIncSupp = isIncSupp(); 3651 long aID = A.getID(mRS); 3652 long bID = B.getID(mRS); 3653 if (mUseIncSupp) { 3654 aID = getDummyAlloc(A); 3655 bID = getDummyAlloc(B); 3656 } 3657 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3658 alpha, aID, bID, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3659 } 3660 3661 /** 3662 * CTRMM performs one of the matrix-matrix operations 3663 * B := alpha*op(A)*B or B := alpha*B*op(A) 3664 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H 3665 * 3666 * Details: http://www.netlib.org/lapack/explore-html/d4/d9b/ctrmm_8f.html 3667 * 3668 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3669 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3670 * @param TransA The type of transpose applied to matrix A. 3671 * @param Diag Specifies whether or not A is unit triangular. 3672 * @param alpha The scalar alpha. 3673 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3674 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3675 */ CTRMM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B)3676 public void CTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) { 3677 validateUplo(Uplo); 3678 validateDiag(Diag); 3679 validateTRMM(Element.F32_2(mRS), Side, TransA, A, B); 3680 3681 boolean mUseIncSupp = isIncSupp(); 3682 long aID = A.getID(mRS); 3683 long bID = B.getID(mRS); 3684 if (mUseIncSupp) { 3685 aID = getDummyAlloc(A); 3686 bID = getDummyAlloc(B); 3687 } 3688 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3689 alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3690 } 3691 3692 /** 3693 * ZTRMM performs one of the matrix-matrix operations 3694 * B := alpha*op(A)*B or B := alpha*B*op(A) 3695 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H 3696 * 3697 * Details: http://www.netlib.org/lapack/explore-html/d8/de1/ztrmm_8f.html 3698 * 3699 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3700 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3701 * @param TransA The type of transpose applied to matrix A. 3702 * @param Diag Specifies whether or not A is unit triangular. 3703 * @param alpha The scalar alpha. 3704 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3705 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 3706 */ ZTRMM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B)3707 public void ZTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) { 3708 validateUplo(Uplo); 3709 validateDiag(Diag); 3710 validateTRMM(Element.F64_2(mRS), Side, TransA, A, B); 3711 3712 boolean mUseIncSupp = isIncSupp(); 3713 long aID = A.getID(mRS); 3714 long bID = B.getID(mRS); 3715 if (mUseIncSupp) { 3716 aID = getDummyAlloc(A); 3717 bID = getDummyAlloc(B); 3718 } 3719 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3720 alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3721 } 3722 validateTRSM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B)3723 static void validateTRSM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) { 3724 int adim = -1, bM = -1, bN = -1; 3725 validateSide(Side); 3726 validateTranspose(TransA); 3727 if (!A.getType().getElement().isCompatible(e) || 3728 !B.getType().getElement().isCompatible(e)) { 3729 throw new RSRuntimeException("Called BLAS with wrong Element type"); 3730 } 3731 adim = A.getType().getX(); 3732 if (adim != A.getType().getY()) { 3733 // this may be unnecessary, the restriction could potentially be relaxed 3734 // A needs to contain at least that symmetric matrix but could theoretically be larger 3735 // for now we assume adapters are sufficient, will reevaluate in the future 3736 throw new RSRuntimeException("Called TRSM with a non-symmetric matrix A"); 3737 } 3738 bM = B.getType().getY(); 3739 bN = B.getType().getX(); 3740 if (Side == LEFT) { 3741 // A is M*M 3742 if (adim != bM) { 3743 throw new RSRuntimeException("Called TRSM with invalid matrix dimensions"); 3744 } 3745 } else { 3746 // A is N*N 3747 if (adim != bN) { 3748 throw new RSRuntimeException("Called TRSM with invalid matrix dimensions"); 3749 } 3750 } 3751 } 3752 3753 /** 3754 * STRSM solves one of the matrix equations 3755 * op(A)*X := alpha*B or X*op(A) := alpha*B 3756 * op(A) is one of op(A) = A or op(A) = A**T 3757 * 3758 * Details: http://www.netlib.org/lapack/explore-html/d2/d8b/strsm_8f.html 3759 * 3760 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3761 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3762 * @param TransA The type of transpose applied to matrix A. 3763 * @param Diag Specifies whether or not A is unit triangular. 3764 * @param alpha The scalar alpha. 3765 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 3766 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 3767 */ STRSM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B)3768 public void STRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) { 3769 validateUplo(Uplo); 3770 validateDiag(Diag); 3771 validateTRSM(Element.F32(mRS), Side, TransA, A, B); 3772 3773 boolean mUseIncSupp = isIncSupp(); 3774 long aID = A.getID(mRS); 3775 long bID = B.getID(mRS); 3776 if (mUseIncSupp) { 3777 aID = getDummyAlloc(A); 3778 bID = getDummyAlloc(B); 3779 } 3780 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3781 alpha, aID, bID, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3782 } 3783 3784 /** 3785 * DTRSM solves one of the matrix equations 3786 * op(A)*X := alpha*B or X*op(A) := alpha*B 3787 * op(A) is one of op(A) = A or op(A) = A**T 3788 * 3789 * Details: http://www.netlib.org/lapack/explore-html/de/da7/dtrsm_8f.html 3790 * 3791 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3792 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3793 * @param TransA The type of transpose applied to matrix A. 3794 * @param Diag Specifies whether or not A is unit triangular. 3795 * @param alpha The scalar alpha. 3796 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 3797 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 3798 */ DTRSM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B)3799 public void DTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) { 3800 validateUplo(Uplo); 3801 validateDiag(Diag); 3802 validateTRSM(Element.F64(mRS), Side, TransA, A, B); 3803 3804 boolean mUseIncSupp = isIncSupp(); 3805 long aID = A.getID(mRS); 3806 long bID = B.getID(mRS); 3807 if (mUseIncSupp) { 3808 aID = getDummyAlloc(A); 3809 bID = getDummyAlloc(B); 3810 } 3811 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3812 alpha, aID, bID, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3813 } 3814 3815 /** 3816 * CTRSM solves one of the matrix equations 3817 * op(A)*X := alpha*B or X*op(A) := alpha*B 3818 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H 3819 * 3820 * Details: http://www.netlib.org/lapack/explore-html/de/d30/ctrsm_8f.html 3821 * 3822 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3823 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3824 * @param TransA The type of transpose applied to matrix A. 3825 * @param Diag Specifies whether or not A is unit triangular. 3826 * @param alpha The scalar alpha. 3827 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3828 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3829 */ CTRSM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B)3830 public void CTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) { 3831 validateUplo(Uplo); 3832 validateDiag(Diag); 3833 validateTRSM(Element.F32_2(mRS), Side, TransA, A, B); 3834 3835 boolean mUseIncSupp = isIncSupp(); 3836 long aID = A.getID(mRS); 3837 long bID = B.getID(mRS); 3838 if (mUseIncSupp) { 3839 aID = getDummyAlloc(A); 3840 bID = getDummyAlloc(B); 3841 } 3842 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3843 alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3844 } 3845 3846 /** 3847 * ZTRSM solves one of the matrix equations 3848 * op(A)*X := alpha*B or X*op(A) := alpha*B 3849 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H 3850 * 3851 * Details: http://www.netlib.org/lapack/explore-html/d1/d39/ztrsm_8f.html 3852 * 3853 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3854 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3855 * @param TransA The type of transpose applied to matrix A. 3856 * @param Diag Specifies whether or not A is unit triangular. 3857 * @param alpha The scalar alpha. 3858 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3859 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 3860 */ ZTRSM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B)3861 public void ZTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) { 3862 validateUplo(Uplo); 3863 validateDiag(Diag); 3864 validateTRSM(Element.F64_2(mRS), Side, TransA, A, B); 3865 3866 boolean mUseIncSupp = isIncSupp(); 3867 long aID = A.getID(mRS); 3868 long bID = B.getID(mRS); 3869 if (mUseIncSupp) { 3870 aID = getDummyAlloc(A); 3871 bID = getDummyAlloc(B); 3872 } 3873 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3874 alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3875 } 3876 validateHEMM(Element e, @Side int Side, Allocation A, Allocation B, Allocation C)3877 static void validateHEMM(Element e, @Side int Side, Allocation A, Allocation B, Allocation C) { 3878 validateSide(Side); 3879 3880 if (!A.getType().getElement().isCompatible(e) || 3881 !B.getType().getElement().isCompatible(e) || 3882 !C.getType().getElement().isCompatible(e)) { 3883 throw new RSRuntimeException("Called BLAS with wrong Element type"); 3884 } 3885 3886 // A must be square; can potentially be relaxed similar to TRSM 3887 int adim = A.getType().getX(); 3888 if (adim != A.getType().getY()) { 3889 throw new RSRuntimeException("Called HEMM with non-square A"); 3890 } 3891 if ((Side == LEFT && adim != B.getType().getY()) || 3892 (Side == RIGHT && adim != B.getType().getX())) { 3893 throw new RSRuntimeException("Called HEMM with invalid B"); 3894 } 3895 if (B.getType().getX() != C.getType().getX() || 3896 B.getType().getY() != C.getType().getY()) { 3897 throw new RSRuntimeException("Called HEMM with mismatched B and C"); 3898 } 3899 } 3900 3901 /** 3902 * CHEMM performs one of the matrix-matrix operations 3903 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3904 * 3905 * Details: http://www.netlib.org/lapack/explore-html/d3/d66/chemm_8f.html 3906 * 3907 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3908 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3909 * @param alpha The scalar alpha. 3910 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3911 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3912 * @param beta The scalar beta. 3913 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 3914 */ CHEMM(@ide int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C)3915 public void CHEMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) { 3916 validateUplo(Uplo); 3917 validateHEMM(Element.F32_2(mRS), Side, A, B, C); 3918 3919 boolean mUseIncSupp = isIncSupp(); 3920 long aID = A.getID(mRS); 3921 long bID = B.getID(mRS); 3922 long cID = C.getID(mRS); 3923 if (mUseIncSupp) { 3924 aID = getDummyAlloc(A); 3925 bID = getDummyAlloc(B); 3926 cID = getDummyAlloc(C); 3927 } 3928 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, 3929 alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3930 } 3931 3932 /** 3933 * ZHEMM performs one of the matrix-matrix operations 3934 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3935 * 3936 * Details: http://www.netlib.org/lapack/explore-html/d6/d3e/zhemm_8f.html 3937 * 3938 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3939 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3940 * @param alpha The scalar alpha. 3941 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3942 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 3943 * @param beta The scalar beta. 3944 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 3945 */ ZHEMM(@ide int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C)3946 public void ZHEMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) { 3947 validateUplo(Uplo); 3948 validateHEMM(Element.F64_2(mRS), Side, A, B, C); 3949 3950 boolean mUseIncSupp = isIncSupp(); 3951 long aID = A.getID(mRS); 3952 long bID = B.getID(mRS); 3953 long cID = C.getID(mRS); 3954 if (mUseIncSupp) { 3955 aID = getDummyAlloc(A); 3956 bID = getDummyAlloc(B); 3957 cID = getDummyAlloc(C); 3958 } 3959 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, 3960 alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3961 } 3962 validateHERK(Element e, @Transpose int Trans, Allocation A, Allocation C)3963 static void validateHERK(Element e, @Transpose int Trans, Allocation A, Allocation C) { 3964 if (!A.getType().getElement().isCompatible(e) || 3965 !C.getType().getElement().isCompatible(e)) { 3966 throw new RSRuntimeException("Called BLAS with wrong Element type"); 3967 } 3968 validateConjTranspose(Trans); 3969 int cdim = C.getType().getX(); 3970 if (cdim != C.getType().getY()) { 3971 throw new RSRuntimeException("Called HERK with non-square C"); 3972 } 3973 if (Trans == NO_TRANSPOSE) { 3974 if (cdim != A.getType().getY()) { 3975 throw new RSRuntimeException("Called HERK with invalid A"); 3976 } 3977 } else { 3978 if (cdim != A.getType().getX()) { 3979 throw new RSRuntimeException("Called HERK with invalid A"); 3980 } 3981 } 3982 } 3983 3984 /** 3985 * CHERK performs one of the hermitian rank k operations 3986 * C := alpha*A*A**H + beta*C or C := alpha*A**H*A + beta*C 3987 * 3988 * Details: http://www.netlib.org/lapack/explore-html/d8/d52/cherk_8f.html 3989 * 3990 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3991 * @param Trans The type of transpose applied to the operation. 3992 * @param alpha The scalar alpha. 3993 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3994 * @param beta The scalar beta. 3995 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 3996 */ CHERK(@plo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C)3997 public void CHERK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) { 3998 validateUplo(Uplo); 3999 validateHERK(Element.F32_2(mRS), Trans, A, C); 4000 int k = 0; 4001 if (Trans == CONJ_TRANSPOSE) { 4002 k = A.getType().getY(); 4003 } else { 4004 k = A.getType().getX(); 4005 } 4006 4007 boolean mUseIncSupp = isIncSupp(); 4008 long aID = A.getID(mRS); 4009 long cID = C.getID(mRS); 4010 if (mUseIncSupp) { 4011 aID = getDummyAlloc(A); 4012 cID = getDummyAlloc(C); 4013 } 4014 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, 4015 alpha, 0, aID, 0, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp); 4016 } 4017 4018 /** 4019 * ZHERK performs one of the hermitian rank k operations 4020 * C := alpha*A*A**H + beta*C or C := alpha*A**H*A + beta*C 4021 * 4022 * Details: http://www.netlib.org/lapack/explore-html/d1/db1/zherk_8f.html 4023 * 4024 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 4025 * @param Trans The type of transpose applied to the operation. 4026 * @param alpha The scalar alpha. 4027 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 4028 * @param beta The scalar beta. 4029 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 4030 */ ZHERK(@plo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C)4031 public void ZHERK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) { 4032 validateUplo(Uplo); 4033 validateHERK(Element.F64_2(mRS), Trans, A, C); 4034 int k = 0; 4035 if (Trans == CONJ_TRANSPOSE) { 4036 k = A.getType().getY(); 4037 } else { 4038 k = A.getType().getX(); 4039 } 4040 4041 boolean mUseIncSupp = isIncSupp(); 4042 long aID = A.getID(mRS); 4043 long cID = C.getID(mRS); 4044 if (mUseIncSupp) { 4045 aID = getDummyAlloc(A); 4046 cID = getDummyAlloc(C); 4047 } 4048 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, 4049 alpha, 0, aID, 0, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp); 4050 } 4051 validateHER2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C)4052 static void validateHER2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) { 4053 if (!A.getType().getElement().isCompatible(e) || 4054 !B.getType().getElement().isCompatible(e) || 4055 !C.getType().getElement().isCompatible(e)) { 4056 throw new RSRuntimeException("Called BLAS with wrong Element type"); 4057 } 4058 validateConjTranspose(Trans); 4059 int cdim = C.getType().getX(); 4060 if (cdim != C.getType().getY()) { 4061 throw new RSRuntimeException("Called HER2K with non-square C"); 4062 } 4063 if (Trans == NO_TRANSPOSE) { 4064 if (A.getType().getY() != cdim) { 4065 throw new RSRuntimeException("Called HER2K with invalid matrices"); 4066 } 4067 } else { 4068 if (A.getType().getX() != cdim) { 4069 throw new RSRuntimeException("Called HER2K with invalid matrices"); 4070 } 4071 } 4072 if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) { 4073 throw new RSRuntimeException("Called HER2K with invalid A and B matrices"); 4074 } 4075 } 4076 4077 /** 4078 * CHER2K performs one of the hermitian rank 2k operations 4079 * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C or C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C 4080 * 4081 * Details: http://www.netlib.org/lapack/explore-html/d1/d82/cher2k_8f.html 4082 * 4083 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 4084 * @param Trans The type of transpose applied to the operation. 4085 * @param alpha The scalar alpha. 4086 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 4087 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 4088 * @param beta The scalar beta. 4089 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 4090 */ CHER2K(@plo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, float beta, Allocation C)4091 public void CHER2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, float beta, Allocation C) { 4092 validateUplo(Uplo); 4093 validateHER2K(Element.F32_2(mRS), Trans, A, B, C); 4094 int k = 0; 4095 if (Trans == NO_TRANSPOSE) { 4096 k = A.getType().getX(); 4097 } else { 4098 k = A.getType().getY(); 4099 } 4100 4101 boolean mUseIncSupp = isIncSupp(); 4102 long aID = A.getID(mRS); 4103 long bID = B.getID(mRS); 4104 long cID = C.getID(mRS); 4105 if (mUseIncSupp) { 4106 aID = getDummyAlloc(A); 4107 bID = getDummyAlloc(B); 4108 cID = getDummyAlloc(C); 4109 } 4110 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y, 4111 A.getID(mRS), bID, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp); 4112 } 4113 4114 /** 4115 * ZHER2K performs one of the hermitian rank 2k operations 4116 * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C or C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C 4117 * 4118 * Details: http://www.netlib.org/lapack/explore-html/d7/dfa/zher2k_8f.html 4119 * 4120 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 4121 * @param Trans The type of transpose applied to the operation. 4122 * @param alpha The scalar alpha. 4123 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 4124 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 4125 * @param beta The scalar beta. 4126 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 4127 */ ZHER2K(@plo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, double beta, Allocation C)4128 public void ZHER2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, double beta, Allocation C) { 4129 validateUplo(Uplo); 4130 validateHER2K(Element.F64_2(mRS), Trans, A, B, C); 4131 int k = 0; 4132 if (Trans == NO_TRANSPOSE) { 4133 k = A.getType().getX(); 4134 } else { 4135 k = A.getType().getY(); 4136 } 4137 4138 boolean mUseIncSupp = isIncSupp(); 4139 long aID = A.getID(mRS); 4140 long bID = B.getID(mRS); 4141 long cID = C.getID(mRS); 4142 if (mUseIncSupp) { 4143 aID = getDummyAlloc(A); 4144 bID = getDummyAlloc(B); 4145 cID = getDummyAlloc(C); 4146 } 4147 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y, 4148 A.getID(mRS), bID, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp); 4149 } 4150 4151 4152 /** 4153 * 8-bit GEMM-like operation for neural networks: C = A * Transpose(B) 4154 * Calculations are done in 1.10.21 fixed-point format for the final output, 4155 * just before there's a shift down to drop the fractional parts. The output 4156 * values are gated to 0 to 255 to fit in a byte, but the 10-bit format 4157 * gives some headroom to avoid wrapping around on small overflows. 4158 * 4159 * @param A The input allocation contains matrix A, supported elements type {@link Element#U8}. 4160 * @param a_offset The offset for all values in matrix A, e.g A[i,j] = A[i,j] - a_offset. Value should be from 0 to 255. 4161 * @param B The input allocation contains matrix B, supported elements type {@link Element#U8}. 4162 * @param b_offset The offset for all values in matrix B, e.g B[i,j] = B[i,j] - b_offset. Value should be from 0 to 255. 4163 * @param C The input allocation contains matrix C, supported elements type {@link Element#U8}. 4164 * @param c_offset The offset for all values in matrix C. 4165 * @param c_mult The multiplier for all values in matrix C, e.g C[i,j] = (C[i,j] + c_offset) * c_mult. 4166 **/ BNNM(Allocation A, int a_offset, Allocation B, int b_offset, Allocation C, int c_offset, int c_mult)4167 public void BNNM(Allocation A, int a_offset, Allocation B, int b_offset, Allocation C, int c_offset, int c_mult) { 4168 validateL3(Element.U8(mRS), NO_TRANSPOSE, TRANSPOSE, 0, A, B, C); 4169 4170 if (a_offset < 0 || a_offset > 255) { 4171 throw new RSRuntimeException("Invalid a_offset passed to BNNM"); 4172 } 4173 if (b_offset < 0 || b_offset > 255) { 4174 throw new RSRuntimeException("Invalid b_offset passed to BNNM"); 4175 } 4176 int M = -1, N = -1, K = -1; 4177 M = A.getType().getY(); 4178 N = B.getType().getY(); 4179 K = A.getType().getX(); 4180 4181 boolean mUseIncSupp = isIncSupp(); 4182 long aID = A.getID(mRS); 4183 long bID = B.getID(mRS); 4184 long cID = C.getID(mRS); 4185 if (mUseIncSupp) { 4186 aID = getDummyAlloc(A); 4187 bID = getDummyAlloc(B); 4188 cID = getDummyAlloc(C); 4189 } 4190 mRS.nScriptIntrinsicBLAS_BNNM(getID(mRS), M, N, K, aID, a_offset, bID, b_offset, cID, c_offset, c_mult, mUseIncSupp); 4191 4192 } 4193 4194 } 4195