1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package android.renderscript; 18 19 import android.annotation.IntDef; 20 import java.lang.annotation.Retention; 21 import java.lang.annotation.RetentionPolicy; 22 23 /** 24 * 25 * ScriptIntrinsicBLAS class provides high performance RenderScript APIs to BLAS. 26 * 27 * The BLAS (Basic Linear Algebra Subprograms) are routines that provide standard 28 * building blocks for performing basic vector and matrix operations. 29 * 30 * For detailed description of BLAS, please refer to http://www.netlib.org/blas/ 31 * 32 **/ 33 public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { 34 private Allocation mLUT; 35 ScriptIntrinsicBLAS(long id, RenderScript rs)36 private ScriptIntrinsicBLAS(long id, RenderScript rs) { 37 super(id, rs); 38 } 39 40 private static final int RsBlas_sdsdot = 1; 41 private static final int RsBlas_dsdot = 2; 42 private static final int RsBlas_sdot = 3; 43 private static final int RsBlas_ddot = 4; 44 private static final int RsBlas_cdotu_sub = 5; 45 private static final int RsBlas_cdotc_sub = 6; 46 private static final int RsBlas_zdotu_sub = 7; 47 private static final int RsBlas_zdotc_sub = 8; 48 private static final int RsBlas_snrm2 = 9; 49 private static final int RsBlas_sasum = 10; 50 private static final int RsBlas_dnrm2 = 11; 51 private static final int RsBlas_dasum = 12; 52 private static final int RsBlas_scnrm2 = 13; 53 private static final int RsBlas_scasum = 14; 54 private static final int RsBlas_dznrm2 = 15; 55 private static final int RsBlas_dzasum = 16; 56 private static final int RsBlas_isamax = 17; 57 private static final int RsBlas_idamax = 18; 58 private static final int RsBlas_icamax = 19; 59 private static final int RsBlas_izamax = 20; 60 private static final int RsBlas_sswap = 21; 61 private static final int RsBlas_scopy = 22; 62 private static final int RsBlas_saxpy = 23; 63 private static final int RsBlas_dswap = 24; 64 private static final int RsBlas_dcopy = 25; 65 private static final int RsBlas_daxpy = 26; 66 private static final int RsBlas_cswap = 27; 67 private static final int RsBlas_ccopy = 28; 68 private static final int RsBlas_caxpy = 29; 69 private static final int RsBlas_zswap = 30; 70 private static final int RsBlas_zcopy = 31; 71 private static final int RsBlas_zaxpy = 32; 72 private static final int RsBlas_srotg = 33; 73 private static final int RsBlas_srotmg = 34; 74 private static final int RsBlas_srot = 35; 75 private static final int RsBlas_srotm = 36; 76 private static final int RsBlas_drotg = 37; 77 private static final int RsBlas_drotmg = 38; 78 private static final int RsBlas_drot = 39; 79 private static final int RsBlas_drotm = 40; 80 private static final int RsBlas_sscal = 41; 81 private static final int RsBlas_dscal = 42; 82 private static final int RsBlas_cscal = 43; 83 private static final int RsBlas_zscal = 44; 84 private static final int RsBlas_csscal = 45; 85 private static final int RsBlas_zdscal = 46; 86 private static final int RsBlas_sgemv = 47; 87 private static final int RsBlas_sgbmv = 48; 88 private static final int RsBlas_strmv = 49; 89 private static final int RsBlas_stbmv = 50; 90 private static final int RsBlas_stpmv = 51; 91 private static final int RsBlas_strsv = 52; 92 private static final int RsBlas_stbsv = 53; 93 private static final int RsBlas_stpsv = 54; 94 private static final int RsBlas_dgemv = 55; 95 private static final int RsBlas_dgbmv = 56; 96 private static final int RsBlas_dtrmv = 57; 97 private static final int RsBlas_dtbmv = 58; 98 private static final int RsBlas_dtpmv = 59; 99 private static final int RsBlas_dtrsv = 60; 100 private static final int RsBlas_dtbsv = 61; 101 private static final int RsBlas_dtpsv = 62; 102 private static final int RsBlas_cgemv = 63; 103 private static final int RsBlas_cgbmv = 64; 104 private static final int RsBlas_ctrmv = 65; 105 private static final int RsBlas_ctbmv = 66; 106 private static final int RsBlas_ctpmv = 67; 107 private static final int RsBlas_ctrsv = 68; 108 private static final int RsBlas_ctbsv = 69; 109 private static final int RsBlas_ctpsv = 70; 110 private static final int RsBlas_zgemv = 71; 111 private static final int RsBlas_zgbmv = 72; 112 private static final int RsBlas_ztrmv = 73; 113 private static final int RsBlas_ztbmv = 74; 114 private static final int RsBlas_ztpmv = 75; 115 private static final int RsBlas_ztrsv = 76; 116 private static final int RsBlas_ztbsv = 77; 117 private static final int RsBlas_ztpsv = 78; 118 private static final int RsBlas_ssymv = 79; 119 private static final int RsBlas_ssbmv = 80; 120 private static final int RsBlas_sspmv = 81; 121 private static final int RsBlas_sger = 82; 122 private static final int RsBlas_ssyr = 83; 123 private static final int RsBlas_sspr = 84; 124 private static final int RsBlas_ssyr2 = 85; 125 private static final int RsBlas_sspr2 = 86; 126 private static final int RsBlas_dsymv = 87; 127 private static final int RsBlas_dsbmv = 88; 128 private static final int RsBlas_dspmv = 89; 129 private static final int RsBlas_dger = 90; 130 private static final int RsBlas_dsyr = 91; 131 private static final int RsBlas_dspr = 92; 132 private static final int RsBlas_dsyr2 = 93; 133 private static final int RsBlas_dspr2 = 94; 134 private static final int RsBlas_chemv = 95; 135 private static final int RsBlas_chbmv = 96; 136 private static final int RsBlas_chpmv = 97; 137 private static final int RsBlas_cgeru = 98; 138 private static final int RsBlas_cgerc = 99; 139 private static final int RsBlas_cher = 100; 140 private static final int RsBlas_chpr = 101; 141 private static final int RsBlas_cher2 = 102; 142 private static final int RsBlas_chpr2 = 103; 143 private static final int RsBlas_zhemv = 104; 144 private static final int RsBlas_zhbmv = 105; 145 private static final int RsBlas_zhpmv = 106; 146 private static final int RsBlas_zgeru = 107; 147 private static final int RsBlas_zgerc = 108; 148 private static final int RsBlas_zher = 109; 149 private static final int RsBlas_zhpr = 110; 150 private static final int RsBlas_zher2 = 111; 151 private static final int RsBlas_zhpr2 = 112; 152 private static final int RsBlas_sgemm = 113; 153 private static final int RsBlas_ssymm = 114; 154 private static final int RsBlas_ssyrk = 115; 155 private static final int RsBlas_ssyr2k = 116; 156 private static final int RsBlas_strmm = 117; 157 private static final int RsBlas_strsm = 118; 158 private static final int RsBlas_dgemm = 119; 159 private static final int RsBlas_dsymm = 120; 160 private static final int RsBlas_dsyrk = 121; 161 private static final int RsBlas_dsyr2k = 122; 162 private static final int RsBlas_dtrmm = 123; 163 private static final int RsBlas_dtrsm = 124; 164 private static final int RsBlas_cgemm = 125; 165 private static final int RsBlas_csymm = 126; 166 private static final int RsBlas_csyrk = 127; 167 private static final int RsBlas_csyr2k = 128; 168 private static final int RsBlas_ctrmm = 129; 169 private static final int RsBlas_ctrsm = 130; 170 private static final int RsBlas_zgemm = 131; 171 private static final int RsBlas_zsymm = 132; 172 private static final int RsBlas_zsyrk = 133; 173 private static final int RsBlas_zsyr2k = 134; 174 private static final int RsBlas_ztrmm = 135; 175 private static final int RsBlas_ztrsm = 136; 176 private static final int RsBlas_chemm = 137; 177 private static final int RsBlas_cherk = 138; 178 private static final int RsBlas_cher2k = 139; 179 private static final int RsBlas_zhemm = 140; 180 private static final int RsBlas_zherk = 141; 181 private static final int RsBlas_zher2k = 142; 182 183 // BLAS extensions start here 184 private static final int RsBlas_bnnm = 1000; 185 186 /** 187 * Create an intrinsic to access BLAS subroutines. 188 * 189 * @param rs The RenderScript context 190 * @return ScriptIntrinsicBLAS 191 */ create(RenderScript rs)192 public static ScriptIntrinsicBLAS create(RenderScript rs) { 193 long id = rs.nScriptIntrinsicCreate(13, Element.U32(rs).getID(rs)); 194 return new ScriptIntrinsicBLAS(id, rs); 195 } 196 197 /** 198 * @hide 199 */ 200 @IntDef({NO_TRANSPOSE, TRANSPOSE, CONJ_TRANSPOSE}) 201 @Retention(RetentionPolicy.SOURCE) 202 public @interface Transpose {} 203 204 /** 205 * @hide 206 */ 207 @IntDef({UPPER, LOWER}) 208 @Retention(RetentionPolicy.SOURCE) 209 public @interface Uplo {} 210 211 /** 212 * @hide 213 */ 214 @IntDef({NON_UNIT, UNIT}) 215 @Retention(RetentionPolicy.SOURCE) 216 public @interface Diag {} 217 218 /** 219 * @hide 220 */ 221 @IntDef({LEFT, RIGHT}) 222 @Retention(RetentionPolicy.SOURCE) 223 public @interface Side {} 224 225 public static final int NO_TRANSPOSE = 111; 226 public static final int TRANSPOSE = 112; 227 public static final int CONJ_TRANSPOSE = 113; 228 229 public static final int UPPER = 121; 230 public static final int LOWER = 122; 231 232 public static final int NON_UNIT = 131; 233 public static final int UNIT = 132; 234 235 public static final int LEFT = 141; 236 public static final int RIGHT = 142; 237 validateSide(@ide int Side)238 static void validateSide(@Side int Side) { 239 if (Side != LEFT && Side != RIGHT) { 240 throw new RSRuntimeException("Invalid side passed to BLAS"); 241 } 242 } 243 validateTranspose(@ranspose int Trans)244 static void validateTranspose(@Transpose int Trans) { 245 if (Trans != NO_TRANSPOSE && Trans != TRANSPOSE && 246 Trans != CONJ_TRANSPOSE) { 247 throw new RSRuntimeException("Invalid transpose passed to BLAS"); 248 } 249 } 250 validateConjTranspose(@ranspose int Trans)251 static void validateConjTranspose(@Transpose int Trans) { 252 if (Trans != NO_TRANSPOSE && 253 Trans != CONJ_TRANSPOSE) { 254 throw new RSRuntimeException("Invalid transpose passed to BLAS"); 255 } 256 } 257 validateDiag(@iag int Diag)258 static void validateDiag(@Diag int Diag) { 259 if (Diag != NON_UNIT && Diag != UNIT) { 260 throw new RSRuntimeException("Invalid diag passed to BLAS"); 261 } 262 } 263 validateUplo(@plo int Uplo)264 static void validateUplo(@Uplo int Uplo) { 265 if (Uplo != UPPER && Uplo != LOWER) { 266 throw new RSRuntimeException("Invalid uplo passed to BLAS"); 267 } 268 } 269 270 271 /** 272 * Level 2 BLAS 273 */ 274 validateGEMV(Element e, int TransA, Allocation A, Allocation X, int incX, Allocation Y, int incY)275 static void validateGEMV(Element e, int TransA, Allocation A, Allocation X, int incX, Allocation Y, int incY) { 276 validateTranspose(TransA); 277 int M = A.getType().getY(); 278 int N = A.getType().getX(); 279 if (!A.getType().getElement().isCompatible(e) || 280 !X.getType().getElement().isCompatible(e) || 281 !Y.getType().getElement().isCompatible(e)) { 282 throw new RSRuntimeException("Called BLAS with wrong Element type"); 283 } 284 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 285 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 286 } 287 288 if (incX <= 0 || incY <= 0) { 289 throw new RSRuntimeException("Vector increments must be greater than 0"); 290 } 291 int expectedXDim = -1, expectedYDim = -1; 292 if (TransA == NO_TRANSPOSE) { 293 expectedXDim = 1 + (N - 1) * incX; 294 expectedYDim = 1 + (M - 1) * incY; 295 } else { 296 expectedXDim = 1 + (M - 1) * incX; 297 expectedYDim = 1 + (N - 1) * incY; 298 } 299 if (X.getType().getX() != expectedXDim || 300 Y.getType().getX() != expectedYDim) { 301 throw new RSRuntimeException("Incorrect vector dimensions for GEMV"); 302 } 303 } 304 305 /** 306 * SGEMV performs one of the matrix-vector operations 307 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y 308 * 309 * Details: http://www.netlib.org/lapack/explore-html/db/d58/sgemv_8f.html 310 * 311 * @param TransA The type of transpose applied to matrix A. 312 * @param alpha The scalar alpha. 313 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 314 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 315 * @param incX The increment for the elements of vector x, must be larger than zero. 316 * @param beta The scalar beta. 317 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 318 * @param incY The increment for the elements of vector y, must be larger than zero. 319 */ SGEMV(@ranspose int TransA, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY)320 public void SGEMV(@Transpose int TransA, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { 321 validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY); 322 int M = A.getType().getY(); 323 int N = A.getType().getX(); 324 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); 325 } 326 327 /** 328 * DGEMV performs one of the matrix-vector operations 329 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y 330 * 331 * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dgemv_8f.html 332 * 333 * @param TransA The type of transpose applied to matrix A. 334 * @param alpha The scalar alpha. 335 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 336 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 337 * @param incX The increment for the elements of vector x, must be larger than zero. 338 * @param beta The scalar beta. 339 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 340 * @param incY The increment for the elements of vector y, must be larger than zero. 341 */ DGEMV(@ranspose int TransA, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY)342 public void DGEMV(@Transpose int TransA, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { 343 validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY); 344 int M = A.getType().getY(); 345 int N = A.getType().getX(); 346 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); 347 } 348 349 /** 350 * CGEMV performs one of the matrix-vector operations 351 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y 352 * 353 * Details: http://www.netlib.org/lapack/explore-html/d4/d8a/cgemv_8f.html 354 * 355 * @param TransA The type of transpose applied to matrix A. 356 * @param alpha The scalar alpha. 357 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 358 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 359 * @param incX The increment for the elements of vector x, must be larger than zero. 360 * @param beta The scalar beta. 361 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 362 * @param incY The increment for the elements of vector y, must be larger than zero. 363 */ CGEMV(@ranspose int TransA, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY)364 public void CGEMV(@Transpose int TransA, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 365 validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY); 366 int M = A.getType().getY(); 367 int N = A.getType().getX(); 368 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); 369 } 370 371 /** 372 * ZGEMV performs one of the matrix-vector operations 373 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y 374 * 375 * Details: http://www.netlib.org/lapack/explore-html/db/d40/zgemv_8f.html 376 * 377 * @param TransA The type of transpose applied to matrix A. 378 * @param alpha The scalar alpha. 379 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 380 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 381 * @param incX The increment for the elements of vector x, must be larger than zero. 382 * @param beta The scalar beta. 383 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 384 * @param incY The increment for the elements of vector y, must be larger than zero. 385 */ ZGEMV(@ranspose int TransA, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY)386 public void ZGEMV(@Transpose int TransA, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 387 validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY); 388 int M = A.getType().getY(); 389 int N = A.getType().getX(); 390 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); 391 } 392 393 /** 394 * SGBMV performs one of the matrix-vector operations 395 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y 396 * 397 * Details: http://www.netlib.org/lapack/explore-html/d6/d46/sgbmv_8f.html 398 * 399 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), 400 * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an 401 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. 402 * for i in range(0, m): 403 * for j in range(max(0, i-kl), min(i+ku+1, n)): 404 * b[i, j-i+kl] = a[i, j] 405 * 406 * @param TransA The type of transpose applied to matrix A. 407 * @param KL The number of sub-diagonals of the matrix A. 408 * @param KU The number of super-diagonals of the matrix A. 409 * @param alpha The scalar alpha. 410 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32}. 411 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 412 * @param incX The increment for the elements of vector x, must be larger than zero. 413 * @param beta The scalar beta. 414 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 415 * @param incY The increment for the elements of vector y, must be larger than zero. 416 */ SGBMV(@ranspose int TransA, int KL, int KU, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY)417 public void SGBMV(@Transpose int TransA, int KL, int KU, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { 418 // GBMV has the same validation requirements as GEMV + KL and KU >= 0 419 validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY); 420 if (KL < 0 || KU < 0) { 421 throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); 422 } 423 int M = A.getType().getY(); 424 int N = A.getType().getX(); 425 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, KL, KU); 426 } 427 428 /** 429 * DGBMV performs one of the matrix-vector operations 430 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y 431 * 432 * Details: http://www.netlib.org/lapack/explore-html/d2/d3f/dgbmv_8f.html 433 * 434 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), 435 * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an 436 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. 437 * for i in range(0, m): 438 * for j in range(max(0, i-kl), min(i+ku+1, n)): 439 * b[i, j-i+kl] = a[i, j] 440 * 441 * @param TransA The type of transpose applied to matrix A. 442 * @param KL The number of sub-diagonals of the matrix A. 443 * @param KU The number of super-diagonals of the matrix A. 444 * @param alpha The scalar alpha. 445 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64}. 446 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 447 * @param incX The increment for the elements of vector x, must be larger than zero. 448 * @param beta The scalar beta. 449 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 450 * @param incY The increment for the elements of vector y, must be larger than zero. 451 */ DGBMV(@ranspose int TransA, int KL, int KU, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY)452 public void DGBMV(@Transpose int TransA, int KL, int KU, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { 453 // GBMV has the same validation requirements as GEMV + KL and KU >= 0 454 validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY); 455 if (KL < 0 || KU < 0) { 456 throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); 457 } 458 int M = A.getType().getY(); 459 int N = A.getType().getX(); 460 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, KL, KU); 461 } 462 463 /** 464 * CGBMV performs one of the matrix-vector operations 465 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y 466 * 467 * Details: http://www.netlib.org/lapack/explore-html/d0/d75/cgbmv_8f.html 468 * 469 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), 470 * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an 471 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. 472 * for i in range(0, m): 473 * for j in range(max(0, i-kl), min(i+ku+1, n)): 474 * b[i, j-i+kl] = a[i, j] 475 * 476 * @param TransA The type of transpose applied to matrix A. 477 * @param KL The number of sub-diagonals of the matrix A. 478 * @param KU The number of super-diagonals of the matrix A. 479 * @param alpha The scalar alpha. 480 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32_2}. 481 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 482 * @param incX The increment for the elements of vector x, must be larger than zero. 483 * @param beta The scalar beta. 484 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 485 * @param incY The increment for the elements of vector y, must be larger than zero. 486 */ CGBMV(@ranspose int TransA, int KL, int KU, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY)487 public void CGBMV(@Transpose int TransA, int KL, int KU, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 488 // GBMV has the same validation requirements as GEMV + KL and KU >= 0 489 validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY); 490 if (KL < 0 || KU < 0) { 491 throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); 492 } 493 int M = A.getType().getY(); 494 int N = A.getType().getX(); 495 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, KL, KU); 496 } 497 498 /** 499 * ZGBMV performs one of the matrix-vector operations 500 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y 501 * 502 * Details: http://www.netlib.org/lapack/explore-html/d9/d46/zgbmv_8f.html 503 * 504 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), 505 * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an 506 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. 507 * for i in range(0, m): 508 * for j in range(max(0, i-kl), min(i+ku+1, n)): 509 * b[i, j-i+kl] = a[i, j] 510 * 511 * @param TransA The type of transpose applied to matrix A. 512 * @param KL The number of sub-diagonals of the matrix A. 513 * @param KU The number of super-diagonals of the matrix A. 514 * @param alpha The scalar alpha. 515 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64_2}. 516 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 517 * @param incX The increment for the elements of vector x, must be larger than zero. 518 * @param beta The scalar beta. 519 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 520 * @param incY The increment for the elements of vector y, must be larger than zero. 521 */ ZGBMV(@ranspose int TransA, int KL, int KU, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY)522 public void ZGBMV(@Transpose int TransA, int KL, int KU, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 523 // GBMV has the same validation requirements as GEMV + KL and KU >= 0 524 validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY); 525 if (KL < 0 || KU < 0) { 526 throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); 527 } 528 int M = A.getType().getY(); 529 int N = A.getType().getX(); 530 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, KL, KU); 531 } 532 validateTRMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)533 static void validateTRMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 534 validateTranspose(TransA); 535 validateUplo(Uplo); 536 validateDiag(Diag); 537 int N = A.getType().getY(); 538 if (A.getType().getX() != N) { 539 throw new RSRuntimeException("A must be a square matrix for TRMV"); 540 } 541 if (!A.getType().getElement().isCompatible(e) || 542 !X.getType().getElement().isCompatible(e)) { 543 throw new RSRuntimeException("Called BLAS with wrong Element type"); 544 } 545 if (X.getType().getY() > 1) { 546 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 547 } 548 549 if (incX <= 0) { 550 throw new RSRuntimeException("Vector increments must be greater than 0"); 551 } 552 int expectedXDim = 1 + (N - 1) * incX; 553 if (X.getType().getX() != expectedXDim) { 554 throw new RSRuntimeException("Incorrect vector dimensions for TRMV"); 555 } 556 } 557 validateTPMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)558 static int validateTPMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 559 validateTranspose(TransA); 560 validateUplo(Uplo); 561 validateDiag(Diag); 562 if (!Ap.getType().getElement().isCompatible(e) || 563 !X.getType().getElement().isCompatible(e)) { 564 throw new RSRuntimeException("Called BLAS with wrong Element type"); 565 } 566 if (X.getType().getY() > 1) { 567 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 568 } 569 570 if (Ap.getType().getY() > 1) { 571 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); 572 } 573 574 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 575 //is it really doing anything? 576 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 577 throw new RSRuntimeException("Invalid dimension for Ap"); 578 } 579 if (incX <= 0) { 580 throw new RSRuntimeException("Vector increments must be greater than 0"); 581 } 582 int expectedXDim = 1 + (N - 1) * incX; 583 if (X.getType().getX() != expectedXDim) { 584 throw new RSRuntimeException("Incorrect vector dimensions for TPMV"); 585 } 586 587 return N; 588 } 589 590 /** 591 * STRMV performs one of the matrix-vector operations 592 * x := A*x or x := A**T*x 593 * 594 * Details: http://www.netlib.org/lapack/explore-html/de/d45/strmv_8f.html 595 * 596 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 597 * @param TransA The type of transpose applied to matrix A. 598 * @param Diag Specifies whether or not A is unit triangular. 599 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 600 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 601 * @param incX The increment for the elements of vector x, must be larger than zero. 602 */ STRMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)603 public void STRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 604 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); 605 int N = A.getType().getY(); 606 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 607 } 608 609 /** 610 * DTRMV performs one of the matrix-vector operations 611 * x := A*x or x := A**T*x 612 * 613 * Details: http://www.netlib.org/lapack/explore-html/dc/d7e/dtrmv_8f.html 614 * 615 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 616 * @param TransA The type of transpose applied to matrix A. 617 * @param Diag Specifies whether or not A is unit triangular. 618 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 619 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 620 * @param incX The increment for the elements of vector x, must be larger than zero. 621 */ DTRMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)622 public void DTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 623 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); 624 int N = A.getType().getY(); 625 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 626 } 627 628 /** 629 * CTRMV performs one of the matrix-vector operations 630 * x := A*x or x := A**T*x or x := A**H*x 631 * 632 * Details: http://www.netlib.org/lapack/explore-html/df/d78/ctrmv_8f.html 633 * 634 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 635 * @param TransA The type of transpose applied to matrix A. 636 * @param Diag Specifies whether or not A is unit triangular. 637 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 638 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 639 * @param incX The increment for the elements of vector x, must be larger than zero. 640 */ CTRMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)641 public void CTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 642 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); 643 int N = A.getType().getY(); 644 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 645 } 646 647 /** 648 * ZTRMV performs one of the matrix-vector operations 649 * x := A*x or x := A**T*x or x := A**H*x 650 * 651 * Details: http://www.netlib.org/lapack/explore-html/d0/dd1/ztrmv_8f.html 652 * 653 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 654 * @param TransA The type of transpose applied to matrix A. 655 * @param Diag Specifies whether or not A is unit triangular. 656 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 657 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 658 * @param incX The increment for the elements of vector x, must be larger than zero. 659 */ ZTRMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)660 public void ZTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 661 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); 662 int N = A.getType().getY(); 663 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 664 } 665 666 /** 667 * STBMV performs one of the matrix-vector operations 668 * x := A*x or x := A**T*x 669 * 670 * Details: http://www.netlib.org/lapack/explore-html/d6/d7d/stbmv_8f.html 671 * 672 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 673 * but only the region N*(K+1) will be referenced. The following subroutine can is an 674 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 675 * for i in range(0, n): 676 * for j in range(i, min(i+k+1, n)): 677 * b[i, j-i] = a[i, j] 678 * 679 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 680 * @param TransA The type of transpose applied to matrix A. 681 * @param Diag Specifies whether or not A is unit triangular. 682 * @param K The number of off-diagonals of the matrix A 683 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 684 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 685 * @param incX The increment for the elements of vector x, must be larger than zero. 686 */ STBMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)687 public void STBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 688 // TBMV has the same requirements as TRMV + K >= 0 689 if (K < 0) { 690 throw new RSRuntimeException("K must be greater than or equal to 0"); 691 } 692 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); 693 int N = A.getType().getY(); 694 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 695 } 696 697 /** 698 * DTBMV performs one of the matrix-vector operations 699 * x := A*x or x := A**T*x 700 * 701 * Details: http://www.netlib.org/lapack/explore-html/df/d29/dtbmv_8f.html 702 * 703 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 704 * but only the region N*(K+1) will be referenced. The following subroutine can is an 705 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 706 * for i in range(0, n): 707 * for j in range(i, min(i+k+1, n)): 708 * b[i, j-i] = a[i, j] 709 * 710 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 711 * @param TransA The type of transpose applied to matrix A. 712 * @param Diag Specifies whether or not A is unit triangular. 713 * @param K The number of off-diagonals of the matrix A 714 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 715 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 716 * @param incX The increment for the elements of vector x, must be larger than zero. 717 */ DTBMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)718 public void DTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 719 // TBMV has the same requirements as TRMV + K >= 0 720 if (K < 0) { 721 throw new RSRuntimeException("K must be greater than or equal to 0"); 722 } 723 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); 724 int N = A.getType().getY(); 725 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 726 } 727 728 /** 729 * CTBMV performs one of the matrix-vector operations 730 * x := A*x or x := A**T*x or x := A**H*x 731 * 732 * Details: http://www.netlib.org/lapack/explore-html/d3/dcd/ctbmv_8f.html 733 * 734 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 735 * but only the region N*(K+1) will be referenced. The following subroutine can is an 736 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 737 * for i in range(0, n): 738 * for j in range(i, min(i+k+1, n)): 739 * b[i, j-i] = a[i, j] 740 * 741 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 742 * @param TransA The type of transpose applied to matrix A. 743 * @param Diag Specifies whether or not A is unit triangular. 744 * @param K The number of off-diagonals of the matrix A 745 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 746 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 747 * @param incX The increment for the elements of vector x, must be larger than zero. 748 */ CTBMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)749 public void CTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 750 // TBMV has the same requirements as TRMV + K >= 0 751 if (K < 0) { 752 throw new RSRuntimeException("K must be greater than or equal to 0"); 753 } 754 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); 755 int N = A.getType().getY(); 756 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 757 } 758 759 /** 760 * ZTBMV performs one of the matrix-vector operations 761 * x := A*x or x := A**T*x or x := A**H*x 762 * 763 * Details: http://www.netlib.org/lapack/explore-html/d3/d39/ztbmv_8f.html 764 * 765 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 766 * but only the region N*(K+1) will be referenced. The following subroutine can is an 767 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 768 * for i in range(0, n): 769 * for j in range(i, min(i+k+1, n)): 770 * b[i, j-i] = a[i, j] 771 * 772 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 773 * @param TransA The type of transpose applied to matrix A. 774 * @param Diag Specifies whether or not A is unit triangular. 775 * @param K The number of off-diagonals of the matrix A 776 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 777 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 778 * @param incX The increment for the elements of vector x, must be larger than zero. 779 */ ZTBMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)780 public void ZTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 781 // TBMV has the same requirements as TRMV + K >= 0 782 if (K < 0) { 783 throw new RSRuntimeException("K must be greater than or equal to 0"); 784 } 785 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); 786 int N = A.getType().getY(); 787 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 788 } 789 790 /** 791 * STPMV performs one of the matrix-vector operations 792 * x := A*x or x := A**T*x 793 * 794 * Details: http://www.netlib.org/lapack/explore-html/db/db1/stpmv_8f.html 795 * 796 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 797 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 798 * 'a' to packed matrix 'b'. 799 * k = 0 800 * for i in range(0, n): 801 * for j in range(i, n): 802 * b[k++] = a[i, j] 803 * 804 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 805 * @param TransA The type of transpose applied to matrix A. 806 * @param Diag Specifies whether or not A is unit triangular. 807 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}. 808 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 809 * @param incX The increment for the elements of vector x, must be larger than zero. 810 */ STPMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)811 public void STPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 812 int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX); 813 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 814 } 815 816 /** 817 * DTPMV performs one of the matrix-vector operations 818 * x := A*x or x := A**T*x 819 * 820 * Details: http://www.netlib.org/lapack/explore-html/dc/dcd/dtpmv_8f.html 821 * 822 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 823 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 824 * 'a' to packed matrix 'b'. 825 * k = 0 826 * for i in range(0, n): 827 * for j in range(i, n): 828 * b[k++] = a[i, j] 829 * 830 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 831 * @param TransA The type of transpose applied to matrix A. 832 * @param Diag Specifies whether or not A is unit triangular. 833 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}. 834 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 835 * @param incX The increment for the elements of vector x, must be larger than zero. 836 */ DTPMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)837 public void DTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 838 int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX); 839 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 840 } 841 842 /** 843 * CTPMV performs one of the matrix-vector operations 844 * x := A*x or x := A**T*x or x := A**H*x 845 * 846 * Details: http://www.netlib.org/lapack/explore-html/d4/dbb/ctpmv_8f.html 847 * 848 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 849 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 850 * 'a' to packed matrix 'b'. 851 * k = 0 852 * for i in range(0, n): 853 * for j in range(i, n): 854 * b[k++] = a[i, j] 855 * 856 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 857 * @param TransA The type of transpose applied to matrix A. 858 * @param Diag Specifies whether or not A is unit triangular. 859 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}. 860 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 861 * @param incX The increment for the elements of vector x, must be larger than zero. 862 */ CTPMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)863 public void CTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 864 int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX); 865 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 866 } 867 868 /** 869 * ZTPMV performs one of the matrix-vector operations 870 * x := A*x or x := A**T*x or x := A**H*x 871 * 872 * Details: http://www.netlib.org/lapack/explore-html/d2/d9e/ztpmv_8f.html 873 * 874 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 875 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 876 * 'a' to packed matrix 'b'. 877 * k = 0 878 * for i in range(0, n): 879 * for j in range(i, n): 880 * b[k++] = a[i, j] 881 * 882 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 883 * @param TransA The type of transpose applied to matrix A. 884 * @param Diag Specifies whether or not A is unit triangular. 885 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}. 886 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 887 * @param incX The increment for the elements of vector x, must be larger than zero. 888 */ ZTPMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)889 public void ZTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 890 int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX); 891 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 892 } 893 894 /** 895 * STRSV solves one of the systems of equations 896 * A*x = b or A**T*x = b 897 * 898 * Details: http://www.netlib.org/lapack/explore-html/d0/d2a/strsv_8f.html 899 * 900 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 901 * @param TransA The type of transpose applied to matrix A. 902 * @param Diag Specifies whether or not A is unit triangular. 903 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 904 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 905 * @param incX The increment for the elements of vector x, must be larger than zero. 906 */ STRSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)907 public void STRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 908 // TRSV is the same as TRMV 909 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); 910 int N = A.getType().getY(); 911 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 912 913 } 914 915 /** 916 * DTRSV solves one of the systems of equations 917 * A*x = b or A**T*x = b 918 * 919 * Details: http://www.netlib.org/lapack/explore-html/d6/d96/dtrsv_8f.html 920 * 921 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 922 * @param TransA The type of transpose applied to matrix A. 923 * @param Diag Specifies whether or not A is unit triangular. 924 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 925 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 926 * @param incX The increment for the elements of vector x, must be larger than zero. 927 */ DTRSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)928 public void DTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 929 // TRSV is the same as TRMV 930 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); 931 int N = A.getType().getY(); 932 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 933 934 } 935 936 /** 937 * CTRSV solves one of the systems of equations 938 * A*x = b or A**T*x = b or A**H*x = b 939 * 940 * Details: http://www.netlib.org/lapack/explore-html/d4/dc8/ctrsv_8f.html 941 * 942 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 943 * @param TransA The type of transpose applied to matrix A. 944 * @param Diag Specifies whether or not A is unit triangular. 945 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 946 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 947 * @param incX The increment for the elements of vector x, must be larger than zero. 948 */ CTRSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)949 public void CTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 950 // TRSV is the same as TRMV 951 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); 952 int N = A.getType().getY(); 953 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 954 955 } 956 957 /** 958 * ZTRSV solves one of the systems of equations 959 * A*x = b or A**T*x = b or A**H*x = b 960 * 961 * Details: http://www.netlib.org/lapack/explore-html/d1/d2f/ztrsv_8f.html 962 * 963 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 964 * @param TransA The type of transpose applied to matrix A. 965 * @param Diag Specifies whether or not A is unit triangular. 966 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 967 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 968 * @param incX The increment for the elements of vector x, must be larger than zero. 969 */ ZTRSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)970 public void ZTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 971 // TRSV is the same as TRMV 972 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); 973 int N = A.getType().getY(); 974 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 975 976 } 977 978 /** 979 * STBSV solves one of the systems of equations 980 * A*x = b or A**T*x = b 981 * 982 * Details: http://www.netlib.org/lapack/explore-html/d0/d1f/stbsv_8f.html 983 * 984 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 985 * but only the region N*(K+1) will be referenced. The following subroutine can is an 986 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 987 * for i in range(0, n): 988 * for j in range(i, min(i+k+1, n)): 989 * b[i, j-i] = a[i, j] 990 * 991 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 992 * @param TransA The type of transpose applied to matrix A. 993 * @param Diag Specifies whether or not A is unit triangular. 994 * @param K The number of off-diagonals of the matrix A 995 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 996 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 997 * @param incX The increment for the elements of vector x, must be larger than zero. 998 */ STBSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)999 public void STBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 1000 // TBSV is the same as TRMV + K >= 0 1001 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); 1002 int N = A.getType().getY(); 1003 if (K < 0) { 1004 throw new RSRuntimeException("Number of diagonals must be positive"); 1005 } 1006 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 1007 } 1008 1009 /** 1010 * DTBSV solves one of the systems of equations 1011 * A*x = b or A**T*x = b 1012 * 1013 * Details: http://www.netlib.org/lapack/explore-html/d4/dcf/dtbsv_8f.html 1014 * 1015 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1016 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1017 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1018 * for i in range(0, n): 1019 * for j in range(i, min(i+k+1, n)): 1020 * b[i, j-i] = a[i, j] 1021 * 1022 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1023 * @param TransA The type of transpose applied to matrix A. 1024 * @param Diag Specifies whether or not A is unit triangular. 1025 * @param K The number of off-diagonals of the matrix A 1026 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 1027 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1028 * @param incX The increment for the elements of vector x, must be larger than zero. 1029 */ DTBSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)1030 public void DTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 1031 // TBSV is the same as TRMV + K >= 0 1032 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); 1033 int N = A.getType().getY(); 1034 if (K < 0) { 1035 throw new RSRuntimeException("Number of diagonals must be positive"); 1036 } 1037 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 1038 } 1039 1040 /** 1041 * CTBSV solves one of the systems of equations 1042 * A*x = b or A**T*x = b or A**H*x = b 1043 * 1044 * Details: http://www.netlib.org/lapack/explore-html/d9/d5f/ctbsv_8f.html 1045 * 1046 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1047 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1048 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1049 * for i in range(0, n): 1050 * for j in range(i, min(i+k+1, n)): 1051 * b[i, j-i] = a[i, j] 1052 * 1053 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1054 * @param TransA The type of transpose applied to matrix A. 1055 * @param Diag Specifies whether or not A is unit triangular. 1056 * @param K The number of off-diagonals of the matrix A 1057 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1058 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1059 * @param incX The increment for the elements of vector x, must be larger than zero. 1060 */ CTBSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)1061 public void CTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 1062 // TBSV is the same as TRMV + K >= 0 1063 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); 1064 int N = A.getType().getY(); 1065 if (K < 0) { 1066 throw new RSRuntimeException("Number of diagonals must be positive"); 1067 } 1068 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 1069 } 1070 1071 /** 1072 * ZTBSV solves one of the systems of equations 1073 * A*x = b or A**T*x = b or A**H*x = b 1074 * 1075 * Details: http://www.netlib.org/lapack/explore-html/d4/d5a/ztbsv_8f.html 1076 * 1077 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1078 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1079 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1080 * for i in range(0, n): 1081 * for j in range(i, min(i+k+1, n)): 1082 * b[i, j-i] = a[i, j] 1083 * 1084 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1085 * @param TransA The type of transpose applied to matrix A. 1086 * @param Diag Specifies whether or not A is unit triangular. 1087 * @param K The number of off-diagonals of the matrix A 1088 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 1089 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 1090 * @param incX The increment for the elements of vector x, must be larger than zero. 1091 */ ZTBSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)1092 public void ZTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 1093 // TBSV is the same as TRMV + K >= 0 1094 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); 1095 int N = A.getType().getY(); 1096 if (K < 0) { 1097 throw new RSRuntimeException("Number of diagonals must be positive"); 1098 } 1099 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 1100 } 1101 1102 /** 1103 * STPSV solves one of the systems of equations 1104 * A*x = b or A**T*x = b 1105 * 1106 * Details: http://www.netlib.org/lapack/explore-html/d0/d7c/stpsv_8f.html 1107 * 1108 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1109 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1110 * 'a' to packed matrix 'b'. 1111 * k = 0 1112 * for i in range(0, n): 1113 * for j in range(i, n): 1114 * b[k++] = a[i, j] 1115 * 1116 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1117 * @param TransA The type of transpose applied to matrix A. 1118 * @param Diag Specifies whether or not A is unit triangular. 1119 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}. 1120 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1121 * @param incX The increment for the elements of vector x, must be larger than zero. 1122 */ STPSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1123 public void STPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1124 // TPSV is same as TPMV 1125 int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX); 1126 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 1127 } 1128 1129 /** 1130 * DTPSV solves one of the systems of equations 1131 * A*x = b or A**T*x = b 1132 * 1133 * Details: http://www.netlib.org/lapack/explore-html/d9/d84/dtpsv_8f.html 1134 * 1135 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1136 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1137 * 'a' to packed matrix 'b'. 1138 * k = 0 1139 * for i in range(0, n): 1140 * for j in range(i, n): 1141 * b[k++] = a[i, j] 1142 * 1143 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1144 * @param TransA The type of transpose applied to matrix A. 1145 * @param Diag Specifies whether or not A is unit triangular. 1146 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}. 1147 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1148 * @param incX The increment for the elements of vector x, must be larger than zero. 1149 */ DTPSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1150 public void DTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1151 // TPSV is same as TPMV 1152 int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX); 1153 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 1154 } 1155 1156 /** 1157 * CTPSV solves one of the systems of equations 1158 * A*x = b or A**T*x = b or A**H*x = b 1159 * 1160 * Details: http://www.netlib.org/lapack/explore-html/d8/d56/ctpsv_8f.html 1161 * 1162 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1163 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1164 * 'a' to packed matrix 'b'. 1165 * k = 0 1166 * for i in range(0, n): 1167 * for j in range(i, n): 1168 * b[k++] = a[i, j] 1169 * 1170 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1171 * @param TransA The type of transpose applied to matrix A. 1172 * @param Diag Specifies whether or not A is unit triangular. 1173 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}. 1174 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1175 * @param incX The increment for the elements of vector x, must be larger than zero. 1176 */ CTPSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1177 public void CTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1178 // TPSV is same as TPMV 1179 int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX); 1180 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 1181 } 1182 1183 /** 1184 * ZTPSV solves one of the systems of equations 1185 * A*x = b or A**T*x = b or A**H*x = b 1186 * 1187 * Details: http://www.netlib.org/lapack/explore-html/da/d57/ztpsv_8f.html 1188 * 1189 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1190 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1191 * 'a' to packed matrix 'b'. 1192 * k = 0 1193 * for i in range(0, n): 1194 * for j in range(i, n): 1195 * b[k++] = a[i, j] 1196 * 1197 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1198 * @param TransA The type of transpose applied to matrix A. 1199 * @param Diag Specifies whether or not A is unit triangular. 1200 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}. 1201 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 1202 * @param incX The increment for the elements of vector x, must be larger than zero. 1203 */ ZTPSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1204 public void ZTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1205 // TPSV is same as TPMV 1206 int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX); 1207 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 1208 } 1209 1210 /** 1211 * Level 2, S and D only 1212 */ validateSYMV(Element e, @Uplo int Uplo, Allocation A, Allocation X, Allocation Y, int incX, int incY)1213 static int validateSYMV(Element e, @Uplo int Uplo, Allocation A, Allocation X, Allocation Y, int incX, int incY) { 1214 validateUplo(Uplo); 1215 int N = A.getType().getY(); 1216 if (A.getType().getX() != N) { 1217 throw new RSRuntimeException("A must be a square matrix for SYMV"); 1218 } 1219 if (!A.getType().getElement().isCompatible(e) || 1220 !X.getType().getElement().isCompatible(e) || 1221 !Y.getType().getElement().isCompatible(e) ) { 1222 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1223 } 1224 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1225 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1226 } 1227 1228 if (incX <= 0 || incY <= 0) { 1229 throw new RSRuntimeException("Vector increments must be greater than 0"); 1230 } 1231 int expectedXDim = 1 + (N - 1) * incX; 1232 if (X.getType().getX() != expectedXDim) { 1233 throw new RSRuntimeException("Incorrect vector dimensions for SYMV"); 1234 } 1235 int expectedYDim = 1 + (N - 1) * incY; 1236 if (Y.getType().getX() != expectedYDim) { 1237 throw new RSRuntimeException("Incorrect vector dimensions for SYMV"); 1238 } 1239 return N; 1240 } validateSPMV(Element e, @Uplo int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY)1241 static int validateSPMV(Element e, @Uplo int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY) { 1242 validateUplo(Uplo); 1243 if (!Ap.getType().getElement().isCompatible(e) || 1244 !X.getType().getElement().isCompatible(e) || 1245 !Y.getType().getElement().isCompatible(e)) { 1246 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1247 } 1248 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1249 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1250 } 1251 1252 if (Ap.getType().getY() > 1) { 1253 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); 1254 } 1255 1256 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 1257 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 1258 throw new RSRuntimeException("Invalid dimension for Ap"); 1259 } 1260 if (incX <= 0 || incY <= 0) { 1261 throw new RSRuntimeException("Vector increments must be greater than 0"); 1262 } 1263 int expectedXDim = 1 + (N - 1) * incX; 1264 if (X.getType().getX() != expectedXDim) { 1265 throw new RSRuntimeException("Incorrect vector dimensions for SPMV"); 1266 } 1267 int expectedYDim = 1 + (N - 1) * incY; 1268 if (Y.getType().getX() != expectedYDim) { 1269 throw new RSRuntimeException("Incorrect vector dimensions for SPMV"); 1270 } 1271 1272 return N; 1273 } validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A)1274 static void validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1275 if (!A.getType().getElement().isCompatible(e) || 1276 !X.getType().getElement().isCompatible(e) || 1277 !Y.getType().getElement().isCompatible(e) ) { 1278 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1279 } 1280 1281 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1282 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1283 } 1284 1285 int M = A.getType().getY(); 1286 int N = A.getType().getX(); 1287 1288 if (N < 1 || M < 1) { 1289 throw new RSRuntimeException("M and N must be 1 or greater for GER"); 1290 } 1291 if (incX <= 0 || incY <= 0) { 1292 throw new RSRuntimeException("Vector increments must be greater than 0"); 1293 } 1294 int expectedXDim = 1 + (M - 1) * incX; 1295 if (X.getType().getX() != expectedXDim) { 1296 throw new RSRuntimeException("Incorrect vector dimensions for GER"); 1297 } 1298 int expectedYDim = 1 + (N - 1) * incY; 1299 if (Y.getType().getX() != expectedYDim) { 1300 throw new RSRuntimeException("Incorrect vector dimensions for GER"); 1301 } 1302 1303 1304 } validateSYR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation A)1305 static int validateSYR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation A) { 1306 validateUplo(Uplo); 1307 if (!A.getType().getElement().isCompatible(e) || 1308 !X.getType().getElement().isCompatible(e)) { 1309 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1310 } 1311 1312 int N = A.getType().getX(); 1313 1314 if (X.getType().getY() > 1) { 1315 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1316 } 1317 if (N != A.getType().getY()) { 1318 throw new RSRuntimeException("A must be a symmetric matrix"); 1319 } 1320 if (incX <= 0) { 1321 throw new RSRuntimeException("Vector increments must be greater than 0"); 1322 } 1323 int expectedXDim = 1 + (N - 1) * incX; 1324 if (X.getType().getX() != expectedXDim) { 1325 throw new RSRuntimeException("Incorrect vector dimensions for SYR"); 1326 } 1327 return N; 1328 } validateSPR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Ap)1329 static int validateSPR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Ap) { 1330 validateUplo(Uplo); 1331 if (!Ap.getType().getElement().isCompatible(e) || 1332 !X.getType().getElement().isCompatible(e)) { 1333 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1334 } 1335 if (X.getType().getY() > 1) { 1336 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1337 } 1338 1339 if (Ap.getType().getY() > 1) { 1340 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); 1341 } 1342 1343 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 1344 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 1345 throw new RSRuntimeException("Invalid dimension for Ap"); 1346 } 1347 if (incX <= 0) { 1348 throw new RSRuntimeException("Vector increments must be greater than 0"); 1349 } 1350 int expectedXDim = 1 + (N - 1) * incX; 1351 if (X.getType().getX() != expectedXDim) { 1352 throw new RSRuntimeException("Incorrect vector dimensions for SPR"); 1353 } 1354 1355 return N; 1356 } 1357 validateSYR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A)1358 static int validateSYR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1359 validateUplo(Uplo); 1360 if (!A.getType().getElement().isCompatible(e) || 1361 !X.getType().getElement().isCompatible(e) || 1362 !Y.getType().getElement().isCompatible(e)) { 1363 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1364 } 1365 1366 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1367 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1368 } 1369 1370 int N = A.getType().getX(); 1371 1372 if (N != A.getType().getY()) { 1373 throw new RSRuntimeException("A must be a symmetric matrix"); 1374 } 1375 if (incX <= 0 || incY <= 0) { 1376 throw new RSRuntimeException("Vector increments must be greater than 0"); 1377 } 1378 int expectedXDim = 1 + (N - 1) * incX; 1379 int expectedYDim = 1 + (N - 1) * incY; 1380 if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) { 1381 throw new RSRuntimeException("Incorrect vector dimensions for SYR"); 1382 } 1383 return N; 1384 1385 } validateSPR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)1386 static int validateSPR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 1387 validateUplo(Uplo); 1388 if (!Ap.getType().getElement().isCompatible(e) || 1389 !X.getType().getElement().isCompatible(e) || 1390 !Y.getType().getElement().isCompatible(e)) { 1391 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1392 } 1393 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1394 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1395 } 1396 1397 if (Ap.getType().getY() > 1) { 1398 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); 1399 } 1400 1401 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 1402 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 1403 throw new RSRuntimeException("Invalid dimension for Ap"); 1404 } 1405 if (incX <= 0 || incY <= 0) { 1406 throw new RSRuntimeException("Vector increments must be greater than 0"); 1407 } 1408 int expectedXDim = 1 + (N - 1) * incX; 1409 int expectedYDim = 1 + (N - 1) * incY; 1410 if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) { 1411 throw new RSRuntimeException("Incorrect vector dimensions for SPR2"); 1412 } 1413 1414 return N; 1415 } 1416 1417 /** 1418 * SSYMV performs the matrix-vector operation 1419 * y := alpha*A*x + beta*y 1420 * 1421 * Details: http://www.netlib.org/lapack/explore-html/d2/d94/ssymv_8f.html 1422 * 1423 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1424 * @param alpha The scalar alpha. 1425 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1426 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1427 * @param incX The increment for the elements of vector x, must be larger than zero. 1428 * @param beta The scalar beta. 1429 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1430 * @param incY The increment for the elements of vector y, must be larger than zero. 1431 */ SSYMV(@plo int Uplo, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY)1432 public void SSYMV(@Uplo int Uplo, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { 1433 int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY); 1434 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); 1435 } 1436 1437 /** 1438 * SSBMV performs the matrix-vector operation 1439 * y := alpha*A*x + beta*y 1440 * 1441 * Details: http://www.netlib.org/lapack/explore-html/d3/da1/ssbmv_8f.html 1442 * 1443 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1444 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1445 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1446 * for i in range(0, n): 1447 * for j in range(i, min(i+k+1, n)): 1448 * b[i, j-i] = a[i, j] 1449 * 1450 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. 1451 * @param K The number of off-diagonals of the matrix A 1452 * @param alpha The scalar alpha. 1453 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1454 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1455 * @param incX The increment for the elements of vector x, must be larger than zero. 1456 * @param beta The scalar beta. 1457 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1458 * @param incY The increment for the elements of vector y, must be larger than zero. 1459 */ SSBMV(@plo int Uplo, int K, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY)1460 public void SSBMV(@Uplo int Uplo, int K, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { 1461 // SBMV is the same as SYMV + K >= 0 1462 if (K < 0) { 1463 throw new RSRuntimeException("K must be greater than or equal to 0"); 1464 } 1465 int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY); 1466 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); 1467 } 1468 1469 /** 1470 * SSPMV performs the matrix-vector operation 1471 * y := alpha*A*x + beta*y 1472 * 1473 * Details: http://www.netlib.org/lapack/explore-html/d8/d68/sspmv_8f.html 1474 * 1475 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1476 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1477 * 'a' to packed matrix 'b'. 1478 * k = 0 1479 * for i in range(0, n): 1480 * for j in range(i, n): 1481 * b[k++] = a[i, j] 1482 * 1483 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. 1484 * @param alpha The scalar alpha. 1485 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}. 1486 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1487 * @param incX The increment for the elements of vector x, must be larger than zero. 1488 * @param beta The scalar beta. 1489 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1490 * @param incY The increment for the elements of vector y, must be larger than zero. 1491 */ SSPMV(@plo int Uplo, float alpha, Allocation Ap, Allocation X, int incX, float beta, Allocation Y, int incY)1492 public void SSPMV(@Uplo int Uplo, float alpha, Allocation Ap, Allocation X, int incX, float beta, Allocation Y, int incY) { 1493 int N = validateSPMV(Element.F32(mRS), Uplo, Ap, X, incX, Y, incY); 1494 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, Ap.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); 1495 } 1496 1497 /** 1498 * SGER performs the rank 1 operation 1499 * A := alpha*x*y**T + A 1500 * 1501 * Details: http://www.netlib.org/lapack/explore-html/db/d5c/sger_8f.html 1502 * 1503 * @param alpha The scalar alpha. 1504 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1505 * @param incX The increment for the elements of vector x, must be larger than zero. 1506 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1507 * @param incY The increment for the elements of vector y, must be larger than zero. 1508 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1509 */ SGER(float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)1510 public void SGER(float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1511 int M = A.getType().getY(); 1512 int N = A.getType().getX(); 1513 validateGER(Element.F32(mRS), X, incX, Y, incY, A); 1514 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sger, 0, 0, 0, 0, 0, M, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0.f, A.getID(mRS), incX, incY, 0, 0); 1515 } 1516 1517 /** 1518 * SSYR performs the rank 1 operation 1519 * A := alpha*x*x**T + A 1520 * 1521 * Details: http://www.netlib.org/lapack/explore-html/d6/dac/ssyr_8f.html 1522 * 1523 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1524 * @param alpha The scalar alpha. 1525 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1526 * @param incX The increment for the elements of vector x, must be larger than zero. 1527 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1528 */ SSYR(@plo int Uplo, float alpha, Allocation X, int incX, Allocation A)1529 public void SSYR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) { 1530 int N = validateSYR(Element.F32(mRS), Uplo, X, incX, A); 1531 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), A.getID(mRS), 0.f, 0, incX, 0, 0, 0); 1532 } 1533 1534 /** 1535 * SSPR performs the rank 1 operation 1536 * A := alpha*x*x**T + A 1537 * 1538 * Details: http://www.netlib.org/lapack/explore-html/d2/d9b/sspr_8f.html 1539 * 1540 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1541 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1542 * 'a' to packed matrix 'b'. 1543 * k = 0 1544 * for i in range(0, n): 1545 * for j in range(i, n): 1546 * b[k++] = a[i, j] 1547 * 1548 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 1549 * @param alpha The scalar alpha. 1550 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1551 * @param incX The increment for the elements of vector x, must be larger than zero. 1552 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}. 1553 */ SSPR(@plo int Uplo, float alpha, Allocation X, int incX, Allocation Ap)1554 public void SSPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) { 1555 int N = validateSPR(Element.F32(mRS), Uplo, X, incX, Ap); 1556 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Ap.getID(mRS), 0.f, 0, incX, 0, 0, 0); 1557 } 1558 1559 /** 1560 * SSYR2 performs the symmetric rank 2 operation 1561 * A := alpha*x*y**T + alpha*y*x**T + A 1562 * 1563 * Details: http://www.netlib.org/lapack/explore-html/db/d99/ssyr2_8f.html 1564 * 1565 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1566 * @param alpha The scalar alpha. 1567 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1568 * @param incX The increment for the elements of vector x, must be larger than zero. 1569 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1570 * @param incY The increment for the elements of vector y, must be larger than zero. 1571 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1572 */ SSYR2(@plo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)1573 public void SSYR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1574 int N = validateSYR2(Element.F32(mRS), Uplo, X, incX, Y, incY, A); 1575 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, A.getID(mRS), incX, incY, 0, 0); 1576 } 1577 1578 /** 1579 * SSPR2 performs the symmetric rank 2 operation 1580 * A := alpha*x*y**T + alpha*y*x**T + A 1581 * 1582 * Details: http://www.netlib.org/lapack/explore-html/db/d3e/sspr2_8f.html 1583 * 1584 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1585 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1586 * 'a' to packed matrix 'b'. 1587 * k = 0 1588 * for i in range(0, n): 1589 * for j in range(i, n): 1590 * b[k++] = a[i, j] 1591 * 1592 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 1593 * @param alpha The scalar alpha. 1594 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1595 * @param incX The increment for the elements of vector x, must be larger than zero. 1596 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1597 * @param incY The increment for the elements of vector y, must be larger than zero. 1598 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}. 1599 */ SSPR2(@plo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)1600 public void SSPR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 1601 int N = validateSPR2(Element.F32(mRS), Uplo, X, incX, Y, incY, Ap); 1602 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, Ap.getID(mRS), incX, incY, 0, 0); 1603 } 1604 1605 /** 1606 * DSYMV performs the matrix-vector operation 1607 * y := alpha*A*x + beta*y 1608 * 1609 * Details: http://www.netlib.org/lapack/explore-html/d8/dbe/dsymv_8f.html 1610 * 1611 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1612 * @param alpha The scalar alpha. 1613 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 1614 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1615 * @param incX The increment for the elements of vector x, must be larger than zero. 1616 * @param beta The scalar beta. 1617 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 1618 * @param incY The increment for the elements of vector y, must be larger than zero. 1619 */ DSYMV(@plo int Uplo, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY)1620 public void DSYMV(@Uplo int Uplo, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { 1621 int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY); 1622 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); 1623 } 1624 1625 /** 1626 * DSBMV performs the matrix-vector operation 1627 * y := alpha*A*x + beta*y 1628 * 1629 * Details: http://www.netlib.org/lapack/explore-html/d8/d1e/dsbmv_8f.html 1630 * 1631 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1632 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1633 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1634 * for i in range(0, n): 1635 * for j in range(i, min(i+k+1, n)): 1636 * b[i, j-i] = a[i, j] 1637 * 1638 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. 1639 * @param K The number of off-diagonals of the matrix A 1640 * @param alpha The scalar alpha. 1641 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 1642 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1643 * @param incX The increment for the elements of vector x, must be larger than zero. 1644 * @param beta The scalar beta. 1645 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 1646 * @param incY The increment for the elements of vector y, must be larger than zero. 1647 */ DSBMV(@plo int Uplo, int K, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY)1648 public void DSBMV(@Uplo int Uplo, int K, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { 1649 // SBMV is the same as SYMV + K >= 0 1650 if (K < 0) { 1651 throw new RSRuntimeException("K must be greater than or equal to 0"); 1652 } 1653 int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY); 1654 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); 1655 } 1656 1657 /** 1658 * DSPMV performs the matrix-vector operation 1659 * y := alpha*A*x + beta*y 1660 * 1661 * Details: http://www.netlib.org/lapack/explore-html/d4/d85/dspmv_8f.html 1662 * 1663 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1664 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1665 * 'a' to packed matrix 'b'. 1666 * k = 0 1667 * for i in range(0, n): 1668 * for j in range(i, n): 1669 * b[k++] = a[i, j] 1670 * 1671 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. 1672 * @param alpha The scalar alpha. 1673 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}. 1674 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1675 * @param incX The increment for the elements of vector x, must be larger than zero. 1676 * @param beta The scalar beta. 1677 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 1678 * @param incY The increment for the elements of vector y, must be larger than zero. 1679 */ DSPMV(@plo int Uplo, double alpha, Allocation Ap, Allocation X, int incX, double beta, Allocation Y, int incY)1680 public void DSPMV(@Uplo int Uplo, double alpha, Allocation Ap, Allocation X, int incX, double beta, Allocation Y, int incY) { 1681 int N = validateSPMV(Element.F64(mRS), Uplo, Ap, X, incX, Y, incY); 1682 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, Ap.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); 1683 } 1684 1685 /** 1686 * DGER performs the rank 1 operation 1687 * A := alpha*x*y**T + A 1688 * 1689 * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dger_8f.html 1690 * 1691 * @param alpha The scalar alpha. 1692 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1693 * @param incX The increment for the elements of vector x, must be larger than zero. 1694 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 1695 * @param incY The increment for the elements of vector y, must be larger than zero. 1696 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 1697 */ DGER(double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)1698 public void DGER(double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1699 int M = A.getType().getY(); 1700 int N = A.getType().getX(); 1701 validateGER(Element.F64(mRS), X, incX, Y, incY, A); 1702 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dger, 0, 0, 0, 0, 0, M, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0.f, A.getID(mRS), incX, incY, 0, 0); 1703 } 1704 1705 /** 1706 * DSYR performs the rank 1 operation 1707 * A := alpha*x*x**T + A 1708 * 1709 * Details: http://www.netlib.org/lapack/explore-html/d3/d60/dsyr_8f.html 1710 * 1711 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1712 * @param alpha The scalar alpha. 1713 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1714 * @param incX The increment for the elements of vector x, must be larger than zero. 1715 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 1716 */ DSYR(@plo int Uplo, double alpha, Allocation X, int incX, Allocation A)1717 public void DSYR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) { 1718 int N = validateSYR(Element.F64(mRS), Uplo, X, incX, A); 1719 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), A.getID(mRS), 0.f, 0, incX, 0, 0, 0); 1720 } 1721 1722 /** 1723 * DSPR performs the rank 1 operation 1724 * A := alpha*x*x**T + A 1725 * 1726 * Details: http://www.netlib.org/lapack/explore-html/dd/dba/dspr_8f.html 1727 * 1728 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1729 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1730 * 'a' to packed matrix 'b'. 1731 * k = 0 1732 * for i in range(0, n): 1733 * for j in range(i, n): 1734 * b[k++] = a[i, j] 1735 * 1736 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 1737 * @param alpha The scalar alpha. 1738 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1739 * @param incX The increment for the elements of vector x, must be larger than zero. 1740 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}. 1741 */ DSPR(@plo int Uplo, double alpha, Allocation X, int incX, Allocation Ap)1742 public void DSPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) { 1743 int N = validateSPR(Element.F64(mRS), Uplo, X, incX, Ap); 1744 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Ap.getID(mRS), 0.f, 0, incX, 0, 0, 0); 1745 } 1746 1747 /** 1748 * DSYR2 performs the symmetric rank 2 operation 1749 * A := alpha*x*y**T + alpha*y*x**T + A 1750 * 1751 * Details: http://www.netlib.org/lapack/explore-html/de/d41/dsyr2_8f.html 1752 * 1753 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1754 * @param alpha The scalar alpha. 1755 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1756 * @param incX The increment for the elements of vector x, must be larger than zero. 1757 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 1758 * @param incY The increment for the elements of vector y, must be larger than zero. 1759 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 1760 */ DSYR2(@plo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)1761 public void DSYR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1762 int N = validateSYR2(Element.F64(mRS), Uplo, X, incX, Y, incY, A); 1763 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, A.getID(mRS), incX, incY, 0, 0); 1764 } 1765 1766 /** 1767 * DSPR2 performs the symmetric rank 2 operation 1768 * A := alpha*x*y**T + alpha*y*x**T + A 1769 * 1770 * Details: http://www.netlib.org/lapack/explore-html/dd/d9e/dspr2_8f.html 1771 * 1772 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1773 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1774 * 'a' to packed matrix 'b'. 1775 * k = 0 1776 * for i in range(0, n): 1777 * for j in range(i, n): 1778 * b[k++] = a[i, j] 1779 * 1780 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 1781 * @param alpha The scalar alpha. 1782 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1783 * @param incX The increment for the elements of vector x, must be larger than zero. 1784 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 1785 * @param incY The increment for the elements of vector y, must be larger than zero. 1786 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}. 1787 */ DSPR2(@plo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)1788 public void DSPR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 1789 int N = validateSPR2(Element.F64(mRS), Uplo, X, incX, Y, incY, Ap); 1790 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, Ap.getID(mRS), incX, incY, 0, 0); 1791 } 1792 1793 1794 /** 1795 * Level 2, C and Z only 1796 */ 1797 validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A)1798 static void validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1799 if (!A.getType().getElement().isCompatible(e) || 1800 !X.getType().getElement().isCompatible(e) || 1801 !Y.getType().getElement().isCompatible(e)) { 1802 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1803 } 1804 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1805 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1806 } 1807 1808 int M = A.getType().getY(); 1809 int N = A.getType().getX(); 1810 if (incX <= 0 || incY <= 0) { 1811 throw new RSRuntimeException("Vector increments must be greater than 0"); 1812 } 1813 int expectedXDim = 1 + (M - 1) * incX; 1814 if (X.getType().getX() != expectedXDim) { 1815 throw new RSRuntimeException("Incorrect vector dimensions for GERU"); 1816 } 1817 int expectedYDim = 1 + (N - 1) * incY; 1818 if (Y.getType().getX() != expectedYDim) { 1819 throw new RSRuntimeException("Incorrect vector dimensions for GERU"); 1820 } 1821 1822 } 1823 1824 /** 1825 * CHEMV performs the matrix-vector operation 1826 * y := alpha*A*x + beta*y 1827 * 1828 * Details: http://www.netlib.org/lapack/explore-html/d7/d51/chemv_8f.html 1829 * 1830 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1831 * @param alpha The scalar alpha. 1832 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1833 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1834 * @param incX The increment for the elements of vector x, must be larger than zero. 1835 * @param beta The scalar beta. 1836 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 1837 * @param incY The increment for the elements of vector y, must be larger than zero. 1838 */ CHEMV(@plo int Uplo, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY)1839 public void CHEMV(@Uplo int Uplo, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 1840 // HEMV is the same as SYR2 validation-wise 1841 int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A); 1842 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); 1843 } 1844 1845 /** 1846 * CHBMV performs the matrix-vector operation 1847 * y := alpha*A*x + beta*y 1848 * 1849 * Details: http://www.netlib.org/lapack/explore-html/db/dc2/chbmv_8f.html 1850 * 1851 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1852 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1853 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1854 * for i in range(0, n): 1855 * for j in range(i, min(i+k+1, n)): 1856 * b[i, j-i] = a[i, j] 1857 * 1858 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. 1859 * @param K The number of off-diagonals of the matrix A 1860 * @param alpha The scalar alpha. 1861 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1862 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1863 * @param incX The increment for the elements of vector x, must be larger than zero. 1864 * @param beta The scalar beta. 1865 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 1866 * @param incY The increment for the elements of vector y, must be larger than zero. 1867 */ CHBMV(@plo int Uplo, int K, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY)1868 public void CHBMV(@Uplo int Uplo, int K, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 1869 // HBMV is the same as SYR2 validation-wise 1870 int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A); 1871 if (K < 0) { 1872 throw new RSRuntimeException("K must be 0 or greater for HBMV"); 1873 } 1874 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); 1875 } 1876 1877 /** 1878 * CHPMV performs the matrix-vector operation 1879 * y := alpha*A*x + beta*y 1880 * 1881 * Details: http://www.netlib.org/lapack/explore-html/d2/d06/chpmv_8f.html 1882 * 1883 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1884 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1885 * 'a' to packed matrix 'b'. 1886 * k = 0 1887 * for i in range(0, n): 1888 * for j in range(i, n): 1889 * b[k++] = a[i, j] 1890 * 1891 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. 1892 * @param alpha The scalar alpha. 1893 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1894 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1895 * @param incX The increment for the elements of vector x, must be larger than zero. 1896 * @param beta The scalar beta. 1897 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 1898 * @param incY The increment for the elements of vector y, must be larger than zero. 1899 */ CHPMV(@plo int Uplo, Float2 alpha, Allocation Ap, Allocation X, int incX, Float2 beta, Allocation Y, int incY)1900 public void CHPMV(@Uplo int Uplo, Float2 alpha, Allocation Ap, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 1901 // HPMV is the same as SPR2 1902 int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap); 1903 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, Ap.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); 1904 } 1905 1906 /** 1907 * CGERU performs the rank 1 operation 1908 * A := alpha*x*y**T + A 1909 * 1910 * Details: http://www.netlib.org/lapack/explore-html/db/d5f/cgeru_8f.html 1911 * 1912 * @param alpha The scalar alpha. 1913 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1914 * @param incX The increment for the elements of vector x, must be larger than zero. 1915 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 1916 * @param incY The increment for the elements of vector y, must be larger than zero. 1917 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1918 */ CGERU(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)1919 public void CGERU(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1920 validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A); 1921 int M = A.getType().getY(); 1922 int N = A.getType().getX(); 1923 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0); 1924 } 1925 1926 /** 1927 * CGERC performs the rank 1 operation 1928 * A := alpha*x*y**H + A 1929 * 1930 * Details: http://www.netlib.org/lapack/explore-html/dd/d84/cgerc_8f.html 1931 * 1932 * @param alpha The scalar alpha. 1933 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1934 * @param incX The increment for the elements of vector x, must be larger than zero. 1935 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 1936 * @param incY The increment for the elements of vector y, must be larger than zero. 1937 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1938 */ CGERC(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)1939 public void CGERC(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1940 // same as GERU 1941 validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A); 1942 int M = A.getType().getY(); 1943 int N = A.getType().getX(); 1944 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0); 1945 } 1946 1947 /** 1948 * CHER performs the rank 1 operation 1949 * A := alpha*x*x**H + A 1950 * 1951 * Details: http://www.netlib.org/lapack/explore-html/d3/d6d/cher_8f.html 1952 * 1953 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1954 * @param alpha The scalar alpha. 1955 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1956 * @param incX The increment for the elements of vector x, must be larger than zero. 1957 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1958 */ CHER(@plo int Uplo, float alpha, Allocation X, int incX, Allocation A)1959 public void CHER(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) { 1960 // same as SYR 1961 int N = validateSYR(Element.F32_2(mRS), Uplo, X, incX, A); 1962 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, A.getID(mRS), incX, 0, 0, 0); 1963 } 1964 1965 /** 1966 * CHPR performs the rank 1 operation 1967 * A := alpha*x*x**H + A 1968 * 1969 * Details: http://www.netlib.org/lapack/explore-html/db/dcd/chpr_8f.html 1970 * 1971 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1972 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1973 * 'a' to packed matrix 'b'. 1974 * k = 0 1975 * for i in range(0, n): 1976 * for j in range(i, n): 1977 * b[k++] = a[i, j] 1978 * 1979 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 1980 * @param alpha The scalar alpha. 1981 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1982 * @param incX The increment for the elements of vector x, must be larger than zero. 1983 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1984 */ CHPR(@plo int Uplo, float alpha, Allocation X, int incX, Allocation Ap)1985 public void CHPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) { 1986 // equivalent to SPR for validation 1987 int N = validateSPR(Element.F32_2(mRS), Uplo, X, incX, Ap); 1988 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, Ap.getID(mRS), incX, 0, 0, 0); 1989 } 1990 1991 /** 1992 * CHER2 performs the symmetric rank 2 operation 1993 * A := alpha*x*y**H + alpha*y*x**H + A 1994 * 1995 * Details: http://www.netlib.org/lapack/explore-html/db/d87/cher2_8f.html 1996 * 1997 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1998 * @param alpha The scalar alpha. 1999 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2000 * @param incX The increment for the elements of vector x, must be larger than zero. 2001 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2002 * @param incY The increment for the elements of vector y, must be larger than zero. 2003 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2004 */ CHER2(@plo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2005 public void CHER2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2006 // same as SYR2 2007 int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A); 2008 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0); 2009 } 2010 2011 /** 2012 * CHPR2 performs the symmetric rank 2 operation 2013 * A := alpha*x*y**H + alpha*y*x**H + A 2014 * 2015 * Details: http://www.netlib.org/lapack/explore-html/d6/d44/chpr2_8f.html 2016 * 2017 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2018 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2019 * 'a' to packed matrix 'b'. 2020 * k = 0 2021 * for i in range(0, n): 2022 * for j in range(i, n): 2023 * b[k++] = a[i, j] 2024 * 2025 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2026 * @param alpha The scalar alpha. 2027 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2028 * @param incX The increment for the elements of vector x, must be larger than zero. 2029 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2030 * @param incY The increment for the elements of vector y, must be larger than zero. 2031 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2032 */ CHPR2(@plo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)2033 public void CHPR2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 2034 // same as SPR2 2035 int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap); 2036 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, Ap.getID(mRS), incX, incY, 0, 0); 2037 } 2038 2039 /** 2040 * ZHEMV performs the matrix-vector operation 2041 * y := alpha*A*x + beta*y 2042 * 2043 * Details: http://www.netlib.org/lapack/explore-html/d0/ddd/zhemv_8f.html 2044 * 2045 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2046 * @param alpha The scalar alpha. 2047 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2048 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2049 * @param incX The increment for the elements of vector x, must be larger than zero. 2050 * @param beta The scalar beta. 2051 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2052 * @param incY The increment for the elements of vector y, must be larger than zero. 2053 */ ZHEMV(@plo int Uplo, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY)2054 public void ZHEMV(@Uplo int Uplo, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 2055 // HEMV is the same as SYR2 validation-wise 2056 int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A); 2057 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); 2058 } 2059 2060 /** 2061 * ZHBMV performs the matrix-vector operation 2062 * y := alpha*A*x + beta*y 2063 * 2064 * Details: http://www.netlib.org/lapack/explore-html/d3/d1a/zhbmv_8f.html 2065 * 2066 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 2067 * but only the region N*(K+1) will be referenced. The following subroutine can is an 2068 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 2069 * for i in range(0, n): 2070 * for j in range(i, min(i+k+1, n)): 2071 * b[i, j-i] = a[i, j] 2072 * 2073 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. 2074 * @param K The number of off-diagonals of the matrix A 2075 * @param alpha The scalar alpha. 2076 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2077 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2078 * @param incX The increment for the elements of vector x, must be larger than zero. 2079 * @param beta The scalar beta. 2080 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2081 * @param incY The increment for the elements of vector y, must be larger than zero. 2082 */ ZHBMV(@plo int Uplo, int K, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY)2083 public void ZHBMV(@Uplo int Uplo, int K, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 2084 // HBMV is the same as SYR2 validation-wise 2085 int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A); 2086 if (K < 0) { 2087 throw new RSRuntimeException("K must be 0 or greater for HBMV"); 2088 } 2089 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); 2090 } 2091 2092 /** 2093 * ZHPMV performs the matrix-vector operation 2094 * y := alpha*A*x + beta*y 2095 * 2096 * Details: http://www.netlib.org/lapack/explore-html/d0/d60/zhpmv_8f.html 2097 * 2098 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2099 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2100 * 'a' to packed matrix 'b'. 2101 * k = 0 2102 * for i in range(0, n): 2103 * for j in range(i, n): 2104 * b[k++] = a[i, j] 2105 * 2106 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. 2107 * @param alpha The scalar alpha. 2108 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2109 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2110 * @param incX The increment for the elements of vector x, must be larger than zero. 2111 * @param beta The scalar beta. 2112 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2113 * @param incY The increment for the elements of vector y, must be larger than zero. 2114 */ ZHPMV(@plo int Uplo, Double2 alpha, Allocation Ap, Allocation X, int incX, Double2 beta, Allocation Y, int incY)2115 public void ZHPMV(@Uplo int Uplo, Double2 alpha, Allocation Ap, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 2116 // HPMV is the same as SPR2 2117 int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap); 2118 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, Ap.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); 2119 } 2120 2121 /** 2122 * ZGERU performs the rank 1 operation 2123 * A := alpha*x*y**T + A 2124 * 2125 * Details: http://www.netlib.org/lapack/explore-html/d7/d12/zgeru_8f.html 2126 * 2127 * @param alpha The scalar alpha. 2128 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2129 * @param incX The increment for the elements of vector x, must be larger than zero. 2130 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2131 * @param incY The increment for the elements of vector y, must be larger than zero. 2132 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2133 */ ZGERU(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2134 public void ZGERU(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2135 validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A); 2136 int M = A.getType().getY(); 2137 int N = A.getType().getX(); 2138 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0); 2139 } 2140 2141 /** 2142 * ZGERC performs the rank 1 operation 2143 * A := alpha*x*y**H + A 2144 * 2145 * Details: http://www.netlib.org/lapack/explore-html/d3/dad/zgerc_8f.html 2146 * 2147 * @param alpha The scalar alpha. 2148 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2149 * @param incX The increment for the elements of vector x, must be larger than zero. 2150 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2151 * @param incY The increment for the elements of vector y, must be larger than zero. 2152 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2153 */ ZGERC(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2154 public void ZGERC(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2155 // same as GERU 2156 validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A); 2157 int M = A.getType().getY(); 2158 int N = A.getType().getX(); 2159 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0); 2160 } 2161 2162 /** 2163 * ZHER performs the rank 1 operation 2164 * A := alpha*x*x**H + A 2165 * 2166 * Details: http://www.netlib.org/lapack/explore-html/de/d0e/zher_8f.html 2167 * 2168 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2169 * @param alpha The scalar alpha. 2170 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2171 * @param incX The increment for the elements of vector x, must be larger than zero. 2172 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2173 */ ZHER(@plo int Uplo, double alpha, Allocation X, int incX, Allocation A)2174 public void ZHER(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) { 2175 // same as SYR 2176 int N = validateSYR(Element.F64_2(mRS), Uplo, X, incX, A); 2177 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, A.getID(mRS), incX, 0, 0, 0); 2178 } 2179 2180 /** 2181 * ZHPR performs the rank 1 operation 2182 * A := alpha*x*x**H + A 2183 * 2184 * Details: http://www.netlib.org/lapack/explore-html/de/de1/zhpr_8f.html 2185 * 2186 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2187 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2188 * 'a' to packed matrix 'b'. 2189 * k = 0 2190 * for i in range(0, n): 2191 * for j in range(i, n): 2192 * b[k++] = a[i, j] 2193 * 2194 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2195 * @param alpha The scalar alpha. 2196 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2197 * @param incX The increment for the elements of vector x, must be larger than zero. 2198 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2199 */ ZHPR(@plo int Uplo, double alpha, Allocation X, int incX, Allocation Ap)2200 public void ZHPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) { 2201 // equivalent to SPR for validation 2202 int N = validateSPR(Element.F64_2(mRS), Uplo, X, incX, Ap); 2203 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, Ap.getID(mRS), incX, 0, 0, 0); 2204 } 2205 2206 /** 2207 * ZHER2 performs the symmetric rank 2 operation 2208 * A := alpha*x*y**H + alpha*y*x**H + A 2209 * 2210 * Details: http://www.netlib.org/lapack/explore-html/da/d8a/zher2_8f.html 2211 * 2212 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2213 * @param alpha The scalar alpha. 2214 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2215 * @param incX The increment for the elements of vector x, must be larger than zero. 2216 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2217 * @param incY The increment for the elements of vector y, must be larger than zero. 2218 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2219 */ ZHER2(@plo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2220 public void ZHER2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2221 // same as SYR2 2222 int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A); 2223 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0); 2224 } 2225 2226 /** 2227 * ZHPR2 performs the symmetric rank 2 operation 2228 * A := alpha*x*y**H + alpha*y*x**H + A 2229 * 2230 * Details: http://www.netlib.org/lapack/explore-html/d5/d52/zhpr2_8f.html 2231 * 2232 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2233 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2234 * 'a' to packed matrix 'b'. 2235 * k = 0 2236 * for i in range(0, n): 2237 * for j in range(i, n): 2238 * b[k++] = a[i, j] 2239 * 2240 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2241 * @param alpha The scalar alpha. 2242 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2243 * @param incX The increment for the elements of vector x, must be larger than zero. 2244 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2245 * @param incY The increment for the elements of vector y, must be larger than zero. 2246 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2247 */ ZHPR2(@plo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)2248 public void ZHPR2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 2249 // same as SPR2 2250 int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap); 2251 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, Ap.getID(mRS), incX, incY, 0, 0); 2252 } 2253 2254 2255 /** 2256 * Level 3 BLAS 2257 */ 2258 validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C)2259 static void validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C) { 2260 int aM = -1, aN = -1, bM = -1, bN = -1, cM = -1, cN = -1; 2261 if ((A != null && !A.getType().getElement().isCompatible(e)) || 2262 (B != null && !B.getType().getElement().isCompatible(e)) || 2263 (C != null && !C.getType().getElement().isCompatible(e))) { 2264 throw new RSRuntimeException("Called BLAS with wrong Element type"); 2265 } 2266 if (C == null) { 2267 //since matrix C is used to store the result, it cannot be null. 2268 throw new RSRuntimeException("Allocation C cannot be null"); 2269 } 2270 cM = C.getType().getY(); 2271 cN = C.getType().getX(); 2272 2273 if (Side == RIGHT) { 2274 if ((A == null && B != null) || (A != null && B == null)) { 2275 throw new RSRuntimeException("Provided Matrix A without Matrix B, or vice versa"); 2276 } 2277 if (B != null) { 2278 bM = A.getType().getY(); 2279 bN = A.getType().getX(); 2280 } 2281 if (A != null) { 2282 aM = B.getType().getY(); 2283 aN = B.getType().getX(); 2284 } 2285 } else { 2286 if (A != null) { 2287 if (TransA == TRANSPOSE || TransA == CONJ_TRANSPOSE) { 2288 aN = A.getType().getY(); 2289 aM = A.getType().getX(); 2290 } else { 2291 aM = A.getType().getY(); 2292 aN = A.getType().getX(); 2293 } 2294 } 2295 if (B != null) { 2296 if (TransB == TRANSPOSE || TransB == CONJ_TRANSPOSE) { 2297 bN = B.getType().getY(); 2298 bM = B.getType().getX(); 2299 } else { 2300 bM = B.getType().getY(); 2301 bN = B.getType().getX(); 2302 } 2303 } 2304 } 2305 if (A != null && B != null && C != null) { 2306 if (aN != bM || aM != cM || bN != cN) { 2307 throw new RSRuntimeException("Called BLAS with invalid dimensions"); 2308 } 2309 } else if (A != null && C != null) { 2310 // A and C only, for SYRK 2311 if (cM != cN) { 2312 throw new RSRuntimeException("Matrix C is not symmetric"); 2313 } 2314 if (aM != cM) { 2315 throw new RSRuntimeException("Called BLAS with invalid dimensions"); 2316 } 2317 } else if (A != null && B != null) { 2318 // A and B only 2319 if (aN != bM) { 2320 throw new RSRuntimeException("Called BLAS with invalid dimensions"); 2321 } 2322 } 2323 2324 } 2325 2326 /** 2327 * SGEMM performs one of the matrix-matrix operations 2328 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T 2329 * 2330 * Details: http://www.netlib.org/lapack/explore-html/d4/de2/sgemm_8f.html 2331 * 2332 * @param TransA The type of transpose applied to matrix A. 2333 * @param TransB The type of transpose applied to matrix B. 2334 * @param alpha The scalar alpha. 2335 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 2336 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 2337 * @param beta The scalar beta. 2338 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 2339 */ SGEMM(@ranspose int TransA, @Transpose int TransB, float alpha, Allocation A, Allocation B, float beta, Allocation C)2340 public void SGEMM(@Transpose int TransA, @Transpose int TransB, float alpha, Allocation A, 2341 Allocation B, float beta, Allocation C) { 2342 validateTranspose(TransA); 2343 validateTranspose(TransB); 2344 validateL3(Element.F32(mRS), TransA, TransB, 0, A, B, C); 2345 2346 int M = -1, N = -1, K = -1; 2347 if (TransA != NO_TRANSPOSE) { 2348 M = A.getType().getX(); 2349 K = A.getType().getY(); 2350 } else { 2351 M = A.getType().getY(); 2352 K = A.getType().getX(); 2353 } 2354 if (TransB != NO_TRANSPOSE) { 2355 N = B.getType().getY(); 2356 } else { 2357 N = B.getType().getX(); 2358 } 2359 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha, A.getID(mRS), B.getID(mRS), 2360 beta, C.getID(mRS), 0, 0, 0, 0); 2361 } 2362 2363 /** 2364 * DGEMM performs one of the matrix-matrix operations 2365 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T 2366 * 2367 * Details: http://www.netlib.org/lapack/explore-html/d7/d2b/dgemm_8f.html 2368 * 2369 * @param TransA The type of transpose applied to matrix A. 2370 * @param TransB The type of transpose applied to matrix B. 2371 * @param alpha The scalar alpha. 2372 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2373 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 2374 * @param beta The scalar beta. 2375 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 2376 */ DGEMM(@ranspose int TransA, @Transpose int TransB, double alpha, Allocation A, Allocation B, double beta, Allocation C)2377 public void DGEMM(@Transpose int TransA, @Transpose int TransB, double alpha, Allocation A, 2378 Allocation B, double beta, Allocation C) { 2379 validateTranspose(TransA); 2380 validateTranspose(TransB); 2381 validateL3(Element.F64(mRS), TransA, TransB, 0, A, B, C); 2382 int M = -1, N = -1, K = -1; 2383 if (TransA != NO_TRANSPOSE) { 2384 M = A.getType().getX(); 2385 K = A.getType().getY(); 2386 } else { 2387 M = A.getType().getY(); 2388 K = A.getType().getX(); 2389 } 2390 if (TransB != NO_TRANSPOSE) { 2391 N = B.getType().getY(); 2392 } else { 2393 N = B.getType().getX(); 2394 } 2395 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha, A.getID(mRS), B.getID(mRS), 2396 beta, C.getID(mRS), 0, 0, 0, 0); 2397 } 2398 2399 /** 2400 * CGEMM performs one of the matrix-matrix operations 2401 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T or op(X) = X**H 2402 * 2403 * Details: http://www.netlib.org/lapack/explore-html/d6/d5b/cgemm_8f.html 2404 * 2405 * @param TransA The type of transpose applied to matrix A. 2406 * @param TransB The type of transpose applied to matrix B. 2407 * @param alpha The scalar alpha. 2408 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2409 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 2410 * @param beta The scalar beta. 2411 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 2412 */ CGEMM(@ranspose int TransA, @Transpose int TransB, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C)2413 public void CGEMM(@Transpose int TransA, @Transpose int TransB, Float2 alpha, Allocation A, 2414 Allocation B, Float2 beta, Allocation C) { 2415 validateTranspose(TransA); 2416 validateTranspose(TransB); 2417 validateL3(Element.F32_2(mRS), TransA, TransB, 0, A, B, C); 2418 int M = -1, N = -1, K = -1; 2419 if (TransA != NO_TRANSPOSE) { 2420 M = A.getType().getX(); 2421 K = A.getType().getY(); 2422 } else { 2423 M = A.getType().getY(); 2424 K = A.getType().getX(); 2425 } 2426 if (TransB != NO_TRANSPOSE) { 2427 N = B.getType().getY(); 2428 } else { 2429 N = B.getType().getX(); 2430 } 2431 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 2432 beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); 2433 } 2434 2435 /** 2436 * ZGEMM performs one of the matrix-matrix operations 2437 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T or op(X) = X**H 2438 * 2439 * Details: http://www.netlib.org/lapack/explore-html/d7/d76/zgemm_8f.html 2440 * 2441 * @param TransA The type of transpose applied to matrix A. 2442 * @param TransB The type of transpose applied to matrix B. 2443 * @param alpha The scalar alpha. 2444 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2 2445 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2 2446 * @param beta The scalar beta. 2447 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2 2448 */ ZGEMM(@ranspose int TransA, @Transpose int TransB, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C)2449 public void ZGEMM(@Transpose int TransA, @Transpose int TransB, Double2 alpha, Allocation A, 2450 Allocation B, Double2 beta, Allocation C) { 2451 validateTranspose(TransA); 2452 validateTranspose(TransB); 2453 validateL3(Element.F64_2(mRS), TransA, TransB, 0, A, B, C); 2454 int M = -1, N = -1, K = -1; 2455 if (TransA != NO_TRANSPOSE) { 2456 M = A.getType().getX(); 2457 K = A.getType().getY(); 2458 } else { 2459 M = A.getType().getY(); 2460 K = A.getType().getX(); 2461 } 2462 if (TransB != NO_TRANSPOSE) { 2463 N = B.getType().getY(); 2464 } else { 2465 N = B.getType().getX(); 2466 } 2467 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 2468 beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); 2469 } 2470 2471 /** 2472 * SSYMM performs one of the matrix-matrix operations 2473 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 2474 * 2475 * Details: http://www.netlib.org/lapack/explore-html/d7/d42/ssymm_8f.html 2476 * 2477 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 2478 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2479 * @param alpha The scalar alpha. 2480 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 2481 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 2482 * @param beta The scalar beta. 2483 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 2484 */ SSYMM(@ide int Side, @Uplo int Uplo, float alpha, Allocation A, Allocation B, float beta, Allocation C)2485 public void SSYMM(@Side int Side, @Uplo int Uplo, float alpha, Allocation A, 2486 Allocation B, float beta, Allocation C) { 2487 validateSide(Side); 2488 validateUplo(Uplo); 2489 //For SYMM, Matrix A should be symmetric 2490 if (A.getType().getX() != A.getType().getY()) { 2491 throw new RSRuntimeException("Matrix A is not symmetric"); 2492 } 2493 validateL3(Element.F32(mRS), 0, 0, Side, A, B, C); 2494 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, A.getID(mRS), B.getID(mRS), 2495 beta, C.getID(mRS), 0, 0, 0, 0); 2496 } 2497 2498 /** 2499 * DSYMM performs one of the matrix-matrix operations 2500 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 2501 * 2502 * Details: http://www.netlib.org/lapack/explore-html/d8/db0/dsymm_8f.html 2503 * 2504 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 2505 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2506 * @param alpha The scalar alpha. 2507 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2508 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 2509 * @param beta The scalar beta. 2510 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 2511 */ DSYMM(@ide int Side, @Uplo int Uplo, double alpha, Allocation A, Allocation B, double beta, Allocation C)2512 public void DSYMM(@Side int Side, @Uplo int Uplo, double alpha, Allocation A, 2513 Allocation B, double beta, Allocation C) { 2514 validateSide(Side); 2515 validateUplo(Uplo); 2516 if (A.getType().getX() != A.getType().getY()) { 2517 throw new RSRuntimeException("Matrix A is not symmetric"); 2518 } 2519 validateL3(Element.F64(mRS), 0, 0, Side, A, B, C); 2520 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, A.getID(mRS), B.getID(mRS), 2521 beta, C.getID(mRS), 0, 0, 0, 0); 2522 } 2523 2524 /** 2525 * CSYMM performs one of the matrix-matrix operations 2526 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 2527 * 2528 * Details: http://www.netlib.org/lapack/explore-html/db/d59/csymm_8f.html 2529 * 2530 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 2531 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2532 * @param alpha The scalar alpha. 2533 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2534 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 2535 * @param beta The scalar beta. 2536 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 2537 */ CSYMM(@ide int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C)2538 public void CSYMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A, 2539 Allocation B, Float2 beta, Allocation C) { 2540 validateSide(Side); 2541 validateUplo(Uplo); 2542 if (A.getType().getX() != A.getType().getY()) { 2543 throw new RSRuntimeException("Matrix A is not symmetric"); 2544 } 2545 validateL3(Element.F32_2(mRS), 0, 0, Side, A, B, C); 2546 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 2547 beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); 2548 } 2549 2550 /** 2551 * ZSYMM performs one of the matrix-matrix operations 2552 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 2553 * 2554 * Details: http://www.netlib.org/lapack/explore-html/df/d51/zsymm_8f.html 2555 * 2556 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 2557 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2558 * @param alpha The scalar alpha. 2559 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2560 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 2561 * @param beta The scalar beta. 2562 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 2563 */ ZSYMM(@ide int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C)2564 public void ZSYMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A, 2565 Allocation B, Double2 beta, Allocation C) { 2566 validateSide(Side); 2567 validateUplo(Uplo); 2568 if (A.getType().getX() != A.getType().getY()) { 2569 throw new RSRuntimeException("Matrix A is not symmetric"); 2570 } 2571 validateL3(Element.F64_2(mRS), 0, 0, Side, A, B, C); 2572 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 2573 beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); 2574 } 2575 2576 /** 2577 * SSYRK performs one of the symmetric rank k operations 2578 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C 2579 * 2580 * Details: http://www.netlib.org/lapack/explore-html/d0/d40/ssyrk_8f.html 2581 * 2582 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 2583 * @param Trans The type of transpose applied to the operation. 2584 * @param alpha The scalar alpha. 2585 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 2586 * @param beta The scalar beta. 2587 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 2588 */ SSYRK(@plo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C)2589 public void SSYRK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) { 2590 validateTranspose(Trans); 2591 validateUplo(Uplo); 2592 validateL3(Element.F32(mRS), Trans, 0, 0, A, null, C); 2593 int K = -1; 2594 if (Trans != NO_TRANSPOSE) { 2595 K = A.getType().getY(); 2596 } else { 2597 K = A.getType().getX(); 2598 } 2599 2600 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), 0, beta, C.getID(mRS), 0, 0, 0, 0); 2601 } 2602 2603 /** 2604 * DSYRK performs one of the symmetric rank k operations 2605 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C 2606 * 2607 * Details: http://www.netlib.org/lapack/explore-html/dc/d05/dsyrk_8f.html 2608 * 2609 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 2610 * @param Trans The type of transpose applied to the operation. 2611 * @param alpha The scalar alpha. 2612 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2613 * @param beta The scalar beta. 2614 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 2615 */ DSYRK(@plo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C)2616 public void DSYRK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) { 2617 validateTranspose(Trans); 2618 validateUplo(Uplo); 2619 validateL3(Element.F64(mRS), Trans, 0, 0, A, null, C); 2620 int K = -1; 2621 if (Trans != NO_TRANSPOSE) { 2622 K = A.getType().getY(); 2623 } else { 2624 K = A.getType().getX(); 2625 } 2626 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), 0, beta, C.getID(mRS), 0, 0, 0, 0); 2627 } 2628 2629 /** 2630 * CSYRK performs one of the symmetric rank k operations 2631 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C 2632 * 2633 * Details: http://www.netlib.org/lapack/explore-html/d3/d6a/csyrk_8f.html 2634 * 2635 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 2636 * @param Trans The type of transpose applied to the operation. 2637 * @param alpha The scalar alpha. 2638 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2639 * @param beta The scalar beta. 2640 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 2641 */ CSYRK(@plo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Float2 beta, Allocation C)2642 public void CSYRK(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Float2 beta, Allocation C) { 2643 validateTranspose(Trans); 2644 validateUplo(Uplo); 2645 validateL3(Element.F32_2(mRS), Trans, 0, 0, A, null, C); 2646 int K = -1; 2647 if (Trans != NO_TRANSPOSE) { 2648 K = A.getType().getY(); 2649 } else { 2650 K = A.getType().getX(); 2651 } 2652 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), 0, beta.x, beta.y, 2653 C.getID(mRS), 0, 0, 0, 0); 2654 } 2655 2656 /** 2657 * ZSYRK performs one of the symmetric rank k operations 2658 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C 2659 * 2660 * Details: http://www.netlib.org/lapack/explore-html/de/d54/zsyrk_8f.html 2661 * 2662 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 2663 * @param Trans The type of transpose applied to the operation. 2664 * @param alpha The scalar alpha. 2665 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2666 * @param beta The scalar beta. 2667 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 2668 */ ZSYRK(@plo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Double2 beta, Allocation C)2669 public void ZSYRK(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Double2 beta, Allocation C) { 2670 validateTranspose(Trans); 2671 validateUplo(Uplo); 2672 validateL3(Element.F64_2(mRS), Trans, 0, 0, A, null, C); 2673 int K = -1; 2674 if (Trans != NO_TRANSPOSE) { 2675 K = A.getType().getY(); 2676 } else { 2677 K = A.getType().getX(); 2678 } 2679 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), 0, beta.x, beta.y, 2680 C.getID(mRS), 0, 0, 0, 0); 2681 } 2682 validateSYR2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C)2683 static void validateSYR2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) { 2684 validateTranspose(Trans); 2685 if (!A.getType().getElement().isCompatible(e) || 2686 !B.getType().getElement().isCompatible(e) || 2687 !C.getType().getElement().isCompatible(e)) { 2688 throw new RSRuntimeException("Called BLAS with wrong Element type"); 2689 } 2690 int Cdim = -1; 2691 // A is n x k if no transpose, k x n if transpose 2692 // C is n x n 2693 if (Trans == TRANSPOSE) { 2694 // check columns versus C 2695 Cdim = A.getType().getX(); 2696 } else { 2697 // check rows versus C 2698 Cdim = A.getType().getY(); 2699 } 2700 if (C.getType().getX() != Cdim || C.getType().getY() != Cdim) { 2701 throw new RSRuntimeException("Invalid symmetric matrix in SYR2K"); 2702 } 2703 // A dims == B dims 2704 if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) { 2705 throw new RSRuntimeException("Invalid A and B in SYR2K"); 2706 } 2707 } 2708 2709 /** 2710 * SSYR2K performs one of the symmetric rank 2k operations 2711 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C 2712 * 2713 * Details: http://www.netlib.org/lapack/explore-html/df/d3d/ssyr2k_8f.html 2714 * 2715 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 2716 * @param Trans The type of transpose applied to the operation. 2717 * @param alpha The scalar alpha. 2718 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 2719 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 2720 * @param beta The scalar beta. 2721 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 2722 */ SSYR2K(@plo int Uplo, @Transpose int Trans, float alpha, Allocation A, Allocation B, float beta, Allocation C)2723 public void SSYR2K(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, Allocation B, float beta, Allocation C) { 2724 validateUplo(Uplo); 2725 validateSYR2K(Element.F32(mRS), Trans, A, B, C); 2726 int K = -1; 2727 if (Trans != NO_TRANSPOSE) { 2728 K = A.getType().getY(); 2729 } else { 2730 K = A.getType().getX(); 2731 } 2732 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), B.getID(mRS), beta, C.getID(mRS), 0, 0, 0, 0); 2733 } 2734 2735 /** 2736 * DSYR2K performs one of the symmetric rank 2k operations 2737 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C 2738 * 2739 * Details: http://www.netlib.org/lapack/explore-html/d1/dec/dsyr2k_8f.html 2740 * 2741 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 2742 * @param Trans The type of transpose applied to the operation. 2743 * @param alpha The scalar alpha. 2744 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2745 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 2746 * @param beta The scalar beta. 2747 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 2748 */ DSYR2K(@plo int Uplo, @Transpose int Trans, double alpha, Allocation A, Allocation B, double beta, Allocation C)2749 public void DSYR2K(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, Allocation B, double beta, Allocation C) { 2750 validateUplo(Uplo); 2751 validateSYR2K(Element.F64(mRS), Trans, A, B, C); 2752 int K = -1; 2753 if (Trans != NO_TRANSPOSE) { 2754 K = A.getType().getY(); 2755 } else { 2756 K = A.getType().getX(); 2757 } 2758 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), B.getID(mRS), beta, C.getID(mRS), 0, 0, 0, 0); 2759 } 2760 2761 /** 2762 * CSYR2K performs one of the symmetric rank 2k operations 2763 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C 2764 * 2765 * Details: http://www.netlib.org/lapack/explore-html/de/d7e/csyr2k_8f.html 2766 * 2767 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 2768 * @param Trans The type of transpose applied to the operation. 2769 * @param alpha The scalar alpha. 2770 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2771 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 2772 * @param beta The scalar beta. 2773 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 2774 */ CSYR2K(@plo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C)2775 public void CSYR2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) { 2776 validateUplo(Uplo); 2777 validateSYR2K(Element.F32_2(mRS), Trans, A, B, C); 2778 int K = -1; 2779 if (Trans != NO_TRANSPOSE) { 2780 K = A.getType().getY(); 2781 } else { 2782 K = A.getType().getX(); 2783 } 2784 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); 2785 } 2786 2787 /** 2788 * ZSYR2K performs one of the symmetric rank 2k operations 2789 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C 2790 * 2791 * Details: http://www.netlib.org/lapack/explore-html/df/d20/zsyr2k_8f.html 2792 * 2793 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 2794 * @param Trans The type of transpose applied to the operation. 2795 * @param alpha The scalar alpha. 2796 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2797 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 2798 * @param beta The scalar beta. 2799 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 2800 */ ZSYR2K(@plo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C)2801 public void ZSYR2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) { 2802 validateUplo(Uplo); 2803 validateSYR2K(Element.F64_2(mRS), Trans, A, B, C); 2804 int K = -1; 2805 if (Trans != NO_TRANSPOSE) { 2806 K = A.getType().getY(); 2807 } else { 2808 K = A.getType().getX(); 2809 } 2810 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); 2811 } 2812 validateTRMM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B)2813 static void validateTRMM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) { 2814 validateSide(Side); 2815 validateTranspose(TransA); 2816 int aM = -1, aN = -1, bM = -1, bN = -1; 2817 if (!A.getType().getElement().isCompatible(e) || 2818 !B.getType().getElement().isCompatible(e)) { 2819 throw new RSRuntimeException("Called BLAS with wrong Element type"); 2820 } 2821 2822 aM = A.getType().getY(); 2823 aN = A.getType().getX(); 2824 if (aM != aN) { 2825 throw new RSRuntimeException("Called TRMM with a non-symmetric matrix A"); 2826 } 2827 2828 bM = B.getType().getY(); 2829 bN = B.getType().getX(); 2830 if (Side == LEFT) { 2831 if (aN != bM) { 2832 throw new RSRuntimeException("Called TRMM with invalid matrices"); 2833 } 2834 } else { 2835 if (bN != aM) { 2836 throw new RSRuntimeException("Called TRMM with invalid matrices"); 2837 } 2838 } 2839 } 2840 2841 /** 2842 * STRMM performs one of the matrix-matrix operations 2843 * B := alpha*op(A)*B or B := alpha*B*op(A) 2844 * op(A) is one of op(A) = A or op(A) = A**T 2845 * 2846 * Details: http://www.netlib.org/lapack/explore-html/df/d01/strmm_8f.html 2847 * 2848 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 2849 * @param Uplo Specifies whether matrix A is upper or lower triangular. 2850 * @param TransA The type of transpose applied to matrix A. 2851 * @param Diag Specifies whether or not A is unit triangular. 2852 * @param alpha The scalar alpha. 2853 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 2854 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 2855 */ STRMM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B)2856 public void STRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) { 2857 validateUplo(Uplo); 2858 validateDiag(Diag); 2859 validateTRMM(Element.F32(mRS), Side, TransA, A, B); 2860 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 2861 alpha, A.getID(mRS), B.getID(mRS), 0.f, 0, 0, 0, 0, 0); 2862 } 2863 2864 /** 2865 * DTRMM performs one of the matrix-matrix operations 2866 * B := alpha*op(A)*B or B := alpha*B*op(A) 2867 * op(A) is one of op(A) = A or op(A) = A**T 2868 * 2869 * Details: http://www.netlib.org/lapack/explore-html/dd/d19/dtrmm_8f.html 2870 * 2871 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 2872 * @param Uplo Specifies whether matrix A is upper or lower triangular. 2873 * @param TransA The type of transpose applied to matrix A. 2874 * @param Diag Specifies whether or not A is unit triangular. 2875 * @param alpha The scalar alpha. 2876 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2877 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 2878 */ DTRMM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B)2879 public void DTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) { 2880 validateUplo(Uplo); 2881 validateDiag(Diag); 2882 validateTRMM(Element.F64(mRS), Side, TransA, A, B); 2883 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 2884 alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0); 2885 } 2886 2887 /** 2888 * CTRMM performs one of the matrix-matrix operations 2889 * B := alpha*op(A)*B or B := alpha*B*op(A) 2890 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H 2891 * 2892 * Details: http://www.netlib.org/lapack/explore-html/d4/d9b/ctrmm_8f.html 2893 * 2894 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 2895 * @param Uplo Specifies whether matrix A is upper or lower triangular. 2896 * @param TransA The type of transpose applied to matrix A. 2897 * @param Diag Specifies whether or not A is unit triangular. 2898 * @param alpha The scalar alpha. 2899 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2900 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 2901 */ CTRMM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B)2902 public void CTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) { 2903 validateUplo(Uplo); 2904 validateDiag(Diag); 2905 validateTRMM(Element.F32_2(mRS), Side, TransA, A, B); 2906 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 2907 alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0); 2908 } 2909 2910 /** 2911 * ZTRMM performs one of the matrix-matrix operations 2912 * B := alpha*op(A)*B or B := alpha*B*op(A) 2913 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H 2914 * 2915 * Details: http://www.netlib.org/lapack/explore-html/d8/de1/ztrmm_8f.html 2916 * 2917 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 2918 * @param Uplo Specifies whether matrix A is upper or lower triangular. 2919 * @param TransA The type of transpose applied to matrix A. 2920 * @param Diag Specifies whether or not A is unit triangular. 2921 * @param alpha The scalar alpha. 2922 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2923 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 2924 */ ZTRMM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B)2925 public void ZTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) { 2926 validateUplo(Uplo); 2927 validateDiag(Diag); 2928 validateTRMM(Element.F64_2(mRS), Side, TransA, A, B); 2929 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 2930 alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0); 2931 } 2932 validateTRSM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B)2933 static void validateTRSM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) { 2934 int adim = -1, bM = -1, bN = -1; 2935 validateSide(Side); 2936 validateTranspose(TransA); 2937 if (!A.getType().getElement().isCompatible(e) || 2938 !B.getType().getElement().isCompatible(e)) { 2939 throw new RSRuntimeException("Called BLAS with wrong Element type"); 2940 } 2941 adim = A.getType().getX(); 2942 if (adim != A.getType().getY()) { 2943 // this may be unnecessary, the restriction could potentially be relaxed 2944 // A needs to contain at least that symmetric matrix but could theoretically be larger 2945 // for now we assume adapters are sufficient, will reevaluate in the future 2946 throw new RSRuntimeException("Called TRSM with a non-symmetric matrix A"); 2947 } 2948 bM = B.getType().getY(); 2949 bN = B.getType().getX(); 2950 if (Side == LEFT) { 2951 // A is M*M 2952 if (adim != bM) { 2953 throw new RSRuntimeException("Called TRSM with invalid matrix dimensions"); 2954 } 2955 } else { 2956 // A is N*N 2957 if (adim != bN) { 2958 throw new RSRuntimeException("Called TRSM with invalid matrix dimensions"); 2959 } 2960 } 2961 } 2962 2963 /** 2964 * STRSM solves one of the matrix equations 2965 * op(A)*X := alpha*B or X*op(A) := alpha*B 2966 * op(A) is one of op(A) = A or op(A) = A**T 2967 * 2968 * Details: http://www.netlib.org/lapack/explore-html/d2/d8b/strsm_8f.html 2969 * 2970 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 2971 * @param Uplo Specifies whether matrix A is upper or lower triangular. 2972 * @param TransA The type of transpose applied to matrix A. 2973 * @param Diag Specifies whether or not A is unit triangular. 2974 * @param alpha The scalar alpha. 2975 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 2976 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 2977 */ STRSM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B)2978 public void STRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) { 2979 validateUplo(Uplo); 2980 validateDiag(Diag); 2981 validateTRSM(Element.F32(mRS), Side, TransA, A, B); 2982 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 2983 alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0); 2984 } 2985 2986 /** 2987 * DTRSM solves one of the matrix equations 2988 * op(A)*X := alpha*B or X*op(A) := alpha*B 2989 * op(A) is one of op(A) = A or op(A) = A**T 2990 * 2991 * Details: http://www.netlib.org/lapack/explore-html/de/da7/dtrsm_8f.html 2992 * 2993 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 2994 * @param Uplo Specifies whether matrix A is upper or lower triangular. 2995 * @param TransA The type of transpose applied to matrix A. 2996 * @param Diag Specifies whether or not A is unit triangular. 2997 * @param alpha The scalar alpha. 2998 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2999 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 3000 */ DTRSM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B)3001 public void DTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) { 3002 validateUplo(Uplo); 3003 validateDiag(Diag); 3004 validateTRSM(Element.F64(mRS), Side, TransA, A, B); 3005 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3006 alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0); 3007 } 3008 3009 /** 3010 * CTRSM solves one of the matrix equations 3011 * op(A)*X := alpha*B or X*op(A) := alpha*B 3012 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H 3013 * 3014 * Details: http://www.netlib.org/lapack/explore-html/de/d30/ctrsm_8f.html 3015 * 3016 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3017 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3018 * @param TransA The type of transpose applied to matrix A. 3019 * @param Diag Specifies whether or not A is unit triangular. 3020 * @param alpha The scalar alpha. 3021 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3022 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3023 */ CTRSM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B)3024 public void CTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) { 3025 validateUplo(Uplo); 3026 validateDiag(Diag); 3027 validateTRSM(Element.F32_2(mRS), Side, TransA, A, B); 3028 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3029 alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0); 3030 } 3031 3032 /** 3033 * ZTRSM solves one of the matrix equations 3034 * op(A)*X := alpha*B or X*op(A) := alpha*B 3035 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H 3036 * 3037 * Details: http://www.netlib.org/lapack/explore-html/d1/d39/ztrsm_8f.html 3038 * 3039 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3040 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3041 * @param TransA The type of transpose applied to matrix A. 3042 * @param Diag Specifies whether or not A is unit triangular. 3043 * @param alpha The scalar alpha. 3044 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3045 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 3046 */ ZTRSM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B)3047 public void ZTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) { 3048 validateUplo(Uplo); 3049 validateDiag(Diag); 3050 validateTRSM(Element.F64_2(mRS), Side, TransA, A, B); 3051 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3052 alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0); 3053 } 3054 validateHEMM(Element e, @Side int Side, Allocation A, Allocation B, Allocation C)3055 static void validateHEMM(Element e, @Side int Side, Allocation A, Allocation B, Allocation C) { 3056 validateSide(Side); 3057 3058 if (!A.getType().getElement().isCompatible(e) || 3059 !B.getType().getElement().isCompatible(e) || 3060 !C.getType().getElement().isCompatible(e)) { 3061 throw new RSRuntimeException("Called BLAS with wrong Element type"); 3062 } 3063 3064 // A must be square; can potentially be relaxed similar to TRSM 3065 int adim = A.getType().getX(); 3066 if (adim != A.getType().getY()) { 3067 throw new RSRuntimeException("Called HEMM with non-square A"); 3068 } 3069 if ((Side == LEFT && adim != B.getType().getY()) || 3070 (Side == RIGHT && adim != B.getType().getX())) { 3071 throw new RSRuntimeException("Called HEMM with invalid B"); 3072 } 3073 if (B.getType().getX() != C.getType().getX() || 3074 B.getType().getY() != C.getType().getY()) { 3075 throw new RSRuntimeException("Called HEMM with mismatched B and C"); 3076 } 3077 } 3078 3079 /** 3080 * CHEMM performs one of the matrix-matrix operations 3081 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3082 * 3083 * Details: http://www.netlib.org/lapack/explore-html/d3/d66/chemm_8f.html 3084 * 3085 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3086 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3087 * @param alpha The scalar alpha. 3088 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3089 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3090 * @param beta The scalar beta. 3091 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 3092 */ CHEMM(@ide int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C)3093 public void CHEMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) { 3094 validateUplo(Uplo); 3095 validateHEMM(Element.F32_2(mRS), Side, A, B, C); 3096 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, 3097 alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); 3098 } 3099 3100 /** 3101 * ZHEMM performs one of the matrix-matrix operations 3102 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3103 * 3104 * Details: http://www.netlib.org/lapack/explore-html/d6/d3e/zhemm_8f.html 3105 * 3106 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3107 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3108 * @param alpha The scalar alpha. 3109 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3110 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 3111 * @param beta The scalar beta. 3112 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 3113 */ ZHEMM(@ide int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C)3114 public void ZHEMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) { 3115 validateUplo(Uplo); 3116 validateHEMM(Element.F64_2(mRS), Side, A, B, C); 3117 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, 3118 alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); 3119 } 3120 validateHERK(Element e, @Transpose int Trans, Allocation A, Allocation C)3121 static void validateHERK(Element e, @Transpose int Trans, Allocation A, Allocation C) { 3122 if (!A.getType().getElement().isCompatible(e) || 3123 !C.getType().getElement().isCompatible(e)) { 3124 throw new RSRuntimeException("Called BLAS with wrong Element type"); 3125 } 3126 validateConjTranspose(Trans); 3127 int cdim = C.getType().getX(); 3128 if (cdim != C.getType().getY()) { 3129 throw new RSRuntimeException("Called HERK with non-square C"); 3130 } 3131 if (Trans == NO_TRANSPOSE) { 3132 if (cdim != A.getType().getY()) { 3133 throw new RSRuntimeException("Called HERK with invalid A"); 3134 } 3135 } else { 3136 if (cdim != A.getType().getX()) { 3137 throw new RSRuntimeException("Called HERK with invalid A"); 3138 } 3139 } 3140 } 3141 3142 /** 3143 * CHERK performs one of the hermitian rank k operations 3144 * C := alpha*A*A**H + beta*C or C := alpha*A**H*A + beta*C 3145 * 3146 * Details: http://www.netlib.org/lapack/explore-html/d8/d52/cherk_8f.html 3147 * 3148 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3149 * @param Trans The type of transpose applied to the operation. 3150 * @param alpha The scalar alpha. 3151 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3152 * @param beta The scalar beta. 3153 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 3154 */ CHERK(@plo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C)3155 public void CHERK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) { 3156 validateUplo(Uplo); 3157 validateHERK(Element.F32_2(mRS), Trans, A, C); 3158 int k = 0; 3159 if (Trans == CONJ_TRANSPOSE) { 3160 k = A.getType().getY(); 3161 } else { 3162 k = A.getType().getX(); 3163 } 3164 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, 3165 alpha, 0, A.getID(mRS), 0, beta, 0, C.getID(mRS), 0, 0, 0, 0); 3166 } 3167 3168 /** 3169 * ZHERK performs one of the hermitian rank k operations 3170 * C := alpha*A*A**H + beta*C or C := alpha*A**H*A + beta*C 3171 * 3172 * Details: http://www.netlib.org/lapack/explore-html/d1/db1/zherk_8f.html 3173 * 3174 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3175 * @param Trans The type of transpose applied to the operation. 3176 * @param alpha The scalar alpha. 3177 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3178 * @param beta The scalar beta. 3179 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 3180 */ ZHERK(@plo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C)3181 public void ZHERK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) { 3182 validateUplo(Uplo); 3183 validateHERK(Element.F64_2(mRS), Trans, A, C); 3184 int k = 0; 3185 if (Trans == CONJ_TRANSPOSE) { 3186 k = A.getType().getY(); 3187 } else { 3188 k = A.getType().getX(); 3189 } 3190 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, 3191 alpha, 0, A.getID(mRS), 0, beta, 0, C.getID(mRS), 0, 0, 0, 0); 3192 } 3193 validateHER2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C)3194 static void validateHER2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) { 3195 if (!A.getType().getElement().isCompatible(e) || 3196 !B.getType().getElement().isCompatible(e) || 3197 !C.getType().getElement().isCompatible(e)) { 3198 throw new RSRuntimeException("Called BLAS with wrong Element type"); 3199 } 3200 validateConjTranspose(Trans); 3201 int cdim = C.getType().getX(); 3202 if (cdim != C.getType().getY()) { 3203 throw new RSRuntimeException("Called HER2K with non-square C"); 3204 } 3205 if (Trans == NO_TRANSPOSE) { 3206 if (A.getType().getY() != cdim) { 3207 throw new RSRuntimeException("Called HER2K with invalid matrices"); 3208 } 3209 } else { 3210 if (A.getType().getX() != cdim) { 3211 throw new RSRuntimeException("Called HER2K with invalid matrices"); 3212 } 3213 } 3214 if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) { 3215 throw new RSRuntimeException("Called HER2K with invalid A and B matrices"); 3216 } 3217 } 3218 3219 /** 3220 * CHER2K performs one of the hermitian rank 2k operations 3221 * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C or C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C 3222 * 3223 * Details: http://www.netlib.org/lapack/explore-html/d1/d82/cher2k_8f.html 3224 * 3225 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3226 * @param Trans The type of transpose applied to the operation. 3227 * @param alpha The scalar alpha. 3228 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3229 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3230 * @param beta The scalar beta. 3231 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 3232 */ CHER2K(@plo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, float beta, Allocation C)3233 public void CHER2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, float beta, Allocation C) { 3234 validateUplo(Uplo); 3235 validateHER2K(Element.F32_2(mRS), Trans, A, B, C); 3236 int k = 0; 3237 if (Trans == NO_TRANSPOSE) { 3238 k = A.getType().getX(); 3239 } else { 3240 k = A.getType().getY(); 3241 } 3242 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y, 3243 A.getID(mRS), B.getID(mRS), beta, 0, C.getID(mRS), 0, 0, 0, 0); 3244 } 3245 3246 /** 3247 * ZHER2K performs one of the hermitian rank 2k operations 3248 * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C or C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C 3249 * 3250 * Details: http://www.netlib.org/lapack/explore-html/d7/dfa/zher2k_8f.html 3251 * 3252 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3253 * @param Trans The type of transpose applied to the operation. 3254 * @param alpha The scalar alpha. 3255 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3256 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 3257 * @param beta The scalar beta. 3258 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 3259 */ ZHER2K(@plo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, double beta, Allocation C)3260 public void ZHER2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, double beta, Allocation C) { 3261 validateUplo(Uplo); 3262 validateHER2K(Element.F64_2(mRS), Trans, A, B, C); 3263 int k = 0; 3264 if (Trans == NO_TRANSPOSE) { 3265 k = A.getType().getX(); 3266 } else { 3267 k = A.getType().getY(); 3268 } 3269 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y, 3270 A.getID(mRS), B.getID(mRS), beta, 0, C.getID(mRS), 0, 0, 0, 0); 3271 } 3272 3273 3274 /** 3275 * 8-bit GEMM-like operation for neural networks: C = A * Transpose(B) 3276 * Calculations are done in 1.10.21 fixed-point format for the final output, 3277 * just before there's a shift down to drop the fractional parts. The output 3278 * values are gated to 0 to 255 to fit in a byte, but the 10-bit format 3279 * gives some headroom to avoid wrapping around on small overflows. 3280 * 3281 * @param A The input allocation contains matrix A, supported elements type {@link Element#U8}. 3282 * @param a_offset The offset for all values in matrix A, e.g A[i,j] = A[i,j] - a_offset. Value should be from 0 to 255. 3283 * @param B The input allocation contains matrix B, supported elements type {@link Element#U8}. 3284 * @param b_offset The offset for all values in matrix B, e.g B[i,j] = B[i,j] - b_offset. Value should be from 0 to 255. 3285 * @param C The input allocation contains matrix C, supported elements type {@link Element#U8}. 3286 * @param c_offset The offset for all values in matrix C. 3287 * @param c_mult The multiplier for all values in matrix C, e.g C[i,j] = (C[i,j] + c_offset) * c_mult. 3288 **/ BNNM(Allocation A, int a_offset, Allocation B, int b_offset, Allocation C, int c_offset, int c_mult)3289 public void BNNM(Allocation A, int a_offset, Allocation B, int b_offset, Allocation C, int c_offset, int c_mult) { 3290 validateL3(Element.U8(mRS), NO_TRANSPOSE, TRANSPOSE, 0, A, B, C); 3291 3292 if (a_offset < 0 || a_offset > 255) { 3293 throw new RSRuntimeException("Invalid a_offset passed to BNNM"); 3294 } 3295 if (b_offset < 0 || b_offset > 255) { 3296 throw new RSRuntimeException("Invalid b_offset passed to BNNM"); 3297 } 3298 int M = -1, N = -1, K = -1; 3299 M = A.getType().getY(); 3300 N = B.getType().getY(); 3301 K = A.getType().getX(); 3302 3303 3304 mRS.nScriptIntrinsicBLAS_BNNM(getID(mRS), M, N, K, A.getID(mRS), a_offset, B.getID(mRS), b_offset, C.getID(mRS), c_offset, c_mult); 3305 3306 } 3307 3308 } 3309