• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package androidx.renderscript;
18 
19 import android.support.annotation.IntDef;
20 import java.lang.annotation.Retention;
21 import java.lang.annotation.RetentionPolicy;
22 
23 /**
24  *
25  * ScriptIntrinsicBLAS class provides high performance RenderScript APIs to BLAS.
26  *
27  * The BLAS (Basic Linear Algebra Subprograms) are routines that provide standard
28  * building blocks for performing basic vector and matrix operations.
29  *
30  * For detailed description of BLAS, please refer to http://www.netlib.org/blas/
31  *
32  * @deprecated Renderscript has been deprecated in API level 31. Please refer to the <a
33  * href="https://developer.android.com/guide/topics/renderscript/migration-guide">migration
34  * guide</a> for the proposed alternatives.
35  **/
36 @Deprecated
37 public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
// NOTE(review): mLUT is not referenced anywhere in the visible part of this file - confirm it is still needed.
38     private Allocation mLUT;
// Threshold compared against android.os.Build.VERSION.SDK_INT in create(): when the context
// runs natively on an SDK level below this value, the incremental-support path is enabled.
39     private static final int INTRINSIC_API_LEVEL = 23;
40 
/**
 * Wraps an existing intrinsic handle. The constructor is private; {@code create} is the
 * factory that calls it.
 *
 * @param id handle of the intrinsic, forwarded unchanged to the superclass
 * @param rs the RenderScript context, forwarded unchanged to the superclass
 */
private ScriptIntrinsicBLAS(long id, RenderScript rs) {
    super(id, rs);
}
44 
// Function selectors handed to the native BLAS entry points. In the wrappers visible in this
// file, each public method passes its matching constant as the second argument of
// mRS.nScriptIntrinsicBLAS_Single / _Double / _Complex / _Z (e.g. SGEMV passes RsBlas_sgemv).
// NOTE(review): the numeric values presumably have to match the native-side enum - confirm
// before renumbering.
45     private static final int RsBlas_sdsdot = 1;
46     private static final int RsBlas_dsdot = 2;
47     private static final int RsBlas_sdot = 3;
48     private static final int RsBlas_ddot = 4;
49     private static final int RsBlas_cdotu_sub = 5;
50     private static final int RsBlas_cdotc_sub = 6;
51     private static final int RsBlas_zdotu_sub = 7;
52     private static final int RsBlas_zdotc_sub = 8;
53     private static final int RsBlas_snrm2 = 9;
54     private static final int RsBlas_sasum = 10;
55     private static final int RsBlas_dnrm2 = 11;
56     private static final int RsBlas_dasum = 12;
57     private static final int RsBlas_scnrm2 = 13;
58     private static final int RsBlas_scasum = 14;
59     private static final int RsBlas_dznrm2 = 15;
60     private static final int RsBlas_dzasum = 16;
61     private static final int RsBlas_isamax = 17;
62     private static final int RsBlas_idamax = 18;
63     private static final int RsBlas_icamax = 19;
64     private static final int RsBlas_izamax = 20;
65     private static final int RsBlas_sswap = 21;
66     private static final int RsBlas_scopy = 22;
67     private static final int RsBlas_saxpy = 23;
68     private static final int RsBlas_dswap = 24;
69     private static final int RsBlas_dcopy = 25;
70     private static final int RsBlas_daxpy = 26;
71     private static final int RsBlas_cswap = 27;
72     private static final int RsBlas_ccopy = 28;
73     private static final int RsBlas_caxpy = 29;
74     private static final int RsBlas_zswap = 30;
75     private static final int RsBlas_zcopy = 31;
76     private static final int RsBlas_zaxpy = 32;
77     private static final int RsBlas_srotg = 33;
78     private static final int RsBlas_srotmg = 34;
79     private static final int RsBlas_srot = 35;
80     private static final int RsBlas_srotm = 36;
81     private static final int RsBlas_drotg = 37;
82     private static final int RsBlas_drotmg = 38;
83     private static final int RsBlas_drot = 39;
84     private static final int RsBlas_drotm = 40;
85     private static final int RsBlas_sscal = 41;
86     private static final int RsBlas_dscal = 42;
87     private static final int RsBlas_cscal = 43;
88     private static final int RsBlas_zscal = 44;
89     private static final int RsBlas_csscal = 45;
90     private static final int RsBlas_zdscal = 46;
91     private static final int RsBlas_sgemv = 47;
92     private static final int RsBlas_sgbmv = 48;
93     private static final int RsBlas_strmv = 49;
94     private static final int RsBlas_stbmv = 50;
95     private static final int RsBlas_stpmv = 51;
96     private static final int RsBlas_strsv = 52;
97     private static final int RsBlas_stbsv = 53;
98     private static final int RsBlas_stpsv = 54;
99     private static final int RsBlas_dgemv = 55;
100     private static final int RsBlas_dgbmv = 56;
101     private static final int RsBlas_dtrmv = 57;
102     private static final int RsBlas_dtbmv = 58;
103     private static final int RsBlas_dtpmv = 59;
104     private static final int RsBlas_dtrsv = 60;
105     private static final int RsBlas_dtbsv = 61;
106     private static final int RsBlas_dtpsv = 62;
107     private static final int RsBlas_cgemv = 63;
108     private static final int RsBlas_cgbmv = 64;
109     private static final int RsBlas_ctrmv = 65;
110     private static final int RsBlas_ctbmv = 66;
111     private static final int RsBlas_ctpmv = 67;
112     private static final int RsBlas_ctrsv = 68;
113     private static final int RsBlas_ctbsv = 69;
114     private static final int RsBlas_ctpsv = 70;
115     private static final int RsBlas_zgemv = 71;
116     private static final int RsBlas_zgbmv = 72;
117     private static final int RsBlas_ztrmv = 73;
118     private static final int RsBlas_ztbmv = 74;
119     private static final int RsBlas_ztpmv = 75;
120     private static final int RsBlas_ztrsv = 76;
121     private static final int RsBlas_ztbsv = 77;
122     private static final int RsBlas_ztpsv = 78;
123     private static final int RsBlas_ssymv = 79;
124     private static final int RsBlas_ssbmv = 80;
125     private static final int RsBlas_sspmv = 81;
126     private static final int RsBlas_sger = 82;
127     private static final int RsBlas_ssyr = 83;
128     private static final int RsBlas_sspr = 84;
129     private static final int RsBlas_ssyr2 = 85;
130     private static final int RsBlas_sspr2 = 86;
131     private static final int RsBlas_dsymv = 87;
132     private static final int RsBlas_dsbmv = 88;
133     private static final int RsBlas_dspmv = 89;
134     private static final int RsBlas_dger = 90;
135     private static final int RsBlas_dsyr = 91;
136     private static final int RsBlas_dspr = 92;
137     private static final int RsBlas_dsyr2 = 93;
138     private static final int RsBlas_dspr2 = 94;
139     private static final int RsBlas_chemv = 95;
140     private static final int RsBlas_chbmv = 96;
141     private static final int RsBlas_chpmv = 97;
142     private static final int RsBlas_cgeru = 98;
143     private static final int RsBlas_cgerc = 99;
144     private static final int RsBlas_cher = 100;
145     private static final int RsBlas_chpr = 101;
146     private static final int RsBlas_cher2 = 102;
147     private static final int RsBlas_chpr2 = 103;
148     private static final int RsBlas_zhemv = 104;
149     private static final int RsBlas_zhbmv = 105;
150     private static final int RsBlas_zhpmv = 106;
151     private static final int RsBlas_zgeru = 107;
152     private static final int RsBlas_zgerc = 108;
153     private static final int RsBlas_zher = 109;
154     private static final int RsBlas_zhpr = 110;
155     private static final int RsBlas_zher2 = 111;
156     private static final int RsBlas_zhpr2 = 112;
157     private static final int RsBlas_sgemm = 113;
158     private static final int RsBlas_ssymm = 114;
159     private static final int RsBlas_ssyrk = 115;
160     private static final int RsBlas_ssyr2k = 116;
161     private static final int RsBlas_strmm = 117;
162     private static final int RsBlas_strsm = 118;
163     private static final int RsBlas_dgemm = 119;
164     private static final int RsBlas_dsymm = 120;
165     private static final int RsBlas_dsyrk = 121;
166     private static final int RsBlas_dsyr2k = 122;
167     private static final int RsBlas_dtrmm = 123;
168     private static final int RsBlas_dtrsm = 124;
169     private static final int RsBlas_cgemm = 125;
170     private static final int RsBlas_csymm = 126;
171     private static final int RsBlas_csyrk = 127;
172     private static final int RsBlas_csyr2k = 128;
173     private static final int RsBlas_ctrmm = 129;
174     private static final int RsBlas_ctrsm = 130;
175     private static final int RsBlas_zgemm = 131;
176     private static final int RsBlas_zsymm = 132;
177     private static final int RsBlas_zsyrk = 133;
178     private static final int RsBlas_zsyr2k = 134;
179     private static final int RsBlas_ztrmm = 135;
180     private static final int RsBlas_ztrsm = 136;
181     private static final int RsBlas_chemm = 137;
182     private static final int RsBlas_cherk = 138;
183     private static final int RsBlas_cher2k = 139;
184     private static final int RsBlas_zhemm = 140;
185     private static final int RsBlas_zherk = 141;
186     private static final int RsBlas_zher2k = 142;
187 
188     // BLAS extensions start here
// The extension selector sits in a separate numeric range (1000) from the 1..142 block above.
189     private static final int RsBlas_bnnm = 1000;
190 
191     /**
192      * Create an intrinsic to access BLAS subroutines.
193      *
194      * @param rs The RenderScript context
195      * @return ScriptIntrinsicBLAS
196      */
create(RenderScript rs)197     public static ScriptIntrinsicBLAS create(RenderScript rs) {
198         long id;
199         boolean mUseIncSupp = rs.isUseNative() &&
200                               android.os.Build.VERSION.SDK_INT < INTRINSIC_API_LEVEL;
201 
202         id = rs.nScriptIntrinsicCreate(13, Element.U32(rs).getID(rs), mUseIncSupp);
203         ScriptIntrinsicBLAS si = new ScriptIntrinsicBLAS(id, rs);
204         si.setIncSupp(mUseIncSupp);
205         return si;
206     }
207 
208     /**
     * Source-retention marker for int parameters that must be one of
     * NO_TRANSPOSE, TRANSPOSE or CONJ_TRANSPOSE.
     *
209      * @hide
210      */
211     @IntDef({NO_TRANSPOSE, TRANSPOSE, CONJ_TRANSPOSE})
212     @Retention(RetentionPolicy.SOURCE)
213     public @interface Transpose {}
214 
215     /**
     * Source-retention marker for int parameters that must be one of UPPER or LOWER.
     *
216      * @hide
217      */
218     @IntDef({UPPER, LOWER})
219     @Retention(RetentionPolicy.SOURCE)
220     public @interface Uplo {}
221 
222     /**
     * Source-retention marker for int parameters that must be one of NON_UNIT or UNIT.
     *
223      * @hide
224      */
225     @IntDef({NON_UNIT, UNIT})
226     @Retention(RetentionPolicy.SOURCE)
227     public @interface Diag {}
228 
229     /**
     * Source-retention marker for int parameters that must be one of LEFT or RIGHT.
     *
230      * @hide
231      */
232     @IntDef({LEFT, RIGHT})
233     @Retention(RetentionPolicy.SOURCE)
234     public @interface Side {}
235 
// Transpose selectors: validateTranspose() accepts all three, validateConjTranspose()
// accepts only NO_TRANSPOSE and CONJ_TRANSPOSE.
236     public static final int NO_TRANSPOSE = 111;
237     public static final int TRANSPOSE = 112;
238     public static final int CONJ_TRANSPOSE = 113;
239 
// Values accepted by validateUplo().
240     public static final int UPPER = 121;
241     public static final int LOWER = 122;
242 
// Values accepted by validateDiag().
243     public static final int NON_UNIT = 131;
244     public static final int UNIT = 132;
245 
// Values accepted by validateSide().
246     public static final int LEFT = 141;
247     public static final int RIGHT = 142;
248 
249     static void validateSide(@Side int Side) {
250         if (Side != LEFT && Side != RIGHT) {
251             throw new RSRuntimeException("Invalid side passed to BLAS");
252         }
253     }
254 
255     static void validateTranspose(@Transpose int Trans) {
256         if (Trans != NO_TRANSPOSE && Trans != TRANSPOSE &&
257             Trans != CONJ_TRANSPOSE) {
258             throw new RSRuntimeException("Invalid transpose passed to BLAS");
259         }
260     }
261 
262     static void validateConjTranspose(@Transpose int Trans) {
263         if (Trans != NO_TRANSPOSE &&
264             Trans != CONJ_TRANSPOSE) {
265             throw new RSRuntimeException("Invalid transpose passed to BLAS");
266         }
267     }
268 
269     static void validateDiag(@Diag int Diag) {
270         if (Diag != NON_UNIT && Diag != UNIT) {
271             throw new RSRuntimeException("Invalid diag passed to BLAS");
272         }
273     }
274 
275     static void validateUplo(@Uplo int Uplo) {
276         if (Uplo != UPPER && Uplo != LOWER) {
277             throw new RSRuntimeException("Invalid uplo passed to BLAS");
278         }
279     }
280 
281 
282     /**
283      * Level 2 BLAS
284      */
285 
286     static void validateGEMV(Element e, int TransA, Allocation A, Allocation X, int incX, Allocation Y, int incY) {
287         validateTranspose(TransA);
288         int M = A.getType().getY();
289         int N = A.getType().getX();
290         if (!A.getType().getElement().isCompatible(e) ||
291             !X.getType().getElement().isCompatible(e) ||
292             !Y.getType().getElement().isCompatible(e)) {
293             throw new RSRuntimeException("Called BLAS with wrong Element type");
294         }
295         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
296             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
297         }
298 
299         if (incX <= 0 || incY <= 0) {
300             throw new RSRuntimeException("Vector increments must be greater than 0");
301         }
302         int expectedXDim = -1, expectedYDim = -1;
303         if (TransA == NO_TRANSPOSE) {
304             expectedXDim = 1 + (N - 1) * incX;
305             expectedYDim = 1 + (M - 1) * incY;
306         } else {
307             expectedXDim = 1 + (M - 1) * incX;
308             expectedYDim = 1 + (N - 1) * incY;
309         }
310         if (X.getType().getX() != expectedXDim ||
311             Y.getType().getX() != expectedYDim) {
312             throw new RSRuntimeException("Incorrect vector dimensions for GEMV");
313         }
314     }
315 
316     /**
317      * SGEMV performs one of the matrix-vector operations
318      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
319      *
320      * Details: http://www.netlib.org/lapack/explore-html/db/d58/sgemv_8f.html
321      *
322      * @param TransA The type of transpose applied to matrix A.
323      * @param alpha The scalar alpha.
324      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
325      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
326      * @param incX The increment for the elements of vector x, must be larger than zero.
327      * @param beta The scalar beta.
328      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
329      * @param incY The increment for the elements of vector y, must be larger than zero.
330      */
SGEMV(@ranspose int TransA, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY)331     public void SGEMV(@Transpose int TransA, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
332         validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY);
333         int M = A.getType().getY();
334         int N = A.getType().getX();
335 
336         boolean mUseIncSupp = isIncSupp();
337         long aID = A.getID(mRS);
338         long xID = X.getID(mRS);
339         long yID = Y.getID(mRS);
340         if (mUseIncSupp) {
341             aID = getDummyAlloc(A);
342             xID = getDummyAlloc(X);
343             yID = getDummyAlloc(Y);
344         }
345         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
346     }
347 
348     /**
349      * DGEMV performs one of the matrix-vector operations
350      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
351      *
352      * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dgemv_8f.html
353      *
354      * @param TransA The type of transpose applied to matrix A.
355      * @param alpha The scalar alpha.
356      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
357      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
358      * @param incX The increment for the elements of vector x, must be larger than zero.
359      * @param beta The scalar beta.
360      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
361      * @param incY The increment for the elements of vector y, must be larger than zero.
362      */
DGEMV(@ranspose int TransA, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY)363     public void DGEMV(@Transpose int TransA, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
364         validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY);
365         int M = A.getType().getY();
366         int N = A.getType().getX();
367 
368         boolean mUseIncSupp = isIncSupp();
369         long aID = A.getID(mRS);
370         long xID = X.getID(mRS);
371         long yID = Y.getID(mRS);
372         if (mUseIncSupp) {
373             aID = getDummyAlloc(A);
374             xID = getDummyAlloc(X);
375             yID = getDummyAlloc(Y);
376         }
377         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
378     }
379 
380     /**
381      * CGEMV performs one of the matrix-vector operations
382      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
383      *
384      * Details: http://www.netlib.org/lapack/explore-html/d4/d8a/cgemv_8f.html
385      *
386      * @param TransA The type of transpose applied to matrix A.
387      * @param alpha The scalar alpha.
388      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
389      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
390      * @param incX The increment for the elements of vector x, must be larger than zero.
391      * @param beta The scalar beta.
392      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
393      * @param incY The increment for the elements of vector y, must be larger than zero.
394      */
CGEMV(@ranspose int TransA, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY)395     public void CGEMV(@Transpose int TransA, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
396         validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY);
397         int M = A.getType().getY();
398         int N = A.getType().getX();
399 
400         boolean mUseIncSupp = isIncSupp();
401         long aID = A.getID(mRS);
402         long xID = X.getID(mRS);
403         long yID = Y.getID(mRS);
404         if (mUseIncSupp) {
405             aID = getDummyAlloc(A);
406             xID = getDummyAlloc(X);
407             yID = getDummyAlloc(Y);
408         }
409         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
410     }
411 
412     /**
413      * ZGEMV performs one of the matrix-vector operations
414      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
415      *
416      * Details: http://www.netlib.org/lapack/explore-html/db/d40/zgemv_8f.html
417      *
418      * @param TransA The type of transpose applied to matrix A.
419      * @param alpha The scalar alpha.
420      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
421      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
422      * @param incX The increment for the elements of vector x, must be larger than zero.
423      * @param beta The scalar beta.
424      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
425      * @param incY The increment for the elements of vector y, must be larger than zero.
426      */
ZGEMV(@ranspose int TransA, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY)427     public void ZGEMV(@Transpose int TransA, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
428         validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY);
429         int M = A.getType().getY();
430         int N = A.getType().getX();
431 
432         boolean mUseIncSupp = isIncSupp();
433         long aID = A.getID(mRS);
434         long xID = X.getID(mRS);
435         long yID = Y.getID(mRS);
436         if (mUseIncSupp) {
437             aID = getDummyAlloc(A);
438             xID = getDummyAlloc(X);
439             yID = getDummyAlloc(Y);
440         }
441         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
442     }
443 
444     /**
445      * SGBMV performs one of the matrix-vector operations
446      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
447      *
448      * Details: http://www.netlib.org/lapack/explore-html/d6/d46/sgbmv_8f.html
449      *
450      * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
451     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine is an
452      *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
453      *           for i in range(0, m):
454      *              for j in range(max(0, i-kl), min(i+ku+1, n)):
455      *                  b[i, j-i+kl] = a[i, j]
456      *
457      * @param TransA The type of transpose applied to matrix A.
458      * @param KL The number of sub-diagonals of the matrix A.
459      * @param KU The number of super-diagonals of the matrix A.
460      * @param alpha The scalar alpha.
461      * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32}.
462      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
463      * @param incX The increment for the elements of vector x, must be larger than zero.
464      * @param beta The scalar beta.
465      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
466      * @param incY The increment for the elements of vector y, must be larger than zero.
467      */
SGBMV(@ranspose int TransA, int KL, int KU, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY)468     public void SGBMV(@Transpose int TransA, int KL, int KU, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
469         // GBMV has the same validation requirements as GEMV + KL and KU >= 0
470         validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY);
471         if (KL < 0 || KU < 0) {
472             throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
473         }
474         int M = A.getType().getY();
475         int N = A.getType().getX();
476 
477         boolean mUseIncSupp = isIncSupp();
478         long aID = A.getID(mRS);
479         long xID = X.getID(mRS);
480         long yID = Y.getID(mRS);
481         if (mUseIncSupp) {
482             aID = getDummyAlloc(A);
483             xID = getDummyAlloc(X);
484             yID = getDummyAlloc(Y);
485         }
486         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, KL, KU, mUseIncSupp);
487     }
488 
489     /**
490      * DGBMV performs one of the matrix-vector operations
491      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
492      *
493      * Details: http://www.netlib.org/lapack/explore-html/d2/d3f/dgbmv_8f.html
494      *
495      * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
496     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine is an
497      *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
498      *           for i in range(0, m):
499      *              for j in range(max(0, i-kl), min(i+ku+1, n)):
500      *                  b[i, j-i+kl] = a[i, j]
501      *
502      * @param TransA The type of transpose applied to matrix A.
503      * @param KL The number of sub-diagonals of the matrix A.
504      * @param KU The number of super-diagonals of the matrix A.
505      * @param alpha The scalar alpha.
506      * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64}.
507      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
508      * @param incX The increment for the elements of vector x, must be larger than zero.
509      * @param beta The scalar beta.
510      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
511      * @param incY The increment for the elements of vector y, must be larger than zero.
512      */
DGBMV(@ranspose int TransA, int KL, int KU, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY)513     public void DGBMV(@Transpose int TransA, int KL, int KU, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
514         // GBMV has the same validation requirements as GEMV + KL and KU >= 0
515         validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY);
516         if (KL < 0 || KU < 0) {
517             throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
518         }
519         int M = A.getType().getY();
520         int N = A.getType().getX();
521 
522         boolean mUseIncSupp = isIncSupp();
523         long aID = A.getID(mRS);
524         long xID = X.getID(mRS);
525         long yID = Y.getID(mRS);
526         if (mUseIncSupp) {
527             aID = getDummyAlloc(A);
528             xID = getDummyAlloc(X);
529             yID = getDummyAlloc(Y);
530         }
531         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, KL, KU, mUseIncSupp);
532     }
533 
534     /**
535      * CGBMV performs one of the matrix-vector operations
536      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
537      *
538      * Details: http://www.netlib.org/lapack/explore-html/d0/d75/cgbmv_8f.html
539      *
540      * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
541     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine is an
542      *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
543      *           for i in range(0, m):
544      *              for j in range(max(0, i-kl), min(i+ku+1, n)):
545      *                  b[i, j-i+kl] = a[i, j]
546      *
547      * @param TransA The type of transpose applied to matrix A.
548      * @param KL The number of sub-diagonals of the matrix A.
549      * @param KU The number of super-diagonals of the matrix A.
550      * @param alpha The scalar alpha.
551      * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32_2}.
552      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
553      * @param incX The increment for the elements of vector x, must be larger than zero.
554      * @param beta The scalar beta.
555      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
556      * @param incY The increment for the elements of vector y, must be larger than zero.
557      */
CGBMV(@ranspose int TransA, int KL, int KU, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY)558     public void CGBMV(@Transpose int TransA, int KL, int KU, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
559         // GBMV has the same validation requirements as GEMV + KL and KU >= 0
560         validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY);
561         if (KL < 0 || KU < 0) {
562             throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
563         }
564         int M = A.getType().getY();
565         int N = A.getType().getX();
566 
567         boolean mUseIncSupp = isIncSupp();
568         long aID = A.getID(mRS);
569         long xID = X.getID(mRS);
570         long yID = Y.getID(mRS);
571         if (mUseIncSupp) {
572             aID = getDummyAlloc(A);
573             xID = getDummyAlloc(X);
574             yID = getDummyAlloc(Y);
575         }
576         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, KL, KU, mUseIncSupp);
577     }
578 
579     /**
580      * ZGBMV performs one of the matrix-vector operations
581      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
582      *
583      * Details: http://www.netlib.org/lapack/explore-html/d9/d46/zgbmv_8f.html
584      *
585      * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
586     *       but only the region M*(KL+KU+1) will be referenced. The following subroutine is an
587      *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
588      *           for i in range(0, m):
589      *              for j in range(max(0, i-kl), min(i+ku+1, n)):
590      *                  b[i, j-i+kl] = a[i, j]
591      *
592      * @param TransA The type of transpose applied to matrix A.
593      * @param KL The number of sub-diagonals of the matrix A.
594      * @param KU The number of super-diagonals of the matrix A.
595      * @param alpha The scalar alpha.
596      * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64_2}.
597      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
598      * @param incX The increment for the elements of vector x, must be larger than zero.
599      * @param beta The scalar beta.
600      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
601      * @param incY The increment for the elements of vector y, must be larger than zero.
602      */
ZGBMV(@ranspose int TransA, int KL, int KU, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY)603     public void ZGBMV(@Transpose int TransA, int KL, int KU, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
604         // GBMV has the same validation requirements as GEMV + KL and KU >= 0
605         validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY);
606         if (KL < 0 || KU < 0) {
607             throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
608         }
609         int M = A.getType().getY();
610         int N = A.getType().getX();
611 
612         boolean mUseIncSupp = isIncSupp();
613         long aID = A.getID(mRS);
614         long xID = X.getID(mRS);
615         long yID = Y.getID(mRS);
616         if (mUseIncSupp) {
617             aID = getDummyAlloc(A);
618             xID = getDummyAlloc(X);
619             yID = getDummyAlloc(Y);
620         }
621         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, KL, KU, mUseIncSupp);
622     }
623 
validateTRMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)624     static void validateTRMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
625         validateTranspose(TransA);
626         validateUplo(Uplo);
627         validateDiag(Diag);
628         int N = A.getType().getY();
629         if (A.getType().getX() != N) {
630             throw new RSRuntimeException("A must be a square matrix for TRMV");
631         }
632         if (!A.getType().getElement().isCompatible(e) ||
633             !X.getType().getElement().isCompatible(e)) {
634             throw new RSRuntimeException("Called BLAS with wrong Element type");
635         }
636         if (X.getType().getY() > 1) {
637             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
638         }
639 
640         if (incX <= 0) {
641             throw new RSRuntimeException("Vector increments must be greater than 0");
642         }
643         int expectedXDim = 1 + (N - 1) * incX;
644         if (X.getType().getX() != expectedXDim) {
645             throw new RSRuntimeException("Incorrect vector dimensions for TRMV");
646         }
647     }
648 
validateTPMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)649     static int validateTPMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {
650         validateTranspose(TransA);
651         validateUplo(Uplo);
652         validateDiag(Diag);
653         if (!Ap.getType().getElement().isCompatible(e) ||
654             !X.getType().getElement().isCompatible(e)) {
655             throw new RSRuntimeException("Called BLAS with wrong Element type");
656         }
657         if (X.getType().getY() > 1) {
658             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
659         }
660 
661         if (Ap.getType().getY() > 1) {
662             throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
663         }
664 
665         int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
666         //is it really doing anything?
667         if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
668             throw new RSRuntimeException("Invalid dimension for Ap");
669         }
670         if (incX <= 0) {
671             throw new RSRuntimeException("Vector increments must be greater than 0");
672         }
673         int expectedXDim = 1 + (N - 1) * incX;
674         if (X.getType().getX() != expectedXDim) {
675             throw new RSRuntimeException("Incorrect vector dimensions for TPMV");
676         }
677 
678         return N;
679     }
680 
681     /**
682      * STRMV performs one of the matrix-vector operations
683      * x := A*x   or   x := A**T*x
684      *
685      * Details: http://www.netlib.org/lapack/explore-html/de/d45/strmv_8f.html
686      *
687      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
688      * @param TransA The type of transpose applied to matrix A.
689      * @param Diag Specifies whether or not A is unit triangular.
690      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
691      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
692      * @param incX The increment for the elements of vector x, must be larger than zero.
693      */
STRMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)694     public void STRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
695         validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
696         int N = A.getType().getY();
697 
698         boolean mUseIncSupp = isIncSupp();
699         long aID = A.getID(mRS);
700         long xID = X.getID(mRS);
701         if (mUseIncSupp) {
702             aID = getDummyAlloc(A);
703             xID = getDummyAlloc(X);
704         }
705         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
706     }
707 
708     /**
709      * DTRMV performs one of the matrix-vector operations
710      * x := A*x   or   x := A**T*x
711      *
712      * Details: http://www.netlib.org/lapack/explore-html/dc/d7e/dtrmv_8f.html
713      *
714      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
715      * @param TransA The type of transpose applied to matrix A.
716      * @param Diag Specifies whether or not A is unit triangular.
717      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
718      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
719      * @param incX The increment for the elements of vector x, must be larger than zero.
720      */
DTRMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)721     public void DTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
722         validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
723         int N = A.getType().getY();
724 
725         boolean mUseIncSupp = isIncSupp();
726         long aID = A.getID(mRS);
727         long xID = X.getID(mRS);
728         if (mUseIncSupp) {
729             aID = getDummyAlloc(A);
730             xID = getDummyAlloc(X);
731         }
732         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
733     }
734 
735     /**
736      * CTRMV performs one of the matrix-vector operations
737      * x := A*x   or   x := A**T*x   or   x := A**H*x
738      *
739      * Details: http://www.netlib.org/lapack/explore-html/df/d78/ctrmv_8f.html
740      *
741      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
742      * @param TransA The type of transpose applied to matrix A.
743      * @param Diag Specifies whether or not A is unit triangular.
744      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
745      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
746      * @param incX The increment for the elements of vector x, must be larger than zero.
747      */
CTRMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)748     public void CTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
749         validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
750         int N = A.getType().getY();
751 
752         boolean mUseIncSupp = isIncSupp();
753         long aID = A.getID(mRS);
754         long xID = X.getID(mRS);
755         if (mUseIncSupp) {
756             aID = getDummyAlloc(A);
757             xID = getDummyAlloc(X);
758         }
759         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
760     }
761 
762     /**
763      * ZTRMV performs one of the matrix-vector operations
764      * x := A*x   or   x := A**T*x   or   x := A**H*x
765      *
766      * Details: http://www.netlib.org/lapack/explore-html/d0/dd1/ztrmv_8f.html
767      *
768      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
769      * @param TransA The type of transpose applied to matrix A.
770      * @param Diag Specifies whether or not A is unit triangular.
771      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
772      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
773      * @param incX The increment for the elements of vector x, must be larger than zero.
774      */
ZTRMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)775     public void ZTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
776         validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
777         int N = A.getType().getY();
778 
779         boolean mUseIncSupp = isIncSupp();
780         long aID = A.getID(mRS);
781         long xID = X.getID(mRS);
782         if (mUseIncSupp) {
783             aID = getDummyAlloc(A);
784             xID = getDummyAlloc(X);
785         }
786         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
787     }
788 
789     /**
790      * STBMV performs one of the matrix-vector operations
791      * x := A*x   or   x := A**T*x
792      *
793      * Details: http://www.netlib.org/lapack/explore-html/d6/d7d/stbmv_8f.html
794      *
795      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
796      *       but only the region N*(K+1) will be referenced. The following subroutine can is an
797      *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
798      *           for i in range(0, n):
799      *              for j in range(i, min(i+k+1, n)):
800      *                  b[i, j-i] = a[i, j]
801      *
802      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
803      * @param TransA The type of transpose applied to matrix A.
804      * @param Diag Specifies whether or not A is unit triangular.
805      * @param K The number of off-diagonals of the matrix A
806      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
807      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
808      * @param incX The increment for the elements of vector x, must be larger than zero.
809      */
STBMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)810     public void STBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
811         // TBMV has the same requirements as TRMV + K >= 0
812         if (K < 0) {
813             throw new RSRuntimeException("K must be greater than or equal to 0");
814         }
815         validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
816         int N = A.getType().getY();
817 
818         boolean mUseIncSupp = isIncSupp();
819         long aID = A.getID(mRS);
820         long xID = X.getID(mRS);
821         if (mUseIncSupp) {
822             aID = getDummyAlloc(A);
823             xID = getDummyAlloc(X);
824         }
825         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
826     }
827 
828     /**
829      * DTBMV performs one of the matrix-vector operations
830      * x := A*x   or   x := A**T*x
831      *
832      * Details: http://www.netlib.org/lapack/explore-html/df/d29/dtbmv_8f.html
833      *
834      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
835      *       but only the region N*(K+1) will be referenced. The following subroutine can is an
836      *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
837      *           for i in range(0, n):
838      *              for j in range(i, min(i+k+1, n)):
839      *                  b[i, j-i] = a[i, j]
840      *
841      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
842      * @param TransA The type of transpose applied to matrix A.
843      * @param Diag Specifies whether or not A is unit triangular.
844      * @param K The number of off-diagonals of the matrix A
845      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
846      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
847      * @param incX The increment for the elements of vector x, must be larger than zero.
848      */
DTBMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)849     public void DTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
850         // TBMV has the same requirements as TRMV + K >= 0
851         if (K < 0) {
852             throw new RSRuntimeException("K must be greater than or equal to 0");
853         }
854         validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
855         int N = A.getType().getY();
856 
857         boolean mUseIncSupp = isIncSupp();
858         long aID = A.getID(mRS);
859         long xID = X.getID(mRS);
860         if (mUseIncSupp) {
861             aID = getDummyAlloc(A);
862             xID = getDummyAlloc(X);
863         }
864         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
865     }
866 
867     /**
868      * CTBMV performs one of the matrix-vector operations
869      * x := A*x   or   x := A**T*x   or   x := A**H*x
870      *
871      * Details: http://www.netlib.org/lapack/explore-html/d3/dcd/ctbmv_8f.html
872      *
873      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
874      *       but only the region N*(K+1) will be referenced. The following subroutine can is an
875      *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
876      *           for i in range(0, n):
877      *              for j in range(i, min(i+k+1, n)):
878      *                  b[i, j-i] = a[i, j]
879      *
880      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
881      * @param TransA The type of transpose applied to matrix A.
882      * @param Diag Specifies whether or not A is unit triangular.
883      * @param K The number of off-diagonals of the matrix A
884      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
885      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
886      * @param incX The increment for the elements of vector x, must be larger than zero.
887      */
CTBMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)888     public void CTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
889         // TBMV has the same requirements as TRMV + K >= 0
890         if (K < 0) {
891             throw new RSRuntimeException("K must be greater than or equal to 0");
892         }
893         validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
894         int N = A.getType().getY();
895 
896         boolean mUseIncSupp = isIncSupp();
897         long aID = A.getID(mRS);
898         long xID = X.getID(mRS);
899         if (mUseIncSupp) {
900             aID = getDummyAlloc(A);
901             xID = getDummyAlloc(X);
902         }
903         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
904     }
905 
906     /**
907      * ZTBMV performs one of the matrix-vector operations
908      * x := A*x   or   x := A**T*x   or   x := A**H*x
909      *
910      * Details: http://www.netlib.org/lapack/explore-html/d3/d39/ztbmv_8f.html
911      *
912      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
913      *       but only the region N*(K+1) will be referenced. The following subroutine can is an
914      *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
915      *           for i in range(0, n):
916      *              for j in range(i, min(i+k+1, n)):
917      *                  b[i, j-i] = a[i, j]
918      *
919      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
920      * @param TransA The type of transpose applied to matrix A.
921      * @param Diag Specifies whether or not A is unit triangular.
922      * @param K The number of off-diagonals of the matrix A
923      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
924      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
925      * @param incX The increment for the elements of vector x, must be larger than zero.
926      */
ZTBMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)927     public void ZTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
928         // TBMV has the same requirements as TRMV + K >= 0
929         if (K < 0) {
930             throw new RSRuntimeException("K must be greater than or equal to 0");
931         }
932         validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
933         int N = A.getType().getY();
934 
935         boolean mUseIncSupp = isIncSupp();
936         long aID = A.getID(mRS);
937         long xID = X.getID(mRS);
938         if (mUseIncSupp) {
939             aID = getDummyAlloc(A);
940             xID = getDummyAlloc(X);
941         }
942         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
943     }
944 
945     /**
946      * STPMV performs one of the matrix-vector operations
947      * x := A*x   or   x := A**T*x
948      *
949      * Details: http://www.netlib.org/lapack/explore-html/db/db1/stpmv_8f.html
950      *
951      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
952      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
953      *       'a' to packed matrix 'b'.
954      *           k = 0
955      *           for i in range(0, n):
956      *              for j in range(i, n):
957      *                  b[k++] = a[i, j]
958      *
959      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
960      * @param TransA The type of transpose applied to matrix A.
961      * @param Diag Specifies whether or not A is unit triangular.
962      * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}.
963      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
964      * @param incX The increment for the elements of vector x, must be larger than zero.
965      */
STPMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)966     public void STPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
967         int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX);
968 
969         boolean mUseIncSupp = isIncSupp();
970         long apID = Ap.getID(mRS);
971         long xID = X.getID(mRS);
972         if (mUseIncSupp) {
973             apID = getDummyAlloc(Ap);
974             xID = getDummyAlloc(X);
975         }
976         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
977     }
978 
979     /**
980      * DTPMV performs one of the matrix-vector operations
981      * x := A*x   or   x := A**T*x
982      *
983      * Details: http://www.netlib.org/lapack/explore-html/dc/dcd/dtpmv_8f.html
984      *
985      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
986      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
987      *       'a' to packed matrix 'b'.
988      *           k = 0
989      *           for i in range(0, n):
990      *              for j in range(i, n):
991      *                  b[k++] = a[i, j]
992      *
993      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
994      * @param TransA The type of transpose applied to matrix A.
995      * @param Diag Specifies whether or not A is unit triangular.
996      * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}.
997      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
998      * @param incX The increment for the elements of vector x, must be larger than zero.
999      */
DTPMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1000     public void DTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1001         int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX);
1002 
1003         boolean mUseIncSupp = isIncSupp();
1004         long apID = Ap.getID(mRS);
1005         long xID = X.getID(mRS);
1006         if (mUseIncSupp) {
1007             apID = getDummyAlloc(Ap);
1008             xID = getDummyAlloc(X);
1009         }
1010         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1011     }
1012 
1013     /**
1014      * CTPMV performs one of the matrix-vector operations
1015      * x := A*x   or   x := A**T*x   or   x := A**H*x
1016      *
1017      * Details: http://www.netlib.org/lapack/explore-html/d4/dbb/ctpmv_8f.html
1018      *
1019      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1020      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1021      *       'a' to packed matrix 'b'.
1022      *           k = 0
1023      *           for i in range(0, n):
1024      *              for j in range(i, n):
1025      *                  b[k++] = a[i, j]
1026      *
1027      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1028      * @param TransA The type of transpose applied to matrix A.
1029      * @param Diag Specifies whether or not A is unit triangular.
1030      * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}.
1031      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1032      * @param incX The increment for the elements of vector x, must be larger than zero.
1033      */
CTPMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1034     public void CTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1035         int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
1036 
1037         boolean mUseIncSupp = isIncSupp();
1038         long apID = Ap.getID(mRS);
1039         long xID = X.getID(mRS);
1040         if (mUseIncSupp) {
1041             apID = getDummyAlloc(Ap);
1042             xID = getDummyAlloc(X);
1043         }
1044         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1045     }
1046 
1047     /**
1048      * ZTPMV performs one of the matrix-vector operations
1049      * x := A*x   or   x := A**T*x   or   x := A**H*x
1050      *
1051      * Details: http://www.netlib.org/lapack/explore-html/d2/d9e/ztpmv_8f.html
1052      *
1053      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1054      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1055      *       'a' to packed matrix 'b'.
1056      *           k = 0
1057      *           for i in range(0, n):
1058      *              for j in range(i, n):
1059      *                  b[k++] = a[i, j]
1060      *
1061      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1062      * @param TransA The type of transpose applied to matrix A.
1063      * @param Diag Specifies whether or not A is unit triangular.
1064      * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}.
1065      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
1066      * @param incX The increment for the elements of vector x, must be larger than zero.
1067      */
ZTPMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1068     public void ZTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1069         int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
1070 
1071         boolean mUseIncSupp = isIncSupp();
1072         long apID = Ap.getID(mRS);
1073         long xID = X.getID(mRS);
1074         if (mUseIncSupp) {
1075             apID = getDummyAlloc(Ap);
1076             xID = getDummyAlloc(X);
1077         }
1078         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1079     }
1080 
1081     /**
1082      * STRSV solves one of the systems of equations
1083      * A*x = b   or   A**T*x = b
1084      *
1085      * Details: http://www.netlib.org/lapack/explore-html/d0/d2a/strsv_8f.html
1086      *
1087      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1088      * @param TransA The type of transpose applied to matrix A.
1089      * @param Diag Specifies whether or not A is unit triangular.
1090      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1091      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1092      * @param incX The increment for the elements of vector x, must be larger than zero.
1093      */
STRSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)1094     public void STRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
1095         // TRSV is the same as TRMV
1096         validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
1097         int N = A.getType().getY();
1098 
1099         boolean mUseIncSupp = isIncSupp();
1100         long aID = A.getID(mRS);
1101         long xID = X.getID(mRS);
1102         if (mUseIncSupp) {
1103             aID = getDummyAlloc(A);
1104             xID = getDummyAlloc(X);
1105         }
1106         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1107 
1108     }
1109 
1110     /**
1111      * DTRSV solves one of the systems of equations
1112      * A*x = b   or   A**T*x = b
1113      *
1114      * Details: http://www.netlib.org/lapack/explore-html/d6/d96/dtrsv_8f.html
1115      *
1116      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1117      * @param TransA The type of transpose applied to matrix A.
1118      * @param Diag Specifies whether or not A is unit triangular.
1119      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1120      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1121      * @param incX The increment for the elements of vector x, must be larger than zero.
1122      */
DTRSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)1123     public void DTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
1124         // TRSV is the same as TRMV
1125         validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
1126         int N = A.getType().getY();
1127 
1128         boolean mUseIncSupp = isIncSupp();
1129         long aID = A.getID(mRS);
1130         long xID = X.getID(mRS);
1131         if (mUseIncSupp) {
1132             aID = getDummyAlloc(A);
1133             xID = getDummyAlloc(X);
1134         }
1135         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1136 
1137     }
1138 
1139     /**
1140      * CTRSV solves one of the systems of equations
1141      * A*x = b   or   A**T*x = b   or   A**H*x = b
1142      *
1143      * Details: http://www.netlib.org/lapack/explore-html/d4/dc8/ctrsv_8f.html
1144      *
1145      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1146      * @param TransA The type of transpose applied to matrix A.
1147      * @param Diag Specifies whether or not A is unit triangular.
1148      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1149      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1150      * @param incX The increment for the elements of vector x, must be larger than zero.
1151      */
CTRSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)1152     public void CTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
1153         // TRSV is the same as TRMV
1154         validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
1155         int N = A.getType().getY();
1156 
1157         boolean mUseIncSupp = isIncSupp();
1158         long aID = A.getID(mRS);
1159         long xID = X.getID(mRS);
1160         if (mUseIncSupp) {
1161             aID = getDummyAlloc(A);
1162             xID = getDummyAlloc(X);
1163         }
1164         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1165 
1166     }
1167 
1168     /**
1169      * ZTRSV solves one of the systems of equations
1170      * A*x = b   or   A**T*x = b   or   A**H*x = b
1171      *
1172      * Details: http://www.netlib.org/lapack/explore-html/d1/d2f/ztrsv_8f.html
1173      *
1174      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1175      * @param TransA The type of transpose applied to matrix A.
1176      * @param Diag Specifies whether or not A is unit triangular.
1177      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
1178      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
1179      * @param incX The increment for the elements of vector x, must be larger than zero.
1180      */
ZTRSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)1181     public void ZTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
1182         // TRSV is the same as TRMV
1183         validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
1184         int N = A.getType().getY();
1185 
1186         boolean mUseIncSupp = isIncSupp();
1187         long aID = A.getID(mRS);
1188         long xID = X.getID(mRS);
1189         if (mUseIncSupp) {
1190             aID = getDummyAlloc(A);
1191             xID = getDummyAlloc(X);
1192         }
1193         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1194 
1195     }
1196 
1197     /**
1198      * STBSV solves one of the systems of equations
1199      * A*x = b   or   A**T*x = b
1200      *
1201      * Details: http://www.netlib.org/lapack/explore-html/d0/d1f/stbsv_8f.html
1202      *
1203      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1204      *       but only the region N*(K+1) will be referenced. The following subroutine can is an
1205      *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1206      *           for i in range(0, n):
1207      *              for j in range(i, min(i+k+1, n)):
1208      *                  b[i, j-i] = a[i, j]
1209      *
1210      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1211      * @param TransA The type of transpose applied to matrix A.
1212      * @param Diag Specifies whether or not A is unit triangular.
1213      * @param K The number of off-diagonals of the matrix A
1214      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1215      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1216      * @param incX The increment for the elements of vector x, must be larger than zero.
1217      */
STBSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)1218     public void STBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
1219         // TBSV is the same as TRMV + K >= 0
1220         validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
1221         int N = A.getType().getY();
1222         if (K < 0) {
1223             throw new RSRuntimeException("Number of diagonals must be positive");
1224         }
1225 
1226         boolean mUseIncSupp = isIncSupp();
1227         long aID = A.getID(mRS);
1228         long xID = X.getID(mRS);
1229         if (mUseIncSupp) {
1230             aID = getDummyAlloc(A);
1231             xID = getDummyAlloc(X);
1232         }
1233         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1234     }
1235 
1236     /**
1237      * DTBSV solves one of the systems of equations
1238      * A*x = b   or   A**T*x = b
1239      *
1240      * Details: http://www.netlib.org/lapack/explore-html/d4/dcf/dtbsv_8f.html
1241      *
1242      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1243      *       but only the region N*(K+1) will be referenced. The following subroutine can is an
1244      *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1245      *           for i in range(0, n):
1246      *              for j in range(i, min(i+k+1, n)):
1247      *                  b[i, j-i] = a[i, j]
1248      *
1249      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1250      * @param TransA The type of transpose applied to matrix A.
1251      * @param Diag Specifies whether or not A is unit triangular.
1252      * @param K The number of off-diagonals of the matrix A
1253      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1254      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1255      * @param incX The increment for the elements of vector x, must be larger than zero.
1256      */
DTBSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)1257     public void DTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
1258         // TBSV is the same as TRMV + K >= 0
1259         validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
1260         int N = A.getType().getY();
1261         if (K < 0) {
1262             throw new RSRuntimeException("Number of diagonals must be positive");
1263         }
1264 
1265         boolean mUseIncSupp = isIncSupp();
1266         long aID = A.getID(mRS);
1267         long xID = X.getID(mRS);
1268         if (mUseIncSupp) {
1269             aID = getDummyAlloc(A);
1270             xID = getDummyAlloc(X);
1271         }
1272         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1273     }
1274 
1275     /**
1276      * CTBSV solves one of the systems of equations
1277      * A*x = b   or   A**T*x = b   or   A**H*x = b
1278      *
1279      * Details: http://www.netlib.org/lapack/explore-html/d9/d5f/ctbsv_8f.html
1280      *
1281      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1282      *       but only the region N*(K+1) will be referenced. The following subroutine can is an
1283      *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1284      *           for i in range(0, n):
1285      *              for j in range(i, min(i+k+1, n)):
1286      *                  b[i, j-i] = a[i, j]
1287      *
1288      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1289      * @param TransA The type of transpose applied to matrix A.
1290      * @param Diag Specifies whether or not A is unit triangular.
1291      * @param K The number of off-diagonals of the matrix A
1292      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1293      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1294      * @param incX The increment for the elements of vector x, must be larger than zero.
1295      */
CTBSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)1296     public void CTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
1297         // TBSV is the same as TRMV + K >= 0
1298         validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
1299         int N = A.getType().getY();
1300         if (K < 0) {
1301             throw new RSRuntimeException("Number of diagonals must be positive");
1302         }
1303 
1304         boolean mUseIncSupp = isIncSupp();
1305         long aID = A.getID(mRS);
1306         long xID = X.getID(mRS);
1307         if (mUseIncSupp) {
1308             aID = getDummyAlloc(A);
1309             xID = getDummyAlloc(X);
1310         }
1311         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1312     }
1313 
1314     /**
1315      * ZTBSV solves one of the systems of equations
1316      * A*x = b   or   A**T*x = b   or   A**H*x = b
1317      *
1318      * Details: http://www.netlib.org/lapack/explore-html/d4/d5a/ztbsv_8f.html
1319      *
1320      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1321      *       but only the region N*(K+1) will be referenced. The following subroutine can is an
1322      *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1323      *           for i in range(0, n):
1324      *              for j in range(i, min(i+k+1, n)):
1325      *                  b[i, j-i] = a[i, j]
1326      *
1327      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1328      * @param TransA The type of transpose applied to matrix A.
1329      * @param Diag Specifies whether or not A is unit triangular.
1330      * @param K The number of off-diagonals of the matrix A
1331      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
1332      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
1333      * @param incX The increment for the elements of vector x, must be larger than zero.
1334      */
ZTBSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)1335     public void ZTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
1336         // TBSV is the same as TRMV + K >= 0
1337         validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
1338         int N = A.getType().getY();
1339         if (K < 0) {
1340             throw new RSRuntimeException("Number of diagonals must be positive");
1341         }
1342 
1343         boolean mUseIncSupp = isIncSupp();
1344         long aID = A.getID(mRS);
1345         long xID = X.getID(mRS);
1346         if (mUseIncSupp) {
1347             aID = getDummyAlloc(A);
1348             xID = getDummyAlloc(X);
1349         }
1350         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1351     }
1352 
1353     /**
1354      * STPSV solves one of the systems of equations
1355      * A*x = b   or   A**T*x = b
1356      *
1357      * Details: http://www.netlib.org/lapack/explore-html/d0/d7c/stpsv_8f.html
1358      *
1359      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1360      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1361      *       'a' to packed matrix 'b'.
1362      *           k = 0
1363      *           for i in range(0, n):
1364      *              for j in range(i, n):
1365      *                  b[k++] = a[i, j]
1366      *
1367      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1368      * @param TransA The type of transpose applied to matrix A.
1369      * @param Diag Specifies whether or not A is unit triangular.
1370      * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}.
1371      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1372      * @param incX The increment for the elements of vector x, must be larger than zero.
1373      */
STPSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1374     public void STPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1375         // TPSV is same as TPMV
1376         int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX);
1377 
1378         boolean mUseIncSupp = isIncSupp();
1379         long apID = Ap.getID(mRS);
1380         long xID = X.getID(mRS);
1381         if (mUseIncSupp) {
1382             apID = getDummyAlloc(Ap);
1383             xID = getDummyAlloc(X);
1384         }
1385         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1386     }
1387 
1388     /**
1389      * DTPSV solves one of the systems of equations
1390      * A*x = b   or   A**T*x = b
1391      *
1392      * Details: http://www.netlib.org/lapack/explore-html/d9/d84/dtpsv_8f.html
1393      *
1394      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1395      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1396      *       'a' to packed matrix 'b'.
1397      *           k = 0
1398      *           for i in range(0, n):
1399      *              for j in range(i, n):
1400      *                  b[k++] = a[i, j]
1401      *
1402      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1403      * @param TransA The type of transpose applied to matrix A.
1404      * @param Diag Specifies whether or not A is unit triangular.
1405      * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}.
1406      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1407      * @param incX The increment for the elements of vector x, must be larger than zero.
1408      */
DTPSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1409     public void DTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1410         // TPSV is same as TPMV
1411         int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX);
1412 
1413         boolean mUseIncSupp = isIncSupp();
1414         long apID = Ap.getID(mRS);
1415         long xID = X.getID(mRS);
1416         if (mUseIncSupp) {
1417             apID = getDummyAlloc(Ap);
1418             xID = getDummyAlloc(X);
1419         }
1420         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1421     }
1422 
1423     /**
1424      * CTPSV solves one of the systems of equations
1425      * A*x = b   or   A**T*x = b   or   A**H*x = b
1426      *
1427      * Details: http://www.netlib.org/lapack/explore-html/d8/d56/ctpsv_8f.html
1428      *
1429      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1430      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1431      *       'a' to packed matrix 'b'.
1432      *           k = 0
1433      *           for i in range(0, n):
1434      *              for j in range(i, n):
1435      *                  b[k++] = a[i, j]
1436      *
1437      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1438      * @param TransA The type of transpose applied to matrix A.
1439      * @param Diag Specifies whether or not A is unit triangular.
1440      * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}.
1441      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1442      * @param incX The increment for the elements of vector x, must be larger than zero.
1443      */
CTPSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1444     public void CTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1445         // TPSV is same as TPMV
1446         int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
1447 
1448         boolean mUseIncSupp = isIncSupp();
1449         long apID = Ap.getID(mRS);
1450         long xID = X.getID(mRS);
1451         if (mUseIncSupp) {
1452             apID = getDummyAlloc(Ap);
1453             xID = getDummyAlloc(X);
1454         }
1455         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1456     }
1457 
1458     /**
1459      * ZTPSV solves one of the systems of equations
1460      * A*x = b   or   A**T*x = b   or   A**H*x = b
1461      *
1462      * Details: http://www.netlib.org/lapack/explore-html/da/d57/ztpsv_8f.html
1463      *
1464      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1465      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1466      *       'a' to packed matrix 'b'.
1467      *           k = 0
1468      *           for i in range(0, n):
1469      *              for j in range(i, n):
1470      *                  b[k++] = a[i, j]
1471      *
1472      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1473      * @param TransA The type of transpose applied to matrix A.
1474      * @param Diag Specifies whether or not A is unit triangular.
1475      * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}.
1476      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
1477      * @param incX The increment for the elements of vector x, must be larger than zero.
1478      */
ZTPSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1479     public void ZTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1480         // TPSV is same as TPMV
1481         int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
1482 
1483         boolean mUseIncSupp = isIncSupp();
1484         long apID = Ap.getID(mRS);
1485         long xID = X.getID(mRS);
1486         if (mUseIncSupp) {
1487             apID = getDummyAlloc(Ap);
1488             xID = getDummyAlloc(X);
1489         }
1490         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp);
1491     }
1492 
1493     /**
1494      * Level 2, S and D only
1495      */
validateSYMV(Element e, @Uplo int Uplo, Allocation A, Allocation X, Allocation Y, int incX, int incY)1496     static int validateSYMV(Element e, @Uplo int Uplo, Allocation A, Allocation X, Allocation Y, int incX, int incY) {
1497         validateUplo(Uplo);
1498         int N = A.getType().getY();
1499         if (A.getType().getX() != N) {
1500             throw new RSRuntimeException("A must be a square matrix for SYMV");
1501         }
1502         if (!A.getType().getElement().isCompatible(e) ||
1503             !X.getType().getElement().isCompatible(e) ||
1504             !Y.getType().getElement().isCompatible(e) ) {
1505             throw new RSRuntimeException("Called BLAS with wrong Element type");
1506         }
1507         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1508             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1509         }
1510 
1511         if (incX <= 0 || incY <= 0) {
1512             throw new RSRuntimeException("Vector increments must be greater than 0");
1513         }
1514         int expectedXDim = 1 + (N - 1) * incX;
1515         if (X.getType().getX() != expectedXDim) {
1516             throw new RSRuntimeException("Incorrect vector dimensions for SYMV");
1517         }
1518         int expectedYDim = 1 + (N - 1) * incY;
1519         if (Y.getType().getX() != expectedYDim) {
1520             throw new RSRuntimeException("Incorrect vector dimensions for SYMV");
1521         }
1522         return N;
1523     }
validateSPMV(Element e, @Uplo int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY)1524     static int validateSPMV(Element e, @Uplo int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY) {
1525         validateUplo(Uplo);
1526         if (!Ap.getType().getElement().isCompatible(e) ||
1527             !X.getType().getElement().isCompatible(e) ||
1528             !Y.getType().getElement().isCompatible(e)) {
1529             throw new RSRuntimeException("Called BLAS with wrong Element type");
1530         }
1531         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1532             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1533         }
1534 
1535         if (Ap.getType().getY() > 1) {
1536             throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
1537         }
1538 
1539         int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
1540         if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
1541             throw new RSRuntimeException("Invalid dimension for Ap");
1542         }
1543         if (incX <= 0 || incY <= 0) {
1544             throw new RSRuntimeException("Vector increments must be greater than 0");
1545         }
1546         int expectedXDim = 1 + (N - 1) * incX;
1547         if (X.getType().getX() != expectedXDim) {
1548             throw new RSRuntimeException("Incorrect vector dimensions for SPMV");
1549         }
1550         int expectedYDim = 1 + (N - 1) * incY;
1551         if (Y.getType().getX() != expectedYDim) {
1552             throw new RSRuntimeException("Incorrect vector dimensions for SPMV");
1553         }
1554 
1555         return N;
1556     }
validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A)1557     static void validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1558         if (!A.getType().getElement().isCompatible(e) ||
1559             !X.getType().getElement().isCompatible(e) ||
1560             !Y.getType().getElement().isCompatible(e) ) {
1561             throw new RSRuntimeException("Called BLAS with wrong Element type");
1562         }
1563 
1564         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1565             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1566         }
1567 
1568         int M = A.getType().getY();
1569         int N = A.getType().getX();
1570 
1571         if (N < 1 || M < 1) {
1572             throw new RSRuntimeException("M and N must be 1 or greater for GER");
1573         }
1574         if (incX <= 0 || incY <= 0) {
1575             throw new RSRuntimeException("Vector increments must be greater than 0");
1576         }
1577         int expectedXDim = 1 + (M - 1) * incX;
1578         if (X.getType().getX() != expectedXDim) {
1579             throw new RSRuntimeException("Incorrect vector dimensions for GER");
1580         }
1581         int expectedYDim = 1 + (N - 1) * incY;
1582         if (Y.getType().getX() != expectedYDim) {
1583             throw new RSRuntimeException("Incorrect vector dimensions for GER");
1584         }
1585 
1586 
1587     }
validateSYR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation A)1588     static int validateSYR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation A) {
1589         validateUplo(Uplo);
1590         if (!A.getType().getElement().isCompatible(e) ||
1591             !X.getType().getElement().isCompatible(e)) {
1592             throw new RSRuntimeException("Called BLAS with wrong Element type");
1593         }
1594 
1595         int N = A.getType().getX();
1596 
1597         if (X.getType().getY() > 1) {
1598             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1599         }
1600         if (N != A.getType().getY()) {
1601             throw new RSRuntimeException("A must be a symmetric matrix");
1602         }
1603         if (incX <= 0) {
1604             throw new RSRuntimeException("Vector increments must be greater than 0");
1605         }
1606         int expectedXDim = 1 + (N - 1) * incX;
1607         if (X.getType().getX() != expectedXDim) {
1608             throw new RSRuntimeException("Incorrect vector dimensions for SYR");
1609         }
1610         return N;
1611     }
validateSPR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Ap)1612     static int validateSPR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Ap) {
1613         validateUplo(Uplo);
1614         if (!Ap.getType().getElement().isCompatible(e) ||
1615             !X.getType().getElement().isCompatible(e)) {
1616             throw new RSRuntimeException("Called BLAS with wrong Element type");
1617         }
1618         if (X.getType().getY() > 1) {
1619             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1620         }
1621 
1622         if (Ap.getType().getY() > 1) {
1623             throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
1624         }
1625 
1626         int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
1627         if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
1628             throw new RSRuntimeException("Invalid dimension for Ap");
1629         }
1630         if (incX <= 0) {
1631             throw new RSRuntimeException("Vector increments must be greater than 0");
1632         }
1633         int expectedXDim = 1 + (N - 1) * incX;
1634         if (X.getType().getX() != expectedXDim) {
1635             throw new RSRuntimeException("Incorrect vector dimensions for SPR");
1636         }
1637 
1638         return N;
1639     }
1640 
validateSYR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A)1641     static int validateSYR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1642         validateUplo(Uplo);
1643         if (!A.getType().getElement().isCompatible(e) ||
1644             !X.getType().getElement().isCompatible(e) ||
1645             !Y.getType().getElement().isCompatible(e)) {
1646             throw new RSRuntimeException("Called BLAS with wrong Element type");
1647         }
1648 
1649         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1650             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1651         }
1652 
1653         int N = A.getType().getX();
1654 
1655         if (N != A.getType().getY()) {
1656             throw new RSRuntimeException("A must be a symmetric matrix");
1657         }
1658         if (incX <= 0 || incY <= 0) {
1659             throw new RSRuntimeException("Vector increments must be greater than 0");
1660         }
1661         int expectedXDim = 1 + (N - 1) * incX;
1662         int expectedYDim = 1 + (N - 1) * incY;
1663         if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) {
1664             throw new RSRuntimeException("Incorrect vector dimensions for SYR");
1665         }
1666         return N;
1667 
1668     }
validateSPR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)1669     static int validateSPR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
1670         validateUplo(Uplo);
1671         if (!Ap.getType().getElement().isCompatible(e) ||
1672             !X.getType().getElement().isCompatible(e) ||
1673             !Y.getType().getElement().isCompatible(e)) {
1674             throw new RSRuntimeException("Called BLAS with wrong Element type");
1675         }
1676         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1677             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1678         }
1679 
1680         if (Ap.getType().getY() > 1) {
1681             throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
1682         }
1683 
1684         int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
1685         if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
1686             throw new RSRuntimeException("Invalid dimension for Ap");
1687         }
1688         if (incX <= 0 || incY <= 0) {
1689             throw new RSRuntimeException("Vector increments must be greater than 0");
1690         }
1691         int expectedXDim = 1 + (N - 1) * incX;
1692         int expectedYDim = 1 + (N - 1) * incY;
1693         if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) {
1694             throw new RSRuntimeException("Incorrect vector dimensions for SPR2");
1695         }
1696 
1697         return N;
1698     }
1699 
1700     /**
1701      * SSYMV performs the matrix-vector operation
1702      * y := alpha*A*x + beta*y
1703      *
1704      * Details: http://www.netlib.org/lapack/explore-html/d2/d94/ssymv_8f.html
1705      *
1706      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1707      * @param alpha The scalar alpha.
1708      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1709      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1710      * @param incX The increment for the elements of vector x, must be larger than zero.
1711      * @param beta The scalar beta.
1712      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1713      * @param incY The increment for the elements of vector y, must be larger than zero.
1714      */
SSYMV(@plo int Uplo, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY)1715     public void SSYMV(@Uplo int Uplo, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
1716         int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY);
1717 
1718         boolean mUseIncSupp = isIncSupp();
1719         long aID = A.getID(mRS);
1720         long xID = X.getID(mRS);
1721         long yID = Y.getID(mRS);
1722         if (mUseIncSupp) {
1723             aID = getDummyAlloc(A);
1724             xID = getDummyAlloc(X);
1725             yID = getDummyAlloc(Y);
1726         }
1727         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
1728     }
1729 
1730     /**
1731      * SSBMV performs the matrix-vector operation
1732      * y := alpha*A*x + beta*y
1733      *
1734      * Details: http://www.netlib.org/lapack/explore-html/d3/da1/ssbmv_8f.html
1735      *
1736      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1737      *       but only the region N*(K+1) will be referenced. The following subroutine can is an
1738      *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1739      *           for i in range(0, n):
1740      *              for j in range(i, min(i+k+1, n)):
1741      *                  b[i, j-i] = a[i, j]
1742      *
1743      * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
1744      * @param K The number of off-diagonals of the matrix A
1745      * @param alpha The scalar alpha.
1746      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1747      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1748      * @param incX The increment for the elements of vector x, must be larger than zero.
1749      * @param beta The scalar beta.
1750      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1751      * @param incY The increment for the elements of vector y, must be larger than zero.
1752      */
SSBMV(@plo int Uplo, int K, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY)1753     public void SSBMV(@Uplo int Uplo, int K, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
1754         // SBMV is the same as SYMV + K >= 0
1755         if (K < 0) {
1756             throw new RSRuntimeException("K must be greater than or equal to 0");
1757         }
1758         int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY);
1759 
1760         boolean mUseIncSupp = isIncSupp();
1761         long aID = A.getID(mRS);
1762         long xID = X.getID(mRS);
1763         long yID = Y.getID(mRS);
1764         if (mUseIncSupp) {
1765             aID = getDummyAlloc(A);
1766             xID = getDummyAlloc(X);
1767             yID = getDummyAlloc(Y);
1768         }
1769         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
1770     }
1771 
1772     /**
1773      * SSPMV performs the matrix-vector operation
1774      * y := alpha*A*x + beta*y
1775      *
1776      * Details: http://www.netlib.org/lapack/explore-html/d8/d68/sspmv_8f.html
1777      *
1778      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1779      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1780      *       'a' to packed matrix 'b'.
1781      *           k = 0
1782      *           for i in range(0, n):
1783      *              for j in range(i, n):
1784      *                  b[k++] = a[i, j]
1785      *
1786      * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
1787      * @param alpha The scalar alpha.
1788      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.
1789      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1790      * @param incX The increment for the elements of vector x, must be larger than zero.
1791      * @param beta The scalar beta.
1792      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1793      * @param incY The increment for the elements of vector y, must be larger than zero.
1794      */
SSPMV(@plo int Uplo, float alpha, Allocation Ap, Allocation X, int incX, float beta, Allocation Y, int incY)1795     public void SSPMV(@Uplo int Uplo, float alpha, Allocation Ap, Allocation X, int incX, float beta, Allocation Y, int incY) {
1796         int N = validateSPMV(Element.F32(mRS), Uplo, Ap, X, incX, Y, incY);
1797 
1798         boolean mUseIncSupp = isIncSupp();
1799         long apID = Ap.getID(mRS);
1800         long xID = X.getID(mRS);
1801         long yID = Y.getID(mRS);
1802         if (mUseIncSupp) {
1803             apID = getDummyAlloc(Ap);
1804             xID = getDummyAlloc(X);
1805             yID = getDummyAlloc(Y);
1806         }
1807         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, apID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
1808     }
1809 
1810     /**
1811      * SGER performs the rank 1 operation
1812      * A := alpha*x*y**T + A
1813      *
1814      * Details: http://www.netlib.org/lapack/explore-html/db/d5c/sger_8f.html
1815      *
1816      * @param alpha The scalar alpha.
1817      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1818      * @param incX The increment for the elements of vector x, must be larger than zero.
1819      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1820      * @param incY The increment for the elements of vector y, must be larger than zero.
1821      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1822      */
SGER(float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)1823     public void SGER(float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1824         int M = A.getType().getY();
1825         int N = A.getType().getX();
1826         validateGER(Element.F32(mRS), X, incX, Y, incY, A);
1827 
1828         boolean mUseIncSupp = isIncSupp();
1829         long aID = A.getID(mRS);
1830         long xID = X.getID(mRS);
1831         long yID = Y.getID(mRS);
1832         if (mUseIncSupp) {
1833             aID = getDummyAlloc(A);
1834             xID = getDummyAlloc(X);
1835             yID = getDummyAlloc(Y);
1836         }
1837         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sger, 0, 0, 0, 0, 0, M, N, 0, alpha, xID, yID, 0.f, aID, incX, incY, 0, 0, mUseIncSupp);
1838     }
1839 
1840     /**
1841      * SSYR performs the rank 1 operation
1842      * A := alpha*x*x**T + A
1843      *
1844      * Details: http://www.netlib.org/lapack/explore-html/d6/dac/ssyr_8f.html
1845      *
1846      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1847      * @param alpha The scalar alpha.
1848      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1849      * @param incX The increment for the elements of vector x, must be larger than zero.
1850      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1851      */
SSYR(@plo int Uplo, float alpha, Allocation X, int incX, Allocation A)1852     public void SSYR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) {
1853         int N = validateSYR(Element.F32(mRS), Uplo, X, incX, A);
1854 
1855         boolean mUseIncSupp = isIncSupp();
1856         long aID = A.getID(mRS);
1857         long xID = X.getID(mRS);
1858         if (mUseIncSupp) {
1859             aID = getDummyAlloc(A);
1860             xID = getDummyAlloc(X);
1861         }
1862         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, aID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp);
1863     }
1864 
1865     /**
1866      * SSPR performs the rank 1 operation
1867      * A := alpha*x*x**T + A
1868      *
1869      * Details: http://www.netlib.org/lapack/explore-html/d2/d9b/sspr_8f.html
1870      *
1871      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1872      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1873      *       'a' to packed matrix 'b'.
1874      *           k = 0
1875      *           for i in range(0, n):
1876      *              for j in range(i, n):
1877      *                  b[k++] = a[i, j]
1878      *
1879      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
1880      * @param alpha The scalar alpha.
1881      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1882      * @param incX The increment for the elements of vector x, must be larger than zero.
1883      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.
1884      */
SSPR(@plo int Uplo, float alpha, Allocation X, int incX, Allocation Ap)1885     public void SSPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) {
1886         int N = validateSPR(Element.F32(mRS), Uplo, X, incX, Ap);
1887 
1888         boolean mUseIncSupp = isIncSupp();
1889         long apID = Ap.getID(mRS);
1890         long xID = X.getID(mRS);
1891         if (mUseIncSupp) {
1892             apID = getDummyAlloc(Ap);
1893             xID = getDummyAlloc(X);
1894         }
1895         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, apID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp);
1896     }
1897 
1898     /**
1899      * SSYR2 performs the symmetric rank 2 operation
1900      * A := alpha*x*y**T + alpha*y*x**T + A
1901      *
1902      * Details: http://www.netlib.org/lapack/explore-html/db/d99/ssyr2_8f.html
1903      *
1904      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1905      * @param alpha The scalar alpha.
1906      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1907      * @param incX The increment for the elements of vector x, must be larger than zero.
1908      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1909      * @param incY The increment for the elements of vector y, must be larger than zero.
1910      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1911      */
SSYR2(@plo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)1912     public void SSYR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1913         int N = validateSYR2(Element.F32(mRS), Uplo, X, incX, Y, incY, A);
1914 
1915         boolean mUseIncSupp = isIncSupp();
1916         long aID = A.getID(mRS);
1917         long xID = X.getID(mRS);
1918         long yID = Y.getID(mRS);
1919         if (mUseIncSupp) {
1920             aID = getDummyAlloc(A);
1921             xID = getDummyAlloc(X);
1922             yID = getDummyAlloc(Y);
1923         }
1924         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, aID, incX, incY, 0, 0, mUseIncSupp);
1925     }
1926 
1927     /**
1928      * SSPR2 performs the symmetric rank 2 operation
1929      * A := alpha*x*y**T + alpha*y*x**T + A
1930      *
1931      * Details: http://www.netlib.org/lapack/explore-html/db/d3e/sspr2_8f.html
1932      *
1933      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1934      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1935      *       'a' to packed matrix 'b'.
1936      *           k = 0
1937      *           for i in range(0, n):
1938      *              for j in range(i, n):
1939      *                  b[k++] = a[i, j]
1940      *
1941      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
1942      * @param alpha The scalar alpha.
1943      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1944      * @param incX The increment for the elements of vector x, must be larger than zero.
1945      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1946      * @param incY The increment for the elements of vector y, must be larger than zero.
1947      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.
1948      */
SSPR2(@plo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)1949     public void SSPR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
1950         int N = validateSPR2(Element.F32(mRS), Uplo, X, incX, Y, incY, Ap);
1951 
1952         boolean mUseIncSupp = isIncSupp();
1953         long apID = Ap.getID(mRS);
1954         long xID = X.getID(mRS);
1955         long yID = Y.getID(mRS);
1956         if (mUseIncSupp) {
1957             apID = getDummyAlloc(Ap);
1958             xID = getDummyAlloc(X);
1959             yID = getDummyAlloc(Y);
1960         }
1961         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, apID, incX, incY, 0, 0, mUseIncSupp);
1962     }
1963 
1964     /**
1965      * DSYMV performs the matrix-vector operation
1966      * y := alpha*A*x + beta*y
1967      *
1968      * Details: http://www.netlib.org/lapack/explore-html/d8/dbe/dsymv_8f.html
1969      *
1970      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1971      * @param alpha The scalar alpha.
1972      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1973      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1974      * @param incX The increment for the elements of vector x, must be larger than zero.
1975      * @param beta The scalar beta.
1976      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
1977      * @param incY The increment for the elements of vector y, must be larger than zero.
1978      */
DSYMV(@plo int Uplo, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY)1979     public void DSYMV(@Uplo int Uplo, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
1980         int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY);
1981 
1982         boolean mUseIncSupp = isIncSupp();
1983         long aID = A.getID(mRS);
1984         long xID = X.getID(mRS);
1985         long yID = Y.getID(mRS);
1986         if (mUseIncSupp) {
1987             aID = getDummyAlloc(A);
1988             xID = getDummyAlloc(X);
1989             yID = getDummyAlloc(Y);
1990         }
1991         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
1992     }
1993 
1994     /**
1995      * DSBMV performs the matrix-vector operation
1996      * y := alpha*A*x + beta*y
1997      *
1998      * Details: http://www.netlib.org/lapack/explore-html/d8/d1e/dsbmv_8f.html
1999      *
2000      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2001      *       but only the region N*(K+1) will be referenced. The following subroutine can is an
2002      *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
2003      *           for i in range(0, n):
2004      *              for j in range(i, min(i+k+1, n)):
2005      *                  b[i, j-i] = a[i, j]
2006      *
2007      * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
2008      * @param K The number of off-diagonals of the matrix A
2009      * @param alpha The scalar alpha.
2010      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2011      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
2012      * @param incX The increment for the elements of vector x, must be larger than zero.
2013      * @param beta The scalar beta.
2014      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
2015      * @param incY The increment for the elements of vector y, must be larger than zero.
2016      */
DSBMV(@plo int Uplo, int K, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY)2017     public void DSBMV(@Uplo int Uplo, int K, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
2018         // SBMV is the same as SYMV + K >= 0
2019         if (K < 0) {
2020             throw new RSRuntimeException("K must be greater than or equal to 0");
2021         }
2022         int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY);
2023 
2024         boolean mUseIncSupp = isIncSupp();
2025         long aID = A.getID(mRS);
2026         long xID = X.getID(mRS);
2027         long yID = Y.getID(mRS);
2028         if (mUseIncSupp) {
2029             aID = getDummyAlloc(A);
2030             xID = getDummyAlloc(X);
2031             yID = getDummyAlloc(Y);
2032         }
2033         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
2034     }
2035 
2036     /**
2037      * DSPMV performs the matrix-vector operation
2038      * y := alpha*A*x + beta*y
2039      *
2040      * Details: http://www.netlib.org/lapack/explore-html/d4/d85/dspmv_8f.html
2041      *
2042      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2043      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2044      *       'a' to packed matrix 'b'.
2045      *           k = 0
2046      *           for i in range(0, n):
2047      *              for j in range(i, n):
2048      *                  b[k++] = a[i, j]
2049      *
2050      * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
2051      * @param alpha The scalar alpha.
2052      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.
2053      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
2054      * @param incX The increment for the elements of vector x, must be larger than zero.
2055      * @param beta The scalar beta.
2056      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
2057      * @param incY The increment for the elements of vector y, must be larger than zero.
2058      */
DSPMV(@plo int Uplo, double alpha, Allocation Ap, Allocation X, int incX, double beta, Allocation Y, int incY)2059     public void DSPMV(@Uplo int Uplo, double alpha, Allocation Ap, Allocation X, int incX, double beta, Allocation Y, int incY) {
2060         int N = validateSPMV(Element.F64(mRS), Uplo, Ap, X, incX, Y, incY);
2061 
2062         boolean mUseIncSupp = isIncSupp();
2063         long apID = Ap.getID(mRS);
2064         long xID = X.getID(mRS);
2065         long yID = Y.getID(mRS);
2066         if (mUseIncSupp) {
2067             apID = getDummyAlloc(Ap);
2068             xID = getDummyAlloc(X);
2069             yID = getDummyAlloc(Y);
2070         }
2071         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, apID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp);
2072     }
2073 
2074     /**
2075      * DGER performs the rank 1 operation
2076      * A := alpha*x*y**T + A
2077      *
2078      * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dger_8f.html
2079      *
2080      * @param alpha The scalar alpha.
2081      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
2082      * @param incX The increment for the elements of vector x, must be larger than zero.
2083      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
2084      * @param incY The increment for the elements of vector y, must be larger than zero.
2085      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2086      */
DGER(double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2087     public void DGER(double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2088         int M = A.getType().getY();
2089         int N = A.getType().getX();
2090         validateGER(Element.F64(mRS), X, incX, Y, incY, A);
2091 
2092         boolean mUseIncSupp = isIncSupp();
2093         long aID = A.getID(mRS);
2094         long xID = X.getID(mRS);
2095         long yID = Y.getID(mRS);
2096         if (mUseIncSupp) {
2097             aID = getDummyAlloc(A);
2098             xID = getDummyAlloc(X);
2099             yID = getDummyAlloc(Y);
2100         }
2101         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dger, 0, 0, 0, 0, 0, M, N, 0, alpha, xID, yID, 0.f, aID, incX, incY, 0, 0, mUseIncSupp);
2102     }
2103 
2104     /**
2105      * DSYR performs the rank 1 operation
2106      * A := alpha*x*x**T + A
2107      *
2108      * Details: http://www.netlib.org/lapack/explore-html/d3/d60/dsyr_8f.html
2109      *
2110      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2111      * @param alpha The scalar alpha.
2112      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
2113      * @param incX The increment for the elements of vector x, must be larger than zero.
2114      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2115      */
DSYR(@plo int Uplo, double alpha, Allocation X, int incX, Allocation A)2116     public void DSYR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) {
2117         int N = validateSYR(Element.F64(mRS), Uplo, X, incX, A);
2118 
2119         boolean mUseIncSupp = isIncSupp();
2120         long aID = A.getID(mRS);
2121         long xID = X.getID(mRS);
2122         if (mUseIncSupp) {
2123             aID = getDummyAlloc(A);
2124             xID = getDummyAlloc(X);
2125         }
2126         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, aID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp);
2127     }
2128 
2129     /**
2130      * DSPR performs the rank 1 operation
2131      * A := alpha*x*x**T + A
2132      *
2133      * Details: http://www.netlib.org/lapack/explore-html/dd/dba/dspr_8f.html
2134      *
2135      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2136      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2137      *       'a' to packed matrix 'b'.
2138      *           k = 0
2139      *           for i in range(0, n):
2140      *              for j in range(i, n):
2141      *                  b[k++] = a[i, j]
2142      *
2143      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2144      * @param alpha The scalar alpha.
2145      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
2146      * @param incX The increment for the elements of vector x, must be larger than zero.
2147      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.
2148      */
DSPR(@plo int Uplo, double alpha, Allocation X, int incX, Allocation Ap)2149     public void DSPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) {
2150         int N = validateSPR(Element.F64(mRS), Uplo, X, incX, Ap);
2151 
2152         boolean mUseIncSupp = isIncSupp();
2153         long apID = Ap.getID(mRS);
2154         long xID = X.getID(mRS);
2155         if (mUseIncSupp) {
2156             apID = getDummyAlloc(Ap);
2157             xID = getDummyAlloc(X);
2158         }
2159         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, apID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp);
2160     }
2161 
2162     /**
2163      * DSYR2 performs the symmetric rank 2 operation
2164      * A := alpha*x*y**T + alpha*y*x**T + A
2165      *
2166      * Details: http://www.netlib.org/lapack/explore-html/de/d41/dsyr2_8f.html
2167      *
2168      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2169      * @param alpha The scalar alpha.
2170      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
2171      * @param incX The increment for the elements of vector x, must be larger than zero.
2172      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
2173      * @param incY The increment for the elements of vector y, must be larger than zero.
2174      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2175      */
DSYR2(@plo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2176     public void DSYR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2177         int N = validateSYR2(Element.F64(mRS), Uplo, X, incX, Y, incY, A);
2178 
2179         boolean mUseIncSupp = isIncSupp();
2180         long aID = A.getID(mRS);
2181         long xID = X.getID(mRS);
2182         long yID = Y.getID(mRS);
2183         if (mUseIncSupp) {
2184             aID = getDummyAlloc(A);
2185             xID = getDummyAlloc(X);
2186             yID = getDummyAlloc(Y);
2187         }
2188         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, aID, incX, incY, 0, 0, mUseIncSupp);
2189     }
2190 
2191     /**
2192      * DSPR2 performs the symmetric rank 2 operation
2193      * A := alpha*x*y**T + alpha*y*x**T + A
2194      *
2195      * Details: http://www.netlib.org/lapack/explore-html/dd/d9e/dspr2_8f.html
2196      *
2197      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2198      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2199      *       'a' to packed matrix 'b'.
2200      *           k = 0
2201      *           for i in range(0, n):
2202      *              for j in range(i, n):
2203      *                  b[k++] = a[i, j]
2204      *
2205      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2206      * @param alpha The scalar alpha.
2207      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
2208      * @param incX The increment for the elements of vector x, must be larger than zero.
2209      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
2210      * @param incY The increment for the elements of vector y, must be larger than zero.
2211      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.
2212      */
DSPR2(@plo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)2213     public void DSPR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
2214         int N = validateSPR2(Element.F64(mRS), Uplo, X, incX, Y, incY, Ap);
2215 
2216         boolean mUseIncSupp = isIncSupp();
2217         long apID = Ap.getID(mRS);
2218         long xID = X.getID(mRS);
2219         long yID = Y.getID(mRS);
2220         if (mUseIncSupp) {
2221             apID = getDummyAlloc(Ap);
2222             xID = getDummyAlloc(X);
2223             yID = getDummyAlloc(Y);
2224         }
2225         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, apID, incX, incY, 0, 0, mUseIncSupp);
2226     }
2227 
2228 
2229     /**
2230      * Level 2, C and Z only
2231      */
2232 
validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A)2233     static void validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2234         if (!A.getType().getElement().isCompatible(e) ||
2235             !X.getType().getElement().isCompatible(e) ||
2236             !Y.getType().getElement().isCompatible(e)) {
2237             throw new RSRuntimeException("Called BLAS with wrong Element type");
2238         }
2239         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
2240             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
2241         }
2242 
2243         int M = A.getType().getY();
2244         int N = A.getType().getX();
2245         if (incX <= 0 || incY <= 0) {
2246             throw new RSRuntimeException("Vector increments must be greater than 0");
2247         }
2248         int expectedXDim = 1 + (M - 1) * incX;
2249         if (X.getType().getX() != expectedXDim) {
2250             throw new RSRuntimeException("Incorrect vector dimensions for GERU");
2251         }
2252         int expectedYDim = 1 + (N - 1) * incY;
2253         if (Y.getType().getX() != expectedYDim) {
2254             throw new RSRuntimeException("Incorrect vector dimensions for GERU");
2255         }
2256 
2257     }
2258 
2259     /**
2260      * CHEMV performs the matrix-vector operation
2261      * y := alpha*A*x + beta*y
2262      *
2263      * Details: http://www.netlib.org/lapack/explore-html/d7/d51/chemv_8f.html
2264      *
2265      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2266      * @param alpha The scalar alpha.
2267      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2268      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2269      * @param incX The increment for the elements of vector x, must be larger than zero.
2270      * @param beta The scalar beta.
2271      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2272      * @param incY The increment for the elements of vector y, must be larger than zero.
2273      */
CHEMV(@plo int Uplo, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY)2274     public void CHEMV(@Uplo int Uplo, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
2275         // HEMV is the same as SYR2 validation-wise
2276         int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);
2277 
2278         boolean mUseIncSupp = isIncSupp();
2279         long aID = A.getID(mRS);
2280         long xID = X.getID(mRS);
2281         long yID = Y.getID(mRS);
2282         if (mUseIncSupp) {
2283             aID = getDummyAlloc(A);
2284             xID = getDummyAlloc(X);
2285             yID = getDummyAlloc(Y);
2286         }
2287         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
2288     }
2289 
2290     /**
2291      * CHBMV performs the matrix-vector operation
2292      * y := alpha*A*x + beta*y
2293      *
2294      * Details: http://www.netlib.org/lapack/explore-html/db/dc2/chbmv_8f.html
2295      *
2296      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2297      *       but only the region N*(K+1) will be referenced. The following subroutine can is an
2298      *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
2299      *           for i in range(0, n):
2300      *              for j in range(i, min(i+k+1, n)):
2301      *                  b[i, j-i] = a[i, j]
2302      *
2303      * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
2304      * @param K The number of off-diagonals of the matrix A
2305      * @param alpha The scalar alpha.
2306      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2307      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2308      * @param incX The increment for the elements of vector x, must be larger than zero.
2309      * @param beta The scalar beta.
2310      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2311      * @param incY The increment for the elements of vector y, must be larger than zero.
2312      */
CHBMV(@plo int Uplo, int K, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY)2313     public void CHBMV(@Uplo int Uplo, int K, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
2314         // HBMV is the same as SYR2 validation-wise
2315         int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);
2316         if (K < 0) {
2317             throw new RSRuntimeException("K must be 0 or greater for HBMV");
2318         }
2319 
2320         boolean mUseIncSupp = isIncSupp();
2321         long aID = A.getID(mRS);
2322         long xID = X.getID(mRS);
2323         long yID = Y.getID(mRS);
2324         if (mUseIncSupp) {
2325             aID = getDummyAlloc(A);
2326             xID = getDummyAlloc(X);
2327             yID = getDummyAlloc(Y);
2328         }
2329         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
2330     }
2331 
2332     /**
2333      * CHPMV performs the matrix-vector operation
2334      * y := alpha*A*x + beta*y
2335      *
2336      * Details: http://www.netlib.org/lapack/explore-html/d2/d06/chpmv_8f.html
2337      *
2338      * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2339      *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2340      *       'a' to packed matrix 'b'.
2341      *           k = 0
2342      *           for i in range(0, n):
2343      *              for j in range(i, n):
2344      *                  b[k++] = a[i, j]
2345      *
2346      * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
2347      * @param alpha The scalar alpha.
2348      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2349      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2350      * @param incX The increment for the elements of vector x, must be larger than zero.
2351      * @param beta The scalar beta.
2352      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2353      * @param incY The increment for the elements of vector y, must be larger than zero.
2354      */
CHPMV(@plo int Uplo, Float2 alpha, Allocation Ap, Allocation X, int incX, Float2 beta, Allocation Y, int incY)2355     public void CHPMV(@Uplo int Uplo, Float2 alpha, Allocation Ap, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
2356         // HPMV is the same as SPR2
2357         int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap);
2358 
2359         boolean mUseIncSupp = isIncSupp();
2360         long apID = Ap.getID(mRS);
2361         long xID = X.getID(mRS);
2362         long yID = Y.getID(mRS);
2363         if (mUseIncSupp) {
2364             apID = getDummyAlloc(Ap);
2365             xID = getDummyAlloc(X);
2366             yID = getDummyAlloc(Y);
2367         }
2368         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, apID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
2369     }
2370 
2371     /**
2372      * CGERU performs the rank 1 operation
2373      * A := alpha*x*y**T + A
2374      *
2375      * Details: http://www.netlib.org/lapack/explore-html/db/d5f/cgeru_8f.html
2376      *
2377      * @param alpha The scalar alpha.
2378      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2379      * @param incX The increment for the elements of vector x, must be larger than zero.
2380      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2381      * @param incY The increment for the elements of vector y, must be larger than zero.
2382      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2383      */
CGERU(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2384     public void CGERU(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2385         validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A);
2386         int M = A.getType().getY();
2387         int N = A.getType().getX();
2388 
2389         boolean mUseIncSupp = isIncSupp();
2390         long aID = A.getID(mRS);
2391         long xID = X.getID(mRS);
2392         long yID = Y.getID(mRS);
2393         if (mUseIncSupp) {
2394             aID = getDummyAlloc(A);
2395             xID = getDummyAlloc(X);
2396             yID = getDummyAlloc(Y);
2397         }
2398         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp);
2399     }
2400 
2401     /**
2402      * CGERC performs the rank 1 operation
2403      * A := alpha*x*y**H + A
2404      *
2405      * Details: http://www.netlib.org/lapack/explore-html/dd/d84/cgerc_8f.html
2406      *
2407      * @param alpha The scalar alpha.
2408      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2409      * @param incX The increment for the elements of vector x, must be larger than zero.
2410      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2411      * @param incY The increment for the elements of vector y, must be larger than zero.
2412      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2413      */
CGERC(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2414     public void CGERC(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2415         // same as GERU
2416         validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A);
2417         int M = A.getType().getY();
2418         int N = A.getType().getX();
2419 
2420         boolean mUseIncSupp = isIncSupp();
2421         long aID = A.getID(mRS);
2422         long xID = X.getID(mRS);
2423         long yID = Y.getID(mRS);
2424         if (mUseIncSupp) {
2425             aID = getDummyAlloc(A);
2426             xID = getDummyAlloc(X);
2427             yID = getDummyAlloc(Y);
2428         }
2429         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp);
2430     }
2431 
2432     /**
2433      * CHER performs the rank 1 operation
2434      * A := alpha*x*x**H + A
2435      *
2436      * Details: http://www.netlib.org/lapack/explore-html/d3/d6d/cher_8f.html
2437      *
2438      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2439      * @param alpha The scalar alpha.
2440      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2441      * @param incX The increment for the elements of vector x, must be larger than zero.
2442      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2443      */
CHER(@plo int Uplo, float alpha, Allocation X, int incX, Allocation A)2444     public void CHER(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) {
2445         // same as SYR
2446         int N = validateSYR(Element.F32_2(mRS), Uplo, X, incX, A);
2447 
2448         boolean mUseIncSupp = isIncSupp();
2449         long aID = A.getID(mRS);
2450         long xID = X.getID(mRS);
2451         if (mUseIncSupp) {
2452             aID = getDummyAlloc(A);
2453             xID = getDummyAlloc(X);
2454         }
2455         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, aID, incX, 0, 0, 0, mUseIncSupp);
2456     }
2457 
2458     /**
2459      * CHPR performs the rank 1 operation
2460      * A := alpha*x*x**H + A
2461      *
2462      * Details: http://www.netlib.org/lapack/explore-html/db/dcd/chpr_8f.html
2463      *
2464      * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2465      *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2466      *       'a' to packed matrix 'b'.
2467      *           k = 0
2468      *           for i in range(0, n):
2469      *              for j in range(i, n):
2470      *                  b[k++] = a[i, j]
2471      *
2472      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2473      * @param alpha The scalar alpha.
2474      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2475      * @param incX The increment for the elements of vector x, must be larger than zero.
2476      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2477      */
CHPR(@plo int Uplo, float alpha, Allocation X, int incX, Allocation Ap)2478     public void CHPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) {
2479         // equivalent to SPR for validation
2480         int N = validateSPR(Element.F32_2(mRS), Uplo, X, incX, Ap);
2481 
2482         boolean mUseIncSupp = isIncSupp();
2483         long apID = Ap.getID(mRS);
2484         long xID = X.getID(mRS);
2485         if (mUseIncSupp) {
2486             apID = getDummyAlloc(Ap);
2487             xID = getDummyAlloc(X);
2488         }
2489         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, apID, incX, 0, 0, 0, mUseIncSupp);
2490     }
2491 
2492     /**
2493      * CHER2 performs the hermitian rank 2 operation
2494      * A := alpha*x*y**H + conjg(alpha)*y*x**H + A
2495      *
2496      * Details: http://www.netlib.org/lapack/explore-html/db/d87/cher2_8f.html
2497      *
2498      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2499      * @param alpha The scalar alpha.
2500      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2501      * @param incX The increment for the elements of vector x, must be larger than zero.
2502      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2503      * @param incY The increment for the elements of vector y, must be larger than zero.
2504      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2505      */
CHER2(@plo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2506     public void CHER2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2507         // same as SYR2
2508         int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);
2509 
2510         boolean mUseIncSupp = isIncSupp();
2511         long aID = A.getID(mRS);
2512         long xID = X.getID(mRS);
2513         long yID = Y.getID(mRS);
2514         if (mUseIncSupp) {
2515             aID = getDummyAlloc(A);
2516             xID = getDummyAlloc(X);
2517             yID = getDummyAlloc(Y);
2518         }
2519         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp);
2520     }
2521 
2522     /**
2523      * CHPR2 performs the hermitian rank 2 operation
2524      * A := alpha*x*y**H + conjg(alpha)*y*x**H + A
2525      *
2526      * Details: http://www.netlib.org/lapack/explore-html/d6/d44/chpr2_8f.html
2527      *
2528      * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2529      *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2530      *       'a' to packed matrix 'b'.
2531      *           k = 0
2532      *           for i in range(0, n):
2533      *              for j in range(i, n):
2534      *                  b[k++] = a[i, j]
2535      *
2536      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2537      * @param alpha The scalar alpha.
2538      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2539      * @param incX The increment for the elements of vector x, must be larger than zero.
2540      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2541      * @param incY The increment for the elements of vector y, must be larger than zero.
2542      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2543      */
CHPR2(@plo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)2544     public void CHPR2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
2545         // same as SPR2
2546         int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap);
2547 
2548         boolean mUseIncSupp = isIncSupp();
2549         long apID = Ap.getID(mRS);
2550         long xID = X.getID(mRS);
2551         long yID = Y.getID(mRS);
2552         if (mUseIncSupp) {
2553             apID = getDummyAlloc(Ap);
2554             xID = getDummyAlloc(X);
2555             yID = getDummyAlloc(Y);
2556         }
2557         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, apID, incX, incY, 0, 0, mUseIncSupp);
2558     }
2559 
2560     /**
2561      * ZHEMV performs the matrix-vector operation
2562      * y := alpha*A*x + beta*y
2563      *
2564      * Details: http://www.netlib.org/lapack/explore-html/d0/ddd/zhemv_8f.html
2565      *
2566      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2567      * @param alpha The scalar alpha.
2568      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2569      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2570      * @param incX The increment for the elements of vector x, must be larger than zero.
2571      * @param beta The scalar beta.
2572      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2573      * @param incY The increment for the elements of vector y, must be larger than zero.
2574      */
ZHEMV(@plo int Uplo, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY)2575     public void ZHEMV(@Uplo int Uplo, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
2576         // HEMV is the same as SYR2 validation-wise
2577         int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);
2578 
2579         boolean mUseIncSupp = isIncSupp();
2580         long aID = A.getID(mRS);
2581         long xID = X.getID(mRS);
2582         long yID = Y.getID(mRS);
2583         if (mUseIncSupp) {
2584             aID = getDummyAlloc(A);
2585             xID = getDummyAlloc(X);
2586             yID = getDummyAlloc(Y);
2587         }
2588         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
2589     }
2590 
2591     /**
2592      * ZHBMV performs the matrix-vector operation
2593      * y := alpha*A*x + beta*y
2594      *
2595      * Details: http://www.netlib.org/lapack/explore-html/d3/d1a/zhbmv_8f.html
2596      *
2597      * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2598      *       but only the region N*(K+1) will be referenced. The following subroutine is an
2599      *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2600      *           for i in range(0, n):
2601      *              for j in range(i, min(i+k+1, n)):
2602      *                  b[i, j-i] = a[i, j]
2603      *
2604      * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
2605      * @param K The number of off-diagonals of the matrix A
2606      * @param alpha The scalar alpha.
2607      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2608      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2609      * @param incX The increment for the elements of vector x, must be larger than zero.
2610      * @param beta The scalar beta.
2611      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2612      * @param incY The increment for the elements of vector y, must be larger than zero.
2613      */
ZHBMV(@plo int Uplo, int K, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY)2614     public void ZHBMV(@Uplo int Uplo, int K, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
2615         // HBMV is the same as SYR2 validation-wise
2616         int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);
2617         if (K < 0) {
2618             throw new RSRuntimeException("K must be 0 or greater for HBMV");
2619         }
2620 
2621         boolean mUseIncSupp = isIncSupp();
2622         long aID = A.getID(mRS);
2623         long xID = X.getID(mRS);
2624         long yID = Y.getID(mRS);
2625         if (mUseIncSupp) {
2626             aID = getDummyAlloc(A);
2627             xID = getDummyAlloc(X);
2628             yID = getDummyAlloc(Y);
2629         }
2630         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
2631     }
2632 
2633     /**
2634      * ZHPMV performs the matrix-vector operation
2635      * y := alpha*A*x + beta*y
2636      *
2637      * Details: http://www.netlib.org/lapack/explore-html/d0/d60/zhpmv_8f.html
2638      *
2639      * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2640      *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2641      *       'a' to packed matrix 'b'.
2642      *           k = 0
2643      *           for i in range(0, n):
2644      *              for j in range(i, n):
2645      *                  b[k++] = a[i, j]
2646      *
2647      * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
2648      * @param alpha The scalar alpha.
2649      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2650      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2651      * @param incX The increment for the elements of vector x, must be larger than zero.
2652      * @param beta The scalar beta.
2653      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2654      * @param incY The increment for the elements of vector y, must be larger than zero.
2655      */
ZHPMV(@plo int Uplo, Double2 alpha, Allocation Ap, Allocation X, int incX, Double2 beta, Allocation Y, int incY)2656     public void ZHPMV(@Uplo int Uplo, Double2 alpha, Allocation Ap, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
2657         // HPMV is the same as SPR2
2658         int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap);
2659 
2660         boolean mUseIncSupp = isIncSupp();
2661         long apID = Ap.getID(mRS);
2662         long xID = X.getID(mRS);
2663         long yID = Y.getID(mRS);
2664         if (mUseIncSupp) {
2665             apID = getDummyAlloc(Ap);
2666             xID = getDummyAlloc(X);
2667             yID = getDummyAlloc(Y);
2668         }
2669         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, apID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp);
2670     }
2671 
2672     /**
2673      * ZGERU performs the rank 1 operation
2674      * A := alpha*x*y**T + A
2675      *
2676      * Details: http://www.netlib.org/lapack/explore-html/d7/d12/zgeru_8f.html
2677      *
2678      * @param alpha The scalar alpha.
2679      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2680      * @param incX The increment for the elements of vector x, must be larger than zero.
2681      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2682      * @param incY The increment for the elements of vector y, must be larger than zero.
2683      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2684      */
ZGERU(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2685     public void ZGERU(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2686         validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A);
2687         int M = A.getType().getY();
2688         int N = A.getType().getX();
2689 
2690         boolean mUseIncSupp = isIncSupp();
2691         long aID = A.getID(mRS);
2692         long xID = X.getID(mRS);
2693         long yID = Y.getID(mRS);
2694         if (mUseIncSupp) {
2695             aID = getDummyAlloc(A);
2696             xID = getDummyAlloc(X);
2697             yID = getDummyAlloc(Y);
2698         }
2699         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp);
2700     }
2701 
2702     /**
2703      * ZGERC performs the rank 1 operation
2704      * A := alpha*x*y**H + A
2705      *
2706      * Details: http://www.netlib.org/lapack/explore-html/d3/dad/zgerc_8f.html
2707      *
2708      * @param alpha The scalar alpha.
2709      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2710      * @param incX The increment for the elements of vector x, must be larger than zero.
2711      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2712      * @param incY The increment for the elements of vector y, must be larger than zero.
2713      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2714      */
ZGERC(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2715     public void ZGERC(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2716         // same as GERU
2717         validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A);
2718         int M = A.getType().getY();
2719         int N = A.getType().getX();
2720 
2721         boolean mUseIncSupp = isIncSupp();
2722         long aID = A.getID(mRS);
2723         long xID = X.getID(mRS);
2724         long yID = Y.getID(mRS);
2725         if (mUseIncSupp) {
2726             aID = getDummyAlloc(A);
2727             xID = getDummyAlloc(X);
2728             yID = getDummyAlloc(Y);
2729         }
2730         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp);
2731     }
2732 
2733     /**
2734      * ZHER performs the rank 1 operation
2735      * A := alpha*x*x**H + A
2736      *
2737      * Details: http://www.netlib.org/lapack/explore-html/de/d0e/zher_8f.html
2738      *
2739      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2740      * @param alpha The scalar alpha.
2741      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2742      * @param incX The increment for the elements of vector x, must be larger than zero.
2743      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2744      */
ZHER(@plo int Uplo, double alpha, Allocation X, int incX, Allocation A)2745     public void ZHER(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) {
2746         // same as SYR
2747         int N = validateSYR(Element.F64_2(mRS), Uplo, X, incX, A);
2748 
2749         boolean mUseIncSupp = isIncSupp();
2750         long aID = A.getID(mRS);
2751         long xID = X.getID(mRS);
2752         if (mUseIncSupp) {
2753             aID = getDummyAlloc(A);
2754             xID = getDummyAlloc(X);
2755         }
2756         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, aID, incX, 0, 0, 0, mUseIncSupp);
2757     }
2758 
2759     /**
2760      * ZHPR performs the rank 1 operation
2761      * A := alpha*x*x**H + A
2762      *
2763      * Details: http://www.netlib.org/lapack/explore-html/de/de1/zhpr_8f.html
2764      *
2765      * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2766      *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2767      *       'a' to packed matrix 'b'.
2768      *           k = 0
2769      *           for i in range(0, n):
2770      *              for j in range(i, n):
2771      *                  b[k++] = a[i, j]
2772      *
2773      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2774      * @param alpha The scalar alpha.
2775      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2776      * @param incX The increment for the elements of vector x, must be larger than zero.
2777      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2778      */
ZHPR(@plo int Uplo, double alpha, Allocation X, int incX, Allocation Ap)2779     public void ZHPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) {
2780         // equivalent to SPR for validation
2781         int N = validateSPR(Element.F64_2(mRS), Uplo, X, incX, Ap);
2782 
2783         boolean mUseIncSupp = isIncSupp();
2784         long apID = Ap.getID(mRS);
2785         long xID = X.getID(mRS);
2786         if (mUseIncSupp) {
2787             apID = getDummyAlloc(Ap);
2788             xID = getDummyAlloc(X);
2789         }
2790         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, apID, incX, 0, 0, 0, mUseIncSupp);
2791     }
2792 
2793     /**
2794      * ZHER2 performs the hermitian rank 2 operation
2795      * A := alpha*x*y**H + conjg(alpha)*y*x**H + A
2796      *
2797      * Details: http://www.netlib.org/lapack/explore-html/da/d8a/zher2_8f.html
2798      *
2799      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2800      * @param alpha The scalar alpha.
2801      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2802      * @param incX The increment for the elements of vector x, must be larger than zero.
2803      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2804      * @param incY The increment for the elements of vector y, must be larger than zero.
2805      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2806      */
ZHER2(@plo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2807     public void ZHER2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2808         // same as SYR2
2809         int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);
2810 
2811         boolean mUseIncSupp = isIncSupp();
2812         long aID = A.getID(mRS);
2813         long xID = X.getID(mRS);
2814         long yID = Y.getID(mRS);
2815         if (mUseIncSupp) {
2816             aID = getDummyAlloc(A);
2817             xID = getDummyAlloc(X);
2818             yID = getDummyAlloc(Y);
2819         }
2820         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp);
2821     }
2822 
2823     /**
2824      * ZHPR2 performs the hermitian rank 2 operation
2825      * A := alpha*x*y**H + conjg(alpha)*y*x**H + A
2826      *
2827      * Details: http://www.netlib.org/lapack/explore-html/d5/d52/zhpr2_8f.html
2828      *
2829      * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2830      *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2831      *       'a' to packed matrix 'b'.
2832      *           k = 0
2833      *           for i in range(0, n):
2834      *              for j in range(i, n):
2835      *                  b[k++] = a[i, j]
2836      *
2837      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2838      * @param alpha The scalar alpha.
2839      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2840      * @param incX The increment for the elements of vector x, must be larger than zero.
2841      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2842      * @param incY The increment for the elements of vector y, must be larger than zero.
2843      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2844      */
ZHPR2(@plo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)2845     public void ZHPR2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
2846         // same as SPR2
2847         int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap);
2848 
2849         boolean mUseIncSupp = isIncSupp();
2850         long apID = Ap.getID(mRS);
2851         long xID = X.getID(mRS);
2852         long yID = Y.getID(mRS);
2853         if (mUseIncSupp) {
2854             apID = getDummyAlloc(Ap);
2855             xID = getDummyAlloc(X);
2856             yID = getDummyAlloc(Y);
2857         }
2858         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, apID, incX, incY, 0, 0, mUseIncSupp);
2859     }
2860 
2861 
2862     /**
2863      * Level 3 BLAS
2864      */
2865 
validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C)2866     static void validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C) {
2867         int aM = -1, aN = -1, bM = -1, bN = -1, cM = -1, cN = -1;
2868         if ((A != null && !A.getType().getElement().isCompatible(e)) ||
2869             (B != null && !B.getType().getElement().isCompatible(e)) ||
2870             (C != null && !C.getType().getElement().isCompatible(e))) {
2871             throw new RSRuntimeException("Called BLAS with wrong Element type");
2872         }
2873         if (C == null) {
2874             //since matrix C is used to store the result, it cannot be null.
2875             throw new RSRuntimeException("Allocation C cannot be null");
2876         }
2877         cM = C.getType().getY();
2878         cN = C.getType().getX();
2879 
2880         if (Side == RIGHT) {
2881             if ((A == null && B != null) || (A != null && B == null)) {
2882                 throw new RSRuntimeException("Provided Matrix A without Matrix B, or vice versa");
2883             }
2884             if (B != null) {
2885                 bM = A.getType().getY();
2886                 bN = A.getType().getX();
2887             }
2888             if (A != null) {
2889                 aM = B.getType().getY();
2890                 aN = B.getType().getX();
2891             }
2892         } else {
2893             if (A != null) {
2894                 if (TransA == TRANSPOSE || TransA == CONJ_TRANSPOSE) {
2895                     aN = A.getType().getY();
2896                     aM = A.getType().getX();
2897                 } else {
2898                     aM = A.getType().getY();
2899                     aN = A.getType().getX();
2900                 }
2901             }
2902             if (B != null) {
2903                 if (TransB == TRANSPOSE || TransB == CONJ_TRANSPOSE) {
2904                     bN = B.getType().getY();
2905                     bM = B.getType().getX();
2906                 } else {
2907                     bM = B.getType().getY();
2908                     bN = B.getType().getX();
2909                 }
2910             }
2911         }
2912         if (A != null && B != null && C != null) {
2913             if (aN != bM || aM != cM || bN != cN) {
2914                 throw new RSRuntimeException("Called BLAS with invalid dimensions");
2915             }
2916         } else if (A != null && C != null) {
2917             // A and C only, for SYRK
2918             if (cM != cN) {
2919                 throw new RSRuntimeException("Matrix C is not symmetric");
2920             }
2921             if (aM != cM) {
2922                 throw new RSRuntimeException("Called BLAS with invalid dimensions");
2923             }
2924         } else if (A != null && B != null) {
2925             // A and B only
2926             if (aN != bM) {
2927                 throw new RSRuntimeException("Called BLAS with invalid dimensions");
2928             }
2929         }
2930 
2931     }
2932 
2933     /**
2934      * SGEMM performs one of the matrix-matrix operations
2935      * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T
2936      *
2937      * Details: http://www.netlib.org/lapack/explore-html/d4/de2/sgemm_8f.html
2938      *
2939      * @param TransA The type of transpose applied to matrix A.
2940      * @param TransB The type of transpose applied to matrix B.
2941      * @param alpha The scalar alpha.
2942      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
2943      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
2944      * @param beta The scalar beta.
2945      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
2946      */
SGEMM(@ranspose int TransA, @Transpose int TransB, float alpha, Allocation A, Allocation B, float beta, Allocation C)2947     public void SGEMM(@Transpose int TransA, @Transpose int TransB, float alpha, Allocation A,
2948                       Allocation B, float beta, Allocation C) {
2949         validateTranspose(TransA);
2950         validateTranspose(TransB);
2951         validateL3(Element.F32(mRS), TransA, TransB, 0, A, B, C);
2952 
2953         int M = -1, N = -1, K = -1;
2954         if (TransA != NO_TRANSPOSE) {
2955             M = A.getType().getX();
2956             K = A.getType().getY();
2957         } else {
2958             M = A.getType().getY();
2959             K = A.getType().getX();
2960         }
2961         if (TransB != NO_TRANSPOSE) {
2962             N = B.getType().getY();
2963         } else {
2964             N = B.getType().getX();
2965         }
2966 
2967         boolean mUseIncSupp = isIncSupp();
2968         long aID = A.getID(mRS);
2969         long bID = B.getID(mRS);
2970         long cID = C.getID(mRS);
2971         if (mUseIncSupp) {
2972             aID = getDummyAlloc(A);
2973             bID = getDummyAlloc(B);
2974             cID = getDummyAlloc(C);
2975         }
2976         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemm, TransA, TransB, 0, 0, 0, M, N, K,  alpha, aID, bID,
2977                                         beta, cID, 0, 0, 0, 0, mUseIncSupp);
2978     }
2979 
2980     /**
2981      * DGEMM performs one of the matrix-matrix operations
2982      * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T
2983      *
2984      * Details: http://www.netlib.org/lapack/explore-html/d7/d2b/dgemm_8f.html
2985      *
2986      * @param TransA The type of transpose applied to matrix A.
2987      * @param TransB The type of transpose applied to matrix B.
2988      * @param alpha The scalar alpha.
2989      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2990      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
2991      * @param beta The scalar beta.
2992      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
2993      */
DGEMM(@ranspose int TransA, @Transpose int TransB, double alpha, Allocation A, Allocation B, double beta, Allocation C)2994     public void DGEMM(@Transpose int TransA, @Transpose int TransB, double alpha, Allocation A,
2995                       Allocation B, double beta, Allocation C) {
2996         validateTranspose(TransA);
2997         validateTranspose(TransB);
2998         validateL3(Element.F64(mRS), TransA, TransB, 0, A, B, C);
2999         int M = -1, N = -1, K = -1;
3000         if (TransA != NO_TRANSPOSE) {
3001             M = A.getType().getX();
3002             K = A.getType().getY();
3003         } else {
3004             M = A.getType().getY();
3005             K = A.getType().getX();
3006         }
3007         if (TransB != NO_TRANSPOSE) {
3008             N = B.getType().getY();
3009         } else {
3010             N = B.getType().getX();
3011         }
3012 
3013         boolean mUseIncSupp = isIncSupp();
3014         long aID = A.getID(mRS);
3015         long bID = B.getID(mRS);
3016         long cID = C.getID(mRS);
3017         if (mUseIncSupp) {
3018             aID = getDummyAlloc(A);
3019             bID = getDummyAlloc(B);
3020             cID = getDummyAlloc(C);
3021         }
3022         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemm, TransA, TransB, 0, 0, 0, M, N, K,  alpha, aID, bID,
3023                                         beta, cID, 0, 0, 0, 0, mUseIncSupp);
3024     }
3025 
3026     /**
3027      * CGEMM performs one of the matrix-matrix operations
3028      * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T  or  op(X) = X**H
3029      *
3030      * Details: http://www.netlib.org/lapack/explore-html/d6/d5b/cgemm_8f.html
3031      *
3032      * @param TransA The type of transpose applied to matrix A.
3033      * @param TransB The type of transpose applied to matrix B.
3034      * @param alpha The scalar alpha.
3035      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3036      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3037      * @param beta The scalar beta.
3038      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3039      */
CGEMM(@ranspose int TransA, @Transpose int TransB, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C)3040     public void CGEMM(@Transpose int TransA, @Transpose int TransB, Float2 alpha, Allocation A,
3041                       Allocation B, Float2 beta, Allocation C) {
3042         validateTranspose(TransA);
3043         validateTranspose(TransB);
3044         validateL3(Element.F32_2(mRS), TransA, TransB, 0, A, B, C);
3045         int M = -1, N = -1, K = -1;
3046         if (TransA != NO_TRANSPOSE) {
3047             M = A.getType().getX();
3048             K = A.getType().getY();
3049         } else {
3050             M = A.getType().getY();
3051             K = A.getType().getX();
3052         }
3053         if (TransB != NO_TRANSPOSE) {
3054             N = B.getType().getY();
3055         } else {
3056             N = B.getType().getX();
3057         }
3058 
3059         boolean mUseIncSupp = isIncSupp();
3060         long aID = A.getID(mRS);
3061         long bID = B.getID(mRS);
3062         long cID = C.getID(mRS);
3063         if (mUseIncSupp) {
3064             aID = getDummyAlloc(A);
3065             bID = getDummyAlloc(B);
3066             cID = getDummyAlloc(C);
3067         }
3068         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemm, TransA, TransB, 0, 0, 0, M, N, K,  alpha.x, alpha.y, aID, bID,
3069                                          beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
3070     }
3071 
3072     /**
3073      * ZGEMM performs one of the matrix-matrix operations
3074      * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T  or  op(X) = X**H
3075      *
3076      * Details: http://www.netlib.org/lapack/explore-html/d7/d76/zgemm_8f.html
3077      *
3078      * @param TransA The type of transpose applied to matrix A.
3079      * @param TransB The type of transpose applied to matrix B.
3080      * @param alpha The scalar alpha.
3081      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2
3082      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2
3083      * @param beta The scalar beta.
3084      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2
3085      */
ZGEMM(@ranspose int TransA, @Transpose int TransB, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C)3086     public void ZGEMM(@Transpose int TransA, @Transpose int TransB, Double2 alpha, Allocation A,
3087                       Allocation B, Double2 beta, Allocation C) {
3088         validateTranspose(TransA);
3089         validateTranspose(TransB);
3090         validateL3(Element.F64_2(mRS), TransA, TransB, 0, A, B, C);
3091         int M = -1, N = -1, K = -1;
3092         if (TransA != NO_TRANSPOSE) {
3093             M = A.getType().getX();
3094             K = A.getType().getY();
3095         } else {
3096             M = A.getType().getY();
3097             K = A.getType().getX();
3098         }
3099         if (TransB != NO_TRANSPOSE) {
3100             N = B.getType().getY();
3101         } else {
3102             N = B.getType().getX();
3103         }
3104 
3105         boolean mUseIncSupp = isIncSupp();
3106         long aID = A.getID(mRS);
3107         long bID = B.getID(mRS);
3108         long cID = C.getID(mRS);
3109         if (mUseIncSupp) {
3110             aID = getDummyAlloc(A);
3111             bID = getDummyAlloc(B);
3112             cID = getDummyAlloc(C);
3113         }
3114         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemm, TransA, TransB, 0, 0, 0, M, N, K,  alpha.x, alpha.y, aID, bID,
3115                                    beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
3116     }
3117 
3118     /**
3119      * SSYMM performs one of the matrix-matrix operations
3120      * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3121      *
3122      * Details: http://www.netlib.org/lapack/explore-html/d7/d42/ssymm_8f.html
3123      *
3124      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3125      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3126      * @param alpha The scalar alpha.
3127      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
3128      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
3129      * @param beta The scalar beta.
3130      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
3131      */
SSYMM(@ide int Side, @Uplo int Uplo, float alpha, Allocation A, Allocation B, float beta, Allocation C)3132     public void SSYMM(@Side int Side, @Uplo int Uplo, float alpha, Allocation A,
3133                       Allocation B, float beta, Allocation C) {
3134         validateSide(Side);
3135         validateUplo(Uplo);
3136         //For SYMM, Matrix A should be symmetric
3137         if (A.getType().getX() != A.getType().getY()) {
3138             throw new RSRuntimeException("Matrix A is not symmetric");
3139         }
3140         validateL3(Element.F32(mRS), 0, 0, Side, A, B, C);
3141 
3142         boolean mUseIncSupp = isIncSupp();
3143         long aID = A.getID(mRS);
3144         long bID = B.getID(mRS);
3145         long cID = C.getID(mRS);
3146         if (mUseIncSupp) {
3147             aID = getDummyAlloc(A);
3148             bID = getDummyAlloc(B);
3149             cID = getDummyAlloc(C);
3150         }
3151         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, aID, bID,
3152                                         beta, cID, 0, 0, 0, 0, mUseIncSupp);
3153     }
3154 
3155     /**
3156      * DSYMM performs one of the matrix-matrix operations
3157      * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3158      *
3159      * Details: http://www.netlib.org/lapack/explore-html/d8/db0/dsymm_8f.html
3160      *
3161      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3162      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3163      * @param alpha The scalar alpha.
3164      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
3165      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
3166      * @param beta The scalar beta.
3167      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
3168      */
DSYMM(@ide int Side, @Uplo int Uplo, double alpha, Allocation A, Allocation B, double beta, Allocation C)3169     public void DSYMM(@Side int Side, @Uplo int Uplo, double alpha, Allocation A,
3170                       Allocation B, double beta, Allocation C) {
3171         validateSide(Side);
3172         validateUplo(Uplo);
3173         if (A.getType().getX() != A.getType().getY()) {
3174             throw new RSRuntimeException("Matrix A is not symmetric");
3175         }
3176         validateL3(Element.F64(mRS), 0, 0, Side, A, B, C);
3177 
3178         boolean mUseIncSupp = isIncSupp();
3179         long aID = A.getID(mRS);
3180         long bID = B.getID(mRS);
3181         long cID = C.getID(mRS);
3182         if (mUseIncSupp) {
3183             aID = getDummyAlloc(A);
3184             bID = getDummyAlloc(B);
3185             cID = getDummyAlloc(C);
3186         }
3187         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, aID, bID,
3188                                         beta, cID, 0, 0, 0, 0, mUseIncSupp);
3189     }
3190 
3191     /**
3192      * CSYMM performs one of the matrix-matrix operations
3193      * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3194      *
3195      * Details: http://www.netlib.org/lapack/explore-html/db/d59/csymm_8f.html
3196      *
3197      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3198      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3199      * @param alpha The scalar alpha.
3200      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3201      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3202      * @param beta The scalar beta.
3203      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3204      */
CSYMM(@ide int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C)3205     public void CSYMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A,
3206                       Allocation B, Float2 beta, Allocation C) {
3207         validateSide(Side);
3208         validateUplo(Uplo);
3209         if (A.getType().getX() != A.getType().getY()) {
3210             throw new RSRuntimeException("Matrix A is not symmetric");
3211         }
3212         validateL3(Element.F32_2(mRS), 0, 0, Side, A, B, C);
3213 
3214         boolean mUseIncSupp = isIncSupp();
3215         long aID = A.getID(mRS);
3216         long bID = B.getID(mRS);
3217         long cID = C.getID(mRS);
3218         if (mUseIncSupp) {
3219             aID = getDummyAlloc(A);
3220             bID = getDummyAlloc(B);
3221             cID = getDummyAlloc(C);
3222         }
3223         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, aID, bID,
3224                                          beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
3225     }
3226 
3227     /**
3228      * ZSYMM performs one of the matrix-matrix operations
3229      * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3230      *
3231      * Details: http://www.netlib.org/lapack/explore-html/df/d51/zsymm_8f.html
3232      *
3233      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3234      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3235      * @param alpha The scalar alpha.
3236      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3237      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
3238      * @param beta The scalar beta.
3239      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
3240      */
ZSYMM(@ide int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C)3241     public void ZSYMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A,
3242                       Allocation B, Double2 beta, Allocation C) {
3243         validateSide(Side);
3244         validateUplo(Uplo);
3245         if (A.getType().getX() != A.getType().getY()) {
3246             throw new RSRuntimeException("Matrix A is not symmetric");
3247         }
3248         validateL3(Element.F64_2(mRS), 0, 0, Side, A, B, C);
3249 
3250         boolean mUseIncSupp = isIncSupp();
3251         long aID = A.getID(mRS);
3252         long bID = B.getID(mRS);
3253         long cID = C.getID(mRS);
3254         if (mUseIncSupp) {
3255             aID = getDummyAlloc(A);
3256             bID = getDummyAlloc(B);
3257             cID = getDummyAlloc(C);
3258         }
3259         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, aID, bID,
3260                                    beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
3261     }
3262 
3263     /**
3264      * SSYRK performs one of the symmetric rank k operations
3265      * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3266      *
3267      * Details: http://www.netlib.org/lapack/explore-html/d0/d40/ssyrk_8f.html
3268      *
3269      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3270      * @param Trans The type of transpose applied to the operation.
3271      * @param alpha The scalar alpha.
3272      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
3273      * @param beta The scalar beta.
3274      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
3275      */
SSYRK(@plo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C)3276     public void SSYRK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) {
3277         validateTranspose(Trans);
3278         validateUplo(Uplo);
3279         validateL3(Element.F32(mRS), Trans, 0, 0, A, null, C);
3280         int K = -1;
3281         if (Trans != NO_TRANSPOSE) {
3282             K = A.getType().getY();
3283         } else {
3284             K = A.getType().getX();
3285         }
3286 
3287         boolean mUseIncSupp = isIncSupp();
3288         long aID = A.getID(mRS);
3289         long cID = C.getID(mRS);
3290         if (mUseIncSupp) {
3291             aID = getDummyAlloc(A);
3292             cID = getDummyAlloc(C);
3293         }
3294         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, 0, beta, cID, 0, 0, 0, 0, mUseIncSupp);
3295     }
3296 
3297     /**
3298      * DSYRK performs one of the symmetric rank k operations
3299      * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3300      *
3301      * Details: http://www.netlib.org/lapack/explore-html/dc/d05/dsyrk_8f.html
3302      *
3303      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3304      * @param Trans The type of transpose applied to the operation.
3305      * @param alpha The scalar alpha.
3306      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
3307      * @param beta The scalar beta.
3308      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
3309      */
DSYRK(@plo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C)3310     public void DSYRK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) {
3311         validateTranspose(Trans);
3312         validateUplo(Uplo);
3313         validateL3(Element.F64(mRS), Trans, 0, 0, A, null, C);
3314         int K = -1;
3315         if (Trans != NO_TRANSPOSE) {
3316             K = A.getType().getY();
3317         } else {
3318             K = A.getType().getX();
3319         }
3320 
3321         boolean mUseIncSupp = isIncSupp();
3322         long aID = A.getID(mRS);
3323         long cID = C.getID(mRS);
3324         if (mUseIncSupp) {
3325             aID = getDummyAlloc(A);
3326             cID = getDummyAlloc(C);
3327         }
3328         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, 0, beta, cID, 0, 0, 0, 0, mUseIncSupp);
3329     }
3330 
3331     /**
3332      * CSYRK performs one of the symmetric rank k operations
3333      * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3334      *
3335      * Details: http://www.netlib.org/lapack/explore-html/d3/d6a/csyrk_8f.html
3336      *
3337      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3338      * @param Trans The type of transpose applied to the operation.
3339      * @param alpha The scalar alpha.
3340      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3341      * @param beta The scalar beta.
3342      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3343      */
CSYRK(@plo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Float2 beta, Allocation C)3344     public void CSYRK(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Float2 beta, Allocation C) {
3345         validateTranspose(Trans);
3346         validateUplo(Uplo);
3347         validateL3(Element.F32_2(mRS), Trans, 0, 0, A, null, C);
3348         int K = -1;
3349         if (Trans != NO_TRANSPOSE) {
3350             K = A.getType().getY();
3351         } else {
3352             K = A.getType().getX();
3353         }
3354 
3355         boolean mUseIncSupp = isIncSupp();
3356         long aID = A.getID(mRS);
3357         long cID = C.getID(mRS);
3358         if (mUseIncSupp) {
3359             aID = getDummyAlloc(A);
3360             cID = getDummyAlloc(C);
3361         }
3362         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, 0, beta.x, beta.y,
3363                                          C.getID(mRS), 0, 0, 0, 0, mUseIncSupp);
3364     }
3365 
3366     /**
3367      * ZSYRK performs one of the symmetric rank k operations
3368      * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3369      *
3370      * Details: http://www.netlib.org/lapack/explore-html/de/d54/zsyrk_8f.html
3371      *
3372      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3373      * @param Trans The type of transpose applied to the operation.
3374      * @param alpha The scalar alpha.
3375      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3376      * @param beta The scalar beta.
3377      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
3378      */
ZSYRK(@plo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Double2 beta, Allocation C)3379     public void ZSYRK(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Double2 beta, Allocation C) {
3380         validateTranspose(Trans);
3381         validateUplo(Uplo);
3382         validateL3(Element.F64_2(mRS), Trans, 0, 0, A, null, C);
3383         int K = -1;
3384         if (Trans != NO_TRANSPOSE) {
3385             K = A.getType().getY();
3386         } else {
3387             K = A.getType().getX();
3388         }
3389 
3390         boolean mUseIncSupp = isIncSupp();
3391         long aID = A.getID(mRS);
3392         long cID = C.getID(mRS);
3393         if (mUseIncSupp) {
3394             aID = getDummyAlloc(A);
3395             cID = getDummyAlloc(C);
3396         }
3397         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, 0, beta.x, beta.y,
3398                                    C.getID(mRS), 0, 0, 0, 0, mUseIncSupp);
3399     }
3400 
validateSYR2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C)3401     static void validateSYR2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) {
3402         validateTranspose(Trans);
3403         if (!A.getType().getElement().isCompatible(e) ||
3404             !B.getType().getElement().isCompatible(e) ||
3405             !C.getType().getElement().isCompatible(e)) {
3406             throw new RSRuntimeException("Called BLAS with wrong Element type");
3407         }
3408         int Cdim = -1;
3409         // A is n x k if no transpose, k x n if transpose
3410         // C is n x n
3411         if (Trans == TRANSPOSE) {
3412             // check columns versus C
3413             Cdim = A.getType().getX();
3414         } else {
3415             // check rows versus C
3416             Cdim = A.getType().getY();
3417         }
3418         if (C.getType().getX() != Cdim || C.getType().getY() != Cdim) {
3419             throw new RSRuntimeException("Invalid symmetric matrix in SYR2K");
3420         }
3421         // A dims == B dims
3422         if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) {
3423             throw new RSRuntimeException("Invalid A and B in SYR2K");
3424         }
3425     }
3426 
3427     /**
3428      * SSYR2K performs one of the symmetric rank 2k operations
3429      * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3430      *
3431      * Details: http://www.netlib.org/lapack/explore-html/df/d3d/ssyr2k_8f.html
3432      *
3433      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3434      * @param Trans The type of transpose applied to the operation.
3435      * @param alpha The scalar alpha.
3436      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
3437      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
3438      * @param beta The scalar beta.
3439      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
3440      */
SSYR2K(@plo int Uplo, @Transpose int Trans, float alpha, Allocation A, Allocation B, float beta, Allocation C)3441     public void SSYR2K(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, Allocation B, float beta, Allocation C) {
3442         validateUplo(Uplo);
3443         validateSYR2K(Element.F32(mRS), Trans, A, B, C);
3444         int K = -1;
3445         if (Trans != NO_TRANSPOSE) {
3446             K = A.getType().getY();
3447         } else {
3448             K = A.getType().getX();
3449         }
3450 
3451         boolean mUseIncSupp = isIncSupp();
3452         long aID = A.getID(mRS);
3453         long bID = B.getID(mRS);
3454         long cID = C.getID(mRS);
3455         if (mUseIncSupp) {
3456             aID = getDummyAlloc(A);
3457             bID = getDummyAlloc(B);
3458             cID = getDummyAlloc(C);
3459         }
3460         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, bID, beta, cID, 0, 0, 0, 0, mUseIncSupp);
3461     }
3462 
3463     /**
3464      * DSYR2K performs one of the symmetric rank 2k operations
3465      * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3466      *
3467      * Details: http://www.netlib.org/lapack/explore-html/d1/dec/dsyr2k_8f.html
3468      *
3469      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3470      * @param Trans The type of transpose applied to the operation.
3471      * @param alpha The scalar alpha.
3472      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
3473      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
3474      * @param beta The scalar beta.
3475      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
3476      */
DSYR2K(@plo int Uplo, @Transpose int Trans, double alpha, Allocation A, Allocation B, double beta, Allocation C)3477     public void DSYR2K(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, Allocation B, double beta, Allocation C) {
3478         validateUplo(Uplo);
3479         validateSYR2K(Element.F64(mRS), Trans, A, B, C);
3480         int K = -1;
3481         if (Trans != NO_TRANSPOSE) {
3482             K = A.getType().getY();
3483         } else {
3484             K = A.getType().getX();
3485         }
3486 
3487         boolean mUseIncSupp = isIncSupp();
3488         long aID = A.getID(mRS);
3489         long bID = B.getID(mRS);
3490         long cID = C.getID(mRS);
3491         if (mUseIncSupp) {
3492             aID = getDummyAlloc(A);
3493             bID = getDummyAlloc(B);
3494             cID = getDummyAlloc(C);
3495         }
3496         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, bID, beta, cID, 0, 0, 0, 0, mUseIncSupp);
3497     }
3498 
3499     /**
3500      * CSYR2K performs one of the symmetric rank 2k operations
3501      * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3502      *
3503      * Details: http://www.netlib.org/lapack/explore-html/de/d7e/csyr2k_8f.html
3504      *
3505      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3506      * @param Trans The type of transpose applied to the operation.
3507      * @param alpha The scalar alpha.
3508      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3509      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3510      * @param beta The scalar beta.
3511      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3512      */
CSYR2K(@plo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C)3513     public void CSYR2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) {
3514         validateUplo(Uplo);
3515         validateSYR2K(Element.F32_2(mRS), Trans, A, B, C);
3516         int K = -1;
3517         if (Trans != NO_TRANSPOSE) {
3518             K = A.getType().getY();
3519         } else {
3520             K = A.getType().getX();
3521         }
3522 
3523         boolean mUseIncSupp = isIncSupp();
3524         long aID = A.getID(mRS);
3525         long bID = B.getID(mRS);
3526         long cID = C.getID(mRS);
3527         if (mUseIncSupp) {
3528             aID = getDummyAlloc(A);
3529             bID = getDummyAlloc(B);
3530             cID = getDummyAlloc(C);
3531         }
3532         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
3533     }
3534 
3535     /**
3536      * ZSYR2K performs one of the symmetric rank 2k operations
3537      * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3538      *
3539      * Details: http://www.netlib.org/lapack/explore-html/df/d20/zsyr2k_8f.html
3540      *
3541      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3542      * @param Trans The type of transpose applied to the operation.
3543      * @param alpha The scalar alpha.
3544      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3545      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
3546      * @param beta The scalar beta.
3547      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
3548      */
ZSYR2K(@plo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C)3549     public void ZSYR2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) {
3550         validateUplo(Uplo);
3551         validateSYR2K(Element.F64_2(mRS), Trans, A, B, C);
3552         int K = -1;
3553         if (Trans != NO_TRANSPOSE) {
3554             K = A.getType().getY();
3555         } else {
3556             K = A.getType().getX();
3557         }
3558 
3559         boolean mUseIncSupp = isIncSupp();
3560         long aID = A.getID(mRS);
3561         long bID = B.getID(mRS);
3562         long cID = C.getID(mRS);
3563         if (mUseIncSupp) {
3564             aID = getDummyAlloc(A);
3565             bID = getDummyAlloc(B);
3566             cID = getDummyAlloc(C);
3567         }
3568         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
3569     }
3570 
validateTRMM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B)3571     static void validateTRMM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) {
3572         validateSide(Side);
3573         validateTranspose(TransA);
3574         int aM = -1, aN = -1, bM = -1, bN = -1;
3575         if (!A.getType().getElement().isCompatible(e) ||
3576             !B.getType().getElement().isCompatible(e)) {
3577             throw new RSRuntimeException("Called BLAS with wrong Element type");
3578         }
3579 
3580         aM = A.getType().getY();
3581         aN = A.getType().getX();
3582         if (aM != aN) {
3583             throw new RSRuntimeException("Called TRMM with a non-symmetric matrix A");
3584         }
3585 
3586         bM = B.getType().getY();
3587         bN = B.getType().getX();
3588         if (Side == LEFT) {
3589             if (aN != bM) {
3590                 throw new RSRuntimeException("Called TRMM with invalid matrices");
3591             }
3592         } else {
3593             if (bN != aM) {
3594                 throw new RSRuntimeException("Called TRMM with invalid matrices");
3595             }
3596         }
3597     }
3598 
3599     /**
3600      * STRMM performs one of the matrix-matrix operations
3601      * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3602      * op(A) is one of  op(A) = A  or  op(A) = A**T
3603      *
3604      * Details: http://www.netlib.org/lapack/explore-html/df/d01/strmm_8f.html
3605      *
3606      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3607      * @param Uplo Specifies whether matrix A is upper or lower triangular.
3608      * @param TransA The type of transpose applied to matrix A.
3609      * @param Diag Specifies whether or not A is unit triangular.
3610      * @param alpha The scalar alpha.
3611      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
3612      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
3613      */
STRMM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B)3614     public void STRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) {
3615         validateUplo(Uplo);
3616         validateDiag(Diag);
3617         validateTRMM(Element.F32(mRS), Side, TransA, A, B);
3618 
3619         boolean mUseIncSupp = isIncSupp();
3620         long aID = A.getID(mRS);
3621         long bID = B.getID(mRS);
3622         if (mUseIncSupp) {
3623             aID = getDummyAlloc(A);
3624             bID = getDummyAlloc(B);
3625         }
3626         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3627                                         alpha, aID, bID, 0.f, 0, 0, 0, 0, 0, mUseIncSupp);
3628     }
3629 
3630     /**
3631      * DTRMM performs one of the matrix-matrix operations
3632      * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3633      * op(A) is one of  op(A) = A  or  op(A) = A**T
3634      *
3635      * Details: http://www.netlib.org/lapack/explore-html/dd/d19/dtrmm_8f.html
3636      *
3637      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3638      * @param Uplo Specifies whether matrix A is upper or lower triangular.
3639      * @param TransA The type of transpose applied to matrix A.
3640      * @param Diag Specifies whether or not A is unit triangular.
3641      * @param alpha The scalar alpha.
3642      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
3643      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
3644      */
DTRMM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B)3645     public void DTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) {
3646         validateUplo(Uplo);
3647         validateDiag(Diag);
3648         validateTRMM(Element.F64(mRS), Side, TransA, A, B);
3649 
3650         boolean mUseIncSupp = isIncSupp();
3651         long aID = A.getID(mRS);
3652         long bID = B.getID(mRS);
3653         if (mUseIncSupp) {
3654             aID = getDummyAlloc(A);
3655             bID = getDummyAlloc(B);
3656         }
3657         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3658                                         alpha, aID, bID, 0, 0, 0, 0, 0, 0, mUseIncSupp);
3659     }
3660 
3661     /**
3662      * CTRMM performs one of the matrix-matrix operations
3663      * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3664      * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3665      *
3666      * Details: http://www.netlib.org/lapack/explore-html/d4/d9b/ctrmm_8f.html
3667      *
3668      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3669      * @param Uplo Specifies whether matrix A is upper or lower triangular.
3670      * @param TransA The type of transpose applied to matrix A.
3671      * @param Diag Specifies whether or not A is unit triangular.
3672      * @param alpha The scalar alpha.
3673      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3674      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3675      */
CTRMM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B)3676     public void CTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) {
3677         validateUplo(Uplo);
3678         validateDiag(Diag);
3679         validateTRMM(Element.F32_2(mRS), Side, TransA, A, B);
3680 
3681         boolean mUseIncSupp = isIncSupp();
3682         long aID = A.getID(mRS);
3683         long bID = B.getID(mRS);
3684         if (mUseIncSupp) {
3685             aID = getDummyAlloc(A);
3686             bID = getDummyAlloc(B);
3687         }
3688         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3689                                          alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp);
3690     }
3691 
3692     /**
3693      * ZTRMM performs one of the matrix-matrix operations
3694      * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3695      * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3696      *
3697      * Details: http://www.netlib.org/lapack/explore-html/d8/de1/ztrmm_8f.html
3698      *
3699      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3700      * @param Uplo Specifies whether matrix A is upper or lower triangular.
3701      * @param TransA The type of transpose applied to matrix A.
3702      * @param Diag Specifies whether or not A is unit triangular.
3703      * @param alpha The scalar alpha.
3704      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3705      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
3706      */
ZTRMM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B)3707     public void ZTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) {
3708         validateUplo(Uplo);
3709         validateDiag(Diag);
3710         validateTRMM(Element.F64_2(mRS), Side, TransA, A, B);
3711 
3712         boolean mUseIncSupp = isIncSupp();
3713         long aID = A.getID(mRS);
3714         long bID = B.getID(mRS);
3715         if (mUseIncSupp) {
3716             aID = getDummyAlloc(A);
3717             bID = getDummyAlloc(B);
3718         }
3719         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3720                                    alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp);
3721     }
3722 
validateTRSM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B)3723     static void validateTRSM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) {
3724         int adim = -1, bM = -1, bN = -1;
3725         validateSide(Side);
3726         validateTranspose(TransA);
3727         if (!A.getType().getElement().isCompatible(e) ||
3728             !B.getType().getElement().isCompatible(e)) {
3729             throw new RSRuntimeException("Called BLAS with wrong Element type");
3730         }
3731         adim = A.getType().getX();
3732         if (adim != A.getType().getY()) {
3733             // this may be unnecessary, the restriction could potentially be relaxed
3734             // A needs to contain at least that symmetric matrix but could theoretically be larger
3735             // for now we assume adapters are sufficient, will reevaluate in the future
3736             throw new RSRuntimeException("Called TRSM with a non-symmetric matrix A");
3737         }
3738         bM = B.getType().getY();
3739         bN = B.getType().getX();
3740         if (Side == LEFT) {
3741             // A is M*M
3742             if (adim != bM) {
3743                 throw new RSRuntimeException("Called TRSM with invalid matrix dimensions");
3744             }
3745         } else {
3746             // A is N*N
3747             if (adim != bN) {
3748                 throw new RSRuntimeException("Called TRSM with invalid matrix dimensions");
3749             }
3750         }
3751     }
3752 
3753     /**
3754      * STRSM solves one of the matrix equations
3755      * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3756      * op(A) is one of  op(A) = A  or  op(A) = A**T
3757      *
3758      * Details: http://www.netlib.org/lapack/explore-html/d2/d8b/strsm_8f.html
3759      *
3760      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3761      * @param Uplo Specifies whether matrix A is upper or lower triangular.
3762      * @param TransA The type of transpose applied to matrix A.
3763      * @param Diag Specifies whether or not A is unit triangular.
3764      * @param alpha The scalar alpha.
3765      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
3766      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
3767      */
STRSM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B)3768     public void STRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) {
3769         validateUplo(Uplo);
3770         validateDiag(Diag);
3771         validateTRSM(Element.F32(mRS), Side, TransA, A, B);
3772 
3773         boolean mUseIncSupp = isIncSupp();
3774         long aID = A.getID(mRS);
3775         long bID = B.getID(mRS);
3776         if (mUseIncSupp) {
3777             aID = getDummyAlloc(A);
3778             bID = getDummyAlloc(B);
3779         }
3780         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3781                                         alpha, aID, bID, 0, 0, 0, 0, 0, 0, mUseIncSupp);
3782     }
3783 
3784     /**
3785      * DTRSM solves one of the matrix equations
3786      * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3787      * op(A) is one of  op(A) = A  or  op(A) = A**T
3788      *
3789      * Details: http://www.netlib.org/lapack/explore-html/de/da7/dtrsm_8f.html
3790      *
3791      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3792      * @param Uplo Specifies whether matrix A is upper or lower triangular.
3793      * @param TransA The type of transpose applied to matrix A.
3794      * @param Diag Specifies whether or not A is unit triangular.
3795      * @param alpha The scalar alpha.
3796      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
3797      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
3798      */
DTRSM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B)3799     public void DTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) {
3800         validateUplo(Uplo);
3801         validateDiag(Diag);
3802         validateTRSM(Element.F64(mRS), Side, TransA, A, B);
3803 
3804         boolean mUseIncSupp = isIncSupp();
3805         long aID = A.getID(mRS);
3806         long bID = B.getID(mRS);
3807         if (mUseIncSupp) {
3808             aID = getDummyAlloc(A);
3809             bID = getDummyAlloc(B);
3810         }
3811         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3812                                         alpha, aID, bID, 0, 0, 0, 0, 0, 0, mUseIncSupp);
3813     }
3814 
3815     /**
3816      * CTRSM solves one of the matrix equations
3817      * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3818      * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3819      *
3820      * Details: http://www.netlib.org/lapack/explore-html/de/d30/ctrsm_8f.html
3821      *
3822      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3823      * @param Uplo Specifies whether matrix A is upper or lower triangular.
3824      * @param TransA The type of transpose applied to matrix A.
3825      * @param Diag Specifies whether or not A is unit triangular.
3826      * @param alpha The scalar alpha.
3827      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3828      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3829      */
CTRSM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B)3830     public void CTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) {
3831         validateUplo(Uplo);
3832         validateDiag(Diag);
3833         validateTRSM(Element.F32_2(mRS), Side, TransA, A, B);
3834 
3835         boolean mUseIncSupp = isIncSupp();
3836         long aID = A.getID(mRS);
3837         long bID = B.getID(mRS);
3838         if (mUseIncSupp) {
3839             aID = getDummyAlloc(A);
3840             bID = getDummyAlloc(B);
3841         }
3842         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3843                                          alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp);
3844     }
3845 
3846     /**
3847      * ZTRSM solves one of the matrix equations
3848      * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3849      * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3850      *
3851      * Details: http://www.netlib.org/lapack/explore-html/d1/d39/ztrsm_8f.html
3852      *
3853      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3854      * @param Uplo Specifies whether matrix A is upper or lower triangular.
3855      * @param TransA The type of transpose applied to matrix A.
3856      * @param Diag Specifies whether or not A is unit triangular.
3857      * @param alpha The scalar alpha.
3858      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3859      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
3860      */
ZTRSM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B)3861     public void ZTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) {
3862         validateUplo(Uplo);
3863         validateDiag(Diag);
3864         validateTRSM(Element.F64_2(mRS), Side, TransA, A, B);
3865 
3866         boolean mUseIncSupp = isIncSupp();
3867         long aID = A.getID(mRS);
3868         long bID = B.getID(mRS);
3869         if (mUseIncSupp) {
3870             aID = getDummyAlloc(A);
3871             bID = getDummyAlloc(B);
3872         }
3873         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3874                                    alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp);
3875     }
3876 
validateHEMM(Element e, @Side int Side, Allocation A, Allocation B, Allocation C)3877     static void validateHEMM(Element e, @Side int Side, Allocation A, Allocation B, Allocation C) {
3878         validateSide(Side);
3879 
3880         if (!A.getType().getElement().isCompatible(e) ||
3881             !B.getType().getElement().isCompatible(e) ||
3882             !C.getType().getElement().isCompatible(e)) {
3883             throw new RSRuntimeException("Called BLAS with wrong Element type");
3884         }
3885 
3886         // A must be square; can potentially be relaxed similar to TRSM
3887         int adim = A.getType().getX();
3888         if (adim != A.getType().getY()) {
3889             throw new RSRuntimeException("Called HEMM with non-square A");
3890         }
3891         if ((Side == LEFT && adim != B.getType().getY()) ||
3892             (Side == RIGHT && adim != B.getType().getX())) {
3893             throw new RSRuntimeException("Called HEMM with invalid B");
3894         }
3895         if (B.getType().getX() != C.getType().getX() ||
3896             B.getType().getY() != C.getType().getY()) {
3897             throw new RSRuntimeException("Called HEMM with mismatched B and C");
3898         }
3899     }
3900 
3901     /**
3902      * CHEMM performs one of the matrix-matrix operations
3903      * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3904      *
3905      * Details: http://www.netlib.org/lapack/explore-html/d3/d66/chemm_8f.html
3906      *
3907      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3908      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3909      * @param alpha The scalar alpha.
3910      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3911      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3912      * @param beta The scalar beta.
3913      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3914      */
CHEMM(@ide int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C)3915     public void CHEMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) {
3916         validateUplo(Uplo);
3917         validateHEMM(Element.F32_2(mRS), Side, A, B, C);
3918 
3919         boolean mUseIncSupp = isIncSupp();
3920         long aID = A.getID(mRS);
3921         long bID = B.getID(mRS);
3922         long cID = C.getID(mRS);
3923         if (mUseIncSupp) {
3924             aID = getDummyAlloc(A);
3925             bID = getDummyAlloc(B);
3926             cID = getDummyAlloc(C);
3927         }
3928         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0,
3929                                          alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
3930     }
3931 
3932     /**
3933      * ZHEMM performs one of the matrix-matrix operations
3934      * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3935      *
3936      * Details: http://www.netlib.org/lapack/explore-html/d6/d3e/zhemm_8f.html
3937      *
3938      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3939      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3940      * @param alpha The scalar alpha.
3941      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3942      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
3943      * @param beta The scalar beta.
3944      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
3945      */
ZHEMM(@ide int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C)3946     public void ZHEMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) {
3947         validateUplo(Uplo);
3948         validateHEMM(Element.F64_2(mRS), Side, A, B, C);
3949 
3950         boolean mUseIncSupp = isIncSupp();
3951         long aID = A.getID(mRS);
3952         long bID = B.getID(mRS);
3953         long cID = C.getID(mRS);
3954         if (mUseIncSupp) {
3955             aID = getDummyAlloc(A);
3956             bID = getDummyAlloc(B);
3957             cID = getDummyAlloc(C);
3958         }
3959         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0,
3960                                    alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp);
3961     }
3962 
validateHERK(Element e, @Transpose int Trans, Allocation A, Allocation C)3963     static void validateHERK(Element e, @Transpose int Trans, Allocation A, Allocation C) {
3964         if (!A.getType().getElement().isCompatible(e) ||
3965             !C.getType().getElement().isCompatible(e)) {
3966             throw new RSRuntimeException("Called BLAS with wrong Element type");
3967         }
3968         validateConjTranspose(Trans);
3969         int cdim = C.getType().getX();
3970         if (cdim != C.getType().getY()) {
3971             throw new RSRuntimeException("Called HERK with non-square C");
3972         }
3973         if (Trans == NO_TRANSPOSE) {
3974             if (cdim != A.getType().getY()) {
3975                 throw new RSRuntimeException("Called HERK with invalid A");
3976             }
3977         } else {
3978             if (cdim != A.getType().getX()) {
3979                 throw new RSRuntimeException("Called HERK with invalid A");
3980             }
3981         }
3982     }
3983 
3984     /**
3985      * CHERK performs one of the hermitian rank k operations
3986      * C := alpha*A*A**H + beta*C   or   C := alpha*A**H*A + beta*C
3987      *
3988      * Details: http://www.netlib.org/lapack/explore-html/d8/d52/cherk_8f.html
3989      *
3990      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3991      * @param Trans The type of transpose applied to the operation.
3992      * @param alpha The scalar alpha.
3993      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3994      * @param beta The scalar beta.
3995      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3996      */
CHERK(@plo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C)3997     public void CHERK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) {
3998         validateUplo(Uplo);
3999         validateHERK(Element.F32_2(mRS), Trans, A, C);
4000         int k = 0;
4001         if (Trans == CONJ_TRANSPOSE) {
4002             k = A.getType().getY();
4003         } else {
4004             k = A.getType().getX();
4005         }
4006 
4007         boolean mUseIncSupp = isIncSupp();
4008         long aID = A.getID(mRS);
4009         long cID = C.getID(mRS);
4010         if (mUseIncSupp) {
4011             aID = getDummyAlloc(A);
4012             cID = getDummyAlloc(C);
4013         }
4014         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k,
4015                                          alpha, 0, aID, 0, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp);
4016     }
4017 
4018     /**
4019      * ZHERK performs one of the hermitian rank k operations
4020      * C := alpha*A*A**H + beta*C   or   C := alpha*A**H*A + beta*C
4021      *
4022      * Details: http://www.netlib.org/lapack/explore-html/d1/db1/zherk_8f.html
4023      *
4024      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
4025      * @param Trans The type of transpose applied to the operation.
4026      * @param alpha The scalar alpha.
4027      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
4028      * @param beta The scalar beta.
4029      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
4030      */
ZHERK(@plo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C)4031     public void ZHERK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) {
4032         validateUplo(Uplo);
4033         validateHERK(Element.F64_2(mRS), Trans, A, C);
4034         int k = 0;
4035         if (Trans == CONJ_TRANSPOSE) {
4036             k = A.getType().getY();
4037         } else {
4038             k = A.getType().getX();
4039         }
4040 
4041         boolean mUseIncSupp = isIncSupp();
4042         long aID = A.getID(mRS);
4043         long cID = C.getID(mRS);
4044         if (mUseIncSupp) {
4045             aID = getDummyAlloc(A);
4046             cID = getDummyAlloc(C);
4047         }
4048         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k,
4049                                    alpha, 0, aID, 0, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp);
4050     }
4051 
validateHER2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C)4052     static void validateHER2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) {
4053         if (!A.getType().getElement().isCompatible(e) ||
4054             !B.getType().getElement().isCompatible(e) ||
4055             !C.getType().getElement().isCompatible(e)) {
4056             throw new RSRuntimeException("Called BLAS with wrong Element type");
4057         }
4058         validateConjTranspose(Trans);
4059         int cdim = C.getType().getX();
4060         if (cdim != C.getType().getY()) {
4061             throw new RSRuntimeException("Called HER2K with non-square C");
4062         }
4063         if (Trans == NO_TRANSPOSE) {
4064             if (A.getType().getY() != cdim) {
4065                 throw new RSRuntimeException("Called HER2K with invalid matrices");
4066             }
4067         } else {
4068             if (A.getType().getX() != cdim) {
4069                 throw new RSRuntimeException("Called HER2K with invalid matrices");
4070             }
4071         }
4072         if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) {
4073             throw new RSRuntimeException("Called HER2K with invalid A and B matrices");
4074         }
4075     }
4076 
4077     /**
4078      * CHER2K performs one of the hermitian rank 2k operations
4079      * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C   or   C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C
4080      *
4081      * Details: http://www.netlib.org/lapack/explore-html/d1/d82/cher2k_8f.html
4082      *
4083      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
4084      * @param Trans The type of transpose applied to the operation.
4085      * @param alpha The scalar alpha.
4086      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
4087      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
4088      * @param beta The scalar beta.
4089      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
4090      */
CHER2K(@plo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, float beta, Allocation C)4091     public void CHER2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, float beta, Allocation C) {
4092         validateUplo(Uplo);
4093         validateHER2K(Element.F32_2(mRS), Trans, A, B, C);
4094         int k = 0;
4095         if (Trans == NO_TRANSPOSE) {
4096             k = A.getType().getX();
4097         } else {
4098             k = A.getType().getY();
4099         }
4100 
4101         boolean mUseIncSupp = isIncSupp();
4102         long aID = A.getID(mRS);
4103         long bID = B.getID(mRS);
4104         long cID = C.getID(mRS);
4105         if (mUseIncSupp) {
4106             aID = getDummyAlloc(A);
4107             bID = getDummyAlloc(B);
4108             cID = getDummyAlloc(C);
4109         }
4110         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y,
4111                                          A.getID(mRS), bID, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp);
4112     }
4113 
4114     /**
4115      * ZHER2K performs one of the hermitian rank 2k operations
4116      * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C   or   C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C
4117      *
4118      * Details: http://www.netlib.org/lapack/explore-html/d7/dfa/zher2k_8f.html
4119      *
4120      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
4121      * @param Trans The type of transpose applied to the operation.
4122      * @param alpha The scalar alpha.
4123      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
4124      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
4125      * @param beta The scalar beta.
4126      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
4127      */
ZHER2K(@plo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, double beta, Allocation C)4128     public void ZHER2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, double beta, Allocation C) {
4129         validateUplo(Uplo);
4130         validateHER2K(Element.F64_2(mRS), Trans, A, B, C);
4131         int k = 0;
4132         if (Trans == NO_TRANSPOSE) {
4133             k = A.getType().getX();
4134         } else {
4135             k = A.getType().getY();
4136         }
4137 
4138         boolean mUseIncSupp = isIncSupp();
4139         long aID = A.getID(mRS);
4140         long bID = B.getID(mRS);
4141         long cID = C.getID(mRS);
4142         if (mUseIncSupp) {
4143             aID = getDummyAlloc(A);
4144             bID = getDummyAlloc(B);
4145             cID = getDummyAlloc(C);
4146         }
4147         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y,
4148                                    A.getID(mRS), bID, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp);
4149     }
4150 
4151 
4152     /**
4153      * 8-bit GEMM-like operation for neural networks: C = A * Transpose(B)
4154      * Calculations are done in 1.10.21 fixed-point format for the final output,
4155      * just before there's a shift down to drop the fractional parts. The output
4156      * values are gated to 0 to 255 to fit in a byte, but the 10-bit format
4157      * gives some headroom to avoid wrapping around on small overflows.
4158      *
4159      * @param A The input allocation contains matrix A, supported elements type {@link Element#U8}.
4160      * @param a_offset The offset for all values in matrix A, e.g A[i,j] = A[i,j] - a_offset. Value should be from 0 to 255.
4161      * @param B The input allocation contains matrix B, supported elements type {@link Element#U8}.
4162      * @param b_offset The offset for all values in matrix B, e.g B[i,j] = B[i,j] - b_offset. Value should be from 0 to 255.
4163      * @param C The input allocation contains matrix C, supported elements type {@link Element#U8}.
4164      * @param c_offset The offset for all values in matrix C.
4165      * @param c_mult The multiplier for all values in matrix C, e.g C[i,j] = (C[i,j] + c_offset) * c_mult.
4166      **/
BNNM(Allocation A, int a_offset, Allocation B, int b_offset, Allocation C, int c_offset, int c_mult)4167     public void BNNM(Allocation A, int a_offset, Allocation B, int b_offset, Allocation C, int c_offset, int c_mult) {
4168         validateL3(Element.U8(mRS), NO_TRANSPOSE, TRANSPOSE, 0, A, B, C);
4169 
4170         if (a_offset < 0 || a_offset > 255) {
4171             throw new RSRuntimeException("Invalid a_offset passed to BNNM");
4172         }
4173         if (b_offset < 0 || b_offset > 255) {
4174             throw new RSRuntimeException("Invalid b_offset passed to BNNM");
4175         }
4176         int M = -1, N = -1, K = -1;
4177         M = A.getType().getY();
4178         N = B.getType().getY();
4179         K = A.getType().getX();
4180 
4181         boolean mUseIncSupp = isIncSupp();
4182         long aID = A.getID(mRS);
4183         long bID = B.getID(mRS);
4184         long cID = C.getID(mRS);
4185         if (mUseIncSupp) {
4186             aID = getDummyAlloc(A);
4187             bID = getDummyAlloc(B);
4188             cID = getDummyAlloc(C);
4189         }
4190         mRS.nScriptIntrinsicBLAS_BNNM(getID(mRS), M, N, K, aID, a_offset, bID, b_offset, cID, c_offset, c_mult, mUseIncSupp);
4191 
4192     }
4193 
4194 }
4195