• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ANDROID_RSCPPSTRUCTS_H
18 #define ANDROID_RSCPPSTRUCTS_H
19 
20 #include "rsDefines.h"
21 #include "util/RefBase.h"
22 
23 #include <pthread.h>
24 
25 
26 /**
27  * Every row in an RS allocation is guaranteed to be aligned by this amount, and
28  * every row in a user-backed allocation must be aligned by this amount.
29  */
30 #define RS_CPU_ALLOCATION_ALIGNMENT 16
31 
32 struct dispatchTable;
33 
34 namespace android {
35 class Surface;
36 
37 namespace RSC {
38 
39 
/**
 * Pointer to a callback taking an error number and the associated error text.
 */
typedef void (*ErrorHandlerFunc_t)(uint32_t errorNum,
                                   const char *errorText);

/**
 * Pointer to a callback taking a message number, a pointer to the message
 * payload, and the payload length.
 */
typedef void (*MessageHandlerFunc_t)(uint32_t msgNum,
                                     const void *msgData,
                                     size_t msgLen);
42 
43 class RS;
44 class BaseObj;
45 class Element;
46 class Type;
47 class Allocation;
48 class Script;
49 class ScriptC;
50 class Sampler;
51 
/**
 * Status codes reported by RenderScript. After any status other than
 * RS_SUCCESS has been returned, the RenderScript context is considered dead
 * and cannot perform any additional work.
 */
enum RSError {
    /// No error
    RS_SUCCESS = 0,
    /// An invalid parameter was passed to a function
    RS_ERROR_INVALID_PARAMETER = 1,
    /// The RenderScript driver returned an error; this is often indicative
    /// of a kernel that crashed
    RS_ERROR_RUNTIME_ERROR = 2,
    /// An invalid Element was passed to a function
    RS_ERROR_INVALID_ELEMENT = 3,
    RS_ERROR_MAX = 9999
};
66 
/**
 * Per-context flags that control RenderScript behavior. Each flag occupies
 * its own bit.
 */
enum RSInitFlags {
    /// All RenderScript calls will be synchronous. May reduce latency.
    RS_INIT_SYNCHRONOUS = 1 << 0,
    /// Prefer low latency devices over potentially higher throughput devices.
    RS_INIT_LOW_LATENCY = 1 << 1,
    // Bit 1 << 2 (value 4) is reserved for the context flag low power
    /// Kernel execution will hold to give time for a debugger to be attached
    RS_INIT_WAIT_FOR_ATTACH = 1 << 3,
    RS_INIT_MAX = 1 << 4
};
77 
78 
/**
 * Two-component vector of int8_t values.
 */
class Byte2 {
 public:
  int8_t x;
  int8_t y;

  /** Builds a vector with both components set to zero. */
  Byte2()
    : x(0),
      y(0) {}

  /** Builds a vector from the given component values. */
  Byte2(int8_t initX, int8_t initY)
    : x(initX),
      y(initY) {}
};
87 
/**
 * Three-component vector of int8_t values.
 */
class Byte3 {
 public:
  int8_t x;
  int8_t y;
  int8_t z;

  /** Builds a vector with all components set to zero. */
  Byte3()
    : x(0),
      y(0),
      z(0) {}

  /** Builds a vector from the given component values. */
  Byte3(int8_t initX, int8_t initY, int8_t initZ)
    : x(initX),
      y(initY),
      z(initZ) {}
};
96 
/**
 * Four-component vector of int8_t values.
 */
class Byte4 {
 public:
  int8_t x;
  int8_t y;
  int8_t z;
  int8_t w;

  /** Builds a vector with all components set to zero. */
  Byte4()
    : x(0),
      y(0),
      z(0),
      w(0) {}

  /** Builds a vector from the given component values. */
  Byte4(int8_t initX, int8_t initY, int8_t initZ, int8_t initW)
    : x(initX),
      y(initY),
      z(initZ),
      w(initW) {}
};
105 
/**
 * Two-component vector of uint8_t values.
 */
class UByte2 {
 public:
  uint8_t x;
  uint8_t y;

  /** Builds a vector with both components set to zero. */
  UByte2()
    : x(0),
      y(0) {}

  /** Builds a vector from the given component values. */
  UByte2(uint8_t initX, uint8_t initY)
    : x(initX),
      y(initY) {}
};
114 
/**
 * Three-component vector of uint8_t values.
 */
class UByte3 {
 public:
  uint8_t x;
  uint8_t y;
  uint8_t z;

  /** Builds a vector with all components set to zero. */
  UByte3()
    : x(0),
      y(0),
      z(0) {}

  /** Builds a vector from the given component values. */
  UByte3(uint8_t initX, uint8_t initY, uint8_t initZ)
    : x(initX),
      y(initY),
      z(initZ) {}
};
123 
/**
 * Four-component vector of uint8_t values.
 */
class UByte4 {
 public:
  uint8_t x;
  uint8_t y;
  uint8_t z;
  uint8_t w;

  /** Builds a vector with all components set to zero. */
  UByte4()
    : x(0),
      y(0),
      z(0),
      w(0) {}

  /** Builds a vector from the given component values. */
  UByte4(uint8_t initX, uint8_t initY, uint8_t initZ, uint8_t initW)
    : x(initX),
      y(initY),
      z(initZ),
      w(initW) {}
};
132 
/**
 * Two-component vector of short values.
 */
class Short2 {
 public:
  short x;
  short y;

  /** Builds a vector with both components set to zero. */
  Short2()
    : x(0),
      y(0) {}

  /** Builds a vector from the given component values. */
  Short2(short initX, short initY)
    : x(initX),
      y(initY) {}
};
141 
/**
 * Three-component vector of short values.
 */
class Short3 {
 public:
  short x;
  short y;
  short z;

  /** Builds a vector with all components set to zero. */
  Short3()
    : x(0),
      y(0),
      z(0) {}

  /** Builds a vector from the given component values. */
  Short3(short initX, short initY, short initZ)
    : x(initX),
      y(initY),
      z(initZ) {}
};
150 
/**
 * Four-component vector of short values.
 */
class Short4 {
 public:
  short x;
  short y;
  short z;
  short w;

  /** Builds a vector with all components set to zero. */
  Short4()
    : x(0),
      y(0),
      z(0),
      w(0) {}

  /** Builds a vector from the given component values. */
  Short4(short initX, short initY, short initZ, short initW)
    : x(initX),
      y(initY),
      z(initZ),
      w(initW) {}
};
159 
/**
 * Two-component vector of uint16_t values.
 */
class UShort2 {
 public:
  uint16_t x;
  uint16_t y;

  /** Builds a vector with both components set to zero. */
  UShort2()
    : x(0),
      y(0) {}

  /** Builds a vector from the given component values. */
  UShort2(uint16_t initX, uint16_t initY)
    : x(initX),
      y(initY) {}
};
168 
/**
 * Three-component vector of uint16_t values.
 */
class UShort3 {
 public:
  uint16_t x;
  uint16_t y;
  uint16_t z;

  /** Builds a vector with all components set to zero. */
  UShort3()
    : x(0),
      y(0),
      z(0) {}

  /** Builds a vector from the given component values. */
  UShort3(uint16_t initX, uint16_t initY, uint16_t initZ)
    : x(initX),
      y(initY),
      z(initZ) {}
};
177 
/**
 * Four-component vector of uint16_t values.
 */
class UShort4 {
 public:
  uint16_t x;
  uint16_t y;
  uint16_t z;
  uint16_t w;

  /** Builds a vector with all components set to zero. */
  UShort4()
    : x(0),
      y(0),
      z(0),
      w(0) {}

  /** Builds a vector from the given component values. */
  UShort4(uint16_t initX, uint16_t initY, uint16_t initZ, uint16_t initW)
    : x(initX),
      y(initY),
      z(initZ),
      w(initW) {}
};
186 
/**
 * Two-component vector of int values.
 */
class Int2 {
 public:
  int x;
  int y;

  /** Builds a vector with both components set to zero. */
  Int2()
    : x(0),
      y(0) {}

  /** Builds a vector from the given component values. */
  Int2(int initX, int initY)
    : x(initX),
      y(initY) {}
};
195 
/**
 * Three-component vector of int values.
 */
class Int3 {
 public:
  int x;
  int y;
  int z;

  /** Builds a vector with all components set to zero. */
  Int3()
    : x(0),
      y(0),
      z(0) {}

  /** Builds a vector from the given component values. */
  Int3(int initX, int initY, int initZ)
    : x(initX),
      y(initY),
      z(initZ) {}
};
204 
/**
 * Four-component vector of int values.
 */
class Int4 {
 public:
  int x;
  int y;
  int z;
  int w;

  /** Builds a vector with all components set to zero. */
  Int4()
    : x(0),
      y(0),
      z(0),
      w(0) {}

  /** Builds a vector from the given component values. */
  Int4(int initX, int initY, int initZ, int initW)
    : x(initX),
      y(initY),
      z(initZ),
      w(initW) {}
};
213 
/**
 * Two-component vector of uint32_t values.
 */
class UInt2 {
 public:
  uint32_t x;
  uint32_t y;

  /** Builds a vector with both components set to zero. */
  UInt2()
    : x(0),
      y(0) {}

  /** Builds a vector from the given component values. */
  UInt2(uint32_t initX, uint32_t initY)
    : x(initX),
      y(initY) {}
};
222 
/**
 * Three-component vector of uint32_t values.
 */
class UInt3 {
 public:
  uint32_t x;
  uint32_t y;
  uint32_t z;

  /** Builds a vector with all components set to zero. */
  UInt3()
    : x(0),
      y(0),
      z(0) {}

  /** Builds a vector from the given component values. */
  UInt3(uint32_t initX, uint32_t initY, uint32_t initZ)
    : x(initX),
      y(initY),
      z(initZ) {}
};
231 
/**
 * Four-component vector of uint32_t values.
 */
class UInt4 {
 public:
  uint32_t x;
  uint32_t y;
  uint32_t z;
  uint32_t w;

  /** Builds a vector with all components set to zero. */
  UInt4()
    : x(0),
      y(0),
      z(0),
      w(0) {}

  /** Builds a vector from the given component values. */
  UInt4(uint32_t initX, uint32_t initY, uint32_t initZ, uint32_t initW)
    : x(initX),
      y(initY),
      z(initZ),
      w(initW) {}
};
240 
/**
 * Two-component vector of int64_t values.
 */
class Long2 {
 public:
  int64_t x;
  int64_t y;

  /** Builds a vector with both components set to zero. */
  Long2()
    : x(0),
      y(0) {}

  /** Builds a vector from the given component values. */
  Long2(int64_t initX, int64_t initY)
    : x(initX),
      y(initY) {}
};
249 
/**
 * Three-component vector of int64_t values.
 */
class Long3 {
 public:
  int64_t x;
  int64_t y;
  int64_t z;

  /** Builds a vector with all components set to zero. */
  Long3()
    : x(0),
      y(0),
      z(0) {}

  /** Builds a vector from the given component values. */
  Long3(int64_t initX, int64_t initY, int64_t initZ)
    : x(initX),
      y(initY),
      z(initZ) {}
};
258 
/**
 * Four-component vector of int64_t values.
 */
class Long4 {
 public:
  int64_t x;
  int64_t y;
  int64_t z;
  int64_t w;

  /** Builds a vector with all components set to zero. */
  Long4()
    : x(0),
      y(0),
      z(0),
      w(0) {}

  /** Builds a vector from the given component values. */
  Long4(int64_t initX, int64_t initY, int64_t initZ, int64_t initW)
    : x(initX),
      y(initY),
      z(initZ),
      w(initW) {}
};
267 
/**
 * Two-component vector of uint64_t values.
 */
class ULong2 {
 public:
  uint64_t x;
  uint64_t y;

  /** Builds a vector with both components set to zero. */
  ULong2()
    : x(0),
      y(0) {}

  /** Builds a vector from the given component values. */
  ULong2(uint64_t initX, uint64_t initY)
    : x(initX),
      y(initY) {}
};
276 
/**
 * Three-component vector of uint64_t values.
 */
class ULong3 {
 public:
  uint64_t x;
  uint64_t y;
  uint64_t z;

  /** Builds a vector with all components set to zero. */
  ULong3()
    : x(0),
      y(0),
      z(0) {}

  /** Builds a vector from the given component values. */
  ULong3(uint64_t initX, uint64_t initY, uint64_t initZ)
    : x(initX),
      y(initY),
      z(initZ) {}
};
285 
/**
 * Four-component vector of uint64_t values.
 */
class ULong4 {
 public:
  uint64_t x;
  uint64_t y;
  uint64_t z;
  uint64_t w;

  /** Builds a vector with all components set to zero. */
  ULong4()
    : x(0),
      y(0),
      z(0),
      w(0) {}

  /** Builds a vector from the given component values. */
  ULong4(uint64_t initX, uint64_t initY, uint64_t initZ, uint64_t initW)
    : x(initX),
      y(initY),
      z(initZ),
      w(initW) {}
};
294 
/**
 * Two-component vector of float values.
 */
class Float2 {
 public:
  float x;
  float y;

  /** Builds a vector with both components set to zero. */
  Float2()
    : x(0.f),
      y(0.f) {}

  /** Builds a vector from the given component values. */
  Float2(float initX, float initY)
    : x(initX),
      y(initY) {}
};
303 
/**
 * Three-component vector of float values.
 */
class Float3 {
 public:
  float x;
  float y;
  float z;

  /** Builds a vector with all components set to zero. */
  Float3()
    : x(0.f),
      y(0.f),
      z(0.f) {}

  /** Builds a vector from the given component values. */
  Float3(float initX, float initY, float initZ)
    : x(initX),
      y(initY),
      z(initZ) {}
};
312 
/**
 * Four-component vector of float values.
 */
class Float4 {
 public:
  float x;
  float y;
  float z;
  float w;

  /** Builds a vector with all components set to zero. */
  Float4()
    : x(0.f),
      y(0.f),
      z(0.f),
      w(0.f) {}

  /** Builds a vector from the given component values. */
  Float4(float initX, float initY, float initZ, float initW)
    : x(initX),
      y(initY),
      z(initZ),
      w(initW) {}
};
321 
/**
 * Two-component vector of double values.
 */
class Double2 {
 public:
  double x;
  double y;

  /** Builds a vector with both components set to zero. */
  Double2()
    : x(0),
      y(0) {}

  /** Builds a vector from the given component values. */
  Double2(double initX, double initY)
    : x(initX),
      y(initY) {}
};
330 
/**
 * Three-component vector of double values.
 */
class Double3 {
 public:
  double x;
  double y;
  double z;

  /** Builds a vector with all components set to zero. */
  Double3()
    : x(0),
      y(0),
      z(0) {}

  /** Builds a vector from the given component values. */
  Double3(double initX, double initY, double initZ)
    : x(initX),
      y(initY),
      z(initZ) {}
};
339 
/**
 * Four-component vector of double values.
 */
class Double4 {
 public:
  double x;
  double y;
  double z;
  double w;

  /** Builds a vector with all components set to zero. */
  Double4()
    : x(0),
      y(0),
      z(0),
      w(0) {}

  /** Builds a vector from the given component values. */
  Double4(double initX, double initY, double initZ, double initW)
    : x(initX),
      y(initY),
      z(initZ),
      w(initW) {}
};
348 
349  /**
350   * The RenderScript context. This class controls initialization, resource management, and teardown.
351   */
352  class RS : public android::RSC::LightRefBase<RS> {
353 
354  public:
355     RS();
356     virtual ~RS();
357 
358     /**
359      * Initializes a RenderScript context. A context must be initialized before it can be used.
360      * @param[in] name Directory name to be used by this context. This should be equivalent to
361      * Context.getCacheDir().
362      * @param[in] flags Optional flags for this context.
363      * @return true on success
364      */
365     bool init(const char * name, uint32_t flags = 0);
366 
367     /**
368      * Initializes a RenderScript context. A context must be initialized before it can be used.
369      * @param[in] name Directory name to be used by this context. This should be equivalent to
370      * Context.getCacheDir().
371      * @param[in] flags Flags for this context.
372      * @param[in] targetApi Target RS API level.
373      * @return true on success
374      */
375     bool init(const char * name, uint32_t flags, int targetApi);
376 
377     /**
378      * Sets the error handler function for this context. This error handler is
379      * called whenever an error is set.
380      *
381      * @param[in] func Error handler function
382      */
383     void setErrorHandler(ErrorHandlerFunc_t func);
384 
385     /**
386      * Returns the current error handler function for this context.
387      *
388      * @return pointer to current error handler function or NULL if not set
389      */
getErrorHandler()390     ErrorHandlerFunc_t getErrorHandler() { return mErrorFunc; }
391 
392     /**
393      * Sets the message handler function for this context. This message handler
394      * is called whenever a message is sent from a RenderScript kernel.
395      *
396      *  @param[in] func Message handler function
397      */
398     void setMessageHandler(MessageHandlerFunc_t func);
399 
400     /**
401      * Returns the current message handler function for this context.
402      *
403      * @return pointer to current message handler function or NULL if not set
404      */
getMessageHandler()405     MessageHandlerFunc_t getMessageHandler() { return mMessageFunc; }
406 
407     /**
408      * Returns current status for the context.
409      *
410      * @return current error
411      */
412     RSError getError();
413 
414     /**
415      * Waits for any currently running asynchronous operations to finish. This
416      * should only be used for performance testing and timing.
417      */
418     void finish();
419 
getContext()420     RsContext getContext() { return mContext; }
421     void throwError(RSError error, const char *errMsg);
422 
423     static dispatchTable* dispatch;
424 
425  private:
426     static bool usingNative;
427     static bool initDispatch(int targetApi);
428 
429     static void * threadProc(void *);
430 
431     static bool gInitialized;
432     static pthread_mutex_t gInitMutex;
433 
434     pthread_t mMessageThreadId;
435     pid_t mNativeMessageThreadId;
436     bool mMessageRun;
437 
438     RsContext mContext;
439     RSError mCurrentError;
440 
441     ErrorHandlerFunc_t mErrorFunc;
442     MessageHandlerFunc_t mMessageFunc;
443     bool mInit;
444 
445     char mCacheDir[PATH_MAX+1];
446     uint32_t mCacheDirLen;
447 
448     struct {
449         sp<const Element> U8;
450         sp<const Element> U8_2;
451         sp<const Element> U8_3;
452         sp<const Element> U8_4;
453         sp<const Element> I8;
454         sp<const Element> I8_2;
455         sp<const Element> I8_3;
456         sp<const Element> I8_4;
457         sp<const Element> U16;
458         sp<const Element> U16_2;
459         sp<const Element> U16_3;
460         sp<const Element> U16_4;
461         sp<const Element> I16;
462         sp<const Element> I16_2;
463         sp<const Element> I16_3;
464         sp<const Element> I16_4;
465         sp<const Element> U32;
466         sp<const Element> U32_2;
467         sp<const Element> U32_3;
468         sp<const Element> U32_4;
469         sp<const Element> I32;
470         sp<const Element> I32_2;
471         sp<const Element> I32_3;
472         sp<const Element> I32_4;
473         sp<const Element> U64;
474         sp<const Element> U64_2;
475         sp<const Element> U64_3;
476         sp<const Element> U64_4;
477         sp<const Element> I64;
478         sp<const Element> I64_2;
479         sp<const Element> I64_3;
480         sp<const Element> I64_4;
481         sp<const Element> F16;
482         sp<const Element> F16_2;
483         sp<const Element> F16_3;
484         sp<const Element> F16_4;
485         sp<const Element> F32;
486         sp<const Element> F32_2;
487         sp<const Element> F32_3;
488         sp<const Element> F32_4;
489         sp<const Element> F64;
490         sp<const Element> F64_2;
491         sp<const Element> F64_3;
492         sp<const Element> F64_4;
493         sp<const Element> BOOLEAN;
494 
495         sp<const Element> ELEMENT;
496         sp<const Element> TYPE;
497         sp<const Element> ALLOCATION;
498         sp<const Element> SAMPLER;
499         sp<const Element> SCRIPT;
500         sp<const Element> MESH;
501         sp<const Element> PROGRAM_FRAGMENT;
502         sp<const Element> PROGRAM_VERTEX;
503         sp<const Element> PROGRAM_RASTER;
504         sp<const Element> PROGRAM_STORE;
505 
506         sp<const Element> A_8;
507         sp<const Element> RGB_565;
508         sp<const Element> RGB_888;
509         sp<const Element> RGBA_5551;
510         sp<const Element> RGBA_4444;
511         sp<const Element> RGBA_8888;
512 
513         sp<const Element> YUV;
514 
515         sp<const Element> MATRIX_4X4;
516         sp<const Element> MATRIX_3X3;
517         sp<const Element> MATRIX_2X2;
518     } mElements;
519 
520     struct {
521         sp<const Sampler> CLAMP_NEAREST;
522         sp<const Sampler> CLAMP_LINEAR;
523         sp<const Sampler> CLAMP_LINEAR_MIP_LINEAR;
524         sp<const Sampler> WRAP_NEAREST;
525         sp<const Sampler> WRAP_LINEAR;
526         sp<const Sampler> WRAP_LINEAR_MIP_LINEAR;
527         sp<const Sampler> MIRRORED_REPEAT_NEAREST;
528         sp<const Sampler> MIRRORED_REPEAT_LINEAR;
529         sp<const Sampler> MIRRORED_REPEAT_LINEAR_MIP_LINEAR;
530     } mSamplers;
531     friend class Sampler;
532     friend class Element;
533     friend class ScriptC;
534 };
535 
536  /**
537   * Base class for all RenderScript objects. Not for direct use by developers.
538   */
539 class BaseObj : public android::RSC::LightRefBase<BaseObj> {
540 public:
541     void * getID() const;
542     virtual ~BaseObj();
543     virtual void updateFromNative();
544     virtual bool equals(sp<const BaseObj> obj);
545 
546 protected:
547     void *mID;
548     RS* mRS;
549     const char * mName;
550 
551     BaseObj(void *id, sp<RS> rs);
552     void checkValid();
553 
554     static void * getObjID(sp<const BaseObj> o);
555 
556 };
557 
558  /**
559   * This class provides the primary method through which data is passed to and
560   * from RenderScript kernels. An Allocation provides the backing store for a
561   * given Type.
562   *
563   * An Allocation also contains a set of usage flags that denote how the
564   * Allocation could be used. For example, an Allocation may have usage flags
565   * specifying that it can be used from a script as well as input to a
566   * Sampler. A developer must synchronize across these different usages using
567   * syncAll(int) in order to ensure that different users of the Allocation have
568   * a consistent view of memory. For example, in the case where an Allocation is
569   * used as the output of one kernel and as Sampler input in a later kernel, a
570   * developer must call syncAll(RS_ALLOCATION_USAGE_SCRIPT) prior to launching the
571   * second kernel to ensure correctness.
572   */
573 class Allocation : public BaseObj {
574 protected:
575     sp<const Type> mType;
576     uint32_t mUsage;
577     sp<Allocation> mAdaptedAllocation;
578 
579     bool mConstrainedLOD;
580     bool mConstrainedFace;
581     bool mConstrainedY;
582     bool mConstrainedZ;
583     bool mReadAllowed;
584     bool mWriteAllowed;
585     bool mAutoPadding;
586     uint32_t mSelectedY;
587     uint32_t mSelectedZ;
588     uint32_t mSelectedLOD;
589     RsAllocationCubemapFace mSelectedFace;
590 
591     uint32_t mCurrentDimX;
592     uint32_t mCurrentDimY;
593     uint32_t mCurrentDimZ;
594     uint32_t mCurrentCount;
595 
596     void * getIDSafe() const;
597     void updateCacheInfo(sp<const Type> t);
598 
599     Allocation(void *id, sp<RS> rs, sp<const Type> t, uint32_t usage);
600 
601     void validateIsInt64();
602     void validateIsInt32();
603     void validateIsInt16();
604     void validateIsInt8();
605     void validateIsFloat32();
606     void validateIsFloat64();
607     void validateIsObject();
608 
609     virtual void updateFromNative();
610 
611     void validate2DRange(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h);
612     void validate3DRange(uint32_t xoff, uint32_t yoff, uint32_t zoff,
613                          uint32_t w, uint32_t h, uint32_t d);
614 
615 public:
616 
617     /**
618      * Return Type for the allocation.
619      * @return pointer to underlying Type
620      */
getType()621     sp<const Type> getType() const {
622         return mType;
623     }
624 
625     /**
626      * Enable/Disable AutoPadding for Vec3 elements.
627      *
628      * @param useAutoPadding True: enable AutoPadding; flase: disable AutoPadding
629      *
630      */
setAutoPadding(bool useAutoPadding)631     void setAutoPadding(bool useAutoPadding) {
632         mAutoPadding = useAutoPadding;
633     }
634 
635     /**
636      * Propagate changes from one usage of the Allocation to other usages of the Allocation.
637      * @param[in] srcLocation source location with changes to propagate elsewhere
638      */
639     void syncAll(RsAllocationUsageType srcLocation);
640 
641     /**
642      * Send a buffer to the output stream.  The contents of the Allocation will
643      * be undefined after this operation. This operation is only valid if
644      * USAGE_IO_OUTPUT is set on the Allocation.
645      */
646     void ioSendOutput();
647 
648     /**
649      * Receive the latest input into the Allocation. This operation
650      * is only valid if USAGE_IO_INPUT is set on the Allocation.
651      */
652     void ioGetInput();
653 
654 #if !defined(RS_SERVER) && !defined(RS_COMPATIBILITY_LIB)
655     /**
656      * Returns the handle to a raw buffer that is being managed by the screen
657      * compositor. This operation is only valid for Allocations with USAGE_IO_INPUT.
658      * @return Surface associated with allocation
659      */
660     sp<Surface> getSurface();
661 
662     /**
663      * Associate a Surface with this Allocation. This
664      * operation is only valid for Allocations with USAGE_IO_OUTPUT.
665      * @param[in] s Surface to associate with allocation
666      */
667     void setSurface(sp<Surface> s);
668 #endif
669 
670     /**
671      * Generate a mipmap chain. This is only valid if the Type of the Allocation
672      * includes mipmaps. This function will generate a complete set of mipmaps
673      * from the top level LOD and place them into the script memory space. If
674      * the Allocation is also using other memory spaces, a call to
675      * syncAll(Allocation.USAGE_SCRIPT) is required.
676      */
677     void generateMipmaps();
678 
679     /**
680      * Copy an array into part of this Allocation.
681      * @param[in] off offset of first Element to be overwritten
682      * @param[in] count number of Elements to copy
683      * @param[in] data array from which to copy
684      */
685     void copy1DRangeFrom(uint32_t off, size_t count, const void *data);
686 
687     /**
688      * Copy part of an Allocation into part of this Allocation.
689      * @param[in] off offset of first Element to be overwritten
690      * @param[in] count number of Elements to copy
691      * @param[in] data Allocation from which to copy
692      * @param[in] dataOff offset of first Element in data to copy
693      */
694     void copy1DRangeFrom(uint32_t off, size_t count, sp<const Allocation> data, uint32_t dataOff);
695 
696     /**
697      * Copy an array into part of this Allocation.
698      * @param[in] off offset of first Element to be overwritten
699      * @param[in] count number of Elements to copy
700      * @param[in] data array from which to copy
701      */
702     void copy1DRangeTo(uint32_t off, size_t count, void *data);
703 
704     /**
705      * Copy entire array to an Allocation.
706      * @param[in] data array from which to copy
707      */
708     void copy1DFrom(const void* data);
709 
710     /**
711      * Copy entire Allocation to an array.
712      * @param[in] data destination array
713      */
714     void copy1DTo(void* data);
715 
716     /**
717      * Copy from an array into a rectangular region in this Allocation. The
718      * array is assumed to be tightly packed.
719      * @param[in] xoff X offset of region to update in this Allocation
720      * @param[in] yoff Y offset of region to update in this Allocation
721      * @param[in] w Width of region to update
722      * @param[in] h Height of region to update
723      * @param[in] data Array from which to copy
724      */
725     void copy2DRangeFrom(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h,
726                          const void *data);
727 
728     /**
729      * Copy from this Allocation into a rectangular region in an array. The
730      * array is assumed to be tightly packed.
731      * @param[in] xoff X offset of region to copy from this Allocation
732      * @param[in] yoff Y offset of region to copy from this Allocation
733      * @param[in] w Width of region to update
734      * @param[in] h Height of region to update
735      * @param[in] data destination array
736      */
737     void copy2DRangeTo(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h,
738                        void *data);
739 
740     /**
741      * Copy from an Allocation into a rectangular region in this Allocation.
742      * @param[in] xoff X offset of region to update in this Allocation
743      * @param[in] yoff Y offset of region to update in this Allocation
744      * @param[in] w Width of region to update
745      * @param[in] h Height of region to update
746      * @param[in] data Allocation from which to copy
747      * @param[in] dataXoff X offset of region to copy from in data
748      * @param[in] dataYoff Y offset of region to copy from in data
749      */
750     void copy2DRangeFrom(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h,
751                          sp<const Allocation> data, uint32_t dataXoff, uint32_t dataYoff);
752 
753     /**
754      * Copy from a strided array into a rectangular region in this Allocation.
755      * @param[in] xoff X offset of region to update in this Allocation
756      * @param[in] yoff Y offset of region to update in this Allocation
757      * @param[in] w Width of region to update
758      * @param[in] h Height of region to update
759      * @param[in] data array from which to copy
760      * @param[in] stride stride of data in bytes
761      */
762     void copy2DStridedFrom(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h,
763                            const void *data, size_t stride);
764 
765     /**
766      * Copy from a strided array into this Allocation.
767      * @param[in] data array from which to copy
768      * @param[in] stride stride of data in bytes
769      */
770     void copy2DStridedFrom(const void *data, size_t stride);
771 
772     /**
773      * Copy from a rectangular region in this Allocation into a strided array.
774      * @param[in] xoff X offset of region to update in this Allocation
775      * @param[in] yoff Y offset of region to update in this Allocation
776      * @param[in] w Width of region to update
777      * @param[in] h Height of region to update
778      * @param[in] data destination array
779      * @param[in] stride stride of data in bytes
780      */
781     void copy2DStridedTo(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h,
782                          void *data, size_t stride);
783 
784     /**
785      * Copy this Allocation into a strided array.
786      * @param[in] data destination array
787      * @param[in] stride stride of data in bytes
788      */
789     void copy2DStridedTo(void *data, size_t stride);
790 
791 
792     /**
793      * Copy from an array into a 3D region in this Allocation. The
794      * array is assumed to be tightly packed.
795      * @param[in] xoff X offset of region to update in this Allocation
796      * @param[in] yoff Y offset of region to update in this Allocation
797      * @param[in] zoff Z offset of region to update in this Allocation
798      * @param[in] w Width of region to update
799      * @param[in] h Height of region to update
800      * @param[in] d Depth of region to update
801      * @param[in] data Array from which to copy
802      */
803     void copy3DRangeFrom(uint32_t xoff, uint32_t yoff, uint32_t zoff, uint32_t w,
804                          uint32_t h, uint32_t d, const void* data);
805 
806     /**
807      * Copy from an Allocation into a 3D region in this Allocation.
808      * @param[in] xoff X offset of region to update in this Allocation
809      * @param[in] yoff Y offset of region to update in this Allocation
810      * @param[in] zoff Z offset of region to update in this Allocation
811      * @param[in] w Width of region to update
812      * @param[in] h Height of region to update
813      * @param[in] d Depth of region to update
814      * @param[in] data Allocation from which to copy
815      * @param[in] dataXoff X offset of region in data to copy from
816      * @param[in] dataYoff Y offset of region in data to copy from
817      * @param[in] dataZoff Z offset of region in data to copy from
818      */
819     void copy3DRangeFrom(uint32_t xoff, uint32_t yoff, uint32_t zoff,
820                          uint32_t w, uint32_t h, uint32_t d,
821                          sp<const Allocation> data,
822                          uint32_t dataXoff, uint32_t dataYoff, uint32_t dataZoff);
823 
824     /**
825      * Copy a 3D region in this Allocation into an array. The
826      * array is assumed to be tightly packed.
827      * @param[in] xoff X offset of region to update in this Allocation
828      * @param[in] yoff Y offset of region to update in this Allocation
829      * @param[in] zoff Z offset of region to update in this Allocation
830      * @param[in] w Width of region to update
831      * @param[in] h Height of region to update
832      * @param[in] d Depth of region to update
833      * @param[in] data Array from which to copy
834      */
835     void copy3DRangeTo(uint32_t xoff, uint32_t yoff, uint32_t zoff, uint32_t w,
836                          uint32_t h, uint32_t d, void* data);
837 
838     /**
839      * Creates an Allocation for use by scripts with a given Type.
840      * @param[in] rs Context to which the Allocation will belong
841      * @param[in] type Type of the Allocation
842      * @param[in] mipmaps desired mipmap behavior for the Allocation
843      * @param[in] usage usage for the Allocation
844      * @return new Allocation
845      */
846     static sp<Allocation> createTyped(sp<RS> rs, sp<const Type> type,
847                                    RsAllocationMipmapControl mipmaps, uint32_t usage);
848 
849     /**
850      * Creates an Allocation for use by scripts with a given Type and a backing pointer. For use
851      * with RS_ALLOCATION_USAGE_SHARED.
852      * @param[in] rs Context to which the Allocation will belong
853      * @param[in] type Type of the Allocation
854      * @param[in] mipmaps desired mipmap behavior for the Allocation
855      * @param[in] usage usage for the Allocation
856      * @param[in] pointer existing backing store to use for this Allocation if possible
857      * @return new Allocation
858      */
859     static sp<Allocation> createTyped(sp<RS> rs, sp<const Type> type,
860                                    RsAllocationMipmapControl mipmaps, uint32_t usage, void * pointer);
861 
862     /**
863      * Creates an Allocation for use by scripts with a given Type with no mipmaps.
864      * @param[in] rs Context to which the Allocation will belong
865      * @param[in] type Type of the Allocation
866      * @param[in] usage usage for the Allocation
867      * @return new Allocation
868      */
869     static sp<Allocation> createTyped(sp<RS> rs, sp<const Type> type,
870                                    uint32_t usage = RS_ALLOCATION_USAGE_SCRIPT);
871     /**
872      * Creates an Allocation with a specified number of given elements.
873      * @param[in] rs Context to which the Allocation will belong
874      * @param[in] e Element used in the Allocation
875      * @param[in] count Number of elements of the Allocation
876      * @param[in] usage usage for the Allocation
877      * @return new Allocation
878      */
879     static sp<Allocation> createSized(sp<RS> rs, sp<const Element> e, size_t count,
880                                    uint32_t usage = RS_ALLOCATION_USAGE_SCRIPT);
881 
882     /**
883      * Creates a 2D Allocation with a specified number of given elements.
884      * @param[in] rs Context to which the Allocation will belong
885      * @param[in] e Element used in the Allocation
886      * @param[in] x Width in Elements of the Allocation
887      * @param[in] y Height of the Allocation
888      * @param[in] usage usage for the Allocation
889      * @return new Allocation
890      */
891     static sp<Allocation> createSized2D(sp<RS> rs, sp<const Element> e,
892                                         size_t x, size_t y,
893                                         uint32_t usage = RS_ALLOCATION_USAGE_SCRIPT);
894 
895 
896     /**
897      * Get the backing pointer for a USAGE_SHARED allocation.
898      * @param[in] stride optional parameter. when non-NULL, will contain
899      *   stride in bytes of a 2D Allocation
900      * @return pointer to data
901      */
902     void * getPointer(size_t *stride = NULL);
903 };
904 
905  /**
906   * An Element represents one item within an Allocation. An Element is roughly
907   * equivalent to a C type in a RenderScript kernel. Elements may be basic
908   * or complex. Some basic elements are:
909 
910   * - A single float value (equivalent to a float in a kernel)
911   * - A four-element float vector (equivalent to a float4 in a kernel)
912   * - An unsigned 32-bit integer (equivalent to an unsigned int in a kernel)
913   * - A single signed 8-bit integer (equivalent to a char in a kernel)
914 
915   * Basic Elements are composed of an Element.DataType and an
916   * Element.DataKind. The DataType encodes C type information of an Element,
917   * while the DataKind encodes how that Element should be interpreted by a
918   * Sampler. Note that Allocation objects with DataKind USER cannot be used as
919   * input for a Sampler. In general, Allocation objects that are intended for
920   * use with a Sampler should use bitmap-derived Elements such as
921   * Element::RGBA_8888.
922  */
923 
924 
925 class Element : public BaseObj {
926 public:
927     bool isComplex();
928 
929     /**
930      * Elements could be simple, such as an int or a float, or a structure with
931      * multiple sub-elements, such as a collection of floats, float2,
932      * float4. This function returns zero for simple elements or the number of
933      * sub-elements otherwise.
934      * @return number of sub-elements
935      */
getSubElementCount()936     size_t getSubElementCount() {
937         return mVisibleElementMapSize;
938     }
939 
940     /**
941      * For complex Elements, this returns the sub-element at a given index.
942      * @param[in] index index of sub-element
943      * @return sub-element
944      */
945     sp<const Element> getSubElement(uint32_t index);
946 
947     /**
948      * For complex Elements, this returns the name of the sub-element at a given
949      * index.
950      * @param[in] index index of sub-element
951      * @return name of sub-element
952      */
953     const char * getSubElementName(uint32_t index);
954 
955     /**
956      * For complex Elements, this returns the size of the sub-element at a given
957      * index.
958      * @param[in] index index of sub-element
959      * @return size of sub-element
960      */
961     size_t getSubElementArraySize(uint32_t index);
962 
963     /**
964      * Returns the location of a sub-element within a complex Element.
965      * @param[in] index index of sub-element
966      * @return offset in bytes
967      */
968     uint32_t getSubElementOffsetBytes(uint32_t index);
969 
970     /**
971      * Returns the data type used for the Element.
972      * @return data type
973      */
getDataType()974     RsDataType getDataType() const {
975         return mType;
976     }
977 
978     /**
979      * Returns the data kind used for the Element.
980      * @return data kind
981      */
getDataKind()982     RsDataKind getDataKind() const {
983         return mKind;
984     }
985 
986     /**
987      * Returns the size in bytes of the Element.
988      * @return size in bytes
989      */
getSizeBytes()990     size_t getSizeBytes() const {
991         return mSizeBytes;
992     }
993 
994     /**
995      * Returns the number of vector components for this Element.
996      * @return number of vector components
997      */
getVectorSize()998     uint32_t getVectorSize() const {
999         return mVectorSize;
1000     }
1001 
1002     /**
1003      * Utility function for returning an Element containing a single bool.
1004      * @param[in] rs RenderScript context
1005      * @return Element
1006      */
1007     static sp<const Element> BOOLEAN(sp<RS> rs);
1008     /**
1009      * Utility function for returning an Element containing a single unsigned char.
1010      * @param[in] rs RenderScript context
1011      * @return Element
1012      */
1013     static sp<const Element> U8(sp<RS> rs);
1014     /**
1015      * Utility function for returning an Element containing a single signed char.
1016      * @param[in] rs RenderScript context
1017      * @return Element
1018      */
1019     static sp<const Element> I8(sp<RS> rs);
1020     /**
1021      * Utility function for returning an Element containing a single unsigned short.
1022      * @param[in] rs RenderScript context
1023      * @return Element
1024      */
1025     static sp<const Element> U16(sp<RS> rs);
1026     /**
1027      * Utility function for returning an Element containing a single signed short.
1028      * @param[in] rs RenderScript context
1029      * @return Element
1030      */
1031     static sp<const Element> I16(sp<RS> rs);
1032     /**
1033      * Utility function for returning an Element containing a single unsigned int.
1034      * @param[in] rs RenderScript context
1035      * @return Element
1036      */
1037     static sp<const Element> U32(sp<RS> rs);
1038     /**
1039      * Utility function for returning an Element containing a single signed int.
1040      * @param[in] rs RenderScript context
1041      * @return Element
1042      */
1043     static sp<const Element> I32(sp<RS> rs);
1044     /**
1045      * Utility function for returning an Element containing a single unsigned long long.
1046      * @param[in] rs RenderScript context
1047      * @return Element
1048      */
1049     static sp<const Element> U64(sp<RS> rs);
1050     /**
1051      * Utility function for returning an Element containing a single signed long long.
1052      * @param[in] rs RenderScript context
1053      * @return Element
1054      */
1055     static sp<const Element> I64(sp<RS> rs);
1056     /**
1057      * Utility function for returning an Element containing a single half.
1058      * @param[in] rs RenderScript context
1059      * @return Element
1060      */
1061     static sp<const Element> F16(sp<RS> rs);
1062     /**
1063      * Utility function for returning an Element containing a single float.
1064      * @param[in] rs RenderScript context
1065      * @return Element
1066      */
1067     static sp<const Element> F32(sp<RS> rs);
1068     /**
1069      * Utility function for returning an Element containing a single double.
1070      * @param[in] rs RenderScript context
1071      * @return Element
1072      */
1073     static sp<const Element> F64(sp<RS> rs);
1074     /**
1075      * Utility function for returning an Element containing a single Element.
1076      * @param[in] rs RenderScript context
1077      * @return Element
1078      */
1079     static sp<const Element> ELEMENT(sp<RS> rs);
1080     /**
1081      * Utility function for returning an Element containing a single Type.
1082      * @param[in] rs RenderScript context
1083      * @return Element
1084      */
1085     static sp<const Element> TYPE(sp<RS> rs);
1086     /**
1087      * Utility function for returning an Element containing a single Allocation.
1088      * @param[in] rs RenderScript context
1089      * @return Element
1090      */
1091     static sp<const Element> ALLOCATION(sp<RS> rs);
1092     /**
1093      * Utility function for returning an Element containing a single Sampler.
1094      * @param[in] rs RenderScript context
1095      * @return Element
1096      */
1097     static sp<const Element> SAMPLER(sp<RS> rs);
1098     /**
1099      * Utility function for returning an Element containing a single Script.
1100      * @param[in] rs RenderScript context
1101      * @return Element
1102      */
1103     static sp<const Element> SCRIPT(sp<RS> rs);
1104     /**
1105      * Utility function for returning an Element containing an ALPHA_8 pixel.
1106      * @param[in] rs RenderScript context
1107      * @return Element
1108      */
1109     static sp<const Element> A_8(sp<RS> rs);
1110     /**
1111      * Utility function for returning an Element containing an RGB_565 pixel.
1112      * @param[in] rs RenderScript context
1113      * @return Element
1114      */
1115     static sp<const Element> RGB_565(sp<RS> rs);
1116     /**
1117      * Utility function for returning an Element containing an RGB_888 pixel.
1118      * @param[in] rs RenderScript context
1119      * @return Element
1120      */
1121     static sp<const Element> RGB_888(sp<RS> rs);
1122     /**
1123      * Utility function for returning an Element containing an RGBA_5551 pixel.
1124      * @param[in] rs RenderScript context
1125      * @return Element
1126      */
1127     static sp<const Element> RGBA_5551(sp<RS> rs);
1128     /**
1129      * Utility function for returning an Element containing an RGBA_4444 pixel.
1130      * @param[in] rs RenderScript context
1131      * @return Element
1132      */
1133     static sp<const Element> RGBA_4444(sp<RS> rs);
1134     /**
1135      * Utility function for returning an Element containing an RGBA_8888 pixel.
1136      * @param[in] rs RenderScript context
1137      * @return Element
1138      */
1139     static sp<const Element> RGBA_8888(sp<RS> rs);
1140 
1141     /**
1142      * Utility function for returning an Element containing a half2.
1143      * @param[in] rs RenderScript context
1144      * @return Element
1145      */
1146     static sp<const Element> F16_2(sp<RS> rs);
1147     /**
1148      * Utility function for returning an Element containing a half3.
1149      * @param[in] rs RenderScript context
1150      * @return Element
1151      */
1152     static sp<const Element> F16_3(sp<RS> rs);
1153     /**
1154      * Utility function for returning an Element containing a half4.
1155      * @param[in] rs RenderScript context
1156      * @return Element
1157      */
1158     static sp<const Element> F16_4(sp<RS> rs);
1159 
1160     /**
1161      * Utility function for returning an Element containing a float2.
1162      * @param[in] rs RenderScript context
1163      * @return Element
1164      */
1165     static sp<const Element> F32_2(sp<RS> rs);
1166     /**
1167      * Utility function for returning an Element containing a float3.
1168      * @param[in] rs RenderScript context
1169      * @return Element
1170      */
1171     static sp<const Element> F32_3(sp<RS> rs);
1172     /**
1173      * Utility function for returning an Element containing a float4.
1174      * @param[in] rs RenderScript context
1175      * @return Element
1176      */
1177     static sp<const Element> F32_4(sp<RS> rs);
1178     /**
1179      * Utility function for returning an Element containing a double2.
1180      * @param[in] rs RenderScript context
1181      * @return Element
1182      */
1183     static sp<const Element> F64_2(sp<RS> rs);
1184     /**
1185      * Utility function for returning an Element containing a double3.
1186      * @param[in] rs RenderScript context
1187      * @return Element
1188      */
1189     static sp<const Element> F64_3(sp<RS> rs);
1190     /**
1191      * Utility function for returning an Element containing a double4.
1192      * @param[in] rs RenderScript context
1193      * @return Element
1194      */
1195     static sp<const Element> F64_4(sp<RS> rs);
1196     /**
1197      * Utility function for returning an Element containing a uchar2.
1198      * @param[in] rs RenderScript context
1199      * @return Element
1200      */
1201     static sp<const Element> U8_2(sp<RS> rs);
1202     /**
1203      * Utility function for returning an Element containing a uchar3.
1204      * @param[in] rs RenderScript context
1205      * @return Element
1206      */
1207     static sp<const Element> U8_3(sp<RS> rs);
1208     /**
1209      * Utility function for returning an Element containing a uchar4.
1210      * @param[in] rs RenderScript context
1211      * @return Element
1212      */
1213     static sp<const Element> U8_4(sp<RS> rs);
1214     /**
1215      * Utility function for returning an Element containing a char2.
1216      * @param[in] rs RenderScript context
1217      * @return Element
1218      */
1219     static sp<const Element> I8_2(sp<RS> rs);
1220     /**
1221      * Utility function for returning an Element containing a char3.
1222      * @param[in] rs RenderScript context
1223      * @return Element
1224      */
1225     static sp<const Element> I8_3(sp<RS> rs);
1226     /**
1227      * Utility function for returning an Element containing a char4.
1228      * @param[in] rs RenderScript context
1229      * @return Element
1230      */
1231     static sp<const Element> I8_4(sp<RS> rs);
1232     /**
1233      * Utility function for returning an Element containing a ushort2.
1234      * @param[in] rs RenderScript context
1235      * @return Element
1236      */
1237     static sp<const Element> U16_2(sp<RS> rs);
1238     /**
1239      * Utility function for returning an Element containing a ushort3.
1240      * @param[in] rs RenderScript context
1241      * @return Element
1242      */
1243     static sp<const Element> U16_3(sp<RS> rs);
1244     /**
1245      * Utility function for returning an Element containing a ushort4.
1246      * @param[in] rs RenderScript context
1247      * @return Element
1248      */
1249     static sp<const Element> U16_4(sp<RS> rs);
1250     /**
1251      * Utility function for returning an Element containing a short2.
1252      * @param[in] rs RenderScript context
1253      * @return Element
1254      */
1255     static sp<const Element> I16_2(sp<RS> rs);
1256     /**
1257      * Utility function for returning an Element containing a short3.
1258      * @param[in] rs RenderScript context
1259      * @return Element
1260      */
1261     static sp<const Element> I16_3(sp<RS> rs);
1262     /**
1263      * Utility function for returning an Element containing a short4.
1264      * @param[in] rs RenderScript context
1265      * @return Element
1266      */
1267     static sp<const Element> I16_4(sp<RS> rs);
1268     /**
1269      * Utility function for returning an Element containing a uint2.
1270      * @param[in] rs RenderScript context
1271      * @return Element
1272      */
1273     static sp<const Element> U32_2(sp<RS> rs);
1274     /**
1275      * Utility function for returning an Element containing a uint3.
1276      * @param[in] rs RenderScript context
1277      * @return Element
1278      */
1279     static sp<const Element> U32_3(sp<RS> rs);
1280     /**
1281      * Utility function for returning an Element containing a uint4.
1282      * @param[in] rs RenderScript context
1283      * @return Element
1284      */
1285     static sp<const Element> U32_4(sp<RS> rs);
1286     /**
1287      * Utility function for returning an Element containing an int2.
1288      * @param[in] rs RenderScript context
1289      * @return Element
1290      */
1291     static sp<const Element> I32_2(sp<RS> rs);
1292     /**
1293      * Utility function for returning an Element containing an int3.
1294      * @param[in] rs RenderScript context
1295      * @return Element
1296      */
1297     static sp<const Element> I32_3(sp<RS> rs);
1298     /**
1299      * Utility function for returning an Element containing an int4.
1300      * @param[in] rs RenderScript context
1301      * @return Element
1302      */
1303     static sp<const Element> I32_4(sp<RS> rs);
1304     /**
1305      * Utility function for returning an Element containing a ulong2.
1306      * @param[in] rs RenderScript context
1307      * @return Element
1308      */
1309     static sp<const Element> U64_2(sp<RS> rs);
1310     /**
1311      * Utility function for returning an Element containing a ulong3.
1312      * @param[in] rs RenderScript context
1313      * @return Element
1314      */
1315     static sp<const Element> U64_3(sp<RS> rs);
1316     /**
1317      * Utility function for returning an Element containing a ulong4.
1318      * @param[in] rs RenderScript context
1319      * @return Element
1320      */
1321     static sp<const Element> U64_4(sp<RS> rs);
1322     /**
1323      * Utility function for returning an Element containing a long2.
1324      * @param[in] rs RenderScript context
1325      * @return Element
1326      */
1327     static sp<const Element> I64_2(sp<RS> rs);
1328     /**
1329      * Utility function for returning an Element containing a long3.
1330      * @param[in] rs RenderScript context
1331      * @return Element
1332      */
1333     static sp<const Element> I64_3(sp<RS> rs);
1334     /**
1335      * Utility function for returning an Element containing a long4.
1336      * @param[in] rs RenderScript context
1337      * @return Element
1338      */
1339     static sp<const Element> I64_4(sp<RS> rs);
1340     /**
1341      * Utility function for returning an Element containing a YUV pixel.
1342      * @param[in] rs RenderScript context
1343      * @return Element
1344      */
1345     static sp<const Element> YUV(sp<RS> rs);
1346     /**
1347      * Utility function for returning an Element containing an rs_matrix_4x4.
1348      * @param[in] rs RenderScript context
1349      * @return Element
1350      */
1351     static sp<const Element> MATRIX_4X4(sp<RS> rs);
1352     /**
1353      * Utility function for returning an Element containing an rs_matrix_3x3.
1354      * @param[in] rs RenderScript context
1355      * @return Element
1356      */
1357     static sp<const Element> MATRIX_3X3(sp<RS> rs);
1358     /**
1359      * Utility function for returning an Element containing an rs_matrix_2x2.
1360      * @param[in] rs RenderScript context
1361      * @return Element
1362      */
1363     static sp<const Element> MATRIX_2X2(sp<RS> rs);
1364 
1365     void updateFromNative();
1366 
1367     /**
1368      * Create an Element with a given DataType.
1369      * @param[in] rs RenderScript context
1370      * @param[in] dt data type
1371      * @return Element
1372      */
1373     static sp<const Element> createUser(sp<RS> rs, RsDataType dt);
1374     /**
1375      * Create a vector Element with the given DataType
1376      * @param[in] rs RenderScript
1377      * @param[in] dt DataType
1378      * @param[in] size vector size
1379      * @return Element
1380      */
1381     static sp<const Element> createVector(sp<RS> rs, RsDataType dt, uint32_t size);
1382     /**
1383      * Create an Element with a given DataType and DataKind.
1384      * @param[in] rs RenderScript context
1385      * @param[in] dt DataType
1386      * @param[in] dk DataKind
1387      * @return Element
1388      */
1389     static sp<const Element> createPixel(sp<RS> rs, RsDataType dt, RsDataKind dk);
1390 
1391     /**
1392      * Returns true if the Element can interoperate with this Element.
1393      * @param[in] e Element to compare
1394      * @return true if Elements can interoperate
1395      */
1396     bool isCompatible(sp<const Element>e) const;
1397 
1398     /**
1399      * Builder class for producing complex elements with matching field and name
1400      * pairs. The builder starts empty. The order in which elements are added is
1401      * retained for the layout in memory.
1402      */
1403     class Builder {
1404     private:
1405         RS* mRS;
1406         size_t mElementsCount;
1407         size_t mElementsVecSize;
1408         sp<const Element> * mElements;
1409         char ** mElementNames;
1410         size_t * mElementNameLengths;
1411         uint32_t * mArraySizes;
1412         bool mSkipPadding;
1413 
1414     public:
1415         Builder(sp<RS> rs);
1416         ~Builder();
1417         void add(sp<const Element> e, const char * name, uint32_t arraySize = 1);
1418         sp<const Element> create();
1419     };
1420 
1421 protected:
1422     friend class Type;
1423     Element(void *id, sp<RS> rs,
1424             sp<const Element> * elements,
1425             size_t elementCount,
1426             const char ** elementNames,
1427             size_t * elementNameLengths,
1428             uint32_t * arraySizes);
1429     Element(void *id, sp<RS> rs, RsDataType dt, RsDataKind dk, bool norm, uint32_t size);
1430     Element(void *id, sp<RS> rs);
1431     Element(sp<RS> rs);
1432     virtual ~Element();
1433 
1434 private:
1435     void updateVisibleSubElements();
1436 
1437     size_t mElementsCount;
1438     size_t mVisibleElementMapSize;
1439 
1440     sp<const Element> * mElements;
1441     char ** mElementNames;
1442     size_t * mElementNameLengths;
1443     uint32_t * mArraySizes;
1444     uint32_t * mVisibleElementMap;
1445     uint32_t * mOffsetInBytes;
1446 
1447     RsDataType mType;
1448     RsDataKind mKind;
1449     bool mNormalized;
1450     size_t mSizeBytes;
1451     size_t mVectorSize;
1452 };
1453 
1454 class FieldPacker {
1455 protected:
1456     unsigned char* mData;
1457     size_t mPos;
1458     size_t mLen;
1459 
1460 public:
FieldPacker(size_t len)1461     FieldPacker(size_t len)
1462         : mPos(0), mLen(len) {
1463             mData = new unsigned char[len];
1464         }
1465 
~FieldPacker()1466     virtual ~FieldPacker() {
1467         delete [] mData;
1468     }
1469 
align(size_t v)1470     void align(size_t v) {
1471         if ((v & (v - 1)) != 0) {
1472             //            ALOGE("Non-power-of-two alignment: %zu", v);
1473             return;
1474         }
1475 
1476         while ((mPos & (v - 1)) != 0) {
1477             mData[mPos++] = 0;
1478         }
1479     }
1480 
reset()1481     void reset() {
1482         mPos = 0;
1483     }
1484 
reset(size_t i)1485     void reset(size_t i) {
1486         if (i >= mLen) {
1487             //            ALOGE("Out of bounds: i (%zu) >= len (%zu)", i, mLen);
1488             return;
1489         }
1490         mPos = i;
1491     }
1492 
skip(size_t i)1493     void skip(size_t i) {
1494         size_t res = mPos + i;
1495         if (res > mLen) {
1496             //            ALOGE("Exceeded buffer length: i (%zu) > len (%zu)", i, mLen);
1497             return;
1498         }
1499         mPos = res;
1500     }
1501 
getData()1502     void* getData() const {
1503         return mData;
1504     }
1505 
getLength()1506     size_t getLength() const {
1507         return mLen;
1508     }
1509 
1510     template <typename T>
add(T t)1511         void add(T t) {
1512         align(sizeof(t));
1513         if (mPos + sizeof(t) <= mLen) {
1514             memcpy(&mData[mPos], &t, sizeof(t));
1515             mPos += sizeof(t);
1516         }
1517     }
1518 
1519     /*
1520       void add(rs_matrix4x4 m) {
1521       for (size_t i = 0; i < 16; i++) {
1522       add(m.m[i]);
1523       }
1524       }
1525 
1526       void add(rs_matrix3x3 m) {
1527       for (size_t i = 0; i < 9; i++) {
1528       add(m.m[i]);
1529       }
1530       }
1531 
1532       void add(rs_matrix2x2 m) {
1533       for (size_t i = 0; i < 4; i++) {
1534       add(m.m[i]);
1535       }
1536       }
1537     */
1538 
add(sp<BaseObj> obj)1539     void add(sp<BaseObj> obj) {
1540         if (obj != NULL) {
1541             add((uint32_t) (uintptr_t) obj->getID());
1542         } else {
1543             add((uint32_t) 0);
1544         }
1545     }
1546 };
1547 
1548 /**
1549  * A Type describes the Element and dimensions used for an Allocation or a
1550  * parallel operation.
1551  *
1552  * A Type always includes an Element and an X dimension. A Type may be
1553  * multidimensional, up to three dimensions. A nonzero value in the Y or Z
1554  * dimensions indicates that the dimension is present. Note that a Type with
1555  * only a given X dimension and a Type with the same X dimension but Y = 1 are
1556  * not equivalent.
1557  *
1558  * A Type also supports inclusion of level of detail (LOD) or cube map
1559  * faces. LOD and cube map faces are booleans to indicate present or not
1560  * present.
1561  *
1562  * A Type also supports YUV format information to support an Allocation in a YUV
1563  * format. The YUV formats supported are RS_YUV_YV12 and RS_YUV_NV21.
1564  */
1565 class Type : public BaseObj {
1566 protected:
1567     friend class Allocation;
1568 
1569     uint32_t mDimX;
1570     uint32_t mDimY;
1571     uint32_t mDimZ;
1572     RsYuvFormat mYuvFormat;
1573     bool mDimMipmaps;
1574     bool mDimFaces;
1575     size_t mElementCount;
1576     sp<const Element> mElement;
1577 
1578     Type(void *id, sp<RS> rs);
1579 
1580     void calcElementCount();
1581     virtual void updateFromNative();
1582 
1583 public:
1584 
1585     /**
1586      * Returns the YUV format.
1587      * @return YUV format of the Allocation
1588      */
getYuvFormat()1589     RsYuvFormat getYuvFormat() const {
1590         return mYuvFormat;
1591     }
1592 
1593     /**
1594      * Returns the Element of the Allocation.
1595      * @return YUV format of the Allocation
1596      */
getElement()1597     sp<const Element> getElement() const {
1598         return mElement;
1599     }
1600 
1601     /**
1602      * Returns the X dimension of the Allocation.
1603      * @return X dimension of the allocation
1604      */
getX()1605     uint32_t getX() const {
1606         return mDimX;
1607     }
1608 
1609     /**
1610      * Returns the Y dimension of the Allocation.
1611      * @return Y dimension of the allocation
1612      */
getY()1613     uint32_t getY() const {
1614         return mDimY;
1615     }
1616 
1617     /**
1618      * Returns the Z dimension of the Allocation.
1619      * @return Z dimension of the allocation
1620      */
getZ()1621     uint32_t getZ() const {
1622         return mDimZ;
1623     }
1624 
1625     /**
1626      * Returns true if the Allocation has mipmaps.
1627      * @return true if the Allocation has mipmaps
1628      */
hasMipmaps()1629     bool hasMipmaps() const {
1630         return mDimMipmaps;
1631     }
1632 
1633     /**
1634      * Returns true if the Allocation is a cube map
1635      * @return true if the Allocation is a cube map
1636      */
hasFaces()1637     bool hasFaces() const {
1638         return mDimFaces;
1639     }
1640 
1641     /**
1642      * Returns number of accessible Elements in the Allocation
1643      * @return number of accessible Elements in the Allocation
1644      */
getCount()1645     size_t getCount() const {
1646         return mElementCount;
1647     }
1648 
1649     /**
1650      * Returns size in bytes of all Elements in the Allocation
1651      * @return size in bytes of all Elements in the Allocation
1652      */
getSizeBytes()1653     size_t getSizeBytes() const {
1654         return mElementCount * mElement->getSizeBytes();
1655     }
1656 
1657     /**
1658      * Creates a new Type with the given Element and dimensions.
1659      * @param[in] rs RenderScript context
1660      * @param[in] e Element
1661      * @param[in] dimX X dimension
1662      * @param[in] dimY Y dimension
1663      * @param[in] dimZ Z dimension
1664      * @return new Type
1665      */
1666     static sp<const Type> create(sp<RS> rs, sp<const Element> e, uint32_t dimX, uint32_t dimY, uint32_t dimZ);
1667 
1668     class Builder {
1669     protected:
1670         RS* mRS;
1671         uint32_t mDimX;
1672         uint32_t mDimY;
1673         uint32_t mDimZ;
1674         RsYuvFormat mYuvFormat;
1675         bool mDimMipmaps;
1676         bool mDimFaces;
1677         sp<const Element> mElement;
1678 
1679     public:
1680         Builder(sp<RS> rs, sp<const Element> e);
1681 
1682         void setX(uint32_t value);
1683         void setY(uint32_t value);
1684         void setZ(uint32_t value);
1685         void setYuvFormat(RsYuvFormat format);
1686         void setMipmaps(bool value);
1687         void setFaces(bool value);
1688         sp<const Type> create();
1689     };
1690 
1691 };
1692 
1693 /**
1694  * The parent class for all executable Scripts. This should not be used by applications.
1695  */
1696 class Script : public BaseObj {
1697 private:
1698 
1699 protected:
1700     Script(void *id, sp<RS> rs);
1701     void forEach(uint32_t slot, sp<const Allocation> in, sp<const Allocation> out,
1702             const void *v, size_t) const;
1703     void bindAllocation(sp<Allocation> va, uint32_t slot) const;
1704     void setVar(uint32_t index, const void *, size_t len) const;
1705     void setVar(uint32_t index, sp<const BaseObj> o) const;
1706     void invoke(uint32_t slot, const void *v, size_t len) const;
1707 
1708 
invoke(uint32_t slot)1709     void invoke(uint32_t slot) const {
1710         invoke(slot, NULL, 0);
1711     }
setVar(uint32_t index,float v)1712     void setVar(uint32_t index, float v) const {
1713         setVar(index, &v, sizeof(v));
1714     }
setVar(uint32_t index,double v)1715     void setVar(uint32_t index, double v) const {
1716         setVar(index, &v, sizeof(v));
1717     }
setVar(uint32_t index,int32_t v)1718     void setVar(uint32_t index, int32_t v) const {
1719         setVar(index, &v, sizeof(v));
1720     }
setVar(uint32_t index,uint32_t v)1721     void setVar(uint32_t index, uint32_t v) const {
1722         setVar(index, &v, sizeof(v));
1723     }
setVar(uint32_t index,int64_t v)1724     void setVar(uint32_t index, int64_t v) const {
1725         setVar(index, &v, sizeof(v));
1726     }
setVar(uint32_t index,bool v)1727     void setVar(uint32_t index, bool v) const {
1728         setVar(index, &v, sizeof(v));
1729     }
1730 
1731 public:
1732     class FieldBase {
1733     protected:
1734         sp<const Element> mElement;
1735         sp<Allocation> mAllocation;
1736 
1737         void init(sp<RS> rs, uint32_t dimx, uint32_t usages = 0);
1738 
1739     public:
getElement()1740         sp<const Element> getElement() {
1741             return mElement;
1742         }
1743 
getType()1744         sp<const Type> getType() {
1745             return mAllocation->getType();
1746         }
1747 
getAllocation()1748         sp<const Allocation> getAllocation() {
1749             return mAllocation;
1750         }
1751 
1752         //void updateAllocation();
1753     };
1754 };
1755 
1756 /**
1757  * The parent class for all user-defined scripts. This is intended to be used by auto-generated code only.
1758  */
1759 class ScriptC : public Script {
1760 protected:
1761     ScriptC(sp<RS> rs,
1762             const void *codeTxt, size_t codeLength,
1763             const char *cachedName, size_t cachedNameLength,
1764             const char *cacheDir, size_t cacheDirLength);
1765 
1766 };
1767 
1768 /**
1769  * The parent class for all script intrinsics. Intrinsics provide highly optimized implementations of
1770  * basic functions. This is not intended to be used directly.
1771  */
1772 class ScriptIntrinsic : public Script {
1773  protected:
1774     sp<const Element> mElement;
1775     ScriptIntrinsic(sp<RS> rs, int id, sp<const Element> e);
1776     virtual ~ScriptIntrinsic();
1777 };
1778 
1779 /**
1780  * Intrinsic for converting RGB to RGBA by using a 3D lookup table. The incoming
1781  * r,g,b values are used as normalized x,y,z coordinates into a 3D
1782  * allocation. The 8 nearest values are sampled and linearly interpolated. The
1783  * result is placed in the output.
1784  */
1785 class ScriptIntrinsic3DLUT : public ScriptIntrinsic {
1786  private:
1787     ScriptIntrinsic3DLUT(sp<RS> rs, sp<const Element> e);  // Private: the public way to obtain an instance is the static create() below.
1788  public:
1789     /**
1790      * Supported Element types are U8_4. Default lookup table is identity.
1791      * @param[in] rs RenderScript context
1792      * @param[in] e Element
1793      * @return new ScriptIntrinsic3DLUT
1794      */
1795     static sp<ScriptIntrinsic3DLUT> create(sp<RS> rs, sp<const Element> e);
1796 
1797     /**
1798      * Launch the intrinsic.
1799      * @param[in] ain input Allocation
1800      * @param[in] aout output Allocation
1801      */
1802     void forEach(sp<Allocation> ain, sp<Allocation> aout);
1803 
1804     /**
1805      * Sets the lookup table. The lookup table must use the same Element as the
1806      * intrinsic.
1807      * @param[in] lut new lookup table
1808      */
1809     void setLUT(sp<Allocation> lut);
1810 };
1811 
1812 
1813 /**
1814  * Intrinsic kernel provides high performance RenderScript APIs to BLAS.
1815  *
1816  * The BLAS (Basic Linear Algebra Subprograms) are routines that provide standard
1817  * building blocks for performing basic vector and matrix operations.
1818  *
1819  * For detailed description of BLAS, please refer to http://www.netlib.org/blas/
1820  *
1821  **/
1822 class ScriptIntrinsicBLAS : public ScriptIntrinsic {
1823  private:
1824     ScriptIntrinsicBLAS(sp<RS> rs, sp<const Element> e);
1825  public:
1826     /**
1827      * Create an intrinsic to access BLAS subroutines.
1828      *
1829      * @param rs The RenderScript context
1830      * @return ScriptIntrinsicBLAS
1831      */
1832     static sp<ScriptIntrinsicBLAS> create(sp<RS> rs);
1833 
1834     /**
1835      * SGEMV performs one of the matrix-vector operations
1836      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
1837      *
1838      * Details: http://www.netlib.org/lapack/explore-html/db/d58/sgemv_8f.html
1839      *
1840      * @param TransA The type of transpose applied to matrix A.
1841      * @param alpha The scalar alpha.
1842      * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
1843      * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
1844      * @param incX The increment for the elements of vector x, must be larger than zero.
1845      * @param beta The scalar beta.
1846      * @param Y The input allocation contains vector y, supported elements type: {Element#F32}.
1847      * @param incY The increment for the elements of vector y, must be larger than zero.
1848      */
1849     void SGEMV(RsBlasTranspose TransA,
1850                float alpha, sp<Allocation> A, sp<Allocation> X, int incX,
1851                float beta, sp<Allocation> Y, int incY);
1852 
1853     /**
1854      * DGEMV performs one of the matrix-vector operations
1855      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
1856      *
1857      * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dgemv_8f.html
1858      *
1859      * @param TransA The type of transpose applied to matrix A.
1860      * @param alpha The scalar alpha.
1861      * @param A The input allocation contains matrix A, supported elements type: {Element#F64}.
1862      * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
1863      * @param incX The increment for the elements of vector x, must be larger than zero.
1864      * @param beta The scalar beta.
1865      * @param Y The input allocation contains vector y, supported elements type: {Element#F64}.
1866      * @param incY The increment for the elements of vector y, must be larger than zero.
1867      */
1868     void DGEMV(RsBlasTranspose TransA,
1869                double alpha, sp<Allocation> A, sp<Allocation> X, int incX,
1870                double beta, sp<Allocation> Y, int incY);
1871 
1872     /**
1873      * CGEMV performs one of the matrix-vector operations
1874      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
1875      *
1876      * Details: http://www.netlib.org/lapack/explore-html/d4/d8a/cgemv_8f.html
1877      *
1878      * @param TransA The type of transpose applied to matrix A.
1879      * @param alpha The scalar alpha.
1880      * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
1881      * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
1882      * @param incX The increment for the elements of vector x, must be larger than zero.
1883      * @param beta The scalar beta.
1884      * @param Y The input allocation contains vector y, supported elements type: {Element#F32_2}.
1885      * @param incY The increment for the elements of vector y, must be larger than zero.
1886      */
1887     void CGEMV(RsBlasTranspose TransA,
1888                Float2 alpha, sp<Allocation> A, sp<Allocation> X, int incX,
1889                Float2 beta, sp<Allocation> Y, int incY);
1890 
1891     /**
1892      * ZGEMV performs one of the matrix-vector operations
1893      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
1894      *
1895      * Details: http://www.netlib.org/lapack/explore-html/db/d40/zgemv_8f.html
1896      *
1897      * @param TransA The type of transpose applied to matrix A.
1898      * @param alpha The scalar alpha.
1899      * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
1900      * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
1901      * @param incX The increment for the elements of vector x, must be larger than zero.
1902      * @param beta The scalar beta.
1903      * @param Y The input allocation contains vector y, supported elements type: {Element#F64_2}.
1904      * @param incY The increment for the elements of vector y, must be larger than zero.
1905      */
1906     void ZGEMV(RsBlasTranspose TransA,
1907                Double2 alpha, sp<Allocation> A, sp<Allocation> X, int incX,
1908                Double2 beta, sp<Allocation> Y, int incY);
1909 
1910     /**
1911      * SGBMV performs one of the matrix-vector operations
1912      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
1913      *
1914      * Details: http://www.netlib.org/lapack/explore-html/d6/d46/sgbmv_8f.html
1915      *
1916      * Note: For an M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
1917      *       but only the region M*(KL+KU+1) will be referenced. The following subroutine is an
1918      *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
1919      *           for i in range(0, m):
1920      *              for j in range(max(0, i-kl), min(i+ku+1, n)):
1921      *                  b[i, j-i+kl] = a[i, j]
1922      *
1923      * @param TransA The type of transpose applied to matrix A.
1924      * @param KL The number of sub-diagonals of the matrix A.
1925      * @param KU The number of super-diagonals of the matrix A.
1926      * @param alpha The scalar alpha.
1927      * @param A The input allocation contains the band matrix A, supported elements type: {Element#F32}.
1928      * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
1929      * @param incX The increment for the elements of vector x, must be larger than zero.
1930      * @param beta The scalar beta.
1931      * @param Y The input allocation contains vector y, supported elements type: {Element#F32}.
1932      * @param incY The increment for the elements of vector y, must be larger than zero.
1933      */
1934     void SGBMV(RsBlasTranspose TransA,
1935                int KL, int KU, float alpha, sp<Allocation> A, sp<Allocation> X, int incX,
1936                float beta, sp<Allocation> Y, int incY);
1937 
1938     /**
1939      * DGBMV performs one of the matrix-vector operations
1940      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
1941      *
1942      * Details: http://www.netlib.org/lapack/explore-html/d2/d3f/dgbmv_8f.html
1943      *
1944      * Note: For an M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
1945      *       but only the region M*(KL+KU+1) will be referenced. The following subroutine is an
1946      *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
1947      *           for i in range(0, m):
1948      *              for j in range(max(0, i-kl), min(i+ku+1, n)):
1949      *                  b[i, j-i+kl] = a[i, j]
1950      *
1951      * @param TransA The type of transpose applied to matrix A.
1952      * @param KL The number of sub-diagonals of the matrix A.
1953      * @param KU The number of super-diagonals of the matrix A.
1954      * @param alpha The scalar alpha.
1955      * @param A The input allocation contains the band matrix A, supported elements type: {Element#F64}.
1956      * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
1957      * @param incX The increment for the elements of vector x, must be larger than zero.
1958      * @param beta The scalar beta.
1959      * @param Y The input allocation contains vector y, supported elements type: {Element#F64}.
1960      * @param incY The increment for the elements of vector y, must be larger than zero.
1961      */
1962     void DGBMV(RsBlasTranspose TransA,
1963                int KL, int KU, double alpha, sp<Allocation> A, sp<Allocation> X,
1964                int incX, double beta, sp<Allocation> Y, int incY);
1965 
1966     /**
1967      * CGBMV performs one of the matrix-vector operations
1968      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
1969      *
1970      * Details: http://www.netlib.org/lapack/explore-html/d0/d75/cgbmv_8f.html
1971      *
1972      * Note: For an M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
1973      *       but only the region M*(KL+KU+1) will be referenced. The following subroutine is an
1974      *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
1975      *           for i in range(0, m):
1976      *              for j in range(max(0, i-kl), min(i+ku+1, n)):
1977      *                  b[i, j-i+kl] = a[i, j]
1978      *
1979      * @param TransA The type of transpose applied to matrix A.
1980      * @param KL The number of sub-diagonals of the matrix A.
1981      * @param KU The number of super-diagonals of the matrix A.
1982      * @param alpha The scalar alpha.
1983      * @param A The input allocation contains the band matrix A, supported elements type: {Element#F32_2}.
1984      * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
1985      * @param incX The increment for the elements of vector x, must be larger than zero.
1986      * @param beta The scalar beta.
1987      * @param Y The input allocation contains vector y, supported elements type: {Element#F32_2}.
1988      * @param incY The increment for the elements of vector y, must be larger than zero.
1989      */
1990     void CGBMV(RsBlasTranspose TransA,
1991                int KL, int KU, Float2 alpha, sp<Allocation> A, sp<Allocation> X,
1992                int incX, Float2 beta, sp<Allocation> Y, int incY);
1993 
1994     /**
1995      * ZGBMV performs one of the matrix-vector operations
1996      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
1997      *
1998      * Details: http://www.netlib.org/lapack/explore-html/d9/d46/zgbmv_8f.html
1999      *
2000      * Note: For an M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
2001      *       but only the region M*(KL+KU+1) will be referenced. The following subroutine is an
2002      *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
2003      *           for i in range(0, m):
2004      *              for j in range(max(0, i-kl), min(i+ku+1, n)):
2005      *                  b[i, j-i+kl] = a[i, j]
2006      *
2007      * @param TransA The type of transpose applied to matrix A.
2008      * @param KL The number of sub-diagonals of the matrix A.
2009      * @param KU The number of super-diagonals of the matrix A.
2010      * @param alpha The scalar alpha.
2011      * @param A The input allocation contains the band matrix A, supported elements type: {Element#F64_2}.
2012      * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
2013      * @param incX The increment for the elements of vector x, must be larger than zero.
2014      * @param beta The scalar beta.
2015      * @param Y The input allocation contains vector y, supported elements type: {Element#F64_2}.
2016      * @param incY The increment for the elements of vector y, must be larger than zero.
2017      */
2018     void ZGBMV(RsBlasTranspose TransA,
2019                int KL, int KU, Double2 alpha, sp<Allocation> A, sp<Allocation> X, int incX,
2020                Double2 beta, sp<Allocation> Y, int incY);
2021 
2022     /**
2023      * STRMV performs one of the matrix-vector operations
2024      * x := A*x   or   x := A**T*x
2025      *
2026      * Details: http://www.netlib.org/lapack/explore-html/de/d45/strmv_8f.html
2027      *
2028      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2029      * @param TransA The type of transpose applied to matrix A.
2030      * @param Diag Specifies whether or not A is unit triangular.
2031      * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
2032      * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
2033      * @param incX The increment for the elements of vector x, must be larger than zero.
2034      */
2035     void STRMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2036                sp<Allocation> A, sp<Allocation> X, int incX);
2037 
2038     /**
2039      * DTRMV performs one of the matrix-vector operations
2040      * x := A*x   or   x := A**T*x
2041      *
2042      * Details: http://www.netlib.org/lapack/explore-html/dc/d7e/dtrmv_8f.html
2043      *
2044      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2045      * @param TransA The type of transpose applied to matrix A.
2046      * @param Diag Specifies whether or not A is unit triangular.
2047      * @param A The input allocation contains matrix A, supported elements type: {Element#F64}.
2048      * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
2049      * @param incX The increment for the elements of vector x, must be larger than zero.
2050      */
2051     void DTRMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2052                sp<Allocation> A, sp<Allocation> X, int incX);
2053 
2054     /**
2055      * CTRMV performs one of the matrix-vector operations
2056      * x := A*x   or   x := A**T*x   or   x := A**H*x
2057      *
2058      * Details: http://www.netlib.org/lapack/explore-html/df/d78/ctrmv_8f.html
2059      *
2060      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2061      * @param TransA The type of transpose applied to matrix A.
2062      * @param Diag Specifies whether or not A is unit triangular.
2063      * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
2064      * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2065      * @param incX The increment for the elements of vector x, must be larger than zero.
2066      */
2067     void CTRMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2068                sp<Allocation> A, sp<Allocation> X, int incX);
2069 
2070     /**
2071      * ZTRMV performs one of the matrix-vector operations
2072      * x := A*x   or   x := A**T*x   or   x := A**H*x
2073      *
2074      * Details: http://www.netlib.org/lapack/explore-html/d0/dd1/ztrmv_8f.html
2075      *
2076      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2077      * @param TransA The type of transpose applied to matrix A.
2078      * @param Diag Specifies whether or not A is unit triangular.
2079      * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
2080      * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
2081      * @param incX The increment for the elements of vector x, must be larger than zero.
2082      */
2083     void ZTRMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2084                sp<Allocation> A, sp<Allocation> X, int incX);
2085 
2086     /**
2087      * STBMV performs one of the matrix-vector operations
2088      * x := A*x   or   x := A**T*x
2089      *
2090      * Details: http://www.netlib.org/lapack/explore-html/d6/d7d/stbmv_8f.html
2091      *
2092      * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2093      *       but only the region N*(K+1) will be referenced. The following subroutine is an
2094      *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2095      *           for i in range(0, n):
2096      *              for j in range(i, min(i+k+1, n)):
2097      *                  b[i, j-i] = a[i, j]
2098      *
2099      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2100      * @param TransA The type of transpose applied to matrix A.
2101      * @param Diag Specifies whether or not A is unit triangular.
2102      * @param K The number of off-diagonals of the matrix A
2103      * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
2104      * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
2105      * @param incX The increment for the elements of vector x, must be larger than zero.
2106      */
2107     void STBMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2108                int K, sp<Allocation> A, sp<Allocation> X, int incX);
2109 
2110     /**
2111      * DTBMV performs one of the matrix-vector operations
2112      * x := A*x   or   x := A**T*x
2113      *
2114      * Details: http://www.netlib.org/lapack/explore-html/df/d29/dtbmv_8f.html
2115      *
2116      * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2117      *       but only the region N*(K+1) will be referenced. The following subroutine is an
2118      *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2119      *           for i in range(0, n):
2120      *              for j in range(i, min(i+k+1, n)):
2121      *                  b[i, j-i] = a[i, j]
2122      *
2123      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2124      * @param TransA The type of transpose applied to matrix A.
2125      * @param Diag Specifies whether or not A is unit triangular.
2126      * @param K The number of off-diagonals of the matrix A
2127      * @param A The input allocation contains matrix A, supported elements type: {Element#F64}.
2128      * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
2129      * @param incX The increment for the elements of vector x, must be larger than zero.
2130      */
2131     void DTBMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2132                int K, sp<Allocation> A, sp<Allocation> X, int incX);
2133 
2134     /**
2135      * CTBMV performs one of the matrix-vector operations
2136      * x := A*x   or   x := A**T*x   or   x := A**H*x
2137      *
2138      * Details: http://www.netlib.org/lapack/explore-html/d3/dcd/ctbmv_8f.html
2139      *
2140      * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2141      *       but only the region N*(K+1) will be referenced. The following subroutine is an
2142      *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2143      *           for i in range(0, n):
2144      *              for j in range(i, min(i+k+1, n)):
2145      *                  b[i, j-i] = a[i, j]
2146      *
2147      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2148      * @param TransA The type of transpose applied to matrix A.
2149      * @param Diag Specifies whether or not A is unit triangular.
2150      * @param K The number of off-diagonals of the matrix A
2151      * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
2152      * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2153      * @param incX The increment for the elements of vector x, must be larger than zero.
2154      */
2155     void CTBMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2156                int K, sp<Allocation> A, sp<Allocation> X, int incX);
2157 
2158     /**
2159      * ZTBMV performs one of the matrix-vector operations
2160      * x := A*x   or   x := A**T*x   or   x := A**H*x
2161      *
2162      * Details: http://www.netlib.org/lapack/explore-html/d3/d39/ztbmv_8f.html
2163      *
2164      * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2165      *       but only the region N*(K+1) will be referenced. The following subroutine is an
2166      *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2167      *           for i in range(0, n):
2168      *              for j in range(i, min(i+k+1, n)):
2169      *                  b[i, j-i] = a[i, j]
2170      *
2171      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2172      * @param TransA The type of transpose applied to matrix A.
2173      * @param Diag Specifies whether or not A is unit triangular.
2174      * @param K The number of off-diagonals of the matrix A
2175      * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
2176      * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
2177      * @param incX The increment for the elements of vector x, must be larger than zero.
2178      */
2179     void ZTBMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2180                int K, sp<Allocation> A, sp<Allocation> X, int incX);
2181 
2182     /**
2183      * STPMV performs one of the matrix-vector operations
2184      * x := A*x   or   x := A**T*x
2185      *
2186      * Details: http://www.netlib.org/lapack/explore-html/db/db1/stpmv_8f.html
2187      *
2188      * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2189      *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2190      *       'a' to packed matrix 'b'.
2191      *           k = 0
2192      *           for i in range(0, n):
2193      *              for j in range(i, n):
2194      *                  b[k++] = a[i, j]
2195      *
2196      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2197      * @param TransA The type of transpose applied to matrix A.
2198      * @param Diag Specifies whether or not A is unit triangular.
2199      * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F32}.
2200      * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
2201      * @param incX The increment for the elements of vector x, must be larger than zero.
2202      */
2203     void STPMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2204                sp<Allocation> Ap, sp<Allocation> X, int incX);
2205 
2206     /**
2207      * DTPMV performs one of the matrix-vector operations
2208      * x := A*x   or   x := A**T*x
2209      *
2210      * Details: http://www.netlib.org/lapack/explore-html/dc/dcd/dtpmv_8f.html
2211      *
2212      * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2213      *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2214      *       'a' to packed matrix 'b'.
2215      *           k = 0
2216      *           for i in range(0, n):
2217      *              for j in range(i, n):
2218      *                  b[k++] = a[i, j]
2219      *
2220      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2221      * @param TransA The type of transpose applied to matrix A.
2222      * @param Diag Specifies whether or not A is unit triangular.
2223      * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F64}.
2224      * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
2225      * @param incX The increment for the elements of vector x, must be larger than zero.
2226      */
2227     void DTPMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2228                sp<Allocation> Ap, sp<Allocation> X, int incX);
2229 
2230     /**
2231      * CTPMV performs one of the matrix-vector operations
2232      * x := A*x   or   x := A**T*x   or   x := A**H*x
2233      *
2234      * Details: http://www.netlib.org/lapack/explore-html/d4/dbb/ctpmv_8f.html
2235      *
2236      * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2237      *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2238      *       'a' to packed matrix 'b'.
2239      *           k = 0
2240      *           for i in range(0, n):
2241      *              for j in range(i, n):
2242      *                  b[k++] = a[i, j]
2243      *
2244      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2245      * @param TransA The type of transpose applied to matrix A.
2246      * @param Diag Specifies whether or not A is unit triangular.
2247      * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F32_2}.
2248      * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2249      * @param incX The increment for the elements of vector x, must be larger than zero.
2250      */
2251     void CTPMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2252                sp<Allocation> Ap, sp<Allocation> X, int incX);
2253 
2254     /**
2255      * ZTPMV performs one of the matrix-vector operations
2256      * x := A*x   or   x := A**T*x   or   x := A**H*x
2257      *
2258      * Details: http://www.netlib.org/lapack/explore-html/d2/d9e/ztpmv_8f.html
2259      *
2260      * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2261      *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2262      *       'a' to packed matrix 'b'.
2263      *           k = 0
2264      *           for i in range(0, n):
2265      *              for j in range(i, n):
2266      *                  b[k++] = a[i, j]
2267      *
2268      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2269      * @param TransA The type of transpose applied to matrix A.
2270      * @param Diag Specifies whether or not A is unit triangular.
2271      * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F64_2}.
2272      * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
2273      * @param incX The increment for the elements of vector x, must be larger than zero.
2274      */
2275     void ZTPMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2276                sp<Allocation> Ap, sp<Allocation> X, int incX);
2277 
2278     /**
2279      * STRSV solves one of the systems of equations
2280      * A*x = b   or   A**T*x = b
2281      *
2282      * Details: http://www.netlib.org/lapack/explore-html/d0/d2a/strsv_8f.html
2283      *
2284      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2285      * @param TransA The type of transpose applied to matrix A.
2286      * @param Diag Specifies whether or not A is unit triangular.
2287      * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
2288      * @param X The input allocation contains vector x, supported elements type: {Element#F32}.
2289      * @param incX The increment for the elements of vector x, must be larger than zero.
2290      */
2291     void STRSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2292                sp<Allocation> A, sp<Allocation> X, int incX);
2293 
2294     /**
2295      * DTRSV solves one of the systems of equations
2296      * A*x = b   or   A**T*x = b
2297      *
2298      * Details: http://www.netlib.org/lapack/explore-html/d6/d96/dtrsv_8f.html
2299      *
2300      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2301      * @param TransA The type of transpose applied to matrix A.
2302      * @param Diag Specifies whether or not A is unit triangular.
2303      * @param A The input allocation contains matrix A, supported elements type: {Element#F64}.
2304      * @param X The input allocation contains vector x, supported elements type: {Element#F64}.
2305      * @param incX The increment for the elements of vector x, must be larger than zero.
2306      */
2307     void DTRSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2308                sp<Allocation> A, sp<Allocation> X, int incX);
2309 
2310     /**
2311      * CTRSV solves one of the systems of equations
2312      * A*x = b   or   A**T*x = b   or   A**H*x = b
2313      *
2314      * Details: http://www.netlib.org/lapack/explore-html/d4/dc8/ctrsv_8f.html
2315      *
2316      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2317      * @param TransA The type of transpose applied to matrix A.
2318      * @param Diag Specifies whether or not A is unit triangular.
2319      * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
2320      * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2321      * @param incX The increment for the elements of vector x, must be larger than zero.
2322      */
2323     void CTRSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2324                sp<Allocation> A, sp<Allocation> X, int incX);
2325 
2326     /**
2327      * ZTRSV solves one of the systems of equations
2328      * A*x = b   or   A**T*x = b   or   A**H*x = b
2329      *
2330      * Details: http://www.netlib.org/lapack/explore-html/d1/d2f/ztrsv_8f.html
2331      *
2332      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2333      * @param TransA The type of transpose applied to matrix A.
2334      * @param Diag Specifies whether or not A is unit triangular.
2335      * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
2336      * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
2337      * @param incX The increment for the elements of vector x, must be larger than zero.
2338      */
2339     void ZTRSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2340                sp<Allocation> A, sp<Allocation> X, int incX);
2341 
2342     /**
2343      * STBSV solves one of the systems of equations
2344      * A*x = b   or   A**T*x = b
2345      *
2346      * Details: http://www.netlib.org/lapack/explore-html/d0/d1f/stbsv_8f.html
2347      *
2348      * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2349      *       but only the region N*(K+1) will be referenced. The following subroutine is an
2350      *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2351      *           for i in range(0, n):
2352      *              for j in range(i, min(i+k+1, n)):
2353      *                  b[i, j-i] = a[i, j]
2354      *
2355      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2356      * @param TransA The type of transpose applied to matrix A.
2357      * @param Diag Specifies whether or not A is unit triangular.
2358      * @param K The number of off-diagonals of the matrix A.
2359      * @param A The input allocation containing matrix A; supported element type: {Element#F32}.
2360      * @param X The input allocation containing vector x; supported element type: {Element#F32}.
2361      * @param incX The increment for the elements of vector x; must be larger than zero.
2362      */
2363     void STBSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2364                int K, sp<Allocation> A, sp<Allocation> X, int incX);
2365 
2366     /**
2367      * DTBSV solves one of the systems of equations
2368      * A*x = b   or   A**T*x = b
2369      *
2370      * Details: http://www.netlib.org/lapack/explore-html/d4/dcf/dtbsv_8f.html
2371      *
2372      * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2373      *       but only the region N*(K+1) will be referenced. The following subroutine is an
2374      *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2375      *           for i in range(0, n):
2376      *              for j in range(i, min(i+k+1, n)):
2377      *                  b[i, j-i] = a[i, j]
2378      *
2379      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2380      * @param TransA The type of transpose applied to matrix A.
2381      * @param Diag Specifies whether or not A is unit triangular.
2382      * @param K The number of off-diagonals of the matrix A.
2383      * @param A The input allocation containing matrix A; supported element type: {Element#F64}.
2384      * @param X The input allocation containing vector x; supported element type: {Element#F64}.
2385      * @param incX The increment for the elements of vector x; must be larger than zero.
2386      */
2387     void DTBSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2388                int K, sp<Allocation> A, sp<Allocation> X, int incX);
2389 
2390     /**
2391      * CTBSV solves one of the systems of equations
2392      * A*x = b   or   A**T*x = b   or   A**H*x = b
2393      *
2394      * Details: http://www.netlib.org/lapack/explore-html/d9/d5f/ctbsv_8f.html
2395      *
2396      * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2397      *       but only the region N*(K+1) will be referenced. The following subroutine is an
2398      *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2399      *           for i in range(0, n):
2400      *              for j in range(i, min(i+k+1, n)):
2401      *                  b[i, j-i] = a[i, j]
2402      *
2403      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2404      * @param TransA The type of transpose applied to matrix A.
2405      * @param Diag Specifies whether or not A is unit triangular.
2406      * @param K The number of off-diagonals of the matrix A.
2407      * @param A The input allocation containing matrix A; supported element type: {Element#F32_2}.
2408      * @param X The input allocation containing vector x; supported element type: {Element#F32_2}.
2409      * @param incX The increment for the elements of vector x; must be larger than zero.
2410      */
2411     void CTBSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2412                int K, sp<Allocation> A, sp<Allocation> X, int incX);
2413 
2414     /**
2415      * ZTBSV solves one of the systems of equations
2416      * A*x = b   or   A**T*x = b   or   A**H*x = b
2417      *
2418      * Details: http://www.netlib.org/lapack/explore-html/d4/d5a/ztbsv_8f.html
2419      *
2420      * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2421      *       but only the region N*(K+1) will be referenced. The following subroutine is an
2422      *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2423      *           for i in range(0, n):
2424      *              for j in range(i, min(i+k+1, n)):
2425      *                  b[i, j-i] = a[i, j]
2426      *
2427      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2428      * @param TransA The type of transpose applied to matrix A.
2429      * @param Diag Specifies whether or not A is unit triangular.
2430      * @param K The number of off-diagonals of the matrix A.
2431      * @param A The input allocation containing matrix A; supported element type: {Element#F64_2}.
2432      * @param X The input allocation containing vector x; supported element type: {Element#F64_2}.
2433      * @param incX The increment for the elements of vector x; must be larger than zero.
2434      */
2435     void ZTBSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2436                int K, sp<Allocation> A, sp<Allocation> X, int incX);
2437 
2438     /**
2439      * STPSV solves one of the systems of equations
2440      * A*x = b   or   A**T*x = b
2441      *
2442      * Details: http://www.netlib.org/lapack/explore-html/d0/d7c/stpsv_8f.html
2443      *
2444      * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2445      *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2446      *       'a' to packed matrix 'b'.
2447      *           k = 0
2448      *           for i in range(0, n):
2449      *              for j in range(i, n):
2450      *                  b[k++] = a[i, j]
2451      *
2452      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2453      * @param TransA The type of transpose applied to matrix A.
2454      * @param Diag Specifies whether or not A is unit triangular.
2455      * @param Ap The input allocation containing packed matrix A; supported element type: {Element#F32}.
2456      * @param X The input allocation containing vector x; supported element type: {Element#F32}.
2457      * @param incX The increment for the elements of vector x; must be larger than zero.
2458      */
2459     void STPSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2460                sp<Allocation> Ap, sp<Allocation> X, int incX);
2461 
2462     /**
2463      * DTPSV solves one of the systems of equations
2464      * A*x = b   or   A**T*x = b
2465      *
2466      * Details: http://www.netlib.org/lapack/explore-html/d9/d84/dtpsv_8f.html
2467      *
2468      * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2469      *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2470      *       'a' to packed matrix 'b'.
2471      *           k = 0
2472      *           for i in range(0, n):
2473      *              for j in range(i, n):
2474      *                  b[k++] = a[i, j]
2475      *
2476      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2477      * @param TransA The type of transpose applied to matrix A.
2478      * @param Diag Specifies whether or not A is unit triangular.
2479      * @param Ap The input allocation containing packed matrix A; supported element type: {Element#F64}.
2480      * @param X The input allocation containing vector x; supported element type: {Element#F64}.
2481      * @param incX The increment for the elements of vector x; must be larger than zero.
2482      */
2483     void DTPSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2484                sp<Allocation> Ap, sp<Allocation> X, int incX);
2485 
2486     /**
2487      * CTPSV solves one of the systems of equations
2488      * A*x = b   or   A**T*x = b   or   A**H*x = b
2489      *
2490      * Details: http://www.netlib.org/lapack/explore-html/d8/d56/ctpsv_8f.html
2491      *
2492      * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2493      *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2494      *       'a' to packed matrix 'b'.
2495      *           k = 0
2496      *           for i in range(0, n):
2497      *              for j in range(i, n):
2498      *                  b[k++] = a[i, j]
2499      *
2500      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2501      * @param TransA The type of transpose applied to matrix A.
2502      * @param Diag Specifies whether or not A is unit triangular.
2503      * @param Ap The input allocation containing packed matrix A; supported element type: {Element#F32_2}.
2504      * @param X The input allocation containing vector x; supported element type: {Element#F32_2}.
2505      * @param incX The increment for the elements of vector x; must be larger than zero.
2506      */
2507     void CTPSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2508                sp<Allocation> Ap, sp<Allocation> X, int incX);
2509 
2510     /**
2511      * ZTPSV solves one of the systems of equations
2512      * A*x = b   or   A**T*x = b   or   A**H*x = b
2513      *
2514      * Details: http://www.netlib.org/lapack/explore-html/da/d57/ztpsv_8f.html
2515      *
2516      * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2517      *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2518      *       'a' to packed matrix 'b'.
2519      *           k = 0
2520      *           for i in range(0, n):
2521      *              for j in range(i, n):
2522      *                  b[k++] = a[i, j]
2523      *
2524      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
2525      * @param TransA The type of transpose applied to matrix A.
2526      * @param Diag Specifies whether or not A is unit triangular.
2527      * @param Ap The input allocation containing packed matrix A; supported element type: {Element#F64_2}.
2528      * @param X The input allocation containing vector x; supported element type: {Element#F64_2}.
2529      * @param incX The increment for the elements of vector x; must be larger than zero.
2530      */
2531     void ZTPSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
2532                sp<Allocation> Ap, sp<Allocation> X, int incX);
2533 
2534     /**
2535      * SSYMV performs the matrix-vector operation
2536      * y := alpha*A*x + beta*y
2537      *
2538      * Details: http://www.netlib.org/lapack/explore-html/d2/d94/ssymv_8f.html
2539      *
2540      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2541      * @param alpha The scalar alpha.
2542      * @param A The input allocation containing matrix A; supported element type: {Element#F32}.
2543      * @param X The input allocation containing vector x; supported element type: {Element#F32}.
2544      * @param incX The increment for the elements of vector x; must be larger than zero.
2545      * @param beta The scalar beta.
2546      * @param Y The input allocation containing vector y; supported element type: {Element#F32}.
2547      * @param incY The increment for the elements of vector y; must be larger than zero.
2548      */
2549     void SSYMV(RsBlasUplo Uplo, float alpha, sp<Allocation> A, sp<Allocation> X,
2550                int incX, float beta, sp<Allocation> Y, int incY);
2551 
2552     /**
2553      * SSBMV performs the matrix-vector operation
2554      * y := alpha*A*x + beta*y
2555      *
2556      * Details: http://www.netlib.org/lapack/explore-html/d3/da1/ssbmv_8f.html
2557      *
2558      * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2559      *       but only the region N*(K+1) will be referenced. The following subroutine is an
2560      *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2561      *           for i in range(0, n):
2562      *              for j in range(i, min(i+k+1, n)):
2563      *                  b[i, j-i] = a[i, j]
2564      *
2565      * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
2566      * @param K The number of off-diagonals of the matrix A.
2567      * @param alpha The scalar alpha.
2568      * @param A The input allocation containing matrix A; supported element type: {Element#F32}.
2569      * @param X The input allocation containing vector x; supported element type: {Element#F32}.
2570      * @param incX The increment for the elements of vector x; must be larger than zero.
2571      * @param beta The scalar beta.
2572      * @param Y The input allocation containing vector y; supported element type: {Element#F32}.
2573      * @param incY The increment for the elements of vector y; must be larger than zero.
2574      */
2575     void SSBMV(RsBlasUplo Uplo, int K, float alpha, sp<Allocation> A, sp<Allocation> X,
2576                int incX, float beta, sp<Allocation> Y, int incY);
2577 
2578     /**
2579      * SSPMV performs the matrix-vector operation
2580      * y := alpha*A*x + beta*y
2581      *
2582      * Details: http://www.netlib.org/lapack/explore-html/d8/d68/sspmv_8f.html
2583      *
2584      * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2585      *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2586      *       'a' to packed matrix 'b'.
2587      *           k = 0
2588      *           for i in range(0, n):
2589      *              for j in range(i, n):
2590      *                  b[k++] = a[i, j]
2591      *
2592      * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
2593      * @param alpha The scalar alpha.
2594      * @param Ap The input allocation containing matrix A; supported element type: {Element#F32}.
2595      * @param X The input allocation containing vector x; supported element type: {Element#F32}.
2596      * @param incX The increment for the elements of vector x; must be larger than zero.
2597      * @param beta The scalar beta.
2598      * @param Y The input allocation containing vector y; supported element type: {Element#F32}.
2599      * @param incY The increment for the elements of vector y; must be larger than zero.
2600      */
2601     void SSPMV(RsBlasUplo Uplo, float alpha, sp<Allocation> Ap, sp<Allocation> X,
2602                int incX, float beta, sp<Allocation> Y, int incY);
2603 
2604     /**
2605      * SGER performs the rank 1 operation
2606      * A := alpha*x*y**T + A
2607      *
2608      * Details: http://www.netlib.org/lapack/explore-html/db/d5c/sger_8f.html
2609      *
2610      * @param alpha The scalar alpha.
2611      * @param X The input allocation containing vector x; supported element type: {Element#F32}.
2612      * @param incX The increment for the elements of vector x; must be larger than zero.
2613      * @param Y The input allocation containing vector y; supported element type: {Element#F32}.
2614      * @param incY The increment for the elements of vector y; must be larger than zero.
2615      * @param A The input allocation containing matrix A; supported element type: {Element#F32}.
2616      */
2617     void SGER(float alpha, sp<Allocation> X, int incX, sp<Allocation> Y, int incY, sp<Allocation> A);
2618 
2619     /**
2620      * SSYR performs the rank 1 operation
2621      * A := alpha*x*x**T + A
2622      *
2623      * Details: http://www.netlib.org/lapack/explore-html/d6/dac/ssyr_8f.html
2624      *
2625      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2626      * @param alpha The scalar alpha.
2627      * @param X The input allocation containing vector x; supported element type: {Element#F32}.
2628      * @param incX The increment for the elements of vector x; must be larger than zero.
2629      * @param A The input allocation containing matrix A; supported element type: {Element#F32}.
2630      */
2631     void SSYR(RsBlasUplo Uplo, float alpha, sp<Allocation> X, int incX, sp<Allocation> A);
2632 
2633     /**
2634      * SSPR performs the rank 1 operation
2635      * A := alpha*x*x**T + A
2636      *
2637      * Details: http://www.netlib.org/lapack/explore-html/d2/d9b/sspr_8f.html
2638      *
2639      * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2640      *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2641      *       'a' to packed matrix 'b'.
2642      *           k = 0
2643      *           for i in range(0, n):
2644      *              for j in range(i, n):
2645      *                  b[k++] = a[i, j]
2646      *
2647      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2648      * @param alpha The scalar alpha.
2649      * @param X The input allocation containing vector x; supported element type: {Element#F32}.
2650      * @param incX The increment for the elements of vector x; must be larger than zero.
2651      * @param Ap The input allocation containing matrix A; supported element type: {Element#F32}.
2652      */
2653     void SSPR(RsBlasUplo Uplo, float alpha, sp<Allocation> X, int incX, sp<Allocation> Ap);
2654 
2655     /**
2656      * SSYR2 performs the symmetric rank 2 operation
2657      * A := alpha*x*y**T + alpha*y*x**T + A
2658      *
2659      * Details: http://www.netlib.org/lapack/explore-html/db/d99/ssyr2_8f.html
2660      *
2661      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2662      * @param alpha The scalar alpha.
2663      * @param X The input allocation containing vector x; supported element type: {Element#F32}.
2664      * @param incX The increment for the elements of vector x; must be larger than zero.
2665      * @param Y The input allocation containing vector y; supported element type: {Element#F32}.
2666      * @param incY The increment for the elements of vector y; must be larger than zero.
2667      * @param A The input allocation containing matrix A; supported element type: {Element#F32}.
2668      */
2669     void SSYR2(RsBlasUplo Uplo, float alpha, sp<Allocation> X, int incX,
2670                sp<Allocation> Y, int incY, sp<Allocation> A);
2671 
2672     /**
2673      * SSPR2 performs the symmetric rank 2 operation
2674      * A := alpha*x*y**T + alpha*y*x**T + A
2675      *
2676      * Details: http://www.netlib.org/lapack/explore-html/db/d3e/sspr2_8f.html
2677      *
2678      * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2679      *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2680      *       'a' to packed matrix 'b'.
2681      *           k = 0
2682      *           for i in range(0, n):
2683      *              for j in range(i, n):
2684      *                  b[k++] = a[i, j]
2685      *
2686      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2687      * @param alpha The scalar alpha.
2688      * @param X The input allocation containing vector x; supported element type: {Element#F32}.
2689      * @param incX The increment for the elements of vector x; must be larger than zero.
2690      * @param Y The input allocation containing vector y; supported element type: {Element#F32}.
2691      * @param incY The increment for the elements of vector y; must be larger than zero.
2692      * @param Ap The input allocation containing matrix A; supported element type: {Element#F32}.
2693      */
2694     void SSPR2(RsBlasUplo Uplo, float alpha, sp<Allocation> X, int incX,
2695                sp<Allocation> Y, int incY, sp<Allocation> Ap);
2696 
2697     /**
2698      * DSYMV performs the matrix-vector operation
2699      * y := alpha*A*x + beta*y
2700      *
2701      * Details: http://www.netlib.org/lapack/explore-html/d8/dbe/dsymv_8f.html
2702      *
2703      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2704      * @param alpha The scalar alpha.
2705      * @param A The input allocation containing matrix A; supported element type: {Element#F64}.
2706      * @param X The input allocation containing vector x; supported element type: {Element#F64}.
2707      * @param incX The increment for the elements of vector x; must be larger than zero.
2708      * @param beta The scalar beta.
2709      * @param Y The input allocation containing vector y; supported element type: {Element#F64}.
2710      * @param incY The increment for the elements of vector y; must be larger than zero.
2711      */
2712     void DSYMV(RsBlasUplo Uplo, double alpha, sp<Allocation> A, sp<Allocation> X, int incX,
2713                double beta, sp<Allocation> Y, int incY);
2714 
2715     /**
2716      * DSBMV performs the matrix-vector operation
2717      * y := alpha*A*x + beta*y
2718      *
2719      * Details: http://www.netlib.org/lapack/explore-html/d8/d1e/dsbmv_8f.html
2720      *
2721      * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2722      *       but only the region N*(K+1) will be referenced. The following subroutine is an
2723      *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2724      *           for i in range(0, n):
2725      *              for j in range(i, min(i+k+1, n)):
2726      *                  b[i, j-i] = a[i, j]
2727      *
2728      * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
2729      * @param K The number of off-diagonals of the matrix A.
2730      * @param alpha The scalar alpha.
2731      * @param A The input allocation containing matrix A; supported element type: {Element#F64}.
2732      * @param X The input allocation containing vector x; supported element type: {Element#F64}.
2733      * @param incX The increment for the elements of vector x; must be larger than zero.
2734      * @param beta The scalar beta.
2735      * @param Y The input allocation containing vector y; supported element type: {Element#F64}.
2736      * @param incY The increment for the elements of vector y; must be larger than zero.
2737      */
2738     void DSBMV(RsBlasUplo Uplo, int K, double alpha, sp<Allocation> A, sp<Allocation> X, int incX,
2739                double beta, sp<Allocation> Y, int incY);
2740 
2741     /**
2742      * DSPMV performs the matrix-vector operation
2743      * y := alpha*A*x + beta*y
2744      *
2745      * Details: http://www.netlib.org/lapack/explore-html/d4/d85/dspmv_8f.html
2746      *
2747      * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2748      *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2749      *       'a' to packed matrix 'b'.
2750      *           k = 0
2751      *           for i in range(0, n):
2752      *              for j in range(i, n):
2753      *                  b[k++] = a[i, j]
2754      *
2755      * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
2756      * @param alpha The scalar alpha.
2757      * @param Ap The input allocation containing matrix A; supported element type: {Element#F64}.
2758      * @param X The input allocation containing vector x; supported element type: {Element#F64}.
2759      * @param incX The increment for the elements of vector x; must be larger than zero.
2760      * @param beta The scalar beta.
2761      * @param Y The input allocation containing vector y; supported element type: {Element#F64}.
2762      * @param incY The increment for the elements of vector y; must be larger than zero.
2763      */
2764     void DSPMV(RsBlasUplo Uplo, double alpha, sp<Allocation> Ap, sp<Allocation> X, int incX,
2765                double beta, sp<Allocation> Y, int incY);
2766 
2767     /**
2768      * DGER performs the rank 1 operation
2769      * A := alpha*x*y**T + A
2770      *
2771      * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dger_8f.html
2772      *
2773      * @param alpha The scalar alpha.
2774      * @param X The input allocation containing vector x; supported element type: {Element#F64}.
2775      * @param incX The increment for the elements of vector x; must be larger than zero.
2776      * @param Y The input allocation containing vector y; supported element type: {Element#F64}.
2777      * @param incY The increment for the elements of vector y; must be larger than zero.
2778      * @param A The input allocation containing matrix A; supported element type: {Element#F64}.
2779      */
2780     void DGER(double alpha, sp<Allocation> X, int incX, sp<Allocation> Y, int incY, sp<Allocation> A);
2781 
2782     /**
2783      * DSYR performs the rank 1 operation
2784      * A := alpha*x*x**T + A
2785      *
2786      * Details: http://www.netlib.org/lapack/explore-html/d3/d60/dsyr_8f.html
2787      *
2788      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2789      * @param alpha The scalar alpha.
2790      * @param X The input allocation containing vector x; supported element type: {Element#F64}.
2791      * @param incX The increment for the elements of vector x; must be larger than zero.
2792      * @param A The input allocation containing matrix A; supported element type: {Element#F64}.
2793      */
2794     void DSYR(RsBlasUplo Uplo, double alpha, sp<Allocation> X, int incX, sp<Allocation> A);
2795 
2796     /**
2797      * DSPR performs the rank 1 operation
2798      * A := alpha*x*x**T + A
2799      *
2800      * Details: http://www.netlib.org/lapack/explore-html/dd/dba/dspr_8f.html
2801      *
2802      * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2803      *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2804      *       'a' to packed matrix 'b'.
2805      *           k = 0
2806      *           for i in range(0, n):
2807      *              for j in range(i, n):
2808      *                  b[k++] = a[i, j]
2809      *
2810      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2811      * @param alpha The scalar alpha.
2812      * @param X The input allocation containing vector x; supported element type: {Element#F64}.
2813      * @param incX The increment for the elements of vector x; must be larger than zero.
2814      * @param Ap The input allocation containing matrix A; supported element type: {Element#F64}.
2815      */
2816     void DSPR(RsBlasUplo Uplo, double alpha, sp<Allocation> X, int incX, sp<Allocation> Ap);
2817 
2818     /**
2819      * DSYR2 performs the symmetric rank 2 operation
2820      * A := alpha*x*y**T + alpha*y*x**T + A
2821      *
2822      * Details: http://www.netlib.org/lapack/explore-html/de/d41/dsyr2_8f.html
2823      *
2824      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2825      * @param alpha The scalar alpha.
2826      * @param X The input allocation containing vector x; supported element type: {Element#F64}.
2827      * @param incX The increment for the elements of vector x; must be larger than zero.
2828      * @param Y The input allocation containing vector y; supported element type: {Element#F64}.
2829      * @param incY The increment for the elements of vector y; must be larger than zero.
2830      * @param A The input allocation containing matrix A; supported element type: {Element#F64}.
2831      */
2832     void DSYR2(RsBlasUplo Uplo, double alpha, sp<Allocation> X, int incX,
2833                sp<Allocation> Y, int incY, sp<Allocation> A);
2834 
2835     /**
2836      * DSPR2 performs the symmetric rank 2 operation
2837      * A := alpha*x*y**T + alpha*y*x**T + A
2838      *
2839      * Details: http://www.netlib.org/lapack/explore-html/dd/d9e/dspr2_8f.html
2840      *
2841      * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2842      *       The following subroutine is an example showing how to convert an UPPER triangular matrix
2843      *       'a' to packed matrix 'b'.
2844      *           k = 0
2845      *           for i in range(0, n):
2846      *              for j in range(i, n):
2847      *                  b[k++] = a[i, j]
2848      *
2849      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2850      * @param alpha The scalar alpha.
2851      * @param X The input allocation containing vector x; supported element type: {Element#F64}.
2852      * @param incX The increment for the elements of vector x; must be larger than zero.
2853      * @param Y The input allocation containing vector y; supported element type: {Element#F64}.
2854      * @param incY The increment for the elements of vector y; must be larger than zero.
2855      * @param Ap The input allocation containing matrix A; supported element type: {Element#F64}.
2856      */
2857     void DSPR2(RsBlasUplo Uplo, double alpha, sp<Allocation> X, int incX,
2858                sp<Allocation> Y, int incY, sp<Allocation> Ap);
2859 
2860     /**
2861      * CHEMV performs the matrix-vector operation
2862      * y := alpha*A*x + beta*y
2863      *
2864      * Details: http://www.netlib.org/lapack/explore-html/d7/d51/chemv_8f.html
2865      *
2866      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2867      * @param alpha The scalar alpha.
2868      * @param A The input allocation containing matrix A; supported element type: {Element#F32_2}.
2869      * @param X The input allocation containing vector x; supported element type: {Element#F32_2}.
2870      * @param incX The increment for the elements of vector x; must be larger than zero.
2871      * @param beta The scalar beta.
2872      * @param Y The input allocation containing vector y; supported element type: {Element#F32_2}.
2873      * @param incY The increment for the elements of vector y; must be larger than zero.
2874      */
2875     void CHEMV(RsBlasUplo Uplo, Float2 alpha, sp<Allocation> A, sp<Allocation> X,
2876                int incX, Float2 beta, sp<Allocation> Y, int incY);
2877 
2878     /**
2879      * CHBMV performs the matrix-vector operation
2880      * y := alpha*A*x + beta*y
2881      *
2882      * Details: http://www.netlib.org/lapack/explore-html/db/dc2/chbmv_8f.html
2883      *
2884      * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2885      *       but only the region N*(K+1) will be referenced. The following subroutine is an
2886      *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
2887      *           for i in range(0, n):
2888      *              for j in range(i, min(i+k+1, n)):
2889      *                  b[i, j-i] = a[i, j]
2890      *
2891      * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
2892      * @param K The number of off-diagonals of the matrix A.
2893      * @param alpha The scalar alpha.
2894      * @param A The input allocation containing matrix A; supported element type: {Element#F32_2}.
2895      * @param X The input allocation containing vector x; supported element type: {Element#F32_2}.
2896      * @param incX The increment for the elements of vector x; must be larger than zero.
2897      * @param beta The scalar beta.
2898      * @param Y The input allocation containing vector y; supported element type: {Element#F32_2}.
2899      * @param incY The increment for the elements of vector y; must be larger than zero.
2900      */
2901     void CHBMV(RsBlasUplo Uplo, int K, Float2 alpha, sp<Allocation> A, sp<Allocation> X,
2902                int incX, Float2 beta, sp<Allocation> Y, int incY);
2903 
2904     /**
2905      * CHPMV performs the matrix-vector operation
2906      * y := alpha*A*x + beta*y
2907      *
2908      * Details: http://www.netlib.org/lapack/explore-html/d2/d06/chpmv_8f.html
2909      *
2910      * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2911      *       The following pseudocode is an example showing how to convert an UPPER triangular matrix
2912      *       'a' to a packed matrix 'b'.
2913      *           k = 0
2914      *           for i in range(0, n):
2915      *              for j in range(i, n):
2916      *                  b[k++] = a[i, j]
2917      *
2918      * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
2919      * @param alpha The scalar alpha.
2920      * @param Ap The input allocation contains matrix A, supported elements type: {Element#F32_2}.
2921      * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2922      * @param incX The increment for the elements of vector x, must be larger than zero.
2923      * @param beta The scalar beta.
2924      * @param Y The input/output allocation contains vector y (updated with the result), supported elements type: {Element#F32_2}.
2925      * @param incY The increment for the elements of vector y, must be larger than zero.
2926      */
2927     void CHPMV(RsBlasUplo Uplo, Float2 alpha, sp<Allocation> Ap, sp<Allocation> X,
2928                int incX, Float2 beta, sp<Allocation> Y, int incY);
2929 
2930     /**
2931      * CGERU performs the rank 1 operation
2932      * A := alpha*x*y**T + A
2933      *
2934      * Details: http://www.netlib.org/lapack/explore-html/db/d5f/cgeru_8f.html
2935      *
2936      * @param alpha The scalar alpha.
2937      * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2938      * @param incX The increment for the elements of vector x, must be larger than zero.
2939      * @param Y The input allocation contains vector y, supported elements type: {Element#F32_2}.
2940      * @param incY The increment for the elements of vector y, must be larger than zero.
2941      * @param A The input/output allocation contains matrix A (updated with the result), supported elements type: {Element#F32_2}.
2942      */
2943     void CGERU(Float2 alpha, sp<Allocation> X, int incX,
2944                sp<Allocation> Y, int incY, sp<Allocation> A);
2945 
2946     /**
2947      * CGERC performs the rank 1 operation
2948      * A := alpha*x*y**H + A
2949      *
2950      * Details: http://www.netlib.org/lapack/explore-html/dd/d84/cgerc_8f.html
2951      *
2952      * @param alpha The scalar alpha.
2953      * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2954      * @param incX The increment for the elements of vector x, must be larger than zero.
2955      * @param Y The input allocation contains vector y, supported elements type: {Element#F32_2}.
2956      * @param incY The increment for the elements of vector y, must be larger than zero.
2957      * @param A The input/output allocation contains matrix A (updated with the result), supported elements type: {Element#F32_2}.
2958      */
2959     void CGERC(Float2 alpha, sp<Allocation> X, int incX,
2960                sp<Allocation> Y, int incY, sp<Allocation> A);
2961 
2962     /**
2963      * CHER performs the rank 1 operation
2964      * A := alpha*x*x**H + A
2965      *
2966      * Details: http://www.netlib.org/lapack/explore-html/d3/d6d/cher_8f.html
2967      *
2968      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2969      * @param alpha The real scalar alpha.
2970      * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2971      * @param incX The increment for the elements of vector x, must be larger than zero.
2972      * @param A The input/output allocation contains matrix A (updated with the result), supported elements type: {Element#F32_2}.
2973      */
2974     void CHER(RsBlasUplo Uplo, float alpha, sp<Allocation> X, int incX, sp<Allocation> A);
2975 
2976     /**
2977      * CHPR performs the rank 1 operation
2978      * A := alpha*x*x**H + A
2979      *
2980      * Details: http://www.netlib.org/lapack/explore-html/db/dcd/chpr_8f.html
2981      *
2982      * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
2983      *       The following pseudocode is an example showing how to convert an UPPER triangular matrix
2984      *       'a' to a packed matrix 'b'.
2985      *           k = 0
2986      *           for i in range(0, n):
2987      *              for j in range(i, n):
2988      *                  b[k++] = a[i, j]
2989      *
2990      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2991      * @param alpha The real scalar alpha.
2992      * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
2993      * @param incX The increment for the elements of vector x, must be larger than zero.
2994      * @param Ap The input/output allocation contains matrix A (updated with the result), supported elements type: {Element#F32_2}.
2995      */
2996     void CHPR(RsBlasUplo Uplo, float alpha, sp<Allocation> X, int incX, sp<Allocation> Ap);
2997 
2998     /**
2999      * CHER2 performs the hermitian rank 2 operation
3000      * A := alpha*x*y**H + conjg(alpha)*y*x**H + A
3001      *
3002      * Details: http://www.netlib.org/lapack/explore-html/db/d87/cher2_8f.html
3003      *
3004      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3005      * @param alpha The scalar alpha.
3006      * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
3007      * @param incX The increment for the elements of vector x, must be larger than zero.
3008      * @param Y The input allocation contains vector y, supported elements type: {Element#F32_2}.
3009      * @param incY The increment for the elements of vector y, must be larger than zero.
3010      * @param A The input/output allocation contains matrix A (updated with the result), supported elements type: {Element#F32_2}.
3011      */
3012     void CHER2(RsBlasUplo Uplo, Float2 alpha, sp<Allocation> X, int incX,
3013                sp<Allocation> Y, int incY, sp<Allocation> A);
3014 
3015     /**
3016      * CHPR2 performs the hermitian rank 2 operation
3017      * A := alpha*x*y**H + conjg(alpha)*y*x**H + A
3018      *
3019      * Details: http://www.netlib.org/lapack/explore-html/d6/d44/chpr2_8f.html
3020      *
3021      * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
3022      *       The following pseudocode is an example showing how to convert an UPPER triangular matrix
3023      *       'a' to a packed matrix 'b'.
3024      *           k = 0
3025      *           for i in range(0, n):
3026      *              for j in range(i, n):
3027      *                  b[k++] = a[i, j]
3028      *
3029      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
3030      * @param alpha The scalar alpha.
3031      * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}.
3032      * @param incX The increment for the elements of vector x, must be larger than zero.
3033      * @param Y The input allocation contains vector y, supported elements type: {Element#F32_2}.
3034      * @param incY The increment for the elements of vector y, must be larger than zero.
3035      * @param Ap The input/output allocation contains matrix A (updated with the result), supported elements type: {Element#F32_2}.
3036      */
3037     void CHPR2(RsBlasUplo Uplo, Float2 alpha, sp<Allocation> X, int incX,
3038                sp<Allocation> Y, int incY, sp<Allocation> Ap);
3039 
3040     /**
3041      * ZHEMV performs the matrix-vector operation
3042      * y := alpha*A*x + beta*y
3043      *
3044      * Details: http://www.netlib.org/lapack/explore-html/d0/ddd/zhemv_8f.html
3045      *
3046      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3047      * @param alpha The scalar alpha.
3048      * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
3049      * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
3050      * @param incX The increment for the elements of vector x, must be larger than zero.
3051      * @param beta The scalar beta.
3052      * @param Y The input/output allocation contains vector y (updated with the result), supported elements type: {Element#F64_2}.
3053      * @param incY The increment for the elements of vector y, must be larger than zero.
3054      */
3055     void ZHEMV(RsBlasUplo Uplo, Double2 alpha, sp<Allocation> A, sp<Allocation> X,
3056                int incX, Double2 beta, sp<Allocation> Y, int incY);
3057 
3058     /**
3059      * ZHBMV performs the matrix-vector operation
3060      * y := alpha*A*x + beta*y
3061      *
3062      * Details: http://www.netlib.org/lapack/explore-html/d3/d1a/zhbmv_8f.html
3063      *
3064      * Note: For an N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
3065      *       but only the region N*(K+1) will be referenced. The following pseudocode is an
3066      *       example showing how to convert an UPPER triangular matrix 'a' to a row-based band matrix 'b'.
3067      *           for i in range(0, n):
3068      *              for j in range(i, min(i+k+1, n)):
3069      *                  b[i, j-i] = a[i, j]
3070      *
3071      * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
3072      * @param K The number of off-diagonals of the matrix A.
3073      * @param alpha The scalar alpha.
3074      * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
3075      * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
3076      * @param incX The increment for the elements of vector x, must be larger than zero.
3077      * @param beta The scalar beta.
3078      * @param Y The input/output allocation contains vector y (updated with the result), supported elements type: {Element#F64_2}.
3079      * @param incY The increment for the elements of vector y, must be larger than zero.
3080      */
3081     void ZHBMV(RsBlasUplo Uplo, int K, Double2 alpha, sp<Allocation> A, sp<Allocation> X,
3082                int incX, Double2 beta, sp<Allocation> Y, int incY);
3083 
3084     /**
3085      * ZHPMV performs the matrix-vector operation
3086      * y := alpha*A*x + beta*y
3087      *
3088      * Details: http://www.netlib.org/lapack/explore-html/d0/d60/zhpmv_8f.html
3089      *
3090      * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
3091      *       The following pseudocode is an example showing how to convert an UPPER triangular matrix
3092      *       'a' to a packed matrix 'b'.
3093      *           k = 0
3094      *           for i in range(0, n):
3095      *              for j in range(i, n):
3096      *                  b[k++] = a[i, j]
3097      *
3098      * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
3099      * @param alpha The scalar alpha.
3100      * @param Ap The input allocation contains matrix A, supported elements type: {Element#F64_2}.
3101      * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
3102      * @param incX The increment for the elements of vector x, must be larger than zero.
3103      * @param beta The scalar beta.
3104      * @param Y The input/output allocation contains vector y (updated with the result), supported elements type: {Element#F64_2}.
3105      * @param incY The increment for the elements of vector y, must be larger than zero.
3106      */
3107     void ZHPMV(RsBlasUplo Uplo, Double2 alpha, sp<Allocation> Ap, sp<Allocation> X,
3108                int incX, Double2 beta, sp<Allocation> Y, int incY);
3109 
3110     /**
3111      * ZGERU performs the rank 1 operation
3112      * A := alpha*x*y**T + A
3113      *
3114      * Details: http://www.netlib.org/lapack/explore-html/d7/d12/zgeru_8f.html
3115      *
3116      * @param alpha The scalar alpha.
3117      * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
3118      * @param incX The increment for the elements of vector x, must be larger than zero.
3119      * @param Y The input allocation contains vector y, supported elements type: {Element#F64_2}.
3120      * @param incY The increment for the elements of vector y, must be larger than zero.
3121      * @param A The input/output allocation contains matrix A (updated with the result), supported elements type: {Element#F64_2}.
3122      */
3123     void ZGERU(Double2 alpha, sp<Allocation> X, int incX,
3124                sp<Allocation> Y, int incY, sp<Allocation> A);
3125 
3126     /**
3127      * ZGERC performs the rank 1 operation
3128      * A := alpha*x*y**H + A
3129      *
3130      * Details: http://www.netlib.org/lapack/explore-html/d3/dad/zgerc_8f.html
3131      *
3132      * @param alpha The scalar alpha.
3133      * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
3134      * @param incX The increment for the elements of vector x, must be larger than zero.
3135      * @param Y The input allocation contains vector y, supported elements type: {Element#F64_2}.
3136      * @param incY The increment for the elements of vector y, must be larger than zero.
3137      * @param A The input/output allocation contains matrix A (updated with the result), supported elements type: {Element#F64_2}.
3138      */
3139     void ZGERC(Double2 alpha, sp<Allocation> X, int incX,
3140                sp<Allocation> Y, int incY, sp<Allocation> A);
3141 
3142     /**
3143      * ZHER performs the rank 1 operation
3144      * A := alpha*x*x**H + A
3145      *
3146      * Details: http://www.netlib.org/lapack/explore-html/de/d0e/zher_8f.html
3147      *
3148      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3149      * @param alpha The real scalar alpha.
3150      * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
3151      * @param incX The increment for the elements of vector x, must be larger than zero.
3152      * @param A The input/output allocation contains matrix A (updated with the result), supported elements type: {Element#F64_2}.
3153      */
3154     void ZHER(RsBlasUplo Uplo, double alpha, sp<Allocation> X, int incX, sp<Allocation> A);
3155 
3156     /**
3157      * ZHPR performs the rank 1 operation
3158      * A := alpha*x*x**H + A
3159      *
3160      * Details: http://www.netlib.org/lapack/explore-html/de/de1/zhpr_8f.html
3161      *
3162      * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
3163      *       The following pseudocode is an example showing how to convert an UPPER triangular matrix
3164      *       'a' to a packed matrix 'b'.
3165      *           k = 0
3166      *           for i in range(0, n):
3167      *              for j in range(i, n):
3168      *                  b[k++] = a[i, j]
3169      *
3170      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
3171      * @param alpha The real scalar alpha.
3172      * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
3173      * @param incX The increment for the elements of vector x, must be larger than zero.
3174      * @param Ap The input/output allocation contains matrix A (updated with the result), supported elements type: {Element#F64_2}.
3175      */
3176     void ZHPR(RsBlasUplo Uplo, double alpha, sp<Allocation> X, int incX, sp<Allocation> Ap);
3177 
3178     /**
3179      * ZHER2 performs the hermitian rank 2 operation
3180      * A := alpha*x*y**H + conjg(alpha)*y*x**H + A
3181      *
3182      * Details: http://www.netlib.org/lapack/explore-html/da/d8a/zher2_8f.html
3183      *
3184      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3185      * @param alpha The scalar alpha.
3186      * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
3187      * @param incX The increment for the elements of vector x, must be larger than zero.
3188      * @param Y The input allocation contains vector y, supported elements type: {Element#F64_2}.
3189      * @param incY The increment for the elements of vector y, must be larger than zero.
3190      * @param A The input/output allocation contains matrix A (updated with the result), supported elements type: {Element#F64_2}.
3191      */
3192     void ZHER2(RsBlasUplo Uplo, Double2 alpha, sp<Allocation> X, int incX,
3193                sp<Allocation> Y, int incY, sp<Allocation> A);
3194 
3195     /**
3196      * ZHPR2 performs the hermitian rank 2 operation
3197      * A := alpha*x*y**H + conjg(alpha)*y*x**H + A
3198      *
3199      * Details: http://www.netlib.org/lapack/explore-html/d5/d52/zhpr2_8f.html
3200      *
3201      * Note: For an N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2.
3202      *       The following pseudocode is an example showing how to convert an UPPER triangular matrix
3203      *       'a' to a packed matrix 'b'.
3204      *           k = 0
3205      *           for i in range(0, n):
3206      *              for j in range(i, n):
3207      *                  b[k++] = a[i, j]
3208      *
3209      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
3210      * @param alpha The scalar alpha.
3211      * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}.
3212      * @param incX The increment for the elements of vector x, must be larger than zero.
3213      * @param Y The input allocation contains vector y, supported elements type: {Element#F64_2}.
3214      * @param incY The increment for the elements of vector y, must be larger than zero.
3215      * @param Ap The input/output allocation contains matrix A (updated with the result), supported elements type: {Element#F64_2}.
3216      */
3217     void ZHPR2(RsBlasUplo Uplo, Double2 alpha, sp<Allocation> X, int incX,
3218                sp<Allocation> Y, int incY, sp<Allocation> Ap);
3219 
3220     /**
3221      * SGEMM performs one of the matrix-matrix operations
3222      * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T
3223      *
3224      * Details: http://www.netlib.org/lapack/explore-html/d4/de2/sgemm_8f.html
3225      *
3226      * @param TransA The type of transpose applied to matrix A.
3227      * @param TransB The type of transpose applied to matrix B.
3228      * @param alpha The scalar alpha.
3229      * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
3230      * @param B The input allocation contains matrix B, supported elements type: {Element#F32}.
3231      * @param beta The scalar beta.
3232      * @param C The input/output allocation contains matrix C (updated with the result), supported elements type: {Element#F32}.
3233      */
3234     void SGEMM(RsBlasTranspose TransA, RsBlasTranspose TransB, float alpha, sp<Allocation> A,
3235                       sp<Allocation> B, float beta, sp<Allocation> C);
3236 
3237 
3238     /**
3239      * DGEMM performs one of the matrix-matrix operations
3240      * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T
3241      *
3242      * Details: http://www.netlib.org/lapack/explore-html/d7/d2b/dgemm_8f.html
3243      *
3244      * @param TransA The type of transpose applied to matrix A.
3245      * @param TransB The type of transpose applied to matrix B.
3246      * @param alpha The scalar alpha.
3247      * @param A The input allocation contains matrix A, supported elements type: {Element#F64}.
3248      * @param B The input allocation contains matrix B, supported elements type: {Element#F64}.
3249      * @param beta The scalar beta.
3250      * @param C The input/output allocation contains matrix C (updated with the result), supported elements type: {Element#F64}.
3251      */
3252     void DGEMM(RsBlasTranspose TransA, RsBlasTranspose TransB, double alpha, sp<Allocation> A,
3253                       sp<Allocation> B, double beta, sp<Allocation> C);
3254 
3255     /**
3256      * CGEMM performs one of the matrix-matrix operations
3257      * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T  or  op(X) = X**H
3258      *
3259      * Details: http://www.netlib.org/lapack/explore-html/d6/d5b/cgemm_8f.html
3260      *
3261      * @param TransA The type of transpose applied to matrix A.
3262      * @param TransB The type of transpose applied to matrix B.
3263      * @param alpha The scalar alpha.
3264      * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
3265      * @param B The input allocation contains matrix B, supported elements type: {Element#F32_2}.
3266      * @param beta The scalar beta.
3267      * @param C The input/output allocation contains matrix C (updated with the result), supported elements type: {Element#F32_2}.
3268      */
3269     void CGEMM(RsBlasTranspose TransA, RsBlasTranspose TransB, Float2 alpha, sp<Allocation> A,
3270                       sp<Allocation> B, Float2 beta, sp<Allocation> C);
3271 
3272     /**
3273      * ZGEMM performs one of the matrix-matrix operations
3274      * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T  or  op(X) = X**H
3275      *
3276      * Details: http://www.netlib.org/lapack/explore-html/d7/d76/zgemm_8f.html
3277      *
3278      * @param TransA The type of transpose applied to matrix A.
3279      * @param TransB The type of transpose applied to matrix B.
3280      * @param alpha The scalar alpha.
3281      * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
3282      * @param B The input allocation contains matrix B, supported elements type: {Element#F64_2}.
3283      * @param beta The scalar beta.
3284      * @param C The input/output allocation contains matrix C (updated with the result), supported elements type: {Element#F64_2}.
3285      */
3286     void ZGEMM(RsBlasTranspose TransA, RsBlasTranspose TransB, Double2 alpha, sp<Allocation> A,
3287                       sp<Allocation> B, Double2 beta, sp<Allocation> C);
3288 
3289     /**
3290      * SSYMM performs one of the matrix-matrix operations
3291      * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3292      *
3293      * Details: http://www.netlib.org/lapack/explore-html/d7/d42/ssymm_8f.html
3294      *
3295      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3296      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3297      * @param alpha The scalar alpha.
3298      * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
3299      * @param B The input allocation contains matrix B, supported elements type: {Element#F32}.
3300      * @param beta The scalar beta.
3301      * @param C The input/output allocation contains matrix C (updated with the result), supported elements type: {Element#F32}.
3302      */
3303     void SSYMM(RsBlasSide Side, RsBlasUplo Uplo, float alpha, sp<Allocation> A,
3304                       sp<Allocation> B, float beta, sp<Allocation> C);
3305 
3306     /**
3307      * DSYMM performs one of the matrix-matrix operations
3308      * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3309      *
3310      * Details: http://www.netlib.org/lapack/explore-html/d8/db0/dsymm_8f.html
3311      *
3312      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3313      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3314      * @param alpha The scalar alpha.
3315      * @param A The input allocation contains matrix A, supported elements type: {Element#F64}.
3316      * @param B The input allocation contains matrix B, supported elements type: {Element#F64}.
3317      * @param beta The scalar beta.
3318      * @param C The input/output allocation contains matrix C (updated with the result), supported elements type: {Element#F64}.
3319      */
3320     void DSYMM(RsBlasSide Side, RsBlasUplo Uplo, double alpha, sp<Allocation> A,
3321                       sp<Allocation> B, double beta, sp<Allocation> C);
3322 
3323     /**
3324      * CSYMM performs one of the matrix-matrix operations
3325      * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3326      *
3327      * Details: http://www.netlib.org/lapack/explore-html/db/d59/csymm_8f.html
3328      *
3329      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3330      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3331      * @param alpha The scalar alpha.
3332      * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
3333      * @param B The input allocation contains matrix B, supported elements type: {Element#F32_2}.
3334      * @param beta The scalar beta.
3335      * @param C The input/output allocation contains matrix C (updated with the result), supported elements type: {Element#F32_2}.
3336      */
3337     void CSYMM(RsBlasSide Side, RsBlasUplo Uplo, Float2 alpha, sp<Allocation> A,
3338                       sp<Allocation> B, Float2 beta, sp<Allocation> C);
3339 
3340     /**
3341      * ZSYMM performs one of the matrix-matrix operations
3342      * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3343      *
3344      * Details: http://www.netlib.org/lapack/explore-html/df/d51/zsymm_8f.html
3345      *
3346      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3347      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3348      * @param alpha The scalar alpha.
3349      * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
3350      * @param B The input allocation contains matrix B, supported elements type: {Element#F64_2}.
3351      * @param beta The scalar beta.
3352      * @param C The input/output allocation contains matrix C (updated with the result), supported elements type: {Element#F64_2}.
3353      */
3354     void ZSYMM(RsBlasSide Side, RsBlasUplo Uplo, Double2 alpha, sp<Allocation> A,
3355                       sp<Allocation> B, Double2 beta, sp<Allocation> C);
3356 
3357     /**
3358      * SSYRK performs one of the symmetric rank k operations
3359      * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3360      *
3361      * Details: http://www.netlib.org/lapack/explore-html/d0/d40/ssyrk_8f.html
3362      *
3363      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3364      * @param Trans The type of transpose applied to the operation.
3365      * @param alpha The scalar alpha.
3366      * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
3367      * @param beta The scalar beta.
3368      * @param C The input/output allocation contains matrix C (updated with the result), supported elements type: {Element#F32}.
3369      */
3370     void SSYRK(RsBlasUplo Uplo, RsBlasTranspose Trans, float alpha,
3371                sp<Allocation> A, float beta, sp<Allocation> C);
3372 
3373     /**
3374      * DSYRK performs one of the symmetric rank k operations
3375      * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3376      *
3377      * Details: http://www.netlib.org/lapack/explore-html/dc/d05/dsyrk_8f.html
3378      *
3379      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3380      * @param Trans The type of transpose applied to the operation.
3381      * @param alpha The scalar alpha.
3382      * @param A The input allocation contains matrix A, supported elements type: {Element#F64}.
3383      * @param beta The scalar beta.
3384      * @param C The input/output allocation contains matrix C (updated with the result), supported elements type: {Element#F64}.
3385      */
3386     void DSYRK(RsBlasUplo Uplo, RsBlasTranspose Trans, double alpha,
3387                sp<Allocation> A, double beta, sp<Allocation> C);
3388 
3389     /**
3390      * CSYRK performs one of the symmetric rank k operations
3391      * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3392      *
3393      * Details: http://www.netlib.org/lapack/explore-html/d3/d6a/csyrk_8f.html
3394      *
3395      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3396      * @param Trans The type of transpose applied to the operation.
3397      * @param alpha The scalar alpha.
3398      * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}.
3399      * @param beta The scalar beta.
3400      * @param C The input/output allocation contains matrix C (updated with the result), supported elements type: {Element#F32_2}.
3401      */
3402     void CSYRK(RsBlasUplo Uplo, RsBlasTranspose Trans, Float2 alpha,
3403                sp<Allocation> A, Float2 beta, sp<Allocation> C);
3404 
3405     /**
3406      * ZSYRK performs one of the symmetric rank k operations
3407      * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
3408      *
3409      * Details: http://www.netlib.org/lapack/explore-html/de/d54/zsyrk_8f.html
3410      *
3411      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3412      * @param Trans The type of transpose applied to the operation.
3413      * @param alpha The scalar alpha.
3414      * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}.
3415      * @param beta The scalar beta.
3416      * @param C The input/output allocation contains matrix C (updated with the result), supported elements type: {Element#F64_2}.
3417      */
3418     void ZSYRK(RsBlasUplo Uplo, RsBlasTranspose Trans, Double2 alpha,
3419                sp<Allocation> A, Double2 beta, sp<Allocation> C);
3420 
3421     /**
3422      * SSYR2K performs one of the symmetric rank 2k operations
3423      * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3424      *
3425      * Details: http://www.netlib.org/lapack/explore-html/df/d3d/ssyr2k_8f.html
3426      *
3427      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3428      * @param Trans The type of transpose applied to the operation.
3429      * @param alpha The scalar alpha.
3430      * @param A The input allocation contains matrix A, supported elements type: {Element#F32}.
3431      * @param B The input allocation contains matrix B, supported elements type: {Element#F32}.
3432      * @param beta The scalar beta.
3433      * @param C The input/output allocation contains matrix C (updated with the result), supported elements type: {Element#F32}.
3434      */
3435     void SSYR2K(RsBlasUplo Uplo, RsBlasTranspose Trans, float alpha,
3436                 sp<Allocation> A, sp<Allocation> B, float beta, sp<Allocation> C);
3437 
3438     /**
3439      * DSYR2K performs one of the symmetric rank 2k operations
3440      * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3441      *
3442      * Details: http://www.netlib.org/lapack/explore-html/d1/dec/dsyr2k_8f.html
3443      *
3444      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3445      * @param Trans The type of transpose applied to the operation.
3446      * @param alpha The scalar alpha.
3447      * @param A The input allocation containing matrix A; supported element type: {Element#F64}.
3448      * @param B The input allocation containing matrix B; supported element type: {Element#F64}.
3449      * @param beta The scalar beta.
3450      * @param C The input/output allocation containing matrix C; supported element type: {Element#F64}.
3451      */
3452     void DSYR2K(RsBlasUplo Uplo, RsBlasTranspose Trans, double alpha,
3453                 sp<Allocation> A, sp<Allocation> B, double beta, sp<Allocation> C);
3454 
3455     /**
3456      * CSYR2K performs one of the symmetric rank 2k operations
3457      * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3458      *
3459      * Details: http://www.netlib.org/lapack/explore-html/de/d7e/csyr2k_8f.html
3460      *
3461      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3462      * @param Trans The type of transpose applied to the operation.
3463      * @param alpha The scalar alpha.
3464      * @param A The input allocation containing matrix A; supported element type: {Element#F32_2}.
3465      * @param B The input allocation containing matrix B; supported element type: {Element#F32_2}.
3466      * @param beta The scalar beta.
3467      * @param C The input/output allocation containing matrix C; supported element type: {Element#F32_2}.
3468      */
3469     void CSYR2K(RsBlasUplo Uplo, RsBlasTranspose Trans, Float2 alpha,
3470                 sp<Allocation> A, sp<Allocation> B, Float2 beta, sp<Allocation> C);
3471 
3472     /**
3473      * ZSYR2K performs one of the symmetric rank 2k operations
3474      * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
3475      *
3476      * Details: http://www.netlib.org/lapack/explore-html/df/d20/zsyr2k_8f.html
3477      *
3478      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3479      * @param Trans The type of transpose applied to the operation.
3480      * @param alpha The scalar alpha.
3481      * @param A The input allocation containing matrix A; supported element type: {Element#F64_2}.
3482      * @param B The input allocation containing matrix B; supported element type: {Element#F64_2}.
3483      * @param beta The scalar beta.
3484      * @param C The input/output allocation containing matrix C; supported element type: {Element#F64_2}.
3485      */
3486     void ZSYR2K(RsBlasUplo Uplo, RsBlasTranspose Trans, Double2 alpha,
3487                 sp<Allocation> A, sp<Allocation> B, Double2 beta, sp<Allocation> C);
3488 
3489     /**
3490      * STRMM performs one of the matrix-matrix operations
3491      * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3492      * op(A) is one of  op(A) = A  or  op(A) = A**T
3493      *
3494      * Details: http://www.netlib.org/lapack/explore-html/df/d01/strmm_8f.html
3495      *
3496      * @param Side Specifies whether the triangular matrix A appears on the left or right.
3497      * @param Uplo Specifies whether matrix A is upper or lower triangular.
3498      * @param TransA The type of transpose applied to matrix A.
3499      * @param Diag Specifies whether or not A is unit triangular.
3500      * @param alpha The scalar alpha.
3501      * @param A The input allocation containing matrix A; supported element type: {Element#F32}.
3502      * @param B The input/output allocation containing matrix B; supported element type: {Element#F32}.
3503      */
3504     void STRMM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA,
3505                RsBlasDiag Diag, float alpha, sp<Allocation> A, sp<Allocation> B);
3506 
3507     /**
3508      * DTRMM performs one of the matrix-matrix operations
3509      * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3510      * op(A) is one of  op(A) = A  or  op(A) = A**T
3511      *
3512      * Details: http://www.netlib.org/lapack/explore-html/dd/d19/dtrmm_8f.html
3513      *
3514      * @param Side Specifies whether the triangular matrix A appears on the left or right.
3515      * @param Uplo Specifies whether matrix A is upper or lower triangular.
3516      * @param TransA The type of transpose applied to matrix A.
3517      * @param Diag Specifies whether or not A is unit triangular.
3518      * @param alpha The scalar alpha.
3519      * @param A The input allocation containing matrix A; supported element type: {Element#F64}.
3520      * @param B The input/output allocation containing matrix B; supported element type: {Element#F64}.
3521      */
3522     void DTRMM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3523                double alpha, sp<Allocation> A, sp<Allocation> B);
3524 
3525     /**
3526      * CTRMM performs one of the matrix-matrix operations
3527      * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3528      * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3529      *
3530      * Details: http://www.netlib.org/lapack/explore-html/d4/d9b/ctrmm_8f.html
3531      *
3532      * @param Side Specifies whether the triangular matrix A appears on the left or right.
3533      * @param Uplo Specifies whether matrix A is upper or lower triangular.
3534      * @param TransA The type of transpose applied to matrix A.
3535      * @param Diag Specifies whether or not A is unit triangular.
3536      * @param alpha The scalar alpha.
3537      * @param A The input allocation containing matrix A; supported element type: {Element#F32_2}.
3538      * @param B The input/output allocation containing matrix B; supported element type: {Element#F32_2}.
3539      */
3540     void CTRMM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3541                Float2 alpha, sp<Allocation> A, sp<Allocation> B);
3542 
3543     /**
3544      * ZTRMM performs one of the matrix-matrix operations
3545      * B := alpha*op(A)*B   or   B := alpha*B*op(A)
3546      * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3547      *
3548      * Details: http://www.netlib.org/lapack/explore-html/d8/de1/ztrmm_8f.html
3549      *
3550      * @param Side Specifies whether the triangular matrix A appears on the left or right.
3551      * @param Uplo Specifies whether matrix A is upper or lower triangular.
3552      * @param TransA The type of transpose applied to matrix A.
3553      * @param Diag Specifies whether or not A is unit triangular.
3554      * @param alpha The scalar alpha.
3555      * @param A The input allocation containing matrix A; supported element type: {Element#F64_2}.
3556      * @param B The input/output allocation containing matrix B; supported element type: {Element#F64_2}.
3557      */
3558     void ZTRMM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3559                Double2 alpha, sp<Allocation> A, sp<Allocation> B);
3560 
3561     /**
3562      * STRSM solves one of the matrix equations
3563      * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3564      * op(A) is one of  op(A) = A  or  op(A) = A**T
3565      *
3566      * Details: http://www.netlib.org/lapack/explore-html/d2/d8b/strsm_8f.html
3567      *
3568      * @param Side Specifies whether the triangular matrix A appears on the left or right.
3569      * @param Uplo Specifies whether matrix A is upper or lower triangular.
3570      * @param TransA The type of transpose applied to matrix A.
3571      * @param Diag Specifies whether or not A is unit triangular.
3572      * @param alpha The scalar alpha.
3573      * @param A The input allocation containing matrix A; supported element type: {Element#F32}.
3574      * @param B The input allocation containing matrix B; supported element type: {Element#F32}.
3575      */
3576     void STRSM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3577                float alpha, sp<Allocation> A, sp<Allocation> B);
3578 
3579     /**
3580      * DTRSM solves one of the matrix equations
3581      * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3582      * op(A) is one of  op(A) = A  or  op(A) = A**T
3583      *
3584      * Details: http://www.netlib.org/lapack/explore-html/de/da7/dtrsm_8f.html
3585      *
3586      * @param Side Specifies whether the triangular matrix A appears on the left or right.
3587      * @param Uplo Specifies whether matrix A is upper or lower triangular.
3588      * @param TransA The type of transpose applied to matrix A.
3589      * @param Diag Specifies whether or not A is unit triangular.
3590      * @param alpha The scalar alpha.
3591      * @param A The input allocation containing matrix A; supported element type: {Element#F64}.
3592      * @param B The input allocation containing matrix B; supported element type: {Element#F64}.
3593      */
3594     void DTRSM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3595                double alpha, sp<Allocation> A, sp<Allocation> B);
3596 
3597     /**
3598      * CTRSM solves one of the matrix equations
3599      * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3600      * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3601      *
3602      * Details: http://www.netlib.org/lapack/explore-html/de/d30/ctrsm_8f.html
3603      *
3604      * @param Side Specifies whether the triangular matrix A appears on the left or right.
3605      * @param Uplo Specifies whether matrix A is upper or lower triangular.
3606      * @param TransA The type of transpose applied to matrix A.
3607      * @param Diag Specifies whether or not A is unit triangular.
3608      * @param alpha The scalar alpha.
3609      * @param A The input allocation containing matrix A; supported element type: {Element#F32_2}.
3610      * @param B The input allocation containing matrix B; supported element type: {Element#F32_2}.
3611      */
3612     void CTRSM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3613                Float2 alpha, sp<Allocation> A, sp<Allocation> B);
3614 
3615     /**
3616      * ZTRSM solves one of the matrix equations
3617      * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3618      * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3619      *
3620      * Details: http://www.netlib.org/lapack/explore-html/d1/d39/ztrsm_8f.html
3621      *
3622      * @param Side Specifies whether the triangular matrix A appears on the left or right.
3623      * @param Uplo Specifies whether matrix A is upper or lower triangular.
3624      * @param TransA The type of transpose applied to matrix A.
3625      * @param Diag Specifies whether or not A is unit triangular.
3626      * @param alpha The scalar alpha.
3627      * @param A The input allocation containing matrix A; supported element type: {Element#F64_2}.
3628      * @param B The input allocation containing matrix B; supported element type: {Element#F64_2}.
3629      */
3630     void ZTRSM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag,
3631                Double2 alpha, sp<Allocation> A, sp<Allocation> B);
3632 
3633     /**
3634      * CHEMM performs one of the matrix-matrix operations
3635      * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3636      *
3637      * Details: http://www.netlib.org/lapack/explore-html/d3/d66/chemm_8f.html
3638      *
3639      * @param Side Specifies whether the Hermitian matrix A appears on the left or right.
3640      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3641      * @param alpha The scalar alpha.
3642      * @param A The input allocation containing matrix A; supported element type: {Element#F32_2}.
3643      * @param B The input allocation containing matrix B; supported element type: {Element#F32_2}.
3644      * @param beta The scalar beta.
3645      * @param C The input/output allocation containing matrix C; supported element type: {Element#F32_2}.
3646      */
3647     void CHEMM(RsBlasSide Side, RsBlasUplo Uplo, Float2 alpha, sp<Allocation> A,
3648                sp<Allocation> B, Float2 beta, sp<Allocation> C);
3649 
3650     /**
3651      * ZHEMM performs one of the matrix-matrix operations
3652      * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3653      *
3654      * Details: http://www.netlib.org/lapack/explore-html/d6/d3e/zhemm_8f.html
3655      *
3656      * @param Side Specifies whether the Hermitian matrix A appears on the left or right.
3657      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3658      * @param alpha The scalar alpha.
3659      * @param A The input allocation containing matrix A; supported element type: {Element#F64_2}.
3660      * @param B The input allocation containing matrix B; supported element type: {Element#F64_2}.
3661      * @param beta The scalar beta.
3662      * @param C The input/output allocation containing matrix C; supported element type: {Element#F64_2}.
3663      */
3664     void ZHEMM(RsBlasSide Side, RsBlasUplo Uplo, Double2 alpha, sp<Allocation> A,
3665                sp<Allocation> B, Double2 beta, sp<Allocation> C);
3666 
3667     /**
3668      * CHERK performs one of the Hermitian rank k operations
3669      * C := alpha*A*A**H + beta*C   or   C := alpha*A**H*A + beta*C
3670      *
3671      * Details: http://www.netlib.org/lapack/explore-html/d8/d52/cherk_8f.html
3672      *
3673      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3674      * @param Trans The type of transpose applied to the operation.
3675      * @param alpha The scalar alpha.
3676      * @param A The input allocation containing matrix A; supported element type: {Element#F32_2}.
3677      * @param beta The scalar beta.
3678      * @param C The input/output allocation containing matrix C; supported element type: {Element#F32_2}.
3679      */
3680     void CHERK(RsBlasUplo Uplo, RsBlasTranspose Trans, float alpha, sp<Allocation> A,
3681                float beta, sp<Allocation> C);
3682 
3683     /**
3684      * ZHERK performs one of the Hermitian rank k operations
3685      * C := alpha*A*A**H + beta*C   or   C := alpha*A**H*A + beta*C
3686      *
3687      * Details: http://www.netlib.org/lapack/explore-html/d1/db1/zherk_8f.html
3688      *
3689      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3690      * @param Trans The type of transpose applied to the operation.
3691      * @param alpha The scalar alpha.
3692      * @param A The input allocation containing matrix A; supported element type: {Element#F64_2}.
3693      * @param beta The scalar beta.
3694      * @param C The input/output allocation containing matrix C; supported element type: {Element#F64_2}.
3695      */
3696     void ZHERK(RsBlasUplo Uplo, RsBlasTranspose Trans, double alpha, sp<Allocation> A,
3697                double beta, sp<Allocation> C);
3698 
3699     /**
3700      * CHER2K performs one of the Hermitian rank 2k operations
3701      * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C   or   C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C
3702      *
3703      * Details: http://www.netlib.org/lapack/explore-html/d1/d82/cher2k_8f.html
3704      *
3705      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3706      * @param Trans The type of transpose applied to the operation.
3707      * @param alpha The scalar alpha.
3708      * @param A The input allocation containing matrix A; supported element type: {Element#F32_2}.
3709      * @param B The input allocation containing matrix B; supported element type: {Element#F32_2}.
3710      * @param beta The scalar beta.
3711      * @param C The input/output allocation containing matrix C; supported element type: {Element#F32_2}.
3712      */
3713     void CHER2K(RsBlasUplo Uplo, RsBlasTranspose Trans, Float2 alpha, sp<Allocation> A,
3714                 sp<Allocation> B, float beta, sp<Allocation> C);
3715 
3716     /**
3717      * ZHER2K performs one of the Hermitian rank 2k operations
3718      * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C   or   C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C
3719      *
3720      * Details: http://www.netlib.org/lapack/explore-html/d7/dfa/zher2k_8f.html
3721      *
3722      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3723      * @param Trans The type of transpose applied to the operation.
3724      * @param alpha The scalar alpha.
3725      * @param A The input allocation containing matrix A; supported element type: {Element#F64_2}.
3726      * @param B The input allocation containing matrix B; supported element type: {Element#F64_2}.
3727      * @param beta The scalar beta.
3728      * @param C The input/output allocation containing matrix C; supported element type: {Element#F64_2}.
3729      */
3730     void ZHER2K(RsBlasUplo Uplo, RsBlasTranspose Trans, Double2 alpha, sp<Allocation> A,
3731                 sp<Allocation> B, double beta, sp<Allocation> C);
3732 
3733     /**
3734      * 8-bit GEMM-like operation for neural networks: C = A * Transpose(B)
3735      * Calculations are done in 1.10.21 fixed-point format for the final output,
3736      * just before there's a shift down to drop the fractional parts. The output
3737      * values are gated to 0 to 255 to fit in a byte, but the 10-bit format
3738      * gives some headroom to avoid wrapping around on small overflows.
3739      *
3740      * @param A The input allocation containing matrix A; supported element type: {Element#U8}.
3741      * @param a_offset The offset for all values in matrix A, e.g. A[i,j] = A[i,j] - a_offset. Value should be from 0 to 255.
3742      * @param B The input allocation containing matrix B; supported element type: {Element#U8}.
3743      * @param b_offset The offset for all values in matrix B, e.g. B[i,j] = B[i,j] - b_offset. Value should be from 0 to 255.
3744      * @param C The output allocation containing the result matrix C; supported element type: {Element#U8}.
3745      * @param c_offset The offset for all values in matrix C.
3746      * @param c_mult The multiplier for all values in matrix C, e.g. C[i,j] = (C[i,j] + c_offset) * c_mult.
3747      **/
3748     void BNNM(sp<Allocation> A, int a_offset, sp<Allocation> B, int b_offset, sp<Allocation> C,
3749               int c_offset, int c_mult);
3750 };
3751 
3752 /**
3753  * Intrinsic kernel for blending two Allocations.
3754  */
3755 class ScriptIntrinsicBlend : public ScriptIntrinsic {
3756  private:
3757     ScriptIntrinsicBlend(sp<RS> rs, sp<const Element> e);
3758  public:
3759     /**
3760      * Creates a new intrinsic. The supported Element type is U8_4.
3761      * @param[in] rs RenderScript context
3762      * @param[in] e Element
3763      * @return new ScriptIntrinsicBlend
3764      */
3765     static sp<ScriptIntrinsicBlend> create(sp<RS> rs, sp<const Element> e);
3766     /**
3767      * Sets dst = {0, 0, 0, 0}
3768      * @param[in] in input Allocation
3769      * @param[in] out output Allocation
3770      */
3771     void forEachClear(sp<Allocation> in, sp<Allocation> out);
3772     /**
3773      * Sets dst = src
3774      * @param[in] in input Allocation
3775      * @param[in] out output Allocation
3776      */
3777     void forEachSrc(sp<Allocation> in, sp<Allocation> out);
3778     /**
3779      * Sets dst = dst (NOP)
3780      * @param[in] in input Allocation
3781      * @param[in] out output Allocation
3782      */
3783     void forEachDst(sp<Allocation> in, sp<Allocation> out);
3784     /**
3785      * Sets dst = src + dst * (1.0 - src.a)
3786      * @param[in] in input Allocation
3787      * @param[in] out output Allocation
3788      */
3789     void forEachSrcOver(sp<Allocation> in, sp<Allocation> out);
3790     /**
3791      * Sets dst = dst + src * (1.0 - dst.a)
3792      * @param[in] in input Allocation
3793      * @param[in] out output Allocation
3794      */
3795     void forEachDstOver(sp<Allocation> in, sp<Allocation> out);
3796     /**
3797      * Sets dst = src * dst.a
3798      * @param[in] in input Allocation
3799      * @param[in] out output Allocation
3800      */
3801     void forEachSrcIn(sp<Allocation> in, sp<Allocation> out);
3802     /**
3803      * Sets dst = dst * src.a
3804      * @param[in] in input Allocation
3805      * @param[in] out output Allocation
3806      */
3807     void forEachDstIn(sp<Allocation> in, sp<Allocation> out);
3808     /**
3809      * Sets dst = src * (1.0 - dst.a)
3810      * @param[in] in input Allocation
3811      * @param[in] out output Allocation
3812      */
3813     void forEachSrcOut(sp<Allocation> in, sp<Allocation> out);
3814     /**
3815      * Sets dst = dst * (1.0 - src.a)
3816      * @param[in] in input Allocation
3817      * @param[in] out output Allocation
3818      */
3819     void forEachDstOut(sp<Allocation> in, sp<Allocation> out);
3820     /**
3821      * Sets dst.rgb = src.rgb * dst.a + (1.0 - src.a) * dst.rgb
3822      * @param[in] in input Allocation
3823      * @param[in] out output Allocation
3824      */
3825     void forEachSrcAtop(sp<Allocation> in, sp<Allocation> out);
3826     /**
3827      * Sets dst.rgb = dst.rgb * src.a + (1.0 - dst.a) * src.rgb
3828      * @param[in] in input Allocation
3829      * @param[in] out output Allocation
3830      */
3831     void forEachDstAtop(sp<Allocation> in, sp<Allocation> out);
3832     /**
3833      * Sets dst = {src.r ^ dst.r, src.g ^ dst.g, src.b ^ dst.b, src.a ^ dst.a}
3834      * @param[in] in input Allocation
3835      * @param[in] out output Allocation
3836      */
3837     void forEachXor(sp<Allocation> in, sp<Allocation> out);
3838     /**
3839      * Sets dst = src * dst
3840      * @param[in] in input Allocation
3841      * @param[in] out output Allocation
3842      */
3843     void forEachMultiply(sp<Allocation> in, sp<Allocation> out);
3844     /**
3845      * Sets dst = min(src + dst, 1.0)
3846      * @param[in] in input Allocation
3847      * @param[in] out output Allocation
3848      */
3849     void forEachAdd(sp<Allocation> in, sp<Allocation> out);
3850     /**
3851      * Sets dst = max(dst - src, 0.0)
3852      * @param[in] in input Allocation
3853      * @param[in] out output Allocation
3854      */
3855     void forEachSubtract(sp<Allocation> in, sp<Allocation> out);
3856 };
3857 
3858 /**
3859  * Intrinsic Gaussian blur filter. Applies a Gaussian blur of the specified
3860  * radius to all elements of an Allocation.
3861  */
3862 class ScriptIntrinsicBlur : public ScriptIntrinsic {
3863  private:
3864     ScriptIntrinsicBlur(sp<RS> rs, sp<const Element> e);
3865  public:
3866     /**
3867      * Creates a new intrinsic. Supported Element types are U8 and U8_4.
3868      * @param[in] rs RenderScript context
3869      * @param[in] e Element
3870      * @return new ScriptIntrinsicBlur
3871      */
3872     static sp<ScriptIntrinsicBlur> create(sp<RS> rs, sp<const Element> e);
3873     /**
3874      * Sets the input of the blur.
3875      * @param[in] in input Allocation
3876      */
3877     void setInput(sp<Allocation> in);
3878     /**
3879      * Runs the intrinsic.
3880      * @param[in] out output Allocation
3881      */
3882     void forEach(sp<Allocation> out);
3883     /**
3884      * Sets the radius of the blur. The supported range is 0 < radius <= 25.
3885      * @param[in] radius radius of the blur
3886      */
3887     void setRadius(float radius);
3888 };
3889 
3890 /**
3891  * Intrinsic for applying a color matrix to allocations. This has the
3892  * same effect as loading each element and converting it to a
3893  * F32_N, multiplying the result by the 4x4 color matrix
3894  * as performed by rsMatrixMultiply() and writing it to the output
3895  * after conversion back to U8_N or F32_N.
3896  */
3897 class ScriptIntrinsicColorMatrix : public ScriptIntrinsic {
3898  private:
3899     ScriptIntrinsicColorMatrix(sp<RS> rs, sp<const Element> e);
3900  public:
3901     /**
3902      * Creates a new intrinsic.
3903      * @param[in] rs RenderScript context
3904      * @return new ScriptIntrinsicColorMatrix
3905      */
3906     static sp<ScriptIntrinsicColorMatrix> create(sp<RS> rs);
3907     /**
3908      * Applies the color matrix. Supported types are U8 and F32 with
3909      * vector lengths between 1 and 4.
3910      * @param[in] in input Allocation
3911      * @param[out] out output Allocation
3912      */
3913     void forEach(sp<Allocation> in, sp<Allocation> out);
3914     /**
3915      * Sets the value to be added after the color matrix has been
3916      * applied. The default value is {0, 0, 0, 0}.
3917      * @param[in] add pointer to an array of 4 float values
3918      */
3919     void setAdd(float* add);
3920 
3921     /**
3922      * Sets the color matrix which will be applied to each cell of the
3923      * image. The alpha channel will be copied.
3924      *
3925      * @param[in] m pointer to an array of 9 float values
3926      */
3927     void setColorMatrix3(float* m);
3928     /**
3929      * Sets the color matrix which will be applied to each cell of the
3930      * image.
3931      *
3932      * @param[in] m pointer to an array of 16 float values
3933      */
3934     void setColorMatrix4(float* m);
3935     /**
3936      * Sets a color matrix to convert from RGB to luminance. The alpha
3937      * channel will be copied.
3938      */
3939     void setGreyscale();
3940     /**
3941      * Sets the matrix to convert from RGB to YUV with a direct copy of
3942      * the 4th channel.
3943      */
3944     void setRGBtoYUV();
3945     /**
3946      * Sets the matrix to convert from YUV to RGB with a direct copy of
3947      * the 4th channel.
3948      */
3949     void setYUVtoRGB();
3950 };
3951 
3952 /**
3953  * Intrinsic for applying a 3x3 convolve to an allocation.
3954  */
3955 class ScriptIntrinsicConvolve3x3 : public ScriptIntrinsic {
3956  private:
3957     ScriptIntrinsicConvolve3x3(sp<RS> rs, sp<const Element> e);
3958  public:
3959     /**
3960      * Supported types are U8 and F32 with vector lengths between 1 and
3961      * 4. The default convolution kernel is the identity.
3962      * @param[in] rs RenderScript context
3963      * @param[in] e Element
3964      * @return new ScriptIntrinsicConvolve3x3
3965      */
3966     static sp<ScriptIntrinsicConvolve3x3> create(sp<RS> rs, sp<const Element> e);
3967     /**
3968      * Sets the input for the intrinsic.
3969      * @param[in] in input Allocation
3970      */
3971     void setInput(sp<Allocation> in);
3972     /**
3973      * Launches the intrinsic.
3974      * @param[in] out output Allocation
3975      */
3976     void forEach(sp<Allocation> out);
3977     /**
3978      * Sets the convolution kernel.
3979      * @param[in] v pointer to an array of 9 float coefficients
3980      */
3981     void setCoefficients(float* v);
3982 };
3983 
3984 /**
3985  * Intrinsic for applying a 5x5 convolve to an allocation.
3986  */
3987 class ScriptIntrinsicConvolve5x5 : public ScriptIntrinsic {
3988  private:
3989     ScriptIntrinsicConvolve5x5(sp<RS> rs, sp<const Element> e);
3990  public:
3991     /**
3992      * Supported types are U8 and F32 with vector lengths between 1 and
3993      * 4. The default convolution kernel is the identity.
3994      * @param[in] rs RenderScript context
3995      * @param[in] e Element
3996      * @return new ScriptIntrinsicConvolve5x5
3997      */
3998     static sp<ScriptIntrinsicConvolve5x5> create(sp<RS> rs, sp<const Element> e);
3999     /**
4000      * Sets the input for the intrinsic.
4001      * @param[in] in input Allocation
4002      */
4003     void setInput(sp<Allocation> in);
4004     /**
4005      * Launches the intrinsic.
4006      * @param[in] out output Allocation
4007      */
4008     void forEach(sp<Allocation> out);
4009     /**
4010      * Sets the convolution kernel.
4011      * @param[in] v pointer to an array of 25 float coefficients
4012      */
4013     void setCoefficients(float* v);
4014 };
4015 
4016 /**
4017  * Intrinsic for computing a histogram.
4018  */
4019 class ScriptIntrinsicHistogram : public ScriptIntrinsic {
4020  private:
4021     ScriptIntrinsicHistogram(sp<RS> rs, sp<const Element> e);
4022     sp<Allocation> mOut;
4023  public:
4024     /**
4025      * Creates an intrinsic for calculating the histogram of a uchar
4026      * or uchar4 image.
4027      *
4028      * Supported element types are U8_4, U8_3, U8_2, and U8.
4029      *
4030      * @param[in] rs The RenderScript context
4031      * @param[in] e Element type for inputs
4032      *
4033      * @return ScriptIntrinsicHistogram
4034      */
4035     static sp<ScriptIntrinsicHistogram> create(sp<RS> rs, sp<const Element> e);
4036     /**
4037      * Sets the output of the histogram. 32-bit integer types are
4038      * supported.
4039      *
4040      * @param[in] aout The output allocation
4041      */
4042     void setOutput(sp<Allocation> aout);
4043     /**
4044      * Sets the coefficients used for the dot product calculation. The
4045      * default is {0.299f, 0.587f, 0.114f, 0.f}.
4046      *
4047      * Coefficients must be >= 0 and sum to 1.0 or less.
4048      *
4049      * @param[in] r Red coefficient
4050      * @param[in] g Green coefficient
4051      * @param[in] b Blue coefficient
4052      * @param[in] a Alpha coefficient
4053      */
4054     void setDotCoefficients(float r, float g, float b, float a);
4055     /**
4056      * Processes an input buffer and places the histogram into the output
4057      * allocation. The output allocation may be a narrower vector size
4058      * than the input. In this case the vector size of the output is
4059      * used to determine how many of the input channels are used in
4060      * the computation. This is useful if you have an RGBA input
4061      * buffer but only want the histogram for RGB.
4062      *
4063      * 1D and 2D input allocations are supported.
4064      *
4065      * @param[in] ain The input image
4066      */
4067     void forEach(sp<Allocation> ain);
4068     /**
4069      * Processes an input buffer and places the histogram into the output
4070      * allocation. The dot product of the input channels and the
4071      * coefficients from 'setDotCoefficients' is used to calculate
4072      * the output values.
4073      *
4074      * 1D and 2D input allocations are supported.
4075      *
4076      * @param[in] ain The input image
4077      */
4078     void forEach_dot(sp<Allocation> ain);
4079 };
4080 
4081 /**
4082  * Intrinsic for applying a per-channel lookup table. Each channel of
4083  * the input has an independent lookup table. The tables are 256
4084  * entries in size and can cover the full value range of U8_4.
4085  **/
4086 class ScriptIntrinsicLUT : public ScriptIntrinsic {
4087  private:
4088     sp<Allocation> LUT;
4089     bool mDirty;
4090     unsigned char mCache[1024];
4091     void setTable(unsigned int offset, unsigned char base, unsigned int length, unsigned char* lutValues);
4092     ScriptIntrinsicLUT(sp<RS> rs, sp<const Element> e);
4093 
4094  public:
4095     /**
4096      * The supported element type is U8_4.
4097      *
4098      * The default tables are identity.
4099      *
4100      * @param[in] rs The RenderScript context
4101      * @param[in] e Element type for inputs and outputs
4102      *
4103      * @return ScriptIntrinsicLUT
4104      */
4105     static sp<ScriptIntrinsicLUT> create(sp<RS> rs, sp<const Element> e);
4106     /**
4107      * Invokes the kernel, applies the lookup to each cell of ain and
4108      * copies the result to aout.
4109      *
4110      * @param[in] ain Input allocation
4111      * @param[in] aout Output allocation
4112      */
4113     void forEach(sp<Allocation> ain, sp<Allocation> aout);
4114     /**
4115      * Sets entries in the LUT for the red channel.
4116      * @param[in] base base of region to update
4117      * @param[in] length length of region to update
4118      * @param[in] lutValues LUT values to use
4119      */
4120     void setRed(unsigned char base, unsigned int length, unsigned char* lutValues);
4121     /**
4122      * Sets entries in the LUT for the green channel.
4123      * @param[in] base base of region to update
4124      * @param[in] length length of region to update
4125      * @param[in] lutValues LUT values to use
4126      */
4127     void setGreen(unsigned char base, unsigned int length, unsigned char* lutValues);
4128     /**
4129      * Sets entries in the LUT for the blue channel.
4130      * @param[in] base base of region to update
4131      * @param[in] length length of region to update
4132      * @param[in] lutValues LUT values to use
4133      */
4134     void setBlue(unsigned char base, unsigned int length, unsigned char* lutValues);
4135     /**
4136      * Sets entries in the LUT for the alpha channel.
4137      * @param[in] base base of region to update
4138      * @param[in] length length of region to update
4139      * @param[in] lutValues LUT values to use
4140      */
4141     void setAlpha(unsigned char base, unsigned int length, unsigned char* lutValues);
4142     virtual ~ScriptIntrinsicLUT();
4143 };
4144 
4145 /**
4146  * Intrinsic for performing a resize of a 2D allocation.
4147  */
4148 class ScriptIntrinsicResize : public ScriptIntrinsic {
4149  private:
4150     sp<Allocation> mInput;
4151     ScriptIntrinsicResize(sp<RS> rs, sp<const Element> e);
4152  public:
4153     /**
4154      * Create a resize intrinsic. Supported Element types are U8_4.
4155      * @param[in] rs RenderScript context
4156      * @note Unlike the private constructor, create() takes no Element.
4157      * @return new ScriptIntrinsicResize
4158      */
4159     static sp<ScriptIntrinsicResize> create(sp<RS> rs);
4160 
4161     /**
4162      * Resize-copy the input allocation to the output specified. The
4163      * Allocation is rescaled if necessary using bi-cubic
4164      * interpolation.
4165      * The input Allocation is supplied separately through setInput().
4166      * @param[in] aout output Allocation
4167      */
4168     void forEach_bicubic(sp<Allocation> aout);
4169 
4170     /**
4171      * Set the input of the resize.
4172      * @param[in] ain input Allocation
4173      */
4174     void setInput(sp<Allocation> ain);
4175 };
4176 
4177 /**
4178  * Intrinsic for converting an Android YUV buffer to RGB.
4179  *
4180  * The input allocation should be supplied in a supported YUV format
4181  * as a YUV element Allocation. The output is RGBA; the alpha channel
4182  * will be set to 255.
4183  */
4184 class ScriptIntrinsicYuvToRGB : public ScriptIntrinsic {
4185  private:
4186     ScriptIntrinsicYuvToRGB(sp<RS> rs, sp<const Element> e);
4187  public:
4188     /**
4189      * Create an intrinsic for converting YUV to RGB.
4190      *
4191      * Supported element types are U8_4.
4192      *
4193      * @param[in] rs The RenderScript context
4194      * @param[in] e Element type for output
4195      *
4196      * @return ScriptIntrinsicYuvToRGB
4197      */
4198     static sp<ScriptIntrinsicYuvToRGB> create(sp<RS> rs, sp<const Element> e);
4199     /**
4200      * Set the input YUV allocation.
4201      *
4202      * @param[in] in The input allocation.
4203      */
4204     void setInput(sp<Allocation> in);
4205 
4206     /**
4207      * Convert the image to RGB.
4208      *
4209      * @param[in] out Output allocation. Must match creation element
4210      *                type.
4211      */
4212     void forEach(sp<Allocation> out);
4213 
4214 };
4215 
4216 /**
4217  * Sampler object that defines how Allocations can be read as textures
4218  * within a kernel. Samplers are used in conjunction with the rsSample
4219  * runtime function to return values from normalized coordinates.
4220  *
4221  * Any Allocation used with a Sampler must have been created with
4222  * RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE; using a Sampler on an
4223  * Allocation that was not created with
4224  * RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE is undefined.
4225  **/
4226  class Sampler : public BaseObj {
4227  private:
4228     Sampler(sp<RS> rs, void* id);
4229     Sampler(sp<RS> rs, void* id, RsSamplerValue min, RsSamplerValue mag,
4230             RsSamplerValue wrapS, RsSamplerValue wrapT, float anisotropy);
4231     RsSamplerValue mMin;
4232     RsSamplerValue mMag;
4233     RsSamplerValue mWrapS;
4234     RsSamplerValue mWrapT;
4235     float mAniso;
4236 
4237  public:
4238     /**
4239      * Creates a non-standard Sampler.
4240      * @param[in] rs RenderScript context
4241      * @param[in] min minification
4242      * @param[in] mag magnification
4243      * @param[in] wrapS S wrapping mode
4244      * @param[in] wrapT T wrapping mode
4245      * @param[in] anisotropy anisotropy setting
4246      */
4247     static sp<Sampler> create(sp<RS> rs, RsSamplerValue min, RsSamplerValue mag, RsSamplerValue wrapS, RsSamplerValue wrapT, float anisotropy);
4248 
4249     /**
4250      * @return minification setting for the sampler
4251      */
4252     RsSamplerValue getMinification();
4253     /**
4254      * @return magnification setting for the sampler
4255      */
4256     RsSamplerValue getMagnification();
4257     /**
4258      * @return S wrapping mode for the sampler
4259      */
4260     RsSamplerValue getWrapS();
4261     /**
4262      * @return T wrapping mode for the sampler
4263      */
4264     RsSamplerValue getWrapT();
4265     /**
4266      * @return anisotropy setting for the sampler
4267      */
4268     float getAnisotropy();
4269 
4270     /**
4271      * Retrieve a sampler with min and mag set to nearest and wrap modes set to
4272      * clamp.
4273      *
4274      * @param[in] rs Context to which the sampler will belong.
4275      *
4276      * @return Sampler
4277      */
4278     static sp<const Sampler> CLAMP_NEAREST(sp<RS> rs);
4279     /**
4280      * Retrieve a sampler with min and mag set to linear and wrap modes set to
4281      * clamp.
4282      *
4283      * @param[in] rs Context to which the sampler will belong.
4284      *
4285      * @return Sampler
4286      */
4287     static sp<const Sampler> CLAMP_LINEAR(sp<RS> rs);
4288     /**
4289      * Retrieve a sampler with mag set to linear, min linear mipmap linear, and
4290      * wrap modes set to clamp.
4291      *
4292      * @param[in] rs Context to which the sampler will belong.
4293      *
4294      * @return Sampler
4295      */
4296     static sp<const Sampler> CLAMP_LINEAR_MIP_LINEAR(sp<RS> rs);
4297     /**
4298      * Retrieve a sampler with min and mag set to nearest and wrap modes set to
4299      * wrap.
4300      *
4301      * @param[in] rs Context to which the sampler will belong.
4302      *
4303      * @return Sampler
4304      */
4305     static sp<const Sampler> WRAP_NEAREST(sp<RS> rs);
4306     /**
4307      * Retrieve a sampler with min and mag set to linear and wrap modes set to
4308      * wrap.
4309      *
4310      * @param[in] rs Context to which the sampler will belong.
4311      *
4312      * @return Sampler
4313      */
4314     static sp<const Sampler> WRAP_LINEAR(sp<RS> rs);
4315     /**
4316      * Retrieve a sampler with mag set to linear, min linear mipmap linear, and
4317      * wrap modes set to wrap.
4318      *
4319      * @param[in] rs Context to which the sampler will belong.
4320      *
4321      * @return Sampler
4322      */
4323     static sp<const Sampler> WRAP_LINEAR_MIP_LINEAR(sp<RS> rs);
4324     /**
4325      * Retrieve a sampler with min and mag set to nearest and wrap modes set to
4326      * mirrored repeat.
4327      *
4328      * @param[in] rs Context to which the sampler will belong.
4329      *
4330      * @return Sampler
4331      */
4332     static sp<const Sampler> MIRRORED_REPEAT_NEAREST(sp<RS> rs);
4333     /**
4334      * Retrieve a sampler with min and mag set to linear and wrap modes set to
4335      * mirrored repeat.
4336      *
4337      * @param[in] rs Context to which the sampler will belong.
4338      *
4339      * @return Sampler
4340      */
4341     static sp<const Sampler> MIRRORED_REPEAT_LINEAR(sp<RS> rs);
4342     /**
4343      * Retrieve a sampler with mag set to linear, min linear mipmap linear, and
4344      * wrap modes set to mirrored repeat.
4345      *
4346      * @param[in] rs Context to which the sampler will belong.
4347      *
4348      * @return Sampler
4349      */
4350     static sp<const Sampler> MIRRORED_REPEAT_LINEAR_MIP_LINEAR(sp<RS> rs);
4351 
4352 };
4353 
4354 }
4355 
4356 }
4357 
4358 #endif
4359