• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2019 Nuclei Limited. All rights reserved.
3  *
4  * SPDX-License-Identifier: Apache-2.0
5  *
6  * Licensed under the Apache License, Version 2.0 (the License); you may
7  * not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
14  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 #ifndef __CORE_FEATURE_DSP__
19 #define __CORE_FEATURE_DSP__
20 
21 /*!
22  * @file     core_feature_dsp.h
23  * @brief    DSP feature API header file for Nuclei N/NX Core
24  */
25 /*
26  * DSP Feature Configuration Macro:
27  * 1. __DSP_PRESENT:  Define whether Digital Signal Processing Unit(DSP) is present or not
28  *   * 0: Not present
29  *   * 1: Present
30  */
31 #ifdef __cplusplus
32  extern "C" {
33 #endif
34 
35 #if defined(__DSP_PRESENT) && (__DSP_PRESENT == 1)
36 
37 /* ###########################  CPU SIMD DSP Intrinsic Functions ########################### */
38 /**
39  * \defgroup NMSIS_Core_DSP_Intrinsic   Intrinsic Functions for SIMD Instructions
40  * \ingroup  NMSIS_Core
41  * \brief    Functions that generate RISC-V DSP SIMD instructions.
42  * \details
43  *
44  * The following functions generate specified RISC-V SIMD instructions that cannot be directly accessed by compiler.
45  * * **DSP ISA Extension Instruction Summary**
46  *   + **Shorthand Definitions**
47  *     - r.H == r.H1: r[31:16], r.L == r.H0: r[15:0]
48  *     - r.B3: r[31:24], r.B2: r[23:16], r.B1: r[15:8], r.B0: r[7:0]
49  *     - r.B[x]: r[(x*8+7):(x*8+0)]
50  *     - r.H[x]: r[(x*16+15):(x*16+0)]
51  *     - r.W[x]: r[(x*32+31):(x*32+0)]
52  *     - r[xU]: the upper 32-bit of a 64-bit number; xU represents the GPR number that contains this upper part 32-bit value.
53  *     - r[xL]: the lower 32-bit of a 64-bit number; xL represents the GPR number that contains this lower part 32-bit value.
54  *     - r[xU].r[xL]: a 64-bit number that is formed from a pair of GPRs.
55  *     - s>>: signed arithmetic right shift
56  *     - u>>: unsigned logical right shift
57  *     - SAT.Qn(): Saturate to the range of [-2^n, 2^n-1], if saturation happens, set PSW.OV.
58  *     - SAT.Um(): Saturate to the range of [0, 2^m-1], if saturation happens, set PSW.OV.
59  *     - RUND(): Indicate `rounding`, i.e., add 1 to the most significant discarded bit for right shift or MSW-type multiplication instructions.
60  *     - Sign or Zero Extending functions:
61  *       - SEm(data): Sign-Extend data to m-bit.
62  *       - ZEm(data): Zero-Extend data to m-bit.
63  *     - ABS(x): Calculate the absolute value of `x`.
64  *     - CONCAT(x,y): Concatenate `x` and `y` to form a value.
65  *     - u<: Unsigned less than comparison.
66  *     - u<=: Unsigned less than or equal comparison.
67  *     - u>: Unsigned greater than comparison.
68  *     - s*: Signed multiplication.
69  *     - u*: Unsigned multiplication.
70  *
71  *   @{
72  */
73 /** @} */ /* End of Doxygen Group NMSIS_Core_DSP_Intrinsic */
74 
75 
76 /**
77  * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS      SIMD Data Processing Instructions
78  * \ingroup  NMSIS_Core_DSP_Intrinsic
79  * \brief    SIMD Data Processing Instructions
80  * \details
81  */
82 
83 /**
84  * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB      SIMD 16-bit Add/Subtract Instructions
85  * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
86  * \brief    SIMD 16-bit Add/Subtract Instructions
87  * \details
88  * Based on the combination of the types of the two 16-bit arithmetic operations, the SIMD 16-bit
89  * add/subtract instructions can be classified into 6 main categories: Addition (two 16-bit addition),
90  * Subtraction (two 16-bit subtraction), Crossed Add & Sub (one addition and one subtraction),
91  * Crossed Sub & Add (one subtraction and one addition), Straight Add & Sub (one addition and one
92  * subtraction), and Straight Sub & Add (one subtraction and one addition).
93  * Based on the way of how an overflow condition is handled, the SIMD 16-bit add/subtract
94  * instructions can be classified into 5 groups: Wrap-around (dropping overflow), Signed Halving
95  * (keeping overflow by dropping 1 LSB bit), Unsigned Halving, Signed Saturation (clipping overflow),
96  * and Unsigned Saturation.
97  * Together, there are 30 SIMD 16-bit add/subtract instructions.
98  */
99 
100 /**
101  * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB      SIMD 8-bit Addition & Subtraction Instructions
102  * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
103  * \brief    SIMD 8-bit Addition & Subtraction Instructions
104  * \details
105  * Based on the types of the four 8-bit arithmetic operations, the SIMD 8-bit add/subtract instructions
106  * can be classified into 2 main categories: Addition (four 8-bit addition), and Subtraction (four 8-bit
107  * subtraction).
108  * Based on the way of how an overflow condition is handled for signed or unsigned operation, the
109  * SIMD 8-bit add/subtract instructions can be classified into 5 groups: Wrap-around (dropping
110  * overflow), Signed Halving (keeping overflow by dropping 1 LSB bit), Unsigned Halving, Signed
111  * Saturation (clipping overflow), and Unsigned Saturation.
112  * Together, there are 10 SIMD 8-bit add/subtract instructions.
113  */
114 
115 /**
116  * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT      SIMD 16-bit Shift Instructions
117  * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
118  * \brief    SIMD 16-bit Shift Instructions
119  * \details
120  * there are 14 SIMD 16-bit shift instructions.
121  */
122 
123 /**
124  * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT      SIMD 8-bit Shift Instructions
125  * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
126  * \brief    SIMD 8-bit Shift Instructions
127  * \details
128  *  there are 14 SIMD 8-bit shift instructions.
129  */
130 
131 /**
132  * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP      SIMD 16-bit Compare Instructions
133  * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
134  * \brief    SIMD 16-bit Compare Instructions
135  * \details
136  *  there are 5 SIMD 16-bit Compare instructions.
137  */
138 
139 /**
140  * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP      SIMD 8-bit Compare Instructions
141  * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
142  * \brief    SIMD 8-bit Compare Instructions
143  * \details
144  *  there are 5  SIMD 8-bit Compare instructions.
145  */
146 
147 /**
148  * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY      SIMD 16-bit Multiply Instructions
149  * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
150  * \brief    SIMD 16-bit Multiply Instructions
151  * \details
152  * there are 6 SIMD 16-bit Multiply instructions.
153  */
154 
155 /**
156  * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY      SIMD 8-bit Multiply Instructions
157  * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
158  * \brief    SIMD 8-bit Multiply Instructions
159  * \details
160  *  there are 6 SIMD 8-bit Multiply instructions.
161  */
162 
163 /**
164  * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC      SIMD 16-bit Miscellaneous Instructions
165  * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
166  * \brief    SIMD 16-bit Miscellaneous Instructions
167  * \details
168  *  there are 10 SIMD 16-bit Misc instructions.
169  */
170 
171 /**
172  * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC      SIMD 8-bit Miscellaneous Instructions
173  * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
174  * \brief    SIMD 8-bit Miscellaneous Instructions
175  * \details
176  *  there are 10 SIMD 8-bit Miscellaneous instructions.
177  */
178 
179 /**
180  * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK      SIMD 8-bit Unpacking Instructions
181  * \ingroup  NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
182  * \brief    SIMD 8-bit Unpacking Instructions
183  * \details
184  *  there are 8 SIMD 8-bit Unpacking instructions.
185  */
186 
187 /**
188  * \defgroup NMSIS_Core_DSP_Intrinsic_NON_SIMD      Non-SIMD Instructions
189  * \ingroup  NMSIS_Core_DSP_Intrinsic
190  * \brief    Non-SIMD Instructions
191  * \details
192  */
193 
194 /**
195  * \defgroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU      Non-SIMD Q15 saturation ALU Instructions
196  * \ingroup  NMSIS_Core_DSP_Intrinsic_NON_SIMD
197  * \brief    Non-SIMD Q15 saturation ALU Instructions
198  * \details
199  * there are 7 Non-SIMD Q15 saturation ALU Instructions
200  */
201 
202 /**
203  * \defgroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU      Non-SIMD Q31 saturation ALU Instructions
204  * \ingroup  NMSIS_Core_DSP_Intrinsic_NON_SIMD
205  * \brief    Non-SIMD Q31 saturation ALU Instructions
206  * \details
207  *  there are Non-SIMD Q31 saturation ALU Instructions
208  */
209 
210 /**
211  * \defgroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION      32-bit Computation Instructions
212  * \ingroup  NMSIS_Core_DSP_Intrinsic_NON_SIMD
213  * \brief    32-bit Computation Instructions
214  * \details
215  * there are 8 32-bit Computation Instructions
216  */
217 
218 /**
219  * \defgroup NMSIS_Core_DSP_Intrinsic_OV_FLAG_SC      OV (Overflow) flag Set/Clear Instructions
220  * \ingroup  NMSIS_Core_DSP_Intrinsic_NON_SIMD
221  * \brief    OV (Overflow) flag Set/Clear Instructions
222  * \details
223  * The following table lists the user instructions related to Overflow (OV) flag manipulation. there are 2 OV (Overflow) flag Set/Clear Instructions
224  */
225 
226 /**
227  * \defgroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC      Non-SIMD Miscellaneous Instructions
228  * \ingroup  NMSIS_Core_DSP_Intrinsic_NON_SIMD
229  * \brief    Non-SIMD Miscellaneous Instructions
230  * \details
231  * There are 13 Miscellaneous Instructions here.
232  */
233 
234 /**
235  * \defgroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS      Partial-SIMD Data Processing Instructions
236  * \ingroup  NMSIS_Core_DSP_Intrinsic
237  * \brief    Partial-SIMD Data Processing Instructions
238  * \details
239  */
240 
241 /**
242  * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK      SIMD 16-bit Packing Instructions
243  * \ingroup  NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
244  * \brief    SIMD 16-bit Packing Instructions
245  * \details
246  * there are 4 SIMD 16-bit Packing Instructions.
247  */
248 
249 /**
250  * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC      Signed MSW 32x32 Multiply and Add Instructions
251  * \ingroup  NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
252  * \brief    Signed MSW 32x32 Multiply and Add Instructions
253  * \details
254  *  there are 8 Signed MSW 32x32 Multiply and Add Instructions
255  */
256 
257 /**
258  * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC      Signed MSW 32x16 Multiply and Add Instructions
259  * \ingroup  NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
260  * \brief    Signed MSW 32x16 Multiply and Add Instructions
261  * \details
262  * there are 15 Signed MSW 32x16 Multiply and Add Instructions
263  */
264 
265 /**
266  * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB      Signed 16-bit Multiply 32-bit Add/Subtract Instructions
267  * \ingroup  NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
268  * \brief    Signed 16-bit Multiply 32-bit Add/Subtract Instructions
269  * \details
270  *  there are 18 Signed 16-bit Multiply 32-bit Add/Subtract Instructions
271  */
272 
273 /**
274  * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB      Signed 16-bit Multiply 64-bit Add/Subtract Instructions
275  * \ingroup  NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
276  * \brief    Signed 16-bit Multiply 64-bit Add/Subtract Instructions
277  * \details
278  *  there is Signed 16-bit Multiply 64-bit Add/Subtract Instructions
279  */
280 
281 /**
282  * \defgroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC      Partial-SIMD Miscellaneous Instructions
283  * \ingroup  NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
284  * \brief    Partial-SIMD Miscellaneous Instructions
285  * \details
286  *  there are  7 Partial-SIMD Miscellaneous Instructions
287  */
288 
289 /**
290  * \defgroup NMSIS_Core_DSP_Intrinsic_8B_MULT_32B_ADD      8-bit Multiply with 32-bit Add Instructions
291  * \ingroup  NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
292  * \brief    8-bit Multiply with 32-bit Add Instructions
293  * \details
294  * there are  3 8-bit Multiply with 32-bit Add Instructions
295  */
296 
297 /**
298  * \defgroup NMSIS_Core_DSP_Intrinsic_64B_PROFILE      64-bit Profile Instructions
299  * \ingroup  NMSIS_Core_DSP_Intrinsic
300  * \brief    64-bit Profile Instructions
301  * \details
302  */
303 
304 /**
305  * \defgroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB      64-bit Addition & Subtraction Instructions
306  * \ingroup  NMSIS_Core_DSP_Intrinsic_64B_PROFILE
307  * \brief    64-bit Addition & Subtraction Instructions
308  * \details
309  * there are 10 64-bit Addition & Subtraction Instructions.
310  */
311 
312 /**
313  * \defgroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB      32-bit Multiply with 64-bit Add/Subtract Instructions
314  * \ingroup  NMSIS_Core_DSP_Intrinsic_64B_PROFILE
315  * \brief    32-bit Multiply with 64-bit Add/Subtract Instructions
316  * \details
317  *  there are 32-bit Multiply 64-bit Add/Subtract Instructions
318  */
319 
320 /**
321  * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB      Signed 16-bit Multiply with 64-bit Add/Subtract Instructions
322  * \ingroup  NMSIS_Core_DSP_Intrinsic_64B_PROFILE
323  * \brief    Signed 16-bit Multiply with 64-bit Add/Subtract Instructions
324  * \details
325  * there are 10 Signed 16-bit Multiply with 64-bit Add/Subtract Instructions
326  */
327 
328 /**
329  * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY      RV64 Only Instructions
330  * \ingroup  NMSIS_Core_DSP_Intrinsic
331  * \brief    RV64 Only Instructions
332  * \details
333  */
334 
335 /**
336  * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB      (RV64 Only) SIMD 32-bit Add/Subtract Instructions
337  * \ingroup  NMSIS_Core_DSP_Intrinsic_RV64_ONLY
338  * \brief    (RV64 Only) SIMD 32-bit Add/Subtract Instructions
339  * \details
340  * The following tables list instructions that are only present in RV64.
341  * There are 30 SIMD 32-bit addition or subtraction instructions.
342  */
343 
344 /**
345  * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT      (RV64 Only) SIMD 32-bit Shift Instructions
346  * \ingroup  NMSIS_Core_DSP_Intrinsic_RV64_ONLY
347  * \brief    (RV64 Only) SIMD 32-bit Shift Instructions
348  * \details
349  *  there are 14 (RV64 Only) SIMD 32-bit Shift Instructions
350  */
351 
352 /**
353  * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC      (RV64 Only) SIMD 32-bit Miscellaneous Instructions
354  * \ingroup  NMSIS_Core_DSP_Intrinsic_RV64_ONLY
355  * \brief    (RV64 Only) SIMD 32-bit Miscellaneous Instructions
356  * \details
357  * there are 5  (RV64 Only) SIMD 32-bit Miscellaneous Instructions
358  */
359 
360 /**
361  * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT      (RV64 Only) SIMD Q15 Saturating Multiply Instructions
362  * \ingroup  NMSIS_Core_DSP_Intrinsic_RV64_ONLY
363  * \brief    (RV64 Only) SIMD Q15 Saturating Multiply Instructions
364  * \details
365  *  there are 9 (RV64 Only) SIMD Q15 saturating Multiply Instructions
366  */
367 
368 /**
369  * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT      (RV64 Only) 32-bit Multiply Instructions
370  * \ingroup  NMSIS_Core_DSP_Intrinsic_RV64_ONLY
371  * \brief    (RV64 Only) 32-bit Multiply Instructions
372  * \details
373  *  there are 3 (RV64 Only) 32-bit Multiply Instructions
374  */
375 
376 /**
377  * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT_ADD      (RV64 Only) 32-bit Multiply & Add Instructions
378  * \ingroup  NMSIS_Core_DSP_Intrinsic_RV64_ONLY
379  * \brief    (RV64 Only) 32-bit Multiply & Add Instructions
380  * \details
381  *  there are  3 (RV64 Only) 32-bit Multiply & Add Instructions
382  */
383 
384 /**
385  * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC      (RV64 Only) 32-bit Parallel Multiply & Add Instructions
386  * \ingroup  NMSIS_Core_DSP_Intrinsic_RV64_ONLY
387  * \brief    (RV64 Only) 32-bit Parallel Multiply & Add Instructions
388  * \details
389  * there are 12 (RV64 Only) 32-bit Parallel Multiply & Add Instructions
390  */
391 
392 /**
393  * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_NON_SIMD_32B_SHIFT      (RV64 Only) Non-SIMD 32-bit Shift Instructions
394  * \ingroup  NMSIS_Core_DSP_Intrinsic_RV64_ONLY
395  * \brief    (RV64 Only) Non-SIMD 32-bit Shift Instructions
396  * \details
397  *  there is 1 (RV64 Only) Non-SIMD 32-bit Shift Instruction
398  */
399 
400 /**
401  * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK      32-bit Packing Instructions
402  * \ingroup  NMSIS_Core_DSP_Intrinsic_RV64_ONLY
403  * \brief    32-bit Packing Instructions
404  * \details
405  *  There are four 32-bit packing instructions here
406  */
407 
408 /* ===== Inline Function Start for 3.1. ADD8 ===== */
409 /**
410  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
411  * \brief ADD8 (SIMD 8-bit Addition)
412  * \details
413  * **Type**: SIMD
414  *
415  * **Syntax**:\n
416  * ~~~
417  * ADD8 Rd, Rs1, Rs2
418  * ~~~
419  *
420  * **Purpose**:\n
421  * Do 8-bit integer element additions simultaneously.
422  *
423  * **Description**:\n
424  * This instruction adds the 8-bit integer elements in Rs1 with the 8-bit integer elements
425  * in Rs2, and then writes the 8-bit element results to Rd.
426  *
427  * **Note**:\n
428  * This instruction can be used for either signed or unsigned addition.
429  *
430  * **Operations**:\n
431  * ~~~
432  * Rd.B[x] = Rs1.B[x] + Rs2.B[x];
433  * for RV32: x=3...0,
434  * for RV64: x=7...0
435  * ~~~
436  *
437  * \param [in]  a    unsigned long type of value stored in a
438  * \param [in]  b    unsigned long type of value stored in b
439  * \return value stored in unsigned long type
440  */
__RV_ADD8(unsigned long a,unsigned long b)441 __STATIC_FORCEINLINE unsigned long __RV_ADD8(unsigned long a, unsigned long b)
442 {
443     register unsigned long result;
444     __ASM volatile("add8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
445     return result;
446 }
447 /* ===== Inline Function End for 3.1. ADD8 ===== */
448 
449 /* ===== Inline Function Start for 3.2. ADD16 ===== */
450 /**
451  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
452  * \brief ADD16 (SIMD 16-bit Addition)
453  * \details
454  * **Type**: SIMD
455  *
456  * **Syntax**:\n
457  * ~~~
458  * ADD16 Rd, Rs1, Rs2
459  * ~~~
460  *
461  * **Purpose**:\n
462  * Do 16-bit integer element additions simultaneously.
463  *
464  * **Description**:\n
465  * This instruction adds the 16-bit integer elements in Rs1 with the 16-bit integer
466  * elements in Rs2, and then writes the 16-bit element results to Rd.
467  *
468  * **Note**:\n
469  * This instruction can be used for either signed or unsigned addition.
470  *
471  * **Operations**:\n
472  * ~~~
473  * Rd.H[x] = Rs1.H[x] + Rs2.H[x];
474  * for RV32: x=1...0,
475  * for RV64: x=3...0
476  * ~~~
477  *
478  * \param [in]  a    unsigned long type of value stored in a
479  * \param [in]  b    unsigned long type of value stored in b
480  * \return value stored in unsigned long type
481  */
__RV_ADD16(unsigned long a,unsigned long b)482 __STATIC_FORCEINLINE unsigned long __RV_ADD16(unsigned long a, unsigned long b)
483 {
484     register unsigned long result;
485     __ASM volatile("add16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
486     return result;
487 }
488 /* ===== Inline Function End for 3.2. ADD16 ===== */
489 
490 /* ===== Inline Function Start for 3.3. ADD64 ===== */
491 /**
492  * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
493  * \brief ADD64 (64-bit Addition)
494  * \details
495  * **Type**: 64-bit Profile
496  *
497  * **Syntax**:\n
498  * ~~~
499  * ADD64 Rd, Rs1, Rs2
500  * ~~~
501  *
502  * **Purpose**:\n
503  * Add two 64-bit signed or unsigned integers.
504  *
505  * **RV32 Description**:\n
506  * This instruction adds the 64-bit integer of an even/odd pair of registers specified
507  * by Rs1(4,1) with the 64-bit integer of an even/odd pair of registers specified by Rs2(4,1), and then
508  * writes the 64-bit result to an even/odd pair of registers specified by Rd(4,1).
509  * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register
510  * pair includes register 2d and 2d+1.
511  * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
512  * of the pair contains the low 32-bit of the result.
513  *
514  * **RV64 Description**:\n
515  * This instruction has the same behavior as the ADD instruction in RV64I.
516  *
517  * **Note**:\n
518  * This instruction can be used for either signed or unsigned addition.
519  *
520  * **Operations**:\n
521  * ~~~
522  * RV32:
523  *  t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
524  *  a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
525  *  b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
526  *  R[t_H].R[t_L] = R[a_H].R[a_L] + R[b_H].R[b_L];
527  * RV64:
528  *  Rd = Rs1 + Rs2;
529  * ~~~
530  *
531  * \param [in]  a    unsigned long long type of value stored in a
532  * \param [in]  b    unsigned long long type of value stored in b
533  * \return value stored in unsigned long long type
534  */
__RV_ADD64(unsigned long long a,unsigned long long b)535 __STATIC_FORCEINLINE unsigned long long __RV_ADD64(unsigned long long a, unsigned long long b)
536 {
537     register unsigned long long result;
538     __ASM volatile("add64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
539     return result;
540 }
541 /* ===== Inline Function End for 3.3. ADD64 ===== */
542 
543 /* ===== Inline Function Start for 3.4. AVE ===== */
544 /**
545  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
546  * \brief AVE (Average with Rounding)
547  * \details
548  * **Type**: DSP
549  *
550  * **Syntax**:\n
551  * ~~~
552  * AVE Rd, Rs1, Rs2
553  * ~~~
554  *
555  * **Purpose**:\n
556  * Calculate the average of the contents of two general registers.
557  *
558  * **Description**:\n
559  * This instruction calculates the average value of two signed integers stored in Rs1 and
560  * Rs2, rounds up a half-integer result to the nearest integer, and writes the result to Rd.
561  *
562  * **Operations**:\n
563  * ~~~
564  * Sum = CONCAT(Rs1[MSB],Rs1[MSB:0]) + CONCAT(Rs2[MSB],Rs2[MSB:0]) + 1;
565  * Rd = Sum[(MSB+1):1];
566  * for RV32: MSB=31,
567  * for RV64: MSB=63
568  * ~~~
569  *
570  * \param [in]  a    long type of value stored in a
571  * \param [in]  b    long type of value stored in b
572  * \return value stored in long type
573  */
__RV_AVE(long a,long b)574 __STATIC_FORCEINLINE long __RV_AVE(long a, long b)
575 {
576     register long result;
577     __ASM volatile("ave %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
578     return result;
579 }
580 /* ===== Inline Function End for 3.4. AVE ===== */
581 
582 /* ===== Inline Function Start for 3.5. BITREV ===== */
583 /**
584  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
585  * \brief BITREV (Bit Reverse)
586  * \details
587  * **Type**: DSP
588  *
589  * **Syntax**:\n
590  * ~~~
591  * BITREV Rd, Rs1, Rs2
592  * ~~~
593  *
594  * **Purpose**:\n
595  * Reverse the bit positions of the source operand within a specified width starting from bit
596  * 0. The reversed width is a variable from a GPR.
597  *
598  * **Description**:\n
599  * This instruction reverses the bit positions of the content of Rs1. The reversed bit width
600  * is calculated as Rs2[4:0]+1 (RV32) or Rs2[5:0]+1 (RV64). The upper bits beyond the reversed width
601  * are filled with zeros. After the bit reverse operation, the result is written to Rd.
602  *
603  * **Operations**:\n
604  * ~~~
605  * msb = Rs2[4:0]; (for RV32)
606  * msb = Rs2[5:0]; (for RV64)
607  * rev[0:msb] = Rs1[msb:0];
608  * Rd = ZE(rev[msb:0]);
609  * ~~~
610  *
611  * \param [in]  a    unsigned long type of value stored in a
612  * \param [in]  b    unsigned long type of value stored in b
613  * \return value stored in unsigned long type
614  */
__RV_BITREV(unsigned long a,unsigned long b)615 __STATIC_FORCEINLINE unsigned long __RV_BITREV(unsigned long a, unsigned long b)
616 {
617     register unsigned long result;
618     __ASM volatile("bitrev %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
619     return result;
620 }
621 /* ===== Inline Function End for 3.5. BITREV ===== */
622 
623 /* ===== Inline Function Start for 3.6. BITREVI ===== */
624 /**
625  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
626  * \brief BITREVI (Bit Reverse Immediate)
627  * \details
628  * **Type**: DSP
629  *
630  * **Syntax**:\n
631  * ~~~
632  * (RV32) BITREVI Rd, Rs1, imm[4:0]
633  * (RV64) BITREVI Rd, Rs1, imm[5:0]
634  * ~~~
635  *
636  * **Purpose**:\n
637  * Reverse the bit positions of the source operand within a specified width starting from bit
638  * 0. The reversed width is an immediate value.
639  *
640  * **Description**:\n
641  * This instruction reverses the bit positions of the content of Rs1. The reversed bit width
642  * is calculated as imm[4:0]+1 (RV32) or imm[5:0]+1 (RV64). The upper bits beyond the reversed width
643  * are filled with zeros. After the bit reverse operation, the result is written to Rd.
644  *
645  * **Operations**:\n
646  * ~~~
647  * msb = imm[4:0]; (RV32)
648  * msb = imm[5:0]; (RV64)
649  * rev[0:msb] = Rs1[msb:0];
650  * Rd = ZE(rev[msb:0]);
651  * ~~~
652  *
653  * \param [in]  a    unsigned long type of value stored in a
654  * \param [in]  b    immediate giving the MSB index of the reversed field (imm[4:0] on RV32, imm[5:0] on RV64); must be a compile-time constant
655  * \return value stored in unsigned long type
656  */
#define __RV_BITREVI(a, b)    \
    ({    \
        /* Evaluate (a) before any other local exists, so an argument */    \
        /* that mentions a caller variable named like a macro local */    \
        /* cannot bind to an uninitialized temporary of this macro. */    \
        unsigned long __bitrevi_src = (unsigned long)(a);    \
        unsigned long __bitrevi_res;    \
        /* b must be a compile-time constant: it is encoded as an */    \
        /* immediate. NOTE(review): GCC documents "K" as a 5-bit */    \
        /* unsigned immediate, which would exclude 32..63 on RV64 - */    \
        /* confirm whether that restriction is intended. */    \
        __ASM volatile("bitrevi %0, %1, %2" : "=r"(__bitrevi_res) : "r"(__bitrevi_src), "K"(b));    \
        __bitrevi_res;    \
    })
664 /* ===== Inline Function End for 3.6. BITREVI ===== */
665 
666 /* ===== Inline Function Start for 3.7. BPICK ===== */
667 /**
668  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
669  * \brief BPICK (Bit-wise Pick)
670  * \details
671  * **Type**: DSP
672  *
673  * **Syntax**:\n
674  * ~~~
675  * BPICK Rd, Rs1, Rs2, Rc
676  * ~~~
677  *
678  * **Purpose**:\n
679  * Select from two source operands based on a bit mask in the third operand.
680  *
681  * **Description**:\n
682  * This instruction selects individual bits from Rs1 or Rs2, based on the bit mask value in
683  * Rc. If a bit in Rc is 1, the corresponding bit is from Rs1; otherwise, the corresponding bit is from Rs2.
684  * The selection results are written to Rd.
685  *
686  * **Operations**:\n
687  * ~~~
688  * Rd[x] = Rc[x]? Rs1[x] : Rs2[x];
689  * for RV32, x=31...0
690  * for RV64, x=63...0
691  * ~~~
692  *
693  * \param [in]  a    unsigned long type of value stored in a
694  * \param [in]  b    unsigned long type of value stored in b
695  * \param [in]  c    unsigned long type of value stored in c
696  * \return value stored in unsigned long type
697  */
__RV_BPICK(unsigned long a,unsigned long b,unsigned long c)698 __STATIC_FORCEINLINE unsigned long __RV_BPICK(unsigned long a, unsigned long b, unsigned long c)
699 {
700     register unsigned long result;
701     __ASM volatile("bpick %0, %1, %2, %3" : "=r"(result) : "r"(a), "r"(b), "r"(c));
702     return result;
703 }
704 /* ===== Inline Function End for 3.7. BPICK ===== */
705 
706 /* ===== Inline Function Start for 3.8. CLROV ===== */
707 /**
708  * \ingroup NMSIS_Core_DSP_Intrinsic_OV_FLAG_SC
709  * \brief CLROV (Clear OV flag)
710  * \details
711  * **Type**: DSP
712  *
713  * **Syntax**:\n
714  * ~~~
715  * CLROV # pseudo mnemonic
716  * ~~~
717  *
718  * **Purpose**:\n
719  * This pseudo instruction is an alias to `CSRRCI x0, ucode, 1` instruction.
720  *
721  *
722  */
__RV_CLROV(void)723 __STATIC_FORCEINLINE void __RV_CLROV(void)
724 {
725     __ASM volatile("clrov ");
726 }
727 /* ===== Inline Function End for 3.8. CLROV ===== */
728 
729 /* ===== Inline Function Start for 3.9. CLRS8 ===== */
730 /**
731  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
732  * \brief CLRS8 (SIMD 8-bit Count Leading Redundant Sign)
733  * \details
734  * **Type**: SIMD
735  *
736  * **Syntax**:\n
737  * ~~~
738  * CLRS8 Rd, Rs1
739  * ~~~
740  *
741  * **Purpose**:\n
742  * Count the number of redundant sign bits of the 8-bit elements of a general register.
743  *
744  * **Description**:\n
745  * Starting from the bits next to the sign bits of the 8-bit elements of Rs1, this instruction
746  * counts the number of redundant sign bits and writes the result to the corresponding 8-bit elements
747  * of Rd.
748  *
749  * **Operations**:\n
750  * ~~~
751  * snum[x] = Rs1.B[x];
752  * cnt[x] = 0;
753  * for (i = 6 to 0) {
754  *   if (snum[x](i) == snum[x](7)) {
755  *     cnt[x] = cnt[x] + 1;
756  *   } else {
757  *     break;
758  *   }
759  * }
760  * Rd.B[x] = cnt[x];
761  * for RV32: x=3...0
762  * for RV64: x=7...0
763  * ~~~
764  *
765  * \param [in]  a    unsigned long type of value stored in a
766  * \return value stored in unsigned long type
767  */
__RV_CLRS8(unsigned long a)768 __STATIC_FORCEINLINE unsigned long __RV_CLRS8(unsigned long a)
769 {
770     register unsigned long result;
771     __ASM volatile("clrs8 %0, %1" : "=r"(result) : "r"(a));
772     return result;
773 }
774 /* ===== Inline Function End for 3.9. CLRS8 ===== */
775 
776 /* ===== Inline Function Start for 3.10. CLRS16 ===== */
777 /**
778  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
779  * \brief CLRS16 (SIMD 16-bit Count Leading Redundant Sign)
780  * \details
781  * **Type**: SIMD
782  *
783  * **Syntax**:\n
784  * ~~~
785  * CLRS16 Rd, Rs1
786  * ~~~
787  *
788  * **Purpose**:\n
789  * Count the number of redundant sign bits of the 16-bit elements of a general register.
790  *
791  * **Description**:\n
792  * Starting from the bits next to the sign bits of the 16-bit elements of Rs1, this
793  * instruction counts the number of redundant sign bits and writes the result to the corresponding 16-
794  * bit elements of Rd.
795  *
796  * **Operations**:\n
797  * ~~~
798  * snum[x] = Rs1.H[x];
799  * cnt[x] = 0;
800  * for (i = 14 to 0) {
801  *   if (snum[x](i) == snum[x](15)) {
802  *     cnt[x] = cnt[x] + 1;
803  *   } else {
804  *     break;
805  *   }
806  * }
807  * Rd.H[x] = cnt[x];
808  * for RV32: x=1...0
809  * for RV64: x=3...0
810  * ~~~
811  *
812  * \param [in]  a    unsigned long type of value stored in a
813  * \return value stored in unsigned long type
814  */
__RV_CLRS16(unsigned long a)815 __STATIC_FORCEINLINE unsigned long __RV_CLRS16(unsigned long a)
816 {
817     register unsigned long result;
818     __ASM volatile("clrs16 %0, %1" : "=r"(result) : "r"(a));
819     return result;
820 }
821 /* ===== Inline Function End for 3.10. CLRS16 ===== */
822 
823 /* ===== Inline Function Start for 3.11. CLRS32 ===== */
824 /**
825  * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
826  * \brief CLRS32 (SIMD 32-bit Count Leading Redundant Sign)
827  * \details
828  * **Type**: SIMD
829  *
830  * **Syntax**:\n
831  * ~~~
832  * CLRS32 Rd, Rs1
833  * ~~~
834  *
835  * **Purpose**:\n
836  * Count the number of redundant sign bits of the 32-bit elements of a general register.
837  *
838  * **Description**:\n
839  * Starting from the bits next to the sign bits of the 32-bit elements of Rs1, this
840  * instruction counts the number of redundant sign bits and writes the result to the corresponding 32-
841  * bit elements of Rd.
842  *
843  * **Operations**:\n
844  * ~~~
845  * snum[x] = Rs1.W[x];
846  * cnt[x] = 0;
847  * for (i = 30 to 0) {
848  *   if (snum[x](i) == snum[x](31)) {
849  *     cnt[x] = cnt[x] + 1;
850  *   } else {
851  *     break;
852  *   }
853  * }
854  * Rd.W[x] = cnt[x];
855  * for RV32: x=0
856  * for RV64: x=1...0
857  * ~~~
858  *
859  * \param [in]  a    unsigned long type of value stored in a
860  * \return value stored in unsigned long type
861  */
__RV_CLRS32(unsigned long a)862 __STATIC_FORCEINLINE unsigned long __RV_CLRS32(unsigned long a)
863 {
864     register unsigned long result;
865     __ASM volatile("clrs32 %0, %1" : "=r"(result) : "r"(a));
866     return result;
867 }
868 /* ===== Inline Function End for 3.11. CLRS32 ===== */
869 
870 /* ===== Inline Function Start for 3.12. CLO8 ===== */
871 /**
872  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
873  * \brief CLO8 (SIMD 8-bit Count Leading One)
874  * \details
875  * **Type**: SIMD
876  *
877  * **Syntax**:\n
878  * ~~~
879  * CLO8 Rd, Rs1
880  * ~~~
881  *
882  * **Purpose**:\n
883  * Count the number of leading one bits of the 8-bit elements of a general register.
884  *
885  * **Description**:\n
886  * Starting from the most significant bits of the 8-bit elements of Rs1, this instruction
887  * counts the number of leading one bits and writes the results to the corresponding 8-bit elements of
888  * Rd.
889  *
890  * **Operations**:\n
891  * ~~~
892  * snum[x] = Rs1.B[x];
893  * cnt[x] = 0;
 * for (i = 7 to 0) {
895  *   if (snum[x](i) == 1) {
896  *     cnt[x] = cnt[x] + 1;
897  *   } else {
898  *     break;
899  *   }
900  * }
901  * Rd.B[x] = cnt[x];
902  * for RV32: x=3...0
903  * for RV64: x=7...0
904  * ~~~
905  *
906  * \param [in]  a    unsigned long type of value stored in a
907  * \return value stored in unsigned long type
908  */
__RV_CLO8(unsigned long a)909 __STATIC_FORCEINLINE unsigned long __RV_CLO8(unsigned long a)
910 {
911     register unsigned long result;
912     __ASM volatile("clo8 %0, %1" : "=r"(result) : "r"(a));
913     return result;
914 }
915 /* ===== Inline Function End for 3.12. CLO8 ===== */
916 
917 /* ===== Inline Function Start for 3.13. CLO16 ===== */
918 /**
919  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
920  * \brief CLO16 (SIMD 16-bit Count Leading One)
921  * \details
922  * **Type**: SIMD
923  *
924  * **Syntax**:\n
925  * ~~~
926  * CLO16 Rd, Rs1
927  * ~~~
928  *
929  * **Purpose**:\n
930  * Count the number of leading one bits of the 16-bit elements of a general register.
931  *
932  * **Description**:\n
933  * Starting from the most significant bits of the 16-bit elements of Rs1, this instruction
934  * counts the number of leading one bits and writes the results to the corresponding 16-bit elements
935  * of Rd.
936  *
937  * **Operations**:\n
938  * ~~~
939  * snum[x] = Rs1.H[x];
940  * cnt[x] = 0;
941  * for (i = 15 to 0) {
942  *   if (snum[x](i) == 1) {
943  *     cnt[x] = cnt[x] + 1;
944  *   } else {
945  *     break;
946  *   }
947  * }
948  * Rd.H[x] = cnt[x];
949  * for RV32: x=1...0
950  * for RV64: x=3...0
951  * ~~~
952  *
953  * \param [in]  a    unsigned long type of value stored in a
954  * \return value stored in unsigned long type
955  */
__RV_CLO16(unsigned long a)956 __STATIC_FORCEINLINE unsigned long __RV_CLO16(unsigned long a)
957 {
958     register unsigned long result;
959     __ASM volatile("clo16 %0, %1" : "=r"(result) : "r"(a));
960     return result;
961 }
962 /* ===== Inline Function End for 3.13. CLO16 ===== */
963 
964 /* ===== Inline Function Start for 3.14. CLO32 ===== */
965 /**
966  * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
967  * \brief CLO32 (SIMD 32-bit Count Leading One)
968  * \details
969  * **Type**: SIMD
970  *
971  * **Syntax**:\n
972  * ~~~
973  * CLO32 Rd, Rs1
974  * ~~~
975  *
976  * **Purpose**:\n
977  * Count the number of leading one bits of the 32-bit elements of a general register.
978  *
979  * **Description**:\n
980  * Starting from the most significant bits of the 32-bit elements of Rs1, this instruction
981  * counts the number of leading one bits and writes the results to the corresponding 32-bit elements
982  * of Rd.
983  *
984  * **Operations**:\n
985  * ~~~
986  * snum[x] = Rs1.W[x];
987  * cnt[x] = 0;
988  * for (i = 31 to 0) {
989  *   if (snum[x](i) == 1) {
990  *     cnt[x] = cnt[x] + 1;
991  *   } else {
992  *     break;
993  *   }
994  * }
995  * Rd.W[x] = cnt[x];
996  * for RV32: x=0
997  * for RV64: x=1...0
998  * ~~~
999  *
1000  * \param [in]  a    unsigned long type of value stored in a
1001  * \return value stored in unsigned long type
1002  */
__RV_CLO32(unsigned long a)1003 __STATIC_FORCEINLINE unsigned long __RV_CLO32(unsigned long a)
1004 {
1005     register unsigned long result;
1006     __ASM volatile("clo32 %0, %1" : "=r"(result) : "r"(a));
1007     return result;
1008 }
1009 /* ===== Inline Function End for 3.14. CLO32 ===== */
1010 
1011 /* ===== Inline Function Start for 3.15. CLZ8 ===== */
1012 /**
1013  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
1014  * \brief CLZ8 (SIMD 8-bit Count Leading Zero)
1015  * \details
1016  * **Type**: SIMD
1017  *
1018  * **Syntax**:\n
1019  * ~~~
1020  * CLZ8 Rd, Rs1
1021  * ~~~
1022  *
1023  * **Purpose**:\n
1024  * Count the number of leading zero bits of the 8-bit elements of a general register.
1025  *
1026  * **Description**:\n
1027  * Starting from the most significant bits of the 8-bit elements of Rs1, this instruction
1028  * counts the number of leading zero bits and writes the results to the corresponding 8-bit elements of
1029  * Rd.
1030  *
1031  * **Operations**:\n
1032  * ~~~
1033  * snum[x] = Rs1.B[x];
1034  * cnt[x] = 0;
1035  * for (i = 7 to 0) {
1036  *   if (snum[x](i) == 0) {
1037  *     cnt[x] = cnt[x] + 1;
1038  *   } else {
1039  *     break;
1040  *   }
1041  * }
1042  * Rd.B[x] = cnt[x];
1043  * for RV32: x=3...0
1044  * for RV64: x=7...0
1045  * ~~~
1046  *
1047  * \param [in]  a    unsigned long type of value stored in a
1048  * \return value stored in unsigned long type
1049  */
__RV_CLZ8(unsigned long a)1050 __STATIC_FORCEINLINE unsigned long __RV_CLZ8(unsigned long a)
1051 {
1052     register unsigned long result;
1053     __ASM volatile("clz8 %0, %1" : "=r"(result) : "r"(a));
1054     return result;
1055 }
1056 /* ===== Inline Function End for 3.15. CLZ8 ===== */
1057 
1058 /* ===== Inline Function Start for 3.16. CLZ16 ===== */
1059 /**
1060  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
1061  * \brief CLZ16 (SIMD 16-bit Count Leading Zero)
1062  * \details
1063  * **Type**: SIMD
1064  *
1065  * **Syntax**:\n
1066  * ~~~
1067  * CLZ16 Rd, Rs1
1068  * ~~~
1069  *
1070  * **Purpose**:\n
1071  * Count the number of leading zero bits of the 16-bit elements of a general register.
1072  *
1073  * **Description**:\n
1074  * Starting from the most significant bits of the 16-bit elements of Rs1, this instruction
1075  * counts the number of leading zero bits and writes the results to the corresponding 16-bit elements
1076  * of Rd.
1077  *
1078  * **Operations**:\n
1079  * ~~~
1080  * snum[x] = Rs1.H[x];
1081  * cnt[x] = 0;
1082  * for (i = 15 to 0) {
1083  *   if (snum[x](i) == 0) {
1084  *     cnt[x] = cnt[x] + 1;
1085  *   } else {
1086  *     break;
1087  *   }
1088  * }
1089  * Rd.H[x] = cnt[x];
1090  * for RV32: x=1...0
1091  * for RV64: x=3...0
1092  * ~~~
1093  *
1094  * \param [in]  a    unsigned long type of value stored in a
1095  * \return value stored in unsigned long type
1096  */
__RV_CLZ16(unsigned long a)1097 __STATIC_FORCEINLINE unsigned long __RV_CLZ16(unsigned long a)
1098 {
1099     register unsigned long result;
1100     __ASM volatile("clz16 %0, %1" : "=r"(result) : "r"(a));
1101     return result;
1102 }
1103 /* ===== Inline Function End for 3.16. CLZ16 ===== */
1104 
1105 /* ===== Inline Function Start for 3.17. CLZ32 ===== */
1106 /**
1107  * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
1108  * \brief CLZ32 (SIMD 32-bit Count Leading Zero)
1109  * \details
1110  * **Type**: SIMD
1111  *
1112  * **Syntax**:\n
1113  * ~~~
1114  * CLZ32 Rd, Rs1
1115  * ~~~
1116  *
1117  * **Purpose**:\n
1118  * Count the number of leading zero bits of the 32-bit elements of a general register.
1119  *
1120  * **Description**:\n
1121  * Starting from the most significant bits of the 32-bit elements of Rs1, this instruction
1122  * counts the number of leading zero bits and writes the results to the corresponding 32-bit elements
1123  * of Rd.
1124  *
1125  * **Operations**:\n
1126  * ~~~
1127  * snum[x] = Rs1.W[x];
1128  * cnt[x] = 0;
1129  * for (i = 31 to 0) {
1130  *   if (snum[x](i) == 0) {
1131  *     cnt[x] = cnt[x] + 1;
1132  *   } else {
1133  *     break;
1134  *   }
1135  * }
1136  * Rd.W[x] = cnt[x];
1137  * for RV32: x=0
1138  * for RV64: x=1...0
1139  * ~~~
1140  *
1141  * \param [in]  a    unsigned long type of value stored in a
1142  * \return value stored in unsigned long type
1143  */
__RV_CLZ32(unsigned long a)1144 __STATIC_FORCEINLINE unsigned long __RV_CLZ32(unsigned long a)
1145 {
1146     register unsigned long result;
1147     __ASM volatile("clz32 %0, %1" : "=r"(result) : "r"(a));
1148     return result;
1149 }
1150 /* ===== Inline Function End for 3.17. CLZ32 ===== */
1151 
1152 /* ===== Inline Function Start for 3.18. CMPEQ8 ===== */
1153 /**
1154  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
1155  * \brief CMPEQ8 (SIMD 8-bit Integer Compare Equal)
1156  * \details
1157  * **Type**: SIMD
1158  *
1159  * **Syntax**:\n
1160  * ~~~
 * CMPEQ8 Rd, Rs1, Rs2
1162  * ~~~
1163  *
1164  * **Purpose**:\n
1165  * Do 8-bit integer elements equal comparisons simultaneously.
1166  *
1167  * **Description**:\n
1168  * This instruction compares the 8-bit integer elements in Rs1 with the 8-bit integer
1169  * elements in Rs2 to see if they are equal. If they are equal, the result is 0xFF; otherwise, the result is
1170  * 0x0. The 8-bit element comparison results are written to Rd.
1171  *
1172  * **Note**:\n
1173  * This instruction can be used for either signed or unsigned numbers.
1174  *
1175  * **Operations**:\n
1176  * ~~~
1177  * Rd.B[x] = (Rs1.B[x] == Rs2.B[x])? 0xff : 0x0;
1178  * for RV32: x=3...0,
1179  * for RV64: x=7...0
1180  * ~~~
1181  *
1182  * \param [in]  a    unsigned long type of value stored in a
1183  * \param [in]  b    unsigned long type of value stored in b
1184  * \return value stored in unsigned long type
1185  */
__RV_CMPEQ8(unsigned long a,unsigned long b)1186 __STATIC_FORCEINLINE unsigned long __RV_CMPEQ8(unsigned long a, unsigned long b)
1187 {
1188     register unsigned long result;
1189     __ASM volatile("cmpeq8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
1190     return result;
1191 }
1192 /* ===== Inline Function End for 3.18. CMPEQ8 ===== */
1193 
1194 /* ===== Inline Function Start for 3.19. CMPEQ16 ===== */
1195 /**
1196  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP
1197  * \brief CMPEQ16 (SIMD 16-bit Integer Compare Equal)
1198  * \details
1199  * **Type**: SIMD
1200  *
1201  * **Syntax**:\n
1202  * ~~~
1203  * CMPEQ16 Rd, Rs1, Rs2
1204  * ~~~
1205  *
1206  * **Purpose**:\n
1207  * Do 16-bit integer elements equal comparisons simultaneously.
1208  *
1209  * **Description**:\n
1210  * This instruction compares the 16-bit integer elements in Rs1 with the 16-bit integer
1211  * elements in Rs2 to see if they are equal. If they are equal, the result is 0xFFFF; otherwise, the result
 * is 0x0. The 16-bit element comparison results are written to Rd.
1213  *
1214  * **Note**:\n
1215  * This instruction can be used for either signed or unsigned numbers.
1216  *
1217  * **Operations**:\n
1218  * ~~~
1219  * Rd.H[x] = (Rs1.H[x] == Rs2.H[x])? 0xffff : 0x0;
1220  * for RV32: x=1...0,
1221  * for RV64: x=3...0
1222  * ~~~
1223  *
1224  * \param [in]  a    unsigned long type of value stored in a
1225  * \param [in]  b    unsigned long type of value stored in b
1226  * \return value stored in unsigned long type
1227  */
__RV_CMPEQ16(unsigned long a,unsigned long b)1228 __STATIC_FORCEINLINE unsigned long __RV_CMPEQ16(unsigned long a, unsigned long b)
1229 {
1230     register unsigned long result;
1231     __ASM volatile("cmpeq16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
1232     return result;
1233 }
1234 /* ===== Inline Function End for 3.19. CMPEQ16 ===== */
1235 
1236 /* ===== Inline Function Start for 3.20. CRAS16 ===== */
1237 /**
1238  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
1239  * \brief CRAS16 (SIMD 16-bit Cross Addition & Subtraction)
1240  * \details
1241  * **Type**: SIMD
1242  *
1243  * **Syntax**:\n
1244  * ~~~
1245  * CRAS16 Rd, Rs1, Rs2
1246  * ~~~
1247  *
1248  * **Purpose**:\n
1249  * Do 16-bit integer element addition and 16-bit integer element subtraction in a 32-bit
1250  * chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
1251  *
1252  * **Description**:\n
1253  * This instruction adds the 16-bit integer element in [31:16] of 32-bit chunks in Rs1 with
1254  * the 16-bit integer element in [15:0] of 32-bit chunks in Rs2, and writes the result to [31:16] of 32-bit
1255  * chunks in Rd; at the same time, it subtracts the 16-bit integer element in [31:16] of 32-bit chunks in
 * Rs2 from the 16-bit integer element in [15:0] of 32-bit chunks in Rs1, and writes the result to [15:0] of 32-
1257  * bit chunks in Rd.
1258  *
1259  * **Note**:\n
1260  * This instruction can be used for either signed or unsigned operations.
1261  *
1262  * **Operations**:\n
1263  * ~~~
1264  * Rd.W[x][31:16] = Rs1.W[x][31:16] + Rs2.W[x][15:0];
1265  * Rd.W[x][15:0] = Rs1.W[x][15:0] - Rs2.W[x][31:16];
1266  * for RV32, x=0
1267  * for RV64, x=1...0
1268  * ~~~
1269  *
1270  * \param [in]  a    unsigned long type of value stored in a
1271  * \param [in]  b    unsigned long type of value stored in b
1272  * \return value stored in unsigned long type
1273  */
__RV_CRAS16(unsigned long a,unsigned long b)1274 __STATIC_FORCEINLINE unsigned long __RV_CRAS16(unsigned long a, unsigned long b)
1275 {
1276     register unsigned long result;
1277     __ASM volatile("cras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
1278     return result;
1279 }
1280 /* ===== Inline Function End for 3.20. CRAS16 ===== */
1281 
1282 /* ===== Inline Function Start for 3.21. CRSA16 ===== */
1283 /**
1284  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
1285  * \brief CRSA16 (SIMD 16-bit Cross Subtraction & Addition)
1286  * \details
1287  * **Type**: SIMD
1288  *
1289  * **Syntax**:\n
1290  * ~~~
1291  * CRSA16 Rd, Rs1, Rs2
1292  * ~~~
1293  *
1294  * **Purpose**:\n
1295  * Do 16-bit integer element subtraction and 16-bit integer element addition in a 32-bit
1296  * chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
1297  *
1298  * **Description**:\n
1299  * This instruction subtracts the 16-bit integer element in [15:0] of 32-bit chunks in Rs2
1300  * from the 16-bit integer element in [31:16] of 32-bit chunks in Rs1, and writes the result to [31:16] of
1301  * 32-bit chunks in Rd; at the same time, it adds the 16-bit integer element in [31:16] of 32-bit chunks
1302  * in Rs2 with the 16-bit integer element in [15:0] of 32-bit chunks in Rs1, and writes the result to
1303  * [15:0] of 32-bit chunks in Rd.
1304  *
1305  * **Note**:\n
1306  * This instruction can be used for either signed or unsigned operations.
1307  *
1308  * **Operations**:\n
1309  * ~~~
1310  * Rd.W[x][31:16] = Rs1.W[x][31:16] - Rs2.W[x][15:0];
1311  * Rd.W[x][15:0] = Rs1.W[x][15:0] + Rs2.W[x][31:16];
1312  * for RV32, x=0
1313  * for RV64, x=1...0
1314  * ~~~
1315  *
1316  * \param [in]  a    unsigned long type of value stored in a
1317  * \param [in]  b    unsigned long type of value stored in b
1318  * \return value stored in unsigned long type
1319  */
__RV_CRSA16(unsigned long a,unsigned long b)1320 __STATIC_FORCEINLINE unsigned long __RV_CRSA16(unsigned long a, unsigned long b)
1321 {
1322     register unsigned long result;
1323     __ASM volatile("crsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
1324     return result;
1325 }
1326 /* ===== Inline Function End for 3.21. CRSA16 ===== */
1327 
1328 /* ===== Inline Function Start for 3.22. INSB ===== */
1329 /**
1330  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
1331  * \brief INSB (Insert Byte)
1332  * \details
1333  * **Type**: DSP
1334  *
1335  * **Syntax**:\n
1336  * ~~~
1337  * (RV32) INSB Rd, Rs1, imm[1:0]
1338  * (RV64) INSB Rd, Rs1, imm[2:0]
1339  * ~~~
1340  *
1341  * **Purpose**:\n
1342  * Insert byte 0 of a 32-bit or 64-bit register into one of the byte elements of another register.
1343  *
1344  * **Description**:\n
1345  * This instruction inserts byte 0 of Rs1 into byte `imm[1:0]` (RV32) or `imm[2:0]` (RV64)
1346  * of Rd.
1347  *
1348  * **Operations**:\n
1349  * ~~~
1350  * bpos = imm[1:0]; (RV32)
1351  * bpos = imm[2:0]; (RV64)
1352  * Rd.B[bpos] = Rs1.B[0]
1353  * ~~~
1354  *
 * \param [in]  t    unsigned long type of value used as the initial Rd; one of its bytes is replaced
 * \param [in]  a    unsigned long type of value stored in a; its byte 0 is the byte that is inserted
 * \param [in]  b    immediate byte position in Rd: imm[1:0] for RV32, imm[2:0] for RV64
1358  * \return value stored in unsigned long type
1359  */
#define __RV_INSB(t, a, b)    \
    ({    \
        /* __t is a read-write operand ("+r"): it carries t in and the */    \
        /* modified value out. b is passed as an immediate ("K").      */    \
        /* No `register` qualifier: it is removed in C++17.            */    \
        unsigned long __t = (unsigned long)(t);    \
        unsigned long __a = (unsigned long)(a);    \
        __ASM volatile("insb %0, %1, %2" : "+r"(__t) : "r"(__a), "K"(b));    \
        __t;    \
    })
1367 /* ===== Inline Function End for 3.22. INSB ===== */
1368 
1369 /* ===== Inline Function Start for 3.23. KABS8 ===== */
1370 /**
1371  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
1372  * \brief KABS8 (SIMD 8-bit Saturating Absolute)
1373  * \details
1374  * **Type**: SIMD
1375  *
1376  * **Syntax**:\n
1377  * ~~~
1378  * KABS8 Rd, Rs1
1379  * ~~~
1380  *
1381  * **Purpose**:\n
1382  * Get the absolute value of 8-bit signed integer elements simultaneously.
1383  *
1384  * **Description**:\n
1385  * This instruction calculates the absolute value of 8-bit signed integer elements stored
1386  * in Rs1 and writes the element results to Rd. If the input number is 0x80, this instruction generates
1387  * 0x7f as the output and sets the OV bit to 1.
1388  *
1389  * **Operations**:\n
1390  * ~~~
1391  * src = Rs1.B[x];
1392  * if (src == 0x80) {
1393  *   src = 0x7f;
1394  *   OV = 1;
 * } else if (src[7] == 1) {
1396  *   src = -src;
1397  * }
1398  * Rd.B[x] = src;
1399  * for RV32: x=3...0,
1400  * for RV64: x=7...0
1401  * ~~~
1402  *
1403  * \param [in]  a    unsigned long type of value stored in a
1404  * \return value stored in unsigned long type
1405  */
__RV_KABS8(unsigned long a)1406 __STATIC_FORCEINLINE unsigned long __RV_KABS8(unsigned long a)
1407 {
1408     register unsigned long result;
1409     __ASM volatile("kabs8 %0, %1" : "=r"(result) : "r"(a));
1410     return result;
1411 }
1412 /* ===== Inline Function End for 3.23. KABS8 ===== */
1413 
1414 /* ===== Inline Function Start for 3.24. KABS16 ===== */
1415 /**
1416  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
1417  * \brief KABS16 (SIMD 16-bit Saturating Absolute)
1418  * \details
1419  * **Type**: SIMD
1420  *
1421  * **Syntax**:\n
1422  * ~~~
1423  * KABS16 Rd, Rs1
1424  * ~~~
1425  *
1426  * **Purpose**:\n
1427  * Get the absolute value of 16-bit signed integer elements simultaneously.
1428  *
1429  * **Description**:\n
1430  * This instruction calculates the absolute value of 16-bit signed integer elements stored
1431  * in Rs1 and writes the element results to Rd. If the input number is 0x8000, this instruction
1432  * generates 0x7fff as the output and sets the OV bit to 1.
1433  *
1434  * **Operations**:\n
1435  * ~~~
1436  * src = Rs1.H[x];
1437  * if (src == 0x8000) {
1438  *   src = 0x7fff;
1439  *   OV = 1;
 * } else if (src[15] == 1) {
1441  *   src = -src;
1442  * }
1443  * Rd.H[x] = src;
1444  * for RV32: x=1...0,
1445  * for RV64: x=3...0
1446  * ~~~
1447  *
1448  * \param [in]  a    unsigned long type of value stored in a
1449  * \return value stored in unsigned long type
1450  */
__RV_KABS16(unsigned long a)1451 __STATIC_FORCEINLINE unsigned long __RV_KABS16(unsigned long a)
1452 {
1453     register unsigned long result;
1454     __ASM volatile("kabs16 %0, %1" : "=r"(result) : "r"(a));
1455     return result;
1456 }
1457 /* ===== Inline Function End for 3.24. KABS16 ===== */
1458 
1459 /* ===== Inline Function Start for 3.25. KABSW ===== */
1460 /**
1461  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
1462  * \brief KABSW (Scalar 32-bit Absolute Value with Saturation)
1463  * \details
1464  * **Type**: DSP
1465  *
1466  * **Syntax**:\n
1467  * ~~~
1468  * KABSW Rd, Rs1
1469  * ~~~
1470  *
1471  * **Purpose**:\n
1472  * Get the absolute value of a signed 32-bit integer in a general register.
1473  *
1474  * **Description**:\n
1475  * This instruction calculates the absolute value of a signed 32-bit integer stored in Rs1.
1476  * The result is sign-extended (for RV64) and written to Rd. This instruction with the minimum
1477  * negative integer input of 0x80000000 will produce a saturated output of maximum positive integer
1478  * of 0x7fffffff and the OV flag will be set to 1.
1479  *
1480  * **Operations**:\n
1481  * ~~~
1482  * if (Rs1.W[0] >= 0) {
1483  *   res = Rs1.W[0];
1484  * } else {
 *   if (Rs1.W[0] == 0x80000000) {
1486  *     res = 0x7fffffff;
1487  *     OV = 1;
1488  *   } else {
1489  *     res = -Rs1.W[0];
1490  *   }
1491  * }
1492  * Rd = SE32(res);
1493  * ~~~
1494  *
1495  * \param [in]  a    signed long type of value stored in a
1496  * \return value stored in unsigned long type
1497  */
__RV_KABSW(signed long a)1498 __STATIC_FORCEINLINE unsigned long __RV_KABSW(signed long a)
1499 {
1500     register unsigned long result;
1501     __ASM volatile("kabsw %0, %1" : "=r"(result) : "r"(a));
1502     return result;
1503 }
1504 /* ===== Inline Function End for 3.25. KABSW ===== */
1505 
1506 /* ===== Inline Function Start for 3.26. KADD8 ===== */
1507 /**
1508  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
1509  * \brief KADD8 (SIMD 8-bit Signed Saturating Addition)
1510  * \details
1511  * **Type**: SIMD
1512  *
1513  * **Syntax**:\n
1514  * ~~~
1515  * KADD8 Rd, Rs1, Rs2
1516  * ~~~
1517  *
1518  * **Purpose**:\n
1519  * Do 8-bit signed integer element saturating additions simultaneously.
1520  *
1521  * **Description**:\n
1522  * This instruction adds the 8-bit signed integer elements in Rs1 with the 8-bit signed
1523  * integer elements in Rs2. If any of the results are beyond the Q7 number range (-2^7 <= Q7 <= 2^7-1), they
1524  * are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
1525  *
1526  * **Operations**:\n
1527  * ~~~
1528  * res[x] = Rs1.B[x] + Rs2.B[x];
1529  * if (res[x] > 127) {
1530  *   res[x] = 127;
1531  *   OV = 1;
1532  * } else if (res[x] < -128) {
1533  *   res[x] = -128;
1534  *   OV = 1;
1535  * }
1536  * Rd.B[x] = res[x];
1537  * for RV32: x=3...0,
1538  * for RV64: x=7...0
1539  * ~~~
1540  *
1541  * \param [in]  a    unsigned long type of value stored in a
1542  * \param [in]  b    unsigned long type of value stored in b
1543  * \return value stored in unsigned long type
1544  */
__RV_KADD8(unsigned long a,unsigned long b)1545 __STATIC_FORCEINLINE unsigned long __RV_KADD8(unsigned long a, unsigned long b)
1546 {
1547     register unsigned long result;
1548     __ASM volatile("kadd8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
1549     return result;
1550 }
1551 /* ===== Inline Function End for 3.26. KADD8 ===== */
1552 
1553 /* ===== Inline Function Start for 3.27. KADD16 ===== */
1554 /**
1555  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
1556  * \brief KADD16 (SIMD 16-bit Signed Saturating Addition)
1557  * \details
1558  * **Type**: SIMD
1559  *
1560  * **Syntax**:\n
1561  * ~~~
1562  * KADD16 Rd, Rs1, Rs2
1563  * ~~~
1564  *
1565  * **Purpose**:\n
1566  * Do 16-bit signed integer element saturating additions simultaneously.
1567  *
1568  * **Description**:\n
1569  * This instruction adds the 16-bit signed integer elements in Rs1 with the 16-bit signed
1570  * integer elements in Rs2. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1),
1571  * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
1572  *
1573  * **Operations**:\n
1574  * ~~~
1575  * res[x] = Rs1.H[x] + Rs2.H[x];
1576  * if (res[x] > 32767) {
1577  *   res[x] = 32767;
1578  *   OV = 1;
1579  * } else if (res[x] < -32768) {
1580  *   res[x] = -32768;
1581  *   OV = 1;
1582  * }
1583  * Rd.H[x] = res[x];
1584  * for RV32: x=1...0,
1585  * for RV64: x=3...0
1586  * ~~~
1587  *
1588  * \param [in]  a    unsigned long type of value stored in a
1589  * \param [in]  b    unsigned long type of value stored in b
1590  * \return value stored in unsigned long type
1591  */
__RV_KADD16(unsigned long a,unsigned long b)1592 __STATIC_FORCEINLINE unsigned long __RV_KADD16(unsigned long a, unsigned long b)
1593 {
1594     register unsigned long result;
1595     __ASM volatile("kadd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
1596     return result;
1597 }
1598 /* ===== Inline Function End for 3.27. KADD16 ===== */
1599 
1600 /* ===== Inline Function Start for 3.28. KADD64 ===== */
1601 /**
1602  * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
1603  * \brief KADD64 (64-bit Signed Saturating Addition)
1604  * \details
1605  * **Type**: DSP (64-bit Profile)
1606  *
1607  * **Syntax**:\n
1608  * ~~~
1609  * KADD64 Rd, Rs1, Rs2
1610  * ~~~
1611  *
1612  * **Purpose**:\n
1613  * Add two 64-bit signed integers. The result is saturated to the Q63 range.
1614  *
1615  * **RV32 Description**:\n
1616  * This instruction adds the 64-bit signed integer of an even/odd pair of registers
1617  * specified by Rs1(4,1) with the 64-bit signed integer of an even/odd pair of registers specified by
1618  * Rs2(4,1). If the 64-bit result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the
1619  * range and the OV bit is set to 1. The saturated result is written to an even/odd pair of registers
1620  * specified by Rd(4,1).
1621  * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register
1622  * pair includes register 2d and 2d+1.
1623  * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
1624  * of the pair contains the low 32-bit of the result.
1625  *
1626  * **RV64 Description**:\n
1627  * This instruction adds the 64-bit signed integer in Rs1 with the 64-bit signed
1628  * integer in Rs2. If the result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the
1629  * range and the OV bit is set to 1. The saturated result is written to Rd.
1630  *
1631  * **Operations**:\n
1632  * ~~~
1633  * RV32:
1634  *  t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
1635  *  a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
1636  *  b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
1637  *  result = R[a_H].R[a_L] + R[b_H].R[b_L];
1638  *  if (result > (2^63)-1) {
1639  *    result = (2^63)-1; OV = 1;
1640  *  } else if (result < -2^63) {
1641  *    result = -2^63; OV = 1;
1642  *  }
1643  *  R[t_H].R[t_L] = result;
1644  * RV64:
1645  *  result = Rs1 + Rs2;
1646  *  if (result > (2^63)-1) {
1647  *    result = (2^63)-1; OV = 1;
1648  *  } else if (result < -2^63) {
1649  *    result = -2^63; OV = 1;
1650  *  }
1651  *  Rd = result;
1652  * ~~~
1653  *
1654  * \param [in]  a    long long type of value stored in a
1655  * \param [in]  b    long long type of value stored in b
1656  * \return value stored in long long type
1657  */
__RV_KADD64(long long a,long long b)1658 __STATIC_FORCEINLINE long long __RV_KADD64(long long a, long long b)
1659 {
1660     register long long result;
1661     __ASM volatile("kadd64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
1662     return result;
1663 }
1664 /* ===== Inline Function End for 3.28. KADD64 ===== */
1665 
1666 /* ===== Inline Function Start for 3.29. KADDH ===== */
1667 /**
1668  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
1669  * \brief KADDH (Signed Addition with Q15 Saturation)
1670  * \details
1671  * **Type**: DSP
1672  *
1673  * **Syntax**:\n
1674  * ~~~
1675  * KADDH Rd, Rs1, Rs2
1676  * ~~~
1677  *
1678  * **Purpose**:\n
1679  * Add the signed lower 32-bit content of two registers with Q15 saturation.
1680  *
1681  * **Description**:\n
1682  * The signed lower 32-bit content of Rs1 is added with the signed lower 32-bit content of
1683  * Rs2. And the result is saturated to the 16-bit signed integer range of [-2^15, 2^15-1] and then sign-
1684  * extended and written to Rd. If saturation happens, this instruction sets the OV flag.
1685  *
1686  * **Operations**:\n
1687  * ~~~
1688  * tmp = Rs1.W[0] + Rs2.W[0];
1689  * if (tmp > 32767) {
1690  *   res = 32767;
1691  *   OV = 1;
1692  * } else if (tmp < -32768) {
1693  *   res = -32768;
 *   OV = 1;
1695  * } else {
1696  *   res = tmp;
1697  * }
 * Rd = SE(res[15:0]);
1699  * ~~~
1700  *
1701  * \param [in]  a    int type of value stored in a
1702  * \param [in]  b    int type of value stored in b
1703  * \return value stored in long type
1704  */
__RV_KADDH(int a,int b)1705 __STATIC_FORCEINLINE long __RV_KADDH(int a, int b)
1706 {
1707     register long result;
1708     __ASM volatile("kaddh %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
1709     return result;
1710 }
1711 /* ===== Inline Function End for 3.29. KADDH ===== */
1712 
1713 /* ===== Inline Function Start for 3.30. KADDW ===== */
1714 /**
1715  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
1716  * \brief KADDW (Signed Addition with Q31 Saturation)
1717  * \details
1718  * **Type**: DSP
1719  *
1720  * **Syntax**:\n
1721  * ~~~
1722  * KADDW Rd, Rs1, Rs2
1723  * ~~~
1724  *
1725  * **Purpose**:\n
1726  * Add the lower 32-bit signed content of two registers with Q31 saturation.
1727  *
1728  * **Description**:\n
1729  * The lower 32-bit signed content of Rs1 is added with the lower 32-bit signed content of
1730  * Rs2. And the result is saturated to the 32-bit signed integer range of [-2^31, 2^31-1] and then sign-
1731  * extended and written to Rd. If saturation happens, this instruction sets the OV flag.
1732  *
1733  * **Operations**:\n
1734  * ~~~
1735  * tmp = Rs1.W[0] + Rs2.W[0];
1736  * if (tmp > (2^31)-1) {
1737  *   res = (2^31)-1;
1738  *   OV = 1;
1739  * } else if (tmp < -2^31) {
1740  *   res = -2^31;
 *   OV = 1;
1742  * } else {
1743  *   res = tmp;
1744  * }
1745  * Rd = res[31:0]; // RV32
 * Rd = SE(res[31:0]); // RV64
1747  * ~~~
1748  *
1749  * \param [in]  a    int type of value stored in a
1750  * \param [in]  b    int type of value stored in b
1751  * \return value stored in long type
1752  */
__RV_KADDW(int a,int b)1753 __STATIC_FORCEINLINE long __RV_KADDW(int a, int b)
1754 {
1755     register long result;
1756     __ASM volatile("kaddw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
1757     return result;
1758 }
1759 /* ===== Inline Function End for 3.30. KADDW ===== */
1760 
1761 /* ===== Inline Function Start for 3.31. KCRAS16 ===== */
1762 /**
1763  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
1764  * \brief KCRAS16 (SIMD 16-bit Signed Saturating Cross Addition & Subtraction)
1765  * \details
1766  * **Type**: SIMD
1767  *
1768  * **Syntax**:\n
1769  * ~~~
1770  * KCRAS16 Rd, Rs1, Rs2
1771  * ~~~
1772  *
1773  * **Purpose**:\n
1774  * Do 16-bit signed integer element saturating addition and 16-bit signed integer element
1775  * saturating subtraction in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-
1776  * bit chunks.
1777  *
1778  * **Description**:\n
1779  * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in
1780  * Rs1 with the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2; at the same time, it
1781  * subtracts the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2 from the 16-bit signed
1782  * integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number
1783  * range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated
1784  * results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit chunks in Rd for
1785  * subtraction.
1786  *
1787  * **Operations**:\n
1788  * ~~~
1789  * res1 = Rs1.W[x][31:16] + Rs2.W[x][15:0];
1790  * res2 = Rs1.W[x][15:0] - Rs2.W[x][31:16];
1791  * for (res in [res1, res2]) {
1792  *   if (res > (2^15)-1) {
1793  *     res = (2^15)-1;
1794  *     OV = 1;
1795  *   } else if (res < -2^15) {
1796  *     res = -2^15;
1797  *     OV = 1;
1798  *   }
1799  * }
1800  * Rd.W[x][31:16] = res1;
1801  * Rd.W[x][15:0] = res2;
1802  * for RV32, x=0
1803  * for RV64, x=1...0
1804  * ~~~
1805  *
1806  * \param [in]  a    unsigned long type of value stored in a
1807  * \param [in]  b    unsigned long type of value stored in b
1808  * \return value stored in unsigned long type
1809  */
__RV_KCRAS16(unsigned long a,unsigned long b)1810 __STATIC_FORCEINLINE unsigned long __RV_KCRAS16(unsigned long a, unsigned long b)
1811 {
1812     register unsigned long result;
1813     __ASM volatile("kcras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
1814     return result;
1815 }
1816 /* ===== Inline Function End for 3.31. KCRAS16 ===== */
1817 
1818 /* ===== Inline Function Start for 3.32. KCRSA16 ===== */
1819 /**
1820  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
1821  * \brief KCRSA16 (SIMD 16-bit Signed Saturating Cross Subtraction & Addition)
1822  * \details
1823  * **Type**: SIMD
1824  *
1825  * **Syntax**:\n
1826  * ~~~
1827  * KCRSA16 Rd, Rs1, Rs2
1828  * ~~~
1829  *
1830  * **Purpose**:\n
1831  * Do 16-bit signed integer element saturating subtraction and 16-bit signed integer element
1832  * saturating addition in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit
1833  * chunks.
1834  *
1835  * **Description**:\n
1836  * This instruction subtracts the 16-bit signed integer element in [15:0] of 32-bit chunks
1837  * in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1; at the same time, it
1838  * adds the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2 with the 16-bit signed
1839  * integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number
1840  * range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated
1841  * results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks in Rd
1842  * for addition.
1843  *
1844  * **Operations**:\n
1845  * ~~~
1846  * res1 = Rs1.W[x][31:16] - Rs2.W[x][15:0];
1847  * res2 = Rs1.W[x][15:0] + Rs2.W[x][31:16];
1848  * for (res in [res1, res2]) {
1849  *   if (res > (2^15)-1) {
1850  *     res = (2^15)-1;
1851  *     OV = 1;
1852  *   } else if (res < -2^15) {
1853  *     res = -2^15;
1854  *     OV = 1;
1855  *   }
1856  * }
1857  * Rd.W[x][31:16] = res1;
1858  * Rd.W[x][15:0] = res2;
1859  * for RV32, x=0
1860  * for RV64, x=1...0
1861  * ~~~
1862  *
1863  * \param [in]  a    unsigned long type of value stored in a
1864  * \param [in]  b    unsigned long type of value stored in b
1865  * \return value stored in unsigned long type
1866  */
__RV_KCRSA16(unsigned long a,unsigned long b)1867 __STATIC_FORCEINLINE unsigned long __RV_KCRSA16(unsigned long a, unsigned long b)
1868 {
1869     register unsigned long result;
1870     __ASM volatile("kcrsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
1871     return result;
1872 }
1873 /* ===== Inline Function End for 3.32. KCRSA16 ===== */
1874 
1875 /* ===== Inline Function Start for 3.33.1. KDMBB ===== */
1876 /**
1877  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
1878  * \brief KDMBB (Signed Saturating Double Multiply B16 x B16)
1879  * \details
1880  * **Type**: DSP
1881  *
1882  * **Syntax**:\n
1883  * ~~~
1884  * KDMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
1885  * ~~~
1886  *
1887  * **Purpose**:\n
1888  * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
1889  * of the lower 32-bit chunk in registers and then double and saturate the Q31 result. The result is
1890  * written into the destination register for RV32 or sign-extended to 64-bits and written into the
1891  * destination register for RV64. If saturation happens, an overflow flag OV will be set.
1892  *
1893  * **Description**:\n
1894  * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
1895  * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
1896  * doubled and saturated into a Q31 value. The Q31 value is then written into Rd (sign-extended in
1897  * RV64). When both the two Q15 inputs are 0x8000, saturation will happen. The result will be
1898  * saturated to 0x7FFFFFFF and the overflow flag OV will be set.
1899  *
1900  * **Operations**:\n
1901  * ~~~
1902  * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMBB
1903  * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMBT
1904  * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMTT
1905  * If (0x8000 != aop | 0x8000 != bop) {
1906  *   Mresult = aop * bop;
1907  *   resQ31 = Mresult << 1;
1908  *   Rd = resQ31; // RV32
1909  *   Rd = SE(resQ31); // RV64
1910  * } else {
1911  *   resQ31 = 0x7FFFFFFF;
1912  *   Rd = resQ31; // RV32
1913  *   Rd = SE(resQ31); // RV64
1914  *   OV = 1;
1915  * }
1916  * ~~~
1917  *
1918  * \param [in]  a    unsigned int type of value stored in a
1919  * \param [in]  b    unsigned int type of value stored in b
1920  * \return value stored in long type
1921  */
__RV_KDMBB(unsigned int a,unsigned int b)1922 __STATIC_FORCEINLINE long __RV_KDMBB(unsigned int a, unsigned int b)
1923 {
1924     register long result;
1925     __ASM volatile("kdmbb %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
1926     return result;
1927 }
1928 /* ===== Inline Function End for 3.33.1. KDMBB ===== */
1929 
1930 /* ===== Inline Function Start for 3.33.2. KDMBT ===== */
1931 /**
1932  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
1933  * \brief KDMBT (Signed Saturating Double Multiply B16 x T16)
1934  * \details
1935  * **Type**: DSP
1936  *
1937  * **Syntax**:\n
1938  * ~~~
1939  * KDMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
1940  * ~~~
1941  *
1942  * **Purpose**:\n
1943  * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
1944  * of the lower 32-bit chunk in registers and then double and saturate the Q31 result. The result is
1945  * written into the destination register for RV32 or sign-extended to 64-bits and written into the
1946  * destination register for RV64. If saturation happens, an overflow flag OV will be set.
1947  *
1948  * **Description**:\n
1949  * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
1950  * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
1951  * doubled and saturated into a Q31 value. The Q31 value is then written into Rd (sign-extended in
1952  * RV64). When both the two Q15 inputs are 0x8000, saturation will happen. The result will be
1953  * saturated to 0x7FFFFFFF and the overflow flag OV will be set.
1954  *
1955  * **Operations**:\n
1956  * ~~~
1957  * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMBB
1958  * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMBT
1959  * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMTT
1960  * If (0x8000 != aop | 0x8000 != bop) {
1961  *   Mresult = aop * bop;
1962  *   resQ31 = Mresult << 1;
1963  *   Rd = resQ31; // RV32
1964  *   Rd = SE(resQ31); // RV64
1965  * } else {
1966  *   resQ31 = 0x7FFFFFFF;
1967  *   Rd = resQ31; // RV32
1968  *   Rd = SE(resQ31); // RV64
1969  *   OV = 1;
1970  * }
1971  * ~~~
1972  *
1973  * \param [in]  a    unsigned int type of value stored in a
1974  * \param [in]  b    unsigned int type of value stored in b
1975  * \return value stored in long type
1976  */
__RV_KDMBT(unsigned int a,unsigned int b)1977 __STATIC_FORCEINLINE long __RV_KDMBT(unsigned int a, unsigned int b)
1978 {
1979     register long result;
1980     __ASM volatile("kdmbt %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
1981     return result;
1982 }
1983 /* ===== Inline Function End for 3.33.2. KDMBT ===== */
1984 
1985 /* ===== Inline Function Start for 3.33.3. KDMTT ===== */
1986 /**
1987  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
1988  * \brief KDMTT (Signed Saturating Double Multiply T16 x T16)
1989  * \details
1990  * **Type**: DSP
1991  *
1992  * **Syntax**:\n
1993  * ~~~
1994  * KDMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
1995  * ~~~
1996  *
1997  * **Purpose**:\n
1998  * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
1999  * of the lower 32-bit chunk in registers and then double and saturate the Q31 result. The result is
2000  * written into the destination register for RV32 or sign-extended to 64-bits and written into the
2001  * destination register for RV64. If saturation happens, an overflow flag OV will be set.
2002  *
2003  * **Description**:\n
2004  * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
2005  * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
2006  * doubled and saturated into a Q31 value. The Q31 value is then written into Rd (sign-extended in
2007  * RV64). When both the two Q15 inputs are 0x8000, saturation will happen. The result will be
2008  * saturated to 0x7FFFFFFF and the overflow flag OV will be set.
2009  *
2010  * **Operations**:\n
2011  * ~~~
2012  * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMBB
2013  * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMBT
2014  * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMTT
2015  * If (0x8000 != aop | 0x8000 != bop) {
2016  *   Mresult = aop * bop;
2017  *   resQ31 = Mresult << 1;
2018  *   Rd = resQ31; // RV32
2019  *   Rd = SE(resQ31); // RV64
2020  * } else {
2021  *   resQ31 = 0x7FFFFFFF;
2022  *   Rd = resQ31; // RV32
2023  *   Rd = SE(resQ31); // RV64
2024  *   OV = 1;
2025  * }
2026  * ~~~
2027  *
2028  * \param [in]  a    unsigned int type of value stored in a
2029  * \param [in]  b    unsigned int type of value stored in b
2030  * \return value stored in long type
2031  */
__RV_KDMTT(unsigned int a,unsigned int b)2032 __STATIC_FORCEINLINE long __RV_KDMTT(unsigned int a, unsigned int b)
2033 {
2034     register long result;
2035     __ASM volatile("kdmtt %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
2036     return result;
2037 }
2038 /* ===== Inline Function End for 3.33.3. KDMTT ===== */
2039 
2040 /* ===== Inline Function Start for 3.34.1. KDMABB ===== */
2041 /**
2042  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
2043  * \brief KDMABB (Signed Saturating Double Multiply Addition B16 x B16)
2044  * \details
2045  * **Type**: DSP
2046  *
2047  * **Syntax**:\n
2048  * ~~~
2049  * KDMAxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
2050  * ~~~
2051  *
2052  * **Purpose**:\n
2053  * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
2054  * of the lower 32-bit chunk in registers and then double and saturate the Q31 result, add the result
2055  * with the sign-extended lower 32-bit chunk destination register and write the saturated addition
2056  * result into the destination register. If saturation happens, an overflow flag OV will be set.
2057  *
2058  * **Description**:\n
2059  * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
2060  * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
2061  * doubled and saturated into a Q31 value. The Q31 value is then added with the content of Rd. If the
2062  * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
2063  * the OV flag is set to 1. The result after saturation is written to Rd.
2064  * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
2065  * set.
2066  *
2067  * **Operations**:\n
2068  * ~~~
2069  * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMABB
2070  * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMABT
2071  * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMATT
2072  * If (0x8000 != aop | 0x8000 != bop) {
2073  *   Mresult = aop * bop;
2074  *   resQ31 = Mresult << 1;
2075  * } else {
2076  *   resQ31 = 0x7FFFFFFF;
2077  *   OV = 1;
2078  * }
2079  * resadd = Rd + resQ31; // RV32
2080  * resadd = Rd.W[0] + resQ31; // RV64
2081  * if (resadd > (2^31)-1) {
2082  *   resadd = (2^31)-1;
2083  *   OV = 1;
2084  * } else if (resadd < -2^31) {
2085  *   resadd = -2^31;
2086  *   OV = 1;
2087  * }
2088  * Rd = resadd; // RV32
2089  * Rd = SE(resadd); // RV64
2090  * ~~~
2091  *
2092  * \param [in]  t    long type of value stored in t
2093  * \param [in]  a    unsigned int type of value stored in a
2094  * \param [in]  b    unsigned int type of value stored in b
2095  * \return value stored in long type
2096  */
__RV_KDMABB(long t,unsigned int a,unsigned int b)2097 __STATIC_FORCEINLINE long __RV_KDMABB(long t, unsigned int a, unsigned int b)
2098 {
2099     __ASM volatile("kdmabb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
2100     return t;
2101 }
2102 /* ===== Inline Function End for 3.34.1. KDMABB ===== */
2103 
2104 /* ===== Inline Function Start for 3.34.2. KDMABT ===== */
2105 /**
2106  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
2107  * \brief KDMABT (Signed Saturating Double Multiply Addition B16 x T16)
2108  * \details
2109  * **Type**: DSP
2110  *
2111  * **Syntax**:\n
2112  * ~~~
2113  * KDMAxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
2114  * ~~~
2115  *
2116  * **Purpose**:\n
2117  * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
2118  * of the lower 32-bit chunk in registers and then double and saturate the Q31 result, add the result
2119  * with the sign-extended lower 32-bit chunk destination register and write the saturated addition
2120  * result into the destination register. If saturation happens, an overflow flag OV will be set.
2121  *
2122  * **Description**:\n
2123  * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
2124  * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
2125  * doubled and saturated into a Q31 value. The Q31 value is then added with the content of Rd. If the
2126  * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
2127  * the OV flag is set to 1. The result after saturation is written to Rd.
2128  * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
2129  * set.
2130  *
2131  * **Operations**:\n
2132  * ~~~
2133  * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMABB
2134  * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMABT
2135  * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMATT
2136  * If (0x8000 != aop | 0x8000 != bop) {
2137  *   Mresult = aop * bop;
2138  *   resQ31 = Mresult << 1;
2139  * } else {
2140  *   resQ31 = 0x7FFFFFFF;
2141  *   OV = 1;
2142  * }
2143  * resadd = Rd + resQ31; // RV32
2144  * resadd = Rd.W[0] + resQ31; // RV64
2145  * if (resadd > (2^31)-1) {
2146  *   resadd = (2^31)-1;
2147  *   OV = 1;
2148  * } else if (resadd < -2^31) {
2149  *   resadd = -2^31;
2150  *   OV = 1;
2151  * }
2152  * Rd = resadd; // RV32
2153  * Rd = SE(resadd); // RV64
2154  * ~~~
2155  *
2156  * \param [in]  t    long type of value stored in t
2157  * \param [in]  a    unsigned int type of value stored in a
2158  * \param [in]  b    unsigned int type of value stored in b
2159  * \return value stored in long type
2160  */
__RV_KDMABT(long t,unsigned int a,unsigned int b)2161 __STATIC_FORCEINLINE long __RV_KDMABT(long t, unsigned int a, unsigned int b)
2162 {
2163     __ASM volatile("kdmabt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
2164     return t;
2165 }
2166 /* ===== Inline Function End for 3.34.2. KDMABT ===== */
2167 
2168 /* ===== Inline Function Start for 3.34.3. KDMATT ===== */
2169 /**
2170  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
2171  * \brief KDMATT (Signed Saturating Double Multiply Addition T16 x T16)
2172  * \details
2173  * **Type**: DSP
2174  *
2175  * **Syntax**:\n
2176  * ~~~
2177  * KDMAxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
2178  * ~~~
2179  *
2180  * **Purpose**:\n
2181  * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
2182  * of the lower 32-bit chunk in registers and then double and saturate the Q31 result, add the result
2183  * with the sign-extended lower 32-bit chunk destination register and write the saturated addition
2184  * result into the destination register. If saturation happens, an overflow flag OV will be set.
2185  *
2186  * **Description**:\n
2187  * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
2188  * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
2189  * doubled and saturated into a Q31 value. The Q31 value is then added with the content of Rd. If the
2190  * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
2191  * the OV flag is set to 1. The result after saturation is written to Rd.
2192  * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
2193  * set.
2194  *
2195  * **Operations**:\n
2196  * ~~~
2197  * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMABB
2198  * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMABT
2199  * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMATT
2200  * If (0x8000 != aop | 0x8000 != bop) {
2201  *   Mresult = aop * bop;
2202  *   resQ31 = Mresult << 1;
2203  * } else {
2204  *   resQ31 = 0x7FFFFFFF;
2205  *   OV = 1;
2206  * }
2207  * resadd = Rd + resQ31; // RV32
2208  * resadd = Rd.W[0] + resQ31; // RV64
2209  * if (resadd > (2^31)-1) {
2210  *   resadd = (2^31)-1;
2211  *   OV = 1;
2212  * } else if (resadd < -2^31) {
2213  *   resadd = -2^31;
2214  *   OV = 1;
2215  * }
2216  * Rd = resadd; // RV32
2217  * Rd = SE(resadd); // RV64
2218  * ~~~
2219  *
2220  * \param [in]  t    long type of value stored in t
2221  * \param [in]  a    unsigned int type of value stored in a
2222  * \param [in]  b    unsigned int type of value stored in b
2223  * \return value stored in long type
2224  */
__RV_KDMATT(long t,unsigned int a,unsigned int b)2225 __STATIC_FORCEINLINE long __RV_KDMATT(long t, unsigned int a, unsigned int b)
2226 {
2227     __ASM volatile("kdmatt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
2228     return t;
2229 }
2230 /* ===== Inline Function End for 3.34.3. KDMATT ===== */
2231 
2232 /* ===== Inline Function Start for 3.35.1. KHM8 ===== */
2233 /**
2234  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
2235  * \brief KHM8 (SIMD Signed Saturating Q7 Multiply)
2236  * \details
2237  * **Type**: SIMD
2238  *
2239  * **Syntax**:\n
2240  * ~~~
2241  * KHM8 Rd, Rs1, Rs2
2242  * KHMX8 Rd, Rs1, Rs2
2243  * ~~~
2244  *
2245  * **Purpose**:\n
2246  * Do Q7xQ7 element multiplications simultaneously. The Q14 results are then reduced to Q7
2247  * numbers again.
2248  *
2249  * **Description**:\n
2250  * For the `KHM8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1
2251  * with the top 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
2252  * content of 16-bit chunks in Rs1 with the bottom 8-bit Q7 content of 16-bit chunks in Rs2.
2253  * For the `KHMX8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1 with the
2254  * bottom 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
2255  * content of 16-bit chunks in Rs1 with the top 8-bit Q7 content of 16-bit chunks in Rs2.
2256  * The Q14 results are then right-shifted 7-bits and saturated into Q7 values. The Q7 results are then
2257  * written into Rd. When both the two Q7 inputs of a multiplication are 0x80, saturation will happen.
2258  * The result will be saturated to 0x7F and the overflow flag OV will be set.
2259  *
2260  * **Operations**:\n
2261  * ~~~
2262  * if (is `KHM8`) {
2263  *   op1t = Rs1.B[x+1]; op2t = Rs2.B[x+1]; // top
2264  *   op1b = Rs1.B[x]; op2b = Rs2.B[x]; // bottom
2265  * } else if (is `KHMX8`) {
2266  *   op1t = Rs1.B[x+1]; op2t = Rs2.B[x]; // Rs1 top
2267  *   op1b = Rs1.B[x]; op2b = Rs2.B[x+1]; // Rs1 bottom
2268  * }
2269  * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
2270  *   if (0x80 != aop | 0x80 != bop) {
2271  *     res = (aop s* bop) >> 7;
2272  *   } else {
2273  *     res= 0x7F;
2274  *     OV = 1;
2275  *   }
2276  * }
2277  * Rd.H[x/2] = concat(rest, resb);
2278  * for RV32, x=0,2
2279  * for RV64, x=0,2,4,6
2280  * ~~~
2281  *
2282  * \param [in]  a    unsigned long type of value stored in a
2283  * \param [in]  b    unsigned long type of value stored in b
2284  * \return value stored in unsigned long type
2285  */
__RV_KHM8(unsigned long a,unsigned long b)2286 __STATIC_FORCEINLINE unsigned long __RV_KHM8(unsigned long a, unsigned long b)
2287 {
2288     register unsigned long result;
2289     __ASM volatile("khm8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
2290     return result;
2291 }
2292 /* ===== Inline Function End for 3.35.1. KHM8 ===== */
2293 
2294 /* ===== Inline Function Start for 3.35.2. KHMX8 ===== */
2295 /**
2296  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
2297  * \brief KHMX8 (SIMD Signed Saturating Crossed Q7 Multiply)
2298  * \details
2299  * **Type**: SIMD
2300  *
2301  * **Syntax**:\n
2302  * ~~~
2303  * KHM8 Rd, Rs1, Rs2
2304  * KHMX8 Rd, Rs1, Rs2
2305  * ~~~
2306  *
2307  * **Purpose**:\n
2308  * Do Q7xQ7 element multiplications simultaneously. The Q14 results are then reduced to Q7
2309  * numbers again.
2310  *
2311  * **Description**:\n
2312  * For the `KHM8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1
2313  * with the top 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
2314  * content of 16-bit chunks in Rs1 with the bottom 8-bit Q7 content of 16-bit chunks in Rs2.
2315  * For the `KHMX8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1 with the
2316  * bottom 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
2317  * content of 16-bit chunks in Rs1 with the top 8-bit Q7 content of 16-bit chunks in Rs2.
2318  * The Q14 results are then right-shifted 7-bits and saturated into Q7 values. The Q7 results are then
2319  * written into Rd. When both the two Q7 inputs of a multiplication are 0x80, saturation will happen.
2320  * The result will be saturated to 0x7F and the overflow flag OV will be set.
2321  *
2322  * **Operations**:\n
2323  * ~~~
2324  * if (is `KHM8`) {
2325  *   op1t = Rs1.B[x+1]; op2t = Rs2.B[x+1]; // top
2326  *   op1b = Rs1.B[x]; op2b = Rs2.B[x]; // bottom
2327  * } else if (is `KHMX8`) {
2328  *   op1t = Rs1.B[x+1]; op2t = Rs2.B[x]; // Rs1 top
2329  *   op1b = Rs1.B[x]; op2b = Rs2.B[x+1]; // Rs1 bottom
2330  * }
2331  * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
2332  *   if (0x80 != aop | 0x80 != bop) {
2333  *     res = (aop s* bop) >> 7;
2334  *   } else {
2335  *     res= 0x7F;
2336  *     OV = 1;
2337  *   }
2338  * }
2339  * Rd.H[x/2] = concat(rest, resb);
2340  * for RV32, x=0,2
2341  * for RV64, x=0,2,4,6
2342  * ~~~
2343  *
2344  * \param [in]  a    unsigned long type of value stored in a
2345  * \param [in]  b    unsigned long type of value stored in b
2346  * \return value stored in unsigned long type
2347  */
__RV_KHMX8(unsigned long a,unsigned long b)2348 __STATIC_FORCEINLINE unsigned long __RV_KHMX8(unsigned long a, unsigned long b)
2349 {
2350     register unsigned long result;
2351     __ASM volatile("khmx8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
2352     return result;
2353 }
2354 /* ===== Inline Function End for 3.35.2. KHMX8 ===== */
2355 
2356 /* ===== Inline Function Start for 3.36.1. KHM16 ===== */
2357 /**
2358  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
2359  * \brief KHM16 (SIMD Signed Saturating Q15 Multiply)
2360  * \details
2361  * **Type**: SIMD
2362  *
2363  * **Syntax**:\n
2364  * ~~~
2365  * KHM16 Rd, Rs1, Rs2
2366  * KHMX16 Rd, Rs1, Rs2
2367  * ~~~
2368  *
2369  * **Purpose**:\n
2370  * Do Q15xQ15 element multiplications simultaneously. The Q30 results are then reduced to
2371  * Q15 numbers again.
2372  *
2373  * **Description**:\n
2374  * For the `KHM16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in
2375  * Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom
2376  * 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit chunks in
2377  * Rs2.
2378  * For the `KHMX16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the
2379  * bottom 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom 16-bit Q15
2380  * content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2.
2381  * The Q30 results are then right-shifted 15-bits and saturated into Q15 values. The Q15 results are
2382  * then written into Rd. When both the two Q15 inputs of a multiplication are 0x8000, saturation will
2383  * happen. The result will be saturated to 0x7FFF and the overflow flag OV will be set.
2384  *
2385  * **Operations**:\n
2386  * ~~~
2387  * if (is `KHM16`) {
2388  *   op1t = Rs1.H[x+1]; op2t = Rs2.H[x+1]; // top
2389  *   op1b = Rs1.H[x]; op2b = Rs2.H[x]; // bottom
2390  * } else if (is `KHMX16`) {
2391  *   op1t = Rs1.H[x+1]; op2t = Rs2.H[x]; // Rs1 top
2392  *   op1b = Rs1.H[x]; op2b = Rs2.H[x+1]; // Rs1 bottom
2393  * }
2394  * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
2395  *   if (0x8000 != aop | 0x8000 != bop) {
2396  *     res = (aop s* bop) >> 15;
2397  *   } else {
2398  *     res= 0x7FFF;
2399  *     OV = 1;
2400  *   }
2401  * }
2402  * Rd.W[x/2] = concat(rest, resb);
2403  * for RV32: x=0
2404  * for RV64: x=0,2
2405  * ~~~
2406  *
2407  * \param [in]  a    unsigned long type of value stored in a
2408  * \param [in]  b    unsigned long type of value stored in b
2409  * \return value stored in unsigned long type
2410  */
__RV_KHM16(unsigned long a,unsigned long b)2411 __STATIC_FORCEINLINE unsigned long __RV_KHM16(unsigned long a, unsigned long b)
2412 {
2413     register unsigned long result;
2414     __ASM volatile("khm16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
2415     return result;
2416 }
2417 /* ===== Inline Function End for 3.36.1. KHM16 ===== */
2418 
2419 /* ===== Inline Function Start for 3.36.2. KHMX16 ===== */
2420 /**
2421  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
2422  * \brief KHMX16 (SIMD Signed Saturating Crossed Q15 Multiply)
2423  * \details
2424  * **Type**: SIMD
2425  *
2426  * **Syntax**:\n
2427  * ~~~
2428  * KHM16 Rd, Rs1, Rs2
2429  * KHMX16 Rd, Rs1, Rs2
2430  * ~~~
2431  *
2432  * **Purpose**:\n
2433  * Do Q15xQ15 element multiplications simultaneously. The Q30 results are then reduced to
2434  * Q15 numbers again.
2435  *
2436  * **Description**:\n
2437  * For the `KHM16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in
2438  * Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom
2439  * 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit chunks in
2440  * Rs2.
2441  * For the `KHMX16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the
2442  * bottom 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom 16-bit Q15
2443  * content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2.
2444  * The Q30 results are then right-shifted 15-bits and saturated into Q15 values. The Q15 results are
2445  * then written into Rd. When both the two Q15 inputs of a multiplication are 0x8000, saturation will
2446  * happen. The result will be saturated to 0x7FFF and the overflow flag OV will be set.
2447  *
2448  * **Operations**:\n
2449  * ~~~
2450  * if (is `KHM16`) {
2451  *   op1t = Rs1.H[x+1]; op2t = Rs2.H[x+1]; // top
2452  *   op1b = Rs1.H[x]; op2b = Rs2.H[x]; // bottom
2453  * } else if (is `KHMX16`) {
2454  *   op1t = Rs1.H[x+1]; op2t = Rs2.H[x]; // Rs1 top
2455  *   op1b = Rs1.H[x]; op2b = Rs2.H[x+1]; // Rs1 bottom
2456  * }
2457  * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
2458  *   if (0x8000 != aop | 0x8000 != bop) {
2459  *     res = (aop s* bop) >> 15;
2460  *   } else {
2461  *     res= 0x7FFF;
2462  *     OV = 1;
2463  *   }
2464  * }
2465  * Rd.W[x/2] = concat(rest, resb);
2466  * for RV32: x=0
2467  * for RV64: x=0,2
2468  * ~~~
2469  *
2470  * \param [in]  a    unsigned long type of value stored in a
2471  * \param [in]  b    unsigned long type of value stored in b
2472  * \return value stored in unsigned long type
2473  */
__RV_KHMX16(unsigned long a,unsigned long b)2474 __STATIC_FORCEINLINE unsigned long __RV_KHMX16(unsigned long a, unsigned long b)
2475 {
2476     register unsigned long result;
2477     __ASM volatile("khmx16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
2478     return result;
2479 }
2480 /* ===== Inline Function End for 3.36.2. KHMX16 ===== */
2481 
2482 /* ===== Inline Function Start for 3.37.1. KHMBB ===== */
2483 /**
2484  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
2485  * \brief KHMBB (Signed Saturating Half Multiply B16 x B16)
2486  * \details
2487  * **Type**: DSP
2488  *
2489  * **Syntax**:\n
2490  * ~~~
2491  * KHMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
2492  * ~~~
2493  *
2494  * **Purpose**:\n
2495  * Multiply the signed Q15 number contents of two 16-bit data in the corresponding portion
2496  * of the lower 32-bit chunk in registers and then right-shift 15 bits to turn the Q30 result into a Q15
2497  * number again and saturate the Q15 result into the destination register. If saturation happens, an
2498  * overflow flag OV will be set.
2499  *
2500  * **Description**:\n
2501  * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
2502  * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then right-
2503  * shifted 15-bits and saturated into a Q15 value. The Q15 value is then sign-extended and written into
2504  * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
2505  * to 0x7FFF and the overflow flag OV will be set.
2506  *
2507  * **Operations**:\n
2508  * ~~~
2509  * aop = Rs1.H[0]; bop = Rs2.H[0]; // KHMBB
2510  * aop = Rs1.H[0]; bop = Rs2.H[1]; // KHMBT
2511  * aop = Rs1.H[1]; bop = Rs2.H[1]; // KHMTT
2512  * If (0x8000 != aop | 0x8000 != bop) {
2513  *   Mresult[31:0] = aop * bop;
2514  *   res[15:0] = Mresult[30:15];
2515  * } else {
2516  *   res[15:0] = 0x7FFF;
2517  *   OV = 1;
2518  * }
2519  * Rd = SE32(res[15:0]); // RV32
2520  * Rd = SE64(res[15:0]); // RV64
2521  * ~~~
2522  *
2523  * \param [in]  a    unsigned int type of value stored in a
2524  * \param [in]  b    unsigned int type of value stored in b
2525  * \return value stored in long type
2526  */
__RV_KHMBB(unsigned int a,unsigned int b)2527 __STATIC_FORCEINLINE long __RV_KHMBB(unsigned int a, unsigned int b)
2528 {
2529     register long result;
2530     __ASM volatile("khmbb %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
2531     return result;
2532 }
2533 /* ===== Inline Function End for 3.37.1. KHMBB ===== */
2534 
2535 /* ===== Inline Function Start for 3.37.2. KHMBT ===== */
2536 /**
2537  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
2538  * \brief KHMBT (Signed Saturating Half Multiply B16 x T16)
2539  * \details
2540  * **Type**: DSP
2541  *
2542  * **Syntax**:\n
2543  * ~~~
2544  * KHMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
2545  * ~~~
2546  *
2547  * **Purpose**:\n
2548  * Multiply the signed Q15 number contents of two 16-bit data in the corresponding portion
2549  * of the lower 32-bit chunk in registers and then right-shift 15 bits to turn the Q30 result into a Q15
2550  * number again and saturate the Q15 result into the destination register. If saturation happens, an
2551  * overflow flag OV will be set.
2552  *
2553  * **Description**:\n
2554  * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
2555  * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then right-
2556  * shifted 15-bits and saturated into a Q15 value. The Q15 value is then sign-extended and written into
2557  * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
2558  * to 0x7FFF and the overflow flag OV will be set.
2559  *
2560  * **Operations**:\n
2561  * ~~~
2562  * aop = Rs1.H[0]; bop = Rs2.H[0]; // KHMBB
2563  * aop = Rs1.H[0]; bop = Rs2.H[1]; // KHMBT
2564  * aop = Rs1.H[1]; bop = Rs2.H[1]; // KHMTT
2565  * If (0x8000 != aop | 0x8000 != bop) {
2566  *   Mresult[31:0] = aop * bop;
2567  *   res[15:0] = Mresult[30:15];
2568  * } else {
2569  *   res[15:0] = 0x7FFF;
2570  *   OV = 1;
2571  * }
2572  * Rd = SE32(res[15:0]); // RV32
2573  * Rd = SE64(res[15:0]); // RV64
2574  * ~~~
2575  *
2576  * \param [in]  a    unsigned int type of value stored in a
2577  * \param [in]  b    unsigned int type of value stored in b
2578  * \return value stored in long type
2579  */
__RV_KHMBT(unsigned int a,unsigned int b)2580 __STATIC_FORCEINLINE long __RV_KHMBT(unsigned int a, unsigned int b)
2581 {
2582     register long result;
2583     __ASM volatile("khmbt %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
2584     return result;
2585 }
2586 /* ===== Inline Function End for 3.37.2. KHMBT ===== */
2587 
2588 /* ===== Inline Function Start for 3.37.3. KHMTT ===== */
2589 /**
2590  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
2591  * \brief KHMTT (Signed Saturating Half Multiply T16 x T16)
2592  * \details
2593  * **Type**: DSP
2594  *
2595  * **Syntax**:\n
2596  * ~~~
2597  * KHMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
2598  * ~~~
2599  *
2600  * **Purpose**:\n
2601  * Multiply the signed Q15 number contents of two 16-bit data in the corresponding portion
2602  * of the lower 32-bit chunk in registers and then right-shift 15 bits to turn the Q30 result into a Q15
2603  * number again and saturate the Q15 result into the destination register. If saturation happens, an
2604  * overflow flag OV will be set.
2605  *
2606  * **Description**:\n
2607  * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
2608  * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then right-
2609  * shifted 15-bits and saturated into a Q15 value. The Q15 value is then sign-extended and written into
2610  * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
2611  * to 0x7FFF and the overflow flag OV will be set.
2612  *
2613  * **Operations**:\n
2614  * ~~~
2615  * aop = Rs1.H[0]; bop = Rs2.H[0]; // KHMBB
2616  * aop = Rs1.H[0]; bop = Rs2.H[1]; // KHMBT
2617  * aop = Rs1.H[1]; bop = Rs2.H[1]; // KHMTT
2618  * If (0x8000 != aop | 0x8000 != bop) {
2619  *   Mresult[31:0] = aop * bop;
2620  *   res[15:0] = Mresult[30:15];
2621  * } else {
2622  *   res[15:0] = 0x7FFF;
2623  *   OV = 1;
2624  * }
2625  * Rd = SE32(res[15:0]); // RV32
2626  * Rd = SE64(res[15:0]); // RV64
2627  * ~~~
2628  *
2629  * \param [in]  a    unsigned int type of value stored in a
2630  * \param [in]  b    unsigned int type of value stored in b
2631  * \return value stored in long type
2632  */
__RV_KHMTT(unsigned int a,unsigned int b)2633 __STATIC_FORCEINLINE long __RV_KHMTT(unsigned int a, unsigned int b)
2634 {
2635     register long result;
2636     __ASM volatile("khmtt %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
2637     return result;
2638 }
2639 /* ===== Inline Function End for 3.37.3. KHMTT ===== */
2640 
2641 /* ===== Inline Function Start for 3.38.1. KMABB ===== */
2642 /**
2643  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
2644  * \brief KMABB (SIMD Saturating Signed Multiply Bottom Halfs & Add)
2645  * \details
2646  * **Type**: SIMD
2647  *
2648  * **Syntax**:\n
2649  * ~~~
2650  * KMABB Rd, Rs1, Rs2
2651  * KMABT Rd, Rs1, Rs2
2652  * KMATT Rd, Rs1, Rs2
2653  * ~~~
2654  *
2655  * **Purpose**:\n
2656  * Multiply the signed 16-bit content of 32-bit elements in a register with the 16-bit content
2657  * of 32-bit elements in another register and add the result to the content of 32-bit elements in the
2658  * third register. The addition result may be saturated and is written to the third register.
2659  * * KMABB: rd.W[x] + bottom*bottom (per 32-bit element)
2660  * * KMABT: rd.W[x] + bottom*top (per 32-bit element)
2661  * * KMATT: rd.W[x] + top*top (per 32-bit element)
2662  *
2663  * **Description**:\n
2664  * For the `KMABB` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
2665  * the bottom 16-bit content of 32-bit elements in Rs2.
2666  * For the `KMABT` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
2667  * the top 16-bit content of 32-bit elements in Rs2.
2668  * For the `KMATT` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
2669  * top 16-bit content of 32-bit elements in Rs2.
2670  * The multiplication result is added to the content of 32-bit elements in Rd. If the addition result is
2671  * beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to
2672  * 1. The results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as
2673  * signed integers.
2674  *
2675  * **Operations**:\n
2676  * ~~~
2677  * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]); // KMABB
2678  * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[1]); // KMABT
2679  * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]); // KMATT
2680  * if (res[x] > (2^31)-1) {
2681  *   res[x] = (2^31)-1;
2682  *   OV = 1;
2683  * } else if (res[x] < -2^31) {
2684  *   res[x] = -2^31;
2685  *   OV = 1;
2686  * }
2687  * Rd.W[x] = res[x];
2688  * for RV32: x=0
2689  * for RV64: x=1...0
2690  * ~~~
2691  *
2692  * \param [in]  t    long type of value stored in t
2693  * \param [in]  a    unsigned long type of value stored in a
2694  * \param [in]  b    unsigned long type of value stored in b
2695  * \return value stored in long type
2696  */
__RV_KMABB(long t,unsigned long a,unsigned long b)2697 __STATIC_FORCEINLINE long __RV_KMABB(long t, unsigned long a, unsigned long b)
2698 {
2699     __ASM volatile("kmabb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
2700     return t;
2701 }
2702 /* ===== Inline Function End for 3.38.1. KMABB ===== */
2703 
2704 /* ===== Inline Function Start for 3.38.2. KMABT ===== */
2705 /**
2706  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
2707  * \brief KMABT (SIMD Saturating Signed Multiply Bottom & Top Halfs & Add)
2708  * \details
2709  * **Type**: SIMD
2710  *
2711  * **Syntax**:\n
2712  * ~~~
2713  * KMABB Rd, Rs1, Rs2
2714  * KMABT Rd, Rs1, Rs2
2715  * KMATT Rd, Rs1, Rs2
2716  * ~~~
2717  *
2718  * **Purpose**:\n
2719  * Multiply the signed 16-bit content of 32-bit elements in a register with the 16-bit content
2720  * of 32-bit elements in another register and add the result to the content of 32-bit elements in the
2721  * third register. The addition result may be saturated and is written to the third register.
2722  * * KMABB: rd.W[x] + bottom*bottom (per 32-bit element)
2723  * * KMABT: rd.W[x] + bottom*top (per 32-bit element)
2724  * * KMATT: rd.W[x] + top*top (per 32-bit element)
2725  *
2726  * **Description**:\n
2727  * For the `KMABB` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
2728  * the bottom 16-bit content of 32-bit elements in Rs2.
2729  * For the `KMABT` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
2730  * the top 16-bit content of 32-bit elements in Rs2.
2731  * For the `KMATT` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
2732  * top 16-bit content of 32-bit elements in Rs2.
2733  * The multiplication result is added to the content of 32-bit elements in Rd. If the addition result is
2734  * beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to
2735  * 1. The results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as
2736  * signed integers.
2737  *
2738  * **Operations**:\n
2739  * ~~~
2740  * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]); // KMABB
2741  * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[1]); // KMABT
2742  * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]); // KMATT
2743  * if (res[x] > (2^31)-1) {
2744  *   res[x] = (2^31)-1;
2745  *   OV = 1;
2746  * } else if (res[x] < -2^31) {
2747  *   res[x] = -2^31;
2748  *   OV = 1;
2749  * }
2750  * Rd.W[x] = res[x];
2751  * for RV32: x=0
2752  * for RV64: x=1...0
2753  * ~~~
2754  *
2755  * \param [in]  t    long type of value stored in t
2756  * \param [in]  a    unsigned long type of value stored in a
2757  * \param [in]  b    unsigned long type of value stored in b
2758  * \return value stored in long type
2759  */
__RV_KMABT(long t,unsigned long a,unsigned long b)2760 __STATIC_FORCEINLINE long __RV_KMABT(long t, unsigned long a, unsigned long b)
2761 {
2762     __ASM volatile("kmabt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
2763     return t;
2764 }
2765 /* ===== Inline Function End for 3.38.2. KMABT ===== */
2766 
2767 /* ===== Inline Function Start for 3.38.3. KMATT ===== */
2768 /**
2769  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
2770  * \brief KMATT (SIMD Saturating Signed Multiply Top Halfs & Add)
2771  * \details
2772  * **Type**: SIMD
2773  *
2774  * **Syntax**:\n
2775  * ~~~
2776  * KMABB Rd, Rs1, Rs2
2777  * KMABT Rd, Rs1, Rs2
2778  * KMATT Rd, Rs1, Rs2
2779  * ~~~
2780  *
2781  * **Purpose**:\n
2782  * Multiply the signed 16-bit content of 32-bit elements in a register with the 16-bit content
2783  * of 32-bit elements in another register and add the result to the content of 32-bit elements in the
2784  * third register. The addition result may be saturated and is written to the third register.
2785  * * KMABB: rd.W[x] + bottom*bottom (per 32-bit element)
2786  * * KMABT: rd.W[x] + bottom*top (per 32-bit element)
2787  * * KMATT: rd.W[x] + top*top (per 32-bit element)
2788  *
2789  * **Description**:\n
2790  * For the `KMABB` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
2791  * the bottom 16-bit content of 32-bit elements in Rs2.
2792  * For the `KMABT` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
2793  * the top 16-bit content of 32-bit elements in Rs2.
2794  * For the `KMATT` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
2795  * top 16-bit content of 32-bit elements in Rs2.
2796  * The multiplication result is added to the content of 32-bit elements in Rd. If the addition result is
2797  * beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to
2798  * 1. The results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as
2799  * signed integers.
2800  *
2801  * **Operations**:\n
2802  * ~~~
2803  * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]); // KMABB
2804  * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[1]); // KMABT
2805  * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]); // KMATT
2806  * if (res[x] > (2^31)-1) {
2807  *   res[x] = (2^31)-1;
2808  *   OV = 1;
2809  * } else if (res[x] < -2^31) {
2810  *   res[x] = -2^31;
2811  *   OV = 1;
2812  * }
2813  * Rd.W[x] = res[x];
2814  * for RV32: x=0
2815  * for RV64: x=1...0
2816  * ~~~
2817  *
2818  * \param [in]  t    long type of value stored in t
2819  * \param [in]  a    unsigned long type of value stored in a
2820  * \param [in]  b    unsigned long type of value stored in b
2821  * \return value stored in long type
2822  */
__RV_KMATT(long t,unsigned long a,unsigned long b)2823 __STATIC_FORCEINLINE long __RV_KMATT(long t, unsigned long a, unsigned long b)
2824 {
2825     __ASM volatile("kmatt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
2826     return t;
2827 }
2828 /* ===== Inline Function End for 3.38.3. KMATT ===== */
2829 
2830 /* ===== Inline Function Start for 3.39.1. KMADA ===== */
2831 /**
2832  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
2833  * \brief KMADA (SIMD Saturating Signed Multiply Two Halfs and Two Adds)
2834  * \details
2835  * **Type**: SIMD
2836  *
2837  * **Syntax**:\n
2838  * ~~~
2839  * KMADA Rd, Rs1, Rs2
2840  * KMAXDA Rd, Rs1, Rs2
2841  * ~~~
2842  *
2843  * **Purpose**:\n
2844  * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then adds
2845  * the two 32-bit results and 32-bit elements in a third register together. The addition result may be
2846  * saturated.
2847  * * KMADA: rd.W[x] + top*top + bottom*bottom (per 32-bit element)
2848  * * KMAXDA: rd.W[x] + top*bottom + bottom*top (per 32-bit element)
2849  *
2850  * **Description**:\n
2851  * For the `KMADA` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
2852  * the bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of
2853  * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
2854  * elements in Rs2.
2855  * For the `KMAXDA` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
2856  * bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of multiplying
2857  * the bottom 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit elements in
2858  * Rs2.
2859  * The result is added to the content of 32-bit elements in Rd. If the addition result is beyond the Q31
2860  * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The 32-bit
2861  * results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed
2862  * integers.
2863  *
2864  * **Operations**:\n
2865  * ~~~
2866  * // KMADA
2867  * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
2868  * // KMAXDA
2869  * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
2870  * if (res[x] > (2^31)-1) {
2871  *   res[x] = (2^31)-1;
2872  *   OV = 1;
2873  * } else if (res[x] < -2^31) {
2874  *   res[x] = -2^31;
2875  *   OV = 1;
2876  * }
2877  * Rd.W[x] = res[x];
2878  * for RV32: x=0
2879  * for RV64: x=1...0
2880  * ~~~
2881  *
2882  * \param [in]  t    long type of value stored in t
2883  * \param [in]  a    unsigned long type of value stored in a
2884  * \param [in]  b    unsigned long type of value stored in b
2885  * \return value stored in long type
2886  */
__RV_KMADA(long t,unsigned long a,unsigned long b)2887 __STATIC_FORCEINLINE long __RV_KMADA(long t, unsigned long a, unsigned long b)
2888 {
2889     __ASM volatile("kmada %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
2890     return t;
2891 }
2892 /* ===== Inline Function End for 3.39.1. KMADA ===== */
2893 
2894 /* ===== Inline Function Start for 3.39.2. KMAXDA ===== */
2895 /**
2896  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
2897  * \brief KMAXDA (SIMD Saturating Signed Crossed Multiply Two Halfs and Two Adds)
2898  * \details
2899  * **Type**: SIMD
2900  *
2901  * **Syntax**:\n
2902  * ~~~
2903  * KMADA Rd, Rs1, Rs2
2904  * KMAXDA Rd, Rs1, Rs2
2905  * ~~~
2906  *
2907  * **Purpose**:\n
2908  * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then adds
2909  * the two 32-bit results and 32-bit elements in a third register together. The addition result may be
2910  * saturated.
2911  * * KMADA: rd.W[x] + top*top + bottom*bottom (per 32-bit element)
2912  * * KMAXDA: rd.W[x] + top*bottom + bottom*top (per 32-bit element)
2913  *
2914  * **Description**:\n
2915  * For the `KMADA` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
2916  * the bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of
2917  * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
2918  * elements in Rs2.
2919  * For the `KMAXDA` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
2920  * bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of multiplying
2921  * the bottom 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit elements in
2922  * Rs2.
2923  * The result is added to the content of 32-bit elements in Rd. If the addition result is beyond the Q31
2924  * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The 32-bit
2925  * results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed
2926  * integers.
2927  *
2928  * **Operations**:\n
2929  * ~~~
2930  * // KMADA
2931  * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
2932  * // KMAXDA
2933  * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
2934  * if (res[x] > (2^31)-1) {
2935  *   res[x] = (2^31)-1;
2936  *   OV = 1;
2937  * } else if (res[x] < -2^31) {
2938  *   res[x] = -2^31;
2939  *   OV = 1;
2940  * }
2941  * Rd.W[x] = res[x];
2942  * for RV32: x=0
2943  * for RV64: x=1...0
2944  * ~~~
2945  *
2946  * \param [in]  t    long type of value stored in t
2947  * \param [in]  a    unsigned long type of value stored in a
2948  * \param [in]  b    unsigned long type of value stored in b
2949  * \return value stored in long type
2950  */
__RV_KMAXDA(long t,unsigned long a,unsigned long b)2951 __STATIC_FORCEINLINE long __RV_KMAXDA(long t, unsigned long a, unsigned long b)
2952 {
2953     __ASM volatile("kmaxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
2954     return t;
2955 }
2956 /* ===== Inline Function End for 3.39.2. KMAXDA ===== */
2957 
2958 /* ===== Inline Function Start for 3.40.1. KMADS ===== */
2959 /**
2960  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
2961  * \brief KMADS (SIMD Saturating Signed Multiply Two Halfs & Subtract & Add)
2962  * \details
2963  * **Type**: SIMD
2964  *
2965  * **Syntax**:\n
2966  * ~~~
2967  * KMADS Rd, Rs1, Rs2
2968  * KMADRS Rd, Rs1, Rs2
2969  * KMAXDS Rd, Rs1, Rs2
2970  * ~~~
2971  *
2972  * **Purpose**:\n
2973  * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then
2974  * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
2975  * the corresponding 32-bit elements in a third register. The addition result may be saturated.
2976  * * KMADS: rd.W[x] + (top*top - bottom*bottom) (per 32-bit element)
2977  * * KMADRS: rd.W[x] + (bottom*bottom - top*top) (per 32-bit element)
2978  * * KMAXDS: rd.W[x] + (top*bottom - bottom*top) (per 32-bit element)
2979  *
2980  * **Description**:\n
2981  * For the `KMADS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
2982  * the bottom 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
2983  * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
2984  * elements in Rs2.
2985  * For the `KMADRS` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
2986  * top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
2987  * multiplying the bottom 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-
2988  * bit elements in Rs2.
2989  * For the `KMAXDS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
2990  * the top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
2991  * multiplying the top 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-bit
2992  * elements in Rs2.
2993  * The subtraction result is then added to the content of the corresponding 32-bit elements in Rd. If the
2994  * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
2995  * the OV bit is set to 1. The 32-bit results after saturation are written to Rd. The 16-bit contents of Rs1
2996  * and Rs2 are treated as signed integers.
2997  *
2998  * **Operations**:\n
2999  * ~~~
3000  * // KMADS
3001  * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
3002  * // KMADRS
3003  * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
3004  * // KMAXDS
3005  * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
3006  * if (res[x] > (2^31)-1) {
3007  *   res[x] = (2^31)-1;
3008  *   OV = 1;
3009  * } else if (res[x] < -2^31) {
3010  *   res[x] = -2^31;
3011  *   OV = 1;
3012  * }
3013  * Rd.W[x] = res[x];
3014  * for RV32: x=0
3015  * for RV64: x=1...0
3016  * ~~~
3017  *
3018  * \param [in]  t    long type of value stored in t
3019  * \param [in]  a    unsigned long type of value stored in a
3020  * \param [in]  b    unsigned long type of value stored in b
3021  * \return value stored in long type
3022  */
__RV_KMADS(long t,unsigned long a,unsigned long b)3023 __STATIC_FORCEINLINE long __RV_KMADS(long t, unsigned long a, unsigned long b)
3024 {
3025     __ASM volatile("kmads %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
3026     return t;
3027 }
3028 /* ===== Inline Function End for 3.40.1. KMADS ===== */
3029 
3030 /* ===== Inline Function Start for 3.40.2. KMADRS ===== */
3031 /**
3032  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
3033  * \brief KMADRS (SIMD Saturating Signed Multiply Two Halfs & Reverse Subtract & Add)
3034  * \details
3035  * **Type**: SIMD
3036  *
3037  * **Syntax**:\n
3038  * ~~~
3039  * KMADS Rd, Rs1, Rs2
3040  * KMADRS Rd, Rs1, Rs2
3041  * KMAXDS Rd, Rs1, Rs2
3042  * ~~~
3043  *
3044  * **Purpose**:\n
3045  * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then
3046  * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
3047  * the corresponding 32-bit elements in a third register. The addition result may be saturated.
3048  * * KMADS: rd.W[x] + (top*top - bottom*bottom) (per 32-bit element)
3049  * * KMADRS: rd.W[x] + (bottom*bottom - top*top) (per 32-bit element)
3050  * * KMAXDS: rd.W[x] + (top*bottom - bottom*top) (per 32-bit element)
3051  *
3052  * **Description**:\n
3053  * For the `KMADS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
3054  * the bottom 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
3055  * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
3056  * elements in Rs2.
3057  * For the `KMADRS` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
3058  * top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
3059  * multiplying the bottom 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-
3060  * bit elements in Rs2.
3061  * For the `KMAXDS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
3062  * the top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
3063  * multiplying the top 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-bit
3064  * elements in Rs2.
3065  * The subtraction result is then added to the content of the corresponding 32-bit elements in Rd. If the
3066  * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
3067  * the OV bit is set to 1. The 32-bit results after saturation are written to Rd. The 16-bit contents of Rs1
3068  * and Rs2 are treated as signed integers.
3069  *
3070  * **Operations**:\n
3071  * ~~~
3072  * // KMADS
3073  * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
3074  * // KMADRS
3075  * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
3076  * // KMAXDS
3077  * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
3078  * if (res[x] > (2^31)-1) {
3079  *   res[x] = (2^31)-1;
3080  *   OV = 1;
3081  * } else if (res[x] < -2^31) {
3082  *   res[x] = -2^31;
3083  *   OV = 1;
3084  * }
3085  * Rd.W[x] = res[x];
3086  * for RV32: x=0
3087  * for RV64: x=1...0
3088  * ~~~
3089  *
3090  * \param [in]  t    long type of value stored in t
3091  * \param [in]  a    unsigned long type of value stored in a
3092  * \param [in]  b    unsigned long type of value stored in b
3093  * \return value stored in long type
3094  */
__RV_KMADRS(long t,unsigned long a,unsigned long b)3095 __STATIC_FORCEINLINE long __RV_KMADRS(long t, unsigned long a, unsigned long b)
3096 {
3097     __ASM volatile("kmadrs %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
3098     return t;
3099 }
3100 /* ===== Inline Function End for 3.40.2. KMADRS ===== */
3101 
3102 /* ===== Inline Function Start for 3.40.3. KMAXDS ===== */
3103 /**
3104  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
3105  * \brief KMAXDS (SIMD Saturating Signed Crossed Multiply Two Halfs & Subtract & Add)
3106  * \details
3107  * **Type**: SIMD
3108  *
3109  * **Syntax**:\n
3110  * ~~~
3111  * KMADS Rd, Rs1, Rs2
3112  * KMADRS Rd, Rs1, Rs2
3113  * KMAXDS Rd, Rs1, Rs2
3114  * ~~~
3115  *
3116  * **Purpose**:\n
3117  * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then
3118  * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
3119  * the corresponding 32-bit elements in a third register. The addition result may be saturated.
3120  * * KMADS: rd.W[x] + (top*top - bottom*bottom) (per 32-bit element)
3121  * * KMADRS: rd.W[x] + (bottom*bottom - top*top) (per 32-bit element)
3122  * * KMAXDS: rd.W[x] + (top*bottom - bottom*top) (per 32-bit element)
3123  *
3124  * **Description**:\n
3125  * For the `KMADS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
3126  * the bottom 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
3127  * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
3128  * elements in Rs2.
3129  * For the `KMADRS` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
3130  * top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
3131  * multiplying the bottom 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-
3132  * bit elements in Rs2.
3133  * For the `KMAXDS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
3134  * the top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
3135  * multiplying the top 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-bit
3136  * elements in Rs2.
3137  * The subtraction result is then added to the content of the corresponding 32-bit elements in Rd. If the
3138  * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
3139  * the OV bit is set to 1. The 32-bit results after saturation are written to Rd. The 16-bit contents of Rs1
3140  * and Rs2 are treated as signed integers.
3141  *
3142  * **Operations**:\n
3143  * ~~~
3144  * // KMADS
3145  * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
3146  * // KMADRS
3147  * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
3148  * // KMAXDS
3149  * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
3150  * if (res[x] > (2^31)-1) {
3151  *   res[x] = (2^31)-1;
3152  *   OV = 1;
3153  * } else if (res[x] < -2^31) {
3154  *   res[x] = -2^31;
3155  *   OV = 1;
3156  * }
3157  * Rd.W[x] = res[x];
3158  * for RV32: x=0
3159  * for RV64: x=1...0
3160  * ~~~
3161  *
3162  * \param [in]  t    long type of value stored in t
3163  * \param [in]  a    unsigned long type of value stored in a
3164  * \param [in]  b    unsigned long type of value stored in b
3165  * \return value stored in long type
3166  */
__RV_KMAXDS(long t,unsigned long a,unsigned long b)3167 __STATIC_FORCEINLINE long __RV_KMAXDS(long t, unsigned long a, unsigned long b)
3168 {
3169     __ASM volatile("kmaxds %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
3170     return t;
3171 }
3172 /* ===== Inline Function End for 3.40.3. KMAXDS ===== */
3173 
3174 /* ===== Inline Function Start for 3.41. KMAR64 ===== */
3175 /**
3176  * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
3177  * \brief KMAR64 (Signed Multiply and Saturating Add to 64-Bit Data)
3178  * \details
3179  * **Type**: DSP (64-bit Profile)
3180  *
3181  * **Syntax**:\n
3182  * ~~~
3183  * KMAR64 Rd, Rs1, Rs2
3184  * ~~~
3185  *
3186  * **Purpose**:\n
3187  * Multiply the 32-bit signed elements in two registers and add the 64-bit multiplication
3188  * results to the 64-bit signed data of a pair of registers (RV32) or a register (RV64). The result is
3189  * saturated to the Q63 range and written back to the pair of registers (RV32) or the register (RV64).
3190  *
3191  * **RV32 Description**:\n
3192  * This instruction multiplies the 32-bit signed data of Rs1 with that of Rs2. It adds
3193  * the 64-bit multiplication result to the 64-bit signed data of an even/odd pair of registers specified by
3194  * Rd(4,1) with unlimited precision. If the 64-bit addition result is beyond the Q63 number range (-2^63 <=
3195  * Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The saturated result is written back
3196  * to the even/odd pair of registers specified by Rd(4,1).
3197  * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register
3198  * pair includes register 2d and 2d+1.
3199  * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
3200  * of the pair contains the low 32-bit of the result.
3201  *
3202  * **RV64 Description**:\n
3203  * This instruction multiplies the 32-bit signed elements of Rs1 with that of Rs2. It
3204  * adds the 64-bit multiplication results to the 64-bit signed data of Rd with unlimited precision. If the
3205  * 64-bit addition result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range
3206  * and the OV bit is set to 1. The saturated result is written back to Rd.
3207  *
3208  * **Operations**:\n
3209  * ~~~
3210  * RV32:
3211  * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
3212  * result = R[t_H].R[t_L] + (Rs1 * Rs2);
3213  * if (result > (2^63)-1) {
3214  *   result = (2^63)-1; OV = 1;
3215  * } else if (result < -2^63) {
3216  *   result = -2^63; OV = 1;
3217  * }
3218  * R[t_H].R[t_L] = result;
3219  * RV64:
3220  * // `result` has unlimited precision
3221  * result = Rd + (Rs1.W[0] * Rs2.W[0]) + (Rs1.W[1] * Rs2.W[1]);
3222  * if (result > (2^63)-1) {
3223  *   result = (2^63)-1; OV = 1;
3224  * } else if (result < -2^63) {
3225  *   result = -2^63; OV = 1;
3226  * }
3227  * Rd = result;
3228  * ~~~
3229  *
3230  * \param [in]  t    long long type of value stored in t
3231  * \param [in]  a    long type of value stored in a
3232  * \param [in]  b    long type of value stored in b
3233  * \return value stored in long long type
3234  */
__RV_KMAR64(long long t,long a,long b)3235 __STATIC_FORCEINLINE long long __RV_KMAR64(long long t, long a, long b)
3236 {
3237     __ASM volatile("kmar64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
3238     return t;
3239 }
3240 /* ===== Inline Function End for 3.41. KMAR64 ===== */
3241 
3242 /* ===== Inline Function Start for 3.42.1. KMDA ===== */
3243 /**
3244  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
3245  * \brief KMDA (SIMD Signed Multiply Two Halfs and Add)
3246  * \details
3247  * **Type**: SIMD
3248  *
3249  * **Syntax**:\n
3250  * ~~~
3251  * KMDA Rd, Rs1, Rs2
3252  * KMXDA Rd, Rs1, Rs2
3253  * ~~~
3254  *
3255  * **Purpose**:\n
 * Perform two signed 16-bit multiplications on the 32-bit elements of two registers, and then
 * add the two 32-bit results together. The addition result may be saturated.
3258  * * KMDA: top*top + bottom*bottom (per 32-bit element)
3259  * * KMXDA: top*bottom + bottom*top (per 32-bit element)
3260  *
3261  * **Description**:\n
3262  * For the `KMDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
3263  * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
3264  * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-
3265  * bit elements of Rs2.
3266  * For the `KMXDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
3267  * with the top 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
3268  * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of the
3269  * 32-bit elements of Rs2.
3270  * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^31-1.
3271  * The final results are written to Rd. The 16-bit contents are treated as signed integers.
3272  *
3273  * **Operations**:\n
3274  * ~~~
 * if ((Rs1.W[x] != 0x80008000) or (Rs2.W[x] != 0x80008000)) {
 *   // KMDA
 *   Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
 *   // KMXDA
 *   Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
 * } else {
 *   Rd.W[x] = 0x7fffffff;
 *   OV = 1;
 * }
 * for RV32: x=0
 * for RV64: x=1...0
3279  * ~~~
3280  *
3281  * \param [in]  a    unsigned long type of value stored in a
3282  * \param [in]  b    unsigned long type of value stored in b
3283  * \return value stored in long type
3284  */
__RV_KMDA(unsigned long a,unsigned long b)3285 __STATIC_FORCEINLINE long __RV_KMDA(unsigned long a, unsigned long b)
3286 {
3287     register long result;
3288     __ASM volatile("kmda %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
3289     return result;
3290 }
3291 /* ===== Inline Function End for 3.42.1. KMDA ===== */
3292 
3293 /* ===== Inline Function Start for 3.42.2. KMXDA ===== */
3294 /**
3295  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
3296  * \brief KMXDA (SIMD Signed Crossed Multiply Two Halfs and Add)
3297  * \details
3298  * **Type**: SIMD
3299  *
3300  * **Syntax**:\n
3301  * ~~~
3302  * KMDA Rd, Rs1, Rs2
3303  * KMXDA Rd, Rs1, Rs2
3304  * ~~~
3305  *
3306  * **Purpose**:\n
 * Perform two signed 16-bit multiplications on the 32-bit elements of two registers, and then
 * add the two 32-bit results together. The addition result may be saturated.
3309  * * KMDA: top*top + bottom*bottom (per 32-bit element)
3310  * * KMXDA: top*bottom + bottom*top (per 32-bit element)
3311  *
3312  * **Description**:\n
3313  * For the `KMDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
3314  * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
3315  * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-
3316  * bit elements of Rs2.
3317  * For the `KMXDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
3318  * with the top 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
3319  * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of the
3320  * 32-bit elements of Rs2.
3321  * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^31-1.
3322  * The final results are written to Rd. The 16-bit contents are treated as signed integers.
3323  *
3324  * **Operations**:\n
3325  * ~~~
 * if ((Rs1.W[x] != 0x80008000) or (Rs2.W[x] != 0x80008000)) {
 *   // KMDA
 *   Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
 *   // KMXDA
 *   Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
 * } else {
 *   Rd.W[x] = 0x7fffffff;
 *   OV = 1;
 * }
 * for RV32: x=0
 * for RV64: x=1...0
3330  * ~~~
3331  *
3332  * \param [in]  a    unsigned long type of value stored in a
3333  * \param [in]  b    unsigned long type of value stored in b
3334  * \return value stored in long type
3335  */
__RV_KMXDA(unsigned long a,unsigned long b)3336 __STATIC_FORCEINLINE long __RV_KMXDA(unsigned long a, unsigned long b)
3337 {
3338     register long result;
3339     __ASM volatile("kmxda %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
3340     return result;
3341 }
3342 /* ===== Inline Function End for 3.42.2. KMXDA ===== */
3343 
3344 /* ===== Inline Function Start for 3.43.1. KMMAC ===== */
3345 /**
3346  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
3347  * \brief KMMAC (SIMD Saturating MSW Signed Multiply Word and Add)
3348  * \details
3349  * **Type**: SIMD
3350  *
3351  * **Syntax**:\n
3352  * ~~~
3353  * KMMAC Rd, Rs1, Rs2
3354  * KMMAC.u Rd, Rs1, Rs2
3355  * ~~~
3356  *
3357  * **Purpose**:\n
3358  * Multiply the signed 32-bit integer elements of two registers and add the most significant
3359  * 32-bit results with the signed 32-bit integer elements of a third register. The addition results are
3360  * saturated first and then written back to the third register. The `.u` form performs an additional
3361  * rounding up operation on the multiplication results before adding the most significant 32-bit part
3362  * of the results.
3363  *
3364  * **Description**:\n
3365  * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
3366  * and adds the most significant 32-bit multiplication results with the signed 32-bit elements of Rd. If
3367  * the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range
3368  * and the OV bit is set to 1. The results after saturation are written to Rd. The `.u` form of the
3369  * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
3370  * adding a 1 to bit 31 of the results.
3371  *
3372  * **Operations**:\n
3373  * ~~~
3374  * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
3375  * if (`.u` form) {
3376  *   Round[x][32:0] = Mres[x][63:31] + 1;
3377  *   res[x] = Rd.W[x] + Round[x][32:1];
3378  * } else {
3379  *   res[x] = Rd.W[x] + Mres[x][63:32];
3380  * }
3381  * if (res[x] > (2^31)-1) {
3382  *   res[x] = (2^31)-1;
3383  *   OV = 1;
3384  * } else if (res[x] < -2^31) {
3385  *   res[x] = -2^31;
3386  *   OV = 1;
3387  * }
3388  * Rd.W[x] = res[x];
3389  * for RV32: x=0
3390  * for RV64: x=1...0
3391  * ~~~
3392  *
3393  * \param [in]  t    long type of value stored in t
3394  * \param [in]  a    long type of value stored in a
3395  * \param [in]  b    long type of value stored in b
3396  * \return value stored in long type
3397  */
__RV_KMMAC(long t,long a,long b)3398 __STATIC_FORCEINLINE long __RV_KMMAC(long t, long a, long b)
3399 {
3400     __ASM volatile("kmmac %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
3401     return t;
3402 }
3403 /* ===== Inline Function End for 3.43.1. KMMAC ===== */
3404 
3405 /* ===== Inline Function Start for 3.43.2. KMMAC.u ===== */
3406 /**
3407  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
3408  * \brief KMMAC.u (SIMD Saturating MSW Signed Multiply Word and Add with Rounding)
3409  * \details
3410  * **Type**: SIMD
3411  *
3412  * **Syntax**:\n
3413  * ~~~
3414  * KMMAC Rd, Rs1, Rs2
3415  * KMMAC.u Rd, Rs1, Rs2
3416  * ~~~
3417  *
3418  * **Purpose**:\n
3419  * Multiply the signed 32-bit integer elements of two registers and add the most significant
3420  * 32-bit results with the signed 32-bit integer elements of a third register. The addition results are
3421  * saturated first and then written back to the third register. The `.u` form performs an additional
3422  * rounding up operation on the multiplication results before adding the most significant 32-bit part
3423  * of the results.
3424  *
3425  * **Description**:\n
3426  * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
3427  * and adds the most significant 32-bit multiplication results with the signed 32-bit elements of Rd. If
3428  * the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range
3429  * and the OV bit is set to 1. The results after saturation are written to Rd. The `.u` form of the
3430  * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
3431  * adding a 1 to bit 31 of the results.
3432  *
3433  * **Operations**:\n
3434  * ~~~
3435  * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
3436  * if (`.u` form) {
3437  *   Round[x][32:0] = Mres[x][63:31] + 1;
3438  *   res[x] = Rd.W[x] + Round[x][32:1];
3439  * } else {
3440  *   res[x] = Rd.W[x] + Mres[x][63:32];
3441  * }
3442  * if (res[x] > (2^31)-1) {
3443  *   res[x] = (2^31)-1;
3444  *   OV = 1;
3445  * } else if (res[x] < -2^31) {
3446  *   res[x] = -2^31;
3447  *   OV = 1;
3448  * }
3449  * Rd.W[x] = res[x];
3450  * for RV32: x=0
3451  * for RV64: x=1...0
3452  * ~~~
3453  *
3454  * \param [in]  t    long type of value stored in t
3455  * \param [in]  a    long type of value stored in a
3456  * \param [in]  b    long type of value stored in b
3457  * \return value stored in long type
3458  */
__RV_KMMAC_U(long t,long a,long b)3459 __STATIC_FORCEINLINE long __RV_KMMAC_U(long t, long a, long b)
3460 {
3461     __ASM volatile("kmmac.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
3462     return t;
3463 }
3464 /* ===== Inline Function End for 3.43.2. KMMAC.u ===== */
3465 
3466 /* ===== Inline Function Start for 3.44.1. KMMAWB ===== */
3467 /**
3468  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
3469  * \brief KMMAWB (SIMD Saturating MSW Signed Multiply Word and Bottom Half and Add)
3470  * \details
3471  * **Type**: SIMD
3472  *
3473  * **Syntax**:\n
3474  * ~~~
3475  * KMMAWB Rd, Rs1, Rs2
3476  * KMMAWB.u Rd, Rs1, Rs2
3477  * ~~~
3478  *
3479  * **Purpose**:\n
3480  * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
3481  * corresponding 32-bit elements of another register and add the most significant 32-bit results with
3482  * the corresponding signed 32-bit elements of a third register. The addition result is written to the
3483  * corresponding 32-bit elements of the third register. The `.u` form rounds up the multiplication
3484  * results from the most significant discarded bit before the addition operations.
3485  *
3486  * **Description**:\n
3487  * This instruction multiplies the signed 32-bit elements of Rs1 with the signed bottom 16-bit content
3488  * of the corresponding 32-bit elements of Rs2 and adds the most significant 32-bit multiplication
3489  * results with the corresponding signed 32-bit elements of Rd. If the addition result is beyond the Q31
3490  * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The results
3491  * after saturation are written to the corresponding 32-bit elements of Rd. The `.u` form of the
3492  * instruction rounds up the most significant 32-bit of the 48-bit multiplication results by adding a 1 to
3493  * bit 15 of the result before the addition operations.
3494  *
3495  * **Operations**:\n
3496  * ~~~
3497  * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[0];
3498  * if (`.u` form) {
3499  *   Round[x][32:0] = Mres[x][47:15] + 1;
3500  *   res[x] = Rd.W[x] + Round[x][32:1];
3501  * } else {
3502  *   res[x] = Rd.W[x] + Mres[x][47:16];
3503  * }
3504  * if (res[x] > (2^31)-1) {
3505  *   res[x] = (2^31)-1;
3506  *   OV = 1;
3507  * } else if (res[x] < -2^31) {
3508  *   res[x] = -2^31;
3509  *   OV = 1;
3510  * }
3511  * Rd.W[x] = res[x];
3512  * for RV32: x=0
3513  * for RV64: x=1...0
3514  * ~~~
3515  *
3516  * \param [in]  t    long type of value stored in t
3517  * \param [in]  a    unsigned long type of value stored in a
3518  * \param [in]  b    unsigned long type of value stored in b
3519  * \return value stored in long type
3520  */
__RV_KMMAWB(long t,unsigned long a,unsigned long b)3521 __STATIC_FORCEINLINE long __RV_KMMAWB(long t, unsigned long a, unsigned long b)
3522 {
3523     __ASM volatile("kmmawb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
3524     return t;
3525 }
3526 /* ===== Inline Function End for 3.44.1. KMMAWB ===== */
3527 
3528 /* ===== Inline Function Start for 3.44.2. KMMAWB.u ===== */
3529 /**
3530  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
3531  * \brief KMMAWB.u (SIMD Saturating MSW Signed Multiply Word and Bottom Half and Add with Rounding)
3532  * \details
3533  * **Type**: SIMD
3534  *
3535  * **Syntax**:\n
3536  * ~~~
3537  * KMMAWB Rd, Rs1, Rs2
3538  * KMMAWB.u Rd, Rs1, Rs2
3539  * ~~~
3540  *
3541  * **Purpose**:\n
3542  * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
3543  * corresponding 32-bit elements of another register and add the most significant 32-bit results with
3544  * the corresponding signed 32-bit elements of a third register. The addition result is written to the
3545  * corresponding 32-bit elements of the third register. The `.u` form rounds up the multiplication
3546  * results from the most significant discarded bit before the addition operations.
3547  *
3548  * **Description**:\n
3549  * This instruction multiplies the signed 32-bit elements of Rs1 with the signed bottom 16-bit content
3550  * of the corresponding 32-bit elements of Rs2 and adds the most significant 32-bit multiplication
3551  * results with the corresponding signed 32-bit elements of Rd. If the addition result is beyond the Q31
3552  * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The results
3553  * after saturation are written to the corresponding 32-bit elements of Rd. The `.u` form of the
3554  * instruction rounds up the most significant 32-bit of the 48-bit multiplication results by adding a 1 to
3555  * bit 15 of the result before the addition operations.
3556  *
3557  * **Operations**:\n
3558  * ~~~
3559  * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[0];
3560  * if (`.u` form) {
3561  *   Round[x][32:0] = Mres[x][47:15] + 1;
3562  *   res[x] = Rd.W[x] + Round[x][32:1];
3563  * } else {
3564  *   res[x] = Rd.W[x] + Mres[x][47:16];
3565  * }
3566  * if (res[x] > (2^31)-1) {
3567  *   res[x] = (2^31)-1;
3568  *   OV = 1;
3569  * } else if (res[x] < -2^31) {
3570  *   res[x] = -2^31;
3571  *   OV = 1;
3572  * }
3573  * Rd.W[x] = res[x];
3574  * for RV32: x=0
3575  * for RV64: x=1...0
3576  * ~~~
3577  *
3578  * \param [in]  t    long type of value stored in t
3579  * \param [in]  a    unsigned long type of value stored in a
3580  * \param [in]  b    unsigned long type of value stored in b
3581  * \return value stored in long type
3582  */
__RV_KMMAWB_U(long t,unsigned long a,unsigned long b)3583 __STATIC_FORCEINLINE long __RV_KMMAWB_U(long t, unsigned long a, unsigned long b)
3584 {
3585     __ASM volatile("kmmawb.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
3586     return t;
3587 }
3588 /* ===== Inline Function End for 3.44.2. KMMAWB.u ===== */
3589 
3590 /* ===== Inline Function Start for 3.45.1. KMMAWB2 ===== */
3591 /**
3592  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
3593  * \brief KMMAWB2 (SIMD Saturating MSW Signed Multiply Word and Bottom Half & 2 and Add)
3594  * \details
3595  * **Type**: SIMD
3596  *
3597  * **Syntax**:\n
3598  * ~~~
3599  * KMMAWB2 Rd, Rs1, Rs2
3600  * KMMAWB2.u Rd, Rs1, Rs2
3601  * ~~~
3602  *
3603  * **Purpose**:\n
3604  * Multiply the signed 32-bit elements of one register and the bottom 16-bit of the
3605  * corresponding 32-bit elements of another register, double the multiplication results and add the
3606  * saturated most significant 32-bit results with the corresponding signed 32-bit elements of a third
3607  * register. The saturated addition result is written to the corresponding 32-bit elements of the third
3608  * register. The `.u` form rounds up the multiplication results from the most significant discarded bit
3609  * before the addition operations.
3610  *
3611  * **Description**:\n
3612  * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed bottom 16-bit Q15
3613  * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
3614  * adds the saturated most significant 32-bit Q31 multiplication results with the corresponding signed
3615  * 32-bit elements of Rd. If the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
3616  * saturated to the range and the OV bit is set to 1. The results after saturation are written to the
3617  * corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the most significant
3618  * 32-bit of the 48-bit Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of
3619  * the result before the addition operations.
3620  *
3621  * **Operations**:\n
3622  * ~~~
3623  * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[0] == 0x8000)) {
3624  *   addop.W[x] = 0x7fffffff;
3625  *   OV = 1;
3626  * } else {
3627  *   Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[0];
3628  *   if (`.u` form) {
3629  *     Mres[x][47:14] = Mres[x][47:14] + 1;
3630  *   }
3631  *   addop.W[x] = Mres[x][46:15]; // doubling
3632  * }
3633  * res[x] = Rd.W[x] + addop.W[x];
3634  * if (res[x] > (2^31)-1) {
3635  *   res[x] = (2^31)-1;
3636  *   OV = 1;
3637  * } else if (res[x] < -2^31) {
3638  *   res[x] = -2^31;
3639  *   OV = 1;
3640  * }
3641  * Rd.W[x] = res[x];
3642  * for RV32: x=0
3643  * for RV64: x=1...0
3644  * ~~~
3645  *
3646  * \param [in]  t    long type of value stored in t
3647  * \param [in]  a    unsigned long type of value stored in a
3648  * \param [in]  b    unsigned long type of value stored in b
3649  * \return value stored in long type
3650  */
__RV_KMMAWB2(long t,unsigned long a,unsigned long b)3651 __STATIC_FORCEINLINE long __RV_KMMAWB2(long t, unsigned long a, unsigned long b)
3652 {
3653     __ASM volatile("kmmawb2 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
3654     return t;
3655 }
3656 /* ===== Inline Function End for 3.45.1. KMMAWB2 ===== */
3657 
3658 /* ===== Inline Function Start for 3.45.2. KMMAWB2.u ===== */
3659 /**
3660  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
3661  * \brief KMMAWB2.u (SIMD Saturating MSW Signed Multiply Word and Bottom Half & 2 and Add with Rounding)
3662  * \details
3663  * **Type**: SIMD
3664  *
3665  * **Syntax**:\n
3666  * ~~~
3667  * KMMAWB2 Rd, Rs1, Rs2
3668  * KMMAWB2.u Rd, Rs1, Rs2
3669  * ~~~
3670  *
3671  * **Purpose**:\n
3672  * Multiply the signed 32-bit elements of one register and the bottom 16-bit of the
3673  * corresponding 32-bit elements of another register, double the multiplication results and add the
3674  * saturated most significant 32-bit results with the corresponding signed 32-bit elements of a third
3675  * register. The saturated addition result is written to the corresponding 32-bit elements of the third
3676  * register. The `.u` form rounds up the multiplication results from the most significant discarded bit
3677  * before the addition operations.
3678  *
3679  * **Description**:\n
3680  * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed bottom 16-bit Q15
3681  * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
3682  * adds the saturated most significant 32-bit Q31 multiplication results with the corresponding signed
3683  * 32-bit elements of Rd. If the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
3684  * saturated to the range and the OV bit is set to 1. The results after saturation are written to the
3685  * corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the most significant
3686  * 32-bit of the 48-bit Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of
3687  * the result before the addition operations.
3688  *
3689  * **Operations**:\n
3690  * ~~~
3691  * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[0] == 0x8000)) {
3692  *   addop.W[x] = 0x7fffffff;
3693  *   OV = 1;
3694  * } else {
3695  *   Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[0];
3696  *   if (`.u` form) {
3697  *     Mres[x][47:14] = Mres[x][47:14] + 1;
3698  *   }
3699  *   addop.W[x] = Mres[x][46:15]; // doubling
3700  * }
3701  * res[x] = Rd.W[x] + addop.W[x];
3702  * if (res[x] > (2^31)-1) {
3703  *   res[x] = (2^31)-1;
3704  *   OV = 1;
3705  * } else if (res[x] < -2^31) {
3706  *   res[x] = -2^31;
3707  *   OV = 1;
3708  * }
3709  * Rd.W[x] = res[x];
3710  * for RV32: x=0
3711  * for RV64: x=1...0
3712  * ~~~
3713  *
3714  * \param [in]  t    long type of value stored in t
3715  * \param [in]  a    unsigned long type of value stored in a
3716  * \param [in]  b    unsigned long type of value stored in b
3717  * \return value stored in long type
3718  */
__RV_KMMAWB2_U(long t,unsigned long a,unsigned long b)3719 __STATIC_FORCEINLINE long __RV_KMMAWB2_U(long t, unsigned long a, unsigned long b)
3720 {
3721     __ASM volatile("kmmawb2.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
3722     return t;
3723 }
3724 /* ===== Inline Function End for 3.45.2. KMMAWB2.u ===== */
3725 
3726 /* ===== Inline Function Start for 3.46.1. KMMAWT ===== */
3727 /**
3728  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
3729  * \brief KMMAWT (SIMD Saturating MSW Signed Multiply Word and Top Half and Add)
3730  * \details
3731  * **Type**: SIMD
3732  *
3733  * **Syntax**:\n
3734  * ~~~
3735  * KMMAWT Rd, Rs1, Rs2
 * KMMAWT.u Rd, Rs1, Rs2
3737  * ~~~
3738  *
3739  * **Purpose**:\n
3740  * Multiply the signed 32-bit integer elements of one register and the signed top 16-bit of the
3741  * corresponding 32-bit elements of another register and add the most significant 32-bit results with
3742  * the corresponding signed 32-bit elements of a third register. The addition results are written to the
3743  * corresponding 32-bit elements of the third register. The `.u` form rounds up the multiplication
3744  * results from the most significant discarded bit before the addition operations.
3745  *
3746  * **Description**:\n
3747  * This instruction multiplies the signed 32-bit elements of Rs1 with the signed top 16-bit of the
3748  * corresponding 32-bit elements of Rs2 and adds the most significant 32-bit multiplication results
3749  * with the corresponding signed 32-bit elements of Rd. If the addition result is beyond the Q31
3750  * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The results
3751  * after saturation are written to the corresponding 32-bit elements of Rd. The `.u` form of the
3752  * instruction rounds up the most significant 32-bit of the 48-bit multiplication results by adding a 1 to
3753  * bit 15 of the result before the addition operations.
3754  *
3755  * **Operations**:\n
3756  * ~~~
3757  * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[1];
3758  * if (`.u` form) {
3759  *   Round[x][32:0] = Mres[x][47:15] + 1;
3760  *   res[x] = Rd.W[x] + Round[x][32:1];
3761  * } else {
3762  *   res[x] = Rd.W[x] + Mres[x][47:16];
3763  * }
3764  * if (res[x] > (2^31)-1) {
3765  *   res[x] = (2^31)-1;
3766  *   OV = 1;
3767  * } else if (res[x] < -2^31) {
3768  *   res[x] = -2^31;
3769  *   OV = 1;
3770  * }
3771  * Rd.W[x] = res[x];
3772  * for RV32: x=0
3773  * for RV64: x=1...0
3774  * ~~~
3775  *
3776  * \param [in]  t    long type of value stored in t
3777  * \param [in]  a    unsigned long type of value stored in a
3778  * \param [in]  b    unsigned long type of value stored in b
3779  * \return value stored in long type
3780  */
__RV_KMMAWT(long t,unsigned long a,unsigned long b)3781 __STATIC_FORCEINLINE long __RV_KMMAWT(long t, unsigned long a, unsigned long b)
3782 {
3783     __ASM volatile("kmmawt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
3784     return t;
3785 }
3786 /* ===== Inline Function End for 3.46.1. KMMAWT ===== */
3787 
3788 /* ===== Inline Function Start for 3.46.2. KMMAWT.u ===== */
3789 /**
3790  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
3791  * \brief KMMAWT.u (SIMD Saturating MSW Signed Multiply Word and Top Half and Add with Rounding)
3792  * \details
3793  * **Type**: SIMD
3794  *
3795  * **Syntax**:\n
3796  * ~~~
3797  * KMMAWT Rd, Rs1, Rs2
 * KMMAWT.u Rd, Rs1, Rs2
3799  * ~~~
3800  *
3801  * **Purpose**:\n
3802  * Multiply the signed 32-bit integer elements of one register and the signed top 16-bit of the
3803  * corresponding 32-bit elements of another register and add the most significant 32-bit results with
3804  * the corresponding signed 32-bit elements of a third register. The addition results are written to the
3805  * corresponding 32-bit elements of the third register. The `.u` form rounds up the multiplication
3806  * results from the most significant discarded bit before the addition operations.
3807  *
3808  * **Description**:\n
3809  * This instruction multiplies the signed 32-bit elements of Rs1 with the signed top 16-bit of the
3810  * corresponding 32-bit elements of Rs2 and adds the most significant 32-bit multiplication results
3811  * with the corresponding signed 32-bit elements of Rd. If the addition result is beyond the Q31
3812  * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The results
3813  * after saturation are written to the corresponding 32-bit elements of Rd. The `.u` form of the
3814  * instruction rounds up the most significant 32-bit of the 48-bit multiplication results by adding a 1 to
3815  * bit 15 of the result before the addition operations.
3816  *
3817  * **Operations**:\n
3818  * ~~~
3819  * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[1];
3820  * if (`.u` form) {
3821  *   Round[x][32:0] = Mres[x][47:15] + 1;
3822  *   res[x] = Rd.W[x] + Round[x][32:1];
3823  * } else {
3824  *   res[x] = Rd.W[x] + Mres[x][47:16];
3825  * }
3826  * if (res[x] > (2^31)-1) {
3827  *   res[x] = (2^31)-1;
3828  *   OV = 1;
3829  * } else if (res[x] < -2^31) {
3830  *   res[x] = -2^31;
3831  *   OV = 1;
3832  * }
3833  * Rd.W[x] = res[x];
3834  * for RV32: x=0
3835  * for RV64: x=1...0
3836  * ~~~
3837  *
3838  * \param [in]  t    long type of value stored in t
3839  * \param [in]  a    unsigned long type of value stored in a
3840  * \param [in]  b    unsigned long type of value stored in b
3841  * \return value stored in long type
3842  */
__RV_KMMAWT_U(long t,unsigned long a,unsigned long b)3843 __STATIC_FORCEINLINE long __RV_KMMAWT_U(long t, unsigned long a, unsigned long b)
3844 {
3845     __ASM volatile("kmmawt.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
3846     return t;
3847 }
3848 /* ===== Inline Function End for 3.46.2. KMMAWT.u ===== */
3849 
3850 /* ===== Inline Function Start for 3.47.1. KMMAWT2 ===== */
3851 /**
3852  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
3853  * \brief KMMAWT2 (SIMD Saturating MSW Signed Multiply Word and Top Half & 2 and Add)
3854  * \details
3855  * **Type**: SIMD
3856  *
3857  * **Syntax**:\n
3858  * ~~~
3859  * KMMAWT2 Rd, Rs1, Rs2
3860  * KMMAWT2.u Rd, Rs1, Rs2
3861  * ~~~
3862  *
3863  * **Purpose**:\n
3864  * Multiply the signed 32-bit elements of one register and the top 16-bit of the
3865  * corresponding 32-bit elements of another register, double the multiplication results and add the
3866  * saturated most significant 32-bit results with the corresponding signed 32-bit elements of a third
3867  * register. The saturated addition result is written to the corresponding 32-bit elements of the third
3868  * register. The `.u` form rounds up the multiplication results from the most significant discarded bit
3869  * before the addition operations.
3870  *
3871  * **Description**:\n
3872  * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed top 16-bit Q15
3873  * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
3874  * adds the saturated most significant 32-bit Q31 multiplication results with the corresponding signed
3875  * 32-bit elements of Rd. If the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
3876  * saturated to the range and the OV bit is set to 1. The results after saturation are written to the
3877  * corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the most significant
3878  * 32-bit of the 48-bit Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of
3879  * the result before the addition operations.
3880  *
3881  * **Operations**:\n
3882  * ~~~
3883  * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[1] == 0x8000)) {
3884  *   addop.W[x] = 0x7fffffff;
3885  *   OV = 1;
3886  * } else {
3887  *   Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[1];
3888  *   if (`.u` form) {
3889  *     Mres[x][47:14] = Mres[x][47:14] + 1;
3890  *   }
3891  *   addop.W[x] = Mres[x][46:15]; // doubling
3892  * }
3893  * res[x] = Rd.W[x] + addop.W[x];
3894  * if (res[x] > (2^31)-1) {
3895  *   res[x] = (2^31)-1;
3896  *   OV = 1;
3897  * } else if (res[x] < -2^31) {
3898  *   res[x] = -2^31;
3899  *   OV = 1;
3900  * }
3901  * Rd.W[x] = res[x];
3902  * for RV32: x=0
3903  * for RV64: x=1...0
3904  * ~~~
3905  *
3906  * \param [in]  t    long type of value stored in t
3907  * \param [in]  a    unsigned long type of value stored in a
3908  * \param [in]  b    unsigned long type of value stored in b
3909  * \return value stored in long type
3910  */
__RV_KMMAWT2(long t,unsigned long a,unsigned long b)3911 __STATIC_FORCEINLINE long __RV_KMMAWT2(long t, unsigned long a, unsigned long b)
3912 {
3913     __ASM volatile("kmmawt2 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
3914     return t;
3915 }
3916 /* ===== Inline Function End for 3.47.1. KMMAWT2 ===== */
3917 
3918 /* ===== Inline Function Start for 3.47.2. KMMAWT2.u ===== */
3919 /**
3920  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
3921  * \brief KMMAWT2.u (SIMD Saturating MSW Signed Multiply Word and Top Half & 2 and Add with Rounding)
3922  * \details
3923  * **Type**: SIMD
3924  *
3925  * **Syntax**:\n
3926  * ~~~
3927  * KMMAWT2 Rd, Rs1, Rs2
3928  * KMMAWT2.u Rd, Rs1, Rs2
3929  * ~~~
3930  *
3931  * **Purpose**:\n
3932  * Multiply the signed 32-bit elements of one register and the top 16-bit of the
3933  * corresponding 32-bit elements of another register, double the multiplication results and add the
3934  * saturated most significant 32-bit results with the corresponding signed 32-bit elements of a third
3935  * register. The saturated addition result is written to the corresponding 32-bit elements of the third
3936  * register. The `.u` form rounds up the multiplication results from the most significant discarded bit
3937  * before the addition operations.
3938  *
3939  * **Description**:\n
3940  * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed top 16-bit Q15
3941  * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
3942  * adds the saturated most significant 32-bit Q31 multiplication results with the corresponding signed
3943  * 32-bit elements of Rd. If the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
3944  * saturated to the range and the OV bit is set to 1. The results after saturation are written to the
3945  * corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the most significant
3946  * 32-bit of the 48-bit Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of
3947  * the result before the addition operations.
3948  *
3949  * **Operations**:\n
3950  * ~~~
3951  * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[1] == 0x8000)) {
3952  *   addop.W[x] = 0x7fffffff;
3953  *   OV = 1;
3954  * } else {
3955  *   Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[1];
3956  *   if (`.u` form) {
3957  *     Mres[x][47:14] = Mres[x][47:14] + 1;
3958  *   }
3959  *   addop.W[x] = Mres[x][46:15]; // doubling
3960  * }
3961  * res[x] = Rd.W[x] + addop.W[x];
3962  * if (res[x] > (2^31)-1) {
3963  *   res[x] = (2^31)-1;
3964  *   OV = 1;
3965  * } else if (res[x] < -2^31) {
3966  *   res[x] = -2^31;
3967  *   OV = 1;
3968  * }
3969  * Rd.W[x] = res[x];
3970  * for RV32: x=0
3971  * for RV64: x=1...0
3972  * ~~~
3973  *
3974  * \param [in]  t    long type of value stored in t
3975  * \param [in]  a    unsigned long type of value stored in a
3976  * \param [in]  b    unsigned long type of value stored in b
3977  * \return value stored in long type
3978  */
__RV_KMMAWT2_U(long t,unsigned long a,unsigned long b)3979 __STATIC_FORCEINLINE long __RV_KMMAWT2_U(long t, unsigned long a, unsigned long b)
3980 {
3981     __ASM volatile("kmmawt2.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
3982     return t;
3983 }
3984 /* ===== Inline Function End for 3.47.2. KMMAWT2.u ===== */
3985 
3986 /* ===== Inline Function Start for 3.48.1. KMMSB ===== */
3987 /**
3988  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
3989  * \brief KMMSB (SIMD Saturating MSW Signed Multiply Word and Subtract)
3990  * \details
3991  * **Type**: SIMD
3992  *
3993  * **Syntax**:\n
3994  * ~~~
3995  * KMMSB Rd, Rs1, Rs2
3996  * KMMSB.u Rd, Rs1, Rs2
3997  * ~~~
3998  *
3999  * **Purpose**:\n
4000  * Multiply the signed 32-bit integer elements of two registers and subtract the most
4001  * significant 32-bit results from the signed 32-bit elements of a third register. The subtraction results
4002  * are written to the third register. The `.u` form performs an additional rounding up operation on
4003  * the multiplication results before subtracting the most significant 32-bit part of the results.
4004  *
4005  * **Description**:\n
4006  * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
4007  * and subtracts the most significant 32-bit multiplication results from the signed 32-bit elements of
4008  * Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the
4009  * range and the OV bit is set to 1. The results after saturation are written to Rd. The `.u` form of the
4010  * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
4011  * adding a 1 to bit 31 of the results.
4012  *
4013  * **Operations**:\n
4014  * ~~~
4015  * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
4016  * if (`.u` form) {
4017  *   Round[x][32:0] = Mres[x][63:31] + 1;
4018  *   res[x] = Rd.W[x] - Round[x][32:1];
4019  * } else {
4020  *   res[x] = Rd.W[x] - Mres[x][63:32];
4021  * }
4022  * if (res[x] > (2^31)-1) {
4023  *   res[x] = (2^31)-1;
4024  *   OV = 1;
4025  * } else if (res[x] < -2^31) {
4026  *   res[x] = -2^31;
4027  *   OV = 1;
4028  * }
4029  * Rd.W[x] = res[x];
4030  * for RV32: x=0
4031  * for RV64: x=1...0
4032  * ~~~
4033  *
4034  * \param [in]  t    long type of value stored in t
4035  * \param [in]  a    long type of value stored in a
4036  * \param [in]  b    long type of value stored in b
4037  * \return value stored in long type
4038  */
__RV_KMMSB(long t,long a,long b)4039 __STATIC_FORCEINLINE long __RV_KMMSB(long t, long a, long b)
4040 {
4041     __ASM volatile("kmmsb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
4042     return t;
4043 }
4044 /* ===== Inline Function End for 3.48.1. KMMSB ===== */
4045 
4046 /* ===== Inline Function Start for 3.48.2. KMMSB.u ===== */
4047 /**
4048  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
4049  * \brief KMMSB.u (SIMD Saturating MSW Signed Multiply Word and Subtraction with Rounding)
4050  * \details
4051  * **Type**: SIMD
4052  *
4053  * **Syntax**:\n
4054  * ~~~
4055  * KMMSB Rd, Rs1, Rs2
4056  * KMMSB.u Rd, Rs1, Rs2
4057  * ~~~
4058  *
4059  * **Purpose**:\n
4060  * Multiply the signed 32-bit integer elements of two registers and subtract the most
4061  * significant 32-bit results from the signed 32-bit elements of a third register. The subtraction results
4062  * are written to the third register. The `.u` form performs an additional rounding up operation on
4063  * the multiplication results before subtracting the most significant 32-bit part of the results.
4064  *
4065  * **Description**:\n
4066  * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
4067  * and subtracts the most significant 32-bit multiplication results from the signed 32-bit elements of
4068  * Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the
4069  * range and the OV bit is set to 1. The results after saturation are written to Rd. The `.u` form of the
4070  * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
4071  * adding a 1 to bit 31 of the results.
4072  *
4073  * **Operations**:\n
4074  * ~~~
4075  * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
4076  * if (`.u` form) {
4077  *   Round[x][32:0] = Mres[x][63:31] + 1;
4078  *   res[x] = Rd.W[x] - Round[x][32:1];
4079  * } else {
4080  *   res[x] = Rd.W[x] - Mres[x][63:32];
4081  * }
4082  * if (res[x] > (2^31)-1) {
4083  *   res[x] = (2^31)-1;
4084  *   OV = 1;
4085  * } else if (res[x] < -2^31) {
4086  *   res[x] = -2^31;
4087  *   OV = 1;
4088  * }
4089  * Rd.W[x] = res[x];
4090  * for RV32: x=0
4091  * for RV64: x=1...0
4092  * ~~~
4093  *
4094  * \param [in]  t    long type of value stored in t
4095  * \param [in]  a    long type of value stored in a
4096  * \param [in]  b    long type of value stored in b
4097  * \return value stored in long type
4098  */
__RV_KMMSB_U(long t,long a,long b)4099 __STATIC_FORCEINLINE long __RV_KMMSB_U(long t, long a, long b)
4100 {
4101     __ASM volatile("kmmsb.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
4102     return t;
4103 }
4104 /* ===== Inline Function End for 3.48.2. KMMSB.u ===== */
4105 
4106 /* ===== Inline Function Start for 3.49.1. KMMWB2 ===== */
4107 /**
4108  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
4109  * \brief KMMWB2 (SIMD Saturating MSW Signed Multiply Word and Bottom Half & 2)
4110  * \details
4111  * **Type**: SIMD
4112  *
4113  * **Syntax**:\n
4114  * ~~~
4115  * KMMWB2 Rd, Rs1, Rs2
4116  * KMMWB2.u Rd, Rs1, Rs2
4117  * ~~~
4118  *
4119  * **Purpose**:\n
4120  * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
4121  * corresponding 32-bit elements of another register, double the multiplication results and write the
4122  * saturated most significant 32-bit results to the corresponding 32-bit elements of a register. The `.u`
4123  * form rounds up the results from the most significant discarded bit.
4124  *
4125  * **Description**:\n
4126  * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed bottom 16-bit Q15
4127  * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
4128  * writes the saturated most significant 32-bit Q31 multiplication results to the corresponding 32-bit
4129  * elements of Rd. The `.u` form of the instruction rounds up the most significant 32-bit of the 48-bit
4130  * Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of the results.
4131  *
4132  * **Operations**:\n
4133  * ~~~
4134  * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[0] == 0x8000)) {
4135  *   Rd.W[x] = 0x7fffffff;
4136  *   OV = 1;
4137  * } else {
4138  *   Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[0];
4139  *   if (`.u` form) {
4140  *     Round[x][32:0] = Mres[x][46:14] + 1;
4141  *     Rd.W[x] = Round[x][32:1];
4142  *   } else {
4143  *     Rd.W[x] = Mres[x][46:15];
4144  *   }
4145  * }
4146  * for RV32: x=0
4147  * for RV64: x=1...0
4148  * ~~~
4149  *
4150  * \param [in]  a    long type of value stored in a
4151  * \param [in]  b    unsigned long type of value stored in b
4152  * \return value stored in long type
4153  */
__RV_KMMWB2(long a,unsigned long b)4154 __STATIC_FORCEINLINE long __RV_KMMWB2(long a, unsigned long b)
4155 {
4156     register long result;
4157     __ASM volatile("kmmwb2 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
4158     return result;
4159 }
4160 /* ===== Inline Function End for 3.49.1. KMMWB2 ===== */
4161 
4162 /* ===== Inline Function Start for 3.49.2. KMMWB2.u ===== */
4163 /**
4164  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
4165  * \brief KMMWB2.u (SIMD Saturating MSW Signed Multiply Word and Bottom Half & 2 with Rounding)
4166  * \details
4167  * **Type**: SIMD
4168  *
4169  * **Syntax**:\n
4170  * ~~~
4171  * KMMWB2 Rd, Rs1, Rs2
4172  * KMMWB2.u Rd, Rs1, Rs2
4173  * ~~~
4174  *
4175  * **Purpose**:\n
4176  * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
4177  * corresponding 32-bit elements of another register, double the multiplication results and write the
4178  * saturated most significant 32-bit results to the corresponding 32-bit elements of a register. The `.u`
4179  * form rounds up the results from the most significant discarded bit.
4180  *
4181  * **Description**:\n
4182  * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed bottom 16-bit Q15
4183  * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
4184  * writes the saturated most significant 32-bit Q31 multiplication results to the corresponding 32-bit
4185  * elements of Rd. The `.u` form of the instruction rounds up the most significant 32-bit of the 48-bit
4186  * Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of the results.
4187  *
4188  * **Operations**:\n
4189  * ~~~
4190  * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[0] == 0x8000)) {
4191  *   Rd.W[x] = 0x7fffffff;
4192  *   OV = 1;
4193  * } else {
4194  *   Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[0];
4195  *   if (`.u` form) {
4196  *     Round[x][32:0] = Mres[x][46:14] + 1;
4197  *     Rd.W[x] = Round[x][32:1];
4198  *   } else {
4199  *     Rd.W[x] = Mres[x][46:15];
4200  *   }
4201  * }
4202  * for RV32: x=0
4203  * for RV64: x=1...0
4204  * ~~~
4205  *
4206  * \param [in]  a    long type of value stored in a
4207  * \param [in]  b    unsigned long type of value stored in b
4208  * \return value stored in long type
4209  */
__RV_KMMWB2_U(long a,unsigned long b)4210 __STATIC_FORCEINLINE long __RV_KMMWB2_U(long a, unsigned long b)
4211 {
4212     register long result;
4213     __ASM volatile("kmmwb2.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
4214     return result;
4215 }
4216 /* ===== Inline Function End for 3.49.2. KMMWB2.u ===== */
4217 
4218 /* ===== Inline Function Start for 3.50.1. KMMWT2 ===== */
4219 /**
4220  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
4221  * \brief KMMWT2 (SIMD Saturating MSW Signed Multiply Word and Top Half & 2)
4222  * \details
4223  * **Type**: SIMD
4224  *
4225  * **Syntax**:\n
4226  * ~~~
4227  * KMMWT2 Rd, Rs1, Rs2
4228  * KMMWT2.u Rd, Rs1, Rs2
4229  * ~~~
4230  *
4231  * **Purpose**:\n
4232  * Multiply the signed 32-bit integer elements of one register and the top 16-bit of the
4233  * corresponding 32-bit elements of another register, double the multiplication results and write the
4234  * saturated most significant 32-bit results to the corresponding 32-bit elements of a register. The `.u`
4235  * form rounds up the results from the most significant discarded bit.
4236  *
4237  * **Description**:\n
4238  * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed top 16-bit Q15
4239  * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
4240  * writes the saturated most significant 32-bit Q31 multiplication results to the corresponding 32-bit
4241  * elements of Rd. The `.u` form of the instruction rounds up the most significant 32-bit of the 48-bit
4242  * Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of the results.
4243  *
4244  * **Operations**:\n
4245  * ~~~
4246  * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[1] == 0x8000)) {
4247  *   Rd.W[x] = 0x7fffffff;
4248  *   OV = 1;
4249  * } else {
4250  *   Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[1];
4251  *   if (`.u` form) {
4252  *     Round[x][32:0] = Mres[x][46:14] + 1;
4253  *     Rd.W[x] = Round[x][32:1];
4254  *   } else {
4255  *     Rd.W[x] = Mres[x][46:15];
4256  *   }
4257  * }
4258  * for RV32: x=0
4259  * for RV64: x=1...0
4260  * ~~~
4261  *
4262  * \param [in]  a    long type of value stored in a
4263  * \param [in]  b    unsigned long type of value stored in b
4264  * \return value stored in long type
4265  */
__RV_KMMWT2(long a,unsigned long b)4266 __STATIC_FORCEINLINE long __RV_KMMWT2(long a, unsigned long b)
4267 {
4268     register long result;
4269     __ASM volatile("kmmwt2 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
4270     return result;
4271 }
4272 /* ===== Inline Function End for 3.50.1. KMMWT2 ===== */
4273 
4274 /* ===== Inline Function Start for 3.50.2. KMMWT2.u ===== */
4275 /**
4276  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
4277  * \brief KMMWT2.u (SIMD Saturating MSW Signed Multiply Word and Top Half & 2 with Rounding)
4278  * \details
4279  * **Type**: SIMD
4280  *
4281  * **Syntax**:\n
4282  * ~~~
4283  * KMMWT2 Rd, Rs1, Rs2
4284  * KMMWT2.u Rd, Rs1, Rs2
4285  * ~~~
4286  *
4287  * **Purpose**:\n
4288  * Multiply the signed 32-bit integer elements of one register and the top 16-bit of the
4289  * corresponding 32-bit elements of another register, double the multiplication results and write the
4290  * saturated most significant 32-bit results to the corresponding 32-bit elements of a register. The `.u`
4291  * form rounds up the results from the most significant discarded bit.
4292  *
4293  * **Description**:\n
4294  * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed top 16-bit Q15
4295  * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
4296  * writes the saturated most significant 32-bit Q31 multiplication results to the corresponding 32-bit
4297  * elements of Rd. The `.u` form of the instruction rounds up the most significant 32-bit of the 48-bit
4298  * Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of the results.
4299  *
4300  * **Operations**:\n
4301  * ~~~
4302  * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[1] == 0x8000)) {
4303  *   Rd.W[x] = 0x7fffffff;
4304  *   OV = 1;
4305  * } else {
4306  *   Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[1];
4307  *   if (`.u` form) {
4308  *     Round[x][32:0] = Mres[x][46:14] + 1;
4309  *     Rd.W[x] = Round[x][32:1];
4310  *   } else {
4311  *     Rd.W[x] = Mres[x][46:15];
4312  *   }
4313  * }
4314  * for RV32: x=0
4315  * for RV64: x=1...0
4316  * ~~~
4317  *
4318  * \param [in]  a    long type of value stored in a
4319  * \param [in]  b    unsigned long type of value stored in b
4320  * \return value stored in long type
4321  */
__RV_KMMWT2_U(long a,unsigned long b)4322 __STATIC_FORCEINLINE long __RV_KMMWT2_U(long a, unsigned long b)
4323 {
4324     register long result;
4325     __ASM volatile("kmmwt2.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
4326     return result;
4327 }
4328 /* ===== Inline Function End for 3.50.2. KMMWT2.u ===== */
4329 
4330 /* ===== Inline Function Start for 3.51.1. KMSDA ===== */
4331 /**
4332  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
4333  * \brief KMSDA (SIMD Saturating Signed Multiply Two Halfs & Add & Subtract)
4334  * \details
4335  * **Type**: SIMD
4336  *
4337  * **Syntax**:\n
4338  * ~~~
4339  * KMSDA Rd, Rs1, Rs2
4340  * KMSXDA Rd, Rs1, Rs2
4341  * ~~~
4342  *
4343  * **Purpose**:\n
4344  * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
4345  * subtracts the two 32-bit results from the corresponding 32-bit elements of a third register. The
4346  * subtraction result may be saturated.
4347  * * KMSDA: rd.W[x] - top*top - bottom*bottom (per 32-bit element)
4348  * * KMSXDA: rd.W[x] - top*bottom - bottom*top (per 32-bit element)
4349  *
4350  * **Description**:\n
4351  * For the `KMSDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
4352  * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of
4353  * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
4354  * For the `KMSXDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
4355  * with the top 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of the
4356  * 32-bit elements of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2.
4357  * The two 32-bit multiplication results are then subtracted from the content of the corresponding 32-
4358  * bit elements of Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
4359  * saturated to the range and the OV bit is set to 1. The results after saturation are written to Rd. The
4360  * 16-bit contents are treated as signed integers.
4361  *
4362  * **Operations**:\n
4363  * ~~~
4364  * // KMSDA
4365  * res[x] = Rd.W[x] - (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
4366  * // KMSXDA
4367  * res[x] = Rd.W[x] - (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
4368  * if (res[x] > (2^31)-1) {
4369  *   res[x] = (2^31)-1;
4370  *   OV = 1;
4371  * } else if (res[x] < -2^31) {
4372  *   res[x] = -2^31;
4373  *   OV = 1;
4374  * }
4375  * Rd.W[x] = res[x];
4376  * for RV32: x=0
4377  * for RV64: x=1...0
4378  * ~~~
4379  *
4380  * \param [in]  t    long type of value stored in t
4381  * \param [in]  a    unsigned long type of value stored in a
4382  * \param [in]  b    unsigned long type of value stored in b
4383  * \return value stored in long type
4384  */
__RV_KMSDA(long t,unsigned long a,unsigned long b)4385 __STATIC_FORCEINLINE long __RV_KMSDA(long t, unsigned long a, unsigned long b)
4386 {
4387     __ASM volatile("kmsda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
4388     return t;
4389 }
4390 /* ===== Inline Function End for 3.51.1. KMSDA ===== */
4391 
4392 /* ===== Inline Function Start for 3.51.2. KMSXDA ===== */
4393 /**
4394  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
4395  * \brief KMSXDA (SIMD Saturating Signed Crossed Multiply Two Halfs & Add & Subtract)
4396  * \details
4397  * **Type**: SIMD
4398  *
4399  * **Syntax**:\n
4400  * ~~~
4401  * KMSDA Rd, Rs1, Rs2
4402  * KMSXDA Rd, Rs1, Rs2
4403  * ~~~
4404  *
4405  * **Purpose**:\n
4406  * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
4407  * subtracts the two 32-bit results from the corresponding 32-bit elements of a third register. The
4408  * subtraction result may be saturated.
4409  * * KMSDA: rd.W[x] - top*top - bottom*bottom (per 32-bit element)
4410  * * KMSXDA: rd.W[x] - top*bottom - bottom*top (per 32-bit element)
4411  *
4412  * **Description**:\n
4413  * For the `KMSDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
4414  * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of
4415  * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
4416  * For the `KMSXDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
4417  * with the top 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of the
4418  * 32-bit elements of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2.
4419  * The two 32-bit multiplication results are then subtracted from the content of the corresponding 32-
4420  * bit elements of Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
4421  * saturated to the range and the OV bit is set to 1. The results after saturation are written to Rd. The
4422  * 16-bit contents are treated as signed integers.
4423  *
4424  * **Operations**:\n
4425  * ~~~
4426  * // KMSDA
4427  * res[x] = Rd.W[x] - (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
4428  * // KMSXDA
4429  * res[x] = Rd.W[x] - (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
4430  * if (res[x] > (2^31)-1) {
4431  *   res[x] = (2^31)-1;
4432  *   OV = 1;
4433  * } else if (res[x] < -2^31) {
4434  *   res[x] = -2^31;
4435  *   OV = 1;
4436  * }
4437  * Rd.W[x] = res[x];
4438  * for RV32: x=0
4439  * for RV64: x=1...0
4440  * ~~~
4441  *
4442  * \param [in]  t    long type of value stored in t
4443  * \param [in]  a    unsigned long type of value stored in a
4444  * \param [in]  b    unsigned long type of value stored in b
4445  * \return value stored in long type
4446  */
__RV_KMSXDA(long t,unsigned long a,unsigned long b)4447 __STATIC_FORCEINLINE long __RV_KMSXDA(long t, unsigned long a, unsigned long b)
4448 {
4449     __ASM volatile("kmsxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
4450     return t;
4451 }
4452 /* ===== Inline Function End for 3.51.2. KMSXDA ===== */
4453 
4454 /* ===== Inline Function Start for 3.52. KMSR64 ===== */
4455 /**
4456  * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
4457  * \brief KMSR64 (Signed Multiply and Saturating Subtract from 64-Bit Data)
4458  * \details
4459  * **Type**: DSP (64-bit Profile)
4460  *
4461  * **Syntax**:\n
4462  * ~~~
4463  * KMSR64 Rd, Rs1, Rs2
4464  * ~~~
4465  *
4466  * **Purpose**:\n
4467  * Multiply the 32-bit signed elements in two registers and subtract the 64-bit multiplication
4468  * results from the 64-bit signed data of a pair of registers (RV32) or a register (RV64). The result is
4469  * saturated to the Q63 range and written back to the pair of registers (RV32) or the register (RV64).
4470  *
4471  * **RV32 Description**:\n
4472  * This instruction multiplies the 32-bit signed data of Rs1 with that of Rs2. It
4473  * subtracts the 64-bit multiplication result from the 64-bit signed data of an even/odd pair of registers
4474  * specified by Rd(4,1) with unlimited precision. If the 64-bit subtraction result is beyond the Q63
4475  * number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The saturated
4476  * result is written back to the even/odd pair of registers specified by Rd(4,1).
4477  * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
4478  * includes register 2d and 2d+1.
4479  * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
4480  * of the pair contains the low 32-bit of the result.
4481  *
4482  * **RV64 Description**:\n
4483  * This instruction multiplies the 32-bit signed elements of Rs1 with that of Rs2. It
4484  * subtracts the 64-bit multiplication results from the 64-bit signed data in Rd with unlimited
4485  * precision. If the 64-bit subtraction result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is
4486  * saturated to the range and the OV bit is set to 1. The saturated result is written back to Rd.
4487  *
4488  * **Operations**:\n
4489  * ~~~
4490  * RV32:
4491  * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
4492  * result = R[t_H].R[t_L] - (Rs1 * Rs2);
4493  * if (result > (2^63)-1) {
4494  *   result = (2^63)-1; OV = 1;
4495  * } else if (result < -2^63) {
4496  *   result = -2^63; OV = 1;
4497  * }
4498  * R[t_H].R[t_L] = result;
4499  * RV64:
4500  * // `result` has unlimited precision
4501  * result = Rd - (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]);
4502  * if (result > (2^63)-1) {
4503  *   result = (2^63)-1; OV = 1;
4504  * } else if (result < -2^63) {
4505  *   result = -2^63; OV = 1;
4506  * }
4507  * Rd = result;
4508  * ~~~
4509  *
4510  * \param [in]  t    long long type of value stored in t
4511  * \param [in]  a    long type of value stored in a
4512  * \param [in]  b    long type of value stored in b
4513  * \return value stored in long long type
4514  */
__RV_KMSR64(long long t,long a,long b)4515 __STATIC_FORCEINLINE long long __RV_KMSR64(long long t, long a, long b)
4516 {
4517     __ASM volatile("kmsr64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
4518     return t;
4519 }
4520 /* ===== Inline Function End for 3.52. KMSR64 ===== */
4521 
4522 /* ===== Inline Function Start for 3.53. KSLLW ===== */
4523 /**
4524  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
4525  * \brief KSLLW (Saturating Shift Left Logical for Word)
4526  * \details
4527  * **Type**: DSP
4528  *
4529  * **Syntax**:\n
4530  * ~~~
4531  * KSLLW Rd, Rs1, Rs2
4532  * ~~~
4533  *
4534  * **Purpose**:\n
4535  * Do logical left shift operation with saturation on a 32-bit word. The shift amount is a
4536  * variable from a GPR.
4537  *
4538  * **Description**:\n
4539  * The first word data in Rs1 is left-shifted logically. The shifted out bits are filled with
4540  * zero and the shift amount is specified by the low-order 5-bits of the value in the Rs2 register. Any
4541  * shifted value greater than 2^31-1 is saturated to 2^31-1. Any shifted value smaller than -2^31 is saturated
4542  * to -2^31. And the saturated result is sign-extended and written to Rd. If any saturation is performed,
4543  * set OV bit to 1.
4544  *
4545  * **Operations**:\n
4546  * ~~~
4547  * sa = Rs2[4:0];
4548  * res[(31+sa):0] = Rs1.W[0] << sa;
4549  * if (res > (2^31)-1) {
4550  *   res = 0x7fffffff; OV = 1;
4551  * } else if (res < -2^31) {
4552  *   res = 0x80000000; OV = 1;
4553  * }
4554  * Rd[31:0] = res[31:0]; // RV32
4555  * Rd[63:0] = SE(res[31:0]); // RV64
4556  * ~~~
4557  *
4558  * \param [in]  a    long type of value stored in a
4559  * \param [in]  b    unsigned int type of value stored in b
4560  * \return value stored in long type
4561  */
__RV_KSLLW(long a,unsigned int b)4562 __STATIC_FORCEINLINE long __RV_KSLLW(long a, unsigned int b)
4563 {
4564     register long result;
4565     __ASM volatile("ksllw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
4566     return result;
4567 }
4568 /* ===== Inline Function End for 3.53. KSLLW ===== */
4569 
4570 /* ===== Inline Function Start for 3.54. KSLLIW ===== */
4571 /**
4572  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
4573  * \brief KSLLIW (Saturating Shift Left Logical Immediate for Word)
4574  * \details
4575  * **Type**: DSP
4576  *
4577  * **Syntax**:\n
4578  * ~~~
4579  * KSLLIW Rd, Rs1, imm5u
4580  * ~~~
4581  *
4582  * **Purpose**:\n
4583  * Do logical left shift operation with saturation on a 32-bit word. The shift amount is an
4584  * immediate value.
4585  *
4586  * **Description**:\n
4587  * The first word data in Rs1 is left-shifted logically. The shifted out bits are filled with
4588  * zero and the shift amount is specified by the imm5u constant. Any shifted value greater than 2^31-1 is
4589  * saturated to 2^31-1. Any shifted value smaller than -2^31 is saturated to -2^31. And the saturated result is
4590  * sign-extended and written to Rd. If any saturation is performed, set OV bit to 1.
4591  *
4592  * **Operations**:\n
4593  * ~~~
4594  * sa = imm5u;
4595  * res[(31+sa):0] = Rs1.W[0] << sa;
4596  * if (res > (2^31)-1) {
4597  *   res = 0x7fffffff; OV = 1;
4598  * } else if (res < -2^31) {
4599  *   res = 0x80000000; OV = 1;
4600  * }
4601  * Rd[31:0] = res[31:0]; // RV32
4602  * Rd[63:0] = SE(res[31:0]); // RV64
4603  * ~~~
4604  *
4605  * \param [in]  a    long type of value stored in a
 * \param [in]  b    unsigned int constant shift amount (imm5u, 0..31); must be a compile-time immediate
4607  * \return value stored in long type
4608  */
#define __RV_KSLLIW(a, b)    \
    ({    \
        /* Capture the operand first: if the output temporary were declared */    \
        /* before (a) is evaluated, an argument spelled like that temporary */    \
        /* would bind to the uninitialized local instead of the caller's.   */    \
        long __a = (long)(a);    \
        long __result;    \
        /* "K" requires (b) to be a compile-time unsigned immediate. */    \
        __ASM volatile("kslliw %0, %1, %2" : "=r"(__result) : "r"(__a), "K"(b));    \
        __result;    \
    })
4616 /* ===== Inline Function End for 3.54. KSLLIW ===== */
4617 
4618 /* ===== Inline Function Start for 3.55. KSLL8 ===== */
4619 /**
4620  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
4621  * \brief KSLL8 (SIMD 8-bit Saturating Shift Left Logical)
4622  * \details
4623  * **Type**: SIMD
4624  *
4625  * **Syntax**:\n
4626  * ~~~
4627  * KSLL8 Rd, Rs1, Rs2
4628  * ~~~
4629  *
4630  * **Purpose**:\n
4631  * Do 8-bit elements logical left shift operations with saturation simultaneously. The shift
4632  * amount is a variable from a GPR.
4633  *
4634  * **Description**:\n
4635  * The 8-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled
4636  * with zero and the shift amount is specified by the low-order 3-bits of the value in the Rs2 register.
4637  * Any shifted value greater than 2^7-1 is saturated to 2^7-1. Any shifted value smaller than -2^7 is
4638  * saturated to -2^7. And the saturated results are written to Rd. If any saturation is performed, set OV
4639  * bit to 1.
4640  *
4641  * **Operations**:\n
4642  * ~~~
4643  * sa = Rs2[2:0];
4644  * if (sa != 0) {
4645  *   res[(7+sa):0] = Rs1.B[x] << sa;
4646  *   if (res > (2^7)-1) {
4647  *     res = 0x7f; OV = 1;
4648  *   } else if (res < -2^7) {
4649  *     res = 0x80; OV = 1;
4650  *   }
4651  *   Rd.B[x] = res[7:0];
4652  * } else {
4653  *   Rd = Rs1;
4654  * }
4655  * for RV32: x=3...0,
4656  * for RV64: x=7...0
4657  * ~~~
4658  *
4659  * \param [in]  a    unsigned long type of value stored in a
4660  * \param [in]  b    unsigned int type of value stored in b
4661  * \return value stored in unsigned long type
4662  */
__RV_KSLL8(unsigned long a,unsigned int b)4663 __STATIC_FORCEINLINE unsigned long __RV_KSLL8(unsigned long a, unsigned int b)
4664 {
4665     register unsigned long result;
4666     __ASM volatile("ksll8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
4667     return result;
4668 }
4669 /* ===== Inline Function End for 3.55. KSLL8 ===== */
4670 
4671 /* ===== Inline Function Start for 3.56. KSLLI8 ===== */
4672 /**
4673  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
4674  * \brief KSLLI8 (SIMD 8-bit Saturating Shift Left Logical Immediate)
4675  * \details
4676  * **Type**: SIMD
4677  *
4678  * **Syntax**:\n
4679  * ~~~
4680  * KSLLI8 Rd, Rs1, imm3u
4681  * ~~~
4682  *
4683  * **Purpose**:\n
4684  * Do 8-bit elements logical left shift operations with saturation simultaneously. The shift
4685  * amount is an immediate value.
4686  *
4687  * **Description**:\n
4688  * The 8-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled
4689  * with zero and the shift amount is specified by the imm3u constant. Any shifted value greater than
4690  * 2^7-1 is saturated to 2^7-1. Any shifted value smaller than -2^7 is saturated to -2^7. And the saturated
4691  * results are written to Rd. If any saturation is performed, set OV bit to 1.
4692  *
4693  * **Operations**:\n
4694  * ~~~
4695  * sa = imm3u[2:0];
4696  * if (sa != 0) {
4697  *   res[(7+sa):0] = Rs1.B[x] << sa;
4698  *   if (res > (2^7)-1) {
4699  *     res = 0x7f; OV = 1;
4700  *   } else if (res < -2^7) {
4701  *     res = 0x80; OV = 1;
4702  *   }
4703  *   Rd.B[x] = res[7:0];
4704  * } else {
4705  *   Rd = Rs1;
4706  * }
4707  * for RV32: x=3...0,
4708  * for RV64: x=7...0
4709  * ~~~
4710  *
4711  * \param [in]  a    unsigned long type of value stored in a
4712  * \param [in]  b    unsigned int type of value stored in b
4713  * \return value stored in unsigned long type
4714  */
/*
 * Implemented as a macro because `b` is passed through the "K" constraint,
 * i.e. it has to be usable as an immediate operand of `kslli8`.
 * `a` is evaluated exactly once, before the output local is declared, and the
 * locals use instruction-specific names so that an argument spelled `result`
 * or `__a` at the call site is not captured by the macro's own variables.
 */
#define __RV_KSLLI8(a, b)    \
    ({    \
        unsigned long __kslli8_rs1 = (unsigned long)(a);    \
        unsigned long __kslli8_rd;    \
        __ASM volatile("kslli8 %0, %1, %2" : "=r"(__kslli8_rd) : "r"(__kslli8_rs1), "K"(b));    \
        __kslli8_rd;    \
    })
4722 /* ===== Inline Function End for 3.56. KSLLI8 ===== */
4723 
4724 /* ===== Inline Function Start for 3.57. KSLL16 ===== */
4725 /**
4726  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
4727  * \brief KSLL16 (SIMD 16-bit Saturating Shift Left Logical)
4728  * \details
4729  * **Type**: SIMD
4730  *
4731  * **Syntax**:\n
4732  * ~~~
4733  * KSLL16 Rd, Rs1, Rs2
4734  * ~~~
4735  *
4736  * **Purpose**:\n
4737  * Do 16-bit elements logical left shift operations with saturation simultaneously. The shift
4738  * amount is a variable from a GPR.
4739  *
4740  * **Description**:\n
4741  * The 16-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled
4742  * with zero and the shift amount is specified by the low-order 4-bits of the value in the Rs2 register.
4743  * Any shifted value greater than 2^15-1 is saturated to 2^15-1. Any shifted value smaller than -2^15 is
4744  * saturated to -2^15. And the saturated results are written to Rd. If any saturation is performed, set OV
4745  * bit to 1.
4746  *
4747  * **Operations**:\n
4748  * ~~~
4749  * sa = Rs2[3:0];
4750  * if (sa != 0) {
4751  *   res[(15+sa):0] = Rs1.H[x] << sa;
4752  *   if (res > (2^15)-1) {
4753  *     res = 0x7fff; OV = 1;
4754  *   } else if (res < -2^15) {
4755  *     res = 0x8000; OV = 1;
4756  *   }
4757  *   Rd.H[x] = res[15:0];
4758  * } else {
4759  *   Rd = Rs1;
4760  * }
4761  * for RV32: x=1...0,
4762  * for RV64: x=3...0
4763  * ~~~
4764  *
4765  * \param [in]  a    unsigned long type of value stored in a
4766  * \param [in]  b    unsigned int type of value stored in b
4767  * \return value stored in unsigned long type
4768  */
__RV_KSLL16(unsigned long a,unsigned int b)4769 __STATIC_FORCEINLINE unsigned long __RV_KSLL16(unsigned long a, unsigned int b)
4770 {
4771     register unsigned long result;
4772     __ASM volatile("ksll16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
4773     return result;
4774 }
4775 /* ===== Inline Function End for 3.57. KSLL16 ===== */
4776 
4777 /* ===== Inline Function Start for 3.58. KSLLI16 ===== */
4778 /**
4779  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
4780  * \brief KSLLI16 (SIMD 16-bit Saturating Shift Left Logical Immediate)
4781  * \details
4782  * **Type**: SIMD
4783  *
4784  * **Syntax**:\n
4785  * ~~~
4786  * KSLLI16 Rd, Rs1, imm4u
4787  * ~~~
4788  *
4789  * **Purpose**:\n
4790  * Do 16-bit elements logical left shift operations with saturation simultaneously. The shift
4791  * amount is an immediate value.
4792  *
4793  * **Description**:\n
4794  * The 16-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled
4795  * with zero and the shift amount is specified by the imm4u constant. Any shifted value greater than
4796  * 2^15-1 is saturated to 2^15-1. Any shifted value smaller than -2^15 is saturated to -2^15. And the saturated
4797  * results are written to Rd. If any saturation is performed, set OV bit to 1.
4798  *
4799  * **Operations**:\n
4800  * ~~~
4801  * sa = imm4u[3:0];
4802  * if (sa != 0) {
4803  *   res[(15+sa):0] = Rs1.H[x] << sa;
4804  *   if (res > (2^15)-1) {
4805  *     res = 0x7fff; OV = 1;
4806  *   } else if (res < -2^15) {
4807  *     res = 0x8000; OV = 1;
4808  *   }
4809  *   Rd.H[x] = res[15:0];
4810  * } else {
4811  *   Rd = Rs1;
4812  * }
4813  * for RV32: x=1...0,
4814  * for RV64: x=3...0
4815  * ~~~
4816  *
4817  * \param [in]  a    unsigned long type of value stored in a
4818  * \param [in]  b    unsigned int type of value stored in b
4819  * \return value stored in unsigned long type
4820  */
/*
 * Implemented as a macro because `b` is passed through the "K" constraint,
 * i.e. it has to be usable as an immediate operand of `kslli16`.
 * `a` is evaluated exactly once, before the output local is declared, and the
 * locals use instruction-specific names so that an argument spelled `result`
 * or `__a` at the call site is not captured by the macro's own variables.
 */
#define __RV_KSLLI16(a, b)    \
    ({    \
        unsigned long __kslli16_rs1 = (unsigned long)(a);    \
        unsigned long __kslli16_rd;    \
        __ASM volatile("kslli16 %0, %1, %2" : "=r"(__kslli16_rd) : "r"(__kslli16_rs1), "K"(b));    \
        __kslli16_rd;    \
    })
4828 /* ===== Inline Function End for 3.58. KSLLI16 ===== */
4829 
4830 /* ===== Inline Function Start for 3.59.1. KSLRA8 ===== */
4831 /**
4832  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
4833  * \brief KSLRA8 (SIMD 8-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
4834  * \details
4835  * **Type**: SIMD
4836  *
4837  * **Syntax**:\n
4838  * ~~~
4839  * KSLRA8 Rd, Rs1, Rs2
4840  * KSLRA8.u Rd, Rs1, Rs2
4841  * ~~~
4842  *
4843  * **Purpose**:\n
4844  * Do 8-bit elements logical left (positive) or arithmetic right (negative) shift operation with
4845  * Q7 saturation for the left shift. The `.u` form performs additional rounding up operations for the
4846  * right shift.
4847  *
4848  * **Description**:\n
4849  * The 8-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
4850  * based on the value of Rs2[3:0]. Rs2[3:0] is in the signed range of [-2^3, 2^3-1]. A positive Rs2[3:0] means
4851  * logical left shift and a negative Rs2[3:0] means arithmetic right shift. The shift amount is the
4852  * absolute value of Rs2[3:0]. However, the behavior of `Rs2[3:0]==-2^3 (0x8)` is defined to be
4853  * equivalent to the behavior of `Rs2[3:0]==-(2^3-1) (0x9)`.
4854  * The left-shifted results are saturated to the 8-bit signed integer range of [-2^7, 2^7-1]. For the `.u` form
4855  * of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
4856  * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
4857  * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:4] will not affect
4858  * this instruction.
4859  *
4860  * **Operations**:\n
4861  * ~~~
4862  * if (Rs2[3:0] < 0) {
4863  *   sa = -Rs2[3:0];
4864  *   sa = (sa == 8)? 7 : sa;
4865  *   if (`.u` form) {
4866  *     res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
4867  *     Rd.B[x] = res[7:0];
4868  *   } else {
4869  *     Rd.B[x] = SE8(Rs1.B[x][7:sa]);
4870  *   }
4871  * } else {
4872  *   sa = Rs2[2:0];
4873  *   res[(7+sa):0] = Rs1.B[x] <<(logic) sa;
4874  *   if (res > (2^7)-1) {
4875  *     res[7:0] = 0x7f; OV = 1;
4876  *   } else if (res < -2^7) {
4877  *     res[7:0] = 0x80; OV = 1;
4878  *   }
4879  *   Rd.B[x] = res[7:0];
4880  * }
4881  * for RV32: x=3...0,
4882  * for RV64: x=7...0
4883  * ~~~
4884  *
4885  * \param [in]  a    unsigned long type of value stored in a
4886  * \param [in]  b    int type of value stored in b
4887  * \return value stored in unsigned long type
4888  */
__RV_KSLRA8(unsigned long a,int b)4889 __STATIC_FORCEINLINE unsigned long __RV_KSLRA8(unsigned long a, int b)
4890 {
4891     register unsigned long result;
4892     __ASM volatile("kslra8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
4893     return result;
4894 }
4895 /* ===== Inline Function End for 3.59.1. KSLRA8 ===== */
4896 
4897 /* ===== Inline Function Start for 3.59.2. KSLRA8.u ===== */
4898 /**
4899  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
4900  * \brief KSLRA8.u (SIMD 8-bit Shift Left Logical with Saturation or Rounding Shift Right Arithmetic)
4901  * \details
4902  * **Type**: SIMD
4903  *
4904  * **Syntax**:\n
4905  * ~~~
4906  * KSLRA8 Rd, Rs1, Rs2
4907  * KSLRA8.u Rd, Rs1, Rs2
4908  * ~~~
4909  *
4910  * **Purpose**:\n
4911  * Do 8-bit elements logical left (positive) or arithmetic right (negative) shift operation with
4912  * Q7 saturation for the left shift. The `.u` form performs additional rounding up operations for the
4913  * right shift.
4914  *
4915  * **Description**:\n
4916  * The 8-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
4917  * based on the value of Rs2[3:0]. Rs2[3:0] is in the signed range of [-2^3, 2^3-1]. A positive Rs2[3:0] means
4918  * logical left shift and a negative Rs2[3:0] means arithmetic right shift. The shift amount is the
4919  * absolute value of Rs2[3:0]. However, the behavior of `Rs2[3:0]==-2^3 (0x8)` is defined to be
4920  * equivalent to the behavior of `Rs2[3:0]==-(2^3-1) (0x9)`.
4921  * The left-shifted results are saturated to the 8-bit signed integer range of [-2^7, 2^7-1]. For the `.u` form
4922  * of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
4923  * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
4924  * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:4] will not affect
4925  * this instruction.
4926  *
4927  * **Operations**:\n
4928  * ~~~
4929  * if (Rs2[3:0] < 0) {
4930  *   sa = -Rs2[3:0];
4931  *   sa = (sa == 8)? 7 : sa;
4932  *   if (`.u` form) {
4933  *     res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
4934  *     Rd.B[x] = res[7:0];
4935  *   } else {
4936  *     Rd.B[x] = SE8(Rs1.B[x][7:sa]);
4937  *   }
4938  * } else {
4939  *   sa = Rs2[2:0];
4940  *   res[(7+sa):0] = Rs1.B[x] <<(logic) sa;
4941  *   if (res > (2^7)-1) {
4942  *     res[7:0] = 0x7f; OV = 1;
4943  *   } else if (res < -2^7) {
4944  *     res[7:0] = 0x80; OV = 1;
4945  *   }
4946  *   Rd.B[x] = res[7:0];
4947  * }
4948  * for RV32: x=3...0,
4949  * for RV64: x=7...0
4950  * ~~~
4951  *
4952  * \param [in]  a    unsigned long type of value stored in a
4953  * \param [in]  b    int type of value stored in b
4954  * \return value stored in unsigned long type
4955  */
__RV_KSLRA8_U(unsigned long a,int b)4956 __STATIC_FORCEINLINE unsigned long __RV_KSLRA8_U(unsigned long a, int b)
4957 {
4958     register unsigned long result;
4959     __ASM volatile("kslra8.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
4960     return result;
4961 }
4962 /* ===== Inline Function End for 3.59.2. KSLRA8.u ===== */
4963 
4964 /* ===== Inline Function Start for 3.60.1. KSLRA16 ===== */
4965 /**
4966  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
4967  * \brief KSLRA16 (SIMD 16-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
4968  * \details
4969  * **Type**: SIMD
4970  *
4971  * **Syntax**:\n
4972  * ~~~
4973  * KSLRA16 Rd, Rs1, Rs2
4974  * KSLRA16.u Rd, Rs1, Rs2
4975  * ~~~
4976  *
4977  * **Purpose**:\n
4978  * Do 16-bit elements logical left (positive) or arithmetic right (negative) shift operation with
4979  * Q15 saturation for the left shift. The `.u` form performs additional rounding up operations for the
4980  * right shift.
4981  *
4982  * **Description**:\n
4983  * The 16-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
4984  * based on the value of Rs2[4:0]. Rs2[4:0] is in the signed range of [-2^4, 2^4-1]. A positive Rs2[4:0] means
4985  * logical left shift and a negative Rs2[4:0] means arithmetic right shift. The shift amount is the
4986  * absolute value of Rs2[4:0]. However, the behavior of `Rs2[4:0]==-2^4 (0x10)` is defined to be
4987  * equivalent to the behavior of `Rs2[4:0]==-(2^4-1) (0x11)`.
4988  * The left-shifted results are saturated to the 16-bit signed integer range of [-2^15, 2^15-1]. For the `.u`
4989  * form of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
4990  * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
4991  * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:5] will not affect
4992  * this instruction.
4993  *
4994  * **Operations**:\n
4995  * ~~~
4996  * if (Rs2[4:0] < 0) {
4997  *   sa = -Rs2[4:0];
4998  *   sa = (sa == 16)? 15 : sa;
4999  *   if (`.u` form) {
5000  *     res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
5001  *     Rd.H[x] = res[15:0];
5002  *   } else {
5003  *     Rd.H[x] = SE16(Rs1.H[x][15:sa]);
5004  *   }
5005  * } else {
5006  *   sa = Rs2[3:0];
5007  *   res[(15+sa):0] = Rs1.H[x] <<(logic) sa;
5008  *   if (res > (2^15)-1) {
5009  *     res[15:0] = 0x7fff; OV = 1;
5010  *   } else if (res < -2^15) {
5011  *     res[15:0] = 0x8000; OV = 1;
5012  *   }
 *   Rd.H[x] = res[15:0];
5014  * }
5015  * for RV32: x=1...0,
5016  * for RV64: x=3...0
5017  * ~~~
5018  *
5019  * \param [in]  a    unsigned long type of value stored in a
5020  * \param [in]  b    int type of value stored in b
5021  * \return value stored in unsigned long type
5022  */
__RV_KSLRA16(unsigned long a,int b)5023 __STATIC_FORCEINLINE unsigned long __RV_KSLRA16(unsigned long a, int b)
5024 {
5025     register unsigned long result;
5026     __ASM volatile("kslra16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5027     return result;
5028 }
5029 /* ===== Inline Function End for 3.60.1. KSLRA16 ===== */
5030 
5031 /* ===== Inline Function Start for 3.60.2. KSLRA16.u ===== */
5032 /**
5033  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
5034  * \brief KSLRA16.u (SIMD 16-bit Shift Left Logical with Saturation or Rounding Shift Right Arithmetic)
5035  * \details
5036  * **Type**: SIMD
5037  *
5038  * **Syntax**:\n
5039  * ~~~
5040  * KSLRA16 Rd, Rs1, Rs2
5041  * KSLRA16.u Rd, Rs1, Rs2
5042  * ~~~
5043  *
5044  * **Purpose**:\n
5045  * Do 16-bit elements logical left (positive) or arithmetic right (negative) shift operation with
5046  * Q15 saturation for the left shift. The `.u` form performs additional rounding up operations for the
5047  * right shift.
5048  *
5049  * **Description**:\n
5050  * The 16-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
5051  * based on the value of Rs2[4:0]. Rs2[4:0] is in the signed range of [-2^4, 2^4-1]. A positive Rs2[4:0] means
5052  * logical left shift and a negative Rs2[4:0] means arithmetic right shift. The shift amount is the
5053  * absolute value of Rs2[4:0]. However, the behavior of `Rs2[4:0]==-2^4 (0x10)` is defined to be
5054  * equivalent to the behavior of `Rs2[4:0]==-(2^4-1) (0x11)`.
5055  * The left-shifted results are saturated to the 16-bit signed integer range of [-2^15, 2^15-1]. For the `.u`
5056  * form of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
5057  * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
5058  * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:5] will not affect
5059  * this instruction.
5060  *
5061  * **Operations**:\n
5062  * ~~~
5063  * if (Rs2[4:0] < 0) {
5064  *   sa = -Rs2[4:0];
5065  *   sa = (sa == 16)? 15 : sa;
5066  *   if (`.u` form) {
5067  *     res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
5068  *     Rd.H[x] = res[15:0];
5069  *   } else {
5070  *     Rd.H[x] = SE16(Rs1.H[x][15:sa]);
5071  *   }
5072  * } else {
5073  *   sa = Rs2[3:0];
5074  *   res[(15+sa):0] = Rs1.H[x] <<(logic) sa;
5075  *   if (res > (2^15)-1) {
5076  *     res[15:0] = 0x7fff; OV = 1;
5077  *   } else if (res < -2^15) {
5078  *     res[15:0] = 0x8000; OV = 1;
5079  *   }
 *   Rd.H[x] = res[15:0];
5081  * }
5082  * for RV32: x=1...0,
5083  * for RV64: x=3...0
5084  * ~~~
5085  *
5086  * \param [in]  a    unsigned long type of value stored in a
5087  * \param [in]  b    int type of value stored in b
5088  * \return value stored in unsigned long type
5089  */
__RV_KSLRA16_U(unsigned long a,int b)5090 __STATIC_FORCEINLINE unsigned long __RV_KSLRA16_U(unsigned long a, int b)
5091 {
5092     register unsigned long result;
5093     __ASM volatile("kslra16.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5094     return result;
5095 }
5096 /* ===== Inline Function End for 3.60.2. KSLRA16.u ===== */
5097 
5098 /* ===== Inline Function Start for 3.61. KSLRAW ===== */
5099 /**
5100  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
5101  * \brief KSLRAW (Shift Left Logical with Q31 Saturation or Shift Right Arithmetic)
5102  * \details
5103  * **Type**: DSP
5104  *
5105  * **Syntax**:\n
5106  * ~~~
5107  * KSLRAW Rd, Rs1, Rs2
5108  * ~~~
5109  *
5110  * **Purpose**:\n
5111  * Perform a logical left (positive) or arithmetic right (negative) shift operation with Q31
5112  * saturation for the left shift on a 32-bit data.
5113  *
5114  * **Description**:\n
5115  * The lower 32-bit content of Rs1 is left-shifted logically or right-shifted arithmetically
 * based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-2^5, 2^5-1]. A positive Rs2[5:0] means
5117  * logical left shift and a negative Rs2[5:0] means arithmetic right shift. The shift amount is the
5118  * absolute value of Rs2[5:0] clamped to the actual shift range of [0, 31].
5119  * The left-shifted result is saturated to the 32-bit signed integer range of [-2^31, 2^31-1]. After the shift
5120  * operation, the final result is bit-31 sign-extended and written to Rd. If any saturation happens, this
 * instruction sets the OV flag. The value of Rs2[31:6] will not affect the operation of this instruction.
5122  *
5123  * **Operations**:\n
5124  * ~~~
5125  * if (Rs2[5:0] < 0) {
5126  *   sa = -Rs2[5:0];
5127  *   sa = (sa == 32)? 31 : sa;
5128  *   res[31:0] = Rs1.W[0] >>(arith) sa;
5129  * } else {
5130  *   sa = Rs2[5:0];
5131  *   tmp = Rs1.W[0] <<(logic) sa;
5132  *   if (tmp > (2^31)-1) {
5133  *     res[31:0] = (2^31)-1;
5134  *     OV = 1;
5135  *   } else if (tmp < -2^31) {
5136  *     res[31:0] = -2^31;
 *     OV = 1;
5138  *   } else {
5139  *     res[31:0] = tmp[31:0];
5140  *   }
5141  * }
5142  * Rd = res[31:0]; // RV32
5143  * Rd = SE64(res[31:0]); // RV64
5144  * ~~~
5145  *
5146  * \param [in]  a    int type of value stored in a
5147  * \param [in]  b    int type of value stored in b
5148  * \return value stored in long type
5149  */
__RV_KSLRAW(int a,int b)5150 __STATIC_FORCEINLINE long __RV_KSLRAW(int a, int b)
5151 {
5152     register long result;
5153     __ASM volatile("kslraw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5154     return result;
5155 }
5156 /* ===== Inline Function End for 3.61. KSLRAW ===== */
5157 
5158 /* ===== Inline Function Start for 3.62. KSLRAW.u ===== */
5159 /**
5160  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
5161  * \brief KSLRAW.u (Shift Left Logical with Q31 Saturation or Rounding Shift Right Arithmetic)
5162  * \details
5163  * **Type**: DSP
5164  *
5165  * **Syntax**:\n
5166  * ~~~
5167  * KSLRAW.u Rd, Rs1, Rs2
5168  * ~~~
5169  *
5170  * **Purpose**:\n
5171  * Perform a logical left (positive) or arithmetic right (negative) shift operation with Q31
5172  * saturation for the left shift and a rounding up operation for the right shift on a 32-bit data.
5173  *
5174  * **Description**:\n
5175  * The lower 32-bit content of Rs1 is left-shifted logically or right-shifted arithmetically
 * based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-2^5, 2^5-1]. A positive Rs2[5:0] means
5177  * logical left shift and a negative Rs2[5:0] means arithmetic right shift. The shift amount is the
5178  * absolute value of Rs2[5:0] clamped to the actual shift range of [0, 31].
5179  * The left-shifted result is saturated to the 32-bit signed integer range of [-2^31, 2^31-1]. The right-shifted
5180  * result is added a 1 to the most significant discarded bit position for rounding effect. After the shift,
5181  * saturation, or rounding, the final result is bit-31 sign-extended and written to Rd. If any saturation
5182  * happens, this instruction sets the OV flag. The value of Rs2[31:6] will not affect the operation of this
5183  * instruction.
5184  *
5185  * **Operations**:\n
5186  * ~~~
5187  * if (Rs2[5:0] < 0) {
5188  *   sa = -Rs2[5:0];
5189  *   sa = (sa == 32)? 31 : sa;
5190  *   res[31:-1] = SE33(Rs1[31:(sa-1)]) + 1;
5191  *   rst[31:0] = res[31:0];
5192  * } else {
5193  *   sa = Rs2[5:0];
5194  *   tmp = Rs1.W[0] <<(logic) sa;
5195  *   if (tmp > (2^31)-1) {
5196  *     rst[31:0] = (2^31)-1;
5197  *     OV = 1;
5198  *   } else if (tmp < -2^31) {
5199  *     rst[31:0] = -2^31;
 *     OV = 1;
5201  *   } else {
5202  *     rst[31:0] = tmp[31:0];
5203  *   }
5204  * }
5205  * Rd = rst[31:0]; // RV32
5206  * Rd = SE64(rst[31:0]); // RV64
5207  * ~~~
5208  *
5209  * \param [in]  a    int type of value stored in a
5210  * \param [in]  b    int type of value stored in b
5211  * \return value stored in long type
5212  */
__RV_KSLRAW_U(int a,int b)5213 __STATIC_FORCEINLINE long __RV_KSLRAW_U(int a, int b)
5214 {
5215     register long result;
5216     __ASM volatile("kslraw.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5217     return result;
5218 }
5219 /* ===== Inline Function End for 3.62. KSLRAW.u ===== */
5220 
5221 /* ===== Inline Function Start for 3.63. KSTAS16 ===== */
5222 /**
5223  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
5224  * \brief KSTAS16 (SIMD 16-bit Signed Saturating Straight Addition & Subtraction)
5225  * \details
5226  * **Type**: SIMD
5227  *
5228  * **Syntax**:\n
5229  * ~~~
5230  * KSTAS16 Rd, Rs1, Rs2
5231  * ~~~
5232  *
5233  * **Purpose**:\n
5234  * Do 16-bit signed integer element saturating addition and 16-bit signed integer element
5235  * saturating subtraction in a 32-bit chunk simultaneously. Operands are from corresponding
5236  * positions in 32-bit chunks.
5237  *
5238  * **Description**:\n
5239  * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in
5240  * Rs1 with the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2; at the same time, it
5241  * subtracts the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2 from the 16-bit signed
5242  * integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number
5243  * range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated
5244  * results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit chunks in Rd for
5245  * subtraction.
5246  *
5247  * **Operations**:\n
5248  * ~~~
5249  * res1 = Rs1.W[x][31:16] + Rs2.W[x][31:16];
5250  * res2 = Rs1.W[x][15:0] - Rs2.W[x][15:0];
5251  * for (res in [res1, res2]) {
5252  *   if (res > (2^15)-1) {
5253  *     res = (2^15)-1;
5254  *     OV = 1;
5255  *   } else if (res < -2^15) {
5256  *     res = -2^15;
5257  *     OV = 1;
5258  *   }
5259  * }
5260  * Rd.W[x][31:16] = res1;
5261  * Rd.W[x][15:0] = res2;
5262  * for RV32, x=0
5263  * for RV64, x=1...0
5264  * ~~~
5265  *
5266  * \param [in]  a    unsigned long type of value stored in a
5267  * \param [in]  b    unsigned long type of value stored in b
5268  * \return value stored in unsigned long type
5269  */
__RV_KSTAS16(unsigned long a,unsigned long b)5270 __STATIC_FORCEINLINE unsigned long __RV_KSTAS16(unsigned long a, unsigned long b)
5271 {
5272     register unsigned long result;
5273     __ASM volatile("kstas16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5274     return result;
5275 }
5276 /* ===== Inline Function End for 3.63. KSTAS16 ===== */
5277 
5278 /* ===== Inline Function Start for 3.64. KSTSA16 ===== */
5279 /**
5280  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
5281  * \brief KSTSA16 (SIMD 16-bit Signed Saturating Straight Subtraction & Addition)
5282  * \details
5283  * **Type**: SIMD
5284  *
5285  * **Syntax**:\n
5286  * ~~~
5287  * KSTSA16 Rd, Rs1, Rs2
5288  * ~~~
5289  *
5290  * **Purpose**:\n
5291  * Do 16-bit signed integer element saturating subtraction and 16-bit signed integer element
5292  * saturating addition in a 32-bit chunk simultaneously. Operands are from corresponding positions in
5293  * 32-bit chunks.
5294  *
5295  * **Description**:\n
5296  * This instruction subtracts the 16-bit signed integer element in [31:16] of 32-bit chunks
5297  * in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1; at the same time, it
5298  * adds the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2 with the 16-bit signed integer
5299  * element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number range (-2^15
5300  * <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated results are
5301  * written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks in Rd for
5302  * addition.
5303  *
5304  * **Operations**:\n
5305  * ~~~
5306  * res1 = Rs1.W[x][31:16] - Rs2.W[x][31:16];
5307  * res2 = Rs1.W[x][15:0] + Rs2.W[x][15:0];
5308  * for (res in [res1, res2]) {
5309  *   if (res > (2^15)-1) {
5310  *     res = (2^15)-1;
5311  *     OV = 1;
5312  *   } else if (res < -2^15) {
5313  *     res = -2^15;
5314  *     OV = 1;
5315  *   }
5316  * }
5317  * Rd.W[x][31:16] = res1;
5318  * Rd.W[x][15:0] = res2;
5319  * for RV32, x=0
5320  * for RV64, x=1...0
5321  * ~~~
5322  *
5323  * \param [in]  a    unsigned long type of value stored in a
5324  * \param [in]  b    unsigned long type of value stored in b
5325  * \return value stored in unsigned long type
5326  */
__RV_KSTSA16(unsigned long a,unsigned long b)5327 __STATIC_FORCEINLINE unsigned long __RV_KSTSA16(unsigned long a, unsigned long b)
5328 {
5329     register unsigned long result;
5330     __ASM volatile("kstsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5331     return result;
5332 }
5333 /* ===== Inline Function End for 3.64. KSTSA16 ===== */
5334 
5335 /* ===== Inline Function Start for 3.65. KSUB8 ===== */
5336 /**
5337  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
5338  * \brief KSUB8 (SIMD 8-bit Signed Saturating Subtraction)
5339  * \details
5340  * **Type**: SIMD
5341  *
5342  * **Syntax**:\n
5343  * ~~~
5344  * KSUB8 Rd, Rs1, Rs2
5345  * ~~~
5346  *
5347  * **Purpose**:\n
5348  * Do 8-bit signed elements saturating subtractions simultaneously.
5349  *
5350  * **Description**:\n
5351  * This instruction subtracts the 8-bit signed integer elements in Rs2 from the 8-bit
 * signed integer elements in Rs1. If any of the results are beyond the Q7 number range (-2^7 <= Q7 <=
 * 2^7-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
5354  *
5355  * **Operations**:\n
5356  * ~~~
5357  * res[x] = Rs1.B[x] - Rs2.B[x];
5358  * if (res[x] > (2^7)-1) {
5359  *   res[x] = (2^7)-1;
5360  *   OV = 1;
5361  * } else if (res[x] < -2^7) {
5362  *   res[x] = -2^7;
5363  *   OV = 1;
5364  * }
5365  * Rd.B[x] = res[x];
5366  * for RV32: x=3...0,
5367  * for RV64: x=7...0
5368  * ~~~
5369  *
5370  * \param [in]  a    unsigned long type of value stored in a
5371  * \param [in]  b    unsigned long type of value stored in b
5372  * \return value stored in unsigned long type
5373  */
__RV_KSUB8(unsigned long a,unsigned long b)5374 __STATIC_FORCEINLINE unsigned long __RV_KSUB8(unsigned long a, unsigned long b)
5375 {
5376     register unsigned long result;
5377     __ASM volatile("ksub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5378     return result;
5379 }
5380 /* ===== Inline Function End for 3.65. KSUB8 ===== */
5381 
5382 /* ===== Inline Function Start for 3.66. KSUB16 ===== */
5383 /**
5384  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
5385  * \brief KSUB16 (SIMD 16-bit Signed Saturating Subtraction)
5386  * \details
5387  * **Type**: SIMD
5388  *
5389  * **Syntax**:\n
5390  * ~~~
5391  * KSUB16 Rd, Rs1, Rs2
5392  * ~~~
5393  *
5394  * **Purpose**:\n
5395  * Do 16-bit signed integer elements saturating subtractions simultaneously.
5396  *
5397  * **Description**:\n
5398  * This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit
5399  * signed integer elements in Rs1. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <=
5400  * 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to
5401  * Rd.
5402  *
5403  * **Operations**:\n
5404  * ~~~
5405  * res[x] = Rs1.H[x] - Rs2.H[x];
5406  * if (res[x] > (2^15)-1) {
5407  *   res[x] = (2^15)-1;
5408  *   OV = 1;
5409  * } else if (res[x] < -2^15) {
5410  *   res[x] = -2^15;
5411  *   OV = 1;
5412  * }
5413  * Rd.H[x] = res[x];
5414  * for RV32: x=1...0,
5415  * for RV64: x=3...0
5416  * ~~~
5417  *
5418  * \param [in]  a    unsigned long type of value stored in a
5419  * \param [in]  b    unsigned long type of value stored in b
5420  * \return value stored in unsigned long type
5421  */
__RV_KSUB16(unsigned long a,unsigned long b)5422 __STATIC_FORCEINLINE unsigned long __RV_KSUB16(unsigned long a, unsigned long b)
5423 {
5424     register unsigned long result;
5425     __ASM volatile("ksub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5426     return result;
5427 }
5428 /* ===== Inline Function End for 3.66. KSUB16 ===== */
5429 
5430 /* ===== Inline Function Start for 3.67. KSUB64 ===== */
5431 /**
5432  * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
5433  * \brief KSUB64 (64-bit Signed Saturating Subtraction)
5434  * \details
5435  * **Type**: DSP (64-bit Profile)
5436  *
5437  * **Syntax**:\n
5438  * ~~~
5439  * KSUB64 Rd, Rs1, Rs2
5440  * ~~~
5441  *
5442  * **Purpose**:\n
5443  * Perform a 64-bit signed integer subtraction. The result is saturated to the Q63 range.
5444  *
5445  * **RV32 Description**:\n
5446  * This instruction subtracts the 64-bit signed integer of an even/odd pair of
5447  * registers specified by Rs2(4,1) from the 64-bit signed integer of an even/odd pair of registers
5448  * specified by Rs1(4,1). If the 64-bit result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is
5449  * saturated to the range and the OV bit is set to 1. The saturated result is then written to an even/odd
5450  * pair of registers specified by Rd(4,1).
5451  * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
5452  * includes register 2d and 2d+1.
5453  * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
5454  * register of the pair contains the low 32-bit of the operand.
5455  *
5456  * **RV64 Description**:\n
5457  * This instruction subtracts the 64-bit signed integer of Rs2 from the 64-bit signed
5458  * integer of Rs1. If the 64-bit result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated
5459  * to the range and the OV bit is set to 1. The saturated result is then written to Rd.
5460  *
5461  * **Operations**:\n
5462  * ~~~
5463  * RV32:
5464  * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
5465  * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
5466  * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
5467  * result = R[a_H].R[a_L] - R[b_H].R[b_L];
5468  * if (result > (2^63)-1) {
5469  *   result = (2^63)-1; OV = 1;
5470  * } else if (result < -2^63) {
5471  *   result = -2^63; OV = 1;
5472  * }
5473  * R[t_H].R[t_L] = result;
5474  * RV64:
5475  * result = Rs1 - Rs2;
5476  * if (result > (2^63)-1) {
5477  *   result = (2^63)-1; OV = 1;
5478  * } else if (result < -2^63) {
5479  *   result = -2^63; OV = 1;
5480  * }
5481  * Rd = result;
5482  * ~~~
5483  *
5484  * \param [in]  a    long long type of value stored in a
5485  * \param [in]  b    long long type of value stored in b
5486  * \return value stored in long long type
5487  */
__RV_KSUB64(long long a,long long b)5488 __STATIC_FORCEINLINE long long __RV_KSUB64(long long a, long long b)
5489 {
5490     register long long result;
5491     __ASM volatile("ksub64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5492     return result;
5493 }
5494 /* ===== Inline Function End for 3.67. KSUB64 ===== */
5495 
5496 /* ===== Inline Function Start for 3.68. KSUBH ===== */
5497 /**
5498  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
5499  * \brief KSUBH (Signed Subtraction with Q15 Saturation)
5500  * \details
5501  * **Type**: DSP
5502  *
5503  * **Syntax**:\n
5504  * ~~~
5505  * KSUBH Rd, Rs1, Rs2
5506  * ~~~
5507  *
5508  * **Purpose**:\n
5509  * Subtract the signed lower 32-bit content of two registers with Q15 saturation.
5510  *
5511  * **Description**:\n
5512  * The signed lower 32-bit content of Rs2 is subtracted from the signed lower 32-bit
5513  * content of Rs1. And the result is saturated to the 16-bit signed integer range of [-2^15, 2^15-1] and then
5514  * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
5515  *
5516  * **Operations**:\n
5517  * ~~~
5518  * tmp = Rs1.W[0] - Rs2.W[0];
5519  * if (tmp > (2^15)-1) {
5520  *   res = (2^15)-1;
5521  *   OV = 1;
5522  * } else if (tmp < -2^15) {
5523  *   res = -2^15;
 *   OV = 1;
5525  * } else {
5526  *   res = tmp;
5527  * }
5528  * Rd = SE(res[15:0]);
5529  * ~~~
5530  *
5531  * \param [in]  a    int type of value stored in a
5532  * \param [in]  b    int type of value stored in b
5533  * \return value stored in long type
5534  */
__RV_KSUBH(int a,int b)5535 __STATIC_FORCEINLINE long __RV_KSUBH(int a, int b)
5536 {
5537     register long result;
5538     __ASM volatile("ksubh %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5539     return result;
5540 }
5541 /* ===== Inline Function End for 3.68. KSUBH ===== */
5542 
5543 /* ===== Inline Function Start for 3.69. KSUBW ===== */
5544 /**
5545  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
5546  * \brief KSUBW (Signed Subtraction with Q31 Saturation)
5547  * \details
5548  * **Type**: DSP
5549  *
5550  * **Syntax**:\n
5551  * ~~~
5552  * KSUBW Rd, Rs1, Rs2
5553  * ~~~
5554  *
5555  * **Purpose**:\n
5556  * Subtract the signed lower 32-bit content of two registers with Q31 saturation.
5557  *
5558  * **Description**:\n
5559  * The signed lower 32-bit content of Rs2 is subtracted from the signed lower 32-bit
5560  * content of Rs1. And the result is saturated to the 32-bit signed integer range of [-2^31, 2^31-1] and then
 * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
5562  *
5563  * **Operations**:\n
5564  * ~~~
5565  * tmp = Rs1.W[0] - Rs2.W[0];
5566  * if (tmp > (2^31)-1) {
5567  *   res = (2^31)-1;
5568  *   OV = 1;
5569  * } else if (tmp < -2^31) {
 *   res = -2^31;
 *   OV = 1;
5572  * } else {
5573  *   res = tmp;
5574  * }
5575  * Rd = res[31:0]; // RV32
5576  * Rd = SE(res[31:0]); // RV64
5577  * ~~~
5578  *
5579  * \param [in]  a    int type of value stored in a
5580  * \param [in]  b    int type of value stored in b
5581  * \return value stored in long type
5582  */
__RV_KSUBW(int a,int b)5583 __STATIC_FORCEINLINE long __RV_KSUBW(int a, int b)
5584 {
5585     register long result;
5586     __ASM volatile("ksubw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5587     return result;
5588 }
5589 /* ===== Inline Function End for 3.69. KSUBW ===== */
5590 
5591 /* ===== Inline Function Start for 3.70.1. KWMMUL ===== */
5592 /**
5593  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
5594  * \brief KWMMUL (SIMD Saturating MSW Signed Multiply Word & Double)
5595  * \details
5596  * **Type**: SIMD
5597  *
5598  * **Syntax**:\n
5599  * ~~~
5600  * KWMMUL Rd, Rs1, Rs2
5601  * KWMMUL.u Rd, Rs1, Rs2
5602  * ~~~
5603  *
5604  * **Purpose**:\n
5605  * Multiply the signed 32-bit integer elements of two registers, shift the results left 1-bit,
5606  * saturate, and write the most significant 32-bit results to a register. The `.u` form additionally
 * rounds up the multiplication results from the most significant discarded bit.
5608  *
5609  * **Description**:\n
5610  * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2. It then shifts
5611  * the multiplication results one bit to the left and takes the most significant 32-bit results. If the
5612  * shifted result is greater than 2^31-1, it is saturated to 2^31-1 and the OV flag is set to 1. The final element
5613  * result is written to Rd. The 32-bit elements of Rs1 and Rs2 are treated as signed integers. The `.u`
5614  * form of the instruction additionally rounds up the 64-bit multiplication results by adding a 1 to bit
5615  * 30 before the shift and saturation operations.
5616  *
5617  * **Operations**:\n
5618  * ~~~
5619  * if ((0x80000000 != Rs1.W[x]) | (0x80000000 != Rs2.W[x])) {
5620  *   Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
5621  *   if (`.u` form) {
5622  *     Round[x][33:0] = Mres[x][63:30] + 1;
5623  *     Rd.W[x] = Round[x][32:1];
5624  *   } else {
5625  *     Rd.W[x] = Mres[x][62:31];
5626  *   }
5627  * } else {
5628  *   Rd.W[x] = 0x7fffffff;
5629  *   OV = 1;
5630  * }
5631  * for RV32: x=0
5632  * for RV64: x=1...0
5633  * ~~~
5634  *
5635  * \param [in]  a    long type of value stored in a
5636  * \param [in]  b    long type of value stored in b
5637  * \return value stored in long type
5638  */
__RV_KWMMUL(long a,long b)5639 __STATIC_FORCEINLINE long __RV_KWMMUL(long a, long b)
5640 {
5641     register long result;
5642     __ASM volatile("kwmmul %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5643     return result;
5644 }
5645 /* ===== Inline Function End for 3.70.1. KWMMUL ===== */
5646 
5647 /* ===== Inline Function Start for 3.70.2. KWMMUL.u ===== */
5648 /**
5649  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
5650  * \brief KWMMUL.u (SIMD Saturating MSW Signed Multiply Word & Double with Rounding)
5651  * \details
5652  * **Type**: SIMD
5653  *
5654  * **Syntax**:\n
5655  * ~~~
5656  * KWMMUL Rd, Rs1, Rs2
5657  * KWMMUL.u Rd, Rs1, Rs2
5658  * ~~~
5659  *
5660  * **Purpose**:\n
5661  * Multiply the signed 32-bit integer elements of two registers, shift the results left 1-bit,
5662  * saturate, and write the most significant 32-bit results to a register. The `.u` form additionally
 * rounds up the multiplication results from the most significant discarded bit.
5664  *
5665  * **Description**:\n
5666  * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2. It then shifts
5667  * the multiplication results one bit to the left and takes the most significant 32-bit results. If the
5668  * shifted result is greater than 2^31-1, it is saturated to 2^31-1 and the OV flag is set to 1. The final element
5669  * result is written to Rd. The 32-bit elements of Rs1 and Rs2 are treated as signed integers. The `.u`
5670  * form of the instruction additionally rounds up the 64-bit multiplication results by adding a 1 to bit
5671  * 30 before the shift and saturation operations.
5672  *
5673  * **Operations**:\n
5674  * ~~~
5675  * if ((0x80000000 != Rs1.W[x]) | (0x80000000 != Rs2.W[x])) {
5676  *   Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
5677  *   if (`.u` form) {
5678  *     Round[x][33:0] = Mres[x][63:30] + 1;
5679  *     Rd.W[x] = Round[x][32:1];
5680  *   } else {
5681  *     Rd.W[x] = Mres[x][62:31];
5682  *   }
5683  * } else {
5684  *   Rd.W[x] = 0x7fffffff;
5685  *   OV = 1;
5686  * }
5687  * for RV32: x=0
5688  * for RV64: x=1...0
5689  * ~~~
5690  *
5691  * \param [in]  a    long type of value stored in a
5692  * \param [in]  b    long type of value stored in b
5693  * \return value stored in long type
5694  */
__RV_KWMMUL_U(long a,long b)5695 __STATIC_FORCEINLINE long __RV_KWMMUL_U(long a, long b)
5696 {
5697     register long result;
5698     __ASM volatile("kwmmul.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5699     return result;
5700 }
5701 /* ===== Inline Function End for 3.70.2. KWMMUL.u ===== */
5702 
5703 /* ===== Inline Function Start for 3.71. MADDR32 ===== */
5704 /**
5705  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
5706  * \brief MADDR32 (Multiply and Add to 32-Bit Word)
5707  * \details
5708  * **Type**: DSP
5709  *
5710  * **Syntax**:\n
5711  * ~~~
5712  * MADDR32 Rd, Rs1, Rs2
5713  * ~~~
5714  *
5715  * **Purpose**:\n
5716  * Multiply the 32-bit contents of two registers and add the lower 32-bit multiplication result
5717  * to the 32-bit content of a destination register. Write the final result back to the destination register.
5718  *
5719  * **Description**:\n
5720  * This instruction multiplies the lower 32-bit content of Rs1 with that of Rs2. It adds the
5721  * lower 32-bit multiplication result to the lower 32-bit content of Rd and writes the final result (RV32)
5722  * or sign-extended result (RV64) back to Rd. The contents of Rs1 and Rs2 can be either signed or
5723  * unsigned integers.
5724  *
5725  * **Operations**:\n
5726  * ~~~
5727  * RV32:
5728  * Mresult = Rs1 * Rs2;
5729  * Rd = Rd + Mresult.W[0];
5730  * RV64:
5731  * Mresult = Rs1.W[0] * Rs2.W[0];
5732  * tres[31:0] = Rd.W[0] + Mresult.W[0];
5733  * Rd = SE64(tres[31:0]);
5734  * ~~~
5735  *
5736  * \param [in]  t    unsigned long type of value stored in t
5737  * \param [in]  a    unsigned long type of value stored in a
5738  * \param [in]  b    unsigned long type of value stored in b
5739  * \return value stored in unsigned long type
5740  */
__RV_MADDR32(unsigned long t,unsigned long a,unsigned long b)5741 __STATIC_FORCEINLINE unsigned long __RV_MADDR32(unsigned long t, unsigned long a, unsigned long b)
5742 {
5743     __ASM volatile("maddr32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
5744     return t;
5745 }
5746 /* ===== Inline Function End for 3.71. MADDR32 ===== */
5747 
5748 /* ===== Inline Function Start for 3.72. MAXW ===== */
5749 /**
5750  * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
5751  * \brief MAXW (32-bit Signed Word Maximum)
5752  * \details
5753  * **Type**: DSP
5754  *
5755  * **Syntax**:\n
5756  * ~~~
5757  * MAXW Rd, Rs1, Rs2
5758  * ~~~
5759  *
5760  * **Purpose**:\n
5761  * Get the larger value from the 32-bit contents of two general registers.
5762  *
5763  * **Description**:\n
5764  * This instruction compares two signed 32-bit integers stored in Rs1 and Rs2, picks the
5765  * larger value as the result, and writes the result to Rd.
5766  *
5767  * **Operations**:\n
5768  * ~~~
5769  * if (Rs1.W[0] >= Rs2.W[0]) {
5770  *   Rd = SE(Rs1.W[0]);
5771  * } else {
5772  *   Rd = SE(Rs2.W[0]);
5773  * }
5774  * ~~~
5775  *
5776  * \param [in]  a    int type of value stored in a
5777  * \param [in]  b    int type of value stored in b
5778  * \return value stored in long type
5779  */
__RV_MAXW(int a,int b)5780 __STATIC_FORCEINLINE long __RV_MAXW(int a, int b)
5781 {
5782     register long result;
5783     __ASM volatile("maxw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5784     return result;
5785 }
5786 /* ===== Inline Function End for 3.72. MAXW ===== */
5787 
5788 /* ===== Inline Function Start for 3.73. MINW ===== */
5789 /**
5790  * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
5791  * \brief MINW (32-bit Signed Word Minimum)
5792  * \details
5793  * **Type**: DSP
5794  *
5795  * **Syntax**:\n
5796  * ~~~
5797  * MINW Rd, Rs1, Rs2
5798  * ~~~
5799  *
5800  * **Purpose**:\n
5801  * Get the smaller value from the 32-bit contents of two general registers.
5802  *
5803  * **Description**:\n
5804  * This instruction compares two signed 32-bit integers stored in Rs1 and Rs2, picks the
5805  * smaller value as the result, and writes the result to Rd.
5806  *
5807  * **Operations**:\n
5808  * ~~~
5809  * if (Rs1.W[0] >= Rs2.W[0]) { Rd = SE(Rs2.W[0]); } else { Rd = SE(Rs1.W[0]); }
5810  * ~~~
5811  *
5812  * \param [in]  a    int type of value stored in a
5813  * \param [in]  b    int type of value stored in b
5814  * \return value stored in long type
5815  */
__RV_MINW(int a,int b)5816 __STATIC_FORCEINLINE long __RV_MINW(int a, int b)
5817 {
5818     register long result;
5819     __ASM volatile("minw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5820     return result;
5821 }
5822 /* ===== Inline Function End for 3.73. MINW ===== */
5823 
5824 /* ===== Inline Function Start for 3.74. MSUBR32 ===== */
5825 /**
5826  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
5827  * \brief MSUBR32 (Multiply and Subtract from 32-Bit Word)
5828  * \details
5829  * **Type**: DSP
5830  *
5831  * **Syntax**:\n
5832  * ~~~
5833  * MSUBR32 Rd, Rs1, Rs2
5834  * ~~~
5835  *
5836  * **Purpose**:\n
5837  * Multiply the 32-bit contents of two registers and subtract the lower 32-bit multiplication
5838  * result from the 32-bit content of a destination register. Write the final result back to the destination
5839  * register.
5840  *
5841  * **Description**:\n
5842  * This instruction multiplies the lower 32-bit content of Rs1 with that of Rs2, subtracts
5843  * the lower 32-bit multiplication result from the lower 32-bit content of Rd, then writes the final
5844  * result (RV32) or sign-extended result (RV64) back to Rd. The contents of Rs1 and Rs2 can be either
5845  * signed or unsigned integers.
5846  *
5847  * **Operations**:\n
5848  * ~~~
5849  * RV32:
5850  * Mresult = Rs1 * Rs2;
5851  * Rd = Rd - Mresult.W[0];
5852  * RV64:
5853  * Mresult = Rs1.W[0] * Rs2.W[0];
5854  * tres[31:0] = Rd.W[0] - Mresult.W[0];
5855  * Rd = SE64(tres[31:0]);
5856  * ~~~
5857  *
5858  * \param [in]  t    unsigned long type of value stored in t
5859  * \param [in]  a    unsigned long type of value stored in a
5860  * \param [in]  b    unsigned long type of value stored in b
5861  * \return value stored in unsigned long type
5862  */
__RV_MSUBR32(unsigned long t,unsigned long a,unsigned long b)5863 __STATIC_FORCEINLINE unsigned long __RV_MSUBR32(unsigned long t, unsigned long a, unsigned long b)
5864 {
5865     __ASM volatile("msubr32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
5866     return t;
5867 }
5868 /* ===== Inline Function End for 3.74. MSUBR32 ===== */
5869 
5870 /* ===== Inline Function Start for 3.75. MULR64 ===== */
5871 /**
5872  * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
5873  * \brief MULR64 (Multiply Word Unsigned to 64-bit Data)
5874  * \details
5875  * **Type**: DSP
5876  *
5877  * **Syntax**:\n
5878  * ~~~
5879  * MULR64 Rd, Rs1, Rs2
5880  * ~~~
5881  *
5882  * **Purpose**:\n
5883  * Multiply the 32-bit unsigned integer contents of two registers and write the 64-bit result.
5884  *
5885  * **RV32 Description**:\n
5886  * This instruction multiplies the 32-bit content of Rs1 with that of Rs2 and writes the 64-bit
5887  * multiplication result to an even/odd pair of registers containing Rd. Rd(4,1) index d determines the
5888  * even/odd pair group of the two registers. Specifically, the register pair includes register 2d and
5889  * 2d+1.
5890  * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
5891  * of the pair contains the low 32-bit of the result.
5892  * The lower 32-bit contents of Rs1 and Rs2 are treated as unsigned integers.
5893  *
5894  * **RV64 Description**:\n
5895  * This instruction multiplies the lower 32-bit content of Rs1 with that of Rs2 and writes the 64-bit
5896  * multiplication result to Rd.
5897  * The lower 32-bit contents of Rs1 and Rs2 are treated as unsigned integers.
5898  *
5899  * **Operations**:\n
5900  * ~~~
5901  * RV32:
5902  * Mresult = CONCAT(1`b0,Rs1) u* CONCAT(1`b0,Rs2);
5903  * R[Rd(4,1).1(0)][31:0] = Mresult[63:32];
5904  * R[Rd(4,1).0(0)][31:0] = Mresult[31:0];
5905  * RV64:
 * Mresult = CONCAT(1`b0,Rs1.W[0]) u* CONCAT(1`b0,Rs2.W[0]);
 * Rd = Mresult[63:0];
5908  * ~~~
5909  *
5910  * \param [in]  a    unsigned long type of value stored in a
5911  * \param [in]  b    unsigned long type of value stored in b
5912  * \return value stored in unsigned long long type
5913  */
__RV_MULR64(unsigned long a,unsigned long b)5914 __STATIC_FORCEINLINE unsigned long long __RV_MULR64(unsigned long a, unsigned long b)
5915 {
5916     register unsigned long long result;
5917     __ASM volatile("mulr64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5918     return result;
5919 }
5920 /* ===== Inline Function End for 3.75. MULR64 ===== */
5921 
5922 /* ===== Inline Function Start for 3.76. MULSR64 ===== */
5923 /**
5924  * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
5925  * \brief MULSR64 (Multiply Word Signed to 64-bit Data)
5926  * \details
5927  * **Type**: DSP
5928  *
5929  * **Syntax**:\n
5930  * ~~~
5931  * MULSR64 Rd, Rs1, Rs2
5932  * ~~~
5933  *
5934  * **Purpose**:\n
5935  * Multiply the 32-bit signed integer contents of two registers and write the 64-bit result.
5936  *
5937  * **RV32 Description**:\n
5938  * This instruction multiplies the lower 32-bit content of Rs1 with the lower 32-bit content of Rs2 and
5939  * writes the 64-bit multiplication result to an even/odd pair of registers containing Rd. Rd(4,1) index d
5940  * determines the even/odd pair group of the two registers. Specifically, the register pair includes
5941  * register 2d and 2d+1.
5942  * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
5943  * of the pair contains the low 32-bit of the result.
5944  * The lower 32-bit contents of Rs1 and Rs2 are treated as signed integers.
5945  *
5946  * **RV64 Description**:\n
5947  * This instruction multiplies the lower 32-bit content of Rs1 with the lower 32-bit content of Rs2 and
5948  * writes the 64-bit multiplication result to Rd.
5949  * The lower 32-bit contents of Rs1 and Rs2 are treated as signed integers.
5950  *
5951  * **Operations**:\n
5952  * ~~~
5953  * RV32:
 * Mresult = Rs1 s* Rs2;
 * R[Rd(4,1).1(0)][31:0] = Mresult[63:32];
 * R[Rd(4,1).0(0)][31:0] = Mresult[31:0];
 * RV64:
 * Mresult = Rs1.W[0] s* Rs2.W[0];
5959  * Rd = Mresult[63:0];
5960  * ~~~
5961  *
5962  * \param [in]  a    long type of value stored in a
5963  * \param [in]  b    long type of value stored in b
5964  * \return value stored in long long type
5965  */
__RV_MULSR64(long a,long b)5966 __STATIC_FORCEINLINE long long __RV_MULSR64(long a, long b)
5967 {
5968     register long long result;
5969     __ASM volatile("mulsr64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5970     return result;
5971 }
5972 /* ===== Inline Function End for 3.76. MULSR64 ===== */
5973 
5974 /* ===== Inline Function Start for 3.77. PBSAD ===== */
5975 /**
5976  * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
5977  * \brief PBSAD (Parallel Byte Sum of Absolute Difference)
5978  * \details
5979  * **Type**: DSP
5980  *
5981  * **Syntax**:\n
5982  * ~~~
5983  * PBSAD Rd, Rs1, Rs2
5984  * ~~~
5985  *
5986  * **Purpose**:\n
5987  * Calculate the sum of absolute difference of unsigned 8-bit data elements.
5988  *
5989  * **Description**:\n
5990  * This instruction subtracts the un-signed 8-bit elements of Rs2 from those of Rs1. Then
5991  * it adds the absolute value of each difference together and writes the result to Rd.
5992  *
5993  * **Operations**:\n
5994  * ~~~
5995  * absdiff[x] = ABS(Rs1.B[x] - Rs2.B[x]);
5996  * Rd = SUM(absdiff[x]);
5997  * for RV32: x=3...0,
5998  * for RV64: x=7...0
5999  * ~~~
6000  *
6001  * \param [in]  a    unsigned long type of value stored in a
6002  * \param [in]  b    unsigned long type of value stored in b
6003  * \return value stored in unsigned long type
6004  */
__RV_PBSAD(unsigned long a,unsigned long b)6005 __STATIC_FORCEINLINE unsigned long __RV_PBSAD(unsigned long a, unsigned long b)
6006 {
6007     register unsigned long result;
6008     __ASM volatile("pbsad %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6009     return result;
6010 }
6011 /* ===== Inline Function End for 3.77. PBSAD ===== */
6012 
6013 /* ===== Inline Function Start for 3.78. PBSADA ===== */
6014 /**
6015  * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
6016  * \brief PBSADA (Parallel Byte Sum of Absolute Difference Accum)
6017  * \details
6018  * **Type**: DSP
6019  *
6020  * **Syntax**:\n
6021  * ~~~
6022  * PBSADA Rd, Rs1, Rs2
6023  * ~~~
6024  *
6025  * **Purpose**:\n
6026  * Calculate the sum of absolute difference of four unsigned 8-bit data elements and
6027  * accumulate it into a register.
6028  *
6029  * **Description**:\n
6030  * This instruction subtracts the un-signed 8-bit elements of Rs2 from those of Rs1. It
6031  * then adds the absolute value of each difference together along with the content of Rd and writes the
6032  * accumulated result back to Rd.
6033  *
6034  * **Operations**:\n
6035  * ~~~
6036  * absdiff[x] = ABS(Rs1.B[x] - Rs2.B[x]);
6037  * Rd = Rd + SUM(absdiff[x]);
6038  * for RV32: x=3...0,
6039  * for RV64: x=7...0
6040  * ~~~
6041  *
6042  * \param [in]  t    unsigned long type of value stored in t
6043  * \param [in]  a    unsigned long type of value stored in a
6044  * \param [in]  b    unsigned long type of value stored in b
6045  * \return value stored in unsigned long type
6046  */
__RV_PBSADA(unsigned long t,unsigned long a,unsigned long b)6047 __STATIC_FORCEINLINE unsigned long __RV_PBSADA(unsigned long t, unsigned long a, unsigned long b)
6048 {
6049     __ASM volatile("pbsada %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
6050     return t;
6051 }
6052 /* ===== Inline Function End for 3.78. PBSADA ===== */
6053 
6054 /* ===== Inline Function Start for 3.79.1. PKBB16 ===== */
6055 /**
6056  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK
6057  * \brief PKBB16 (Pack Two 16-bit Data from Both Bottom Half)
6058  * \details
6059  * **Type**: DSP
6060  *
6061  * **Syntax**:\n
6062  * ~~~
6063  * PKBB16 Rd, Rs1, Rs2
6064  * PKBT16 Rd, Rs1, Rs2
6065  * PKTT16 Rd, Rs1, Rs2
6066  * PKTB16 Rd, Rs1, Rs2
6067  * ~~~
6068  *
6069  * **Purpose**:\n
6070  * Pack 16-bit data from 32-bit chunks in two registers.
6071  * * PKBB16: bottom.bottom
6072  * * PKBT16 bottom.top
6073  * * PKTT16 top.top
6074  * * PKTB16 top.bottom
6075  *
6076  * **Description**:\n
6077  * (PKBB16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to
6078  * Rd.W[x] [15:0].
6079  * (PKBT16) moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
6080  * (PKTT16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
6081  * (PKTB16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].
6082  *
6083  * **Operations**:\n
6084  * ~~~
6085  * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); // PKBB16
6086  * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); // PKBT16
6087  * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); // PKTB16
6088  * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); // PKTT16
6089  * for RV32: x=0,
6090  * for RV64: x=1...0
6091  * ~~~
6092  *
6093  * \param [in]  a    unsigned long type of value stored in a
6094  * \param [in]  b    unsigned long type of value stored in b
6095  * \return value stored in unsigned long type
6096  */
__RV_PKBB16(unsigned long a,unsigned long b)6097 __STATIC_FORCEINLINE unsigned long __RV_PKBB16(unsigned long a, unsigned long b)
6098 {
6099     register unsigned long result;
6100     __ASM volatile("pkbb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6101     return result;
6102 }
6103 /* ===== Inline Function End for 3.79.1. PKBB16 ===== */
6104 
6105 /* ===== Inline Function Start for 3.79.2. PKBT16 ===== */
6106 /**
6107  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK
6108  * \brief PKBT16 (Pack Two 16-bit Data from Bottom and Top Half)
6109  * \details
6110  * **Type**: DSP
6111  *
6112  * **Syntax**:\n
6113  * ~~~
6114  * PKBB16 Rd, Rs1, Rs2
6115  * PKBT16 Rd, Rs1, Rs2
6116  * PKTT16 Rd, Rs1, Rs2
6117  * PKTB16 Rd, Rs1, Rs2
6118  * ~~~
6119  *
6120  * **Purpose**:\n
6121  * Pack 16-bit data from 32-bit chunks in two registers.
6122  * * PKBB16: bottom.bottom
6123  * * PKBT16 bottom.top
6124  * * PKTT16 top.top
6125  * * PKTB16 top.bottom
6126  *
6127  * **Description**:\n
6128  * (PKBB16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to
6129  * Rd.W[x] [15:0].
6130  * (PKBT16) moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
6131  * (PKTT16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
6132  * (PKTB16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].
6133  *
6134  * **Operations**:\n
6135  * ~~~
6136  * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); // PKBB16
6137  * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); // PKBT16
6138  * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); // PKTB16
6139  * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); // PKTT16
6140  * for RV32: x=0,
6141  * for RV64: x=1...0
6142  * ~~~
6143  *
6144  * \param [in]  a    unsigned long type of value stored in a
6145  * \param [in]  b    unsigned long type of value stored in b
6146  * \return value stored in unsigned long type
6147  */
__RV_PKBT16(unsigned long a,unsigned long b)6148 __STATIC_FORCEINLINE unsigned long __RV_PKBT16(unsigned long a, unsigned long b)
6149 {
6150     register unsigned long result;
6151     __ASM volatile("pkbt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6152     return result;
6153 }
6154 /* ===== Inline Function End for 3.79.2. PKBT16 ===== */
6155 
6156 /* ===== Inline Function Start for 3.79.3. PKTT16 ===== */
6157 /**
6158  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK
6159  * \brief PKTT16 (Pack Two 16-bit Data from Both Top Half)
6160  * \details
6161  * **Type**: DSP
6162  *
6163  * **Syntax**:\n
6164  * ~~~
6165  * PKBB16 Rd, Rs1, Rs2
6166  * PKBT16 Rd, Rs1, Rs2
6167  * PKTT16 Rd, Rs1, Rs2
6168  * PKTB16 Rd, Rs1, Rs2
6169  * ~~~
6170  *
6171  * **Purpose**:\n
6172  * Pack 16-bit data from 32-bit chunks in two registers.
6173  * * PKBB16: bottom.bottom
6174  * * PKBT16 bottom.top
6175  * * PKTT16 top.top
6176  * * PKTB16 top.bottom
6177  *
6178  * **Description**:\n
6179  * (PKBB16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to
6180  * Rd.W[x] [15:0].
6181  * (PKBT16) moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
6182  * (PKTT16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
6183  * (PKTB16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].
6184  *
6185  * **Operations**:\n
6186  * ~~~
6187  * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); // PKBB16
6188  * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); // PKBT16
6189  * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); // PKTB16
6190  * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); // PKTT16
6191  * for RV32: x=0,
6192  * for RV64: x=1...0
6193  * ~~~
6194  *
6195  * \param [in]  a    unsigned long type of value stored in a
6196  * \param [in]  b    unsigned long type of value stored in b
6197  * \return value stored in unsigned long type
6198  */
__RV_PKTT16(unsigned long a,unsigned long b)6199 __STATIC_FORCEINLINE unsigned long __RV_PKTT16(unsigned long a, unsigned long b)
6200 {
6201     register unsigned long result;
6202     __ASM volatile("pktt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6203     return result;
6204 }
6205 /* ===== Inline Function End for 3.79.3. PKTT16 ===== */
6206 
6207 /* ===== Inline Function Start for 3.79.4. PKTB16 ===== */
6208 /**
6209  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK
6210  * \brief PKTB16 (Pack Two 16-bit Data from Top and Bottom Half)
6211  * \details
6212  * **Type**: DSP
6213  *
6214  * **Syntax**:\n
6215  * ~~~
6216  * PKBB16 Rd, Rs1, Rs2
6217  * PKBT16 Rd, Rs1, Rs2
6218  * PKTT16 Rd, Rs1, Rs2
6219  * PKTB16 Rd, Rs1, Rs2
6220  * ~~~
6221  *
6222  * **Purpose**:\n
6223  * Pack 16-bit data from 32-bit chunks in two registers.
6224  * * PKBB16: bottom.bottom
6225  * * PKBT16 bottom.top
6226  * * PKTT16 top.top
6227  * * PKTB16 top.bottom
6228  *
6229  * **Description**:\n
6230  * (PKBB16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to
6231  * Rd.W[x] [15:0].
6232  * (PKBT16) moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
6233  * (PKTT16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
6234  * (PKTB16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].
6235  *
6236  * **Operations**:\n
6237  * ~~~
6238  * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); // PKBB16
6239  * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); // PKBT16
6240  * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); // PKTB16
6241  * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); // PKTT16
6242  * for RV32: x=0,
6243  * for RV64: x=1...0
6244  * ~~~
6245  *
6246  * \param [in]  a    unsigned long type of value stored in a
6247  * \param [in]  b    unsigned long type of value stored in b
6248  * \return value stored in unsigned long type
6249  */
__RV_PKTB16(unsigned long a,unsigned long b)6250 __STATIC_FORCEINLINE unsigned long __RV_PKTB16(unsigned long a, unsigned long b)
6251 {
6252     register unsigned long result;
6253     __ASM volatile("pktb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6254     return result;
6255 }
6256 /* ===== Inline Function End for 3.79.4. PKTB16 ===== */
6257 
6258 /* ===== Inline Function Start for 3.80. RADD8 ===== */
6259 /**
6260  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
6261  * \brief RADD8 (SIMD 8-bit Signed Halving Addition)
6262  * \details
6263  * **Type**: SIMD
6264  *
6265  * **Syntax**:\n
6266  * ~~~
6267  * RADD8 Rd, Rs1, Rs2
6268  * ~~~
6269  *
6270  * **Purpose**:\n
6271  * Do 8-bit signed integer element additions simultaneously. The element results are halved
6272  * to avoid overflow or saturation.
6273  *
6274  * **Description**:\n
6275  * This instruction adds the 8-bit signed integer elements in Rs1 with the 8-bit signed
6276  * integer elements in Rs2. The results are first arithmetically right-shifted by 1 bit and then written to
6277  * Rd.
6278  *
6279  * **Examples**:\n
6280  * ~~~
6281  * * Rs1 = 0x7F, Rs2 = 0x7F, Rd = 0x7F
6282  * * Rs1 = 0x80, Rs2 = 0x80, Rd = 0x80
6283  * * Rs1 = 0x40, Rs2 = 0x80, Rd = 0xE0
6284  * ~~~
6285  *
6286  * **Operations**:\n
6287  * ~~~
6288  * Rd.B[x] = (Rs1.B[x] + Rs2.B[x]) s>> 1; for RV32: x=3...0, for RV64: x=7...0
6289  * ~~~
6290  *
6291  * \param [in]  a    unsigned long type of value stored in a
6292  * \param [in]  b    unsigned long type of value stored in b
6293  * \return value stored in unsigned long type
6294  */
__RV_RADD8(unsigned long a,unsigned long b)6295 __STATIC_FORCEINLINE unsigned long __RV_RADD8(unsigned long a, unsigned long b)
6296 {
6297     register unsigned long result;
6298     __ASM volatile("radd8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6299     return result;
6300 }
6301 /* ===== Inline Function End for 3.80. RADD8 ===== */
6302 
6303 /* ===== Inline Function Start for 3.81. RADD16 ===== */
6304 /**
6305  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
6306  * \brief RADD16 (SIMD 16-bit Signed Halving Addition)
6307  * \details
6308  * **Type**: SIMD
6309  *
6310  * **Syntax**:\n
6311  * ~~~
6312  * RADD16 Rd, Rs1, Rs2
6313  * ~~~
6314  *
6315  * **Purpose**:\n
6316  * Do 16-bit signed integer element additions simultaneously. The results are halved to avoid
6317  * overflow or saturation.
6318  *
6319  * **Description**:\n
6320  * This instruction adds the 16-bit signed integer elements in Rs1 with the 16-bit signed
6321  * integer elements in Rs2. The results are first arithmetically right-shifted by 1 bit and then written to
6322  * Rd.
6323  *
6324  * **Examples**:\n
6325  * ~~~
6326  * * Rs1 = 0x7FFF, Rs2 = 0x7FFF, Rd = 0x7FFF
6327  * * Rs1 = 0x8000, Rs2 = 0x8000, Rd = 0x8000
6328  * * Rs1 = 0x4000, Rs2 = 0x8000, Rd = 0xE000
6329  * ~~~
6330  *
6331  * **Operations**:\n
6332  * ~~~
6333  * Rd.H[x] = (Rs1.H[x] + Rs2.H[x]) s>> 1; for RV32: x=1...0, for RV64: x=3...0
6334  * ~~~
6335  *
6336  * \param [in]  a    unsigned long type of value stored in a
6337  * \param [in]  b    unsigned long type of value stored in b
6338  * \return value stored in unsigned long type
6339  */
__RV_RADD16(unsigned long a,unsigned long b)6340 __STATIC_FORCEINLINE unsigned long __RV_RADD16(unsigned long a, unsigned long b)
6341 {
6342     register unsigned long result;
6343     __ASM volatile("radd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6344     return result;
6345 }
6346 /* ===== Inline Function End for 3.81. RADD16 ===== */
6347 
6348 /* ===== Inline Function Start for 3.82. RADD64 ===== */
6349 /**
6350  * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
6351  * \brief RADD64 (64-bit Signed Halving Addition)
6352  * \details
6353  * **Type**: DSP (64-bit Profile)
6354  *
6355  * **Syntax**:\n
6356  * ~~~
6357  * RADD64 Rd, Rs1, Rs2
6358  * ~~~
6359  *
6360  * **Purpose**:\n
6361  * Add two 64-bit signed integers. The result is halved to avoid overflow or saturation.
6362  *
6363  * **RV32 Description**:\n
6364  * This instruction adds the 64-bit signed integer of an even/odd pair of registers
6365  * specified by Rs1(4,1) with the 64-bit signed integer of an even/odd pair of registers specified by
6366  * Rs2(4,1). The 64-bit addition result is first arithmetically right-shifted by 1 bit and then written to an
6367  * even/odd pair of registers specified by Rd(4,1).
6368  * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register
6369  * pair includes register 2d and 2d+1.
6370  * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
6371  * of the pair contains the low 32-bit of the result.
6372  *
6373  * **RV64 Description**:\n
6374  * This instruction adds the 64-bit signed integer in Rs1 with the 64-bit signed
6375  * integer in Rs2. The 64-bit addition result is first arithmetically right-shifted by 1 bit and then
6376  * written to Rd.
6377  *
6378  * **Operations**:\n
6379  * ~~~
6380  * RV32:
6381  * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
6382  * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
6383  * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
6384  * R[t_H].R[t_L] = (R[a_H].R[a_L] + R[b_H].R[b_L]) s>> 1;
6385  * RV64:
6386  * Rd = (Rs1 + Rs2) s>> 1;
6387  * ~~~
6388  *
6389  * \param [in]  a    long long type of value stored in a
6390  * \param [in]  b    long long type of value stored in b
6391  * \return value stored in long long type
6392  */
__RV_RADD64(long long a,long long b)6393 __STATIC_FORCEINLINE long long __RV_RADD64(long long a, long long b)
6394 {
6395     register long long result;
6396     __ASM volatile("radd64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6397     return result;
6398 }
6399 /* ===== Inline Function End for 3.82. RADD64 ===== */
6400 
6401 /* ===== Inline Function Start for 3.83. RADDW ===== */
6402 /**
6403  * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
6404  * \brief RADDW (32-bit Signed Halving Addition)
6405  * \details
6406  * **Type**: DSP
6407  *
6408  * **Syntax**:\n
6409  * ~~~
6410  * RADDW Rd, Rs1, Rs2
6411  * ~~~
6412  *
6413  * **Purpose**:\n
6414  * Add 32-bit signed integers and the results are halved to avoid overflow or saturation.
6415  *
6416  * **Description**:\n
6417  * This instruction adds the first 32-bit signed integer in Rs1 with the first 32-bit signed
6418  * integer in Rs2. The result is first arithmetically right-shifted by 1 bit and then sign-extended and
6419  * written to Rd.
6420  *
6421  * **Examples**:\n
6422  * ~~~
6423  * * Rs1 = 0x7FFFFFFF, Rs2 = 0x7FFFFFFF, Rd = 0x7FFFFFFF
6424  * * Rs1 = 0x80000000, Rs2 = 0x80000000, Rd = 0x80000000
6425  * * Rs1 = 0x40000000, Rs2 = 0x80000000, Rd = 0xE0000000
6426  * ~~~
6427  *
6428  * **Operations**:\n
6429  * ~~~
6430  * RV32:
6431  * Rd[31:0] = (Rs1[31:0] + Rs2[31:0]) s>> 1;
6432  * RV64:
6433  * resw[31:0] = (Rs1[31:0] + Rs2[31:0]) s>> 1;
6434  * Rd[63:0] = SE(resw[31:0]);
6435  * ~~~
6436  *
6437  * \param [in]  a    int type of value stored in a
6438  * \param [in]  b    int type of value stored in b
6439  * \return value stored in long type
6440  */
__RV_RADDW(int a,int b)6441 __STATIC_FORCEINLINE long __RV_RADDW(int a, int b)
6442 {
6443     register long result;
6444     __ASM volatile("raddw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6445     return result;
6446 }
6447 /* ===== Inline Function End for 3.83. RADDW ===== */
6448 
6449 /* ===== Inline Function Start for 3.84. RCRAS16 ===== */
6450 /**
6451  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
6452  * \brief RCRAS16 (SIMD 16-bit Signed Halving Cross Addition & Subtraction)
6453  * \details
6454  * **Type**: SIMD
6455  *
6456  * **Syntax**:\n
6457  * ~~~
6458  * RCRAS16 Rd, Rs1, Rs2
6459  * ~~~
6460  *
6461  * **Purpose**:\n
6462  * Do 16-bit signed integer element addition and 16-bit signed integer element subtraction in
6463  * a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks. The results
6464  * are halved to avoid overflow or saturation.
6465  *
6466  * **Description**:\n
6467  * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in
6468  * Rs1 with the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2, and subtracts the 16-bit
6469  * signed integer element in [31:16] of 32-bit chunks in Rs2 from the 16-bit signed integer element in
6470  * [15:0] of 32-bit chunks in Rs1. The element results are first arithmetically right-shifted by 1 bit and
6471  * then written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
6472  *
6473  * **Examples**:\n
6474  * ~~~
6475  * Please see `RADD16` and `RSUB16` instructions.
6476  * ~~~
6477  *
6478  * **Operations**:\n
6479  * ~~~
6480  * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][15:0]) s>> 1;
6481  * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][31:16]) s>> 1;
6482  * for RV32, x=0
6483  * for RV64, x=1...0
6484  * ~~~
6485  *
6486  * \param [in]  a    unsigned long type of value stored in a
6487  * \param [in]  b    unsigned long type of value stored in b
6488  * \return value stored in unsigned long type
6489  */
__RV_RCRAS16(unsigned long a,unsigned long b)6490 __STATIC_FORCEINLINE unsigned long __RV_RCRAS16(unsigned long a, unsigned long b)
6491 {
6492     register unsigned long result;
6493     __ASM volatile("rcras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6494     return result;
6495 }
6496 /* ===== Inline Function End for 3.84. RCRAS16 ===== */
6497 
6498 /* ===== Inline Function Start for 3.85. RCRSA16 ===== */
6499 /**
6500  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
6501  * \brief RCRSA16 (SIMD 16-bit Signed Halving Cross Subtraction & Addition)
6502  * \details
6503  * **Type**: SIMD
6504  *
6505  * **Syntax**:\n
6506  * ~~~
6507  * RCRSA16 Rd, Rs1, Rs2
6508  * ~~~
6509  *
6510  * **Purpose**:\n
6511  * Do 16-bit signed integer element subtraction and 16-bit signed integer element addition in
6512  * a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks. The results
6513  * are halved to avoid overflow or saturation.
6514  *
6515  * **Description**:\n
6516  * This instruction subtracts the 16-bit signed integer element in [15:0] of 32-bit chunks
6517  * in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1, and adds the 16-bit
6518  * signed element integer in [15:0] of 32-bit chunks in Rs1 with the 16-bit signed integer element in
6519  * [31:16] of 32-bit chunks in Rs2. The two results are first arithmetically right-shifted by 1 bit and
6520  * then written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
6521  *
6522  * **Examples**:\n
6523  * ~~~
6524  * Please see `RADD16` and `RSUB16` instructions.
6525  * ~~~
6526  *
6527  * **Operations**:\n
6528  * ~~~
6529  * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][15:0]) s>> 1;
6530  * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][31:16]) s>> 1;
6531  * for RV32, x=0
6532  * for RV64, x=1...0
6533  * ~~~
6534  *
6535  * \param [in]  a    unsigned long type of value stored in a
6536  * \param [in]  b    unsigned long type of value stored in b
6537  * \return value stored in unsigned long type
6538  */
__RV_RCRSA16(unsigned long a,unsigned long b)6539 __STATIC_FORCEINLINE unsigned long __RV_RCRSA16(unsigned long a, unsigned long b)
6540 {
6541     register unsigned long result;
6542     __ASM volatile("rcrsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6543     return result;
6544 }
6545 /* ===== Inline Function End for 3.85. RCRSA16 ===== */
6546 
6547 /* ===== Inline Function Start for 3.86. RDOV ===== */
6548 /**
6549  * \ingroup NMSIS_Core_DSP_Intrinsic_OV_FLAG_SC
6550  * \brief RDOV (Read OV flag)
6551  * \details
6552  * **Type**: DSP
6553  *
6554  * **Syntax**:\n
6555  * ~~~
6556  * RDOV Rd  # pseudo mnemonic
6557  * ~~~
6558  *
6559  * **Purpose**:\n
6560  * This pseudo instruction is an alias to `CSRR Rd, ucode` instruction which maps to the real
6561  * instruction of `CSRRS Rd, ucode, x0`.
6562  *
6563  *
6564  * \return value stored in unsigned long type
6565  */
__RV_RDOV(void)6566 __STATIC_FORCEINLINE unsigned long __RV_RDOV(void)
6567 {
6568     register unsigned long result;
6569     __ASM volatile("rdov %0" : "=r"(result));
6570     return result;
6571 }
6572 /* ===== Inline Function End for 3.86. RDOV ===== */
6573 
6574 /* ===== Inline Function Start for 3.87. RSTAS16 ===== */
6575 /**
6576  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
6577  * \brief RSTAS16 (SIMD 16-bit Signed Halving Straight Addition & Subtraction)
6578  * \details
6579  * **Type**: SIMD
6580  *
6581  * **Syntax**:\n
6582  * ~~~
6583  * RSTAS16 Rd, Rs1, Rs2
6584  * ~~~
6585  *
6586  * **Purpose**:\n
6587  * Do 16-bit signed integer element addition and 16-bit signed integer element subtraction in
6588  * a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit chunks. The
6589  * results are halved to avoid overflow or saturation.
6590  *
6591  * **Description**:\n
6592  * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in
6593  * Rs1 with the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2, and subtracts the 16-bit
6594  * signed integer element in [15:0] of 32-bit chunks in Rs2 from the 16-bit signed integer element in
6595  * [15:0] of 32-bit chunks in Rs1. The element results are first arithmetically right-shifted by 1 bit and
6596  * then written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
6597  *
6598  * **Examples**:\n
6599  * ~~~
6600  * Please see `RADD16` and `RSUB16` instructions.
6601  * ~~~
6602  *
6603  * **Operations**:\n
6604  * ~~~
6605  * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][31:16]) s>> 1;
6606  * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][15:0]) s>> 1;
6607  * for RV32, x=0
6608  * for RV64, x=1...0
6609  * ~~~
6610  *
6611  * \param [in]  a    unsigned long type of value stored in a
6612  * \param [in]  b    unsigned long type of value stored in b
6613  * \return value stored in unsigned long type
6614  */
__RV_RSTAS16(unsigned long a,unsigned long b)6615 __STATIC_FORCEINLINE unsigned long __RV_RSTAS16(unsigned long a, unsigned long b)
6616 {
6617     register unsigned long result;
6618     __ASM volatile("rstas16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6619     return result;
6620 }
6621 /* ===== Inline Function End for 3.87. RSTAS16 ===== */
6622 
6623 /* ===== Inline Function Start for 3.88. RSTSA16 ===== */
6624 /**
6625  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
6626  * \brief RSTSA16 (SIMD 16-bit Signed Halving Straight Subtraction & Addition)
6627  * \details
6628  * **Type**: SIMD
6629  *
6630  * **Syntax**:\n
6631  * ~~~
6632  * RSTSA16 Rd, Rs1, Rs2
6633  * ~~~
6634  *
6635  * **Purpose**:\n
6636  * Do 16-bit signed integer element subtraction and 16-bit signed integer element addition in
6637  * a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit chunks. The
6638  * results are halved to avoid overflow or saturation.
6639  *
6640  * **Description**:\n
6641  * This instruction subtracts the 16-bit signed integer element in [31:16] of 32-bit chunks
6642  * in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1, and adds the 16-bit
6643  * signed element integer in [15:0] of 32-bit chunks in Rs1 with the 16-bit signed integer element in
6644  * [15:0] of 32-bit chunks in Rs2. The two results are first arithmetically right-shifted by 1 bit and then
6645  * written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
6646  *
6647  * **Examples**:\n
6648  * ~~~
6649  * Please see `RADD16` and `RSUB16` instructions.
6650  * ~~~
6651  *
6652  * **Operations**:\n
6653  * ~~~
6654  * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][31:16]) s>> 1;
6655  * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][15:0]) s>> 1;
6656  * for RV32, x=0
6657  * for RV64, x=1...0
6658  * ~~~
6659  *
6660  * \param [in]  a    unsigned long type of value stored in a
6661  * \param [in]  b    unsigned long type of value stored in b
6662  * \return value stored in unsigned long type
6663  */
__RV_RSTSA16(unsigned long a,unsigned long b)6664 __STATIC_FORCEINLINE unsigned long __RV_RSTSA16(unsigned long a, unsigned long b)
6665 {
6666     register unsigned long result;
6667     __ASM volatile("rstsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6668     return result;
6669 }
6670 /* ===== Inline Function End for 3.88. RSTSA16 ===== */
6671 
6672 /* ===== Inline Function Start for 3.89. RSUB8 ===== */
6673 /**
6674  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
6675  * \brief RSUB8 (SIMD 8-bit Signed Halving Subtraction)
6676  * \details
6677  * **Type**: SIMD
6678  *
6679  * **Syntax**:\n
6680  * ~~~
6681  * RSUB8 Rd, Rs1, Rs2
6682  * ~~~
6683  *
6684  * **Purpose**:\n
6685  * Do 8-bit signed integer element subtractions simultaneously. The results are halved to
6686  * avoid overflow or saturation.
6687  *
6688  * **Description**:\n
6689  * This instruction subtracts the 8-bit signed integer elements in Rs2 from the 8-bit
6690  * signed integer elements in Rs1. The results are first arithmetically right-shifted by 1 bit and then
6691  * written to Rd.
6692  *
6693  * **Examples**:\n
6694  * ~~~
6695  * * Rs1 = 0x7F, Rs2 = 0x80, Rd = 0x7F
6696  * * Rs1 = 0x80, Rs2 = 0x7F, Rd = 0x80
 * * Rs1 = 0x80, Rs2 = 0x40, Rd = 0xA0
6698  * ~~~
6699  *
6700  * **Operations**:\n
6701  * ~~~
6702  * Rd.B[x] = (Rs1.B[x] - Rs2.B[x]) s>> 1;
6703  * for RV32: x=3...0,
6704  * for RV64: x=7...0
6705  * ~~~
6706  *
6707  * \param [in]  a    unsigned long type of value stored in a
6708  * \param [in]  b    unsigned long type of value stored in b
6709  * \return value stored in unsigned long type
6710  */
__RV_RSUB8(unsigned long a,unsigned long b)6711 __STATIC_FORCEINLINE unsigned long __RV_RSUB8(unsigned long a, unsigned long b)
6712 {
6713     register unsigned long result;
6714     __ASM volatile("rsub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6715     return result;
6716 }
6717 /* ===== Inline Function End for 3.89. RSUB8 ===== */
6718 
6719 /* ===== Inline Function Start for 3.90. RSUB16 ===== */
6720 /**
6721  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
6722  * \brief RSUB16 (SIMD 16-bit Signed Halving Subtraction)
6723  * \details
6724  * **Type**: SIMD
6725  *
6726  * **Syntax**:\n
6727  * ~~~
6728  * RSUB16 Rd, Rs1, Rs2
6729  * ~~~
6730  *
6731  * **Purpose**:\n
6732  * Do 16-bit signed integer element subtractions simultaneously. The results are halved to
6733  * avoid overflow or saturation.
6734  *
6735  * **Description**:\n
6736  * This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit
6737  * signed integer elements in Rs1. The results are first arithmetically right-shifted by 1 bit and then
6738  * written to Rd.
6739  *
6740  * **Examples**:\n
6741  * ~~~
 * * Rs1 = 0x7FFF, Rs2 = 0x8000, Rd = 0x7FFF
 * * Rs1 = 0x8000, Rs2 = 0x7FFF, Rd = 0x8000
 * * Rs1 = 0x8000, Rs2 = 0x4000, Rd = 0xA000
6745  * ~~~
6746  *
6747  * **Operations**:\n
6748  * ~~~
6749  * Rd.H[x] = (Rs1.H[x] - Rs2.H[x]) s>> 1;
6750  * for RV32: x=1...0,
6751  * for RV64: x=3...0
6752  * ~~~
6753  *
6754  * \param [in]  a    unsigned long type of value stored in a
6755  * \param [in]  b    unsigned long type of value stored in b
6756  * \return value stored in unsigned long type
6757  */
__RV_RSUB16(unsigned long a,unsigned long b)6758 __STATIC_FORCEINLINE unsigned long __RV_RSUB16(unsigned long a, unsigned long b)
6759 {
6760     register unsigned long result;
6761     __ASM volatile("rsub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6762     return result;
6763 }
6764 /* ===== Inline Function End for 3.90. RSUB16 ===== */
6765 
6766 /* ===== Inline Function Start for 3.91. RSUB64 ===== */
6767 /**
6768  * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
6769  * \brief RSUB64 (64-bit Signed Halving Subtraction)
6770  * \details
6771  * **Type**: DSP (64-bit Profile)
6772  *
6773  * **Syntax**:\n
6774  * ~~~
6775  * RSUB64 Rd, Rs1, Rs2
6776  * ~~~
6777  *
6778  * **Purpose**:\n
6779  * Perform a 64-bit signed integer subtraction. The result is halved to avoid overflow or
6780  * saturation.
6781  *
6782  * **RV32 Description**:\n
 * This instruction subtracts the 64-bit signed integer of an even/odd pair of
 * registers specified by Rs2(4,1) from the 64-bit signed integer of an even/odd pair of registers
 * specified by Rs1(4,1). The subtraction result is first arithmetically right-shifted by 1 bit and then
 * written to an even/odd pair of registers specified by Rd(4,1).
6787  * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register
6788  * pair includes register 2d and 2d+1.
6789  * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
6790  * of the pair contains the low 32-bit of the result.
6791  *
6792  * **RV64 Description**:\n
6793  * This instruction subtracts the 64-bit signed integer in Rs2 from the 64-bit signed
6794  * integer in Rs1. The 64-bit subtraction result is first arithmetically right-shifted by 1 bit and then
6795  * written to Rd.
6796  *
6797  * **Operations**:\n
6798  * ~~~
6799  * RV32:
6800  * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
6801  * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
6802  * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
6803  * R[t_H].R[t_L] = (R[a_H].R[a_L] - R[b_H].R[b_L]) s>> 1;
6804  * RV64:
6805  * Rd = (Rs1 - Rs2) s>> 1;
6806  * ~~~
6807  *
6808  * \param [in]  a    long long type of value stored in a
6809  * \param [in]  b    long long type of value stored in b
6810  * \return value stored in long long type
6811  */
__RV_RSUB64(long long a,long long b)6812 __STATIC_FORCEINLINE long long __RV_RSUB64(long long a, long long b)
6813 {
6814     register long long result;
6815     __ASM volatile("rsub64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6816     return result;
6817 }
6818 /* ===== Inline Function End for 3.91. RSUB64 ===== */
6819 
6820 /* ===== Inline Function Start for 3.92. RSUBW ===== */
6821 /**
6822  * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
6823  * \brief RSUBW (32-bit Signed Halving Subtraction)
6824  * \details
6825  * **Type**: DSP
6826  *
6827  * **Syntax**:\n
6828  * ~~~
6829  * RSUBW Rd, Rs1, Rs2
6830  * ~~~
6831  *
6832  * **Purpose**:\n
6833  * Subtract 32-bit signed integers and the result is halved to avoid overflow or saturation.
6834  *
6835  * **Description**:\n
6836  * This instruction subtracts the first 32-bit signed integer in Rs2 from the first 32-bit
6837  * signed integer in Rs1. The result is first arithmetically right-shifted by 1 bit and then sign-extended
6838  * and written to Rd.
6839  *
6840  * **Examples**:\n
6841  * ~~~
6842  * * Rs1 = 0x7FFFFFFF, Rs2 = 0x80000000, Rd = 0x7FFFFFFF
6843  * * Rs1 = 0x80000000, Rs2 = 0x7FFFFFFF, Rd = 0x80000000
6844  * * Rs1 = 0x80000000, Rs2 = 0x40000000, Rd = 0xA0000000
6845  * ~~~
6846  *
6847  * **Operations**:\n
6848  * ~~~
6849  * RV32:
6850  * Rd[31:0] = (Rs1[31:0] - Rs2[31:0]) s>> 1;
6851  * RV64:
6852  * resw[31:0] = (Rs1[31:0] - Rs2[31:0]) s>> 1;
6853  * Rd[63:0] = SE(resw[31:0]);
6854  * ~~~
6855  *
6856  * \param [in]  a    int type of value stored in a
6857  * \param [in]  b    int type of value stored in b
6858  * \return value stored in long type
6859  */
__RV_RSUBW(int a,int b)6860 __STATIC_FORCEINLINE long __RV_RSUBW(int a, int b)
6861 {
6862     register long result;
6863     __ASM volatile("rsubw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6864     return result;
6865 }
6866 /* ===== Inline Function End for 3.92. RSUBW ===== */
6867 
6868 /* ===== Inline Function Start for 3.93. SCLIP8 ===== */
6869 /**
6870  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
6871  * \brief SCLIP8 (SIMD 8-bit Signed Clip Value)
6872  * \details
6873  * **Type**: SIMD
6874  *
6875  * **Syntax**:\n
6876  * ~~~
6877  * SCLIP8 Rd, Rs1, imm3u[2:0]
6878  * ~~~
6879  *
6880  * **Purpose**:\n
6881  * Limit the 8-bit signed integer elements of a register into a signed range simultaneously.
6882  *
6883  * **Description**:\n
6884  * This instruction limits the 8-bit signed integer elements stored in Rs1 into a signed
6885  * integer range between 2^imm3u-1 and -2^imm3u, and writes the limited results to Rd. For example, if
6886  * imm3u is 3, the 8-bit input values should be saturated between 7 and -8. If saturation is performed,
6887  * set OV bit to 1.
6888  *
6889  * **Operations**:\n
6890  * ~~~
6891  * src = Rs1.B[x];
6892  * if (src > (2^imm3u)-1) {
6893  *   src = (2^imm3u)-1;
6894  *   OV = 1;
6895  * } else if (src < -2^imm3u) {
6896  *   src = -2^imm3u;
6897  *   OV = 1;
6898  * }
6899  * Rd.B[x] = src
6900  * for RV32: x=3...0,
6901  * for RV64: x=7...0
6902  * ~~~
6903  *
6904  * \param [in]  a    unsigned long type of value stored in a
6905  * \param [in]  b    unsigned int type of value stored in b
6906  * \return value stored in unsigned long type
6907  */
#define __RV_SCLIP8(a, b)    \
    ({    \
        /* Evaluate (a) before any other macro-local exists, and use        */    \
        /* macro-specific names, so a caller variable called `result` or    */    \
        /* `__a` is never shadowed by an uninitialized local.               */    \
        unsigned long __sclip8_src = (unsigned long)(a);    \
        unsigned long __sclip8_dst;    \
        /* (b) must be a compile-time constant accepted by the "K" constraint. */    \
        __ASM volatile("sclip8 %0, %1, %2" : "=r"(__sclip8_dst) : "r"(__sclip8_src), "K"(b));    \
        __sclip8_dst;    \
    })
6915 /* ===== Inline Function End for 3.93. SCLIP8 ===== */
6916 
6917 /* ===== Inline Function Start for 3.94. SCLIP16 ===== */
6918 /**
6919  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
6920  * \brief SCLIP16 (SIMD 16-bit Signed Clip Value)
6921  * \details
6922  * **Type**: SIMD
6923  *
6924  * **Syntax**:\n
6925  * ~~~
6926  * SCLIP16 Rd, Rs1, imm4u[3:0]
6927  * ~~~
6928  *
6929  * **Purpose**:\n
6930  * Limit the 16-bit signed integer elements of a register into a signed range simultaneously.
6931  *
6932  * **Description**:\n
6933  * This instruction limits the 16-bit signed integer elements stored in Rs1 into a signed
 * integer range between 2^imm4u-1 and -2^imm4u, and writes the limited results to Rd. For example, if
6935  * imm4u is 3, the 16-bit input values should be saturated between 7 and -8. If saturation is performed,
6936  * set OV bit to 1.
6937  *
6938  * **Operations**:\n
6939  * ~~~
6940  * src = Rs1.H[x];
6941  * if (src > (2^imm4u)-1) {
6942  *   src = (2^imm4u)-1;
6943  *   OV = 1;
6944  * } else if (src < -2^imm4u) {
6945  *   src = -2^imm4u;
6946  *   OV = 1;
6947  * }
6948  * Rd.H[x] = src
6949  * for RV32: x=1...0,
6950  * for RV64: x=3...0
6951  * ~~~
6952  *
6953  * \param [in]  a    unsigned long type of value stored in a
6954  * \param [in]  b    unsigned int type of value stored in b
6955  * \return value stored in unsigned long type
6956  */
#define __RV_SCLIP16(a, b)    \
    ({    \
        /* Evaluate (a) before any other macro-local exists, and use        */    \
        /* macro-specific names, so a caller variable called `result` or    */    \
        /* `__a` is never shadowed by an uninitialized local.               */    \
        unsigned long __sclip16_src = (unsigned long)(a);    \
        unsigned long __sclip16_dst;    \
        /* (b) must be a compile-time constant accepted by the "K" constraint. */    \
        __ASM volatile("sclip16 %0, %1, %2" : "=r"(__sclip16_dst) : "r"(__sclip16_src), "K"(b));    \
        __sclip16_dst;    \
    })
6964 /* ===== Inline Function End for 3.94. SCLIP16 ===== */
6965 
6966 /* ===== Inline Function Start for 3.95. SCLIP32 ===== */
6967 /**
6968  * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
6969  * \brief SCLIP32 (SIMD 32-bit Signed Clip Value)
6970  * \details
6971  * **Type**: DSP
6972  *
6973  * **Syntax**:\n
6974  * ~~~
6975  * SCLIP32 Rd, Rs1, imm5u[4:0]
6976  * ~~~
6977  *
6978  * **Purpose**:\n
6979  * Limit the 32-bit signed integer elements of a register into a signed range simultaneously.
6980  *
6981  * **Description**:\n
6982  * This instruction limits the 32-bit signed integer elements stored in Rs1 into a signed
 * integer range between 2^imm5u-1 and -2^imm5u, and writes the limited results to Rd. For example, if
6984  * imm5u is 3, the 32-bit input values should be saturated between 7 and -8. If saturation is performed,
6985  * set OV bit to 1.
6986  *
6987  * **Operations**:\n
6988  * ~~~
6989  * src = Rs1.W[x];
6990  * if (src > (2^imm5u)-1) {
6991  *   src = (2^imm5u)-1;
6992  *   OV = 1;
6993  * } else if (src < -2^imm5u) {
6994  *   src = -2^imm5u;
6995  *   OV = 1;
6996  * }
6997  * Rd.W[x] = src
6998  * for RV32: x=0,
6999  * for RV64: x=1...0
7000  * ~~~
7001  *
7002  * \param [in]  a    long type of value stored in a
7003  * \param [in]  b    unsigned int type of value stored in b
7004  * \return value stored in long type
7005  */
#define __RV_SCLIP32(a, b)    \
    ({    \
        /* Evaluate (a) before any other macro-local exists, and use        */    \
        /* macro-specific names, so a caller variable called `result` or    */    \
        /* `__a` is never shadowed by an uninitialized local.               */    \
        long __sclip32_src = (long)(a);    \
        long __sclip32_dst;    \
        /* (b) must be a compile-time constant accepted by the "K" constraint. */    \
        __ASM volatile("sclip32 %0, %1, %2" : "=r"(__sclip32_dst) : "r"(__sclip32_src), "K"(b));    \
        __sclip32_dst;    \
    })
7013 /* ===== Inline Function End for 3.95. SCLIP32 ===== */
7014 
7015 /* ===== Inline Function Start for 3.96. SCMPLE8 ===== */
7016 /**
7017  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
7018  * \brief SCMPLE8 (SIMD 8-bit Signed Compare Less Than & Equal)
7019  * \details
7020  * **Type**: SIMD
7021  *
7022  * **Syntax**:\n
7023  * ~~~
7024  * SCMPLE8 Rd, Rs1, Rs2
7025  * ~~~
7026  *
7027  * **Purpose**:\n
7028  * Do 8-bit signed integer elements less than & equal comparisons simultaneously.
7029  *
7030  * **Description**:\n
7031  * This instruction compares the 8-bit signed integer elements in Rs1 with the 8-bit
7032  * signed integer elements in Rs2 to see if the one in Rs1 is less than or equal to the one in Rs2. If it is
7033  * true, the result is 0xFF; otherwise, the result is 0x0. The element comparison results are written to
 * Rd.
7035  *
7036  * **Operations**:\n
7037  * ~~~
7038  * Rd.B[x] = (Rs1.B[x] {le} Rs2.B[x])? 0xff : 0x0;
7039  * for RV32: x=3...0,
7040  * for RV64: x=7...0
7041  * ~~~
7042  *
7043  * \param [in]  a    unsigned long type of value stored in a
7044  * \param [in]  b    unsigned long type of value stored in b
7045  * \return value stored in unsigned long type
7046  */
__RV_SCMPLE8(unsigned long a,unsigned long b)7047 __STATIC_FORCEINLINE unsigned long __RV_SCMPLE8(unsigned long a, unsigned long b)
7048 {
7049     register unsigned long result;
7050     __ASM volatile("scmple8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
7051     return result;
7052 }
7053 /* ===== Inline Function End for 3.96. SCMPLE8 ===== */
7054 
7055 /* ===== Inline Function Start for 3.97. SCMPLE16 ===== */
7056 /**
7057  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP
7058  * \brief SCMPLE16 (SIMD 16-bit Signed Compare Less Than & Equal)
7059  * \details
7060  * **Type**: SIMD
7061  *
7062  * **Syntax**:\n
7063  * ~~~
7064  * SCMPLE16 Rd, Rs1, Rs2
7065  * ~~~
7066  *
7067  * **Purpose**:\n
7068  * Do 16-bit signed integer elements less than & equal comparisons simultaneously.
7069  *
7070  * **Description**:\n
7071  * This instruction compares the 16-bit signed integer elements in Rs1 with the 16-bit
7072  * signed integer elements in Rs2 to see if the one in Rs1 is less than or equal to the one in Rs2. If it is
7073  * true, the result is 0xFFFF; otherwise, the result is 0x0. The element comparison results are written
7074  * to Rd.
7075  *
7076  * **Operations**:\n
7077  * ~~~
7078  * Rd.H[x] = (Rs1.H[x] {le} Rs2.H[x])? 0xffff : 0x0;
7079  * for RV32: x=1...0,
7080  * for RV64: x=3...0
7081  * ~~~
7082  *
7083  * \param [in]  a    unsigned long type of value stored in a
7084  * \param [in]  b    unsigned long type of value stored in b
7085  * \return value stored in unsigned long type
7086  */
__RV_SCMPLE16(unsigned long a,unsigned long b)7087 __STATIC_FORCEINLINE unsigned long __RV_SCMPLE16(unsigned long a, unsigned long b)
7088 {
7089     register unsigned long result;
7090     __ASM volatile("scmple16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
7091     return result;
7092 }
7093 /* ===== Inline Function End for 3.97. SCMPLE16 ===== */
7094 
7095 /* ===== Inline Function Start for 3.98. SCMPLT8 ===== */
7096 /**
7097  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
7098  * \brief SCMPLT8 (SIMD 8-bit Signed Compare Less Than)
7099  * \details
7100  * **Type**: SIMD
7101  *
7102  * **Syntax**:\n
7103  * ~~~
7104  * SCMPLT8 Rd, Rs1, Rs2
7105  * ~~~
7106  *
7107  * **Purpose**:\n
7108  * Do 8-bit signed integer elements less than comparisons simultaneously.
7109  *
7110  * **Description**:\n
7111  * This instruction compares the 8-bit signed integer elements in Rs1 with the 8-bit
7112  * signed integer elements in Rs2 to see if the one in Rs1 is less than the one in Rs2. If it is true, the
7113  * result is 0xFF; otherwise, the result is 0x0. The element comparison results are written to Rd.
7114  *
7115  * **Operations**:\n
7116  * ~~~
7117  * Rd.B[x] = (Rs1.B[x] < Rs2.B[x])? 0xff : 0x0;
7118  * for RV32: x=3...0,
7119  * for RV64: x=7...0
7120  * ~~~
7121  *
7122  * \param [in]  a    unsigned long type of value stored in a
7123  * \param [in]  b    unsigned long type of value stored in b
7124  * \return value stored in unsigned long type
7125  */
__RV_SCMPLT8(unsigned long a,unsigned long b)7126 __STATIC_FORCEINLINE unsigned long __RV_SCMPLT8(unsigned long a, unsigned long b)
7127 {
7128     register unsigned long result;
7129     __ASM volatile("scmplt8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
7130     return result;
7131 }
7132 /* ===== Inline Function End for 3.98. SCMPLT8 ===== */
7133 
7134 /* ===== Inline Function Start for 3.99. SCMPLT16 ===== */
7135 /**
7136  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP
7137  * \brief SCMPLT16 (SIMD 16-bit Signed Compare Less Than)
7138  * \details
7139  * **Type**: SIMD
7140  *
7141  * **Syntax**:\n
7142  * ~~~
7143  * SCMPLT16 Rd, Rs1, Rs2
7144  * ~~~
7145  *
7146  * **Purpose**:\n
7147  * Do 16-bit signed integer elements less than comparisons simultaneously.
7148  *
7149  * **Description**:\n
 * This instruction compares the 16-bit signed integer elements in Rs1 with the 16-bit
 * signed integer elements in Rs2 to see if the one in Rs1 is less than the one in Rs2. If it is true, the
7152  * result is 0xFFFF; otherwise, the result is 0x0. The element comparison results are written to Rd.
7153  *
7154  * **Operations**:\n
7155  * ~~~
7156  * Rd.H[x] = (Rs1.H[x] < Rs2.H[x])? 0xffff : 0x0;
7157  * for RV32: x=1...0,
7158  * for RV64: x=3...0
7159  * ~~~
7160  *
7161  * \param [in]  a    unsigned long type of value stored in a
7162  * \param [in]  b    unsigned long type of value stored in b
7163  * \return value stored in unsigned long type
7164  */
__RV_SCMPLT16(unsigned long a,unsigned long b)7165 __STATIC_FORCEINLINE unsigned long __RV_SCMPLT16(unsigned long a, unsigned long b)
7166 {
7167     register unsigned long result;
7168     __ASM volatile("scmplt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
7169     return result;
7170 }
7171 /* ===== Inline Function End for 3.99. SCMPLT16 ===== */
7172 
7173 /* ===== Inline Function Start for 3.100. SLL8 ===== */
7174 /**
7175  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
7176  * \brief SLL8 (SIMD 8-bit Shift Left Logical)
7177  * \details
7178  * **Type**: SIMD
7179  *
7180  * **Syntax**:\n
7181  * ~~~
7182  * SLL8 Rd, Rs1, Rs2
7183  * ~~~
7184  *
7185  * **Purpose**:\n
7186  * Do 8-bit elements logical left shift operations simultaneously. The shift amount is a
7187  * variable from a GPR.
7188  *
7189  * **Description**:\n
7190  * The 8-bit elements in Rs1 are left-shifted logically. And the results are written to Rd.
7191  * The shifted out bits are filled with zero and the shift amount is specified by the low-order 3-bits of
7192  * the value in the Rs2 register.
7193  *
7194  * **Operations**:\n
7195  * ~~~
7196  * sa = Rs2[2:0];
7197  * Rd.B[x] = Rs1.B[x] << sa;
7198  * for RV32: x=3...0,
7199  * for RV64: x=7...0
7200  * ~~~
7201  *
7202  * \param [in]  a    unsigned long type of value stored in a
7203  * \param [in]  b    unsigned int type of value stored in b
7204  * \return value stored in unsigned long type
7205  */
__RV_SLL8(unsigned long a,unsigned int b)7206 __STATIC_FORCEINLINE unsigned long __RV_SLL8(unsigned long a, unsigned int b)
7207 {
7208     register unsigned long result;
7209     __ASM volatile("sll8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
7210     return result;
7211 }
7212 /* ===== Inline Function End for 3.100. SLL8 ===== */
7213 
7214 /* ===== Inline Function Start for 3.101. SLLI8 ===== */
7215 /**
7216  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
7217  * \brief SLLI8 (SIMD 8-bit Shift Left Logical Immediate)
7218  * \details
7219  * **Type**: SIMD
7220  *
7221  * **Syntax**:\n
7222  * ~~~
7223  * SLLI8 Rd, Rs1, imm3u
7224  * ~~~
7225  *
7226  * **Purpose**:\n
7227  * Do 8-bit elements logical left shift operations simultaneously. The shift amount is an
7228  * immediate value.
7229  *
7230  * **Description**:\n
7231  * The 8-bit elements in Rs1 are left-shifted logically. And the results are written to Rd.
7232  * The shifted out bits are filled with zero and the shift amount is specified by the imm3u constant.
7233  *
7234  * **Operations**:\n
7235  * ~~~
7236  * sa = imm3u[2:0];
7237  * Rd.B[x] = Rs1.B[x] << sa;
7238  * for RV32: x=3...0,
7239  * for RV64: x=7...0
7240  * ~~~
7241  *
7242  * \param [in]  a    unsigned long type of value stored in a
7243  * \param [in]  b    unsigned int type of value stored in b
7244  * \return value stored in unsigned long type
7245  */
#define __RV_SLLI8(a, b)    \
    ({    \
        /* Evaluate `a` before any other temporary exists, and use macro-private */    \
        /* names so an argument such as `result` cannot bind to a local here.   */    \
        unsigned long __slli8_src = (unsigned long)(a);    \
        unsigned long __slli8_dst;    \
        /* `b` goes through the "K" constraint, so it must be a constant expression. */    \
        __ASM volatile("slli8 %0, %1, %2" : "=r"(__slli8_dst) : "r"(__slli8_src), "K"(b));    \
        __slli8_dst;    \
    })
7253 /* ===== Inline Function End for 3.101. SLLI8 ===== */
7254 
7255 /* ===== Inline Function Start for 3.102. SLL16 ===== */
7256 /**
7257  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
7258  * \brief SLL16 (SIMD 16-bit Shift Left Logical)
7259  * \details
7260  * **Type**: SIMD
7261  *
7262  * **Syntax**:\n
7263  * ~~~
7264  * SLL16 Rd, Rs1, Rs2
7265  * ~~~
7266  *
7267  * **Purpose**:\n
7268  * Do 16-bit elements logical left shift operations simultaneously. The shift amount is a
7269  * variable from a GPR.
7270  *
7271  * **Description**:\n
7272  * The 16-bit elements in Rs1 are left-shifted logically. And the results are written to Rd.
7273  * The shifted out bits are filled with zero and the shift amount is specified by the low-order 4-bits of
7274  * the value in the Rs2 register.
7275  *
7276  * **Operations**:\n
7277  * ~~~
7278  * sa = Rs2[3:0];
7279  * Rd.H[x] = Rs1.H[x] << sa;
7280  * for RV32: x=1...0,
7281  * for RV64: x=3...0
7282  * ~~~
7283  *
7284  * \param [in]  a    unsigned long type of value stored in a
7285  * \param [in]  b    unsigned int type of value stored in b
7286  * \return value stored in unsigned long type
7287  */
__RV_SLL16(unsigned long a,unsigned int b)7288 __STATIC_FORCEINLINE unsigned long __RV_SLL16(unsigned long a, unsigned int b)
7289 {
7290     register unsigned long result;
7291     __ASM volatile("sll16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
7292     return result;
7293 }
7294 /* ===== Inline Function End for 3.102. SLL16 ===== */
7295 
7296 /* ===== Inline Function Start for 3.103. SLLI16 ===== */
7297 /**
7298  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
7299  * \brief SLLI16 (SIMD 16-bit Shift Left Logical Immediate)
7300  * \details
7301  * **Type**: SIMD
7302  *
7303  * **Syntax**:\n
7304  * ~~~
7305  * SLLI16 Rd, Rs1, imm4[3:0]
7306  * ~~~
7307  *
7308  * **Purpose**:\n
7309  * Do 16-bit element logical left shift operations simultaneously. The shift amount is an
7310  * immediate value.
7311  *
7312  * **Description**:\n
7313  * The 16-bit elements in Rs1 are left-shifted logically. The shifted out bits are filled with
7314  * zero and the shift amount is specified by the imm4[3:0] constant. And the results are written to Rd.
7315  *
7316  * **Operations**:\n
7317  * ~~~
7318  * sa = imm4[3:0];
7319  * Rd.H[x] = Rs1.H[x] << sa;
7320  * for RV32: x=1...0,
7321  * for RV64: x=3...0
7322  * ~~~
7323  *
7324  * \param [in]  a    unsigned long type of value stored in a
7325  * \param [in]  b    unsigned int type of value stored in b
7326  * \return value stored in unsigned long type
7327  */
#define __RV_SLLI16(a, b)    \
    ({    \
        /* Evaluate `a` before any other temporary exists, and use macro-private */    \
        /* names so an argument such as `result` cannot bind to a local here.   */    \
        unsigned long __slli16_src = (unsigned long)(a);    \
        unsigned long __slli16_dst;    \
        /* `b` goes through the "K" constraint, so it must be a constant expression. */    \
        __ASM volatile("slli16 %0, %1, %2" : "=r"(__slli16_dst) : "r"(__slli16_src), "K"(b));    \
        __slli16_dst;    \
    })
7335 /* ===== Inline Function End for 3.103. SLLI16 ===== */
7336 
7337 /* ===== Inline Function Start for 3.104. SMAL ===== */
7338 /**
7339  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
7340  * \brief SMAL (Signed Multiply Halfs & Add 64-bit)
7341  * \details
7342  * **Type**: Partial-SIMD
7343  *
7344  * **Syntax**:\n
7345  * ~~~
7346  * SMAL Rd, Rs1, Rs2
7347  * ~~~
7348  *
7349  * **Purpose**:\n
7350  * Multiply the signed bottom 16-bit content of the 32-bit elements of a register with the top
7351  * 16-bit content of the same 32-bit elements of the same register, and add the results with a 64-bit
7352  * value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is written back
7353  * to another even/odd pair of registers (RV32) or a register (RV64).
7354  *
7355  * **RV32 Description**:\n
7356  * This instruction multiplies the bottom 16-bit content of the lower 32-bit of Rs2 with the top 16-bit
7357  * content of the lower 32-bit of Rs2 and adds the result with the 64-bit value of an even/odd pair of
7358  * registers specified by Rs1(4,1). The 64-bit addition result is written back to an even/odd pair of
7359  * registers specified by Rd(4,1). The 16-bit values of Rs2, and the 64-bit value of the Rs1(4,1) register-
7360  * pair are treated as signed integers.
7361  * Rx(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
7362  * includes register 2d and 2d+1.
7363  * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
7364  * register of the pair contains the low 32-bit of the operand.
7365  *
7366  * **RV64 Description**:\n
7367  * This instruction multiplies the bottom 16-bit content of the 32-bit elements of Rs2 with the top 16-bit
7368  * content of the same 32-bit elements of Rs2 and adds the results with the 64-bit value of Rs1. The 64-
7369  * bit addition result is written back to Rd. The 16-bit values of Rs2, and the 64-bit value of Rs1 are
7370  * treated as signed integers.
7371  *
7372  * **Operations**:\n
7373  * ~~~
7374  * RV32:
7375  * Mres[31:0] = Rs2.H[1] * Rs2.H[0];
 * Idx0 = CONCAT(Rs1(4,1),1'b0); Idx1 = CONCAT(Rs1(4,1),1'b1);
7377  * Idx2 = CONCAT(Rd(4,1),1'b0); Idx3 = CONCAT(Rd(4,1),1'b1);
7378  * R[Idx3].R[Idx2] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
7379  * RV64:
7380  * Mres[0][31:0] = Rs2.W[0].H[1] * Rs2.W[0].H[0];
7381  * Mres[1][31:0] = Rs2.W[1].H[1] * Rs2.W[1].H[0];
7382  * Rd = Rs1 + SE64(Mres[1][31:0]) + SE64(Mres[0][31:0]);
7383  * ~~~
7384  *
7385  * \param [in]  a    long long type of value stored in a
7386  * \param [in]  b    unsigned long type of value stored in b
7387  * \return value stored in long long type
7388  */
__RV_SMAL(long long a,unsigned long b)7389 __STATIC_FORCEINLINE long long __RV_SMAL(long long a, unsigned long b)
7390 {
7391     register long long result;
7392     __ASM volatile("smal %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
7393     return result;
7394 }
7395 /* ===== Inline Function End for 3.104. SMAL ===== */
7396 
7397 /* ===== Inline Function Start for 3.105.1. SMALBB ===== */
7398 /**
7399  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
7400  * \brief SMALBB (Signed Multiply Bottom Halfs & Add 64-bit)
7401  * \details
7402  * **Type**: DSP (64-bit Profile)
7403  *
7404  * **Syntax**:\n
7405  * ~~~
7406  * SMALBB Rd, Rs1, Rs2
7407  * SMALBT Rd, Rs1, Rs2
7408  * SMALTT Rd, Rs1, Rs2
7409  * ~~~
7410  *
7411  * **Purpose**:\n
7412  * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit
7413  * content of the corresponding 32-bit elements of another register and add the results with a 64-bit
7414  * value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is written back
7415  * to the register-pair (RV32) or the register (RV64).
7416  * * SMALBB: rt pair + bottom*bottom (all 32-bit elements)
 * * SMALBT: rt pair + bottom*top (all 32-bit elements)
 * * SMALTT: rt pair + top*top (all 32-bit elements)
7419  *
7420  * **RV32 Description**:\n
7421  * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
7422  * content of Rs2.
7423  * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
7424  * content of Rs2.
7425  * For the `SMALTT` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
7426  * of Rs2.
7427  * The multiplication result is added with the 64-bit value of an even/odd pair of registers specified by
7428  * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
7429  * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
7430  * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
7431  * includes register 2d and 2d+1.
7432  * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
7433  * register of the pair contains the low 32-bit of the operand.
7434  *
7435  * **RV64 Description**:\n
7436  * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
7437  * with the bottom 16-bit content of the 32-bit elements of Rs2.
7438  * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
7439  * with the top 16-bit content of the 32-bit elements of Rs2.
7440  * For the `SMALTT` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
7441  * the top 16-bit content of the 32-bit elements of Rs2.
7442  * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written
7443  * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
7444  * integers.
7445  *
7446  * **Operations**:\n
7447  * ~~~
7448  * RV32:
7449  * Mres[31:0] = Rs1.H[0] * Rs2.H[0]; // SMALBB
7450  * Mres[31:0] = Rs1.H[0] * Rs2.H[1]; // SMALBT
7451  * Mres[31:0] = Rs1.H[1] * Rs2.H[1]; // SMALTT
7452  * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
7453  * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
7454  * RV64:
7455  * // SMALBB
7456  * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[0];
7457  * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[0];
7458  * // SMALBT
7459  * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[1];
7460  * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[1];
7461  * // SMALTT
7462  * Mres[0][31:0] = Rs1.W[0].H[1] * Rs2.W[0].H[1];
7463  * Mres[1][31:0] = Rs1.W[1].H[1] * Rs2.W[1].H[1];
7464  * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
7465  * ~~~
7466  *
7467  * \param [in]  t    long long type of value stored in t
7468  * \param [in]  a    unsigned long type of value stored in a
7469  * \param [in]  b    unsigned long type of value stored in b
7470  * \return value stored in long long type
7471  */
__RV_SMALBB(long long t,unsigned long a,unsigned long b)7472 __STATIC_FORCEINLINE long long __RV_SMALBB(long long t, unsigned long a, unsigned long b)
7473 {
7474     __ASM volatile("smalbb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
7475     return t;
7476 }
7477 /* ===== Inline Function End for 3.105.1. SMALBB ===== */
7478 
7479 /* ===== Inline Function Start for 3.105.2. SMALBT ===== */
7480 /**
7481  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
7482  * \brief SMALBT (Signed Multiply Bottom Half & Top Half & Add 64-bit)
7483  * \details
7484  * **Type**: DSP (64-bit Profile)
7485  *
7486  * **Syntax**:\n
7487  * ~~~
7488  * SMALBB Rd, Rs1, Rs2
7489  * SMALBT Rd, Rs1, Rs2
7490  * SMALTT Rd, Rs1, Rs2
7491  * ~~~
7492  *
7493  * **Purpose**:\n
7494  * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit
7495  * content of the corresponding 32-bit elements of another register and add the results with a 64-bit
7496  * value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is written back
7497  * to the register-pair (RV32) or the register (RV64).
7498  * * SMALBB: rt pair + bottom*bottom (all 32-bit elements)
 * * SMALBT: rt pair + bottom*top (all 32-bit elements)
 * * SMALTT: rt pair + top*top (all 32-bit elements)
7501  *
7502  * **RV32 Description**:\n
7503  * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
7504  * content of Rs2.
7505  * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
7506  * content of Rs2.
7507  * For the `SMALTT` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
7508  * of Rs2.
7509  * The multiplication result is added with the 64-bit value of an even/odd pair of registers specified by
7510  * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
7511  * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
7512  * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
7513  * includes register 2d and 2d+1.
7514  * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
7515  * register of the pair contains the low 32-bit of the operand.
7516  *
7517  * **RV64 Description**:\n
7518  * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
7519  * with the bottom 16-bit content of the 32-bit elements of Rs2.
7520  * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
7521  * with the top 16-bit content of the 32-bit elements of Rs2.
7522  * For the `SMALTT` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
7523  * the top 16-bit content of the 32-bit elements of Rs2.
7524  * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written
7525  * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
7526  * integers.
7527  *
7528  * **Operations**:\n
7529  * ~~~
7530  * RV32:
7531  * Mres[31:0] = Rs1.H[0] * Rs2.H[0]; // SMALBB
7532  * Mres[31:0] = Rs1.H[0] * Rs2.H[1]; // SMALBT
7533  * Mres[31:0] = Rs1.H[1] * Rs2.H[1]; // SMALTT
7534  * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
7535  * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
7536  * RV64:
7537  * // SMALBB
7538  * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[0];
7539  * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[0];
7540  * // SMALBT
7541  * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[1];
7542  * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[1];
7543  * // SMALTT
7544  * Mres[0][31:0] = Rs1.W[0].H[1] * Rs2.W[0].H[1];
7545  * Mres[1][31:0] = Rs1.W[1].H[1] * Rs2.W[1].H[1];
7546  * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
7547  * ~~~
7548  *
7549  * \param [in]  t    long long type of value stored in t
7550  * \param [in]  a    unsigned long type of value stored in a
7551  * \param [in]  b    unsigned long type of value stored in b
7552  * \return value stored in long long type
7553  */
__RV_SMALBT(long long t,unsigned long a,unsigned long b)7554 __STATIC_FORCEINLINE long long __RV_SMALBT(long long t, unsigned long a, unsigned long b)
7555 {
7556     __ASM volatile("smalbt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
7557     return t;
7558 }
7559 /* ===== Inline Function End for 3.105.2. SMALBT ===== */
7560 
7561 /* ===== Inline Function Start for 3.105.3. SMALTT ===== */
7562 /**
7563  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
7564  * \brief SMALTT (Signed Multiply Top Halfs & Add 64-bit)
7565  * \details
7566  * **Type**: DSP (64-bit Profile)
7567  *
7568  * **Syntax**:\n
7569  * ~~~
7570  * SMALBB Rd, Rs1, Rs2
7571  * SMALBT Rd, Rs1, Rs2
7572  * SMALTT Rd, Rs1, Rs2
7573  * ~~~
7574  *
7575  * **Purpose**:\n
7576  * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit
7577  * content of the corresponding 32-bit elements of another register and add the results with a 64-bit
7578  * value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is written back
7579  * to the register-pair (RV32) or the register (RV64).
7580  * * SMALBB: rt pair + bottom*bottom (all 32-bit elements)
 * * SMALBT: rt pair + bottom*top (all 32-bit elements)
 * * SMALTT: rt pair + top*top (all 32-bit elements)
7583  *
7584  * **RV32 Description**:\n
7585  * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
7586  * content of Rs2.
7587  * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
7588  * content of Rs2.
7589  * For the `SMALTT` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
7590  * of Rs2.
7591  * The multiplication result is added with the 64-bit value of an even/odd pair of registers specified by
7592  * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
7593  * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
7594  * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
7595  * includes register 2d and 2d+1.
7596  * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
7597  * register of the pair contains the low 32-bit of the operand.
7598  *
7599  * **RV64 Description**:\n
7600  * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
7601  * with the bottom 16-bit content of the 32-bit elements of Rs2.
7602  * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
7603  * with the top 16-bit content of the 32-bit elements of Rs2.
7604  * For the `SMALTT` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
7605  * the top 16-bit content of the 32-bit elements of Rs2.
7606  * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written
7607  * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
7608  * integers.
7609  *
7610  * **Operations**:\n
7611  * ~~~
7612  * RV32:
7613  * Mres[31:0] = Rs1.H[0] * Rs2.H[0]; // SMALBB
7614  * Mres[31:0] = Rs1.H[0] * Rs2.H[1]; // SMALBT
7615  * Mres[31:0] = Rs1.H[1] * Rs2.H[1]; // SMALTT
7616  * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
7617  * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
7618  * RV64:
7619  * // SMALBB
7620  * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[0];
7621  * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[0];
7622  * // SMALBT
7623  * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[1];
7624  * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[1];
7625  * // SMALTT
7626  * Mres[0][31:0] = Rs1.W[0].H[1] * Rs2.W[0].H[1];
7627  * Mres[1][31:0] = Rs1.W[1].H[1] * Rs2.W[1].H[1];
7628  * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
7629  * ~~~
7630  *
7631  * \param [in]  t    long long type of value stored in t
7632  * \param [in]  a    unsigned long type of value stored in a
7633  * \param [in]  b    unsigned long type of value stored in b
7634  * \return value stored in long long type
7635  */
__RV_SMALTT(long long t,unsigned long a,unsigned long b)7636 __STATIC_FORCEINLINE long long __RV_SMALTT(long long t, unsigned long a, unsigned long b)
7637 {
7638     __ASM volatile("smaltt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
7639     return t;
7640 }
7641 /* ===== Inline Function End for 3.105.3. SMALTT ===== */
7642 
7643 /* ===== Inline Function Start for 3.106.1. SMALDA ===== */
7644 /**
7645  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
7646  * \brief SMALDA (Signed Multiply Two Halfs and Two Adds 64-bit)
7647  * \details
7648  * **Type**: DSP (64-bit Profile)
7649  *
7650  * **Syntax**:\n
7651  * ~~~
7652  * SMALDA Rd, Rs1, Rs2
7653  * SMALXDA Rd, Rs1, Rs2
7654  * ~~~
7655  *
7656  * **Purpose**:\n
7657  * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
7658  * adds the two 32-bit results and the 64-bit value of an even/odd pair of registers together.
 * * SMALDA: rt pair + top*top + bottom*bottom (all 32-bit elements)
 * * SMALXDA: rt pair + top*bottom + bottom*top (all 32-bit elements)
7661  *
7662  * **RV32 Description**:\n
7663  * For the `SMALDA` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
7664  * content of Rs2 and then adds the result to the result of multiplying the top 16-bit content of Rs1 with
7665  * the top 16-bit content of Rs2 with unlimited precision.
7666  * For the `SMALXDA` instruction, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit
7667  * content of Rs2 and then adds the result to the result of multiplying the bottom 16-bit content of Rs1
7668  * with the top 16-bit content of Rs2 with unlimited precision.
7669  * The result is added to the 64-bit value of an even/odd pair of registers specified by Rd(4,1). The 64-
7670  * bit addition result is written back to the register-pair. The 16-bit values of Rs1 and Rs2, and the 64-
7671  * bit value of the register-pair are treated as signed integers.
7672  * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
7673  * includes register 2d and 2d+1.
7674  * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
7675  * register of the pair contains the low 32-bit of the operand.
7676  *
7677  * **RV64 Description**:\n
7678  * For the `SMALDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
7679  * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
7680  * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-
7681  * bit elements of Rs2 with unlimited precision.
7682  * For the `SMALXDA` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1
7683  * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
7684  * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the
7685  * 32-bit elements of Rs2 with unlimited precision.
7686  * The results are added to the 64-bit value of Rd. The 64-bit addition result is written back to Rd. The
7687  * 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed integers.
7688  *
7689  * **Operations**:\n
7690  * ~~~
7691  * RV32:
7692  * // SMALDA
7693  * Mres0[31:0] = (Rs1.H[0] * Rs2.H[0]);
7694  * Mres1[31:0] = (Rs1.H[1] * Rs2.H[1]);
7695  * // SMALXDA
7696  * Mres0[31:0] = (Rs1.H[0] * Rs2.H[1]);
7697  * Mres1[31:0] = (Rs1.H[1] * Rs2.H[0]);
7698  * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
7699  * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres0[31:0]) + SE64(Mres1[31:0]);
7700  * RV64:
7701  * // SMALDA
7702  * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
7703  * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
7704  * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
7705  * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
7706  * // SMALXDA
7707  * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
7708  * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]);
7709  * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
7710  * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]);
7711  * Rd = Rd + SE64(Mres0[0][31:0]) + SE64(Mres1[0][31:0]) + SE64(Mres0[1][31:0]) +
7712  * SE64(Mres1[1][31:0]);
7713  * ~~~
7714  *
7715  * \param [in]  t    long long type of value stored in t
7716  * \param [in]  a    unsigned long type of value stored in a
7717  * \param [in]  b    unsigned long type of value stored in b
7718  * \return value stored in long long type
7719  */
__RV_SMALDA(long long t,unsigned long a,unsigned long b)7720 __STATIC_FORCEINLINE long long __RV_SMALDA(long long t, unsigned long a, unsigned long b)
7721 {
7722     __ASM volatile("smalda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
7723     return t;
7724 }
7725 /* ===== Inline Function End for 3.106.1. SMALDA ===== */
7726 
7727 /* ===== Inline Function Start for 3.106.2. SMALXDA ===== */
7728 /**
7729  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
7730  * \brief SMALXDA (Signed Crossed Multiply Two Halfs and Two Adds 64-bit)
7731  * \details
7732  * **Type**: DSP (64-bit Profile)
7733  *
7734  * **Syntax**:\n
7735  * ~~~
7736  * SMALDA Rd, Rs1, Rs2
7737  * SMALXDA Rd, Rs1, Rs2
7738  * ~~~
7739  *
7740  * **Purpose**:\n
7741  * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
7742  * adds the two 32-bit results and the 64-bit value of an even/odd pair of registers together.
 * * SMALDA: rt pair + top*top + bottom*bottom (all 32-bit elements)
 * * SMALXDA: rt pair + top*bottom + bottom*top (all 32-bit elements)
7745  *
7746  * **RV32 Description**:\n
7747  * For the `SMALDA` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
7748  * content of Rs2 and then adds the result to the result of multiplying the top 16-bit content of Rs1 with
7749  * the top 16-bit content of Rs2 with unlimited precision.
7750  * For the `SMALXDA` instruction, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit
7751  * content of Rs2 and then adds the result to the result of multiplying the bottom 16-bit content of Rs1
7752  * with the top 16-bit content of Rs2 with unlimited precision.
7753  * The result is added to the 64-bit value of an even/odd pair of registers specified by Rd(4,1). The 64-
7754  * bit addition result is written back to the register-pair. The 16-bit values of Rs1 and Rs2, and the 64-
7755  * bit value of the register-pair are treated as signed integers.
7756  * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
7757  * includes register 2d and 2d+1.
7758  * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
7759  * register of the pair contains the low 32-bit of the operand.
7760  *
7761  * **RV64 Description**:\n
7762  * For the `SMALDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
7763  * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
7764  * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-
7765  * bit elements of Rs2 with unlimited precision.
7766  * For the `SMALXDA` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1
7767  * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
7768  * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the
7769  * 32-bit elements of Rs2 with unlimited precision.
7770  * The results are added to the 64-bit value of Rd. The 64-bit addition result is written back to Rd. The
7771  * 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed integers.
7772  *
7773  * **Operations**:\n
7774  * ~~~
7775  * RV32:
7776  * // SMALDA
7777  * Mres0[31:0] = (Rs1.H[0] * Rs2.H[0]);
7778  * Mres1[31:0] = (Rs1.H[1] * Rs2.H[1]);
7779  * // SMALXDA
7780  * Mres0[31:0] = (Rs1.H[0] * Rs2.H[1]);
7781  * Mres1[31:0] = (Rs1.H[1] * Rs2.H[0]);
7782  * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
7783  * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres0[31:0]) + SE64(Mres1[31:0]);
7784  * RV64:
7785  * // SMALDA
7786  * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
7787  * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
7788  * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
7789  * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
7790  * // SMALXDA
7791  * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
7792  * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]);
7793  * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
7794  * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]);
7795  * Rd = Rd + SE64(Mres0[0][31:0]) + SE64(Mres1[0][31:0]) + SE64(Mres0[1][31:0]) +
7796  * SE64(Mres1[1][31:0]);
7797  * ~~~
7798  *
7799  * \param [in]  t    long long type of value stored in t
7800  * \param [in]  a    unsigned long type of value stored in a
7801  * \param [in]  b    unsigned long type of value stored in b
7802  * \return value stored in long long type
7803  */
__RV_SMALXDA(long long t,unsigned long a,unsigned long b)7804 __STATIC_FORCEINLINE long long __RV_SMALXDA(long long t, unsigned long a, unsigned long b)
7805 {
7806     __ASM volatile("smalxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
7807     return t;
7808 }
7809 /* ===== Inline Function End for 3.106.2. SMALXDA ===== */
7810 
7811 /* ===== Inline Function Start for 3.107.1. SMALDS ===== */
7812 /**
7813  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
7814  * \brief SMALDS (Signed Multiply Two Halfs & Subtract & Add 64-bit)
7815  * \details
7816  * **Type**: DSP (64-bit Profile)
7817  *
7818  * **Syntax**:\n
7819  * ~~~
7820  * SMALDS Rd, Rs1, Rs2
7821  * SMALDRS Rd, Rs1, Rs2
7822  * SMALXDS Rd, Rs1, Rs2
7823  * ~~~
7824  *
7825  * **Purpose**:\n
7826  * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
7827  * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
7828  * the 64-bit value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is
7829  * written back to the register-pair.
7830  * * SMALDS: rt pair + (top*top - bottom*bottom) (all 32-bit elements)
7831  * * SMALDRS: rt pair + (bottom*bottom - top*top) (all 32-bit elements)
7832  * * SMALXDS: rt pair + (top*bottom - bottom*top) (all 32-bit elements)
7833  *
7834  * **RV32 Description**:\n
7835  * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
7836  * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
7837  * Rs1 with the top 16-bit content of Rs2.
7838  * For the `SMALDRS` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
7839  * of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of Rs1
7840  * with the bottom 16-bit content of Rs2.
7841  * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
7842  * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
7843  * Rs1 with the bottom 16-bit content of Rs2.
7844  * The subtraction result is then added to the 64-bit value of an even/odd pair of registers specified by
7845  * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
7846  * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
7847  * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
7848  * includes register 2d and 2d+1.
7849  * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
7850  * register of the pair contains the low 32-bit of the operand.
7851  *
7852  * **RV64 Description**:\n
7853  * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
7854  * with the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
7855  * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content
7856  * of the 32-bit elements of Rs2.
7857  * For the `SMALDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
7858  * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
7859  * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
7860  * the 32-bit elements of Rs2.
7861  * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
7862  * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
7863  * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
7864  * content of the 32-bit elements of Rs2.
7865  * The subtraction results are then added to the 64-bit value of Rd. The 64-bit addition result is written
7866  * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
7867  * integers.
7868  *
7869  * **Operations**:\n
7870  * ~~~
7871  * * RV32:
7872  * Mres[31:0] = (Rs1.H[1] * Rs2.H[1]) - (Rs1.H[0] * Rs2.H[0]); // SMALDS
7873  * Mres[31:0] = (Rs1.H[0] * Rs2.H[0]) - (Rs1.H[1] * Rs2.H[1]); // SMALDRS
7874  * Mres[31:0] = (Rs1.H[1] * Rs2.H[0]) - (Rs1.H[0] * Rs2.H[1]); // SMALXDS
7875  * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
7876  * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
7877  * * RV64:
7878  * // SMALDS
7879  * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]) - (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
7880  * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]) - (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
7881  * // SMALDRS
7882  * Mres[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]) - (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
7883  * Mres[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]) - (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
7884  * // SMALXDS
7885  * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]) - (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
7886  * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]) - (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
7887  * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
7888  * ~~~
7889  *
7890  * \param [in]  t    long long type of value stored in t
7891  * \param [in]  a    unsigned long type of value stored in a
7892  * \param [in]  b    unsigned long type of value stored in b
7893  * \return value stored in long long type
7894  */
__RV_SMALDS(long long t,unsigned long a,unsigned long b)7895 __STATIC_FORCEINLINE long long __RV_SMALDS(long long t, unsigned long a, unsigned long b)
7896 {
7897     __ASM volatile("smalds %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
7898     return t;
7899 }
7900 /* ===== Inline Function End for 3.107.1. SMALDS ===== */
7901 
7902 /* ===== Inline Function Start for 3.107.2. SMALDRS ===== */
7903 /**
7904  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
7905  * \brief SMALDRS (Signed Multiply Two Halfs & Reverse Subtract & Add 64-bit)
7906  * \details
7907  * **Type**: DSP (64-bit Profile)
7908  *
7909  * **Syntax**:\n
7910  * ~~~
7911  * SMALDS Rd, Rs1, Rs2
7912  * SMALDRS Rd, Rs1, Rs2
7913  * SMALXDS Rd, Rs1, Rs2
7914  * ~~~
7915  *
7916  * **Purpose**:\n
7917  * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
7918  * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
7919  * the 64-bit value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is
7920  * written back to the register-pair.
7921  * * SMALDS: rt pair + (top*top - bottom*bottom) (all 32-bit elements)
7922  * * SMALDRS: rt pair + (bottom*bottom - top*top) (all 32-bit elements)
7923  * * SMALXDS: rt pair + (top*bottom - bottom*top) (all 32-bit elements)
7924  *
7925  * **RV32 Description**:\n
7926  * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
7927  * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
7928  * Rs1 with the top 16-bit content of Rs2.
7929  * For the `SMALDRS` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
7930  * of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of Rs1
7931  * with the bottom 16-bit content of Rs2.
7932  * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
7933  * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
7934  * Rs1 with the bottom 16-bit content of Rs2.
7935  * The subtraction result is then added to the 64-bit value of an even/odd pair of registers specified by
7936  * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
7937  * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
7938  * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
7939  * includes register 2d and 2d+1.
7940  * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
7941  * register of the pair contains the low 32-bit of the operand.
7942  *
7943  * **RV64 Description**:\n
7944  * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
7945  * with the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
7946  * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content
7947  * of the 32-bit elements of Rs2.
7948  * For the `SMALDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
7949  * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
7950  * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
7951  * the 32-bit elements of Rs2.
7952  * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
7953  * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
7954  * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
7955  * content of the 32-bit elements of Rs2.
7956  * The subtraction results are then added to the 64-bit value of Rd. The 64-bit addition result is written
7957  * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
7958  * integers.
7959  *
7960  * **Operations**:\n
7961  * ~~~
7962  * * RV32:
7963  * Mres[31:0] = (Rs1.H[1] * Rs2.H[1]) - (Rs1.H[0] * Rs2.H[0]); // SMALDS
7964  * Mres[31:0] = (Rs1.H[0] * Rs2.H[0]) - (Rs1.H[1] * Rs2.H[1]); // SMALDRS
7965  * Mres[31:0] = (Rs1.H[1] * Rs2.H[0]) - (Rs1.H[0] * Rs2.H[1]); // SMALXDS
7966  * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
7967  * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
7968  * * RV64:
7969  * // SMALDS
7970  * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]) - (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
7971  * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]) - (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
7972  * // SMALDRS
7973  * Mres[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]) - (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
7974  * Mres[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]) - (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
7975  * // SMALXDS
7976  * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]) - (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
7977  * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]) - (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
7978  * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
7979  * ~~~
7980  *
7981  * \param [in]  t    long long type of value stored in t
7982  * \param [in]  a    unsigned long type of value stored in a
7983  * \param [in]  b    unsigned long type of value stored in b
7984  * \return value stored in long long type
7985  */
__RV_SMALDRS(long long t,unsigned long a,unsigned long b)7986 __STATIC_FORCEINLINE long long __RV_SMALDRS(long long t, unsigned long a, unsigned long b)
7987 {
7988     __ASM volatile("smaldrs %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
7989     return t;
7990 }
7991 /* ===== Inline Function End for 3.107.2. SMALDRS ===== */
7992 
7993 /* ===== Inline Function Start for 3.107.3. SMALXDS ===== */
7994 /**
7995  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
7996  * \brief SMALXDS (Signed Crossed Multiply Two Halfs & Subtract & Add 64-bit)
7997  * \details
7998  * **Type**: DSP (64-bit Profile)
7999  *
8000  * **Syntax**:\n
8001  * ~~~
8002  * SMALDS Rd, Rs1, Rs2
8003  * SMALDRS Rd, Rs1, Rs2
8004  * SMALXDS Rd, Rs1, Rs2
8005  * ~~~
8006  *
8007  * **Purpose**:\n
8008  * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
8009  * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
8010  * the 64-bit value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is
8011  * written back to the register-pair.
8012  * * SMALDS: rt pair + (top*top - bottom*bottom) (all 32-bit elements)
8013  * * SMALDRS: rt pair + (bottom*bottom - top*top) (all 32-bit elements)
8014  * * SMALXDS: rt pair + (top*bottom - bottom*top) (all 32-bit elements)
8015  *
8016  * **RV32 Description**:\n
8017  * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
8018  * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
8019  * Rs1 with the top 16-bit content of Rs2.
8020  * For the `SMALDRS` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
8021  * of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of Rs1
8022  * with the bottom 16-bit content of Rs2.
8023  * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
8024  * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
8025  * Rs1 with the bottom 16-bit content of Rs2.
8026  * The subtraction result is then added to the 64-bit value of an even/odd pair of registers specified by
8027  * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
8028  * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
8029  * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
8030  * includes register 2d and 2d+1.
8031  * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
8032  * register of the pair contains the low 32-bit of the operand.
8033  *
8034  * **RV64 Description**:\n
8035  * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
8036  * with the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
8037  * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content
8038  * of the 32-bit elements of Rs2.
8039  * For the `SMALDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
8040  * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
8041  * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
8042  * the 32-bit elements of Rs2.
8043  * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
8044  * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
8045  * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
8046  * content of the 32-bit elements of Rs2.
8047  * The subtraction results are then added to the 64-bit value of Rd. The 64-bit addition result is written
8048  * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
8049  * integers.
8050  *
8051  * **Operations**:\n
8052  * ~~~
8053  * * RV32:
8054  * Mres[31:0] = (Rs1.H[1] * Rs2.H[1]) - (Rs1.H[0] * Rs2.H[0]); // SMALDS
8055  * Mres[31:0] = (Rs1.H[0] * Rs2.H[0]) - (Rs1.H[1] * Rs2.H[1]); // SMALDRS
8056  * Mres[31:0] = (Rs1.H[1] * Rs2.H[0]) - (Rs1.H[0] * Rs2.H[1]); // SMALXDS
8057  * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
8058  * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
8059  * * RV64:
8060  * // SMALDS
8061  * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]) - (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
8062  * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]) - (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
8063  * // SMALDRS
8064  * Mres[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]) - (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
8065  * Mres[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]) - (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
8066  * // SMALXDS
8067  * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]) - (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
8068  * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]) - (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
8069  * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
8070  * ~~~
8071  *
8072  * \param [in]  t    long long type of value stored in t
8073  * \param [in]  a    unsigned long type of value stored in a
8074  * \param [in]  b    unsigned long type of value stored in b
8075  * \return value stored in long long type
8076  */
__RV_SMALXDS(long long t,unsigned long a,unsigned long b)8077 __STATIC_FORCEINLINE long long __RV_SMALXDS(long long t, unsigned long a, unsigned long b)
8078 {
8079     __ASM volatile("smalxds %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
8080     return t;
8081 }
8082 /* ===== Inline Function End for 3.107.3. SMALXDS ===== */
8083 
8084 /* ===== Inline Function Start for 3.108. SMAR64 ===== */
8085 /**
8086  * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
8087  * \brief SMAR64 (Signed Multiply and Add to 64-Bit Data)
8088  * \details
8089  * **Type**: DSP (64-bit Profile)
8090  *
8091  * **Syntax**:\n
8092  * ~~~
8093  * SMAR64 Rd, Rs1, Rs2
8094  * ~~~
8095  *
8096  * **Purpose**:\n
8097  * Multiply the 32-bit signed elements in two registers and add the 64-bit multiplication
8098  * result to the 64-bit signed data of a pair of registers (RV32) or a register (RV64). The result is written
8099  * back to the pair of registers (RV32) or a register (RV64).
8100  *
8101  * **RV32 Description**:\n
8102  * This instruction multiplies the 32-bit signed data of Rs1 with that of Rs2. It adds
8103  * the 64-bit multiplication result to the 64-bit signed data of an even/odd pair of registers specified by
8104  * Rd(4,1). The addition result is written back to the even/odd pair of registers specified by Rd(4,1).
8105  * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
8106  * includes register 2d and 2d+1.
8107  * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
8108  * of the pair contains the low 32-bit of the result.
8109  *
8110  * **RV64 Description**:\n
8111  * This instruction multiplies the 32-bit signed elements of Rs1 with that of Rs2. It
8112  * adds the 64-bit multiplication results to the 64-bit signed data of Rd. The addition result is written
8113  * back to Rd.
8114  *
8115  * **Operations**:\n
8116  * ~~~
8117  * * RV32:
8118  * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
8119  * R[t_H].R[t_L] = R[t_H].R[t_L] + (Rs1 * Rs2);
8120  * * RV64:
8121  * Rd = Rd + (Rs1.W[0] * Rs2.W[0]) + (Rs1.W[1] * Rs2.W[1]);
8122  * ~~~
8123  *
8124  * \param [in]  t    long long type of value stored in t
8125  * \param [in]  a    long type of value stored in a
8126  * \param [in]  b    long type of value stored in b
8127  * \return value stored in long long type
8128  */
__RV_SMAR64(long long t,long a,long b)8129 __STATIC_FORCEINLINE long long __RV_SMAR64(long long t, long a, long b)
8130 {
8131     __ASM volatile("smar64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
8132     return t;
8133 }
8134 /* ===== Inline Function End for 3.108. SMAR64 ===== */
8135 
8136 /* ===== Inline Function Start for 3.109. SMAQA ===== */
8137 /**
8138  * \ingroup NMSIS_Core_DSP_Intrinsic_8B_MULT_32B_ADD
8139  * \brief SMAQA (Signed Multiply Four Bytes with 32-bit Adds)
8140  * \details
8141  * **Type**: Partial-SIMD (Reduction)
8142  *
8143  * **Syntax**:\n
8144  * ~~~
8145  * SMAQA Rd, Rs1, Rs2
8146  * ~~~
8147  *
8148  * **Purpose**:\n
8149  * Do four signed 8-bit multiplications from 32-bit chunks of two registers; and then adds
8150  * the four 16-bit results and the content of corresponding 32-bit chunks of a third register together.
8151  *
8152  * **Description**:\n
8153  * This instruction multiplies the four signed 8-bit elements of 32-bit chunks of Rs1 with the four
8154  * signed 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the signed
8155  * content of the corresponding 32-bit chunks of Rd. The final results are written back to the
8156  * corresponding 32-bit chunks in Rd.
8157  *
8158  * **Operations**:\n
8159  * ~~~
8160  * res[x] = Rd.W[x] +
8161  *    (Rs1.W[x].B[3] s* Rs2.W[x].B[3]) + (Rs1.W[x].B[2] s* Rs2.W[x].B[2]) +
8162  *    (Rs1.W[x].B[1] s* Rs2.W[x].B[1]) + (Rs1.W[x].B[0] s* Rs2.W[x].B[0]);
8163  * Rd.W[x] = res[x];
8164  * for RV32: x=0,
8165  * for RV64: x=1,0
8166  * ~~~
8167  *
8168  * \param [in]  t    long type of value stored in t
8169  * \param [in]  a    unsigned long type of value stored in a
8170  * \param [in]  b    unsigned long type of value stored in b
8171  * \return value stored in long type
8172  */
__RV_SMAQA(long t,unsigned long a,unsigned long b)8173 __STATIC_FORCEINLINE long __RV_SMAQA(long t, unsigned long a, unsigned long b)
8174 {
8175     __ASM volatile("smaqa %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
8176     return t;
8177 }
8178 /* ===== Inline Function End for 3.109. SMAQA ===== */
8179 
8180 /* ===== Inline Function Start for 3.110. SMAQA.SU ===== */
8181 /**
8182  * \ingroup NMSIS_Core_DSP_Intrinsic_8B_MULT_32B_ADD
8183  * \brief SMAQA.SU (Signed and Unsigned Multiply Four Bytes with 32-bit Adds)
8184  * \details
8185  * **Type**: Partial-SIMD (Reduction)
8186  *
8187  * **Syntax**:\n
8188  * ~~~
8189  * SMAQA.SU Rd, Rs1, Rs2
8190  * ~~~
8191  *
8192  * **Purpose**:\n
8193  * Do four `signed x unsigned` 8-bit multiplications from 32-bit chunks of two registers; and
8194  * then adds the four 16-bit results and the content of corresponding 32-bit chunks of a third register
8195  * together.
8196  *
8197  * **Description**:\n
8198  * This instruction multiplies the four signed 8-bit elements of 32-bit chunks of Rs1 with the four
8199  * unsigned 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the
8200  * signed content of the corresponding 32-bit chunks of Rd. The final results are written back to the
8201  * corresponding 32-bit chunks in Rd.
8202  *
8203  * **Operations**:\n
8204  * ~~~
8205  * res[x] = Rd.W[x] +
8206  *    (Rs1.W[x].B[3] su* Rs2.W[x].B[3]) + (Rs1.W[x].B[2] su* Rs2.W[x].B[2]) +
8207  *    (Rs1.W[x].B[1] su* Rs2.W[x].B[1]) + (Rs1.W[x].B[0] su* Rs2.W[x].B[0]);
8208  * Rd.W[x] = res[x];
8209  * for RV32: x=0,
8210  * for RV64: x=1...0
8211  * ~~~
8212  *
8213  * \param [in]  t    long type of value stored in t
8214  * \param [in]  a    unsigned long type of value stored in a
8215  * \param [in]  b    unsigned long type of value stored in b
8216  * \return value stored in long type
8217  */
__RV_SMAQA_SU(long t,unsigned long a,unsigned long b)8218 __STATIC_FORCEINLINE long __RV_SMAQA_SU(long t, unsigned long a, unsigned long b)
8219 {
8220     __ASM volatile("smaqa.su %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
8221     return t;
8222 }
8223 /* ===== Inline Function End for 3.110. SMAQA.SU ===== */
8224 
8225 /* ===== Inline Function Start for 3.111. SMAX8 ===== */
8226 /**
8227  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
8228  * \brief SMAX8 (SIMD 8-bit Signed Maximum)
8229  * \details
8230  * **Type**: SIMD
8231  *
8232  * **Syntax**:\n
8233  * ~~~
8234  * SMAX8 Rd, Rs1, Rs2
8235  * ~~~
8236  *
8237  * **Purpose**:\n
8238  * Do 8-bit signed integer elements finding maximum operations simultaneously.
8239  *
8240  * **Description**:\n
8241  * This instruction compares the 8-bit signed integer elements in Rs1 with the 8-bit
8242  * signed integer elements in Rs2 and selects the number that is greater than the other one. The
8243  * selected results are written to Rd.
8244  *
8245  * **Operations**:\n
8246  * ~~~
8247  * Rd.B[x] = (Rs1.B[x] > Rs2.B[x])? Rs1.B[x] : Rs2.B[x];
8248  * for RV32: x=3...0,
8249  * for RV64: x=7...0
8250  * ~~~
8251  *
8252  * \param [in]  a    unsigned long type of value stored in a
8253  * \param [in]  b    unsigned long type of value stored in b
8254  * \return value stored in unsigned long type
8255  */
__RV_SMAX8(unsigned long a,unsigned long b)8256 __STATIC_FORCEINLINE unsigned long __RV_SMAX8(unsigned long a, unsigned long b)
8257 {
8258     register unsigned long result;
8259     __ASM volatile("smax8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
8260     return result;
8261 }
8262 /* ===== Inline Function End for 3.111. SMAX8 ===== */
8263 
8264 /* ===== Inline Function Start for 3.112. SMAX16 ===== */
8265 /**
8266  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
8267  * \brief SMAX16 (SIMD 16-bit Signed Maximum)
8268  * \details
8269  * **Type**: SIMD
8270  *
8271  * **Syntax**:\n
8272  * ~~~
8273  * SMAX16 Rd, Rs1, Rs2
8274  * ~~~
8275  *
8276  * **Purpose**:\n
8277  * Do 16-bit signed integer elements finding maximum operations simultaneously.
8278  *
8279  * **Description**:\n
8280  * This instruction compares the 16-bit signed integer elements in Rs1 with the 16-bit
8281  * signed integer elements in Rs2 and selects the number that is greater than the other one. The
8282  * selected results are written to Rd.
8283  *
8284  * **Operations**:\n
8285  * ~~~
8286  * Rd.H[x] = (Rs1.H[x] > Rs2.H[x])? Rs1.H[x] : Rs2.H[x];
8287  * for RV32: x=1...0,
8288  * for RV64: x=3...0
8289  * ~~~
8290  *
8291  * \param [in]  a    unsigned long type of value stored in a
8292  * \param [in]  b    unsigned long type of value stored in b
8293  * \return value stored in unsigned long type
8294  */
__RV_SMAX16(unsigned long a,unsigned long b)8295 __STATIC_FORCEINLINE unsigned long __RV_SMAX16(unsigned long a, unsigned long b)
8296 {
8297     register unsigned long result;
8298     __ASM volatile("smax16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
8299     return result;
8300 }
8301 /* ===== Inline Function End for 3.112. SMAX16 ===== */
8302 
8303 /* ===== Inline Function Start for 3.113.1. SMBB16 ===== */
8304 /**
8305  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
8306  * \brief SMBB16 (SIMD Signed Multiply Bottom Half & Bottom Half)
8307  * \details
8308  * **Type**: SIMD
8309  *
8310  * **Syntax**:\n
8311  * ~~~
8312  * SMBB16 Rd, Rs1, Rs2
8313  * SMBT16 Rd, Rs1, Rs2
8314  * SMTT16 Rd, Rs1, Rs2
8315  * ~~~
8316  *
8317  * **Purpose**:\n
8318  * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed
8319  * 16-bit content of the 32-bit elements of another register and write the result to a third register.
8320  * * SMBB16: W[x].bottom*W[x].bottom
8321  * * SMBT16: W[x].bottom *W[x].top
8322  * * SMTT16: W[x].top * W[x].top
8323  *
8324  * **Description**:\n
8325  * For the `SMBB16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
8326  * with the bottom 16-bit content of the 32-bit elements of Rs2.
8327  * For the `SMBT16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
8328  * with the top 16-bit content of the 32-bit elements of Rs2.
8329  * For the `SMTT16` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
8330  * the top 16-bit content of the 32-bit elements of Rs2.
8331  * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed
8332  * integers.
8333  *
8334  * **Operations**:\n
8335  * ~~~
8336  * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[0]; // SMBB16
8337  * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[1]; // SMBT16
8338  * Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[1]; // SMTT16
8339  * for RV32: x=0,
8340  * for RV64: x=1...0
8341  * ~~~
8342  *
8343  * \param [in]  a    unsigned long type of value stored in a
8344  * \param [in]  b    unsigned long type of value stored in b
8345  * \return value stored in long type
8346  */
__RV_SMBB16(unsigned long a,unsigned long b)8347 __STATIC_FORCEINLINE long __RV_SMBB16(unsigned long a, unsigned long b)
8348 {
8349     register long result;
8350     __ASM volatile("smbb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
8351     return result;
8352 }
8353 /* ===== Inline Function End for 3.113.1. SMBB16 ===== */
8354 
8355 /* ===== Inline Function Start for 3.113.2. SMBT16 ===== */
8356 /**
8357  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
8358  * \brief SMBT16 (SIMD Signed Multiply Bottom Half & Top Half)
8359  * \details
8360  * **Type**: SIMD
8361  *
8362  * **Syntax**:\n
8363  * ~~~
8364  * SMBB16 Rd, Rs1, Rs2
8365  * SMBT16 Rd, Rs1, Rs2
8366  * SMTT16 Rd, Rs1, Rs2
8367  * ~~~
8368  *
8369  * **Purpose**:\n
8370  * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed
8371  * 16-bit content of the 32-bit elements of another register and write the result to a third register.
8372  * * SMBB16: W[x].bottom*W[x].bottom
8373  * * SMBT16: W[x].bottom *W[x].top
8374  * * SMTT16: W[x].top * W[x].top
8375  *
8376  * **Description**:\n
8377  * For the `SMBB16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
8378  * with the bottom 16-bit content of the 32-bit elements of Rs2.
8379  * For the `SMBT16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
8380  * with the top 16-bit content of the 32-bit elements of Rs2.
8381  * For the `SMTT16` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
8382  * the top 16-bit content of the 32-bit elements of Rs2.
8383  * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed
8384  * integers.
8385  *
8386  * **Operations**:\n
8387  * ~~~
8388  * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[0]; // SMBB16
8389  * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[1]; // SMBT16
8390  * Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[1]; // SMTT16
8391  * for RV32: x=0,
8392  * for RV64: x=1...0
8393  * ~~~
8394  *
8395  * \param [in]  a    unsigned long type of value stored in a
8396  * \param [in]  b    unsigned long type of value stored in b
8397  * \return value stored in long type
8398  */
__RV_SMBT16(unsigned long a,unsigned long b)8399 __STATIC_FORCEINLINE long __RV_SMBT16(unsigned long a, unsigned long b)
8400 {
8401     register long result;
8402     __ASM volatile("smbt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
8403     return result;
8404 }
8405 /* ===== Inline Function End for 3.113.2. SMBT16 ===== */
8406 
8407 /* ===== Inline Function Start for 3.113.3. SMTT16 ===== */
8408 /**
8409  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
8410  * \brief SMTT16 (SIMD Signed Multiply Top Half & Top Half)
8411  * \details
8412  * **Type**: SIMD
8413  *
8414  * **Syntax**:\n
8415  * ~~~
8416  * SMBB16 Rd, Rs1, Rs2
8417  * SMBT16 Rd, Rs1, Rs2
8418  * SMTT16 Rd, Rs1, Rs2
8419  * ~~~
8420  *
8421  * **Purpose**:\n
8422  * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed
8423  * 16-bit content of the 32-bit elements of another register and write the result to a third register.
8424  * * SMBB16: W[x].bottom*W[x].bottom
8425  * * SMBT16: W[x].bottom *W[x].top
8426  * * SMTT16: W[x].top * W[x].top
8427  *
8428  * **Description**:\n
8429  * For the `SMBB16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
8430  * with the bottom 16-bit content of the 32-bit elements of Rs2.
8431  * For the `SMBT16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
8432  * with the top 16-bit content of the 32-bit elements of Rs2.
8433  * For the `SMTT16` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
8434  * the top 16-bit content of the 32-bit elements of Rs2.
8435  * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed
8436  * integers.
8437  *
8438  * **Operations**:\n
8439  * ~~~
8440  * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[0]; // SMBB16
8441  * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[1]; // SMBT16
8442  * Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[1]; // SMTT16
8443  * for RV32: x=0,
8444  * for RV64: x=1...0
8445  * ~~~
8446  *
8447  * \param [in]  a    unsigned long type of value stored in a
8448  * \param [in]  b    unsigned long type of value stored in b
8449  * \return value stored in long type
8450  */
__RV_SMTT16(unsigned long a,unsigned long b)8451 __STATIC_FORCEINLINE long __RV_SMTT16(unsigned long a, unsigned long b)
8452 {
8453     register long result;
8454     __ASM volatile("smtt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
8455     return result;
8456 }
8457 /* ===== Inline Function End for 3.113.3. SMTT16 ===== */
8458 
8459 /* ===== Inline Function Start for 3.114.1. SMDS ===== */
8460 /**
8461  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
8462  * \brief SMDS (SIMD Signed Multiply Two Halfs and Subtract)
8463  * \details
8464  * **Type**: SIMD
8465  *
8466  * **Syntax**:\n
8467  * ~~~
8468  * SMDS Rd, Rs1, Rs2
8469  * SMDRS Rd, Rs1, Rs2
8470  * SMXDS Rd, Rs1, Rs2
8471  * ~~~
8472  *
8473  * **Purpose**:\n
8474  * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
8475  * perform a subtraction operation between the two 32-bit results.
8476  * * SMDS: top*top - bottom*bottom (per 32-bit element)
8477  * * SMDRS: bottom*bottom - top*top (per 32-bit element)
8478  * * SMXDS: top*bottom - bottom*top (per 32-bit element)
8479  *
8480  * **Description**:\n
8481  * For the `SMDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with
8482  * the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result
8483  * of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the
8484  * 32-bit elements of Rs2.
8485  * For the `SMDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
8486  * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
8487  * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
8488  * the 32-bit elements of Rs2.
8489  * For the `SMXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
8490  * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
8491  * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
8492  * content of the 32-bit elements of Rs2.
8493  * The subtraction result is written to the corresponding 32-bit element of Rd. The 16-bit contents of
8494  * multiplication are treated as signed integers.
8495  *
8496  * **Operations**:\n
8497  * ~~~
8498  * * SMDS:
8499  * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
8500  * * SMDRS:
8501  * Rd.W[x] = (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
8502  * * SMXDS:
8503  * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
8504  * ~~~
8505  *
8506  * \param [in]  a    unsigned long type of value stored in a
8507  * \param [in]  b    unsigned long type of value stored in b
8508  * \return value stored in long type
8509  */
__RV_SMDS(unsigned long a,unsigned long b)8510 __STATIC_FORCEINLINE long __RV_SMDS(unsigned long a, unsigned long b)
8511 {
8512     register long result;
8513     __ASM volatile("smds %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
8514     return result;
8515 }
8516 /* ===== Inline Function End for 3.114.1. SMDS ===== */
8517 
8518 /* ===== Inline Function Start for 3.114.2. SMDRS ===== */
8519 /**
8520  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
8521  * \brief SMDRS (SIMD Signed Multiply Two Halfs and Reverse Subtract)
8522  * \details
8523  * **Type**: SIMD
8524  *
8525  * **Syntax**:\n
8526  * ~~~
8527  * SMDS Rd, Rs1, Rs2
8528  * SMDRS Rd, Rs1, Rs2
8529  * SMXDS Rd, Rs1, Rs2
8530  * ~~~
8531  *
8532  * **Purpose**:\n
8533  * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
8534  * perform a subtraction operation between the two 32-bit results.
8535  * * SMDS: top*top - bottom*bottom (per 32-bit element)
8536  * * SMDRS: bottom*bottom - top*top (per 32-bit element)
8537  * * SMXDS: top*bottom - bottom*top (per 32-bit element)
8538  *
8539  * **Description**:\n
8540  * For the `SMDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with
8541  * the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result
8542  * of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the
8543  * 32-bit elements of Rs2.
8544  * For the `SMDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
8545  * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
8546  * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
8547  * the 32-bit elements of Rs2.
8548  * For the `SMXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
8549  * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
8550  * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
8551  * content of the 32-bit elements of Rs2.
8552  * The subtraction result is written to the corresponding 32-bit element of Rd. The 16-bit contents of
8553  * multiplication are treated as signed integers.
8554  *
8555  * **Operations**:\n
8556  * ~~~
8557  * * SMDS:
8558  * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
8559  * * SMDRS:
8560  * Rd.W[x] = (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
8561  * * SMXDS:
8562  * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
8563  * ~~~
8564  *
8565  * \param [in]  a    unsigned long type of value stored in a
8566  * \param [in]  b    unsigned long type of value stored in b
8567  * \return value stored in long type
8568  */
__RV_SMDRS(unsigned long a,unsigned long b)8569 __STATIC_FORCEINLINE long __RV_SMDRS(unsigned long a, unsigned long b)
8570 {
8571     register long result;
8572     __ASM volatile("smdrs %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
8573     return result;
8574 }
8575 /* ===== Inline Function End for 3.114.2. SMDRS ===== */
8576 
8577 /* ===== Inline Function Start for 3.114.3. SMXDS ===== */
8578 /**
8579  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
8580  * \brief SMXDS (SIMD Signed Crossed Multiply Two Halfs and Subtract)
8581  * \details
8582  * **Type**: SIMD
8583  *
8584  * **Syntax**:\n
8585  * ~~~
8586  * SMDS Rd, Rs1, Rs2
8587  * SMDRS Rd, Rs1, Rs2
8588  * SMXDS Rd, Rs1, Rs2
8589  * ~~~
8590  *
8591  * **Purpose**:\n
8592  * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
8593  * perform a subtraction operation between the two 32-bit results.
8594  * * SMDS: top*top - bottom*bottom (per 32-bit element)
8595  * * SMDRS: bottom*bottom - top*top (per 32-bit element)
8596  * * SMXDS: top*bottom - bottom*top (per 32-bit element)
8597  *
8598  * **Description**:\n
8599  * For the `SMDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with
8600  * the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result
8601  * of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the
8602  * 32-bit elements of Rs2.
8603  * For the `SMDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
8604  * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
8605  * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
8606  * the 32-bit elements of Rs2.
8607  * For the `SMXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
8608  * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
8609  * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
8610  * content of the 32-bit elements of Rs2.
8611  * The subtraction result is written to the corresponding 32-bit element of Rd. The 16-bit contents of
8612  * multiplication are treated as signed integers.
8613  *
8614  * **Operations**:\n
8615  * ~~~
8616  * * SMDS:
8617  * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
8618  * * SMDRS:
8619  * Rd.W[x] = (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
8620  * * SMXDS:
8621  * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
8622  * ~~~
8623  *
8624  * \param [in]  a    unsigned long type of value stored in a
8625  * \param [in]  b    unsigned long type of value stored in b
8626  * \return value stored in long type
8627  */
__RV_SMXDS(unsigned long a,unsigned long b)8628 __STATIC_FORCEINLINE long __RV_SMXDS(unsigned long a, unsigned long b)
8629 {
8630     register long result;
8631     __ASM volatile("smxds %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
8632     return result;
8633 }
8634 /* ===== Inline Function End for 3.114.3. SMXDS ===== */
8635 
8636 /* ===== Inline Function Start for 3.115. SMIN8 ===== */
8637 /**
8638  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
8639  * \brief SMIN8 (SIMD 8-bit Signed Minimum)
8640  * \details
8641  * **Type**: SIMD
8642  *
8643  * **Syntax**:\n
8644  * ~~~
8645  * SMIN8 Rd, Rs1, Rs2
8646  * ~~~
8647  *
8648  * **Purpose**:\n
8649  * Find the minimum of each pair of 8-bit signed integer elements simultaneously.
8650  *
8651  * **Description**:\n
8652  * This instruction compares the 8-bit signed integer elements in Rs1 with the 8-bit
8653  * signed integer elements in Rs2 and selects the number that is less than the other one. The selected
8654  * results are written to Rd.
8655  *
8656  * **Operations**:\n
8657  * ~~~
8658  * Rd.B[x] = (Rs1.B[x] < Rs2.B[x])? Rs1.B[x] : Rs2.B[x];
8659  * for RV32: x=3...0,
8660  * for RV64: x=7...0
8661  * ~~~
8662  *
8663  * \param [in]  a    unsigned long type of value stored in a
8664  * \param [in]  b    unsigned long type of value stored in b
8665  * \return value stored in unsigned long type
8666  */
__RV_SMIN8(unsigned long a,unsigned long b)8667 __STATIC_FORCEINLINE unsigned long __RV_SMIN8(unsigned long a, unsigned long b)
8668 {
8669     register unsigned long result;
8670     __ASM volatile("smin8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
8671     return result;
8672 }
8673 /* ===== Inline Function End for 3.115. SMIN8 ===== */
8674 
8675 /* ===== Inline Function Start for 3.116. SMIN16 ===== */
8676 /**
8677  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
8678  * \brief SMIN16 (SIMD 16-bit Signed Minimum)
8679  * \details
8680  * **Type**: SIMD
8681  *
8682  * **Syntax**:\n
8683  * ~~~
8684  * SMIN16 Rd, Rs1, Rs2
8685  * ~~~
8686  *
8687  * **Purpose**:\n
8688  * Find the minimum of each pair of 16-bit signed integer elements simultaneously.
8689  *
8690  * **Description**:\n
8691  * This instruction compares the 16-bit signed integer elements in Rs1 with the 16-bit
8692  * signed integer elements in Rs2 and selects the number that is less than the other one. The selected
8693  * results are written to Rd.
8694  *
8695  * **Operations**:\n
8696  * ~~~
8697  * Rd.H[x] = (Rs1.H[x] < Rs2.H[x])? Rs1.H[x] : Rs2.H[x];
8698  * for RV32: x=1...0,
8699  * for RV64: x=3...0
8700  * ~~~
8701  *
8702  * \param [in]  a    unsigned long type of value stored in a
8703  * \param [in]  b    unsigned long type of value stored in b
8704  * \return value stored in unsigned long type
8705  */
__RV_SMIN16(unsigned long a,unsigned long b)8706 __STATIC_FORCEINLINE unsigned long __RV_SMIN16(unsigned long a, unsigned long b)
8707 {
8708     register unsigned long result;
8709     __ASM volatile("smin16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
8710     return result;
8711 }
8712 /* ===== Inline Function End for 3.116. SMIN16 ===== */
8713 
8714 /* ===== Inline Function Start for 3.117.1. SMMUL ===== */
8715 /**
8716  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
8717  * \brief SMMUL (SIMD MSW Signed Multiply Word)
8718  * \details
8719  * **Type**: SIMD
8720  *
8721  * **Syntax**:\n
8722  * ~~~
8723  * SMMUL Rd, Rs1, Rs2
8724  * SMMUL.u Rd, Rs1, Rs2
8725  * ~~~
8726  *
8727  * **Purpose**:\n
8728  * Multiply the 32-bit signed integer elements of two registers and write the most significant
8729  * 32-bit results to the corresponding 32-bit elements of a register. The `.u` form performs an
8730  * additional rounding up operation on the multiplication results before taking the most significant
8731  * 32-bit part of the results.
8732  *
8733  * **Description**:\n
8734  * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2 and writes the
8735  * most significant 32-bit multiplication results to the corresponding 32-bit elements of Rd. The 32-bit
8736  * elements of Rs1 and Rs2 are treated as signed integers. The `.u` form of the instruction rounds up
8737  * the most significant 32-bit of the 64-bit multiplication results by adding a 1 to bit 31 of the results.
8738  * * For `smmul/RV32` instruction, it is an alias to `mulh/RV32` instruction.
8739  *
8740  * **Operations**:\n
8741  * ~~~
8742  * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
8743  * if (`.u` form) {
8744  *   Round[x][32:0] = Mres[x][63:31] + 1;
8745  *   Rd.W[x] = Round[x][32:1];
8746  * } else {
8747  *   Rd.W[x] = Mres[x][63:32];
8748  * }
8749  * for RV32: x=0
8750  * for RV64: x=1...0
8751  * ~~~
8752  *
8753  * \param [in]  a    long type of value stored in a
8754  * \param [in]  b    long type of value stored in b
8755  * \return value stored in long type
8756  */
__RV_SMMUL(long a,long b)8757 __STATIC_FORCEINLINE long __RV_SMMUL(long a, long b)
8758 {
8759     register long result;
8760     __ASM volatile("smmul %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
8761     return result;
8762 }
8763 /* ===== Inline Function End for 3.117.1. SMMUL ===== */
8764 
8765 /* ===== Inline Function Start for 3.117.2. SMMUL.u ===== */
8766 /**
8767  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
8768  * \brief SMMUL.u (SIMD MSW Signed Multiply Word with Rounding)
8769  * \details
8770  * **Type**: SIMD
8771  *
8772  * **Syntax**:\n
8773  * ~~~
8774  * SMMUL Rd, Rs1, Rs2
8775  * SMMUL.u Rd, Rs1, Rs2
8776  * ~~~
8777  *
8778  * **Purpose**:\n
8779  * Multiply the 32-bit signed integer elements of two registers and write the most significant
8780  * 32-bit results to the corresponding 32-bit elements of a register. The `.u` form performs an
8781  * additional rounding up operation on the multiplication results before taking the most significant
8782  * 32-bit part of the results.
8783  *
8784  * **Description**:\n
8785  * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2 and writes the
8786  * most significant 32-bit multiplication results to the corresponding 32-bit elements of Rd. The 32-bit
8787  * elements of Rs1 and Rs2 are treated as signed integers. The `.u` form of the instruction rounds up
8788  * the most significant 32-bit of the 64-bit multiplication results by adding a 1 to bit 31 of the results.
8789  * * For `smmul/RV32` instruction, it is an alias to `mulh/RV32` instruction.
8790  *
8791  * **Operations**:\n
8792  * ~~~
8793  * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
8794  * if (`.u` form) {
8795  *   Round[x][32:0] = Mres[x][63:31] + 1;
8796  *   Rd.W[x] = Round[x][32:1];
8797  * } else {
8798  *   Rd.W[x] = Mres[x][63:32];
8799  * }
8800  * for RV32: x=0
8801  * for RV64: x=1...0
8802  * ~~~
8803  *
8804  * \param [in]  a    long type of value stored in a
8805  * \param [in]  b    long type of value stored in b
8806  * \return value stored in long type
8807  */
__RV_SMMUL_U(long a,long b)8808 __STATIC_FORCEINLINE long __RV_SMMUL_U(long a, long b)
8809 {
8810     register long result;
8811     __ASM volatile("smmul.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
8812     return result;
8813 }
8814 /* ===== Inline Function End for 3.117.2. SMMUL.u ===== */
8815 
8816 /* ===== Inline Function Start for 3.118.1. SMMWB ===== */
8817 /**
8818  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
8819  * \brief SMMWB (SIMD MSW Signed Multiply Word and Bottom Half)
8820  * \details
8821  * **Type**: SIMD
8822  *
8823  * **Syntax**:\n
8824  * ~~~
8825  * SMMWB Rd, Rs1, Rs2
8826  * SMMWB.u Rd, Rs1, Rs2
8827  * ~~~
8828  *
8829  * **Purpose**:\n
8830  * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
8831  * corresponding 32-bit elements of another register, and write the most significant 32-bit results to
8832  * the corresponding 32-bit elements of a register. The `.u` form rounds up the results from the most
8833  * significant discarded bit.
8834  *
8835  * **Description**:\n
8836  * This instruction multiplies the signed 32-bit elements of Rs1 with the signed bottom 16-bit content
8837  * of the corresponding 32-bit elements of Rs2 and writes the most significant 32-bit multiplication
8838  * results to the corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the
8839  * most significant 32-bit of the 48-bit multiplication results by adding a 1 to bit 15 of the results.
8840  *
8841  * **Operations**:\n
8842  * ~~~
8843  * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[0];
8844  * if (`.u` form) {
8845  *   Round[x][32:0] = Mres[x][47:15] + 1;
8846  *   Rd.W[x] = Round[x][32:1];
8847  * } else {
8848  *   Rd.W[x] = Mres[x][47:16];
8849  * }
8850  * for RV32: x=0
8851  * for RV64: x=1...0
8852  * ~~~
8853  *
8854  * \param [in]  a    long type of value stored in a
8855  * \param [in]  b    unsigned long type of value stored in b
8856  * \return value stored in long type
8857  */
__RV_SMMWB(long a,unsigned long b)8858 __STATIC_FORCEINLINE long __RV_SMMWB(long a, unsigned long b)
8859 {
8860     register long result;
8861     __ASM volatile("smmwb %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
8862     return result;
8863 }
8864 /* ===== Inline Function End for 3.118.1. SMMWB ===== */
8865 
8866 /* ===== Inline Function Start for 3.118.2. SMMWB.u ===== */
8867 /**
8868  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
8869  * \brief SMMWB.u (SIMD MSW Signed Multiply Word and Bottom Half with Rounding)
8870  * \details
8871  * **Type**: SIMD
8872  *
8873  * **Syntax**:\n
8874  * ~~~
8875  * SMMWB Rd, Rs1, Rs2
8876  * SMMWB.u Rd, Rs1, Rs2
8877  * ~~~
8878  *
8879  * **Purpose**:\n
8880  * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
8881  * corresponding 32-bit elements of another register, and write the most significant 32-bit results to
8882  * the corresponding 32-bit elements of a register. The `.u` form rounds up the results from the most
8883  * significant discarded bit.
8884  *
8885  * **Description**:\n
8886  * This instruction multiplies the signed 32-bit elements of Rs1 with the signed bottom 16-bit content
8887  * of the corresponding 32-bit elements of Rs2 and writes the most significant 32-bit multiplication
8888  * results to the corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the
8889  * most significant 32-bit of the 48-bit multiplication results by adding a 1 to bit 15 of the results.
8890  *
8891  * **Operations**:\n
8892  * ~~~
8893  * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[0];
8894  * if (`.u` form) {
8895  *   Round[x][32:0] = Mres[x][47:15] + 1;
8896  *   Rd.W[x] = Round[x][32:1];
8897  * } else {
8898  *   Rd.W[x] = Mres[x][47:16];
8899  * }
8900  * for RV32: x=0
8901  * for RV64: x=1...0
8902  * ~~~
8903  *
8904  * \param [in]  a    long type of value stored in a
8905  * \param [in]  b    unsigned long type of value stored in b
8906  * \return value stored in long type
8907  */
__RV_SMMWB_U(long a,unsigned long b)8908 __STATIC_FORCEINLINE long __RV_SMMWB_U(long a, unsigned long b)
8909 {
8910     register long result;
8911     __ASM volatile("smmwb.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
8912     return result;
8913 }
8914 /* ===== Inline Function End for 3.118.2. SMMWB.u ===== */
8915 
8916 /* ===== Inline Function Start for 3.119.1. SMMWT ===== */
8917 /**
8918  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
8919  * \brief SMMWT (SIMD MSW Signed Multiply Word and Top Half)
8920  * \details
8921  * **Type**: SIMD
8922  *
8923  * **Syntax**:\n
8924  * ~~~
8925  * SMMWT Rd, Rs1, Rs2
8926  * SMMWT.u Rd, Rs1, Rs2
8927  * ~~~
8928  *
8929  * **Purpose**:\n
8930  * Multiply the signed 32-bit integer elements of one register and the top 16-bit of the
8931  * corresponding 32-bit elements of another register, and write the most significant 32-bit results to
8932  * the corresponding 32-bit elements of a register. The `.u` form rounds up the results from the most
8933  * significant discarded bit.
8934  *
8935  * **Description**:\n
8936  * This instruction multiplies the signed 32-bit elements of Rs1 with the top signed 16-bit content of
8937  * the corresponding 32-bit elements of Rs2 and writes the most significant 32-bit multiplication
8938  * results to the corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the
8939  * most significant 32-bit of the 48-bit multiplication results by adding a 1 to bit 15 of the results.
8940  *
8941  * **Operations**:\n
8942  * ~~~
8943  * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[1];
8944  * if (`.u` form) {
8945  *   Round[x][32:0] = Mres[x][47:15] + 1;
8946  *   Rd.W[x] = Round[x][32:1];
8947  * } else {
8948  *   Rd.W[x] = Mres[x][47:16];
8949  * }
8950  * for RV32: x=0
8951  * for RV64: x=1...0
8952  * ~~~
8953  *
8954  * \param [in]  a    long type of value stored in a
8955  * \param [in]  b    unsigned long type of value stored in b
8956  * \return value stored in long type
8957  */
__RV_SMMWT(long a,unsigned long b)8958 __STATIC_FORCEINLINE long __RV_SMMWT(long a, unsigned long b)
8959 {
8960     register long result;
8961     __ASM volatile("smmwt %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
8962     return result;
8963 }
8964 /* ===== Inline Function End for 3.119.1. SMMWT ===== */
8965 
8966 /* ===== Inline Function Start for 3.119.2. SMMWT.u ===== */
8967 /**
8968  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
8969  * \brief SMMWT.u (SIMD MSW Signed Multiply Word and Top Half with Rounding)
8970  * \details
8971  * **Type**: SIMD
8972  *
8973  * **Syntax**:\n
8974  * ~~~
8975  * SMMWT Rd, Rs1, Rs2
8976  * SMMWT.u Rd, Rs1, Rs2
8977  * ~~~
8978  *
8979  * **Purpose**:\n
8980  * Multiply the signed 32-bit integer elements of one register and the top 16-bit of the
8981  * corresponding 32-bit elements of another register, and write the most significant 32-bit results to
8982  * the corresponding 32-bit elements of a register. The `.u` form rounds up the results from the most
8983  * significant discarded bit.
8984  *
8985  * **Description**:\n
8986  * This instruction multiplies the signed 32-bit elements of Rs1 with the top signed 16-bit content of
8987  * the corresponding 32-bit elements of Rs2 and writes the most significant 32-bit multiplication
8988  * results to the corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the
8989  * most significant 32-bit of the 48-bit multiplication results by adding a 1 to bit 15 of the results.
8990  *
8991  * **Operations**:\n
8992  * ~~~
8993  * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[1];
8994  * if (`.u` form) {
8995  *   Round[x][32:0] = Mres[x][47:15] + 1;
8996  *   Rd.W[x] = Round[x][32:1];
8997  * } else {
8998  *   Rd.W[x] = Mres[x][47:16];
8999  * }
9000  * for RV32: x=0
9001  * for RV64: x=1...0
9002  * ~~~
9003  *
9004  * \param [in]  a    long type of value stored in a
9005  * \param [in]  b    unsigned long type of value stored in b
9006  * \return value stored in long type
9007  */
__RV_SMMWT_U(long a,unsigned long b)9008 __STATIC_FORCEINLINE long __RV_SMMWT_U(long a, unsigned long b)
9009 {
9010     register long result;
9011     __ASM volatile("smmwt.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
9012     return result;
9013 }
9014 /* ===== Inline Function End for 3.119.2. SMMWT.u ===== */
9015 
9016 /* ===== Inline Function Start for 3.120.1. SMSLDA ===== */
9017 /**
9018  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
9019  * \brief SMSLDA (Signed Multiply Two Halfs & Add & Subtract 64-bit)
9020  * \details
9021  * **Type**: DSP (64-bit Profile)
9022  *
9023  * **Syntax**:\n
9024  * ~~~
9025  * SMSLDA Rd, Rs1, Rs2
9026  * SMSLXDA Rd, Rs1, Rs2
9027  * ~~~
9028  *
9029  * **Purpose**:\n
9030  * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
9031  * subtracts the two 32-bit results from the 64-bit value of an even/odd pair of registers (RV32) or a
9032  * register (RV64). The subtraction result is written back to the register-pair.
9033  * * SMSLDA: rd pair - top*top - bottom*bottom (all 32-bit elements)
9034  * * SMSLXDA: rd pair - top*bottom - bottom*top (all 32-bit elements)
9035  *
9036  * **RV32 Description**:\n
9037  * For the `SMSLDA` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
9038  * content of Rs2 and multiplies the top 16-bit content of Rs1 with the top 16-bit content of Rs2.
9039  * For the `SMSLXDA` instruction, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit
9040  * content of Rs2 and multiplies the bottom 16-bit content of Rs1 with the top 16-bit content of Rs2.
9041  * The two multiplication results are subtracted from the 64-bit value of an even/odd pair of registers
9042  * specified by Rd(4,1). The 64-bit subtraction result is written back to the register-pair. The 16-bit
9043  * values of Rs1 and Rs2, and the 64-bit value of the register-pair are treated as signed integers.
9044  * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
9045  * includes register 2d and 2d+1.
9046  * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
9047  * of the pair contains the low 32-bit of the result.
9048  *
9049  * **RV64 Description**:\n
9050  * For the `SMSLDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
9051  * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of
9052  * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
9053  * For the `SMSLXDA` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
9054  * the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the bottom 16-bit content of
9055  * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
9056  * The four multiplication results are subtracted from the 64-bit value of Rd. The 64-bit subtraction
9057  * result is written back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated
9058  * as signed integers.
9059  *
9060  * **Operations**:\n
9061  * ~~~
9062  * * RV32:
9063  * // SMSLDA
9064  * Mres0[31:0] = (Rs1.H[0] * Rs2.H[0]);
9065  * Mres1[31:0] = (Rs1.H[1] * Rs2.H[1]);
9066  * // SMSLXDA
9067  * Mres0[31:0] = (Rs1.H[0] * Rs2.H[1]);
9068  * Mres1[31:0] = (Rs1.H[1] * Rs2.H[0]);
9069  * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
9070  * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] - SE64(Mres0[31:0]) - SE64(Mres1[31:0]);
9071  * * RV64:
9072  * // SMSLDA
9073  * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
9074  * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
9075  * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
9076  * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
9077  * // SMSLXDA
9078  * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
9079  * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]);
9080  * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
9081  * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]);
9082  * Rd = Rd - SE64(Mres0[0][31:0]) - SE64(Mres1[0][31:0]) - SE64(Mres0[1][31:0]) -
9083  * SE64(Mres1[1][31:0]);
9084  * ~~~
9085  *
9086  * \param [in]  t    long long type of value stored in t
9087  * \param [in]  a    unsigned long type of value stored in a
9088  * \param [in]  b    unsigned long type of value stored in b
9089  * \return value stored in long long type
9090  */
__RV_SMSLDA(long long t,unsigned long a,unsigned long b)9091 __STATIC_FORCEINLINE long long __RV_SMSLDA(long long t, unsigned long a, unsigned long b)
9092 {
9093     __ASM volatile("smslda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
9094     return t;
9095 }
9096 /* ===== Inline Function End for 3.120.1. SMSLDA ===== */
9097 
9098 /* ===== Inline Function Start for 3.120.2. SMSLXDA ===== */
9099 /**
9100  * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
9101  * \brief SMSLXDA (Signed Crossed Multiply Two Halfs & Add & Subtract 64-bit)
9102  * \details
9103  * **Type**: DSP (64-bit Profile)
9104  *
9105  * **Syntax**:\n
9106  * ~~~
9107  * SMSLDA Rd, Rs1, Rs2
9108  * SMSLXDA Rd, Rs1, Rs2
9109  * ~~~
9110  *
9111  * **Purpose**:\n
9112  * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
9113  * subtracts the two 32-bit results from the 64-bit value of an even/odd pair of registers (RV32) or a
9114  * register (RV64). The subtraction result is written back to the register-pair.
9115  * * SMSLDA: rd pair - top*top - bottom*bottom (all 32-bit elements)
9116  * * SMSLXDA: rd pair - top*bottom - bottom*top (all 32-bit elements)
9117  *
9118  * **RV32 Description**:\n
9119  * For the `SMSLDA` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
9120  * content of Rs2 and multiplies the top 16-bit content of Rs1 with the top 16-bit content of Rs2.
9121  * For the `SMSLXDA` instruction, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit
9122  * content of Rs2 and multiplies the bottom 16-bit content of Rs1 with the top 16-bit content of Rs2.
9123  * The two multiplication results are subtracted from the 64-bit value of an even/odd pair of registers
9124  * specified by Rd(4,1). The 64-bit subtraction result is written back to the register-pair. The 16-bit
9125  * values of Rs1 and Rs2, and the 64-bit value of the register-pair are treated as signed integers.
9126  * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
9127  * includes register 2d and 2d+1.
9128  * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
9129  * of the pair contains the low 32-bit of the result.
9130  *
9131  * **RV64 Description**:\n
9132  * For the `SMSLDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
9133  * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of
9134  * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
9135  * For the `SMSLXDA` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
9136  * the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the bottom 16-bit content of
9137  * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
9138  * The four multiplication results are subtracted from the 64-bit value of Rd. The 64-bit subtraction
9139  * result is written back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated
9140  * as signed integers.
9141  *
9142  * **Operations**:\n
9143  * ~~~
9144  * * RV32:
9145  * // SMSLDA
9146  * Mres0[31:0] = (Rs1.H[0] * Rs2.H[0]);
9147  * Mres1[31:0] = (Rs1.H[1] * Rs2.H[1]);
9148  * // SMSLXDA
9149  * Mres0[31:0] = (Rs1.H[0] * Rs2.H[1]);
9150  * Mres1[31:0] = (Rs1.H[1] * Rs2.H[0]);
9151  * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
9152  * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] - SE64(Mres0[31:0]) - SE64(Mres1[31:0]);
9153  * * RV64:
9154  * // SMSLDA
9155  * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
9156  * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
9157  * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
9158  * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
9159  * // SMSLXDA
9160  * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
9161  * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]);
9162  * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
9163  * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]);
9164  * Rd = Rd - SE64(Mres0[0][31:0]) - SE64(Mres1[0][31:0]) - SE64(Mres0[1][31:0]) -
9165  * SE64(Mres1[1][31:0]);
9166  * ~~~
9167  *
9168  * \param [in]  t    long long type of value stored in t
9169  * \param [in]  a    unsigned long type of value stored in a
9170  * \param [in]  b    unsigned long type of value stored in b
9171  * \return value stored in long long type
9172  */
__RV_SMSLXDA(long long t,unsigned long a,unsigned long b)9173 __STATIC_FORCEINLINE long long __RV_SMSLXDA(long long t, unsigned long a, unsigned long b)
9174 {
9175     __ASM volatile("smslxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
9176     return t;
9177 }
9178 /* ===== Inline Function End for 3.120.2. SMSLXDA ===== */
9179 
9180 /* ===== Inline Function Start for 3.121. SMSR64 ===== */
9181 /**
9182  * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
 * \brief SMSR64 (Signed Multiply and Subtract from 64-Bit Data)
9184  * \details
9185  * **Type**: DSP (64-bit Profile)
9186  *
9187  * **Syntax**:\n
9188  * ~~~
9189  * SMSR64 Rd, Rs1, Rs2
9190  * ~~~
9191  *
9192  * **Purpose**:\n
9193  * Multiply the 32-bit signed elements in two registers and subtract the 64-bit multiplication
9194  * results from the 64-bit signed data of a pair of registers (RV32) or a register (RV64). The result is
9195  * written back to the pair of registers (RV32) or a register (RV64).
9196  *
9197  * **RV32 Description**:\n
9198  * This instruction multiplies the 32-bit signed data of Rs1 with that of Rs2. It
9199  * subtracts the 64-bit multiplication result from the 64-bit signed data of an even/odd pair of registers
9200  * specified by Rd(4,1). The subtraction result is written back to the even/odd pair of registers
9201  * specified by Rd(4,1).
 * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
9203  * includes register 2d and 2d+1.
9204  * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
9205  * of the pair contains the low 32-bit of the result.
9206  *
9207  * **RV64 Description**:\n
9208  * This instruction multiplies the 32-bit signed elements of Rs1 with that of Rs2. It
9209  * subtracts the 64-bit multiplication results from the 64-bit signed data of Rd. The subtraction result is
9210  * written back to Rd.
9211  *
9212  * **Operations**:\n
9213  * ~~~
9214  * * RV32:
9215  * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
9216  * R[t_H].R[t_L] = R[t_H].R[t_L] - (Rs1 * Rs2);
9217  * * RV64:
9218  * Rd = Rd - (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]);
9219  * ~~~
9220  *
9221  * \param [in]  t    long long type of value stored in t
9222  * \param [in]  a    long type of value stored in a
9223  * \param [in]  b    long type of value stored in b
9224  * \return value stored in long long type
9225  */
__RV_SMSR64(long long t,long a,long b)9226 __STATIC_FORCEINLINE long long __RV_SMSR64(long long t, long a, long b)
9227 {
9228     __ASM volatile("smsr64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
9229     return t;
9230 }
9231 /* ===== Inline Function End for 3.121. SMSR64 ===== */
9232 
9233 /* ===== Inline Function Start for 3.122.1. SMUL8 ===== */
9234 /**
9235  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
9236  * \brief SMUL8 (SIMD Signed 8-bit Multiply)
9237  * \details
9238  * **Type**: SIMD
9239  *
9240  * **Syntax**:\n
9241  * ~~~
9242  * SMUL8 Rd, Rs1, Rs2
9243  * SMULX8 Rd, Rs1, Rs2
9244  * ~~~
9245  *
9246  * **Purpose**:\n
9247  * Do signed 8-bit multiplications and generate four 16-bit results simultaneously.
9248  *
9249  * **RV32 Description**:\n
9250  * For the `SMUL8` instruction, multiply the 8-bit data elements of Rs1 with the
9251  * corresponding 8-bit data elements of Rs2.
9252  * For the `SMULX8` instruction, multiply the first and second 8-bit data elements of Rs1 with the
9253  * second and first 8-bit data elements of Rs2. At the same time, multiply the third and fourth 8-bit data
9254  * elements of Rs1 with the fourth and third 8-bit data elements of Rs2.
9255  * The four 16-bit results are then written into an even/odd pair of registers specified by Rd(4,1).
9256  * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
9257  * includes register 2d and 2d+1.
9258  * The odd `2d+1` register of the pair contains the two 16-bit results calculated from the top part of
9259  * Rs1 and the even `2d` register of the pair contains the two 16-bit results calculated from the bottom
9260  * part of Rs1.
9261  *
9262  * **RV64 Description**:\n
9263  * For the `SMUL8` instruction, multiply the 8-bit data elements of Rs1 with the
9264  * corresponding 8-bit data elements of Rs2.
9265  * For the `SMULX8` instruction, multiply the first and second 8-bit data elements of Rs1 with the
9266  * second and first 8-bit data elements of Rs2. At the same time, multiply the third and fourth 8-bit data
9267  * elements of Rs1 with the fourth and third 8-bit data elements of Rs2.
9268  * The four 16-bit results are then written into Rd. The Rd.W[1] contains the two 16-bit results
9269  * calculated from the top part of Rs1 and the Rd.W[0] contains the two 16-bit results calculated from
9270  * the bottom part of Rs1.
9271  *
9272  * **Operations**:\n
9273  * ~~~
9274  * * RV32:
9275  * if (is `SMUL8`) {
9276  *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
9277  *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
9278  * } else if (is `SMULX8`) {
9279  *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
9280  *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
9281  * }
9282  * rest[x/2] = op1t[x/2] s* op2t[x/2];
9283  * resb[x/2] = op1b[x/2] s* op2b[x/2];
9284  * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
9285  * R[t_H].H[1] = rest[1]; R[t_H].H[0] = resb[1];
9286  * R[t_L].H[1] = rest[0]; R[t_L].H[0] = resb[0];
9287  * x = 0 and 2
9288  * * RV64:
9289  * if (is `SMUL8`) {
9290  *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
9291  *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
9292  * } else if (is `SMULX8`) {
9293  *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
9294  *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
9295  * }
9296  * rest[x/2] = op1t[x/2] s* op2t[x/2];
9297  * resb[x/2] = op1b[x/2] s* op2b[x/2];
9298  * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
9299  * Rd.W[1].H[1] = rest[1]; Rd.W[1].H[0] = resb[1];
9300  * Rd.W[0].H[1] = rest[0]; Rd.W[0].H[0] = resb[0];
9301  * x = 0 and 2
9302  * ~~~
9303  *
9304  * \param [in]  a    unsigned int type of value stored in a
9305  * \param [in]  b    unsigned int type of value stored in b
9306  * \return value stored in unsigned long long type
9307  */
__RV_SMUL8(unsigned int a,unsigned int b)9308 __STATIC_FORCEINLINE unsigned long long __RV_SMUL8(unsigned int a, unsigned int b)
9309 {
9310     register unsigned long long result;
9311     __ASM volatile("smul8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
9312     return result;
9313 }
9314 /* ===== Inline Function End for 3.122.1. SMUL8 ===== */
9315 
9316 /* ===== Inline Function Start for 3.122.2. SMULX8 ===== */
9317 /**
9318  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
9319  * \brief SMULX8 (SIMD Signed Crossed 8-bit Multiply)
9320  * \details
9321  * **Type**: SIMD
9322  *
9323  * **Syntax**:\n
9324  * ~~~
9325  * SMUL8 Rd, Rs1, Rs2
9326  * SMULX8 Rd, Rs1, Rs2
9327  * ~~~
9328  *
9329  * **Purpose**:\n
9330  * Do signed 8-bit multiplications and generate four 16-bit results simultaneously.
9331  *
9332  * **RV32 Description**:\n
9333  * For the `SMUL8` instruction, multiply the 8-bit data elements of Rs1 with the
9334  * corresponding 8-bit data elements of Rs2.
9335  * For the `SMULX8` instruction, multiply the first and second 8-bit data elements of Rs1 with the
9336  * second and first 8-bit data elements of Rs2. At the same time, multiply the third and fourth 8-bit data
9337  * elements of Rs1 with the fourth and third 8-bit data elements of Rs2.
9338  * The four 16-bit results are then written into an even/odd pair of registers specified by Rd(4,1).
9339  * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
9340  * includes register 2d and 2d+1.
9341  * The odd `2d+1` register of the pair contains the two 16-bit results calculated from the top part of
9342  * Rs1 and the even `2d` register of the pair contains the two 16-bit results calculated from the bottom
9343  * part of Rs1.
9344  *
9345  * **RV64 Description**:\n
9346  * For the `SMUL8` instruction, multiply the 8-bit data elements of Rs1 with the
9347  * corresponding 8-bit data elements of Rs2.
9348  * For the `SMULX8` instruction, multiply the first and second 8-bit data elements of Rs1 with the
9349  * second and first 8-bit data elements of Rs2. At the same time, multiply the third and fourth 8-bit data
9350  * elements of Rs1 with the fourth and third 8-bit data elements of Rs2.
9351  * The four 16-bit results are then written into Rd. The Rd.W[1] contains the two 16-bit results
9352  * calculated from the top part of Rs1 and the Rd.W[0] contains the two 16-bit results calculated from
9353  * the bottom part of Rs1.
9354  *
9355  * **Operations**:\n
9356  * ~~~
9357  * * RV32:
9358  * if (is `SMUL8`) {
9359  *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
9360  *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
9361  * } else if (is `SMULX8`) {
9362  *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
9363  *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
9364  * }
9365  * rest[x/2] = op1t[x/2] s* op2t[x/2];
9366  * resb[x/2] = op1b[x/2] s* op2b[x/2];
9367  * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
9368  * R[t_H].H[1] = rest[1]; R[t_H].H[0] = resb[1];
9369  * R[t_L].H[1] = rest[0]; R[t_L].H[0] = resb[0];
9370  * x = 0 and 2
9371  * * RV64:
9372  * if (is `SMUL8`) {
9373  *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
9374  *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
9375  * } else if (is `SMULX8`) {
9376  *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
9377  *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
9378  * }
9379  * rest[x/2] = op1t[x/2] s* op2t[x/2];
9380  * resb[x/2] = op1b[x/2] s* op2b[x/2];
9381  * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
9382  * Rd.W[1].H[1] = rest[1]; Rd.W[1].H[0] = resb[1];
9383  * Rd.W[0].H[1] = rest[0]; Rd.W[0].H[0] = resb[0];
9384  * x = 0 and 2
9385  * ~~~
9386  *
9387  * \param [in]  a    unsigned int type of value stored in a
9388  * \param [in]  b    unsigned int type of value stored in b
9389  * \return value stored in unsigned long long type
9390  */
__RV_SMULX8(unsigned int a,unsigned int b)9391 __STATIC_FORCEINLINE unsigned long long __RV_SMULX8(unsigned int a, unsigned int b)
9392 {
9393     register unsigned long long result;
9394     __ASM volatile("smulx8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
9395     return result;
9396 }
9397 /* ===== Inline Function End for 3.122.2. SMULX8 ===== */
9398 
9399 /* ===== Inline Function Start for 3.123.1. SMUL16 ===== */
9400 /**
9401  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
9402  * \brief SMUL16 (SIMD Signed 16-bit Multiply)
9403  * \details
9404  * **Type**: SIMD
9405  *
9406  * **Syntax**:\n
9407  * ~~~
9408  * SMUL16 Rd, Rs1, Rs2
9409  * SMULX16 Rd, Rs1, Rs2
9410  * ~~~
9411  *
9412  * **Purpose**:\n
9413  * Do signed 16-bit multiplications and generate two 32-bit results simultaneously.
9414  *
9415  * **RV32 Description**:\n
9416  * For the `SMUL16` instruction, multiply the top 16-bit Q15 content of Rs1 with
9417  * the top 16-bit Q15 content of Rs2. At the same time, multiply the bottom 16-bit Q15 content of Rs1
9418  * with the bottom 16-bit Q15 content of Rs2.
9419  * For the `SMULX16` instruction, multiply the top 16-bit Q15 content of Rs1 with the bottom 16-bit
 * Q15 content of Rs2. At the same time, multiply the bottom 16-bit Q15 content of Rs1 with the top
 * 16-bit Q15 content of Rs2.
9422  * The two Q30 results are then written into an even/odd pair of registers specified by Rd(4,1). Rd(4,1),
9423  * i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair includes
9424  * register 2d and 2d+1.
9425  * The odd `2d+1` register of the pair contains the 32-bit result calculated from the top part of Rs1 and
9426  * the even `2d` register of the pair contains the 32-bit result calculated from the bottom part of Rs1.
9427  *
9428  * **RV64 Description**:\n
9429  * For the `SMUL16` instruction, multiply the top 16-bit Q15 content of the lower
9430  * 32-bit word in Rs1 with the top 16-bit Q15 content of the lower 32-bit word in Rs2. At the same time,
9431  * multiply the bottom 16-bit Q15 content of the lower 32-bit word in Rs1 with the bottom 16-bit Q15
9432  * content of the lower 32-bit word in Rs2.
9433  * For the `SMULX16` instruction, multiply the top 16-bit Q15 content of the lower 32-bit word in Rs1
9434  * with the bottom 16-bit Q15 content of the lower 32-bit word in Rs2. At the same time, multiply the
9435  * bottom 16-bit Q15 content of the lower 32-bit word in Rs1 with the top 16-bit Q15 content of the
9436  * lower 32-bit word in Rs2.
9437  * The two 32-bit Q30 results are then written into Rd. The result calculated from the top 16-bit of the
9438  * lower 32-bit word in Rs1 is written to Rd.W[1]. And the result calculated from the bottom 16-bit of
 * the lower 32-bit word in Rs1 is written to Rd.W[0].
9440  *
9441  * **Operations**:\n
9442  * ~~~
9443  * * RV32:
9444  * if (is `SMUL16`) {
9445  *   op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
9446  *   op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
9447  * } else if (is `SMULX16`) {
9448  *   op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
9449  *   op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
9450  * }
9451  * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
9452  *   res = aop s* bop;
9453  * }
9454  * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
9455  * R[t_H] = rest;
9456  * R[t_L] = resb;
9457  * * RV64:
9458  * if (is `SMUL16`) {
9459  *   op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
9460  *   op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
9461  * } else if (is `SMULX16`) {
9462  *   op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
9463  *   op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
9464  * }
9465  * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
9466  *   res = aop s* bop;
9467  * }
9468  * Rd.W[1] = rest;
9469  * Rd.W[0] = resb;
9470  * ~~~
9471  *
9472  * \param [in]  a    unsigned int type of value stored in a
9473  * \param [in]  b    unsigned int type of value stored in b
9474  * \return value stored in unsigned long long type
9475  */
__RV_SMUL16(unsigned int a,unsigned int b)9476 __STATIC_FORCEINLINE unsigned long long __RV_SMUL16(unsigned int a, unsigned int b)
9477 {
9478     register unsigned long long result;
9479     __ASM volatile("smul16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
9480     return result;
9481 }
9482 /* ===== Inline Function End for 3.123.1. SMUL16 ===== */
9483 
9484 /* ===== Inline Function Start for 3.123.2. SMULX16 ===== */
9485 /**
9486  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
9487  * \brief SMULX16 (SIMD Signed Crossed 16-bit Multiply)
9488  * \details
9489  * **Type**: SIMD
9490  *
9491  * **Syntax**:\n
9492  * ~~~
9493  * SMUL16 Rd, Rs1, Rs2
9494  * SMULX16 Rd, Rs1, Rs2
9495  * ~~~
9496  *
9497  * **Purpose**:\n
9498  * Do signed 16-bit multiplications and generate two 32-bit results simultaneously.
9499  *
9500  * **RV32 Description**:\n
9501  * For the `SMUL16` instruction, multiply the top 16-bit Q15 content of Rs1 with
9502  * the top 16-bit Q15 content of Rs2. At the same time, multiply the bottom 16-bit Q15 content of Rs1
9503  * with the bottom 16-bit Q15 content of Rs2.
9504  * For the `SMULX16` instruction, multiply the top 16-bit Q15 content of Rs1 with the bottom 16-bit
 * Q15 content of Rs2. At the same time, multiply the bottom 16-bit Q15 content of Rs1 with the top
 * 16-bit Q15 content of Rs2.
9507  * The two Q30 results are then written into an even/odd pair of registers specified by Rd(4,1). Rd(4,1),
9508  * i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair includes
9509  * register 2d and 2d+1.
9510  * The odd `2d+1` register of the pair contains the 32-bit result calculated from the top part of Rs1 and
9511  * the even `2d` register of the pair contains the 32-bit result calculated from the bottom part of Rs1.
9512  *
9513  * **RV64 Description**:\n
9514  * For the `SMUL16` instruction, multiply the top 16-bit Q15 content of the lower
9515  * 32-bit word in Rs1 with the top 16-bit Q15 content of the lower 32-bit word in Rs2. At the same time,
9516  * multiply the bottom 16-bit Q15 content of the lower 32-bit word in Rs1 with the bottom 16-bit Q15
9517  * content of the lower 32-bit word in Rs2.
9518  * For the `SMULX16` instruction, multiply the top 16-bit Q15 content of the lower 32-bit word in Rs1
9519  * with the bottom 16-bit Q15 content of the lower 32-bit word in Rs2. At the same time, multiply the
9520  * bottom 16-bit Q15 content of the lower 32-bit word in Rs1 with the top 16-bit Q15 content of the
9521  * lower 32-bit word in Rs2.
9522  * The two 32-bit Q30 results are then written into Rd. The result calculated from the top 16-bit of the
9523  * lower 32-bit word in Rs1 is written to Rd.W[1]. And the result calculated from the bottom 16-bit of
 * the lower 32-bit word in Rs1 is written to Rd.W[0].
9525  *
9526  * **Operations**:\n
9527  * ~~~
9528  * * RV32:
9529  * if (is `SMUL16`) {
9530  *   op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
9531  *   op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
9532  * } else if (is `SMULX16`) {
9533  *   op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
9534  *   op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
9535  * }
9536  * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
9537  *   res = aop s* bop;
9538  * }
9539  * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
9540  * R[t_H] = rest;
9541  * R[t_L] = resb;
9542  * * RV64:
9543  * if (is `SMUL16`) {
9544  *   op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
9545  *   op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
9546  * } else if (is `SMULX16`) {
9547  *   op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
9548  *   op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
9549  * }
9550  * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
9551  *   res = aop s* bop;
9552  * }
9553  * Rd.W[1] = rest;
9554  * Rd.W[0] = resb;
9555  * ~~~
9556  *
9557  * \param [in]  a    unsigned int type of value stored in a
9558  * \param [in]  b    unsigned int type of value stored in b
9559  * \return value stored in unsigned long long type
9560  */
__RV_SMULX16(unsigned int a,unsigned int b)9561 __STATIC_FORCEINLINE unsigned long long __RV_SMULX16(unsigned int a, unsigned int b)
9562 {
9563     register unsigned long long result;
9564     __ASM volatile("smulx16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
9565     return result;
9566 }
9567 /* ===== Inline Function End for 3.123.2. SMULX16 ===== */
9568 
9569 /* ===== Inline Function Start for 3.124. SRA.u ===== */
9570 /**
9571  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
9572  * \brief SRA.u (Rounding Shift Right Arithmetic)
9573  * \details
9574  * **Type**: DSP
9575  *
9576  * **Syntax**:\n
9577  * ~~~
9578  * SRA.u Rd, Rs1, Rs2
9579  * ~~~
9580  *
9581  * **Purpose**:\n
9582  * Perform an arithmetic right shift operation with rounding. The shift amount is a variable
9583  * from a GPR.
9584  *
9585  * **Description**:\n
9586  * This instruction right-shifts the content of Rs1 arithmetically. The shifted out bits are
9587  * filled with the sign-bit and the shift amount is specified by the low-order 5-bits (RV32) or 6-bits
9588  * (RV64) of the Rs2 register. For the rounding operation, a value of 1 is added to the most significant
9589  * discarded bit of the data to calculate the final result. And the result is written to Rd.
9590  *
9591  * **Operations**:\n
9592  * ~~~
9593  * * RV32:
9594  * sa = Rs2[4:0];
9595  * if (sa > 0) {
9596  *   res[31:-1] = SE33(Rs1[31:(sa-1)]) + 1;
9597  *   Rd = res[31:0];
9598  * } else {
9599  *   Rd = Rs1;
9600  * }
9601  * * RV64:
9602  * sa = Rs2[5:0];
9603  * if (sa > 0) {
9604  *   res[63:-1] = SE65(Rs1[63:(sa-1)]) + 1;
9605  *   Rd = res[63:0];
9606  * } else {
9607  *   Rd = Rs1;
9608  * }
9609  * ~~~
9610  *
9611  * \param [in]  a    long type of value stored in a
9612  * \param [in]  b    unsigned int type of value stored in b
9613  * \return value stored in long type
9614  */
__RV_SRA_U(long a,unsigned int b)9615 __STATIC_FORCEINLINE long __RV_SRA_U(long a, unsigned int b)
9616 {
9617     register long result;
9618     __ASM volatile("sra.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
9619     return result;
9620 }
9621 /* ===== Inline Function End for 3.124. SRA.u ===== */
9622 
9623 /* ===== Inline Function Start for 3.125. SRAI.u ===== */
9624 /**
9625  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
9626  * \brief SRAI.u (Rounding Shift Right Arithmetic Immediate)
9627  * \details
9628  * **Type**: DSP
9629  *
9630  * **Syntax**:\n
9631  * ~~~
9632  * SRAI.u Rd, Rs1, imm6u[4:0] (RV32)
9633  * SRAI.u Rd, Rs1, imm6u[5:0] (RV64)
9634  * ~~~
9635  *
9636  * **Purpose**:\n
9637  * Perform an arithmetic right shift operation with rounding. The shift amount is an
9638  * immediate value.
9639  *
9640  * **Description**:\n
9641  * This instruction right-shifts the content of Rs1 arithmetically. The shifted out bits are
9642  * filled with the sign-bit and the shift amount is specified by the imm6u[4:0] (RV32) or imm6u[5:0]
 * (RV64) constant. For the rounding operation, a value of 1 is added to the most significant discarded
9644  * bit of the data to calculate the final result. And the result is written to Rd.
9645  *
9646  * **Operations**:\n
9647  * ~~~
9648  * * RV32:
9649  * sa = imm6u[4:0];
9650  * if (sa > 0) {
9651  *   res[31:-1] = SE33(Rs1[31:(sa-1)]) + 1;
9652  *   Rd = res[31:0];
9653  * } else {
9654  *   Rd = Rs1;
9655  * }
9656  * * RV64:
9657  * sa = imm6u[5:0];
9658  * if (sa > 0) {
9659  *   res[63:-1] = SE65(Rs1[63:(sa-1)]) + 1;
9660  *   Rd = res[63:0];
9661  * } else {
9662  *   Rd = Rs1;
9663  * }
9664  * ~~~
9665  *
9666  * \param [in]  a    long type of value stored in a
9667  * \param [in]  b    unsigned int type of value stored in b
9668  * \return value stored in long type
9669  */
#define __RV_SRAI_U(a, b)    \
    ({    \
        /* Evaluate the operand first, under a macro-specific name, so an */    \
        /* argument that mentions a caller variable called `result` or `__a` */    \
        /* is not captured by this macro's own locals. */    \
        long __srai_u_src = (long)(a);    \
        /* No `register` specifier: it is ill-formed in ISO C++17. */    \
        long __srai_u_res;    \
        /* The "K" constraint makes b an immediate operand, so it must be */    \
        /* a compile-time constant. */    \
        __ASM volatile("srai.u %0, %1, %2" : "=r"(__srai_u_res) : "r"(__srai_u_src), "K"(b));    \
        __srai_u_res;    \
    })
9677 /* ===== Inline Function End for 3.125. SRAI.u ===== */
9678 
9679 /* ===== Inline Function Start for 3.126.1. SRA8 ===== */
9680 /**
9681  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
9682  * \brief SRA8 (SIMD 8-bit Shift Right Arithmetic)
9683  * \details
9684  * **Type**: SIMD
9685  *
9686  * **Syntax**:\n
9687  * ~~~
9688  * SRA8 Rd, Rs1, Rs2
9689  * SRA8.u Rd, Rs1, Rs2
9690  * ~~~
9691  *
9692  * **Purpose**:\n
9693  * Do 8-bit element arithmetic right shift operations simultaneously. The shift amount is a
9694  * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
9695  * results.
9696  *
9697  * **Description**:\n
9698  * The 8-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
9699  * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
9700  * 3-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
9701  * added to the most significant discarded bit of each 8-bit data element to calculate the final results.
9702  * And the results are written to Rd.
9703  *
9704  * **Operations**:\n
9705  * ~~~
9706  * sa = Rs2[2:0];
9707  * if (sa > 0) {
9708  *   if (`.u` form) { // SRA8.u
9709  *     res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
9710  *     Rd.B[x] = res[7:0];
9711  *   } else { // SRA8
 *     Rd.B[x] = SE8(Rs1.B[x][7:sa])
9713  *   }
9714  * } else {
9715  *   Rd = Rs1;
9716  * }
9717  * for RV32: x=3...0,
9718  * for RV64: x=7...0
9719  * ~~~
9720  *
9721  * \param [in]  a    unsigned long type of value stored in a
9722  * \param [in]  b    unsigned int type of value stored in b
9723  * \return value stored in unsigned long type
9724  */
__RV_SRA8(unsigned long a,unsigned int b)9725 __STATIC_FORCEINLINE unsigned long __RV_SRA8(unsigned long a, unsigned int b)
9726 {
9727     register unsigned long result;
9728     __ASM volatile("sra8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
9729     return result;
9730 }
9731 /* ===== Inline Function End for 3.126.1. SRA8 ===== */
9732 
9733 /* ===== Inline Function Start for 3.126.2. SRA8.u ===== */
9734 /**
9735  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
9736  * \brief SRA8.u (SIMD 8-bit Rounding Shift Right Arithmetic)
9737  * \details
9738  * **Type**: SIMD
9739  *
9740  * **Syntax**:\n
9741  * ~~~
9742  * SRA8 Rd, Rs1, Rs2
9743  * SRA8.u Rd, Rs1, Rs2
9744  * ~~~
9745  *
9746  * **Purpose**:\n
9747  * Do 8-bit element arithmetic right shift operations simultaneously. The shift amount is a
9748  * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
9749  * results.
9750  *
9751  * **Description**:\n
9752  * The 8-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
9753  * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
9754  * 3-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
9755  * added to the most significant discarded bit of each 8-bit data element to calculate the final results.
9756  * And the results are written to Rd.
9757  *
9758  * **Operations**:\n
9759  * ~~~
9760  * sa = Rs2[2:0];
9761  * if (sa > 0) {
9762  *   if (`.u` form) { // SRA8.u
9763  *     res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
9764  *     Rd.B[x] = res[7:0];
9765  *   } else { // SRA8
 *     Rd.B[x] = SE8(Rs1.B[x][7:sa])
9767  *   }
9768  * } else {
9769  *   Rd = Rs1;
9770  * }
9771  * for RV32: x=3...0,
9772  * for RV64: x=7...0
9773  * ~~~
9774  *
9775  * \param [in]  a    unsigned long type of value stored in a
9776  * \param [in]  b    unsigned int type of value stored in b
9777  * \return value stored in unsigned long type
9778  */
__RV_SRA8_U(unsigned long a,unsigned int b)9779 __STATIC_FORCEINLINE unsigned long __RV_SRA8_U(unsigned long a, unsigned int b)
9780 {
9781     register unsigned long result;
9782     __ASM volatile("sra8.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
9783     return result;
9784 }
9785 /* ===== Inline Function End for 3.126.2. SRA8.u ===== */
9786 
9787 /* ===== Inline Function Start for 3.127.1. SRAI8 ===== */
9788 /**
9789  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
9790  * \brief SRAI8 (SIMD 8-bit Shift Right Arithmetic Immediate)
9791  * \details
9792  * **Type**: SIMD
9793  *
9794  * **Syntax**:\n
9795  * ~~~
9796  * SRAI8 Rd, Rs1, imm3u
9797  * SRAI8.u Rd, Rs1, imm3u
9798  * ~~~
9799  *
9800  * **Purpose**:\n
9801  * Do 8-bit element arithmetic right shift operations simultaneously. The shift amount is an
9802  * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
9803  *
9804  * **Description**:\n
9805  * The 8-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
9806  * bits are filled with the sign-bit of the data elements. The shift amount is specified by the imm3u
9807  * constant. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
9808  * discarded bit of each 8-bit data element to calculate the final results. And the results are written to
9809  * Rd.
9810  *
9811  * **Operations**:\n
9812  * ~~~
9813  * sa = imm3u[2:0];
9814  * if (sa > 0) {
 *   if (`.u` form) { // SRAI8.u
 *     res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
 *     Rd.B[x] = res[7:0];
 *   } else { // SRAI8
 *     Rd.B[x] = SE8(Rs1.B[x][7:sa])
9820  *   }
9821  * } else {
9822  *   Rd = Rs1;
9823  * }
9824  * for RV32: x=3...0,
9825  * for RV64: x=7...0
9826  * ~~~
9827  *
9828  * \param [in]  a    unsigned long type of value stored in a
9829  * \param [in]  b    unsigned int type of value stored in b
9830  * \return value stored in unsigned long type
9831  */
#define __RV_SRAI8(a, b)    \
    ({    \
        /* Capture the operand before any other local exists, and use */    \
        /* macro-specific names, so an argument referring to a caller */    \
        /* variable named `result` or `__a` still reads the caller's value. */    \
        unsigned long __srai8_src = (unsigned long)(a);    \
        /* No `register` specifier: it is ill-formed in ISO C++17. */    \
        unsigned long __srai8_res;    \
        /* The "K" constraint makes b an immediate operand, so it must be */    \
        /* a compile-time constant. */    \
        __ASM volatile("srai8 %0, %1, %2" : "=r"(__srai8_res) : "r"(__srai8_src), "K"(b));    \
        __srai8_res;    \
    })
9839 /* ===== Inline Function End for 3.127.1. SRAI8 ===== */
9840 
9841 /* ===== Inline Function Start for 3.127.2. SRAI8.u ===== */
9842 /**
9843  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
9844  * \brief SRAI8.u (SIMD 8-bit Rounding Shift Right Arithmetic Immediate)
9845  * \details
9846  * **Type**: SIMD
9847  *
9848  * **Syntax**:\n
9849  * ~~~
9850  * SRAI8 Rd, Rs1, imm3u
9851  * SRAI8.u Rd, Rs1, imm3u
9852  * ~~~
9853  *
9854  * **Purpose**:\n
9855  * Do 8-bit element arithmetic right shift operations simultaneously. The shift amount is an
9856  * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
9857  *
9858  * **Description**:\n
9859  * The 8-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
9860  * bits are filled with the sign-bit of the data elements. The shift amount is specified by the imm3u
9861  * constant. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
9862  * discarded bit of each 8-bit data element to calculate the final results. And the results are written to
9863  * Rd.
9864  *
9865  * **Operations**:\n
9866  * ~~~
9867  * sa = imm3u[2:0];
9868  * if (sa > 0) {
 *   if (`.u` form) { // SRAI8.u
 *     res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
 *     Rd.B[x] = res[7:0];
 *   } else { // SRAI8
 *     Rd.B[x] = SE8(Rs1.B[x][7:sa])
9874  *   }
9875  * } else {
9876  *   Rd = Rs1;
9877  * }
9878  * for RV32: x=3...0,
9879  * for RV64: x=7...0
9880  * ~~~
9881  *
9882  * \param [in]  a    unsigned long type of value stored in a
9883  * \param [in]  b    unsigned int type of value stored in b
9884  * \return value stored in unsigned long type
9885  */
#define __RV_SRAI8_U(a, b)    \
    ({    \
        /* The operand is evaluated first and the locals carry */    \
        /* macro-specific names, so an argument that names a caller */    \
        /* variable `result` or `__a` is not shadowed inside the macro. */    \
        unsigned long __srai8_u_src = (unsigned long)(a);    \
        /* No `register` specifier: it is ill-formed in ISO C++17. */    \
        unsigned long __srai8_u_res;    \
        /* The "K" constraint makes b an immediate operand, so it must be */    \
        /* a compile-time constant. */    \
        __ASM volatile("srai8.u %0, %1, %2" : "=r"(__srai8_u_res) : "r"(__srai8_u_src), "K"(b));    \
        __srai8_u_res;    \
    })
9893 /* ===== Inline Function End for 3.127.2. SRAI8.u ===== */
9894 
9895 /* ===== Inline Function Start for 3.128.1. SRA16 ===== */
9896 /**
9897  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
9898  * \brief SRA16 (SIMD 16-bit Shift Right Arithmetic)
9899  * \details
9900  * **Type**: SIMD
9901  *
9902  * **Syntax**:\n
9903  * ~~~
9904  * SRA16 Rd, Rs1, Rs2
9905  * SRA16.u Rd, Rs1, Rs2
9906  * ~~~
9907  *
9908  * **Purpose**:\n
9909  * Do 16-bit element arithmetic right shift operations simultaneously. The shift amount is a
9910  * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
9911  * results.
9912  *
9913  * **Description**:\n
9914  * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
9915  * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
9916  * 4-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
9917  * added to the most significant discarded bit of each 16-bit data element to calculate the final results.
9918  * And the results are written to Rd.
9919  *
9920  * **Operations**:\n
9921  * ~~~
9922  * sa = Rs2[3:0];
9923  * if (sa != 0) {
9924  *   if (`.u` form) { // SRA16.u
9925  *     res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
9926  *     Rd.H[x] = res[15:0];
9927  *   } else { // SRA16
9928  *     Rd.H[x] = SE16(Rs1.H[x][15:sa])
9929  *   }
9930  * } else {
9931  *   Rd = Rs1;
9932  * }
9933  * for RV32: x=1...0,
9934  * for RV64: x=3...0
9935  * ~~~
9936  *
9937  * \param [in]  a    unsigned long type of value stored in a
9938  * \param [in]  b    unsigned long type of value stored in b
9939  * \return value stored in unsigned long type
9940  */
__RV_SRA16(unsigned long a,unsigned long b)9941 __STATIC_FORCEINLINE unsigned long __RV_SRA16(unsigned long a, unsigned long b)
9942 {
9943     register unsigned long result;
9944     __ASM volatile("sra16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
9945     return result;
9946 }
9947 /* ===== Inline Function End for 3.128.1. SRA16 ===== */
9948 
9949 /* ===== Inline Function Start for 3.128.2. SRA16.u ===== */
9950 /**
9951  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
9952  * \brief SRA16.u (SIMD 16-bit Rounding Shift Right Arithmetic)
9953  * \details
9954  * **Type**: SIMD
9955  *
9956  * **Syntax**:\n
9957  * ~~~
9958  * SRA16 Rd, Rs1, Rs2
9959  * SRA16.u Rd, Rs1, Rs2
9960  * ~~~
9961  *
9962  * **Purpose**:\n
9963  * Do 16-bit element arithmetic right shift operations simultaneously. The shift amount is a
9964  * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
9965  * results.
9966  *
9967  * **Description**:\n
9968  * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
9969  * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
9970  * 4-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
9971  * added to the most significant discarded bit of each 16-bit data element to calculate the final results.
9972  * And the results are written to Rd.
9973  *
9974  * **Operations**:\n
9975  * ~~~
9976  * sa = Rs2[3:0];
9977  * if (sa != 0) {
9978  *   if (`.u` form) { // SRA16.u
9979  *     res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
9980  *     Rd.H[x] = res[15:0];
9981  *   } else { // SRA16
9982  *     Rd.H[x] = SE16(Rs1.H[x][15:sa])
9983  *   }
9984  * } else {
9985  *   Rd = Rs1;
9986  * }
9987  * for RV32: x=1...0,
9988  * for RV64: x=3...0
9989  * ~~~
9990  *
9991  * \param [in]  a    unsigned long type of value stored in a
9992  * \param [in]  b    unsigned long type of value stored in b
9993  * \return value stored in unsigned long type
9994  */
__RV_SRA16_U(unsigned long a,unsigned long b)9995 __STATIC_FORCEINLINE unsigned long __RV_SRA16_U(unsigned long a, unsigned long b)
9996 {
9997     register unsigned long result;
9998     __ASM volatile("sra16.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
9999     return result;
10000 }
10001 /* ===== Inline Function End for 3.128.2. SRA16.u ===== */
10002 
10003 /* ===== Inline Function Start for 3.129.1. SRAI16 ===== */
10004 /**
10005  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
10006  * \brief SRAI16 (SIMD 16-bit Shift Right Arithmetic Immediate)
10007  * \details
10008  * **Type**: SIMD
10009  *
10010  * **Syntax**:\n
10011  * ~~~
10012  * SRAI16 Rd, Rs1, imm4u
10013  * SRAI16.u Rd, Rs1, imm4u
10014  * ~~~
10015  *
10016  * **Purpose**:\n
10017  * Do 16-bit elements arithmetic right shift operations simultaneously. The shift amount is
10018  * an immediate value. The `.u` form performs additional rounding up operations on the shifted
10019  * results.
10020  *
10021  * **Description**:\n
10022  * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
10023  * bits are filled with the sign-bit of the 16-bit data elements. The shift amount is specified by the
10024  * imm4u constant. For the rounding operation of the `.u` form, a value of 1 is added to the most
10025  * significant discarded bit of each 16-bit data to calculate the final results. And the results are written
10026  * to Rd.
10027  *
10028  * **Operations**:\n
10029  * ~~~
10030  * sa = imm4u[3:0];
10031  * if (sa > 0) {
10032  *   if (`.u` form) { // SRAI16.u
10033  *     res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
10034  *     Rd.H[x] = res[15:0];
10035  *   } else { // SRAI16
10036  *     Rd.H[x] = SE16(Rs1.H[x][15:sa]);
10037  *   }
10038  * } else {
10039  *   Rd = Rs1;
10040  * }
10041  * for RV32: x=1...0,
10042  * for RV64: x=3...0
10043  * ~~~
10044  *
10045  * \param [in]  a    unsigned long type of value stored in a
10046  * \param [in]  b    unsigned long type of value stored in b
10047  * \return value stored in unsigned long type
10048  */
#define __RV_SRAI16(a, b)    \
    ({    \
        /* Evaluate the operand before declaring any other local, so a */    \
        /* caller expression that mentions a variable called `result` */    \
        /* is never shadowed by a local of this macro. */    \
        const unsigned long __rv_srai16_rs1 = (unsigned long)(a);    \
        /* No `register` storage class: it is not accepted by C++17. */    \
        unsigned long __rv_srai16_rd;    \
        /* b is bound through the "K" immediate constraint, so it has */    \
        /* to be a constant expression, not a run-time variable. */    \
        __ASM volatile("srai16 %0, %1, %2"    \
                       : "=r"(__rv_srai16_rd)    \
                       : "r"(__rv_srai16_rs1), "K"(b));    \
        /* Value of the statement expression: the destination register. */    \
        __rv_srai16_rd;    \
    })
10056 /* ===== Inline Function End for 3.129.1. SRAI16 ===== */
10057 
10058 /* ===== Inline Function Start for 3.129.2. SRAI16.u ===== */
10059 /**
10060  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
10061  * \brief SRAI16.u (SIMD 16-bit Rounding Shift Right Arithmetic Immediate)
10062  * \details
10063  * **Type**: SIMD
10064  *
10065  * **Syntax**:\n
10066  * ~~~
10067  * SRAI16 Rd, Rs1, imm4u
10068  * SRAI16.u Rd, Rs1, imm4u
10069  * ~~~
10070  *
10071  * **Purpose**:\n
10072  * Do 16-bit elements arithmetic right shift operations simultaneously. The shift amount is
10073  * an immediate value. The `.u` form performs additional rounding up operations on the shifted
10074  * results.
10075  *
10076  * **Description**:\n
10077  * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
10078  * bits are filled with the sign-bit of the 16-bit data elements. The shift amount is specified by the
10079  * imm4u constant. For the rounding operation of the `.u` form, a value of 1 is added to the most
10080  * significant discarded bit of each 16-bit data to calculate the final results. And the results are written
10081  * to Rd.
10082  *
10083  * **Operations**:\n
10084  * ~~~
10085  * sa = imm4u[3:0];
10086  * if (sa > 0) {
10087  *   if (`.u` form) { // SRAI16.u
10088  *     res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
10089  *     Rd.H[x] = res[15:0];
10090  *   } else { // SRAI16
10091  *     Rd.H[x] = SE16(Rs1.H[x][15:sa]);
10092  *   }
10093  * } else {
10094  *   Rd = Rs1;
10095  * }
10096  * for RV32: x=1...0,
10097  * for RV64: x=3...0
10098  * ~~~
10099  *
10100  * \param [in]  a    unsigned long type of value stored in a
10101  * \param [in]  b    unsigned long type of value stored in b
10102  * \return value stored in unsigned long type
10103  */
#define __RV_SRAI16_U(a, b)    \
    ({    \
        /* Evaluate the operand before declaring any other local, so a */    \
        /* caller expression that mentions a variable called `result` */    \
        /* is never shadowed by a local of this macro. */    \
        const unsigned long __rv_srai16_u_rs1 = (unsigned long)(a);    \
        /* No `register` storage class: it is not accepted by C++17. */    \
        unsigned long __rv_srai16_u_rd;    \
        /* b is bound through the "K" immediate constraint, so it has */    \
        /* to be a constant expression, not a run-time variable. */    \
        __ASM volatile("srai16.u %0, %1, %2"    \
                       : "=r"(__rv_srai16_u_rd)    \
                       : "r"(__rv_srai16_u_rs1), "K"(b));    \
        /* Value of the statement expression: the destination register. */    \
        __rv_srai16_u_rd;    \
    })
10111 /* ===== Inline Function End for 3.129.2. SRAI16.u ===== */
10112 
10113 /* ===== Inline Function Start for 3.130.1. SRL8 ===== */
10114 /**
10115  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
10116  * \brief SRL8 (SIMD 8-bit Shift Right Logical)
10117  * \details
10118  * **Type**: SIMD
10119  *
10120  * **Syntax**:\n
10121  * ~~~
10122  * SRL8 Rt, Ra, Rb
10123  * SRL8.u Rt, Ra, Rb
10124  * ~~~
10125  *
10126  * **Purpose**:\n
10127  * Do 8-bit elements logical right shift operations simultaneously. The shift amount is a
10128  * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
10129  * results.
10130  *
10131  * **Description**:\n
10132  * The 8-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits are
10133  * filled with zero. The shift amount is specified by the low-order 3-bits of the value in the Rs2 register.
10134  * For the rounding operation of the `.u` form, a value of 1 is added to the most significant discarded
10135  * bit of each 8-bit data element to calculate the final results. And the results are written to Rd.
10136  *
10137  * **Operations**:\n
10138  * ~~~
10139  * sa = Rs2[2:0];
10140  * if (sa > 0) {
10141  *   if (`.u` form) { // SRL8.u
10142  *     res[8:0] = ZE9(Rs1.B[x][7:sa-1]) + 1;
10143  *     Rd.B[x] = res[8:1];
10144  *   } else { // SRL8
10145  *     Rd.B[x] = ZE8(Rs1.B[x][7:sa]);
10146  *   }
10147  * } else {
10148  *   Rd = Rs1;
10149  * }
10150  * for RV32: x=3...0,
10151  * for RV64: x=7...0
10152  * ~~~
10153  *
10154  * \param [in]  a    unsigned long type of value stored in a
10155  * \param [in]  b    unsigned int type of value stored in b
10156  * \return value stored in unsigned long type
10157  */
__RV_SRL8(unsigned long a,unsigned int b)10158 __STATIC_FORCEINLINE unsigned long __RV_SRL8(unsigned long a, unsigned int b)
10159 {
10160     register unsigned long result;
10161     __ASM volatile("srl8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
10162     return result;
10163 }
10164 /* ===== Inline Function End for 3.130.1. SRL8 ===== */
10165 
10166 /* ===== Inline Function Start for 3.130.2. SRL8.u ===== */
10167 /**
10168  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
10169  * \brief SRL8.u (SIMD 8-bit Rounding Shift Right Logical)
10170  * \details
10171  * **Type**: SIMD
10172  *
10173  * **Syntax**:\n
10174  * ~~~
10175  * SRL8 Rt, Ra, Rb
10176  * SRL8.u Rt, Ra, Rb
10177  * ~~~
10178  *
10179  * **Purpose**:\n
10180  * Do 8-bit elements logical right shift operations simultaneously. The shift amount is a
10181  * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
10182  * results.
10183  *
10184  * **Description**:\n
10185  * The 8-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits are
10186  * filled with zero. The shift amount is specified by the low-order 3-bits of the value in the Rs2 register.
10187  * For the rounding operation of the `.u` form, a value of 1 is added to the most significant discarded
10188  * bit of each 8-bit data element to calculate the final results. And the results are written to Rd.
10189  *
10190  * **Operations**:\n
10191  * ~~~
10192  * sa = Rs2[2:0];
10193  * if (sa > 0) {
10194  *   if (`.u` form) { // SRL8.u
10195  *     res[8:0] = ZE9(Rs1.B[x][7:sa-1]) + 1;
10196  *     Rd.B[x] = res[8:1];
10197  *   } else { // SRL8
10198  *     Rd.B[x] = ZE8(Rs1.B[x][7:sa]);
10199  *   }
10200  * } else {
10201  *   Rd = Rs1;
10202  * }
10203  * for RV32: x=3...0,
10204  * for RV64: x=7...0
10205  * ~~~
10206  *
10207  * \param [in]  a    unsigned long type of value stored in a
10208  * \param [in]  b    unsigned int type of value stored in b
10209  * \return value stored in unsigned long type
10210  */
__RV_SRL8_U(unsigned long a,unsigned int b)10211 __STATIC_FORCEINLINE unsigned long __RV_SRL8_U(unsigned long a, unsigned int b)
10212 {
10213     register unsigned long result;
10214     __ASM volatile("srl8.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
10215     return result;
10216 }
10217 /* ===== Inline Function End for 3.130.2. SRL8.u ===== */
10218 
10219 /* ===== Inline Function Start for 3.131.1. SRLI8 ===== */
10220 /**
10221  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
10222  * \brief SRLI8 (SIMD 8-bit Shift Right Logical Immediate)
10223  * \details
10224  * **Type**: SIMD
10225  *
10226  * **Syntax**:\n
10227  * ~~~
10228  * SRLI8 Rt, Ra, imm3u
10229  * SRLI8.u Rt, Ra, imm3u
10230  * ~~~
10231  *
10232  * **Purpose**:\n
10233  * Do 8-bit elements logical right shift operations simultaneously. The shift amount is an
10234  * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
10235  *
10236  * **Description**:\n
10237  * The 8-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits are
10238  * filled with zero. The shift amount is specified by the imm3u constant. For the rounding operation of
10239  * the `.u` form, a value of 1 is added to the most significant discarded bit of each 8-bit data element to
10240  * calculate the final results. And the results are written to Rd.
10241  *
10242  * **Operations**:\n
10243  * ~~~
10244  * sa = imm3u[2:0];
10245  * if (sa > 0) {
10246  *   if (`.u` form) { // SRLI8.u
10247  *     res[8:0] = ZE9(Rs1.B[x][7:sa-1]) + 1;
10248  *     Rd.B[x] = res[8:1];
10249  *   } else { // SRLI8
10250  *     Rd.B[x] = ZE8(Rs1.B[x][7:sa]);
10251  *   }
10252  * } else {
10253  *   Rd = Rs1;
10254  * }
10255  * for RV32: x=3...0,
10256  * for RV64: x=7...0
10257  * ~~~
10258  *
10259  * \param [in]  a    unsigned long type of value stored in a
10260  * \param [in]  b    unsigned int type of value stored in b
10261  * \return value stored in unsigned long type
10262  */
#define __RV_SRLI8(a, b)    \
    ({    \
        /* Evaluate the operand before declaring any other local, so a */    \
        /* caller expression that mentions a variable called `result` */    \
        /* is never shadowed by a local of this macro. */    \
        const unsigned long __rv_srli8_rs1 = (unsigned long)(a);    \
        /* No `register` storage class: it is not accepted by C++17. */    \
        unsigned long __rv_srli8_rd;    \
        /* b is bound through the "K" immediate constraint, so it has */    \
        /* to be a constant expression, not a run-time variable. */    \
        __ASM volatile("srli8 %0, %1, %2"    \
                       : "=r"(__rv_srli8_rd)    \
                       : "r"(__rv_srli8_rs1), "K"(b));    \
        /* Value of the statement expression: the destination register. */    \
        __rv_srli8_rd;    \
    })
10270 /* ===== Inline Function End for 3.131.1. SRLI8 ===== */
10271 
10272 /* ===== Inline Function Start for 3.131.2. SRLI8.u ===== */
10273 /**
10274  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
10275  * \brief SRLI8.u (SIMD 8-bit Rounding Shift Right Logical Immediate)
10276  * \details
10277  * **Type**: SIMD
10278  *
10279  * **Syntax**:\n
10280  * ~~~
10281  * SRLI8 Rt, Ra, imm3u
10282  * SRLI8.u Rt, Ra, imm3u
10283  * ~~~
10284  *
10285  * **Purpose**:\n
10286  * Do 8-bit elements logical right shift operations simultaneously. The shift amount is an
10287  * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
10288  *
10289  * **Description**:\n
10290  * The 8-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits are
10291  * filled with zero. The shift amount is specified by the imm3u constant. For the rounding operation of
10292  * the `.u` form, a value of 1 is added to the most significant discarded bit of each 8-bit data element to
10293  * calculate the final results. And the results are written to Rd.
10294  *
10295  * **Operations**:\n
10296  * ~~~
10297  * sa = imm3u[2:0];
10298  * if (sa > 0) {
10299  *   if (`.u` form) { // SRLI8.u
10300  *     res[8:0] = ZE9(Rs1.B[x][7:sa-1]) + 1;
10301  *     Rd.B[x] = res[8:1];
10302  *   } else { // SRLI8
10303  *     Rd.B[x] = ZE8(Rs1.B[x][7:sa]);
10304  *   }
10305  * } else {
10306  *   Rd = Rs1;
10307  * }
10308  * for RV32: x=3...0,
10309  * for RV64: x=7...0
10310  * ~~~
10311  *
10312  * \param [in]  a    unsigned long type of value stored in a
10313  * \param [in]  b    unsigned int type of value stored in b
10314  * \return value stored in unsigned long type
10315  */
#define __RV_SRLI8_U(a, b)    \
    ({    \
        /* Evaluate the operand before declaring any other local, so a */    \
        /* caller expression that mentions a variable called `result` */    \
        /* is never shadowed by a local of this macro. */    \
        const unsigned long __rv_srli8_u_rs1 = (unsigned long)(a);    \
        /* No `register` storage class: it is not accepted by C++17. */    \
        unsigned long __rv_srli8_u_rd;    \
        /* b is bound through the "K" immediate constraint, so it has */    \
        /* to be a constant expression, not a run-time variable. */    \
        __ASM volatile("srli8.u %0, %1, %2"    \
                       : "=r"(__rv_srli8_u_rd)    \
                       : "r"(__rv_srli8_u_rs1), "K"(b));    \
        /* Value of the statement expression: the destination register. */    \
        __rv_srli8_u_rd;    \
    })
10323 /* ===== Inline Function End for 3.131.2. SRLI8.u ===== */
10324 
10325 /* ===== Inline Function Start for 3.132.1. SRL16 ===== */
10326 /**
10327  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
10328  * \brief SRL16 (SIMD 16-bit Shift Right Logical)
10329  * \details
10330  * **Type**: SIMD
10331  *
10332  * **Syntax**:\n
10333  * ~~~
10334  * SRL16 Rt, Ra, Rb
10335  * SRL16.u Rt, Ra, Rb
10336  * ~~~
10337  *
10338  * **Purpose**:\n
10339  * Do 16-bit elements logical right shift operations simultaneously. The shift amount is a variable from a GPR. The `.u` form performs additional rounding up operations on the shifted results.
10340  *
10341  * **Description**:\n
10342  * The 16-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
10343  * are filled with zero. The shift amount is specified by the low-order 4-bits of the value in the Rs2
10344  * register. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
10345  * discarded bit of each 16-bit data element to calculate the final results. And the results are written to
10346  * Rd.
10347  *
10348  * **Operations**:\n
10349  * ~~~
10350  * sa = Rs2[3:0];
10351  * if (sa > 0) {
10352  *   if (`.u` form) { // SRL16.u
10353  *     res[16:0] = ZE17(Rs1.H[x][15:sa-1]) + 1;
10354  *     Rd.H[x] = res[16:1];
10355  *   } else { // SRL16
10356  *     Rd.H[x] = ZE16(Rs1.H[x][15:sa]);
10357  *   }
10358  * } else {
10359  *   Rd = Rs1;
10360  * }
10361  * for RV32: x=1...0,
10362  * for RV64: x=3...0
10363  * ~~~
10364  *
10365  * \param [in]  a    unsigned long type of value stored in a
10366  * \param [in]  b    unsigned int type of value stored in b
10367  * \return value stored in unsigned long type
10368  */
__RV_SRL16(unsigned long a,unsigned int b)10369 __STATIC_FORCEINLINE unsigned long __RV_SRL16(unsigned long a, unsigned int b)
10370 {
10371     register unsigned long result;
10372     __ASM volatile("srl16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
10373     return result;
10374 }
10375 /* ===== Inline Function End for 3.132.1. SRL16 ===== */
10376 
10377 /* ===== Inline Function Start for 3.132.2. SRL16.u ===== */
10378 /**
10379  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
10380  * \brief SRL16.u (SIMD 16-bit Rounding Shift Right Logical)
10381  * \details
10382  * **Type**: SIMD
10383  *
10384  * **Syntax**:\n
10385  * ~~~
10386  * SRL16 Rt, Ra, Rb
10387  * SRL16.u Rt, Ra, Rb
10388  * ~~~
10389  *
10390  * **Purpose**:\n
10391  * Do 16-bit elements logical right shift operations simultaneously. The shift amount is a variable from a GPR. The `.u` form performs additional rounding up operations on the shifted results.
10392  *
10393  * **Description**:\n
10394  * The 16-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
10395  * are filled with zero. The shift amount is specified by the low-order 4-bits of the value in the Rs2
10396  * register. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
10397  * discarded bit of each 16-bit data element to calculate the final results. And the results are written to
10398  * Rd.
10399  *
10400  * **Operations**:\n
10401  * ~~~
10402  * sa = Rs2[3:0];
10403  * if (sa > 0) {
10404  *   if (`.u` form) { // SRL16.u
10405  *     res[16:0] = ZE17(Rs1.H[x][15:sa-1]) + 1;
10406  *     Rd.H[x] = res[16:1];
10407  *   } else { // SRL16
10408  *     Rd.H[x] = ZE16(Rs1.H[x][15:sa]);
10409  *   }
10410  * } else {
10411  *   Rd = Rs1;
10412  * }
10413  * for RV32: x=1...0,
10414  * for RV64: x=3...0
10415  * ~~~
10416  *
10417  * \param [in]  a    unsigned long type of value stored in a
10418  * \param [in]  b    unsigned int type of value stored in b
10419  * \return value stored in unsigned long type
10420  */
__RV_SRL16_U(unsigned long a,unsigned int b)10421 __STATIC_FORCEINLINE unsigned long __RV_SRL16_U(unsigned long a, unsigned int b)
10422 {
10423     register unsigned long result;
10424     __ASM volatile("srl16.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
10425     return result;
10426 }
10427 /* ===== Inline Function End for 3.132.2. SRL16.u ===== */
10428 
10429 /* ===== Inline Function Start for 3.133.1. SRLI16 ===== */
10430 /**
10431  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
10432  * \brief SRLI16 (SIMD 16-bit Shift Right Logical Immediate)
10433  * \details
10434  * **Type**: SIMD
10435  *
10436  * **Syntax**:\n
10437  * ~~~
10438  * SRLI16 Rt, Ra, imm4u
10439  * SRLI16.u Rt, Ra, imm4u
10440  * ~~~
10441  *
10442  * **Purpose**:\n
10443  * Do 16-bit elements logical right shift operations simultaneously. The shift amount is an
10444  * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
10445  *
10446  * **Description**:\n
10447  * The 16-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
10448  * are filled with zero. The shift amount is specified by the imm4u constant. For the rounding
10449  * operation of the `.u` form, a value of 1 is added to the most significant discarded bit of each 16-bit
10450  * data element to calculate the final results. And the results are written to Rd.
10451  *
10452  * **Operations**:\n
10453  * ~~~
10454  * sa = imm4u;
10455  * if (sa > 0) {
10456  *   if (`.u` form) { // SRLI16.u
10457  *     res[16:0] = ZE17(Rs1.H[x][15:sa-1]) + 1;
10458  *     Rd.H[x] = res[16:1];
10459  *   } else { // SRLI16
10460  *     Rd.H[x] = ZE16(Rs1.H[x][15:sa]);
10461  *   }
10462  * } else {
10463  *   Rd = Rs1;
10464  * }
10465  * for RV32: x=1...0,
10466  * for RV64: x=3...0
10467  * ~~~
10468  *
10469  * \param [in]  a    unsigned long type of value stored in a
10470  * \param [in]  b    unsigned int type of value stored in b
10471  * \return value stored in unsigned long type
10472  */
#define __RV_SRLI16(a, b)    \
    ({    \
        /* Evaluate the operand before declaring any other local, so a */    \
        /* caller expression that mentions a variable called `result` */    \
        /* is never shadowed by a local of this macro. */    \
        const unsigned long __rv_srli16_rs1 = (unsigned long)(a);    \
        /* No `register` storage class: it is not accepted by C++17. */    \
        unsigned long __rv_srli16_rd;    \
        /* b is bound through the "K" immediate constraint, so it has */    \
        /* to be a constant expression, not a run-time variable. */    \
        __ASM volatile("srli16 %0, %1, %2"    \
                       : "=r"(__rv_srli16_rd)    \
                       : "r"(__rv_srli16_rs1), "K"(b));    \
        /* Value of the statement expression: the destination register. */    \
        __rv_srli16_rd;    \
    })
10480 /* ===== Inline Function End for 3.133.1. SRLI16 ===== */
10481 
10482 /* ===== Inline Function Start for 3.133.2. SRLI16.u ===== */
10483 /**
10484  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
10485  * \brief SRLI16.u (SIMD 16-bit Rounding Shift Right Logical Immediate)
10486  * \details
10487  * **Type**: SIMD
10488  *
10489  * **Syntax**:\n
10490  * ~~~
10491  * SRLI16 Rt, Ra, imm4u
10492  * SRLI16.u Rt, Ra, imm4u
10493  * ~~~
10494  *
10495  * **Purpose**:\n
10496  * Do 16-bit elements logical right shift operations simultaneously. The shift amount is an
10497  * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
10498  *
10499  * **Description**:\n
10500  * The 16-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
10501  * are filled with zero. The shift amount is specified by the imm4u constant. For the rounding
10502  * operation of the `.u` form, a value of 1 is added to the most significant discarded bit of each 16-bit
10503  * data element to calculate the final results. And the results are written to Rd.
10504  *
10505  * **Operations**:\n
10506  * ~~~
10507  * sa = imm4u;
10508  * if (sa > 0) {
10509  *   if (`.u` form) { // SRLI16.u
10510  *     res[16:0] = ZE17(Rs1.H[x][15:sa-1]) + 1;
10511  *     Rd.H[x] = res[16:1];
10512  *   } else { // SRLI16
10513  *     Rd.H[x] = ZE16(Rs1.H[x][15:sa]);
10514  *   }
10515  * } else {
10516  *   Rd = Rs1;
10517  * }
10518  * for RV32: x=1...0,
10519  * for RV64: x=3...0
10520  * ~~~
10521  *
10522  * \param [in]  a    unsigned long type of value stored in a
10523  * \param [in]  b    unsigned int type of value stored in b
10524  * \return value stored in unsigned long type
10525  */
#define __RV_SRLI16_U(a, b)    \
    ({    \
        /* Evaluate the operand before declaring any other local, so a */    \
        /* caller expression that mentions a variable called `result` */    \
        /* is never shadowed by a local of this macro. */    \
        const unsigned long __rv_srli16_u_rs1 = (unsigned long)(a);    \
        /* No `register` storage class: it is not accepted by C++17. */    \
        unsigned long __rv_srli16_u_rd;    \
        /* b is bound through the "K" immediate constraint, so it has */    \
        /* to be a constant expression, not a run-time variable. */    \
        __ASM volatile("srli16.u %0, %1, %2"    \
                       : "=r"(__rv_srli16_u_rd)    \
                       : "r"(__rv_srli16_u_rs1), "K"(b));    \
        /* Value of the statement expression: the destination register. */    \
        __rv_srli16_u_rd;    \
    })
10533 /* ===== Inline Function End for 3.133.2. SRLI16.u ===== */
10534 
10535 /* ===== Inline Function Start for 3.134. STAS16 ===== */
10536 /**
10537  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
10538  * \brief STAS16 (SIMD 16-bit Straight Addition & Subtraction)
10539  * \details
10540  * **Type**: SIMD
10541  *
10542  * **Syntax**:\n
10543  * ~~~
10544  * STAS16 Rd, Rs1, Rs2
10545  * ~~~
10546  *
10547  * **Purpose**:\n
10548  * Do 16-bit integer element addition and 16-bit integer element subtraction in a 32-bit
10549  * chunk simultaneously. Operands are from corresponding positions in 32-bit chunks.
10550  *
10551  * **Description**:\n
10552  * This instruction adds the 16-bit integer element in [31:16] of 32-bit chunks in Rs1 with
10553  * the 16-bit integer element in [31:16] of 32-bit chunks in Rs2, and writes the result to [31:16] of 32-bit
10554  * chunks in Rd; at the same time, it subtracts the 16-bit integer element in [15:0] of 32-bit chunks in
10555  * Rs2 from the 16-bit integer element in [15:0] of 32-bit chunks in Rs1, and writes the result to [15:0] of 32-
10556  * bit chunks in Rd.
10557  *
10558  * **Note**:\n
10559  * This instruction can be used for either signed or unsigned operations.
10560  *
10561  * **Operations**:\n
10562  * ~~~
10563  * Rd.W[x][31:16] = Rs1.W[x][31:16] + Rs2.W[x][31:16];
10564  * Rd.W[x][15:0] = Rs1.W[x][15:0] - Rs2.W[x][15:0];
10565  * for RV32, x=0
10566  * for RV64, x=1...0
10567  * ~~~
10568  *
10569  * \param [in]  a    unsigned long type of value stored in a
10570  * \param [in]  b    unsigned long type of value stored in b
10571  * \return value stored in unsigned long type
10572  */
__RV_STAS16(unsigned long a,unsigned long b)10573 __STATIC_FORCEINLINE unsigned long __RV_STAS16(unsigned long a, unsigned long b)
10574 {
10575     register unsigned long result;
10576     __ASM volatile("stas16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
10577     return result;
10578 }
10579 /* ===== Inline Function End for 3.134. STAS16 ===== */
10580 
10581 /* ===== Inline Function Start for 3.135. STSA16 ===== */
10582 /**
10583  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
10584  * \brief STSA16 (SIMD 16-bit Straight Subtraction & Addition)
10585  * \details
10586  * **Type**: SIMD
10587  *
10588  * **Syntax**:\n
10589  * ~~~
10590  * STSA16 Rd, Rs1, Rs2
10591  * ~~~
10592  *
10593  * **Purpose**:\n
10594  * Do 16-bit integer element subtraction and 16-bit integer element addition in a 32-bit
10595  * chunk simultaneously. Operands are from corresponding positions in 32-bit chunks.
10596  *
10597  * **Description**:\n
10598  * This instruction subtracts the 16-bit integer element in [31:16] of 32-bit chunks in Rs2
10599  * from the 16-bit integer element in [31:16] of 32-bit chunks in Rs1, and writes the result to [31:16] of
10600  * 32-bit chunks in Rd; at the same time, it adds the 16-bit integer element in [15:0] of 32-bit chunks in
10601  * Rs2 with the 16-bit integer element in [15:0] of 32-bit chunks in Rs1, and writes the result to [15:0] of
10602  * 32-bit chunks in Rd.
10603  *
10604  * **Note**:\n
10605  * This instruction can be used for either signed or unsigned operations.
10606  *
10607  * **Operations**:\n
10608  * ~~~
10609  * Rd.W[x][31:16] = Rs1.W[x][31:16] - Rs2.W[x][31:16];
10610  * Rd.W[x][15:0] = Rs1.W[x][15:0] + Rs2.W[x][15:0];
10611  * for RV32, x=0
10612  * for RV64, x=1...0
10613  * ~~~
10614  *
10615  * \param [in]  a    unsigned long type of value stored in a
10616  * \param [in]  b    unsigned long type of value stored in b
10617  * \return value stored in unsigned long type
10618  */
__RV_STSA16(unsigned long a,unsigned long b)10619 __STATIC_FORCEINLINE unsigned long __RV_STSA16(unsigned long a, unsigned long b)
10620 {
10621     register unsigned long result;
10622     __ASM volatile("stsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
10623     return result;
10624 }
10625 /* ===== Inline Function End for 3.135. STSA16 ===== */
10626 
10627 /* ===== Inline Function Start for 3.136. SUB8 ===== */
10628 /**
10629  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
10630  * \brief SUB8 (SIMD 8-bit Subtraction)
10631  * \details
10632  * **Type**: SIMD
10633  *
10634  * **Syntax**:\n
10635  * ~~~
10636  * SUB8 Rd, Rs1, Rs2
10637  * ~~~
10638  *
10639  * **Purpose**:\n
10640  * Do 8-bit integer element subtractions simultaneously.
10641  *
10642  * **Description**:\n
10643  * This instruction subtracts the 8-bit integer elements in Rs2 from the 8-bit integer
10644  * elements in Rs1, and then writes the result to Rd.
10645  *
10646  * **Note**:\n
10647  * This instruction can be used for either signed or unsigned subtraction.
10648  *
10649  * **Operations**:\n
10650  * ~~~
10651  * Rd.B[x] = Rs1.B[x] - Rs2.B[x];
10652  * for RV32: x=3...0,
10653  * for RV64: x=7...0
10654  * ~~~
10655  *
10656  * \param [in]  a    unsigned long type of value stored in a
10657  * \param [in]  b    unsigned long type of value stored in b
10658  * \return value stored in unsigned long type
10659  */
__RV_SUB8(unsigned long a,unsigned long b)10660 __STATIC_FORCEINLINE unsigned long __RV_SUB8(unsigned long a, unsigned long b)
10661 {
10662     register unsigned long result;
10663     __ASM volatile("sub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
10664     return result;
10665 }
10666 /* ===== Inline Function End for 3.136. SUB8 ===== */
10667 
10668 /* ===== Inline Function Start for 3.137. SUB16 ===== */
10669 /**
10670  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
10671  * \brief SUB16 (SIMD 16-bit Subtraction)
10672  * \details
10673  * **Type**: SIMD
10674  *
10675  * **Syntax**:\n
10676  * ~~~
10677  * SUB16 Rd, Rs1, Rs2
10678  * ~~~
10679  *
10680  * **Purpose**:\n
10681  * Do 16-bit integer element subtractions simultaneously.
10682  *
10683  * **Description**:\n
10684  * This instruction subtracts the 16-bit integer elements in Rs2 from the 16-bit integer
10685  * elements in Rs1, and then writes the result to Rd.
10686  *
10687  * **Note**:\n
10688  * This instruction can be used for either signed or unsigned subtraction.
10689  *
10690  * **Operations**:\n
10691  * ~~~
10692  * Rd.H[x] = Rs1.H[x] - Rs2.H[x];
10693  * for RV32: x=1...0,
10694  * for RV64: x=3...0
10695  * ~~~
10696  *
10697  * \param [in]  a    unsigned long type of value stored in a
10698  * \param [in]  b    unsigned long type of value stored in b
10699  * \return value stored in unsigned long type
10700  */
__RV_SUB16(unsigned long a,unsigned long b)10701 __STATIC_FORCEINLINE unsigned long __RV_SUB16(unsigned long a, unsigned long b)
10702 {
10703     register unsigned long result;
10704     __ASM volatile("sub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
10705     return result;
10706 }
10707 /* ===== Inline Function End for 3.137. SUB16 ===== */
10708 
10709 /* ===== Inline Function Start for 3.138. SUB64 ===== */
10710 /**
10711  * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
10712  * \brief SUB64 (64-bit Subtraction)
10713  * \details
10714  * **Type**: DSP (64-bit Profile)
10715  *
10716  * **Syntax**:\n
10717  * ~~~
10718  * SUB64 Rd, Rs1, Rs2
10719  * ~~~
10720  *
10721  * **Purpose**:\n
10722  * Perform a 64-bit signed or unsigned integer subtraction.
10723  *
10724  * **RV32 Description**:\n
10725  * This instruction subtracts the 64-bit integer of an even/odd pair of registers
10726  * specified by Rs2(4,1) from the 64-bit integer of an even/odd pair of registers specified by Rs1(4,1),
10727  * and then writes the 64-bit result to an even/odd pair of registers specified by Rd(4,1).
10728  * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
10729  * includes register 2d and 2d+1.
10730  * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
10731  * register of the pair contains the low 32-bit of the operand.
10732  *
10733  * **RV64 Description**:\n
10734  * This instruction subtracts the 64-bit integer of Rs2 from the 64-bit integer of Rs1,
10735  * and then writes the 64-bit result to Rd.
10736  *
10737  * **Note**:\n
10738  * This instruction can be used for either signed or unsigned subtraction.
10739  *
10740  * **Operations**:\n
10741  * ~~~
10742  * * RV32:
10743  * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
10744  * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
10745  * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
10746  * R[t_H].R[t_L] = R[a_H].R[a_L] - R[b_H].R[b_L];
10747  * * RV64:
10748  * Rd = Rs1 - Rs2;
10749  * ~~~
10750  *
10751  * \param [in]  a    unsigned long long type of value stored in a
10752  * \param [in]  b    unsigned long long type of value stored in b
10753  * \return value stored in unsigned long long type
10754  */
__RV_SUB64(unsigned long long a,unsigned long long b)10755 __STATIC_FORCEINLINE unsigned long long __RV_SUB64(unsigned long long a, unsigned long long b)
10756 {
10757     register unsigned long long result;
10758     __ASM volatile("sub64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
10759     return result;
10760 }
10761 /* ===== Inline Function End for 3.138. SUB64 ===== */
10762 
10763 /* ===== Inline Function Start for 3.139.1. SUNPKD810 ===== */
10764 /**
10765  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
10766  * \brief SUNPKD810 (Signed Unpacking Bytes 1 & 0)
10767  * \details
10768  * **Type**: DSP
10769  *
10770  * **Syntax**:\n
10771  * ~~~
10772  * SUNPKD8xy Rd, Rs1
10773  * xy = {10, 20, 30, 31, 32}
10774  * ~~~
10775  *
10776  * **Purpose**:\n
10777  * Unpack byte *x* and byte *y* of 32-bit chunks in a register into two 16-bit signed halfwords
10778  * of 32-bit chunks in a register.
10779  *
10780  * **Description**:\n
10781  * For the `SUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
10782  * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
10783  * chunks in Rd.
10784  *
10785  * **Operations**:\n
10786  * ~~~
10787  * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x])
10788  * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y])
10789  * // SUNPKD810, x=1,y=0
10790  * // SUNPKD820, x=2,y=0
10791  * // SUNPKD830, x=3,y=0
10792  * // SUNPKD831, x=3,y=1
10793  * // SUNPKD832, x=3,y=2
10794  * for RV32: m=0,
10795  * for RV64: m=1...0
10796  * ~~~
10797  *
10798  * \param [in]  a    unsigned long type of value stored in a
10799  * \return value stored in unsigned long type
10800  */
__RV_SUNPKD810(unsigned long a)10801 __STATIC_FORCEINLINE unsigned long __RV_SUNPKD810(unsigned long a)
10802 {
10803     register unsigned long result;
10804     __ASM volatile("sunpkd810 %0, %1" : "=r"(result) : "r"(a));
10805     return result;
10806 }
10807 /* ===== Inline Function End for 3.139.1. SUNPKD810 ===== */
10808 
10809 /* ===== Inline Function Start for 3.139.2. SUNPKD820 ===== */
10810 /**
10811  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
10812  * \brief SUNPKD820 (Signed Unpacking Bytes 2 & 0)
10813  * \details
10814  * **Type**: DSP
10815  *
10816  * **Syntax**:\n
10817  * ~~~
10818  * SUNPKD8xy Rd, Rs1
10819  * xy = {10, 20, 30, 31, 32}
10820  * ~~~
10821  *
10822  * **Purpose**:\n
10823  * Unpack byte *x* and byte *y* of 32-bit chunks in a register into two 16-bit signed halfwords
10824  * of 32-bit chunks in a register.
10825  *
10826  * **Description**:\n
10827  * For the `SUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
10828  * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
10829  * chunks in Rd.
10830  *
10831  * **Operations**:\n
10832  * ~~~
10833  * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x])
10834  * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y])
10835  * // SUNPKD810, x=1,y=0
10836  * // SUNPKD820, x=2,y=0
10837  * // SUNPKD830, x=3,y=0
10838  * // SUNPKD831, x=3,y=1
10839  * // SUNPKD832, x=3,y=2
10840  * for RV32: m=0,
10841  * for RV64: m=1...0
10842  * ~~~
10843  *
10844  * \param [in]  a    unsigned long type of value stored in a
10845  * \return value stored in unsigned long type
10846  */
__RV_SUNPKD820(unsigned long a)10847 __STATIC_FORCEINLINE unsigned long __RV_SUNPKD820(unsigned long a)
10848 {
10849     register unsigned long result;
10850     __ASM volatile("sunpkd820 %0, %1" : "=r"(result) : "r"(a));
10851     return result;
10852 }
10853 /* ===== Inline Function End for 3.139.2. SUNPKD820 ===== */
10854 
10855 /* ===== Inline Function Start for 3.139.3. SUNPKD830 ===== */
10856 /**
10857  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
10858  * \brief SUNPKD830 (Signed Unpacking Bytes 3 & 0)
10859  * \details
10860  * **Type**: DSP
10861  *
10862  * **Syntax**:\n
10863  * ~~~
10864  * SUNPKD8xy Rd, Rs1
10865  * xy = {10, 20, 30, 31, 32}
10866  * ~~~
10867  *
10868  * **Purpose**:\n
10869  * Unpack byte *x* and byte *y* of 32-bit chunks in a register into two 16-bit signed halfwords
10870  * of 32-bit chunks in a register.
10871  *
10872  * **Description**:\n
10873  * For the `SUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
10874  * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
10875  * chunks in Rd.
10876  *
10877  * **Operations**:\n
10878  * ~~~
10879  * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x])
10880  * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y])
10881  * // SUNPKD810, x=1,y=0
10882  * // SUNPKD820, x=2,y=0
10883  * // SUNPKD830, x=3,y=0
10884  * // SUNPKD831, x=3,y=1
10885  * // SUNPKD832, x=3,y=2
10886  * for RV32: m=0,
10887  * for RV64: m=1...0
10888  * ~~~
10889  *
10890  * \param [in]  a    unsigned long type of value stored in a
10891  * \return value stored in unsigned long type
10892  */
__RV_SUNPKD830(unsigned long a)10893 __STATIC_FORCEINLINE unsigned long __RV_SUNPKD830(unsigned long a)
10894 {
10895     register unsigned long result;
10896     __ASM volatile("sunpkd830 %0, %1" : "=r"(result) : "r"(a));
10897     return result;
10898 }
10899 /* ===== Inline Function End for 3.139.3. SUNPKD830 ===== */
10900 
10901 /* ===== Inline Function Start for 3.139.4. SUNPKD831 ===== */
10902 /**
10903  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
10904  * \brief SUNPKD831 (Signed Unpacking Bytes 3 & 1)
10905  * \details
10906  * **Type**: DSP
10907  *
10908  * **Syntax**:\n
10909  * ~~~
10910  * SUNPKD8xy Rd, Rs1
10911  * xy = {10, 20, 30, 31, 32}
10912  * ~~~
10913  *
10914  * **Purpose**:\n
10915  * Unpack byte *x* and byte *y* of 32-bit chunks in a register into two 16-bit signed halfwords
10916  * of 32-bit chunks in a register.
10917  *
10918  * **Description**:\n
10919  * For the `SUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
10920  * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
10921  * chunks in Rd.
10922  *
10923  * **Operations**:\n
10924  * ~~~
10925  * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x])
10926  * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y])
10927  * // SUNPKD810, x=1,y=0
10928  * // SUNPKD820, x=2,y=0
10929  * // SUNPKD830, x=3,y=0
10930  * // SUNPKD831, x=3,y=1
10931  * // SUNPKD832, x=3,y=2
10932  * for RV32: m=0,
10933  * for RV64: m=1...0
10934  * ~~~
10935  *
10936  * \param [in]  a    unsigned long type of value stored in a
10937  * \return value stored in unsigned long type
10938  */
__RV_SUNPKD831(unsigned long a)10939 __STATIC_FORCEINLINE unsigned long __RV_SUNPKD831(unsigned long a)
10940 {
10941     register unsigned long result;
10942     __ASM volatile("sunpkd831 %0, %1" : "=r"(result) : "r"(a));
10943     return result;
10944 }
10945 /* ===== Inline Function End for 3.139.4. SUNPKD831 ===== */
10946 
10947 /* ===== Inline Function Start for 3.139.5. SUNPKD832 ===== */
10948 /**
10949  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
10950  * \brief SUNPKD832 (Signed Unpacking Bytes 3 & 2)
10951  * \details
10952  * **Type**: DSP
10953  *
10954  * **Syntax**:\n
10955  * ~~~
10956  * SUNPKD8xy Rd, Rs1
10957  * xy = {10, 20, 30, 31, 32}
10958  * ~~~
10959  *
10960  * **Purpose**:\n
10961  * Unpack byte *x* and byte *y* of 32-bit chunks in a register into two 16-bit signed halfwords
10962  * of 32-bit chunks in a register.
10963  *
10964  * **Description**:\n
10965  * For the `SUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
10966  * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
10967  * chunks in Rd.
10968  *
10969  * **Operations**:\n
10970  * ~~~
10971  * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x])
10972  * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y])
10973  * // SUNPKD810, x=1,y=0
10974  * // SUNPKD820, x=2,y=0
10975  * // SUNPKD830, x=3,y=0
10976  * // SUNPKD831, x=3,y=1
10977  * // SUNPKD832, x=3,y=2
10978  * for RV32: m=0,
10979  * for RV64: m=1...0
10980  * ~~~
10981  *
10982  * \param [in]  a    unsigned long type of value stored in a
10983  * \return value stored in unsigned long type
10984  */
__RV_SUNPKD832(unsigned long a)10985 __STATIC_FORCEINLINE unsigned long __RV_SUNPKD832(unsigned long a)
10986 {
10987     register unsigned long result;
10988     __ASM volatile("sunpkd832 %0, %1" : "=r"(result) : "r"(a));
10989     return result;
10990 }
10991 /* ===== Inline Function End for 3.139.5. SUNPKD832 ===== */
10992 
10993 /* ===== Inline Function Start for 3.140. SWAP8 ===== */
10994 /**
10995  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
10996  * \brief SWAP8 (Swap Byte within Halfword)
10997  * \details
10998  * **Type**: DSP
10999  *
11000  * **Syntax**:\n
11001  * ~~~
11002  * SWAP8 Rd, Rs1
11003  * ~~~
11004  *
11005  * **Purpose**:\n
11006  * Swap the bytes within each halfword of a register.
11007  *
11008  * **Description**:\n
11009  * This instruction swaps the bytes within each halfword of Rs1 and writes the result to
11010  * Rd.
11011  *
11012  * **Operations**:\n
11013  * ~~~
11014  * Rd.H[x] = CONCAT(Rs1.H[x][7:0],Rs1.H[x][15:8]);
11015  * for RV32: x=1...0,
11016  * for RV64: x=3...0
11017  * ~~~
11018  *
11019  * \param [in]  a    unsigned long type of value stored in a
11020  * \return value stored in unsigned long type
11021  */
__RV_SWAP8(unsigned long a)11022 __STATIC_FORCEINLINE unsigned long __RV_SWAP8(unsigned long a)
11023 {
11024     register unsigned long result;
11025     __ASM volatile("swap8 %0, %1" : "=r"(result) : "r"(a));
11026     return result;
11027 }
11028 /* ===== Inline Function End for 3.140. SWAP8 ===== */
11029 
11030 /* ===== Inline Function Start for 3.141. SWAP16 ===== */
11031 /**
11032  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
11033  * \brief SWAP16 (Swap Halfword within Word)
11034  * \details
11035  * **Type**: DSP
11036  *
11037  * **Syntax**:\n
11038  * ~~~
11039  * SWAP16 Rd, Rs1
11040  * ~~~
11041  *
11042  * **Purpose**:\n
11043  * Swap the 16-bit halfwords within each word of a register.
11044  *
11045  * **Description**:\n
11046  * This instruction swaps the 16-bit halfwords within each word of Rs1 and writes the
11047  * result to Rd.
11048  *
11049  * **Operations**:\n
11050  * ~~~
11051  * Rd.W[x] = CONCAT(Rs1.W[x][15:0],Rs1.W[x][31:16]);
11052  * for RV32: x=0,
11053  * for RV64: x=1...0
11054  * ~~~
11055  *
11056  * \param [in]  a    unsigned long type of value stored in a
11057  * \return value stored in unsigned long type
11058  */
__RV_SWAP16(unsigned long a)11059 __STATIC_FORCEINLINE unsigned long __RV_SWAP16(unsigned long a)
11060 {
11061     register unsigned long result;
11062     __ASM volatile("swap16 %0, %1" : "=r"(result) : "r"(a));
11063     return result;
11064 }
11065 /* ===== Inline Function End for 3.141. SWAP16 ===== */
11066 
11067 /* ===== Inline Function Start for 3.142. UCLIP8 ===== */
11068 /**
11069  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
11070  * \brief UCLIP8 (SIMD 8-bit Unsigned Clip Value)
11071  * \details
11072  * **Type**: SIMD
11073  *
11074  * **Syntax**:\n
11075  * ~~~
11076  * UCLIP8 Rd, Rs1, imm3u
11077  * ~~~
11078  *
11079  * **Purpose**:\n
11080  * Limit the 8-bit signed elements of a register into an unsigned range simultaneously.
11081  *
11082  * **Description**:\n
11083  * This instruction limits the 8-bit signed elements stored in Rs1 into an unsigned integer
11084  * range between 2^imm3u-1 and 0, and writes the limited results to Rd. For example, if imm3u is 3, the 8-
11085  * bit input values should be saturated between 7 and 0. If saturation is performed, set OV bit to 1.
11086  *
11087  * **Operations**:\n
11088  * ~~~
11089  * src = Rs1.B[x];
11090  * if (src > (2^imm3u)-1) {
11091  *   src = (2^imm3u)-1;
11092  *   OV = 1;
11093  * } else if (src < 0) {
11094  *   src = 0;
11095  *   OV = 1;
11096  * }
11097  * Rd.B[x] = src;
11098  * for RV32: x=3...0,
11099  * for RV64: x=7...0
11100  * ~~~
11101  *
11102  * \param [in]  a    unsigned long type of value stored in a
11103  * \param [in]  b    unsigned int type of value stored in b
11104  * \return value stored in unsigned long type
11105  */
/*
 * Implemented as a macro because the clip amount b is passed with the "K"
 * constraint, i.e. as an immediate operand of the instruction.
 * The operand a is captured into __a before the result variable is declared,
 * and the result variable uses a reserved-style name, so a caller expression
 * such as __RV_UCLIP8(result, 3) refers to the caller's own variable rather
 * than to an uninitialised local of this macro.
 * volatile: the instruction may set the OV flag when it saturates.
 */
#define __RV_UCLIP8(a, b)    \
    ({    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long __result;    \
        __ASM volatile("uclip8 %0, %1, %2" : "=r"(__result) : "r"(__a), "K"(b));    \
        __result;    \
    })
11113 /* ===== Inline Function End for 3.142. UCLIP8 ===== */
11114 
11115 /* ===== Inline Function Start for 3.143. UCLIP16 ===== */
11116 /**
11117  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
11118  * \brief UCLIP16 (SIMD 16-bit Unsigned Clip Value)
11119  * \details
11120  * **Type**: SIMD
11121  *
11122  * **Syntax**:\n
11123  * ~~~
11124  * UCLIP16 Rd, Rs1, imm4u
11125  * ~~~
11126  *
11127  * **Purpose**:\n
11128  * Limit the 16-bit signed elements of a register into an unsigned range simultaneously.
11129  *
11130  * **Description**:\n
11131  * This instruction limits the 16-bit signed elements stored in Rs1 into an unsigned
11132  * integer range between 2^imm4u-1 and 0, and writes the limited results to Rd. For example, if imm4u is
11133  * 3, the 16-bit input values should be saturated between 7 and 0. If saturation is performed, set OV bit
11134  * to 1.
11135  *
11136  * **Operations**:\n
11137  * ~~~
11138  * src = Rs1.H[x];
11139  * if (src > (2^imm4u)-1) {
11140  *   src = (2^imm4u)-1;
11141  *   OV = 1;
11142  * } else if (src < 0) {
11143  *   src = 0;
11144  *   OV = 1;
11145  * }
11146  * Rd.H[x] = src;
11147  * for RV32: x=1...0,
11148  * for RV64: x=3...0
11149  * ~~~
11150  *
11151  * \param [in]  a    unsigned long type of value stored in a
11152  * \param [in]  b    unsigned int type of value stored in b
11153  * \return value stored in unsigned long type
11154  */
/*
 * Implemented as a macro because the clip amount b is passed with the "K"
 * constraint, i.e. as an immediate operand of the instruction.
 * The operand a is captured into __a before the result variable is declared,
 * and the result variable uses a reserved-style name, so a caller expression
 * such as __RV_UCLIP16(result, 3) refers to the caller's own variable rather
 * than to an uninitialised local of this macro.
 * volatile: the instruction may set the OV flag when it saturates.
 */
#define __RV_UCLIP16(a, b)    \
    ({    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long __result;    \
        __ASM volatile("uclip16 %0, %1, %2" : "=r"(__result) : "r"(__a), "K"(b));    \
        __result;    \
    })
11162 /* ===== Inline Function End for 3.143. UCLIP16 ===== */
11163 
11164 /* ===== Inline Function Start for 3.144. UCLIP32 ===== */
11165 /**
11166  * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
11167  * \brief UCLIP32 (SIMD 32-bit Unsigned Clip Value)
11168  * \details
11169  * **Type**: SIMD
11170  *
11171  * **Syntax**:\n
11172  * ~~~
11173  * UCLIP32 Rd, Rs1, imm5u[4:0]
11174  * ~~~
11175  *
11176  * **Purpose**:\n
11177  * Limit the 32-bit signed integer elements of a register into an unsigned range
11178  * simultaneously.
11179  *
11180  * **Description**:\n
11181  * This instruction limits the 32-bit signed integer elements stored in Rs1 into an
11182  * unsigned integer range between 2^imm5u-1 and 0, and writes the limited results to Rd. For example, if
11183  * imm5u is 3, the 32-bit input values should be saturated between 7 and 0. If saturation is performed,
11184  * set OV bit to 1.
11185  *
11186  * **Operations**:\n
11187  * ~~~
11188  * src = Rs1.W[x];
11189  * if (src > (2^imm5u)-1) {
11190  *   src = (2^imm5u)-1;
11191  *   OV = 1;
11192  * } else if (src < 0) {
11193  *   src = 0;
11194  *   OV = 1;
11195  * }
11196  * Rd.W[x] = src
11197  * for RV32: x=0,
11198  * for RV64: x=1...0
11199  * ~~~
11200  *
11201  * \param [in]  a    unsigned long type of value stored in a
11202  * \param [in]  b    unsigned int type of value stored in b
11203  * \return value stored in unsigned long type
11204  */
/*
 * Implemented as a macro because the clip amount b is passed with the "K"
 * constraint, i.e. as an immediate operand of the instruction.
 * The operand a is captured into __a before the result variable is declared,
 * and the result variable uses a reserved-style name, so a caller expression
 * such as __RV_UCLIP32(result, 3) refers to the caller's own variable rather
 * than to an uninitialised local of this macro.
 * volatile: the instruction may set the OV flag when it saturates.
 */
#define __RV_UCLIP32(a, b)    \
    ({    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long __result;    \
        __ASM volatile("uclip32 %0, %1, %2" : "=r"(__result) : "r"(__a), "K"(b));    \
        __result;    \
    })
11212 /* ===== Inline Function End for 3.144. UCLIP32 ===== */
11213 
11214 /* ===== Inline Function Start for 3.145. UCMPLE8 ===== */
11215 /**
11216  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
11217  * \brief UCMPLE8 (SIMD 8-bit Unsigned Compare Less Than & Equal)
11218  * \details
11219  * **Type**: SIMD
11220  *
11221  * **Syntax**:\n
11222  * ~~~
11223  * UCMPLE8 Rd, Rs1, Rs2
11224  * ~~~
11225  *
11226  * **Purpose**:\n
11227  * Do 8-bit unsigned integer elements less than & equal comparisons simultaneously.
11228  *
11229  * **Description**:\n
11230  * This instruction compares the 8-bit unsigned integer elements in Rs1 with the 8-bit
11231  * unsigned integer elements in Rs2 to see if the one in Rs1 is less than or equal to the one in Rs2. If it
11232  * is true, the result is 0xFF; otherwise, the result is 0x0. The element comparison results are written to
11233  * Rd.
11234  *
11235  * **Operations**:\n
11236  * ~~~
11237  * Rd.B[x] = (Rs1.B[x] <=u Rs2.B[x])? 0xff : 0x0;
11238  * for RV32: x=3...0,
11239  * for RV64: x=7...0
11240  * ~~~
11241  *
11242  * \param [in]  a    unsigned long type of value stored in a
11243  * \param [in]  b    unsigned long type of value stored in b
11244  * \return value stored in unsigned long type
11245  */
__RV_UCMPLE8(unsigned long a,unsigned long b)11246 __STATIC_FORCEINLINE unsigned long __RV_UCMPLE8(unsigned long a, unsigned long b)
11247 {
11248     register unsigned long result;
11249     __ASM volatile("ucmple8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
11250     return result;
11251 }
11252 /* ===== Inline Function End for 3.145. UCMPLE8 ===== */
11253 
11254 /* ===== Inline Function Start for 3.146. UCMPLE16 ===== */
11255 /**
11256  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP
11257  * \brief UCMPLE16 (SIMD 16-bit Unsigned Compare Less Than & Equal)
11258  * \details
11259  * **Type**: SIMD
11260  *
11261  * **Syntax**:\n
11262  * ~~~
11263  * UCMPLE16 Rd, Rs1, Rs2
11264  * ~~~
11265  *
11266  * **Purpose**:\n
11267  * Do 16-bit unsigned integer elements less than & equal comparisons simultaneously.
11268  *
11269  * **Description**:\n
11270  * This instruction compares the 16-bit unsigned integer elements in Rs1 with the 16-bit
11271  * unsigned integer elements in Rs2 to see if the one in Rs1 is less than or equal to the one in Rs2. If it
11272  * is true, the result is 0xFFFF; otherwise, the result is 0x0. The element comparison results are
11273  * written to Rd.
11274  *
11275  * **Operations**:\n
11276  * ~~~
11277  * Rd.H[x] = (Rs1.H[x] <=u Rs2.H[x])? 0xffff : 0x0;
11278  * for RV32: x=1...0,
11279  * for RV64: x=3...0
11280  * ~~~
11281  *
11282  * \param [in]  a    unsigned long type of value stored in a
11283  * \param [in]  b    unsigned long type of value stored in b
11284  * \return value stored in unsigned long type
11285  */
__RV_UCMPLE16(unsigned long a,unsigned long b)11286 __STATIC_FORCEINLINE unsigned long __RV_UCMPLE16(unsigned long a, unsigned long b)
11287 {
11288     register unsigned long result;
11289     __ASM volatile("ucmple16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
11290     return result;
11291 }
11292 /* ===== Inline Function End for 3.146. UCMPLE16 ===== */
11293 
11294 /* ===== Inline Function Start for 3.147. UCMPLT8 ===== */
11295 /**
11296  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
11297  * \brief UCMPLT8 (SIMD 8-bit Unsigned Compare Less Than)
11298  * \details
11299  * **Type**: SIMD
11300  *
11301  * **Syntax**:\n
11302  * ~~~
11303  * UCMPLT8 Rd, Rs1, Rs2
11304  * ~~~
11305  *
11306  * **Purpose**:\n
11307  * Do 8-bit unsigned integer elements less than comparisons simultaneously.
11308  *
11309  * **Description**:\n
11310  * This instruction compares the 8-bit unsigned integer elements in Rs1 with the 8-bit
11311  * unsigned integer elements in Rs2 to see if the one in Rs1 is less than the one in Rs2. If it is true, the
11312  * result is 0xFF; otherwise, the result is 0x0. The element comparison results are written to Rd.
11313  *
11314  * **Operations**:\n
11315  * ~~~
11316  * Rd.B[x] = (Rs1.B[x] <u Rs2.B[x])? 0xff : 0x0;
11317  * for RV32: x=3...0,
11318  * for RV64: x=7...0
11319  * ~~~
11320  *
11321  * \param [in]  a    unsigned long type of value stored in a
11322  * \param [in]  b    unsigned long type of value stored in b
11323  * \return value stored in unsigned long type
11324  */
__RV_UCMPLT8(unsigned long a,unsigned long b)11325 __STATIC_FORCEINLINE unsigned long __RV_UCMPLT8(unsigned long a, unsigned long b)
11326 {
11327     register unsigned long result;
11328     __ASM volatile("ucmplt8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
11329     return result;
11330 }
11331 /* ===== Inline Function End for 3.147. UCMPLT8 ===== */
11332 
11333 /* ===== Inline Function Start for 3.148. UCMPLT16 ===== */
11334 /**
11335  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP
11336  * \brief UCMPLT16 (SIMD 16-bit Unsigned Compare Less Than)
11337  * \details
11338  * **Type**: SIMD
11339  *
11340  * **Syntax**:\n
11341  * ~~~
11342  * UCMPLT16 Rd, Rs1, Rs2
11343  * ~~~
11344  *
11345  * **Purpose**:\n
11346  * Do 16-bit unsigned integer elements less than comparisons simultaneously.
11347  *
11348  * **Description**:\n
11349  * This instruction compares the 16-bit unsigned integer elements in Rs1 with the 16-bit
11350  * unsigned integer elements in Rs2 to see if the one in Rs1 is less than the one in Rs2. If it is true, the
11351  * result is 0xFFFF; otherwise, the result is 0x0. The element comparison results are written to Rd.
11352  *
11353  * **Operations**:\n
11354  * ~~~
11355  * Rd.H[x] = (Rs1.H[x] <u Rs2.H[x])? 0xffff : 0x0;
11356  * for RV32: x=1...0,
11357  * for RV64: x=3...0
11358  * ~~~
11359  *
11360  * \param [in]  a    unsigned long type of value stored in a
11361  * \param [in]  b    unsigned long type of value stored in b
11362  * \return value stored in unsigned long type
11363  */
__RV_UCMPLT16(unsigned long a,unsigned long b)11364 __STATIC_FORCEINLINE unsigned long __RV_UCMPLT16(unsigned long a, unsigned long b)
11365 {
11366     register unsigned long result;
11367     __ASM volatile("ucmplt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
11368     return result;
11369 }
11370 /* ===== Inline Function End for 3.148. UCMPLT16 ===== */
11371 
11372 /* ===== Inline Function Start for 3.149. UKADD8 ===== */
11373 /**
11374  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
11375  * \brief UKADD8 (SIMD 8-bit Unsigned Saturating Addition)
11376  * \details
11377  * **Type**: SIMD
11378  *
11379  * **Syntax**:\n
11380  * ~~~
11381  * UKADD8 Rd, Rs1, Rs2
11382  * ~~~
11383  *
11384  * **Purpose**:\n
11385  * Do 8-bit unsigned integer element saturating additions simultaneously.
11386  *
11387  * **Description**:\n
11388  * This instruction adds the 8-bit unsigned integer elements in Rs1 with the 8-bit
11389  * unsigned integer elements in Rs2. If any of the results are beyond the 8-bit unsigned number range
11390  * (0 <= RES <= 2^8-1), they are saturated to the range and the OV bit is set to 1. The saturated results are
11391  * written to Rd.
11392  *
11393  * **Operations**:\n
11394  * ~~~
11395  * res[x] = Rs1.B[x] + Rs2.B[x];
11396  * if (res[x] > (2^8)-1) {
11397  *   res[x] = (2^8)-1;
11398  *   OV = 1;
11399  * }
11400  * Rd.B[x] = res[x];
11401  * for RV32: x=3...0,
11402  * for RV64: x=7...0
11403  * ~~~
11404  *
11405  * \param [in]  a    unsigned long type of value stored in a
11406  * \param [in]  b    unsigned long type of value stored in b
11407  * \return value stored in unsigned long type
11408  */
__RV_UKADD8(unsigned long a,unsigned long b)11409 __STATIC_FORCEINLINE unsigned long __RV_UKADD8(unsigned long a, unsigned long b)
11410 {
11411     register unsigned long result;
11412     __ASM volatile("ukadd8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
11413     return result;
11414 }
11415 /* ===== Inline Function End for 3.149. UKADD8 ===== */
11416 
11417 /* ===== Inline Function Start for 3.150. UKADD16 ===== */
11418 /**
11419  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
11420  * \brief UKADD16 (SIMD 16-bit Unsigned Saturating Addition)
11421  * \details
11422  * **Type**: SIMD
11423  *
11424  * **Syntax**:\n
11425  * ~~~
11426  * UKADD16 Rd, Rs1, Rs2
11427  * ~~~
11428  *
11429  * **Purpose**:\n
11430  * Do 16-bit unsigned integer element saturating additions simultaneously.
11431  *
11432  * **Description**:\n
11433  * This instruction adds the 16-bit unsigned integer elements in Rs1 with the 16-bit
11434  * unsigned integer elements in Rs2. If any of the results are beyond the 16-bit unsigned number
11435  * range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set to 1. The saturated
11436  * results are written to Rd.
11437  *
11438  * **Operations**:\n
11439  * ~~~
11440  * res[x] = Rs1.H[x] + Rs2.H[x];
11441  * if (res[x] > (2^16)-1) {
11442  *   res[x] = (2^16)-1;
11443  *   OV = 1;
11444  * }
11445  * Rd.H[x] = res[x];
11446  * for RV32: x=1...0,
11447  * for RV64: x=3...0
11448  * ~~~
11449  *
11450  * \param [in]  a    unsigned long type of value stored in a
11451  * \param [in]  b    unsigned long type of value stored in b
11452  * \return value stored in unsigned long type
11453  */
__RV_UKADD16(unsigned long a,unsigned long b)11454 __STATIC_FORCEINLINE unsigned long __RV_UKADD16(unsigned long a, unsigned long b)
11455 {
11456     register unsigned long result;
11457     __ASM volatile("ukadd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
11458     return result;
11459 }
11460 /* ===== Inline Function End for 3.150. UKADD16 ===== */
11461 
11462 /* ===== Inline Function Start for 3.151. UKADD64 ===== */
11463 /**
11464  * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
11465  * \brief UKADD64 (64-bit Unsigned Saturating Addition)
11466  * \details
11467  * **Type**: DSP (64-bit Profile)
11468  *
11469  * **Syntax**:\n
11470  * ~~~
11471  * UKADD64 Rd, Rs1, Rs2
11472  * ~~~
11473  *
11474  * **Purpose**:\n
11475  * Add two 64-bit unsigned integers. The result is saturated to the U64 range.
11476  *
11477  * **RV32 Description**:\n
11478  * This instruction adds the 64-bit unsigned integer of an even/odd pair of registers
11479  * specified by Rs1(4,1) with the 64-bit unsigned integer of an even/odd pair of registers specified by
11480  * Rs2(4,1). If the 64-bit result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is saturated to the
11481  * range and the OV bit is set to 1. The saturated result is written to an even/odd pair of registers
11482  * specified by Rd(4,1).
11483  * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
11484  * includes register 2d and 2d+1.
11485  * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
11486  * of the pair contains the low 32-bit of the result.
11487  *
11488  * **RV64 Description**:\n
11489  * This instruction adds the 64-bit unsigned integer in Rs1 with the 64-bit unsigned
11490  * integer in Rs2. If the 64-bit result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is saturated to
11491  * the range and the OV bit is set to 1. The saturated result is written to Rd.
11492  *
11493  * **Operations**:\n
11494  * ~~~
11495  * * RV32:
11496  * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
11497  * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
11498  * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
11499  * result = R[a_H].R[a_L] + R[b_H].R[b_L];
11500  * if (result > (2^64)-1) {
11501  *   result = (2^64)-1; OV = 1;
11502  * }
11503  * R[t_H].R[t_L] = result;
11504  * * RV64:
11505  * result = Rs1 + Rs2;
11506  * if (result > (2^64)-1) {
11507  *   result = (2^64)-1; OV = 1;
11508  * }
11509  * Rd = result;
11510  * ~~~
11511  *
11512  * \param [in]  a    unsigned long long type of value stored in a
11513  * \param [in]  b    unsigned long long type of value stored in b
11514  * \return value stored in unsigned long long type
11515  */
__RV_UKADD64(unsigned long long a,unsigned long long b)11516 __STATIC_FORCEINLINE unsigned long long __RV_UKADD64(unsigned long long a, unsigned long long b)
11517 {
11518     register unsigned long long result;
11519     __ASM volatile("ukadd64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
11520     return result;
11521 }
11522 /* ===== Inline Function End for 3.151. UKADD64 ===== */
11523 
11524 /* ===== Inline Function Start for 3.152. UKADDH ===== */
11525 /**
11526  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
11527  * \brief UKADDH (Unsigned Addition with U16 Saturation)
11528  * \details
11529  * **Type**: DSP
11530  *
11531  * **Syntax**:\n
11532  * ~~~
11533  * UKADDH Rd, Rs1, Rs2
11534  * ~~~
11535  *
11536  * **Purpose**:\n
11537  * Add the unsigned lower 32-bit content of two registers with U16 saturation.
11538  *
11539  * **Description**:\n
11540  * The unsigned lower 32-bit content of Rs1 is added with the unsigned lower 32-bit
11541  * content of Rs2. And the result is saturated to the 16-bit unsigned integer range of [0, 2^16-1] and then
11542  * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
11543  *
11544  * **Operations**:\n
11545  * ~~~
11546  * tmp = Rs1.W[0] + Rs2.W[0];
11547  * if (tmp > (2^16)-1) {
11548  *   tmp = (2^16)-1;
11549  *   OV = 1;
11550  * }
11551  * Rd = SE(tmp[15:0]);
11552  * ~~~
11553  *
11554  * \param [in]  a    unsigned int type of value stored in a
11555  * \param [in]  b    unsigned int type of value stored in b
11556  * \return value stored in unsigned long type
11557  */
__RV_UKADDH(unsigned int a,unsigned int b)11558 __STATIC_FORCEINLINE unsigned long __RV_UKADDH(unsigned int a, unsigned int b)
11559 {
11560     register unsigned long result;
11561     __ASM volatile("ukaddh %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
11562     return result;
11563 }
11564 /* ===== Inline Function End for 3.152. UKADDH ===== */
11565 
11566 /* ===== Inline Function Start for 3.153. UKADDW ===== */
11567 /**
11568  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
11569  * \brief UKADDW (Unsigned Addition with U32 Saturation)
11570  * \details
11571  * **Type**: DSP
11572  *
11573  * **Syntax**:\n
11574  * ~~~
11575  * UKADDW Rd, Rs1, Rs2
11576  * ~~~
11577  *
11578  * **Purpose**:\n
11579  * Add the unsigned lower 32-bit content of two registers with U32 saturation.
11580  *
11581  * **Description**:\n
11582  * The unsigned lower 32-bit content of Rs1 is added with the unsigned lower 32-bit
11583  * content of Rs2. And the result is saturated to the 32-bit unsigned integer range of [0, 2^32-1] and then
11584  * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
11585  *
11586  * **Operations**:\n
11587  * ~~~
11588  * tmp = Rs1.W[0] + Rs2.W[0];
11589  * if (tmp > (2^32)-1) {
11590  *   tmp[31:0] = (2^32)-1;
11591  *   OV = 1;
11592  * }
11593  * Rd = tmp[31:0]; // RV32
11594  * Rd = SE(tmp[31:0]); // RV64
11595  * ~~~
11596  *
11597  * \param [in]  a    unsigned int type of value stored in a
11598  * \param [in]  b    unsigned int type of value stored in b
11599  * \return value stored in unsigned long type
11600  */
__RV_UKADDW(unsigned int a,unsigned int b)11601 __STATIC_FORCEINLINE unsigned long __RV_UKADDW(unsigned int a, unsigned int b)
11602 {
11603     register unsigned long result;
11604     __ASM volatile("ukaddw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
11605     return result;
11606 }
11607 /* ===== Inline Function End for 3.153. UKADDW ===== */
11608 
11609 /* ===== Inline Function Start for 3.154. UKCRAS16 ===== */
11610 /**
11611  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
11612  * \brief UKCRAS16 (SIMD 16-bit Unsigned Saturating Cross Addition & Subtraction)
11613  * \details
11614  * **Type**: SIMD
11615  *
11616  * **Syntax**:\n
11617  * ~~~
11618  * UKCRAS16 Rd, Rs1, Rs2
11619  * ~~~
11620  *
11621  * **Purpose**:\n
11622  * Do one 16-bit unsigned integer element saturating addition and one 16-bit unsigned
11623  * integer element saturating subtraction in a 32-bit chunk simultaneously. Operands are from crossed
11624  * positions in 32-bit chunks.
11625  *
11626  * **Description**:\n
11627  * This instruction adds the 16-bit unsigned integer element in [31:16] of 32-bit chunks in
11628  * Rs1 with the 16-bit unsigned integer element in [15:0] of 32-bit chunks in Rs2; at the same time, it
11629  * subtracts the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs2 from the 16-bit
11630  * unsigned integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the 16-bit
11631  * unsigned number range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set to 1.
11632  * The saturated results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit
11633  * chunks in Rd for subtraction.
11634  *
11635  * **Operations**:\n
11636  * ~~~
11637  * res1 = Rs1.W[x][31:16] + Rs2.W[x][15:0];
11638  * res2 = Rs1.W[x][15:0] - Rs2.W[x][31:16];
11639  * if (res1 > (2^16)-1) {
11640  *   res1 = (2^16)-1;
11641  *   OV = 1;
11642  * }
11643  * if (res2 < 0) {
11644  *   res2 = 0;
11645  *   OV = 1;
11646  * }
11647  * Rd.W[x][31:16] = res1;
11648  * Rd.W[x][15:0] = res2;
11649  * for RV32, x=0
11650  * for RV64, x=1...0
11651  * ~~~
11652  *
11653  * \param [in]  a    unsigned long type of value stored in a
11654  * \param [in]  b    unsigned long type of value stored in b
11655  * \return value stored in unsigned long type
11656  */
__RV_UKCRAS16(unsigned long a,unsigned long b)11657 __STATIC_FORCEINLINE unsigned long __RV_UKCRAS16(unsigned long a, unsigned long b)
11658 {
11659     register unsigned long result;
11660     __ASM volatile("ukcras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
11661     return result;
11662 }
11663 /* ===== Inline Function End for 3.154. UKCRAS16 ===== */
11664 
11665 /* ===== Inline Function Start for 3.155. UKCRSA16 ===== */
11666 /**
11667  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
11668  * \brief UKCRSA16 (SIMD 16-bit Unsigned Saturating Cross Subtraction & Addition)
11669  * \details
11670  * **Type**: SIMD
11671  *
11672  * **Syntax**:\n
11673  * ~~~
11674  * UKCRSA16 Rd, Rs1, Rs2
11675  * ~~~
11676  *
11677  * **Purpose**:\n
11678  * Do one 16-bit unsigned integer element saturating subtraction and one 16-bit unsigned
11679  * integer element saturating addition in a 32-bit chunk simultaneously. Operands are from crossed
11680  * positions in 32-bit chunks.
11681  *
11682  * **Description**:\n
11683  * This instruction subtracts the 16-bit unsigned integer element in [15:0] of 32-bit
11684  * chunks in Rs2 from the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs1; at the
11685  * same time, it adds the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs2 with the 16-
11686  * bit unsigned integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the
11687  * 16-bit unsigned number range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set
11688  * to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of
11689  * 32-bit chunks in Rd for addition.
11690  *
11691  * **Operations**:\n
11692  * ~~~
11693  * res1 = Rs1.W[x][31:16] - Rs2.W[x][15:0];
11694  * res2 = Rs1.W[x][15:0] + Rs2.W[x][31:16];
11695  * if (res1 < 0) {
11696  *   res1 = 0; OV = 1;
11697  * }
11698  * if (res2 > (2^16)-1) {
11699  *   res2 = (2^16)-1;
11700  *   OV = 1;
11701  * }
11702  * Rd.W[x][31:16] = res1;
11703  * Rd.W[x][15:0] = res2;
11704  * for RV32, x=0
11705  * for RV64, x=1...0
11706  * ~~~
11707  *
11708  * \param [in]  a    unsigned long type of value stored in a
11709  * \param [in]  b    unsigned long type of value stored in b
11710  * \return value stored in unsigned long type
11711  */
__RV_UKCRSA16(unsigned long a,unsigned long b)11712 __STATIC_FORCEINLINE unsigned long __RV_UKCRSA16(unsigned long a, unsigned long b)
11713 {
11714     register unsigned long result;
11715     __ASM volatile("ukcrsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
11716     return result;
11717 }
11718 /* ===== Inline Function End for 3.155. UKCRSA16 ===== */
11719 
11720 /* ===== Inline Function Start for 3.156. UKMAR64 ===== */
11721 /**
11722  * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
11723  * \brief UKMAR64 (Unsigned Multiply and Saturating Add to 64-Bit Data)
11724  * \details
11725  * **Type**: DSP (64-bit Profile)
11726  *
11727  * **Syntax**:\n
11728  * ~~~
11729  * UKMAR64 Rd, Rs1, Rs2
11730  * ~~~
11731  *
11732  * **Purpose**:\n
11733  * Multiply the 32-bit unsigned elements in two registers and add the 64-bit multiplication
11734  * results to the 64-bit unsigned data of a pair of registers (RV32) or a register (RV64). The result is
11735  * saturated to the U64 range and written back to the pair of registers (RV32) or the register (RV64).
11736  *
11737  * **RV32 Description**:\n
11738  * This instruction multiplies the 32-bit unsigned data of Rs1 with that of Rs2. It
11739  * adds the 64-bit multiplication result to the 64-bit unsigned data of an even/odd pair of registers
11740  * specified by Rd(4,1) with unlimited precision. If the 64-bit addition result is beyond the U64 number
11741  * range (0 <= U64 <= 2^64-1), it is saturated to the range and the OV bit is set to 1. The saturated result is
11742  * written back to the even/odd pair of registers specified by Rd(4,1).
11743  * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
11744  * includes register 2d and 2d+1.
11745  * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
11746  * of the pair contains the low 32-bit of the result.
11747  *
11748  * **RV64 Description**:\n
11749  * This instruction multiplies the 32-bit unsigned elements of Rs1 with that of Rs2.
11750  * It adds the 64-bit multiplication results to the 64-bit unsigned data in Rd with unlimited precision. If
11751  * the 64-bit addition result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is saturated to the
11752  * range and the OV bit is set to 1. The saturated result is written back to Rd.
11753  *
11754  * **Operations**:\n
11755  * ~~~
11756  * * RV32:
11757  * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
11758  * result = R[t_H].R[t_L] + (Rs1 u* Rs2);
11759  * if (result > (2^64)-1) {
11760  *   result = (2^64)-1; OV = 1;
11761  * }
11762  * R[t_H].R[t_L] = result;
11763  * * RV64:
11764  * // `result` has unlimited precision
11765  * result = Rd + (Rs1.W[0] u* Rs2.W[0]) + (Rs1.W[1] u* Rs2.W[1]);
11766  * if (result > (2^64)-1) {
11767  *   result = (2^64)-1; OV = 1;
11768  * }
11769  * Rd = result;
11770  * ~~~
11771  *
11772  * \param [in]  t    unsigned long long type of value stored in t
11773  * \param [in]  a    unsigned long type of value stored in a
11774  * \param [in]  b    unsigned long type of value stored in b
11775  * \return value stored in unsigned long long type
11776  */
__RV_UKMAR64(unsigned long long t,unsigned long a,unsigned long b)11777 __STATIC_FORCEINLINE unsigned long long __RV_UKMAR64(unsigned long long t, unsigned long a, unsigned long b)
11778 {
11779     __ASM volatile("ukmar64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
11780     return t;
11781 }
11782 /* ===== Inline Function End for 3.156. UKMAR64 ===== */
11783 
11784 /* ===== Inline Function Start for 3.157. UKMSR64 ===== */
11785 /**
11786  * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
11787  * \brief UKMSR64 (Unsigned Multiply and Saturating Subtract from 64-Bit Data)
11788  * \details
11789  * **Type**: DSP (64-bit Profile)
11790  *
11791  * **Syntax**:\n
11792  * ~~~
11793  * UKMSR64 Rd, Rs1, Rs2
11794  * ~~~
11795  *
11796  * **Purpose**:\n
11797  * Multiply the 32-bit unsigned elements in two registers and subtract the 64-bit
11798  * multiplication results from the 64-bit unsigned data of a pair of registers (RV32) or a register (RV64).
11799  * The result is saturated to the U64 range and written back to the pair of registers (RV32) or a register
11800  * (RV64).
11801  *
11802  * **RV32 Description**:\n
11803  * This instruction multiplies the 32-bit unsigned data of Rs1 with that of Rs2. It
11804  * subtracts the 64-bit multiplication result from the 64-bit unsigned data of an even/odd pair of
11805  * registers specified by Rd(4,1) with unlimited precision. If the 64-bit subtraction result is beyond the
11806  * U64 number range (0 <= U64 <= 2^64-1), it is saturated to the range and the OV bit is set to 1. The
11807  * saturated result is written back to the even/odd pair of registers specified by Rd(4,1).
11808  * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
11809  * includes register 2d and 2d+1.
11810  * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
11811  * of the pair contains the low 32-bit of the result.
11812  *
11813  * **RV64 Description**:\n
11814  * This instruction multiplies the 32-bit unsigned elements of Rs1 with that of Rs2.
11815  * It subtracts the 64-bit multiplication results from the 64-bit unsigned data of Rd with unlimited
11816  * precision. If the 64-bit subtraction result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is
11817  * saturated to the range and the OV bit is set to 1. The saturated result is written back to Rd.
11818  *
11819  * **Operations**:\n
11820  * ~~~
11821  * * RV32:
11822  * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
11823  * result = R[t_H].R[t_L] - (Rs1 u* Rs2);
11824  * if (result < 0) {
11825  *   result = 0; OV = 1;
11826  * }
11827  * R[t_H].R[t_L] = result;
11828  * * RV64:
11829  * // `result` has unlimited precision
11830  * result = Rd - (Rs1.W[0] u* Rs2.W[0]) - (Rs1.W[1] u* Rs2.W[1]);
11831  * if (result < 0) {
11832  *   result = 0; OV = 1;
11833  * }
11834  * Rd = result;
11835  * ~~~
11836  *
11837  * \param [in]  t    unsigned long long type of value stored in t
11838  * \param [in]  a    unsigned long type of value stored in a
11839  * \param [in]  b    unsigned long type of value stored in b
11840  * \return value stored in unsigned long long type
11841  */
__RV_UKMSR64(unsigned long long t,unsigned long a,unsigned long b)11842 __STATIC_FORCEINLINE unsigned long long __RV_UKMSR64(unsigned long long t, unsigned long a, unsigned long b)
11843 {
11844     __ASM volatile("ukmsr64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
11845     return t;
11846 }
11847 /* ===== Inline Function End for 3.157. UKMSR64 ===== */
11848 
11849 /* ===== Inline Function Start for 3.158. UKSTAS16 ===== */
11850 /**
11851  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
11852  * \brief UKSTAS16 (SIMD 16-bit Unsigned Saturating Straight Addition & Subtraction)
11853  * \details
11854  * **Type**: SIMD
11855  *
11856  * **Syntax**:\n
11857  * ~~~
11858  * UKSTAS16 Rd, Rs1, Rs2
11859  * ~~~
11860  *
11861  * **Purpose**:\n
11862  * Do one 16-bit unsigned integer element saturating addition and one 16-bit unsigned
11863  * integer element saturating subtraction in a 32-bit chunk simultaneously. Operands are from
11864  * corresponding positions in 32-bit chunks.
11865  *
11866  * **Description**:\n
11867  * This instruction adds the 16-bit unsigned integer element in [31:16] of 32-bit chunks in
11868  * Rs1 with the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs2; at the same time, it
11869  * subtracts the 16-bit unsigned integer element in [15:0] of 32-bit chunks in Rs2 from the 16-bit
11870  * unsigned integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the 16-bit
11871  * unsigned number range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set to 1.
11872  * The saturated results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit
11873  * chunks in Rd for subtraction.
11874  *
11875  * **Operations**:\n
11876  * ~~~
11877  * res1 = Rs1.W[x][31:16] + Rs2.W[x][31:16];
11878  * res2 = Rs1.W[x][15:0] - Rs2.W[x][15:0];
11879  * if (res1 > (2^16)-1) {
11880  *   res1 = (2^16)-1;
11881  *   OV = 1;
11882  * }
11883  * if (res2 < 0) {
11884  *   res2 = 0;
11885  *   OV = 1;
11886  * }
11887  * Rd.W[x][31:16] = res1;
11888  * Rd.W[x][15:0] = res2;
11889  * for RV32, x=0
11890  * for RV64, x=1...0
11891  * ~~~
11892  *
11893  * \param [in]  a    unsigned long type of value stored in a
11894  * \param [in]  b    unsigned long type of value stored in b
11895  * \return value stored in unsigned long type
11896  */
__RV_UKSTAS16(unsigned long a,unsigned long b)11897 __STATIC_FORCEINLINE unsigned long __RV_UKSTAS16(unsigned long a, unsigned long b)
11898 {
11899     register unsigned long result;
11900     __ASM volatile("ukstas16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
11901     return result;
11902 }
11903 /* ===== Inline Function End for 3.158. UKSTAS16 ===== */
11904 
11905 /* ===== Inline Function Start for 3.159. UKSTSA16 ===== */
11906 /**
11907  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
11908  * \brief UKSTSA16 (SIMD 16-bit Unsigned Saturating Straight Subtraction & Addition)
11909  * \details
11910  * **Type**: SIMD
11911  *
11912  * **Syntax**:\n
11913  * ~~~
11914  * UKSTSA16 Rd, Rs1, Rs2
11915  * ~~~
11916  *
11917  * **Purpose**:\n
11918  * Do one 16-bit unsigned integer element saturating subtraction and one 16-bit unsigned
11919  * integer element saturating addition in a 32-bit chunk simultaneously. Operands are from
11920  * corresponding positions in 32-bit chunks.
11921  *
11922  * **Description**:\n
11923  * This instruction subtracts the 16-bit unsigned integer element in [31:16] of 32-bit
11924  * chunks in Rs2 from the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs1; at the
11925  * same time, it adds the 16-bit unsigned integer element in [15:0] of 32-bit chunks in Rs2 with the 16-
11926  * bit unsigned integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the
11927  * 16-bit unsigned number range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set
11928  * to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of
11929  * 32-bit chunks in Rd for addition.
11930  *
11931  * **Operations**:\n
11932  * ~~~
11933  * res1 = Rs1.W[x][31:16] - Rs2.W[x][31:16];
11934  * res2 = Rs1.W[x][15:0] + Rs2.W[x][15:0];
11935  * if (res1 < 0) {
11936  *   res1 = 0; OV = 1;
11937  * }
11938  * if (res2 > (2^16)-1) {
11939  *   res2 = (2^16)-1;
11940  *   OV = 1;
11941  * }
11942  * Rd.W[x][31:16] = res1;
11943  * Rd.W[x][15:0] = res2;
11944  * for RV32, x=0
11945  * for RV64, x=1...0
11946  * ~~~
11947  *
11948  * \param [in]  a    unsigned long type of value stored in a
11949  * \param [in]  b    unsigned long type of value stored in b
11950  * \return value stored in unsigned long type
11951  */
__RV_UKSTSA16(unsigned long a,unsigned long b)11952 __STATIC_FORCEINLINE unsigned long __RV_UKSTSA16(unsigned long a, unsigned long b)
11953 {
11954     register unsigned long result;
11955     __ASM volatile("ukstsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
11956     return result;
11957 }
11958 /* ===== Inline Function End for 3.159. UKSTSA16 ===== */
11959 
11960 /* ===== Inline Function Start for 3.160. UKSUB8 ===== */
11961 /**
11962  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
11963  * \brief UKSUB8 (SIMD 8-bit Unsigned Saturating Subtraction)
11964  * \details
11965  * **Type**: SIMD
11966  *
11967  * **Syntax**:\n
11968  * ~~~
11969  * UKSUB8 Rd, Rs1, Rs2
11970  * ~~~
11971  *
11972  * **Purpose**:\n
11973  * Do 8-bit unsigned integer elements saturating subtractions simultaneously.
11974  *
11975  * **Description**:\n
11976  * This instruction subtracts the 8-bit unsigned integer elements in Rs2 from the 8-bit
11977  * unsigned integer elements in Rs1. If any of the results are beyond the 8-bit unsigned number range
11978  * (0 <= RES <= 2^8-1), they are saturated to the range and the OV bit is set to 1. The saturated results are
11979  * written to Rd.
11980  *
11981  * **Operations**:\n
11982  * ~~~
11983  * res[x] = Rs1.B[x] - Rs2.B[x];
11984  * if (res[x] < 0) {
11985  *   res[x] = 0;
11986  *   OV = 1;
11987  * }
11988  * Rd.B[x] = res[x];
11989  * for RV32: x=3...0,
11990  * for RV64: x=7...0
11991  * ~~~
11992  *
11993  * \param [in]  a    unsigned long type of value stored in a
11994  * \param [in]  b    unsigned long type of value stored in b
11995  * \return value stored in unsigned long type
11996  */
__RV_UKSUB8(unsigned long a,unsigned long b)11997 __STATIC_FORCEINLINE unsigned long __RV_UKSUB8(unsigned long a, unsigned long b)
11998 {
11999     register unsigned long result;
12000     __ASM volatile("uksub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
12001     return result;
12002 }
12003 /* ===== Inline Function End for 3.160. UKSUB8 ===== */
12004 
12005 /* ===== Inline Function Start for 3.161. UKSUB16 ===== */
12006 /**
12007  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
12008  * \brief UKSUB16 (SIMD 16-bit Unsigned Saturating Subtraction)
12009  * \details
12010  * **Type**: SIMD
12011  *
12012  * **Syntax**:\n
12013  * ~~~
12014  * UKSUB16 Rd, Rs1, Rs2
12015  * ~~~
12016  *
12017  * **Purpose**:\n
12018  * Do 16-bit unsigned integer elements saturating subtractions simultaneously.
12019  *
12020  * **Description**:\n
12021  * This instruction subtracts the 16-bit unsigned integer elements in Rs2 from the 16-bit
12022  * unsigned integer elements in Rs1. If any of the results are beyond the 16-bit unsigned number
12023  * range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set to 1. The saturated
12024  * results are written to Rd.
12025  *
12026  * **Operations**:\n
12027  * ~~~
12028  * res[x] = Rs1.H[x] - Rs2.H[x];
12029  * if (res[x] < 0) {
12030  *   res[x] = 0;
12031  *   OV = 1;
12032  * }
12033  * Rd.H[x] = res[x];
12034  * for RV32: x=1...0,
12035  * for RV64: x=3...0
12036  * ~~~
12037  *
12038  * \param [in]  a    unsigned long type of value stored in a
12039  * \param [in]  b    unsigned long type of value stored in b
12040  * \return value stored in unsigned long type
12041  */
__RV_UKSUB16(unsigned long a,unsigned long b)12042 __STATIC_FORCEINLINE unsigned long __RV_UKSUB16(unsigned long a, unsigned long b)
12043 {
12044     register unsigned long result;
12045     __ASM volatile("uksub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
12046     return result;
12047 }
12048 /* ===== Inline Function End for 3.161. UKSUB16 ===== */
12049 
12050 /* ===== Inline Function Start for 3.162. UKSUB64 ===== */
12051 /**
12052  * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
12053  * \brief UKSUB64 (64-bit Unsigned Saturating Subtraction)
12054  * \details
12055  * **Type**: DSP (64-bit Profile)
12056  *
12057  * **Syntax**:\n
12058  * ~~~
12059  * UKSUB64 Rd, Rs1, Rs2
12060  * ~~~
12061  *
12062  * **Purpose**:\n
12063  * Perform a 64-bit unsigned integer subtraction. The result is saturated to the U64 range.
12064  *
12065  * **RV32 Description**:\n
12066  * This instruction subtracts the 64-bit unsigned integer of an even/odd pair of
12067  * registers specified by Rs2(4,1) from the 64-bit unsigned integer of an even/odd pair of registers
12068  * specified by Rs1(4,1). If the 64-bit result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is
12069  * saturated to the range and the OV bit is set to 1. The saturated result is then written to an even/odd
12070  * pair of registers specified by Rd(4,1).
12071  * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
12072  * includes register 2d and 2d+1.
12073  * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
12074  * register of the pair contains the low 32-bit of the operand.
12075  *
12076  * **RV64 Description**:\n
12077  * This instruction subtracts the 64-bit unsigned integer of Rs2 from the 64-bit
12078  * unsigned integer of Rs1. If the 64-bit result is beyond the U64 number range (0 <=
12079  * U64 <= 2^64-1), it is saturated to the range and the OV bit is set to 1. The saturated result is then written
12080  * to Rd.
12081  *
12082  * **Operations**:\n
12083  * ~~~
12084  * * RV32:
12085  * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
12086  * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
12087  * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
12088  * result = R[a_H].R[a_L] - R[b_H].R[b_L];
12089  * if (result < 0) {
12090  *   result = 0; OV = 1;
12091  * }
12092  * R[t_H].R[t_L] = result;
12093  * * RV64
12094  * result = Rs1 - Rs2;
12095  * if (result < 0) {
12096  *   result = 0; OV = 1;
12097  * }
12098  * Rd = result;
12099  * ~~~
12100  *
12101  * \param [in]  a    unsigned long long type of value stored in a
12102  * \param [in]  b    unsigned long long type of value stored in b
12103  * \return value stored in unsigned long long type
12104  */
__RV_UKSUB64(unsigned long long a,unsigned long long b)12105 __STATIC_FORCEINLINE unsigned long long __RV_UKSUB64(unsigned long long a, unsigned long long b)
12106 {
12107     register unsigned long long result;
12108     __ASM volatile("uksub64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
12109     return result;
12110 }
12111 /* ===== Inline Function End for 3.162. UKSUB64 ===== */
12112 
12113 /* ===== Inline Function Start for 3.163. UKSUBH ===== */
12114 /**
12115  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
12116  * \brief UKSUBH (Unsigned Subtraction with U16 Saturation)
12117  * \details
12118  * **Type**: DSP
12119  *
12120  * **Syntax**:\n
12121  * ~~~
12122  * UKSUBH Rd, Rs1, Rs2
12123  * ~~~
12124  *
12125  * **Purpose**:\n
12126  * Subtract the unsigned lower 32-bit content of two registers with U16 saturation.
12127  *
12128  * **Description**:\n
12129  * The unsigned lower 32-bit content of Rs2 is subtracted from the unsigned lower 32-bit
12130  * content of Rs1. And the result is saturated to the 16-bit unsigned integer range of [0, 2^16-1] and then
12131  * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
12132  *
12133  * **Operations**:\n
12134  * ~~~
12135  * tmp = Rs1.W[0] - Rs2.W[0];
12136  * if (tmp > (2^16)-1) {
12137  *   tmp = (2^16)-1;
12138  *   OV = 1;
12139  * }
12140  * else if (tmp < 0) {
12141  *   tmp = 0;
12142  *   OV = 1;
12143  * }
12144  * Rd = SE(tmp[15:0]);
12145  * ~~~
12146  *
12147  * \param [in]  a    unsigned int type of value stored in a
12148  * \param [in]  b    unsigned int type of value stored in b
12149  * \return value stored in unsigned long type
12150  */
__RV_UKSUBH(unsigned int a,unsigned int b)12151 __STATIC_FORCEINLINE unsigned long __RV_UKSUBH(unsigned int a, unsigned int b)
12152 {
12153     register unsigned long result;
12154     __ASM volatile("uksubh %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
12155     return result;
12156 }
12157 /* ===== Inline Function End for 3.163. UKSUBH ===== */
12158 
12159 /* ===== Inline Function Start for 3.164. UKSUBW ===== */
12160 /**
12161  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
12162  * \brief UKSUBW (Unsigned Subtraction with U32 Saturation)
12163  * \details
12164  * **Type**: DSP
12165  *
12166  * **Syntax**:\n
12167  * ~~~
12168  * UKSUBW Rd, Rs1, Rs2
12169  * ~~~
12170  *
12171  * **Purpose**:\n
12172  * Subtract the unsigned lower 32-bit content of two registers with unsigned 32-bit
12173  * saturation.
12174  *
12175  * **Description**:\n
12176  * The unsigned lower 32-bit content of Rs2 is subtracted from the unsigned lower 32-bit
12177  * content of Rs1. And the result is saturated to the 32-bit unsigned integer range of [0, 2^32-1] and then
12178  * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
12179  *
12180  * **Operations**:\n
12181  * ~~~
12182  * tmp = Rs1.W[0] - Rs2.W[0];
12183  * if (tmp < 0) {
12184  *   tmp[31:0] = 0;
12185  *   OV = 1;
12186  * }
12187  * Rd = tmp[31:0]; // RV32
12188  * Rd = SE(tmp[31:0]); // RV64
12189  * ~~~
12190  *
12191  * \param [in]  a    unsigned int type of value stored in a
12192  * \param [in]  b    unsigned int type of value stored in b
12193  * \return value stored in unsigned long type
12194  */
__RV_UKSUBW(unsigned int a,unsigned int b)12195 __STATIC_FORCEINLINE unsigned long __RV_UKSUBW(unsigned int a, unsigned int b)
12196 {
12197     register unsigned long result;
12198     __ASM volatile("uksubw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
12199     return result;
12200 }
12201 /* ===== Inline Function End for 3.164. UKSUBW ===== */
12202 
12203 /* ===== Inline Function Start for 3.165. UMAR64 ===== */
12204 /**
12205  * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
12206  * \brief UMAR64 (Unsigned Multiply and Add to 64-Bit Data)
12207  * \details
12208  * **Type**: DSP (64-bit Profile)
12209  *
12210  * **Syntax**:\n
12211  * ~~~
12212  * UMAR64 Rd, Rs1, Rs2
12213  * ~~~
12214  *
12215  * **Purpose**:\n
12216  * Multiply the 32-bit unsigned elements in two registers and add the 64-bit multiplication
12217  * results to the 64-bit unsigned data of a pair of registers (RV32) or a register (RV64). The result is
12218  * written back to the pair of registers (RV32) or a register (RV64).
12219  *
12220  * **RV32 Description**:\n
12221  * This instruction multiplies the 32-bit unsigned data of Rs1 with that of Rs2. It
12222  * adds the 64-bit multiplication result to the 64-bit unsigned data of an even/odd pair of registers
12223  * specified by Rd(4,1). The addition result is written back to the even/odd pair of registers specified by
12224  * Rd(4,1).
12225  * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
12226  * includes register 2d and 2d+1.
12227  * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
12228  * of the pair contains the low 32-bit of the result.
12229  *
12230  * **RV64 Description**:\n
12231  * This instruction multiplies the 32-bit unsigned elements of Rs1 with that of Rs2.
12232  * It adds the 64-bit multiplication results to the 64-bit unsigned data of Rd. The addition result is
12233  * written back to Rd.
12234  *
12235  * **Operations**:\n
12236  * ~~~
12237  * * RV32:
12238  * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
12239  * R[t_H].R[t_L] = R[t_H].R[t_L] + (Rs1 u* Rs2);
12240  * * RV64:
12241  * Rd = Rd + (Rs1.W[0] u* Rs2.W[0]) + (Rs1.W[1] u* Rs2.W[1]);
12242  * ~~~
12243  *
12244  * \param [in]  t    unsigned long long type of value stored in t
12245  * \param [in]  a    unsigned long type of value stored in a
12246  * \param [in]  b    unsigned long type of value stored in b
12247  * \return value stored in unsigned long long type
12248  */
__RV_UMAR64(unsigned long long t,unsigned long a,unsigned long b)12249 __STATIC_FORCEINLINE unsigned long long __RV_UMAR64(unsigned long long t, unsigned long a, unsigned long b)
12250 {
12251     __ASM volatile("umar64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
12252     return t;
12253 }
12254 /* ===== Inline Function End for 3.165. UMAR64 ===== */
12255 
12256 /* ===== Inline Function Start for 3.166. UMAQA ===== */
12257 /**
12258  * \ingroup NMSIS_Core_DSP_Intrinsic_8B_MULT_32B_ADD
12259  * \brief UMAQA (Unsigned Multiply Four Bytes with 32-bit Adds)
12260  * \details
12261  * **Type**: DSP
12262  *
12263  * **Syntax**:\n
12264  * ~~~
12265  * UMAQA Rd, Rs1, Rs2
12266  * ~~~
12267  *
12268  * **Purpose**:\n
12269  * Do four unsigned 8-bit multiplications from 32-bit chunks of two registers; and then adds
12270  * the four 16-bit results and the content of corresponding 32-bit chunks of a third register together.
12271  *
12272  * **Description**:\n
12273  * This instruction multiplies the four unsigned 8-bit elements of 32-bit chunks of Rs1 with the four
12274  * unsigned 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the
12275  * unsigned content of the corresponding 32-bit chunks of Rd. The final results are written back to the
12276  * corresponding 32-bit chunks in Rd.
12277  *
12278  * **Operations**:\n
12279  * ~~~
12280  * res[x] = Rd.W[x] + (Rs1.W[x].B[3] u* Rs2.W[x].B[3]) +
12281  *          (Rs1.W[x].B[2] u* Rs2.W[x].B[2]) + (Rs1.W[x].B[1] u* Rs2.W[x].B[1]) +
12282  *          (Rs1.W[x].B[0] u* Rs2.W[x].B[0]);
12283  * Rd.W[x] = res[x];
12284  * for RV32: x=0,
12285  * for RV64: x=1...0
12286  * ~~~
12287  *
12288  * \param [in]  t    unsigned long type of value stored in t
12289  * \param [in]  a    unsigned long type of value stored in a
12290  * \param [in]  b    unsigned long type of value stored in b
12291  * \return value stored in unsigned long type
12292  */
__RV_UMAQA(unsigned long t,unsigned long a,unsigned long b)12293 __STATIC_FORCEINLINE unsigned long __RV_UMAQA(unsigned long t, unsigned long a, unsigned long b)
12294 {
12295     __ASM volatile("umaqa %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
12296     return t;
12297 }
12298 /* ===== Inline Function End for 3.166. UMAQA ===== */
12299 
12300 /* ===== Inline Function Start for 3.167. UMAX8 ===== */
12301 /**
12302  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
12303  * \brief UMAX8 (SIMD 8-bit Unsigned Maximum)
12304  * \details
12305  * **Type**: SIMD
12306  *
12307  * **Syntax**:\n
12308  * ~~~
12309  * UMAX8 Rd, Rs1, Rs2
12310  * ~~~
12311  *
12312  * **Purpose**:\n
12313  * Do 8-bit unsigned integer elements finding maximum operations simultaneously.
12314  *
12315  * **Description**:\n
12316  * This instruction compares the 8-bit unsigned integer elements in Rs1 with the 8-bit
12317  * unsigned integer elements in Rs2 and selects the numbers that is greater than the other one. The
12318  * selected results are written to Rd.
12319  *
12320  * **Operations**:\n
12321  * ~~~
12322  * Rd.B[x] = (Rs1.B[x] >u Rs2.B[x])? Rs1.B[x] : Rs2.B[x];
12323  * for RV32: x=3...0,
12324  * for RV64: x=7...0
12325  * ~~~
12326  *
12327  * \param [in]  a    unsigned long type of value stored in a
12328  * \param [in]  b    unsigned long type of value stored in b
12329  * \return value stored in unsigned long type
12330  */
__RV_UMAX8(unsigned long a,unsigned long b)12331 __STATIC_FORCEINLINE unsigned long __RV_UMAX8(unsigned long a, unsigned long b)
12332 {
12333     register unsigned long result;
12334     __ASM volatile("umax8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
12335     return result;
12336 }
12337 /* ===== Inline Function End for 3.167. UMAX8 ===== */
12338 
12339 /* ===== Inline Function Start for 3.168. UMAX16 ===== */
12340 /**
12341  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
12342  * \brief UMAX16 (SIMD 16-bit Unsigned Maximum)
12343  * \details
12344  * **Type**: SIMD
12345  *
12346  * **Syntax**:\n
12347  * ~~~
12348  * UMAX16 Rd, Rs1, Rs2
12349  * ~~~
12350  *
12351  * **Purpose**:\n
12352  * Do 16-bit unsigned integer elements finding maximum operations simultaneously.
12353  *
12354  * **Description**:\n
12355  * This instruction compares the 16-bit unsigned integer elements in Rs1 with the 16-bit
12356  * unsigned integer elements in Rs2 and selects the numbers that is greater than the other one. The
12357  * selected results are written to Rd.
12358  *
12359  * **Operations**:\n
12360  * ~~~
12361  * Rd.H[x] = (Rs1.H[x] >u Rs2.H[x])? Rs1.H[x] : Rs2.H[x];
12362  * for RV32: x=1...0,
12363  * for RV64: x=3...0
12364  * ~~~
12365  *
12366  * \param [in]  a    unsigned long type of value stored in a
12367  * \param [in]  b    unsigned long type of value stored in b
12368  * \return value stored in unsigned long type
12369  */
__RV_UMAX16(unsigned long a,unsigned long b)12370 __STATIC_FORCEINLINE unsigned long __RV_UMAX16(unsigned long a, unsigned long b)
12371 {
12372     register unsigned long result;
12373     __ASM volatile("umax16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
12374     return result;
12375 }
12376 /* ===== Inline Function End for 3.168. UMAX16 ===== */
12377 
12378 /* ===== Inline Function Start for 3.169. UMIN8 ===== */
12379 /**
12380  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
12381  * \brief UMIN8 (SIMD 8-bit Unsigned Minimum)
12382  * \details
12383  * **Type**: SIMD
12384  *
12385  * **Syntax**:\n
12386  * ~~~
12387  * UMIN8 Rd, Rs1, Rs2
12388  * ~~~
12389  *
12390  * **Purpose**:\n
12391  * Do 8-bit unsigned integer elements finding minimum operations simultaneously.
12392  *
12393  * **Description**:\n
12394  * This instruction compares the 8-bit unsigned integer elements in Rs1 with the 8-bit
12395  * unsigned integer elements in Rs2 and, for each pair, selects the number that is less than the
12396  * other one. The selected results are written to Rd.
12397  *
12398  * **Operations**:\n
12399  * ~~~
12400  * Rd.B[x] = (Rs1.B[x] <u Rs2.B[x])? Rs1.B[x] : Rs2.B[x];
12401  * for RV32: x=3...0,
12402  * for RV64: x=7...0
12403  * ~~~
12404  *
12405  * \param [in]  a    unsigned long type of value stored in a
12406  * \param [in]  b    unsigned long type of value stored in b
12407  * \return value stored in unsigned long type
12408  */
__RV_UMIN8(unsigned long a,unsigned long b)12409 __STATIC_FORCEINLINE unsigned long __RV_UMIN8(unsigned long a, unsigned long b)
12410 {
12411     register unsigned long result;
12412     __ASM volatile("umin8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
12413     return result;
12414 }
12415 /* ===== Inline Function End for 3.169. UMIN8 ===== */
12416 
12417 /* ===== Inline Function Start for 3.170. UMIN16 ===== */
12418 /**
12419  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
12420  * \brief UMIN16 (SIMD 16-bit Unsigned Minimum)
12421  * \details
12422  * **Type**: SIMD
12423  *
12424  * **Syntax**:\n
12425  * ~~~
12426  * UMIN16 Rd, Rs1, Rs2
12427  * ~~~
12428  *
12429  * **Purpose**:\n
12430  * Do 16-bit unsigned integer elements finding minimum operations simultaneously.
12431  *
12432  * **Description**:\n
12433  * This instruction compares the 16-bit unsigned integer elements in Rs1 with the 16-bit
12434  * unsigned integer elements in Rs2 and, for each pair, selects the number that is less than the
12435  * other one. The selected results are written to Rd.
12436  *
12437  * **Operations**:\n
12438  * ~~~
12439  * Rd.H[x] = (Rs1.H[x] <u Rs2.H[x])? Rs1.H[x] : Rs2.H[x];
12440  * for RV32: x=1...0,
12441  * for RV64: x=3...0
12442  * ~~~
12443  *
12444  * \param [in]  a    unsigned long type of value stored in a
12445  * \param [in]  b    unsigned long type of value stored in b
12446  * \return value stored in unsigned long type
12447  */
__RV_UMIN16(unsigned long a,unsigned long b)12448 __STATIC_FORCEINLINE unsigned long __RV_UMIN16(unsigned long a, unsigned long b)
12449 {
12450     register unsigned long result;
12451     __ASM volatile("umin16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
12452     return result;
12453 }
12454 /* ===== Inline Function End for 3.170. UMIN16 ===== */
12455 
12456 /* ===== Inline Function Start for 3.171. UMSR64 ===== */
12457 /**
12458  * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
12459  * \brief UMSR64 (Unsigned Multiply and Subtract from 64-Bit Data)
12460  * \details
12461  * **Type**: DSP (64-bit Profile)
12462  *
12463  * **Syntax**:\n
12464  * ~~~
12465  * UMSR64 Rd, Rs1, Rs2
12466  * ~~~
12467  *
12468  * **Purpose**:\n
12469  * Multiply the 32-bit unsigned elements in two registers and subtract the 64-bit
12470  * multiplication results from the 64-bit unsigned data of a pair of registers (RV32) or a register (RV64).
12471  * The result is written back to the pair of registers (RV32) or a register (RV64).
12472  *
12473  * **RV32 Description**:\n
12474  * This instruction multiplies the 32-bit unsigned data of Rs1 with that of Rs2. It
12475  * subtracts the 64-bit multiplication result from the 64-bit unsigned data of an even/odd pair of
12476  * registers specified by Rd(4,1). The subtraction result is written back to the even/odd pair of registers
12477  * specified by Rd(4,1).
12478  * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
12479  * includes register 2d and 2d+1.
12480  * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
12481  * of the pair contains the low 32-bit of the result.
12482  *
12483  * **RV64 Description**:\n
12484  * This instruction multiplies the 32-bit unsigned elements of Rs1 with that of Rs2.
12485  * It subtracts the 64-bit multiplication results from the 64-bit unsigned data of Rd. The subtraction
12486  * result is written back to Rd.
12487  *
12488  * **Operations**:\n
12489  * ~~~
12490  * * RV32:
12491  * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
12492  * R[t_H].R[t_L] = R[t_H].R[t_L] - (Rs1 * Rs2);
12493  * * RV64:
12494  * Rd = Rd - (Rs1.W[0] u* Rs2.W[0]) - (Rs1.W[1] u* Rs2.W[1]);
12495  * ~~~
12496  *
12497  * \param [in]  t    unsigned long long type of value stored in t
12498  * \param [in]  a    unsigned long type of value stored in a
12499  * \param [in]  b    unsigned long type of value stored in b
12500  * \return value stored in unsigned long long type
12501  */
__RV_UMSR64(unsigned long long t,unsigned long a,unsigned long b)12502 __STATIC_FORCEINLINE unsigned long long __RV_UMSR64(unsigned long long t, unsigned long a, unsigned long b)
12503 {
12504     __ASM volatile("umsr64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
12505     return t;
12506 }
12507 /* ===== Inline Function End for 3.171. UMSR64 ===== */
12508 
12509 /* ===== Inline Function Start for 3.172.1. UMUL8 ===== */
12510 /**
12511  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
12512  * \brief UMUL8 (SIMD Unsigned 8-bit Multiply)
12513  * \details
12514  * **Type**: SIMD
12515  *
12516  * **Syntax**:\n
12517  * ~~~
12518  * UMUL8 Rd, Rs1, Rs2
12519  * UMULX8 Rd, Rs1, Rs2
12520  * ~~~
12521  *
12522  * **Purpose**:\n
12523  * Do unsigned 8-bit multiplications and generate four 16-bit results simultaneously.
12524  *
12525  * **RV32 Description**:\n
12526  * For the `UMUL8` instruction, multiply the unsigned 8-bit data elements of Rs1
12527  * with the corresponding unsigned 8-bit data elements of Rs2.
12528  * For the `UMULX8` instruction, multiply the first and second unsigned 8-bit data elements of Rs1
12529  * with the second and first unsigned 8-bit data elements of Rs2. At the same time, multiply the third
12530  * and fourth unsigned 8-bit data elements of Rs1 with the fourth and third unsigned 8-bit data
12531  * elements of Rs2.
12532  * The four 16-bit results are then written into an even/odd pair of registers specified by Rd(4,1).
12533  * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
12534  * includes register 2d and 2d+1.
12535  * The odd `2d+1` register of the pair contains the two 16-bit results calculated from the top part of
12536  * Rs1 and the even `2d` register of the pair contains the two 16-bit results calculated from the bottom
12537  * part of Rs1.
12538  *
12539  * **RV64 Description**:\n
12540  * For the `UMUL8` instruction, multiply the unsigned 8-bit data elements of Rs1
12541  * with the corresponding unsigned 8-bit data elements of Rs2.
12542  * For the `UMULX8` instruction, multiply the first and second unsigned 8-bit data elements of Rs1
12543  * with the second and first unsigned 8-bit data elements of Rs2. At the same time, multiply the third
12544  * and fourth unsigned 8-bit data elements of Rs1 with the fourth and third unsigned 8-bit data
12545  * elements of Rs2.
12546  * The four 16-bit results are then written into Rd. The Rd.W[1] contains the two 16-bit results
12547  * calculated from the top part of Rs1 and the Rd.W[0] contains the two 16-bit results calculated from
12548  * the bottom part of Rs1.
12549  *
12550  * **Operations**:\n
12551  * ~~~
12552  * * RV32:
12553  * if (is `UMUL8`) {
12554  *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
12555  *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
12556  * } else if (is `UMULX8`) {
12557  *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
12558  *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
12559  * }
12560  * rest[x/2] = op1t[x/2] u* op2t[x/2];
12561  * resb[x/2] = op1b[x/2] u* op2b[x/2];
12562  * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
12563  * R[t_H].H[1] = rest[1]; R[t_H].H[0] = resb[1];
12564  * R[t_L].H[1] = rest[0]; R[t_L].H[0] = resb[0];
12565  * x = 0 and 2
12566  * * RV64:
12567  * if (is `UMUL8`) {
12568  *     op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
12569  *     op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
12570  * } else if (is `UMULX8`) {
12571  *     op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
12572  *     op1b[x/2]  =  Rs1.B[x]; op2b[x/2]  =  Rs2.B[x+1];  //  Rs1  bottom
12573  * }
12574  * rest[x/2]  =  op1t[x/2]  u*  op2t[x/2];
12575  * resb[x/2]  =  op1b[x/2]  u*  op2b[x/2];
12576  * t_L  =  CONCAT(Rd(4,1),1'b0); t_H  =  CONCAT(Rd(4,1),1'b1);
12577  * Rd.W[1].H[1] = rest[1]; Rd.W[1].H[0] = resb[1];
12578  * Rd.W[0].H[1] = rest[0]; Rd.W[0].H[0] = resb[0]; x = 0 and 2
12579  * ~~~
12580  *
12581  * \param [in]  a    unsigned int type of value stored in a
12582  * \param [in]  b    unsigned int type of value stored in b
12583  * \return value stored in unsigned long long type
12584  */
__RV_UMUL8(unsigned int a,unsigned int b)12585 __STATIC_FORCEINLINE unsigned long long __RV_UMUL8(unsigned int a, unsigned int b)
12586 {
12587     register unsigned long long result;
12588     __ASM volatile("umul8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
12589     return result;
12590 }
12591 /* ===== Inline Function End for 3.172.1. UMUL8 ===== */
12592 
12593 /* ===== Inline Function Start for 3.172.2. UMULX8 ===== */
12594 /**
12595  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
12596  * \brief UMULX8 (SIMD Unsigned Crossed 8-bit Multiply)
12597  * \details
12598  * **Type**: SIMD
12599  *
12600  * **Syntax**:\n
12601  * ~~~
12602  * UMUL8 Rd, Rs1, Rs2
12603  * UMULX8 Rd, Rs1, Rs2
12604  * ~~~
12605  *
12606  * **Purpose**:\n
12607  * Do unsigned 8-bit multiplications and generate four 16-bit results simultaneously.
12608  *
12609  * **RV32 Description**:\n
12610  * For the `UMUL8` instruction, multiply the unsigned 8-bit data elements of Rs1
12611  * with the corresponding unsigned 8-bit data elements of Rs2.
12612  * For the `UMULX8` instruction, multiply the first and second unsigned 8-bit data elements of Rs1
12613  * with the second and first unsigned 8-bit data elements of Rs2. At the same time, multiply the third
12614  * and fourth unsigned 8-bit data elements of Rs1 with the fourth and third unsigned 8-bit data
12615  * elements of Rs2.
12616  * The four 16-bit results are then written into an even/odd pair of registers specified by Rd(4,1).
12617  * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
12618  * includes register 2d and 2d+1.
12619  * The odd `2d+1` register of the pair contains the two 16-bit results calculated from the top part of
12620  * Rs1 and the even `2d` register of the pair contains the two 16-bit results calculated from the bottom
12621  * part of Rs1.
12622  *
12623  * **RV64 Description**:\n
12624  * For the `UMUL8` instruction, multiply the unsigned 8-bit data elements of Rs1
12625  * with the corresponding unsigned 8-bit data elements of Rs2.
12626  * For the `UMULX8` instruction, multiply the first and second unsigned 8-bit data elements of Rs1
12627  * with the second and first unsigned 8-bit data elements of Rs2. At the same time, multiply the third
12628  * and fourth unsigned 8-bit data elements of Rs1 with the fourth and third unsigned 8-bit data
12629  * elements of Rs2.
12630  * The four 16-bit results are then written into Rd. The Rd.W[1] contains the two 16-bit results
12631  * calculated from the top part of Rs1 and the Rd.W[0] contains the two 16-bit results calculated from
12632  * the bottom part of Rs1.
12633  *
12634  * **Operations**:\n
12635  * ~~~
12636  * * RV32:
12637  * if (is `UMUL8`) {
12638  *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
12639  *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
12640  * } else if (is `UMULX8`) {
12641  *   op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
12642  *   op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
12643  * }
12644  * rest[x/2] = op1t[x/2] u* op2t[x/2];
12645  * resb[x/2] = op1b[x/2] u* op2b[x/2];
12646  * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
12647  * R[t_H].H[1] = rest[1]; R[t_H].H[0] = resb[1];
12648  * R[t_L].H[1] = rest[0]; R[t_L].H[0] = resb[0];
12649  * x = 0 and 2
12650  * * RV64:
12651  * if (is `UMUL8`) {
12652  *     op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
12653  *     op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
12654  * } else if (is `UMULX8`) {
12655  *     op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
12656  *     op1b[x/2]  =  Rs1.B[x]; op2b[x/2]  =  Rs2.B[x+1];  //  Rs1  bottom
12657  * }
12658  * rest[x/2]  =  op1t[x/2]  u*  op2t[x/2];
12659  * resb[x/2]  =  op1b[x/2]  u*  op2b[x/2];
12660  * t_L  =  CONCAT(Rd(4,1),1'b0); t_H  =  CONCAT(Rd(4,1),1'b1);
12661  * Rd.W[1].H[1] = rest[1]; Rd.W[1].H[0] = resb[1];
12662  * Rd.W[0].H[1] = rest[0]; Rd.W[0].H[0] = resb[0]; x = 0 and 2
12663  * ~~~
12664  *
12665  * \param [in]  a    unsigned int type of value stored in a
12666  * \param [in]  b    unsigned int type of value stored in b
12667  * \return value stored in unsigned long long type
12668  */
__RV_UMULX8(unsigned int a,unsigned int b)12669 __STATIC_FORCEINLINE unsigned long long __RV_UMULX8(unsigned int a, unsigned int b)
12670 {
12671     register unsigned long long result;
12672     __ASM volatile("umulx8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
12673     return result;
12674 }
12675 /* ===== Inline Function End for 3.172.2. UMULX8 ===== */
12676 
12677 /* ===== Inline Function Start for 3.173.1. UMUL16 ===== */
12678 /**
12679  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
12680  * \brief UMUL16 (SIMD Unsigned 16-bit Multiply)
12681  * \details
12682  * **Type**: SIMD
12683  *
12684  * **Syntax**:\n
12685  * ~~~
12686  * UMUL16 Rd, Rs1, Rs2
12687  * UMULX16 Rd, Rs1, Rs2
12688  * ~~~
12689  *
12690  * **Purpose**:\n
12691  * Do unsigned 16-bit multiplications and generate two 32-bit results simultaneously.
12692  *
12693  * **RV32 Description**:\n
12694  * For the `UMUL16` instruction, multiply the top 16-bit U16 content of Rs1 with
12695  * the top 16-bit U16 content of Rs2. At the same time, multiply the bottom 16-bit U16 content of Rs1
12696  * with the bottom 16-bit U16 content of Rs2.
12697  * For the `UMULX16` instruction, multiply the top 16-bit U16 content of Rs1 with the bottom 16-bit
12698  * U16 content of Rs2. At the same time, multiply the bottom 16-bit U16 content of Rs1 with the top 16-
12699  * bit U16 content of Rs2.
12700  * The two U32 results are then written into an even/odd pair of registers specified by Rd(4,1). Rd(4,1),
12701  * i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair includes
12702  * register 2d and 2d+1.
12703  * The odd `2d+1` register of the pair contains the 32-bit result calculated from the top part of Rs1 and
12704  * the even `2d` register of the pair contains the 32-bit result calculated from the bottom part of Rs1.
12705  *
12706  * **RV64 Description**:\n
12707  * For the `UMUL16` instruction, multiply the top 16-bit U16 content of the lower
12708  * 32-bit word in Rs1 with the top 16-bit U16 content of the lower 32-bit word in Rs2. At the same time,
12709  * multiply the bottom 16-bit U16 content of the lower 32-bit word in Rs1 with the bottom 16-bit U16
12710  * content of the lower 32-bit word in Rs2.
12711  * For the `UMULX16` instruction, multiply the top 16-bit U16 content of the lower 32-bit word in Rs1
12712  * with the bottom 16-bit U16 content of the lower 32-bit word in Rs2. At the same time, multiply the
12713  * bottom 16-bit U16 content of the lower 32-bit word in Rs1 with the top 16-bit U16 content of the
12714  * lower 32-bit word in Rs2.
12715  * The two 32-bit U32 results are then written into Rd. The result calculated from the top 16-bit of the
12716  * lower 32-bit word in Rs1 is written to Rd.W[1]. And the result calculated from the bottom 16-bit of
12717  * the lower 32-bit word in Rs1 is written to Rd.W[0].
12718  *
12719  * **Operations**:\n
12720  * ~~~
12721  * * RV32:
12722  * if (is `UMUL16`) {
12723  *   op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
12724  *   op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
12725  * } else if (is `UMULX16`) {
12726  *   op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
12727  *   op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
12728  * }
12729  * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
12730  *   res = aop u* bop;
12731  * }
12732  * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
12733  * R[t_H] = rest;
12734  * R[t_L] = resb;
12735  * * RV64:
12736  * if (is `UMUL16`) {
12737  *   op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
12738  *   op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
12739  * } else if (is `UMULX16`) {
12740  *   op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
12741  *   op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
12742  * }
12743  * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
12744  *   res = aop u* bop;
12745  * }
12746  * Rd.W[1] = rest;
12747  * Rd.W[0] = resb;
12748  * ~~~
12749  *
12750  * \param [in]  a    unsigned int type of value stored in a
12751  * \param [in]  b    unsigned int type of value stored in b
12752  * \return value stored in unsigned long long type
12753  */
__RV_UMUL16(unsigned int a,unsigned int b)12754 __STATIC_FORCEINLINE unsigned long long __RV_UMUL16(unsigned int a, unsigned int b)
12755 {
12756     register unsigned long long result;
12757     __ASM volatile("umul16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
12758     return result;
12759 }
12760 /* ===== Inline Function End for 3.173.1. UMUL16 ===== */
12761 
12762 /* ===== Inline Function Start for 3.173.2. UMULX16 ===== */
12763 /**
12764  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
12765  * \brief UMULX16 (SIMD Unsigned Crossed 16-bit Multiply)
12766  * \details
12767  * **Type**: SIMD
12768  *
12769  * **Syntax**:\n
12770  * ~~~
12771  * UMUL16 Rd, Rs1, Rs2
12772  * UMULX16 Rd, Rs1, Rs2
12773  * ~~~
12774  *
12775  * **Purpose**:\n
12776  * Do unsigned 16-bit multiplications and generate two 32-bit results simultaneously.
12777  *
12778  * **RV32 Description**:\n
12779  * For the `UMUL16` instruction, multiply the top 16-bit U16 content of Rs1 with
12780  * the top 16-bit U16 content of Rs2. At the same time, multiply the bottom 16-bit U16 content of Rs1
12781  * with the bottom 16-bit U16 content of Rs2.
12782  * For the `UMULX16` instruction, multiply the top 16-bit U16 content of Rs1 with the bottom 16-bit
12783  * U16 content of Rs2. At the same time, multiply the bottom 16-bit U16 content of Rs1 with the top 16-
12784  * bit U16 content of Rs2.
12785  * The two U32 results are then written into an even/odd pair of registers specified by Rd(4,1). Rd(4,1),
12786  * i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair includes
12787  * register 2d and 2d+1.
12788  * The odd `2d+1` register of the pair contains the 32-bit result calculated from the top part of Rs1 and
12789  * the even `2d` register of the pair contains the 32-bit result calculated from the bottom part of Rs1.
12790  *
12791  * **RV64 Description**:\n
12792  * For the `UMUL16` instruction, multiply the top 16-bit U16 content of the lower
12793  * 32-bit word in Rs1 with the top 16-bit U16 content of the lower 32-bit word in Rs2. At the same time,
12794  * multiply the bottom 16-bit U16 content of the lower 32-bit word in Rs1 with the bottom 16-bit U16
12795  * content of the lower 32-bit word in Rs2.
12796  * For the `UMULX16` instruction, multiply the top 16-bit U16 content of the lower 32-bit word in Rs1
12797  * with the bottom 16-bit U16 content of the lower 32-bit word in Rs2. At the same time, multiply the
12798  * bottom 16-bit U16 content of the lower 32-bit word in Rs1 with the top 16-bit U16 content of the
12799  * lower 32-bit word in Rs2.
12800  * The two 32-bit U32 results are then written into Rd. The result calculated from the top 16-bit of the
12801  * lower 32-bit word in Rs1 is written to Rd.W[1]. And the result calculated from the bottom 16-bit of
12802  * the lower 32-bit word in Rs1 is written to Rd.W[0].
12803  *
12804  * **Operations**:\n
12805  * ~~~
12806  * * RV32:
12807  * if (is `UMUL16`) {
12808  *   op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
12809  *   op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
12810  * } else if (is `UMULX16`) {
12811  *   op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
12812  *   op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
12813  * }
12814  * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
12815  *   res = aop u* bop;
12816  * }
12817  * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
12818  * R[t_H] = rest;
12819  * R[t_L] = resb;
12820  * * RV64:
12821  * if (is `UMUL16`) {
12822  *   op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
12823  *   op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
12824  * } else if (is `UMULX16`) {
12825  *   op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
12826  *   op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
12827  * }
12828  * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
12829  *   res = aop u* bop;
12830  * }
12831  * Rd.W[1] = rest;
12832  * Rd.W[0] = resb;
12833  * ~~~
12834  *
12835  * \param [in]  a    unsigned int type of value stored in a
12836  * \param [in]  b    unsigned int type of value stored in b
12837  * \return value stored in unsigned long long type
12838  */
__RV_UMULX16(unsigned int a,unsigned int b)12839 __STATIC_FORCEINLINE unsigned long long __RV_UMULX16(unsigned int a, unsigned int b)
12840 {
12841     register unsigned long long result;
12842     __ASM volatile("umulx16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
12843     return result;
12844 }
12845 /* ===== Inline Function End for 3.173.2. UMULX16 ===== */
12846 
12847 /* ===== Inline Function Start for 3.174. URADD8 ===== */
12848 /**
12849  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
12850  * \brief URADD8 (SIMD 8-bit Unsigned Halving Addition)
12851  * \details
12852  * **Type**: SIMD
12853  *
12854  * **Syntax**:\n
12855  * ~~~
12856  * URADD8 Rd, Rs1, Rs2
12857  * ~~~
12858  *
12859  * **Purpose**:\n
12860  * Do 8-bit unsigned integer element additions simultaneously. The results are halved to
12861  * avoid overflow or saturation.
12862  *
12863  * **Description**:\n
12864  * This instruction adds the 8-bit unsigned integer elements in Rs1 with the 8-bit
12865  * unsigned integer elements in Rs2. The results are first logically right-shifted by 1 bit and then
12866  * written to Rd.
12867  *
12868  * **Examples**:\n
12869  * ~~~
12870  * * Ra = 0x7F, Rb = 0x7F, Rt = 0x7F
12871  * * Ra = 0x80, Rb = 0x80, Rt = 0x80
12872  * * Ra = 0x40, Rb = 0x80, Rt = 0x60
12873  * ~~~
12874  *
12875  * **Operations**:\n
12876  * ~~~
12877  * Rd.B[x] = (Rs1.B[x] + Rs2.B[x]) u>> 1;
12878  * for RV32: x=3...0,
12879  * for RV64: x=7...0
12880  * ~~~
12881  *
12882  * \param [in]  a    unsigned long type of value stored in a
12883  * \param [in]  b    unsigned long type of value stored in b
12884  * \return value stored in unsigned long type
12885  */
__RV_URADD8(unsigned long a,unsigned long b)12886 __STATIC_FORCEINLINE unsigned long __RV_URADD8(unsigned long a, unsigned long b)
12887 {
12888     register unsigned long result;
12889     __ASM volatile("uradd8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
12890     return result;
12891 }
12892 /* ===== Inline Function End for 3.174. URADD8 ===== */
12893 
12894 /* ===== Inline Function Start for 3.175. URADD16 ===== */
12895 /**
12896  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
12897  * \brief URADD16 (SIMD 16-bit Unsigned Halving Addition)
12898  * \details
12899  * **Type**: SIMD
12900  *
12901  * **Syntax**:\n
12902  * ~~~
12903  * URADD16 Rd, Rs1, Rs2
12904  * ~~~
12905  *
12906  * **Purpose**:\n
12907  * Do 16-bit unsigned integer element additions simultaneously. The results are halved to
12908  * avoid overflow or saturation.
12909  *
12910  * **Description**:\n
12911  * This instruction adds the 16-bit unsigned integer elements in Rs1 with the 16-bit
12912  * unsigned integer elements in Rs2. The results are first logically right-shifted by 1 bit and then
12913  * written to Rd.
12914  *
12915  * **Examples**:\n
12916  * ~~~
12917  * * Ra = 0x7FFF, Rb = 0x7FFF, Rt = 0x7FFF
12918  * * Ra = 0x8000, Rb = 0x8000, Rt = 0x8000
12919  * * Ra = 0x4000, Rb = 0x8000, Rt = 0x6000
12920  * ~~~
12921  *
12922  * **Operations**:\n
12923  * ~~~
12924  * Rd.H[x] = (Rs1.H[x] + Rs2.H[x]) u>> 1;
12925  * for RV32: x=1...0,
12926  * for RV64: x=3...0
12927  * ~~~
12928  *
12929  * \param [in]  a    unsigned long type of value stored in a
12930  * \param [in]  b    unsigned long type of value stored in b
12931  * \return value stored in unsigned long type
12932  */
__RV_URADD16(unsigned long a,unsigned long b)12933 __STATIC_FORCEINLINE unsigned long __RV_URADD16(unsigned long a, unsigned long b)
12934 {
12935     register unsigned long result;
12936     __ASM volatile("uradd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
12937     return result;
12938 }
12939 /* ===== Inline Function End for 3.175. URADD16 ===== */
12940 
12941 /* ===== Inline Function Start for 3.176. URADD64 ===== */
12942 /**
12943  * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
12944  * \brief URADD64 (64-bit Unsigned Halving Addition)
12945  * \details
12946  * **Type**: DSP (64-bit Profile)
12947  *
12948  * **Syntax**:\n
12949  * ~~~
12950  * URADD64 Rd, Rs1, Rs2
12951  * ~~~
12952  *
12953  * **Purpose**:\n
12954  * Add two 64-bit unsigned integers. The result is halved to avoid overflow or saturation.
12955  *
12956  * **RV32 Description**:\n
12957  * This instruction adds the 64-bit unsigned integer of an even/odd pair of registers
12958  * specified by Rs1(4,1) with the 64-bit unsigned integer of an even/odd pair of registers specified by
12959  * Rs2(4,1). The 64-bit addition result is first logically right-shifted by 1 bit and then written to an
12960  * even/odd pair of registers specified by Rd(4,1).
12961  * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
12962  * includes register 2d and 2d+1.
12963  * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
12964  * of the pair contains the low 32-bit of the result.
12965  *
12966  * **RV64 Description**:\n
12967  * This instruction adds the 64-bit unsigned integer in Rs1 with the 64-bit unsigned
12968  * integer Rs2. The 64-bit addition result is first logically right-shifted by 1 bit and then written to Rd.
12969  *
12970  * **Operations**:\n
12971  * ~~~
12972  * * RV32:
12973  * t_L = CONCAT(Rt(4,1),1'b0); t_H = CONCAT(Rt(4,1),1'b1);
12974  * a_L = CONCAT(Ra(4,1),1'b0); a_H = CONCAT(Ra(4,1),1'b1);
12975  * b_L = CONCAT(Rb(4,1),1'b0); b_H = CONCAT(Rb(4,1),1'b1);
12976  * R[t_H].R[t_L] = (R[a_H].R[a_L] + R[b_H].R[b_L]) u>> 1;
12977  * * RV64:
12978  * Rd = (Rs1 + Rs2) u>> 1;
12979  * ~~~
12980  *
12981  * \param [in]  a    unsigned long long type of value stored in a
12982  * \param [in]  b    unsigned long long type of value stored in b
12983  * \return value stored in unsigned long long type
12984  */
__RV_URADD64(unsigned long long a,unsigned long long b)12985 __STATIC_FORCEINLINE unsigned long long __RV_URADD64(unsigned long long a, unsigned long long b)
12986 {
12987     register unsigned long long result;
12988     __ASM volatile("uradd64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
12989     return result;
12990 }
12991 /* ===== Inline Function End for 3.176. URADD64 ===== */
12992 
12993 /* ===== Inline Function Start for 3.177. URADDW ===== */
12994 /**
12995  * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
12996  * \brief URADDW (32-bit Unsigned Halving Addition)
12997  * \details
12998  * **Type**: DSP
12999  *
13000  * **Syntax**:\n
13001  * ~~~
13002  * URADDW Rd, Rs1, Rs2
13003  * ~~~
13004  *
13005  * **Purpose**:\n
13006  * Add 32-bit unsigned integers and the results are halved to avoid overflow or saturation.
13007  *
13008  * **Description**:\n
13009  * This instruction adds the first 32-bit unsigned integer in Rs1 with the first 32-bit
13010  * unsigned integer in Rs2. The result is first logically right-shifted by 1 bit and then sign-extended and
13011  * written to Rd.
13012  *
13013  * **Examples**:\n
13014  * ~~~
13015  * * Ra = 0x7FFFFFFF, Rb = 0x7FFFFFFF, Rt = 0x7FFFFFFF
13016  * * Ra = 0x80000000, Rb = 0x80000000, Rt = 0x80000000
13017  * * Ra = 0x40000000, Rb = 0x80000000, Rt = 0x60000000
13018  * ~~~
13019  *
13020  * **Operations**:\n
13021  * ~~~
13022  * * RV32:
13023  * Rd[31:0] = (Rs1[31:0] + Rs2[31:0]) u>> 1;
13024  * * RV64:
13025  * resw[31:0] = (Rs1[31:0] + Rs2[31:0]) u>> 1;
13026  * Rd[63:0] = SE(resw[31:0]);
13027  * ~~~
13028  *
13029  * \param [in]  a    unsigned int type of value stored in a
13030  * \param [in]  b    unsigned int type of value stored in b
13031  * \return value stored in unsigned long type
13032  */
__RV_URADDW(unsigned int a,unsigned int b)13033 __STATIC_FORCEINLINE unsigned long __RV_URADDW(unsigned int a, unsigned int b)
13034 {
13035     register unsigned long result;
13036     __ASM volatile("uraddw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
13037     return result;
13038 }
13039 /* ===== Inline Function End for 3.177. URADDW ===== */
13040 
13041 /* ===== Inline Function Start for 3.178. URCRAS16 ===== */
13042 /**
13043  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
13044  * \brief URCRAS16 (SIMD 16-bit Unsigned Halving Cross Addition & Subtraction)
13045  * \details
13046  * **Type**: SIMD
13047  *
13048  * **Syntax**:\n
13049  * ~~~
13050  * URCRAS16 Rd, Rs1, Rs2
13051  * ~~~
13052  *
13053  * **Purpose**:\n
13054  * Do 16-bit unsigned integer element addition and 16-bit unsigned integer element
13055  * subtraction in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
13056  * The results are halved to avoid overflow or saturation.
13057  *
13058  * **Description**:\n
13059  * This instruction adds the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1
13060  * with the 16-bit unsigned integer in [15:0] of 32-bit chunks in Rs2, and subtracts the 16-bit unsigned
13061  * integer in [31:16] of 32-bit chunks in Rs2 from the 16-bit unsigned integer in [15:0] of 32-bit chunks
13062  * in Rs1. The element results are first logically right-shifted by 1 bit and then written to [31:16] of 32-
13063  * bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
13064  *
13065  * **Examples**:\n
13066  * ~~~
13067  * Please see `URADD16` and `URSUB16` instructions.
13068  * ~~~
13069  *
13070  * **Operations**:\n
13071  * ~~~
13072  * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][15:0]) u>> 1;
13073  * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][31:16]) u>> 1;
13074  * for RV32, x=0
13075  * for RV64, x=1...0
13076  * ~~~
13077  *
13078  * \param [in]  a    unsigned long type of value stored in a
13079  * \param [in]  b    unsigned long type of value stored in b
13080  * \return value stored in unsigned long type
13081  */
__RV_URCRAS16(unsigned long a,unsigned long b)13082 __STATIC_FORCEINLINE unsigned long __RV_URCRAS16(unsigned long a, unsigned long b)
13083 {
13084     register unsigned long result;
13085     __ASM volatile("urcras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
13086     return result;
13087 }
13088 /* ===== Inline Function End for 3.178. URCRAS16 ===== */
13089 
13090 /* ===== Inline Function Start for 3.179. URCRSA16 ===== */
13091 /**
13092  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
13093  * \brief URCRSA16 (SIMD 16-bit Unsigned Halving Cross Subtraction & Addition)
13094  * \details
13095  * **Type**: SIMD
13096  *
13097  * **Syntax**:\n
13098  * ~~~
13099  * URCRSA16 Rd, Rs1, Rs2
13100  * ~~~
13101  *
13102  * **Purpose**:\n
13103  * Do 16-bit unsigned integer element subtraction and 16-bit unsigned integer element
13104  * addition in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
13105  * The results are halved to avoid overflow or saturation.
13106  *
13107  * **Description**:\n
13108  * This instruction subtracts the 16-bit unsigned integer in [15:0] of 32-bit chunks in Rs2
13109  * from the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1, and adds the 16-bit unsigned
13110  * integer in [15:0] of 32-bit chunks in Rs1 with the 16-bit unsigned integer in [31:16] of 32-bit chunks
13111  * in Rs2. The two results are first logically right-shifted by 1 bit and then written to [31:16] of 32-bit
13112  * chunks in Rd and [15:0] of 32-bit chunks in Rd.
13113  *
13114  * **Examples**:\n
13115  * ~~~
13116  * Please see `URADD16` and `URSUB16` instructions.
13117  * ~~~
13118  *
13119  * **Operations**:\n
13120  * ~~~
13121  * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][15:0]) u>> 1;
13122  * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][31:16]) u>> 1;
13123  * for RV32, x=0
13124  * for RV64, x=1...0
13125  * ~~~
13126  *
13127  * \param [in]  a    unsigned long type of value stored in a
13128  * \param [in]  b    unsigned long type of value stored in b
13129  * \return value stored in unsigned long type
13130  */
__RV_URCRSA16(unsigned long a,unsigned long b)13131 __STATIC_FORCEINLINE unsigned long __RV_URCRSA16(unsigned long a, unsigned long b)
13132 {
13133     register unsigned long result;
13134     __ASM volatile("urcrsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
13135     return result;
13136 }
13137 /* ===== Inline Function End for 3.179. URCRSA16 ===== */
13138 
13139 /* ===== Inline Function Start for 3.180. URSTAS16 ===== */
13140 /**
13141  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
13142  * \brief URSTAS16 (SIMD 16-bit Unsigned Halving Straight Addition & Subtraction)
13143  * \details
13144  * **Type**: SIMD
13145  *
13146  * **Syntax**:\n
13147  * ~~~
13148  * URSTAS16 Rd, Rs1, Rs2
13149  * ~~~
13150  *
13151  * **Purpose**:\n
13152  * Do 16-bit unsigned integer element addition and 16-bit unsigned integer element
13153  * subtraction in a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit
13154  * chunks. The results are halved to avoid overflow or saturation.
13155  *
13156  * **Description**:\n
13157  * This instruction adds the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1
13158  * with the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs2, and subtracts the 16-bit unsigned
13159  * integer in [15:0] of 32-bit chunks in Rs2 from the 16-bit unsigned integer in [15:0] of 32-bit chunks
13160  * in Rs1. The element results are first logically right-shifted by 1 bit and then written to [31:16] of 32-
13161  * bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
13162  *
13163  * **Examples**:\n
13164  * ~~~
13165  * Please see `URADD16` and `URSUB16` instructions.
13166  * ~~~
13167  *
13168  * **Operations**:\n
13169  * ~~~
13170  * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][31:16]) u>> 1;
13171  * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][15:0]) u>> 1;
13172  * for RV32, x=0
13173  * for RV64, x=1...0
13174  * ~~~
13175  *
13176  * \param [in]  a    unsigned long type of value stored in a
13177  * \param [in]  b    unsigned long type of value stored in b
13178  * \return value stored in unsigned long type
13179  */
__RV_URSTAS16(unsigned long a,unsigned long b)13180 __STATIC_FORCEINLINE unsigned long __RV_URSTAS16(unsigned long a, unsigned long b)
13181 {
13182     register unsigned long result;
13183     __ASM volatile("urstas16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
13184     return result;
13185 }
13186 /* ===== Inline Function End for 3.180. URSTAS16 ===== */
13187 
13188 /* ===== Inline Function Start for 3.181. URSTSA16 ===== */
13189 /**
13190  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
13191  * \brief URSTSA16 (SIMD 16-bit Unsigned Halving Straight Subtraction & Addition)
13192  * \details
13193  * **Type**: SIMD
13194  *
13195  * **Syntax**:\n
13196  * ~~~
 * URSTSA16 Rd, Rs1, Rs2
13198  * ~~~
13199  *
13200  * **Purpose**:\n
13201  * Do 16-bit unsigned integer element subtraction and 16-bit unsigned integer element
13202  * addition in a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit
13203  * chunks. The results are halved to avoid overflow or saturation.
13204  *
13205  * **Description**:\n
13206  * This instruction subtracts the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs2
13207  * from the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1, and adds the 16-bit unsigned
13208  * integer in [15:0] of 32-bit chunks in Rs1 with the 16-bit unsigned integer in [15:0] of 32-bit chunks in
13209  * Rs2. The two results are first logically right-shifted by 1 bit and then written to [31:16] of 32-bit
13210  * chunks in Rd and [15:0] of 32-bit chunks in Rd.
13211  *
13212  * **Examples**:\n
13213  * ~~~
13214  * Please see `URADD16` and `URSUB16` instructions.
13215  * ~~~
13216  *
13217  * **Operations**:\n
13218  * ~~~
13219  * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][31:16]) u>> 1;
13220  * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][15:0]) u>> 1;
13221  * for RV32, x=0
13222  * for RV64, x=1...0
13223  * ~~~
13224  *
13225  * \param [in]  a    unsigned long type of value stored in a
13226  * \param [in]  b    unsigned long type of value stored in b
13227  * \return value stored in unsigned long type
13228  */
__RV_URSTSA16(unsigned long a,unsigned long b)13229 __STATIC_FORCEINLINE unsigned long __RV_URSTSA16(unsigned long a, unsigned long b)
13230 {
13231     register unsigned long result;
13232     __ASM volatile("urstsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
13233     return result;
13234 }
13235 /* ===== Inline Function End for 3.181. URSTSA16 ===== */
13236 
13237 /* ===== Inline Function Start for 3.182. URSUB8 ===== */
13238 /**
13239  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
13240  * \brief URSUB8 (SIMD 8-bit Unsigned Halving Subtraction)
13241  * \details
13242  * **Type**: SIMD
13243  *
13244  * **Syntax**:\n
13245  * ~~~
13246  * URSUB8 Rd, Rs1, Rs2
13247  * ~~~
13248  *
13249  * **Purpose**:\n
13250  * Do 8-bit unsigned integer element subtractions simultaneously. The results are halved to
13251  * avoid overflow or saturation.
13252  *
13253  * **Description**:\n
13254  * This instruction subtracts the 8-bit unsigned integer elements in Rs2 from the 8-bit
13255  * unsigned integer elements in Rs1. The results are first logically right-shifted by 1 bit and then
13256  * written to Rd.
13257  *
13258  * **Examples**:\n
13259  * ~~~
13260  * * Ra = 0x7F, Rb = 0x80 Rt = 0xFF
13261  * * Ra = 0x80, Rb = 0x7F Rt = 0x00
13262  * * Ra = 0x80, Rb = 0x40 Rt = 0x20
13263  * ~~~
13264  *
13265  * **Operations**:\n
13266  * ~~~
13267  * Rd.B[x] = (Rs1.B[x] - Rs2.B[x]) u>> 1;
13268  * for RV32: x=3...0,
13269  * for RV64: x=7...0
13270  * ~~~
13271  *
13272  * \param [in]  a    unsigned long type of value stored in a
13273  * \param [in]  b    unsigned long type of value stored in b
13274  * \return value stored in unsigned long type
13275  */
__RV_URSUB8(unsigned long a,unsigned long b)13276 __STATIC_FORCEINLINE unsigned long __RV_URSUB8(unsigned long a, unsigned long b)
13277 {
13278     register unsigned long result;
13279     __ASM volatile("ursub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
13280     return result;
13281 }
13282 /* ===== Inline Function End for 3.182. URSUB8 ===== */
13283 
13284 /* ===== Inline Function Start for 3.183. URSUB16 ===== */
13285 /**
13286  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
13287  * \brief URSUB16 (SIMD 16-bit Unsigned Halving Subtraction)
13288  * \details
13289  * **Type**: SIMD
13290  *
13291  * **Syntax**:\n
13292  * ~~~
13293  * URSUB16 Rd, Rs1, Rs2
13294  * ~~~
13295  *
13296  * **Purpose**:\n
13297  * Do 16-bit unsigned integer element subtractions simultaneously. The results are halved to
13298  * avoid overflow or saturation.
13299  *
13300  * **Description**:\n
13301  * This instruction subtracts the 16-bit unsigned integer elements in Rs2 from the 16-bit
13302  * unsigned integer elements in Rs1. The results are first logically right-shifted by 1 bit and then
13303  * written to Rd.
13304  *
13305  * **Examples**:\n
13306  * ~~~
13307  * * Ra = 0x7FFF, Rb = 0x8000 Rt = 0xFFFF
13308  * * Ra = 0x8000, Rb = 0x7FFF Rt = 0x0000
13309  * * Ra = 0x8000, Rb = 0x4000 Rt = 0x2000
13310  * ~~~
13311  *
13312  * **Operations**:\n
13313  * ~~~
13314  * Rd.H[x] = (Rs1.H[x] - Rs2.H[x]) u>> 1;
13315  * for RV32: x=1...0,
13316  * for RV64: x=3...0
13317  * ~~~
13318  *
13319  * \param [in]  a    unsigned long type of value stored in a
13320  * \param [in]  b    unsigned long type of value stored in b
13321  * \return value stored in unsigned long type
13322  */
__RV_URSUB16(unsigned long a,unsigned long b)13323 __STATIC_FORCEINLINE unsigned long __RV_URSUB16(unsigned long a, unsigned long b)
13324 {
13325     register unsigned long result;
13326     __ASM volatile("ursub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
13327     return result;
13328 }
13329 /* ===== Inline Function End for 3.183. URSUB16 ===== */
13330 
13331 /* ===== Inline Function Start for 3.184. URSUB64 ===== */
13332 /**
13333  * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
13334  * \brief URSUB64 (64-bit Unsigned Halving Subtraction)
13335  * \details
13336  * **Type**: DSP (64-bit Profile)
13337  *
13338  * **Syntax**:\n
13339  * ~~~
13340  * URSUB64 Rd, Rs1, Rs2
13341  * ~~~
13342  *
13343  * **Purpose**:\n
13344  * Perform a 64-bit unsigned integer subtraction. The result is halved to avoid overflow or
13345  * saturation.
13346  *
13347  * **RV32 Description**:\n
13348  * This instruction subtracts the 64-bit unsigned integer of an even/odd pair of
13349  * registers specified by Rs2(4,1) from the 64-bit unsigned integer of an even/odd pair of registers
13350  * specified by Rs1(4,1). The subtraction result is first logically right-shifted by 1 bit and then written
13351  * to an even/odd pair of registers specified by Rd(4,1).
13352  * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
13353  * includes register 2d and 2d+1.
13354  * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
13355  * of the pair contains the low 32-bit of the result.
13356  *
13357  * **RV64 Description**:\n
13358  * This instruction subtracts the 64-bit unsigned integer in Rs2 from the 64-bit
13359  * unsigned integer in Rs1. The subtraction result is first logically right-shifted by 1 bit and then
13360  * written to Rd.
13361  *
13362  * **Operations**:\n
13363  * ~~~
13364  * * RV32:
13365  * t_L = CONCAT(Rt(4,1),1'b0); t_H = CONCAT(Rt(4,1),1'b1);
13366  * a_L = CONCAT(Ra(4,1),1'b0); a_H = CONCAT(Ra(4,1),1'b1);
13367  * b_L = CONCAT(Rb(4,1),1'b0); b_H = CONCAT(Rb(4,1),1'b1);
13368  * R[t_H].R[t_L] = (R[a_H].R[a_L] - R[b_H].R[b_L]) u>> 1;
13369  * * RV64:
13370  * Rd = (Rs1 - Rs2) u>> 1;
13371  * ~~~
13372  *
13373  * \param [in]  a    unsigned long long type of value stored in a
13374  * \param [in]  b    unsigned long long type of value stored in b
13375  * \return value stored in unsigned long long type
13376  */
__RV_URSUB64(unsigned long long a,unsigned long long b)13377 __STATIC_FORCEINLINE unsigned long long __RV_URSUB64(unsigned long long a, unsigned long long b)
13378 {
13379     register unsigned long long result;
13380     __ASM volatile("ursub64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
13381     return result;
13382 }
13383 /* ===== Inline Function End for 3.184. URSUB64 ===== */
13384 
13385 /* ===== Inline Function Start for 3.185. URSUBW ===== */
13386 /**
13387  * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
13388  * \brief URSUBW (32-bit Unsigned Halving Subtraction)
13389  * \details
13390  * **Type**: DSP
13391  *
13392  * **Syntax**:\n
13393  * ~~~
13394  * URSUBW Rd, Rs1, Rs2
13395  * ~~~
13396  *
13397  * **Purpose**:\n
13398  * Subtract 32-bit unsigned integers and the result is halved to avoid overflow or saturation.
13399  *
13400  * **Description**:\n
 * This instruction subtracts the first 32-bit unsigned integer in Rs2 from the first 32-bit
 * unsigned integer in Rs1. The result is first logically right-shifted by 1 bit and then sign-extended and
13403  * written to Rd.
13404  *
13405  * **Examples**:\n
13406  * ~~~
13407  * * Ra = 0x7FFFFFFF, Rb = 0x80000000 Rt = 0xFFFFFFFF
13408  * * Ra = 0x80000000, Rb = 0x7FFFFFFF Rt = 0x00000000
13409  * * Ra = 0x80000000, Rb = 0x40000000 Rt = 0x20000000
13410  * ~~~
13411  *
13412  * **Operations**:\n
13413  * ~~~
13414  * * RV32:
13415  * Rd[31:0] = (Rs1[31:0] - Rs2[31:0]) u>> 1;
13416  * * RV64:
13417  * resw[31:0] = (Rs1[31:0] - Rs2[31:0]) u>> 1;
13418  * Rd[63:0] = SE(resw[31:0]);
13419  * ~~~
13420  *
13421  * \param [in]  a    unsigned int type of value stored in a
13422  * \param [in]  b    unsigned int type of value stored in b
13423  * \return value stored in unsigned long type
13424  */
__RV_URSUBW(unsigned int a,unsigned int b)13425 __STATIC_FORCEINLINE unsigned long __RV_URSUBW(unsigned int a, unsigned int b)
13426 {
13427     register unsigned long result;
13428     __ASM volatile("ursubw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
13429     return result;
13430 }
13431 /* ===== Inline Function End for 3.185. URSUBW ===== */
13432 
13433 /* ===== Inline Function Start for 3.186. WEXTI ===== */
13434 /**
13435  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
13436  * \brief WEXTI (Extract Word from 64-bit Immediate)
13437  * \details
13438  * **Type**: DSP
13439  *
13440  * **Syntax**:\n
13441  * ~~~
13442  * WEXTI Rd, Rs1, #LSBloc
13443  * ~~~
13444  *
13445  * **Purpose**:\n
13446  * Extract a 32-bit word from a 64-bit value stored in an even/odd pair of registers (RV32) or
13447  * a register (RV64) starting from a specified immediate LSB bit position.
13448  *
13449  * **RV32 Description**:\n
13450  * This instruction extracts a 32-bit word from a 64-bit value of an even/odd pair of registers specified
13451  * by Rs1(4,1) starting from a specified immediate LSB bit position, #LSBloc. The extracted word is
13452  * written to Rd.
13453  * Rs1(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register
13454  * pair includes register 2d and 2d+1.
13455  * The odd `2d+1` register of the pair contains the high 32-bit of the 64-bit value and the even `2d`
13456  * register of the pair contains the low 32-bit of the 64-bit value.
13457  *
13458  * **RV64 Description**:\n
13459  * This instruction extracts a 32-bit word from a 64-bit value in Rs1 starting from a specified
13460  * immediate LSB bit position, #LSBloc. The extracted word is sign-extended and written to lower 32-
13461  * bit of Rd.
13462  *
13463  * **Operations**:\n
13464  * ~~~
13465  * * RV32:
 * Idx0 = CONCAT(Rs1(4,1),1'b0); Idx1 = CONCAT(Rs1(4,1),1'b1);
13467  * src[63:0] = Concat(R[Idx1], R[Idx0]);
13468  * Rd = src[31+LSBloc:LSBloc];
13469  * * RV64:
13470  * ExtractW = Rs1[31+LSBloc:LSBloc];
13471  * Rd = SE(ExtractW)
13472  * ~~~
13473  *
13474  * \param [in]  a    long long type of value stored in a
13475  * \param [in]  b    unsigned int type of value stored in b
13476  * \return value stored in unsigned long type
13477  */
#define __RV_WEXTI(a, b)    \
    ({    \
        /* Evaluate `a` before declaring the output temporary, so that a */    \
        /* caller expression mentioning its own `result` is not captured. */    \
        long long __a = (long long)(a);    \
        /* Plain local: the `register` storage class was removed in C++17. */    \
        unsigned long __result;    \
        /* "K" passes `b` as an immediate operand (#LSBloc in the syntax). */    \
        __ASM volatile("wexti %0, %1, %2" : "=r"(__result) : "r"(__a), "K"(b));    \
        __result;    \
    })
13485 /* ===== Inline Function End for 3.186. WEXTI ===== */
13486 
13487 /* ===== Inline Function Start for 3.187. WEXT ===== */
13488 /**
13489  * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
13490  * \brief WEXT (Extract Word from 64-bit)
13491  * \details
13492  * **Type**: DSP
13493  *
13494  * **Syntax**:\n
13495  * ~~~
13496  * WEXT Rd, Rs1, Rs2
13497  * ~~~
13498  *
13499  * **Purpose**:\n
13500  * Extract a 32-bit word from a 64-bit value stored in an even/odd pair of registers (RV32) or
13501  * a register (RV64) starting from a specified LSB bit position in a register.
13502  *
13503  * **RV32 Description**:\n
13504  * This instruction extracts a 32-bit word from a 64-bit value of an even/odd pair of registers specified
13505  * by Rs1(4,1) starting from a specified LSB bit position, specified in Rs2[4:0]. The extracted word is
13506  * written to Rd.
13507  * Rs1(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register
13508  * pair includes register 2d and 2d+1.
13509  * The odd `2d+1` register of the pair contains the high 32-bit of the 64-bit value and the even `2d`
13510  * register of the pair contains the low 32-bit of the 64-bit value.
13511  *
13512  * **Operations**:\n
13513  * ~~~
13514  * * RV32:
13515  * Idx0 = CONCAT(Rs1(4,1),1'b0); Idx1 = CONCAT(Rs1(4,1),1'b1);
13516  * src[63:0] = Concat(R[Idx1], R[Idx0]);
13517  * LSBloc = Rs2[4:0];
13518  * Rd = src[31+LSBloc:LSBloc];
13519  * * RV64:
13520  * LSBloc = Rs2[4:0];
13521  * ExtractW = Rs1[31+LSBloc:LSBloc];
13522  * Rd = SE(ExtractW)
13523  * ~~~
13524  *
13525  * \param [in]  a    long long type of value stored in a
13526  * \param [in]  b    unsigned int type of value stored in b
13527  * \return value stored in unsigned long type
13528  */
__RV_WEXT(long long a,unsigned int b)13529 __STATIC_FORCEINLINE unsigned long __RV_WEXT(long long a, unsigned int b)
13530 {
13531     register unsigned long result;
13532     __ASM volatile("wext %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
13533     return result;
13534 }
13535 /* ===== Inline Function End for 3.187. WEXT ===== */
13536 
13537 /* ===== Inline Function Start for 3.188.1. ZUNPKD810 ===== */
13538 /**
13539  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
13540  * \brief ZUNPKD810 (Unsigned Unpacking Bytes 1 & 0)
13541  * \details
13542  * **Type**: DSP
13543  *
13544  * **Syntax**:\n
13545  * ~~~
13546  * ZUNPKD8xy Rd, Rs1
13547  * xy = {10, 20, 30, 31, 32}
13548  * ~~~
13549  *
13550  * **Purpose**:\n
13551  * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned
13552  * halfwords of 32-bit chunks in a register.
13553  *
13554  * **Description**:\n
 * For the `ZUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
13556  * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit
13557  * chunks in Rd.
13558  *
13559  * **Operations**:\n
13560  * ~~~
13561  * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x])
13562  * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y])
13563  * // ZUNPKD810, x=1,y=0
13564  * // ZUNPKD820, x=2,y=0
13565  * // ZUNPKD830, x=3,y=0
13566  * // ZUNPKD831, x=3,y=1
13567  * // ZUNPKD832, x=3,y=2
13568  * for RV32: m=0,
13569  * for RV64: m=1...0
13570  * ~~~
13571  *
13572  * \param [in]  a    unsigned long type of value stored in a
13573  * \return value stored in unsigned long type
13574  */
__RV_ZUNPKD810(unsigned long a)13575 __STATIC_FORCEINLINE unsigned long __RV_ZUNPKD810(unsigned long a)
13576 {
13577     register unsigned long result;
13578     __ASM volatile("zunpkd810 %0, %1" : "=r"(result) : "r"(a));
13579     return result;
13580 }
13581 /* ===== Inline Function End for 3.188.1. ZUNPKD810 ===== */
13582 
13583 /* ===== Inline Function Start for 3.188.2. ZUNPKD820 ===== */
13584 /**
13585  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
13586  * \brief ZUNPKD820 (Unsigned Unpacking Bytes 2 & 0)
13587  * \details
13588  * **Type**: DSP
13589  *
13590  * **Syntax**:\n
13591  * ~~~
13592  * ZUNPKD8xy Rd, Rs1
13593  * xy = {10, 20, 30, 31, 32}
13594  * ~~~
13595  *
13596  * **Purpose**:\n
13597  * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned
13598  * halfwords of 32-bit chunks in a register.
13599  *
13600  * **Description**:\n
 * For the `ZUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
13602  * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit
13603  * chunks in Rd.
13604  *
13605  * **Operations**:\n
13606  * ~~~
13607  * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x])
13608  * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y])
13609  * // ZUNPKD810, x=1,y=0
13610  * // ZUNPKD820, x=2,y=0
13611  * // ZUNPKD830, x=3,y=0
13612  * // ZUNPKD831, x=3,y=1
13613  * // ZUNPKD832, x=3,y=2
13614  * for RV32: m=0,
13615  * for RV64: m=1...0
13616  * ~~~
13617  *
13618  * \param [in]  a    unsigned long type of value stored in a
13619  * \return value stored in unsigned long type
13620  */
__RV_ZUNPKD820(unsigned long a)13621 __STATIC_FORCEINLINE unsigned long __RV_ZUNPKD820(unsigned long a)
13622 {
13623     register unsigned long result;
13624     __ASM volatile("zunpkd820 %0, %1" : "=r"(result) : "r"(a));
13625     return result;
13626 }
13627 /* ===== Inline Function End for 3.188.2. ZUNPKD820 ===== */
13628 
13629 /* ===== Inline Function Start for 3.188.3. ZUNPKD830 ===== */
13630 /**
13631  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
13632  * \brief ZUNPKD830 (Unsigned Unpacking Bytes 3 & 0)
13633  * \details
13634  * **Type**: DSP
13635  *
13636  * **Syntax**:\n
13637  * ~~~
13638  * ZUNPKD8xy Rd, Rs1
13639  * xy = {10, 20, 30, 31, 32}
13640  * ~~~
13641  *
13642  * **Purpose**:\n
13643  * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned
13644  * halfwords of 32-bit chunks in a register.
13645  *
13646  * **Description**:\n
 * For the `ZUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
13648  * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit
13649  * chunks in Rd.
13650  *
13651  * **Operations**:\n
13652  * ~~~
13653  * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x])
13654  * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y])
13655  * // ZUNPKD810, x=1,y=0
13656  * // ZUNPKD820, x=2,y=0
13657  * // ZUNPKD830, x=3,y=0
13658  * // ZUNPKD831, x=3,y=1
13659  * // ZUNPKD832, x=3,y=2
13660  * for RV32: m=0,
13661  * for RV64: m=1...0
13662  * ~~~
13663  *
13664  * \param [in]  a    unsigned long type of value stored in a
13665  * \return value stored in unsigned long type
13666  */
__RV_ZUNPKD830(unsigned long a)13667 __STATIC_FORCEINLINE unsigned long __RV_ZUNPKD830(unsigned long a)
13668 {
13669     register unsigned long result;
13670     __ASM volatile("zunpkd830 %0, %1" : "=r"(result) : "r"(a));
13671     return result;
13672 }
13673 /* ===== Inline Function End for 3.188.3. ZUNPKD830 ===== */
13674 
13675 /* ===== Inline Function Start for 3.188.4. ZUNPKD831 ===== */
13676 /**
13677  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
13678  * \brief ZUNPKD831 (Unsigned Unpacking Bytes 3 & 1)
13679  * \details
13680  * **Type**: DSP
13681  *
13682  * **Syntax**:\n
13683  * ~~~
13684  * ZUNPKD8xy Rd, Rs1
13685  * xy = {10, 20, 30, 31, 32}
13686  * ~~~
13687  *
13688  * **Purpose**:\n
13689  * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned
13690  * halfwords of 32-bit chunks in a register.
13691  *
13692  * **Description**:\n
 * For the `ZUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
13694  * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit
13695  * chunks in Rd.
13696  *
13697  * **Operations**:\n
13698  * ~~~
13699  * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x])
13700  * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y])
13701  * // ZUNPKD810, x=1,y=0
13702  * // ZUNPKD820, x=2,y=0
13703  * // ZUNPKD830, x=3,y=0
13704  * // ZUNPKD831, x=3,y=1
13705  * // ZUNPKD832, x=3,y=2
13706  * for RV32: m=0,
13707  * for RV64: m=1...0
13708  * ~~~
13709  *
13710  * \param [in]  a    unsigned long type of value stored in a
13711  * \return value stored in unsigned long type
13712  */
__RV_ZUNPKD831(unsigned long a)13713 __STATIC_FORCEINLINE unsigned long __RV_ZUNPKD831(unsigned long a)
13714 {
13715     register unsigned long result;
13716     __ASM volatile("zunpkd831 %0, %1" : "=r"(result) : "r"(a));
13717     return result;
13718 }
13719 /* ===== Inline Function End for 3.188.4. ZUNPKD831 ===== */
13720 
13721 /* ===== Inline Function Start for 3.188.5. ZUNPKD832 ===== */
13722 /**
13723  * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
13724  * \brief ZUNPKD832 (Unsigned Unpacking Bytes 3 & 2)
13725  * \details
13726  * **Type**: DSP
13727  *
13728  * **Syntax**:\n
13729  * ~~~
13730  * ZUNPKD8xy Rd, Rs1
13731  * xy = {10, 20, 30, 31, 32}
13732  * ~~~
13733  *
13734  * **Purpose**:\n
13735  * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned
13736  * halfwords of 32-bit chunks in a register.
13737  *
13738  * **Description**:\n
 * For the `ZUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
13740  * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit
13741  * chunks in Rd.
13742  *
13743  * **Operations**:\n
13744  * ~~~
13745  * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x])
13746  * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y])
13747  * // ZUNPKD810, x=1,y=0
13748  * // ZUNPKD820, x=2,y=0
13749  * // ZUNPKD830, x=3,y=0
13750  * // ZUNPKD831, x=3,y=1
13751  * // ZUNPKD832, x=3,y=2
13752  * for RV32: m=0,
13753  * for RV64: m=1...0
13754  * ~~~
13755  *
13756  * \param [in]  a    unsigned long type of value stored in a
13757  * \return value stored in unsigned long type
13758  */
__RV_ZUNPKD832(unsigned long a)13759 __STATIC_FORCEINLINE unsigned long __RV_ZUNPKD832(unsigned long a)
13760 {
13761     register unsigned long result;
13762     __ASM volatile("zunpkd832 %0, %1" : "=r"(result) : "r"(a));
13763     return result;
13764 }
13765 /* ===== Inline Function End for 3.188.5. ZUNPKD832 ===== */
13766 
13767 #if (__RISCV_XLEN == 64) || defined(__ONLY_FOR_DOXYGEN_DOCUMENT_GENERATION__)
13768 
13769 /* ===== Inline Function Start for 4.1. ADD32 ===== */
13770 /**
13771  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
13772  * \brief ADD32 (SIMD 32-bit Addition)
13773  * \details
13774  * **Type**: SIMD (RV64 Only)
13775  *
13776  * **Syntax**:\n
13777  * ~~~
13778  * ADD32 Rd, Rs1, Rs2
13779  * ~~~
13780  *
13781  * **Purpose**:\n
13782  * Do 32-bit integer element additions simultaneously.
13783  *
13784  * **Description**:\n
13785  * This instruction adds the 32-bit integer elements in Rs1 with the 32-bit integer
13786  * elements in Rs2, and then writes the 32-bit element results to Rd.
13787  *
13788  * **Note**:\n
13789  * This instruction can be used for either signed or unsigned addition.
13790  *
13791  * **Operations**:\n
13792  * ~~~
13793  * Rd.W[x] = Rs1.W[x] + Rs2.W[x];
13794  * for RV64: x=1...0
13795  * ~~~
13796  *
13797  * \param [in]  a    unsigned long type of value stored in a
13798  * \param [in]  b    unsigned long type of value stored in b
13799  * \return value stored in unsigned long type
13800  */
__RV_ADD32(unsigned long a,unsigned long b)13801 __STATIC_FORCEINLINE unsigned long __RV_ADD32(unsigned long a, unsigned long b)
13802 {
13803     register unsigned long result;
13804     __ASM volatile("add32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
13805     return result;
13806 }
13807 /* ===== Inline Function End for 4.1. ADD32 ===== */
13808 
13809 /* ===== Inline Function Start for 4.2. CRAS32 ===== */
13810 /**
13811  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
13812  * \brief CRAS32 (SIMD 32-bit Cross Addition & Subtraction)
13813  * \details
13814  * **Type**: SIMD (RV64 Only)
13815  *
13816  * **Syntax**:\n
13817  * ~~~
13818  * CRAS32 Rd, Rs1, Rs2
13819  * ~~~
13820  *
13821  * **Purpose**:\n
13822  * Do 32-bit integer element addition and 32-bit integer element subtraction in a 64-bit
13823  * chunk simultaneously. Operands are from crossed 32-bit elements.
13824  *
13825  * **Description**:\n
13826  * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit
13827  * integer element in [31:0] of Rs2, and writes the result to [63:32] of Rd; at the same time, it subtracts
13828  * the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer element in [31:0] of Rs1, and
13829  * writes the result to [31:0] of Rd.
13830  *
13831  * **Note**:\n
13832  * This instruction can be used for either signed or unsigned operations.
13833  *
13834  * **Operations**:\n
13835  * ~~~
13836  * Rd.W[1] = Rs1.W[1] + Rs2.W[0];
13837  * Rd.W[0] = Rs1.W[0] - Rs2.W[1];
13838  * ~~~
13839  *
13840  * \param [in]  a    unsigned long type of value stored in a
13841  * \param [in]  b    unsigned long type of value stored in b
13842  * \return value stored in unsigned long type
13843  */
__RV_CRAS32(unsigned long a,unsigned long b)13844 __STATIC_FORCEINLINE unsigned long __RV_CRAS32(unsigned long a, unsigned long b)
13845 {
13846     register unsigned long result;
13847     __ASM volatile("cras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
13848     return result;
13849 }
13850 /* ===== Inline Function End for 4.2. CRAS32 ===== */
13851 
13852 /* ===== Inline Function Start for 4.3. CRSA32 ===== */
13853 /**
13854  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
13855  * \brief CRSA32 (SIMD 32-bit Cross Subtraction & Addition)
13856  * \details
13857  * **Type**: SIMD (RV64 Only)
13858  *
13859  * **Syntax**:\n
13860  * ~~~
13861  * CRSA32 Rd, Rs1, Rs2
13862  * ~~~
13863  *
13864  * **Purpose**:\n
13865  * Do 32-bit integer element subtraction and 32-bit integer element addition in a 64-bit
13866  * chunk simultaneously. Operands are from crossed 32-bit elements.
 *
 * **Description**:\n
13868  * This instruction subtracts the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element
13869  * in [63:32] of Rs1, and writes the result to [63:32] of Rd; at the same time, it adds the 32-bit integer
13870  * element in [31:0] of Rs1 with the 32-bit integer element in [63:32] of Rs2, and writes the result to
13871  * [31:0] of Rd
13872  *
13873  * **Note**:\n
13874  * This instruction can be used for either signed or unsigned operations.
13875  *
13876  * **Operations**:\n
13877  * ~~~
13878  * Rd.W[1] = Rs1.W[1] - Rs2.W[0];
13879  * Rd.W[0] = Rs1.W[0] + Rs2.W[1];
13880  * ~~~
13881  *
13882  * \param [in]  a    unsigned long type of value stored in a
13883  * \param [in]  b    unsigned long type of value stored in b
13884  * \return value stored in unsigned long type
13885  */
__RV_CRSA32(unsigned long a,unsigned long b)13886 __STATIC_FORCEINLINE unsigned long __RV_CRSA32(unsigned long a, unsigned long b)
13887 {
13888     register unsigned long result;
13889     __ASM volatile("crsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
13890     return result;
13891 }
13892 /* ===== Inline Function End for 4.3. CRSA32 ===== */
13893 
13894 /* ===== Inline Function Start for 4.4. KABS32 ===== */
13895 /**
13896  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
13897  * \brief KABS32 (Scalar 32-bit Absolute Value with Saturation)
13898  * \details
13899  * **Type**: DSP (RV64 Only)
13900 24    20
13901 19    15
13902 14    12
13903 11    7
13904 KABS32
13905 10010
13906 Rs1
13907 000
13908 Rd
13909 6    0
13910 GE80B
13911 1111111
13912  *
13913  * **Syntax**:\n
13914  * ~~~
13915  * KABS32 Rd, Rs1
13916  * ~~~
13917  *
13918  * **Purpose**:\n
13919  * Get the absolute value of signed 32-bit integer elements in a general register.
13920  *
13921  * **Description**:\n
13922  * This instruction calculates the absolute value of signed 32-bit integer elements stored
13923  * in Rs1. The results are written to Rd. This instruction with the minimum negative integer input of
13924  * 0x80000000 will produce a saturated output of maximum positive integer of 0x7fffffff and the OV
13925  * flag will be set to 1.
13926  *
13927  * **Operations**:\n
13928  * ~~~
13929  * if (Rs1.W[x] >= 0) {
13930  *   res[x] = Rs1.W[x];
13931  * } else {
13932  *   If (Rs1.W[x] == 0x80000000) {
13933  *     res[x] = 0x7fffffff;
13934  *     OV = 1;
13935  *   } else {
13936  *     res[x] = -Rs1.W[x];
13937  *   }
13938  * }
13939  * Rd.W[x] = res[x];
13940  * for RV64: x=1...0
13941  * ~~~
13942  *
13943  * \param [in]  a    unsigned long type of value stored in a
13944  * \return value stored in unsigned long type
13945  */
__RV_KABS32(unsigned long a)13946 __STATIC_FORCEINLINE unsigned long __RV_KABS32(unsigned long a)
13947 {
13948     register unsigned long result;
13949     __ASM volatile("kabs32 %0, %1" : "=r"(result) : "r"(a));
13950     return result;
13951 }
13952 /* ===== Inline Function End for 4.4. KABS32 ===== */
13953 
13954 /* ===== Inline Function Start for 4.5. KADD32 ===== */
13955 /**
13956  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
13957  * \brief KADD32 (SIMD 32-bit Signed Saturating Addition)
13958  * \details
13959  * **Type**: SIMD (RV64 Only)
13960  *
13961  * **Syntax**:\n
13962  * ~~~
13963  * KADD32 Rd, Rs1, Rs2
13964  * ~~~
13965  *
13966  * **Purpose**:\n
13967  * Do 32-bit signed integer element saturating additions simultaneously.
13968  *
13969  * **Description**:\n
13970  * This instruction adds the 32-bit signed integer elements in Rs1 with the 32-bit signed
13971  * integer elements in Rs2. If any of the results are beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1),
13972  * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
13973  *
13974  * **Operations**:\n
13975  * ~~~
13976  * res[x] = Rs1.W[x] + Rs2.W[x];
13977  * if (res[x] > (2^31)-1) {
13978  *   res[x] = (2^31)-1;
13979  *   OV = 1;
13980  * } else if (res[x] < -2^31) {
13981  *   res[x] = -2^31;
13982  *   OV = 1;
13983  * }
13984  * Rd.W[x] = res[x];
13985  * for RV64: x=1...0
13986  * ~~~
13987  *
13988  * \param [in]  a    unsigned long type of value stored in a
13989  * \param [in]  b    unsigned long type of value stored in b
13990  * \return value stored in unsigned long type
13991  */
__RV_KADD32(unsigned long a,unsigned long b)13992 __STATIC_FORCEINLINE unsigned long __RV_KADD32(unsigned long a, unsigned long b)
13993 {
13994     register unsigned long result;
13995     __ASM volatile("kadd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
13996     return result;
13997 }
13998 /* ===== Inline Function End for 4.5. KADD32 ===== */
13999 
14000 /* ===== Inline Function Start for 4.6. KCRAS32 ===== */
14001 /**
14002  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
14003  * \brief KCRAS32 (SIMD 32-bit Signed Saturating Cross Addition & Subtraction)
14004  * \details
14005  * **Type**: SIM (RV64 Only)
14006  *
14007  * **Syntax**:\n
14008  * ~~~
14009  * KCRAS32 Rd, Rs1, Rs2
14010  * ~~~
14011  *
14012  * **Purpose**:\n
14013  * Do 32-bit signed integer element saturating addition and 32-bit signed integer element
14014  * saturating subtraction in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements.
14015  *
14016  * **Description**:\n
14017  * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit
14018  * integer element in [31:0] of Rs2; at the same time, it subtracts the 32-bit integer element in [63:32] of
14019  * Rs2 from the 32-bit integer element in [31:0] of Rs1. If any of the results are beyond the Q31 number
14020  * range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated
14021  * results are written to [63:32] of Rd for addition and [31:0] of Rd for subtraction.
14022  *
14023  * **Operations**:\n
14024  * ~~~
14025  * res[1] = Rs1.W[1] + Rs2.W[0];
14026  * res[0] = Rs1.W[0] - Rs2.W[1];
14027  * if (res[x] > (2^31)-1) {
14028  *   res[x] = (2^31)-1;
14029  *   OV = 1;
14030  * } else if (res < -2^31) {
14031  *   res[x] = -2^31;
14032  *   OV = 1;
14033  * }
14034  * Rd.W[1] = res[1];
14035  * Rd.W[0] = res[0];
14036  * for RV64, x=1...0
14037  * ~~~
14038  *
14039  * \param [in]  a    unsigned long type of value stored in a
14040  * \param [in]  b    unsigned long type of value stored in b
14041  * \return value stored in unsigned long type
14042  */
__RV_KCRAS32(unsigned long a,unsigned long b)14043 __STATIC_FORCEINLINE unsigned long __RV_KCRAS32(unsigned long a, unsigned long b)
14044 {
14045     register unsigned long result;
14046     __ASM volatile("kcras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
14047     return result;
14048 }
14049 /* ===== Inline Function End for 4.6. KCRAS32 ===== */
14050 
14051 /* ===== Inline Function Start for 4.7. KCRSA32 ===== */
14052 /**
14053  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
14054  * \brief KCRSA32 (SIMD 32-bit Signed Saturating Cross Subtraction & Addition)
14055  * \details
14056  * **Type**: SIMD (RV64 Only)
14057  *
14058  * **Syntax**:\n
14059  * ~~~
14060  * KCRSA32 Rd, Rs1, Rs2
14061  * ~~~
14062  *
14063  * **Purpose**:\n
14064  * Do 32-bit signed integer element saturating subtraction and 32-bit signed integer element
14065  * saturating addition in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements.
14066  * *Description: *
14067  * This instruction subtracts the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element
14068  * in [63:32] of Rs1; at the same time, it adds the 32-bit integer element in [31:0] of Rs1 with the 32-bit
14069  * integer element in [63:32] of Rs2. If any of the results are beyond the Q31 number range (-2^31 <= Q31
14070  * <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to
14071  * [63:32] of Rd for subtraction and [31:0] of Rd for addition.
14072  *
14073  * **Operations**:\n
14074  * ~~~
14075  * res[1] = Rs1.W[1] - Rs2.W[0];
14076  * res[0] = Rs1.W[0] + Rs2.W[1];
14077  * if (res[x] > (2^31)-1) {
14078  *   res[x] = (2^31)-1;
14079  *   OV = 1;
14080  * } else if (res < -2^31) {
14081  *   res[x] = -2^31;
14082  *   OV = 1;
14083  * }
14084  * Rd.W[1] = res[1];
14085  * Rd.W[0] = res[0];
14086  * for RV64, x=1...0
14087  * ~~~
14088  *
14089  * \param [in]  a    unsigned long type of value stored in a
14090  * \param [in]  b    unsigned long type of value stored in b
14091  * \return value stored in unsigned long type
14092  */
__RV_KCRSA32(unsigned long a,unsigned long b)14093 __STATIC_FORCEINLINE unsigned long __RV_KCRSA32(unsigned long a, unsigned long b)
14094 {
14095     register unsigned long result;
14096     __ASM volatile("kcrsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
14097     return result;
14098 }
14099 /* ===== Inline Function End for 4.7. KCRSA32 ===== */
14100 
14101 /* ===== Inline Function Start for 4.8.1. KDMBB16 ===== */
14102 /**
14103  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
14104  * \brief KDMBB16 (SIMD Signed Saturating Double Multiply B16 x B16)
14105  * \details
14106  * **Type**: SIMD (RV64 only)
14107  *
14108  * **Syntax**:\n
14109  * ~~~
14110  * KDMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
14111  * ~~~
14112  *
14113  * **Purpose**:\n
14114  * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
14115  * of the 32-bit chunks in registers and then double and saturate the Q31 results into the 32-bit chunks
14116  * in the destination register. If saturation happens, an overflow flag OV will be set.
14117  *
14118  * **Description**:\n
14119  * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
14120  * or bottom 16-bit Q15 content of the 32-bit portions in Rs2. The Q30 results are then doubled and
14121  * saturated into Q31 values. The Q31 values are then written into the 32-bit chunks in Rd. When both
14122  * the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated to 0x7FFFFFFF
14123  * and the overflow flag OV will be set.
14124  *
14125  * **Operations**:\n
14126  * ~~~
14127  * // KDMBB16: (x,y,z)=(0,0,0),(2,2,1)
14128  * // KDMBT16: (x,y,z)=(0,1,0),(2,3,1)
14129  * // KDMTT16: (x,y,z)=(1,1,0),(3,3,1)
14130  * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
14131  * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
14132  *   Mresult[z] = aop[z] * bop[z];
14133  *   resQ31[z] = Mresult[z] << 1;
14134  * } else {
14135  *   resQ31[z] = 0x7FFFFFFF;
14136  *   OV = 1;
14137  * }
14138  * Rd.W[z] = resQ31[z];
14139  * ~~~
14140  *
14141  * \param [in]  a    unsigned long type of value stored in a
14142  * \param [in]  b    unsigned long type of value stored in b
14143  * \return value stored in unsigned long type
14144  */
__RV_KDMBB16(unsigned long a,unsigned long b)14145 __STATIC_FORCEINLINE unsigned long __RV_KDMBB16(unsigned long a, unsigned long b)
14146 {
14147     register unsigned long result;
14148     __ASM volatile("kdmbb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
14149     return result;
14150 }
14151 /* ===== Inline Function End for 4.8.1. KDMBB16 ===== */
14152 
14153 /* ===== Inline Function Start for 4.8.2. KDMBT16 ===== */
14154 /**
14155  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
14156  * \brief KDMBT16 (SIMD Signed Saturating Double Multiply B16 x T16)
14157  * \details
14158  * **Type**: SIMD (RV64 only)
14159  *
14160  * **Syntax**:\n
14161  * ~~~
14162  * KDMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
14163  * ~~~
14164  *
14165  * **Purpose**:\n
14166  * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
14167  * of the 32-bit chunks in registers and then double and saturate the Q31 results into the 32-bit chunks
14168  * in the destination register. If saturation happens, an overflow flag OV will be set.
14169  *
14170  * **Description**:\n
14171  * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
14172  * or bottom 16-bit Q15 content of the 32-bit portions in Rs2. The Q30 results are then doubled and
14173  * saturated into Q31 values. The Q31 values are then written into the 32-bit chunks in Rd. When both
14174  * the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated to 0x7FFFFFFF
14175  * and the overflow flag OV will be set.
14176  *
14177  * **Operations**:\n
14178  * ~~~
14179  * // KDMBB16: (x,y,z)=(0,0,0),(2,2,1)
14180  * // KDMBT16: (x,y,z)=(0,1,0),(2,3,1)
14181  * // KDMTT16: (x,y,z)=(1,1,0),(3,3,1)
14182  * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
14183  * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
14184  *   Mresult[z] = aop[z] * bop[z];
14185  *   resQ31[z] = Mresult[z] << 1;
14186  * } else {
14187  *   resQ31[z] = 0x7FFFFFFF;
14188  *   OV = 1;
14189  * }
14190  * Rd.W[z] = resQ31[z];
14191  * ~~~
14192  *
14193  * \param [in]  a    unsigned long type of value stored in a
14194  * \param [in]  b    unsigned long type of value stored in b
14195  * \return value stored in unsigned long type
14196  */
__RV_KDMBT16(unsigned long a,unsigned long b)14197 __STATIC_FORCEINLINE unsigned long __RV_KDMBT16(unsigned long a, unsigned long b)
14198 {
14199     register unsigned long result;
14200     __ASM volatile("kdmbt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
14201     return result;
14202 }
14203 /* ===== Inline Function End for 4.8.2. KDMBT16 ===== */
14204 
14205 /* ===== Inline Function Start for 4.8.3. KDMTT16 ===== */
14206 /**
14207  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
14208  * \brief KDMTT16 (SIMD Signed Saturating Double Multiply T16 x T16)
14209  * \details
14210  * **Type**: SIMD (RV64 only)
14211  *
14212  * **Syntax**:\n
14213  * ~~~
14214  * KDMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
14215  * ~~~
14216  *
14217  * **Purpose**:\n
14218  * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
14219  * of the 32-bit chunks in registers and then double and saturate the Q31 results into the 32-bit chunks
14220  * in the destination register. If saturation happens, an overflow flag OV will be set.
14221  *
14222  * **Description**:\n
14223  * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
14224  * or bottom 16-bit Q15 content of the 32-bit portions in Rs2. The Q30 results are then doubled and
14225  * saturated into Q31 values. The Q31 values are then written into the 32-bit chunks in Rd. When both
14226  * the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated to 0x7FFFFFFF
14227  * and the overflow flag OV will be set.
14228  *
14229  * **Operations**:\n
14230  * ~~~
14231  * // KDMBB16: (x,y,z)=(0,0,0),(2,2,1)
14232  * // KDMBT16: (x,y,z)=(0,1,0),(2,3,1)
14233  * // KDMTT16: (x,y,z)=(1,1,0),(3,3,1)
14234  * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
14235  * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
14236  *   Mresult[z] = aop[z] * bop[z];
14237  *   resQ31[z] = Mresult[z] << 1;
14238  * } else {
14239  *   resQ31[z] = 0x7FFFFFFF;
14240  *   OV = 1;
14241  * }
14242  * Rd.W[z] = resQ31[z];
14243  * ~~~
14244  *
14245  * \param [in]  a    unsigned long type of value stored in a
14246  * \param [in]  b    unsigned long type of value stored in b
14247  * \return value stored in unsigned long type
14248  */
__RV_KDMTT16(unsigned long a,unsigned long b)14249 __STATIC_FORCEINLINE unsigned long __RV_KDMTT16(unsigned long a, unsigned long b)
14250 {
14251     register unsigned long result;
14252     __ASM volatile("kdmtt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
14253     return result;
14254 }
14255 /* ===== Inline Function End for 4.8.3. KDMTT16 ===== */
14256 
14257 /* ===== Inline Function Start for 4.9.1. KDMABB16 ===== */
14258 /**
14259  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
14260  * \brief KDMABB16 (SIMD Signed Saturating Double Multiply Addition B16 x B16)
14261  * \details
14262  * **Type**: SIMD (RV64 only)
14263  *
14264  * **Syntax**:\n
14265  * ~~~
14266  * KDMAxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
14267  * ~~~
14268  *
14269  * **Purpose**:\n
14270  * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
14271  * of the 32-bit chunks in registers and then double and saturate the Q31 results, add the results with
14272  * the values of the corresponding 32-bit chunks from the destination register and write the saturated
14273  * addition results back into the corresponding 32-bit chunks of the destination register. If saturation
14274  * happens, an overflow flag OV will be set.
14275  *
14276  * **Description**:\n
14277  * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
14278  * or bottom 16-bit Q15 content of the corresponding 32-bit portions in Rs2. The Q30 results are then
14279  * doubled and saturated into Q31 values. The Q31 values are then added with the content of the
14280  * corresponding 32-bit portions of Rd. If the addition results are beyond the Q31 number range (-2^31 <=
14281  * Q31 <= 2^31-1), they are saturated to the range and the OV flag is set to 1. The results after saturation
14282  * are written back to Rd.
14283  * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
14284  * set.
14285  *
14286  * **Operations**:\n
14287  * ~~~
14288  * // KDMABB16: (x,y,z)=(0,0,0),(2,2,1)
14289  * // KDMABT16: (x,y,z)=(0,1,0),(2,3,1)
14290  * // KDMATT16: (x,y,z)=(1,1,0),(3,3,1)
14291  * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
14292  * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
14293  *   Mresult[z] = aop[z] * bop[z];
14294  *   resQ31[z] = Mresult[z] << 1;
14295  * } else {
14296  *   resQ31[z] = 0x7FFFFFFF;
14297  *   OV = 1;
14298  * }
14299  * resadd[z] = Rd.W[z] + resQ31[z];
14300  * if (resadd[z] > (2^31)-1) {
14301  *   resadd[z] = (2^31)-1;
14302  *   OV = 1;
14303  * } else if (resadd[z] < -2^31) {
14304  *   resadd[z] = -2^31;
14305  *   OV = 1;
14306  * }
14307  * Rd.W[z] = resadd[z];
14308  * ~~~
14309  *
14310  * \param [in]  t    unsigned long type of value stored in t
14311  * \param [in]  a    unsigned long type of value stored in a
14312  * \param [in]  b    unsigned long type of value stored in b
14313  * \return value stored in unsigned long type
14314  */
__RV_KDMABB16(unsigned long t,unsigned long a,unsigned long b)14315 __STATIC_FORCEINLINE unsigned long __RV_KDMABB16(unsigned long t, unsigned long a, unsigned long b)
14316 {
14317     __ASM volatile("kdmabb16 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
14318     return t;
14319 }
14320 /* ===== Inline Function End for 4.9.1. KDMABB16 ===== */
14321 
14322 /* ===== Inline Function Start for 4.9.2. KDMABT16 ===== */
14323 /**
14324  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
14325  * \brief KDMABT16 (SIMD Signed Saturating Double Multiply Addition B16 x T16)
14326  * \details
14327  * **Type**: SIMD (RV64 only)
14328  *
14329  * **Syntax**:\n
14330  * ~~~
14331  * KDMAxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
14332  * ~~~
14333  *
14334  * **Purpose**:\n
14335  * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
14336  * of the 32-bit chunks in registers and then double and saturate the Q31 results, add the results with
14337  * the values of the corresponding 32-bit chunks from the destination register and write the saturated
14338  * addition results back into the corresponding 32-bit chunks of the destination register. If saturation
14339  * happens, an overflow flag OV will be set.
14340  *
14341  * **Description**:\n
14342  * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
14343  * or bottom 16-bit Q15 content of the corresponding 32-bit portions in Rs2. The Q30 results are then
14344  * doubled and saturated into Q31 values. The Q31 values are then added with the content of the
14345  * corresponding 32-bit portions of Rd. If the addition results are beyond the Q31 number range (-2^31 <=
14346  * Q31 <= 2^31-1), they are saturated to the range and the OV flag is set to 1. The results after saturation
14347  * are written back to Rd.
14348  * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
14349  * set.
14350  *
14351  * **Operations**:\n
14352  * ~~~
14353  * // KDMABB16: (x,y,z)=(0,0,0),(2,2,1)
14354  * // KDMABT16: (x,y,z)=(0,1,0),(2,3,1)
14355  * // KDMATT16: (x,y,z)=(1,1,0),(3,3,1)
14356  * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
14357  * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
14358  *   Mresult[z] = aop[z] * bop[z];
14359  *   resQ31[z] = Mresult[z] << 1;
14360  * } else {
14361  *   resQ31[z] = 0x7FFFFFFF;
14362  *   OV = 1;
14363  * }
14364  * resadd[z] = Rd.W[z] + resQ31[z];
14365  * if (resadd[z] > (2^31)-1) {
14366  *   resadd[z] = (2^31)-1;
14367  *   OV = 1;
14368  * } else if (resadd[z] < -2^31) {
14369  *   resadd[z] = -2^31;
14370  *   OV = 1;
14371  * }
14372  * Rd.W[z] = resadd[z];
14373  * ~~~
14374  *
14375  * \param [in]  t    unsigned long type of value stored in t
14376  * \param [in]  a    unsigned long type of value stored in a
14377  * \param [in]  b    unsigned long type of value stored in b
14378  * \return value stored in unsigned long type
14379  */
__RV_KDMABT16(unsigned long t,unsigned long a,unsigned long b)14380 __STATIC_FORCEINLINE unsigned long __RV_KDMABT16(unsigned long t, unsigned long a, unsigned long b)
14381 {
14382     __ASM volatile("kdmabt16 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
14383     return t;
14384 }
14385 /* ===== Inline Function End for 4.9.2. KDMABT16 ===== */
14386 
14387 /* ===== Inline Function Start for 4.9.3. KDMATT16 ===== */
14388 /**
14389  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
14390  * \brief KDMATT16 (SIMD Signed Saturating Double Multiply Addition T16 x T16)
14391  * \details
14392  * **Type**: SIMD (RV64 only)
14393  *
14394  * **Syntax**:\n
14395  * ~~~
14396  * KDMAxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
14397  * ~~~
14398  *
14399  * **Purpose**:\n
14400  * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
14401  * of the 32-bit chunks in registers and then double and saturate the Q31 results, add the results with
14402  * the values of the corresponding 32-bit chunks from the destination register and write the saturated
14403  * addition results back into the corresponding 32-bit chunks of the destination register. If saturation
14404  * happens, an overflow flag OV will be set.
14405  *
14406  * **Description**:\n
14407  * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
14408  * or bottom 16-bit Q15 content of the corresponding 32-bit portions in Rs2. The Q30 results are then
14409  * doubled and saturated into Q31 values. The Q31 values are then added with the content of the
14410  * corresponding 32-bit portions of Rd. If the addition results are beyond the Q31 number range (-2^31 <=
14411  * Q31 <= 2^31-1), they are saturated to the range and the OV flag is set to 1. The results after saturation
14412  * are written back to Rd.
14413  * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
14414  * set.
14415  *
14416  * **Operations**:\n
14417  * ~~~
14418  * // KDMABB16: (x,y,z)=(0,0,0),(2,2,1)
14419  * // KDMABT16: (x,y,z)=(0,1,0),(2,3,1)
14420  * // KDMATT16: (x,y,z)=(1,1,0),(3,3,1)
14421  * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
14422  * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
14423  *   Mresult[z] = aop[z] * bop[z];
14424  *   resQ31[z] = Mresult[z] << 1;
14425  * } else {
14426  *   resQ31[z] = 0x7FFFFFFF;
14427  *   OV = 1;
14428  * }
14429  * resadd[z] = Rd.W[z] + resQ31[z];
14430  * if (resadd[z] > (2^31)-1) {
14431  *   resadd[z] = (2^31)-1;
14432  *   OV = 1;
14433  * } else if (resadd[z] < -2^31) {
14434  *   resadd[z] = -2^31;
14435  *   OV = 1;
14436  * }
14437  * Rd.W[z] = resadd[z];
14438  * ~~~
14439  *
14440  * \param [in]  t    unsigned long type of value stored in t
14441  * \param [in]  a    unsigned long type of value stored in a
14442  * \param [in]  b    unsigned long type of value stored in b
14443  * \return value stored in unsigned long type
14444  */
__RV_KDMATT16(unsigned long t,unsigned long a,unsigned long b)14445 __STATIC_FORCEINLINE unsigned long __RV_KDMATT16(unsigned long t, unsigned long a, unsigned long b)
14446 {
14447     __ASM volatile("kdmatt16 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
14448     return t;
14449 }
14450 /* ===== Inline Function End for 4.9.3. KDMATT16 ===== */
14451 
14452 /* ===== Inline Function Start for 4.10.1. KHMBB16 ===== */
14453 /**
14454  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
14455  * \brief KHMBB16 (SIMD Signed Saturating Half Multiply B16 x B16)
14456  * \details
14457  * **Type**: SIMD (RV64 Only)
14458  *
14459  * **Syntax**:\n
14460  * ~~~
14461  * KHMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
14462  * ~~~
14463  *
14464  * **Purpose**:\n
14465  * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
14466  * of the 32-bit chunks in registers and then right-shift 15 bits to turn the Q30 results into Q15
14467  * numbers again and saturate the Q15 results into the destination register. If saturation happens, an
14468  * overflow flag OV will be set.
14469  *
14470  * **Description**:\n
14471  * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
14472  * or bottom 16-bit Q15 content of the 32-bit portion in Rs2. The Q30 results are then right-shifted 15-
14473  * bits and saturated into Q15 values. The 32-bit Q15 values are then written into the 32-bit chunks in
14474  * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
14475  * to 0x7FFF and the overflow flag OV will be set.
14476  *
14477  * **Operations**:\n
14478  * ~~~
14479  * // KHMBB16: (x,y,z)=(0,0,0),(2,2,1)
14480  * // KHMBT16: (x,y,z)=(0,1,0),(2,3,1)
14481  * // KHMTT16: (x,y,z)=(1,1,0),(3,3,1)
14482  * aop = Rs1.H[x]; bop = Rs2.H[y];
14483  * If (0x8000 != aop | 0x8000 != bop) {
14484  *   Mresult[31:0] = aop * bop;
14485  *   res[15:0] = Mresult[30:15];
14486  * } else {
14487  *   res[15:0] = 0x7FFF;
14488  *   OV = 1;
14489  * }
14490  * Rd.W[z] = SE32(res[15:0]);
14491  * ~~~
14492  *
14493  * \param [in]  a    unsigned long type of value stored in a
14494  * \param [in]  b    unsigned long type of value stored in b
14495  * \return value stored in unsigned long type
14496  */
__RV_KHMBB16(unsigned long a,unsigned long b)14497 __STATIC_FORCEINLINE unsigned long __RV_KHMBB16(unsigned long a, unsigned long b)
14498 {
14499     register unsigned long result;
14500     __ASM volatile("khmbb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
14501     return result;
14502 }
14503 /* ===== Inline Function End for 4.10.1. KHMBB16 ===== */
14504 
14505 /* ===== Inline Function Start for 4.10.2. KHMBT16 ===== */
14506 /**
14507  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
14508  * \brief KHMBT16 (SIMD Signed Saturating Half Multiply B16 x T16)
14509  * \details
14510  * **Type**: SIMD (RV64 Only)
14511  *
14512  * **Syntax**:\n
14513  * ~~~
14514  * KHMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
14515  * ~~~
14516  *
14517  * **Purpose**:\n
14518  * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
14519  * of the 32-bit chunks in registers and then right-shift 15 bits to turn the Q30 results into Q15
14520  * numbers again and saturate the Q15 results into the destination register. If saturation happens, an
14521  * overflow flag OV will be set.
14522  *
14523  * **Description**:\n
14524  * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
14525  * or bottom 16-bit Q15 content of the 32-bit portion in Rs2. The Q30 results are then right-shifted 15-
14526  * bits and saturated into Q15 values. The 32-bit Q15 values are then written into the 32-bit chunks in
14527  * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
14528  * to 0x7FFF and the overflow flag OV will be set.
14529  *
14530  * **Operations**:\n
14531  * ~~~
14532  * // KHMBB16: (x,y,z)=(0,0,0),(2,2,1)
14533  * // KHMBT16: (x,y,z)=(0,1,0),(2,3,1)
14534  * // KHMTT16: (x,y,z)=(1,1,0),(3,3,1)
14535  * aop = Rs1.H[x]; bop = Rs2.H[y];
14536  * If (0x8000 != aop | 0x8000 != bop) {
14537  *   Mresult[31:0] = aop * bop;
14538  *   res[15:0] = Mresult[30:15];
14539  * } else {
14540  *   res[15:0] = 0x7FFF;
14541  *   OV = 1;
14542  * }
14543  * Rd.W[z] = SE32(res[15:0]);
14544  * ~~~
14545  *
14546  * \param [in]  a    unsigned long type of value stored in a
14547  * \param [in]  b    unsigned long type of value stored in b
14548  * \return value stored in unsigned long type
14549  */
__RV_KHMBT16(unsigned long a,unsigned long b)14550 __STATIC_FORCEINLINE unsigned long __RV_KHMBT16(unsigned long a, unsigned long b)
14551 {
14552     register unsigned long result;
14553     __ASM volatile("khmbt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
14554     return result;
14555 }
14556 /* ===== Inline Function End for 4.10.2. KHMBT16 ===== */
14557 
14558 /* ===== Inline Function Start for 4.10.3. KHMTT16 ===== */
14559 /**
14560  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
14561  * \brief KHMTT16 (SIMD Signed Saturating Half Multiply T16 x T16)
14562  * \details
14563  * **Type**: SIMD (RV64 Only)
14564  *
14565  * **Syntax**:\n
14566  * ~~~
14567  * KHMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
14568  * ~~~
14569  *
14570  * **Purpose**:\n
14571  * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
14572  * of the 32-bit chunks in registers and then right-shift 15 bits to turn the Q30 results into Q15
14573  * numbers again and saturate the Q15 results into the destination register. If saturation happens, an
14574  * overflow flag OV will be set.
14575  *
14576  * **Description**:\n
14577  * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
14578  * or bottom 16-bit Q15 content of the 32-bit portion in Rs2. The Q30 results are then right-shifted 15-
14579  * bits and saturated into Q15 values. The 32-bit Q15 values are then written into the 32-bit chunks in
14580  * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
14581  * to 0x7FFF and the overflow flag OV will be set.
14582  *
14583  * **Operations**:\n
14584  * ~~~
14585  * // KHMBB16: (x,y,z)=(0,0,0),(2,2,1)
14586  * // KHMBT16: (x,y,z)=(0,1,0),(2,3,1)
14587  * // KHMTT16: (x,y,z)=(1,1,0),(3,3,1)
14588  * aop = Rs1.H[x]; bop = Rs2.H[y];
14589  * If (0x8000 != aop | 0x8000 != bop) {
14590  *   Mresult[31:0] = aop * bop;
14591  *   res[15:0] = Mresult[30:15];
14592  * } else {
14593  *   res[15:0] = 0x7FFF;
14594  *   OV = 1;
14595  * }
14596  * Rd.W[z] = SE32(res[15:0]);
14597  * ~~~
14598  *
14599  * \param [in]  a    unsigned long type of value stored in a
14600  * \param [in]  b    unsigned long type of value stored in b
14601  * \return value stored in unsigned long type
14602  */
__RV_KHMTT16(unsigned long a,unsigned long b)14603 __STATIC_FORCEINLINE unsigned long __RV_KHMTT16(unsigned long a, unsigned long b)
14604 {
14605     register unsigned long result;
14606     __ASM volatile("khmtt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
14607     return result;
14608 }
14609 /* ===== Inline Function End for 4.10.3. KHMTT16 ===== */
14610 
14611 /* ===== Inline Function Start for 4.11.1. KMABB32 ===== */
14612 /**
14613  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT_ADD
14614  * \brief KMABB32 (Saturating Signed Multiply Bottom Words & Add)
14615  * \details
14616  * **Type**: DSP (RV64 Only)
14617  *
14618  * **Syntax**:\n
14619  * ~~~
14620  * KMABB32 Rd, Rs1, Rs2
14621  * KMABT32 Rd, Rs1, Rs2
14622  * KMATT32 Rd, Rs1, Rs2
14623  * ~~~
14624  *
14625  * **Purpose**:\n
14626  * Multiply the signed 32-bit element in a register with the 32-bit element in another register
14627  * and add the result to the content of 64-bit data in the third register. The addition result may be
14628  * saturated and is written to the third register.
14629  * * KMABB32: rd + bottom*bottom
14630  * * KMABT32: rd + bottom*top
14631  * * KMATT32: rd + top*top
14632  *
14633  * **Description**:\n
14634  * For the `KMABB32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
14635  * element in Rs2.
14636  * For the `KMABT32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit
14637  * element in Rs2.
14638  * For the `KMATT32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit
14639  * element in Rs2.
14640  * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond
14641  * the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The
14642  * result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
14643  * integers.
14644  *
14645  * **Operations**:\n
14646  * ~~~
14647  * res = Rd + (Rs1.W[0] * Rs2.W[0]); // KMABB32
14648  *  res = Rd + (Rs1.W[0] * Rs2.W[1]); // KMABT32
14649  *  res = Rd + (Rs1.W[1] * Rs2.W[1]); // KMATT32
14650  *  if (res > (2^63)-1) {
14651  *    res = (2^63)-1;
14652  *    OV = 1;
14653  *  } else if (res < -2^63) {
14654  *    res = -2^63;
14655  *    OV = 1;
14656  *  }
14657  *  Rd = res;
14658  * *Exceptions:* None
14659  * ~~~
14660  *
14661  * \param [in]  t    long type of value stored in t
14662  * \param [in]  a    unsigned long type of value stored in a
14663  * \param [in]  b    unsigned long type of value stored in b
14664  * \return value stored in long type
14665  */
__RV_KMABB32(long t,unsigned long a,unsigned long b)14666 __STATIC_FORCEINLINE long __RV_KMABB32(long t, unsigned long a, unsigned long b)
14667 {
14668     __ASM volatile("kmabb32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
14669     return t;
14670 }
14671 /* ===== Inline Function End for 4.11.1. KMABB32 ===== */
14672 
14673 /* ===== Inline Function Start for 4.11.2. KMABT32 ===== */
14674 /**
14675  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT_ADD
14676  * \brief KMABT32 (Saturating Signed Multiply Bottom & Top Words & Add)
14677  * \details
14678  * **Type**: DSP (RV64 Only)
14679  *
14680  * **Syntax**:\n
14681  * ~~~
14682  * KMABB32 Rd, Rs1, Rs2
14683  * KMABT32 Rd, Rs1, Rs2
14684  * KMATT32 Rd, Rs1, Rs2
14685  * ~~~
14686  *
14687  * **Purpose**:\n
14688  * Multiply the signed 32-bit element in a register with the 32-bit element in another register
14689  * and add the result to the content of 64-bit data in the third register. The addition result may be
14690  * saturated and is written to the third register.
14691  * * KMABB32: rd + bottom*bottom
14692  * * KMABT32: rd + bottom*top
14693  * * KMATT32: rd + top*top
14694  *
14695  * **Description**:\n
14696  * For the `KMABB32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
14697  * element in Rs2.
14698  * For the `KMABT32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit
14699  * element in Rs2.
14700  * For the `KMATT32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit
14701  * element in Rs2.
14702  * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond
14703  * the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The
14704  * result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
14705  * integers.
14706  *
14707  * **Operations**:\n
14708  * ~~~
14709  * res = Rd + (Rs1.W[0] * Rs2.W[0]); // KMABB32
14710  *  res = Rd + (Rs1.W[0] * Rs2.W[1]); // KMABT32
14711  *  res = Rd + (Rs1.W[1] * Rs2.W[1]); // KMATT32
14712  *  if (res > (2^63)-1) {
14713  *    res = (2^63)-1;
14714  *    OV = 1;
14715  *  } else if (res < -2^63) {
14716  *    res = -2^63;
14717  *    OV = 1;
14718  *  }
14719  *  Rd = res;
14720  * *Exceptions:* None
14721  * ~~~
14722  *
14723  * \param [in]  t    long type of value stored in t
14724  * \param [in]  a    unsigned long type of value stored in a
14725  * \param [in]  b    unsigned long type of value stored in b
14726  * \return value stored in long type
14727  */
__RV_KMABT32(long t,unsigned long a,unsigned long b)14728 __STATIC_FORCEINLINE long __RV_KMABT32(long t, unsigned long a, unsigned long b)
14729 {
14730     __ASM volatile("kmabt32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
14731     return t;
14732 }
14733 /* ===== Inline Function End for 4.11.2. KMABT32 ===== */
14734 
14735 /* ===== Inline Function Start for 4.11.3. KMATT32 ===== */
14736 /**
14737  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT_ADD
14738  * \brief KMATT32 (Saturating Signed Multiply Top Words & Add)
14739  * \details
14740  * **Type**: DSP (RV64 Only)
14741  *
14742  * **Syntax**:\n
14743  * ~~~
14744  * KMABB32 Rd, Rs1, Rs2
14745  * KMABT32 Rd, Rs1, Rs2
14746  * KMATT32 Rd, Rs1, Rs2
14747  * ~~~
14748  *
14749  * **Purpose**:\n
14750  * Multiply the signed 32-bit element in a register with the 32-bit element in another register
14751  * and add the result to the content of 64-bit data in the third register. The addition result may be
14752  * saturated and is written to the third register.
14753  * * KMABB32: rd + bottom*bottom
14754  * * KMABT32: rd + bottom*top
14755  * * KMATT32: rd + top*top
14756  *
14757  * **Description**:\n
14758  * For the `KMABB32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
14759  * element in Rs2.
14760  * For the `KMABT32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit
14761  * element in Rs2.
14762  * For the `KMATT32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit
14763  * element in Rs2.
14764  * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond
14765  * the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The
14766  * result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
14767  * integers.
14768  *
14769  * **Operations**:\n
14770  * ~~~
14771  * res = Rd + (Rs1.W[0] * Rs2.W[0]); // KMABB32
14772  *  res = Rd + (Rs1.W[0] * Rs2.W[1]); // KMABT32
14773  *  res = Rd + (Rs1.W[1] * Rs2.W[1]); // KMATT32
14774  *  if (res > (2^63)-1) {
14775  *    res = (2^63)-1;
14776  *    OV = 1;
14777  *  } else if (res < -2^63) {
14778  *    res = -2^63;
14779  *    OV = 1;
14780  *  }
14781  *  Rd = res;
14782  * *Exceptions:* None
14783  * ~~~
14784  *
14785  * \param [in]  t    long type of value stored in t
14786  * \param [in]  a    unsigned long type of value stored in a
14787  * \param [in]  b    unsigned long type of value stored in b
14788  * \return value stored in long type
14789  */
__RV_KMATT32(long t,unsigned long a,unsigned long b)14790 __STATIC_FORCEINLINE long __RV_KMATT32(long t, unsigned long a, unsigned long b)
14791 {
14792     __ASM volatile("kmatt32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
14793     return t;
14794 }
14795 /* ===== Inline Function End for 4.11.3. KMATT32 ===== */
14796 
14797 /* ===== Inline Function Start for 4.12.1. KMADA32 ===== */
14798 /**
14799  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
14800  * \brief KMADA32 (Saturating Signed Multiply Two Words and Two Adds)
14801  * \details
14802  * **Type**: DSP (RV64 Only)
14803  *
14804  * **Syntax**:\n
14805  * ~~~
14806  * KMADA32 Rd, Rs1, Rs2
14807  * KMAXDA32 Rd, Rs1, Rs2
14808  * ~~~
14809  *
14810  * **Purpose**:\n
14811  * Do two signed 32-bit multiplications from 32-bit data in two registers; and then adds the
14812  * two 64-bit results and 64-bit data in a third register together. The addition result may be saturated.
14813  * * KMADA32: rd + top*top + bottom*bottom
14814  * * KMAXDA32: rd + top*bottom + bottom*top
14815  *
14816  * **Description**:\n
14817  * For the `KMADA32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-
14818  * bit element in Rs2 and then adds the result to the result of multiplying the top 32-bit element in Rs1
14819  * with the top 32-bit element in Rs2. It is actually an alias of the `KMAR64` instruction.
14820  * For the `KMAXDA32` instruction, it multiplies the top 32-bit element in Rs1 with the bottom 32-bit
14821  * element in Rs2 and then adds the result to the result of multiplying the bottom 32-bit element in Rs1
14822  * with the top 32-bit element in Rs2.
14823  * The result is added to the content of 64-bit data in Rd. If the addition result is beyond the Q63
14824  * number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The 64-bit
14825  * result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
14826  *
14827  * **Operations**:\n
14828  * ~~~
14829  * res = Rd + (Rs1.W[1] * Rs2.W[1]) + (Rs1.W[0] * Rs2.W[0]); // KMADA32
14830  * res = Rd + (Rs1.W[1] * Rs2.W[0]) + (Rs1.W[0] * Rs2.W[1]); // KMAXDA32
14831  * if (res > (2^63)-1) {
14832  *   res = (2^63)-1;
14833  *   OV = 1;
14834  * } else if (res < -2^63) {
14835  *   res = -2^63;
14836  *   OV = 1;
14837  * }
14838  * Rd = res;
14839  * ~~~
14840  *
14841  * \param [in]  t    long type of value stored in t
14842  * \param [in]  a    unsigned long type of value stored in a
14843  * \param [in]  b    unsigned long type of value stored in b
14844  * \return value stored in long type
14845  */
__RV_KMADA32(long t,unsigned long a,unsigned long b)14846 __STATIC_FORCEINLINE long __RV_KMADA32(long t, unsigned long a, unsigned long b)
14847 {
14848     __ASM volatile("kmada32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
14849     return t;
14850 }
14851 /* ===== Inline Function End for 4.12.1. KMADA32 ===== */
14852 
14853 /* ===== Inline Function Start for 4.12.2. KMAXDA32 ===== */
14854 /**
14855  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
14856  * \brief KMAXDA32 (Saturating Signed Crossed Multiply Two Words and Two Adds)
14857  * \details
14858  * **Type**: DSP (RV64 Only)
14859  *
14860  * **Syntax**:\n
14861  * ~~~
14862  * KMADA32 Rd, Rs1, Rs2
14863  * KMAXDA32 Rd, Rs1, Rs2
14864  * ~~~
14865  *
14866  * **Purpose**:\n
14867  * Do two signed 32-bit multiplications from 32-bit data in two registers; and then adds the
14868  * two 64-bit results and 64-bit data in a third register together. The addition result may be saturated.
14869  * * KMADA32: rd + top*top + bottom*bottom
14870  * * KMAXDA32: rd + top*bottom + bottom*top
14871  *
14872  * **Description**:\n
14873  * For the `KMADA32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-
14874  * bit element in Rs2 and then adds the result to the result of multiplying the top 32-bit element in Rs1
14875  * with the top 32-bit element in Rs2. It is actually an alias of the `KMAR64` instruction.
14876  * For the `KMAXDA32` instruction, it multiplies the top 32-bit element in Rs1 with the bottom 32-bit
14877  * element in Rs2 and then adds the result to the result of multiplying the bottom 32-bit element in Rs1
14878  * with the top 32-bit element in Rs2.
14879  * The result is added to the content of 64-bit data in Rd. If the addition result is beyond the Q63
14880  * number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The 64-bit
14881  * result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
14882  *
14883  * **Operations**:\n
14884  * ~~~
14885  * res = Rd + (Rs1.W[1] * Rs2.W[1]) + (Rs1.W[0] * Rs2.W[0]); // KMADA32
14886  * res = Rd + (Rs1.W[1] * Rs2.W[0]) + (Rs1.W[0] * Rs2.W[1]); // KMAXDA32
14887  * if (res > (2^63)-1) {
14888  *   res = (2^63)-1;
14889  *   OV = 1;
14890  * } else if (res < -2^63) {
14891  *   res = -2^63;
14892  *   OV = 1;
14893  * }
14894  * Rd = res;
14895  * ~~~
14896  *
14897  * \param [in]  t    long type of value stored in t
14898  * \param [in]  a    unsigned long type of value stored in a
14899  * \param [in]  b    unsigned long type of value stored in b
14900  * \return value stored in long type
14901  */
__RV_KMAXDA32(long t,unsigned long a,unsigned long b)14902 __STATIC_FORCEINLINE long __RV_KMAXDA32(long t, unsigned long a, unsigned long b)
14903 {
14904     __ASM volatile("kmaxda32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
14905     return t;
14906 }
14907 /* ===== Inline Function End for 4.12.2. KMAXDA32 ===== */
14908 
14909 /* ===== Inline Function Start for 4.13.1. KMDA32 ===== */
14910 /**
14911  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
14912  * \brief KMDA32 (Signed Multiply Two Words and Add)
14913  * \details
14914  * **Type**: DSP (RV64 Only)
14915  *
14916  * **Syntax**:\n
14917  * ~~~
14918  * KMDA32 Rd, Rs1, Rs2
14919  * KMXDA32 Rd, Rs1, Rs2
14920  * ~~~
14921  *
14922  * **Purpose**:\n
14923  * Do two signed 32-bit multiplications from the 32-bit element of two registers; and then
14924  * adds the two 64-bit results together. The addition result may be saturated.
14925  * * KMDA32: top*top + bottom*bottom
14926  * * KMXDA32: top*bottom + bottom*top
14927  *
14928  * **Description**:\n
14929  * For the `KMDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
14930  * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1
14931  * with the top 32-bit element of Rs2.
14932  * For the `KMXDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
14933  * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1
14934  * with the bottom 32-bit element of Rs2.
14935  * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^63-1.
14936  * The final result is written to Rd. The 32-bit contents are treated as signed integers.
14937  *
14938  * **Operations**:\n
14939  * ~~~
14940  * if ((Rs1 != 0x8000000080000000) or (Rs2 != 0x8000000080000000)) {
14941  *   Rd = (Rs1.W[1] * Rs2.W[1]) + (Rs1.W[0] * Rs2.W[0]); // KMDA32
14942  *   Rd = (Rs1.W[1] * Rs2.W[0]) + (Rs1.W[0] * Rs2.W[1]); // KMXDA32
14943  * } else {
14944  *   Rd = 0x7fffffffffffffff;
14945  *   OV = 1;
14946  * }
14947  * ~~~
14948  *
14949  * \param [in]  a    unsigned long type of value stored in a
14950  * \param [in]  b    unsigned long type of value stored in b
14951  * \return value stored in long type
14952  */
__RV_KMDA32(unsigned long a,unsigned long b)14953 __STATIC_FORCEINLINE long __RV_KMDA32(unsigned long a, unsigned long b)
14954 {
14955     register long result;
14956     __ASM volatile("kmda32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
14957     return result;
14958 }
14959 /* ===== Inline Function End for 4.13.1. KMDA32 ===== */
14960 
14961 /* ===== Inline Function Start for 4.13.2. KMXDA32 ===== */
14962 /**
14963  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
14964  * \brief KMXDA32 (Signed Crossed Multiply Two Words and Add)
14965  * \details
14966  * **Type**: DSP (RV64 Only)
14967  *
14968  * **Syntax**:\n
14969  * ~~~
14970  * KMDA32 Rd, Rs1, Rs2
14971  * KMXDA32 Rd, Rs1, Rs2
14972  * ~~~
14973  *
14974  * **Purpose**:\n
14975  * Do two signed 32-bit multiplications from the 32-bit element of two registers; and then
14976  * adds the two 64-bit results together. The addition result may be saturated.
14977  * * KMDA32: top*top + bottom*bottom
14978  * * KMXDA32: top*bottom + bottom*top
14979  *
14980  * **Description**:\n
14981  * For the `KMDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
14982  * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1
14983  * with the top 32-bit element of Rs2.
14984  * For the `KMXDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
14985  * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1
14986  * with the bottom 32-bit element of Rs2.
14987  * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^63-1.
14988  * The final result is written to Rd. The 32-bit contents are treated as signed integers.
14989  *
14990  * **Operations**:\n
14991  * ~~~
14992  * if ((Rs1 != 0x8000000080000000) or (Rs2 != 0x8000000080000000)) {
14993  *   Rd = (Rs1.W[1] * Rs2.W[1]) + (Rs1.W[0] * Rs2.W[0]); // KMDA32
14994  *   Rd = (Rs1.W[1] * Rs2.W[0]) + (Rs1.W[0] * Rs2.W[1]); // KMXDA32
14995  * } else {
14996  *   Rd = 0x7fffffffffffffff;
14997  *   OV = 1;
14998  * }
14999  * ~~~
15000  *
15001  * \param [in]  a    unsigned long type of value stored in a
15002  * \param [in]  b    unsigned long type of value stored in b
15003  * \return value stored in long type
15004  */
__RV_KMXDA32(unsigned long a,unsigned long b)15005 __STATIC_FORCEINLINE long __RV_KMXDA32(unsigned long a, unsigned long b)
15006 {
15007     register long result;
15008     __ASM volatile("kmxda32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
15009     return result;
15010 }
15011 /* ===== Inline Function End for 4.13.2. KMXDA32 ===== */
15012 
15013 /* ===== Inline Function Start for 4.14.1. KMADS32 ===== */
15014 /**
15015  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
15016  * \brief KMADS32 (Saturating Signed Multiply Two Words & Subtract & Add)
15017  * \details
15018  * **Type**: DSP (RV64 Only)
15019  *
15020  * **Syntax**:\n
15021  * ~~~
15022  * KMADS32 Rd, Rs1, Rs2
15023  * KMADRS32 Rd, Rs1, Rs2
15024  * KMAXDS32 Rd, Rs1, Rs2
15025  * ~~~
15026  *
15027  * **Purpose**:\n
15028  * Do two signed 32-bit multiplications from 32-bit elements in two registers; and then
15029  * perform a subtraction operation between the two 64-bit results. Then add the subtraction result to
15030  * 64-bit data in a third register. The addition result may be saturated.
15031  * * KMADS32: rd + (top*top - bottom*bottom)
15032  * * KMADRS32: rd + (bottom*bottom - top*top)
15033  * * KMAXDS32: rd + (top*bottom - bottom*top)
15034  *
15035  * **Description**:\n
15036  * For the `KMADS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
15037  * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
15038  * Rs1 with the top 32-bit element in Rs2.
15039  * For the `KMADRS32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit
15040  * element in Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
15041  * element in Rs1 with the bottom 32-bit element in Rs2.
15042  * For the `KMAXDS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit
15043  * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
15044  * Rs1 with the bottom 32-bit element in Rs2.
15045  * The subtraction result is then added to the content of 64-bit data in Rd. If the addition result is
15046  * beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to
15047  * 1. The 64-bit result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated
15048  * as signed integers.
15049  *
15050  * **Operations**:\n
15051  * ~~~
15052  * res = Rd + (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMADS32
15053  * res = Rd + (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // KMADRS32
15054  * res = Rd + (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMAXDS32
15055  * if (res > (2^63)-1) {
15056  *   res = (2^63)-1;
15057  *   OV = 1;
15058  * } else if (res < -2^63) {
15059  *   res = -2^63;
15060  *   OV = 1;
15061  * }
15062  * Rd = res;
15063  * ~~~
15064  *
15065  * \param [in]  t    long type of value stored in t
15066  * \param [in]  a    unsigned long type of value stored in a
15067  * \param [in]  b    unsigned long type of value stored in b
15068  * \return value stored in long type
15069  */
__RV_KMADS32(long t,unsigned long a,unsigned long b)15070 __STATIC_FORCEINLINE long __RV_KMADS32(long t, unsigned long a, unsigned long b)
15071 {
15072     __ASM volatile("kmads32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
15073     return t;
15074 }
15075 /* ===== Inline Function End for 4.14.1. KMADS32 ===== */
15076 
15077 /* ===== Inline Function Start for 4.14.2. KMADRS32 ===== */
15078 /**
15079  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
15080  * \brief KMADRS32 (Saturating Signed Multiply Two Words & Reverse Subtract & Add)
15081  * \details
15082  * **Type**: DSP (RV64 Only)
15083  *
15084  * **Syntax**:\n
15085  * ~~~
15086  * KMADS32 Rd, Rs1, Rs2
15087  * KMADRS32 Rd, Rs1, Rs2
15088  * KMAXDS32 Rd, Rs1, Rs2
15089  * ~~~
15090  *
15091  * **Purpose**:\n
15092  * Do two signed 32-bit multiplications from 32-bit elements in two registers; and then
15093  * perform a subtraction operation between the two 64-bit results. Then add the subtraction result to
15094  * 64-bit data in a third register. The addition result may be saturated.
15095  * * KMADS32: rd + (top*top - bottom*bottom)
15096  * * KMADRS32: rd + (bottom*bottom - top*top)
15097  * * KMAXDS32: rd + (top*bottom - bottom*top)
15098  *
15099  * **Description**:\n
15100  * For the `KMADS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
15101  * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
15102  * Rs1 with the top 32-bit element in Rs2.
15103  * For the `KMADRS32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit
15104  * element in Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
15105  * element in Rs1 with the bottom 32-bit element in Rs2.
15106  * For the `KMAXDS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit
15107  * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
15108  * Rs1 with the bottom 32-bit element in Rs2.
15109  * The subtraction result is then added to the content of 64-bit data in Rd. If the addition result is
15110  * beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to
15111  * 1. The 64-bit result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated
15112  * as signed integers.
15113  *
15114  * **Operations**:\n
15115  * ~~~
15116  * res = Rd + (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMADS32
15117  * res = Rd + (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // KMADRS32
15118  * res = Rd + (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMAXDS32
15119  * if (res > (2^63)-1) {
15120  *   res = (2^63)-1;
15121  *   OV = 1;
15122  * } else if (res < -2^63) {
15123  *   res = -2^63;
15124  *   OV = 1;
15125  * }
15126  * Rd = res;
15127  * ~~~
15128  *
15129  * \param [in]  t    long type of value stored in t
15130  * \param [in]  a    unsigned long type of value stored in a
15131  * \param [in]  b    unsigned long type of value stored in b
15132  * \return value stored in long type
15133  */
__RV_KMADRS32(long t,unsigned long a,unsigned long b)15134 __STATIC_FORCEINLINE long __RV_KMADRS32(long t, unsigned long a, unsigned long b)
15135 {
15136     __ASM volatile("kmadrs32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
15137     return t;
15138 }
15139 /* ===== Inline Function End for 4.14.2. KMADRS32 ===== */
15140 
15141 /* ===== Inline Function Start for 4.14.3. KMAXDS32 ===== */
15142 /**
15143  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
15144  * \brief KMAXDS32 (Saturating Signed Crossed Multiply Two Words & Subtract & Add)
15145  * \details
15146  * **Type**: DSP (RV64 Only)
15147  *
15148  * **Syntax**:\n
15149  * ~~~
15150  * KMADS32 Rd, Rs1, Rs2
15151  * KMADRS32 Rd, Rs1, Rs2
15152  * KMAXDS32 Rd, Rs1, Rs2
15153  * ~~~
15154  *
15155  * **Purpose**:\n
15156  * Do two signed 32-bit multiplications from 32-bit elements in two registers; and then
15157  * perform a subtraction operation between the two 64-bit results. Then add the subtraction result to
15158  * 64-bit data in a third register. The addition result may be saturated.
15159  * * KMADS32: rd + (top*top - bottom*bottom)
15160  * * KMADRS32: rd + (bottom*bottom - top*top)
15161  * * KMAXDS32: rd + (top*bottom - bottom*top)
15162  *
15163  * **Description**:\n
15164  * For the `KMADS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
15165  * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
15166  * Rs1 with the top 32-bit element in Rs2.
15167  * For the `KMADRS32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit
15168  * element in Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
15169  * element in Rs1 with the bottom 32-bit element in Rs2.
15170  * For the `KMAXDS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit
15171  * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
15172  * Rs1 with the bottom 32-bit element in Rs2.
15173  * The subtraction result is then added to the content of 64-bit data in Rd. If the addition result is
15174  * beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to
15175  * 1. The 64-bit result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated
15176  * as signed integers.
15177  *
15178  * **Operations**:\n
15179  * ~~~
15180  * res = Rd + (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMADS32
15181  * res = Rd + (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // KMADRS32
15182  * res = Rd + (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMAXDS32
15183  * if (res > (2^63)-1) {
15184  *   res = (2^63)-1;
15185  *   OV = 1;
15186  * } else if (res < -2^63) {
15187  *   res = -2^63;
15188  *   OV = 1;
15189  * }
15190  * Rd = res;
15191  * ~~~
15192  *
15193  * \param [in]  t    long type of value stored in t
15194  * \param [in]  a    unsigned long type of value stored in a
15195  * \param [in]  b    unsigned long type of value stored in b
15196  * \return value stored in long type
15197  */
__RV_KMAXDS32(long t,unsigned long a,unsigned long b)15198 __STATIC_FORCEINLINE long __RV_KMAXDS32(long t, unsigned long a, unsigned long b)
15199 {
15200     __ASM volatile("kmaxds32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
15201     return t;
15202 }
15203 /* ===== Inline Function End for 4.14.3. KMAXDS32 ===== */
15204 
15205 /* ===== Inline Function Start for 4.15.1. KMSDA32 ===== */
15206 /**
15207  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
15208  * \brief KMSDA32 (Saturating Signed Multiply Two Words & Add & Subtract)
15209  * \details
15210  * **Type**: DSP (RV64 Only)
15211  *
15212  * **Syntax**:\n
15213  * ~~~
15214  * KMSDA32 Rd, Rs1, Rs2
15215  * KMSXDA32 Rd, Rs1, Rs2
15216  * ~~~
15217  *
15218  * **Purpose**:\n
15219  * Do two signed 32-bit multiplications from the 32-bit element of two registers; and then
15220  * subtracts the two 64-bit results from a third register. The subtraction result may be saturated.
15221  * * KMSDA32: rd - top*top - bottom*bottom
15222  * * KMSXDA32: rd - top*bottom - bottom*top
15223  *
15224  * **Description**:\n
15225  * For the `KMSDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
15226  * element of Rs2 and multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2.
15227  * For the `KMSXDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
15228  * element of Rs2 and multiplies the top 32-bit element of Rs1 with the bottom 32-bit element of Rs2.
15229  * The two 64-bit multiplication results are then subtracted from the content of Rd. If the subtraction
15230  * result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit
15231  * is set to 1. The result after saturation is written to Rd. The 32-bit contents are treated as signed
15232  * integers.
15233  *
15234  * **Operations**:\n
15235  * ~~~
15236  * res = Rd - (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMSDA32
15237  * res = Rd - (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMSXDA32
15238  * if (res > (2^63)-1) {
15239  *   res = (2^63)-1;
15240  *   OV = 1;
15241  * } else if (res < -2^63) {
15242  *   res = -2^63;
15243  *   OV = 1;
15244  * }
15245  * Rd = res;
15246  * ~~~
15247  *
15248  * \param [in]  t    long type of value stored in t
15249  * \param [in]  a    unsigned long type of value stored in a
15250  * \param [in]  b    unsigned long type of value stored in b
15251  * \return value stored in long type
15252  */
__RV_KMSDA32(long t,unsigned long a,unsigned long b)15253 __STATIC_FORCEINLINE long __RV_KMSDA32(long t, unsigned long a, unsigned long b)
15254 {
15255     __ASM volatile("kmsda32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
15256     return t;
15257 }
15258 /* ===== Inline Function End for 4.15.1. KMSDA32 ===== */
15259 
15260 /* ===== Inline Function Start for 4.15.2. KMSXDA32 ===== */
15261 /**
15262  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
15263  * \brief KMSXDA32 (Saturating Signed Crossed Multiply Two Words & Add & Subtract)
15264  * \details
15265  * **Type**: DSP (RV64 Only)
15266  *
15267  * **Syntax**:\n
15268  * ~~~
15269  * KMSDA32 Rd, Rs1, Rs2
15270  * KMSXDA32 Rd, Rs1, Rs2
15271  * ~~~
15272  *
15273  * **Purpose**:\n
15274  * Do two signed 32-bit multiplications from the 32-bit element of two registers; and then
15275  * subtracts the two 64-bit results from a third register. The subtraction result may be saturated.
15276  * * KMSDA32: rd - top*top - bottom*bottom
15277  * * KMSXDA32: rd - top*bottom - bottom*top
15278  *
15279  * **Description**:\n
15280  * For the `KMSDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
15281  * element of Rs2 and multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2.
15282  * For the `KMSXDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
15283  * element of Rs2 and multiplies the top 32-bit element of Rs1 with the bottom 32-bit element of Rs2.
15284  * The two 64-bit multiplication results are then subtracted from the content of Rd. If the subtraction
15285  * result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit
15286  * is set to 1. The result after saturation is written to Rd. The 32-bit contents are treated as signed
15287  * integers.
15288  *
15289  * **Operations**:\n
15290  * ~~~
15291  * res = Rd - (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMSDA32
15292  * res = Rd - (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMSXDA32
15293  * if (res > (2^63)-1) {
15294  *   res = (2^63)-1;
15295  *   OV = 1;
15296  * } else if (res < -2^63) {
15297  *   res = -2^63;
15298  *   OV = 1;
15299  * }
15300  * Rd = res;
15301  * ~~~
15302  *
15303  * \param [in]  t    long type of value stored in t
15304  * \param [in]  a    unsigned long type of value stored in a
15305  * \param [in]  b    unsigned long type of value stored in b
15306  * \return value stored in long type
15307  */
__RV_KMSXDA32(long t,unsigned long a,unsigned long b)15308 __STATIC_FORCEINLINE long __RV_KMSXDA32(long t, unsigned long a, unsigned long b)
15309 {
15310     __ASM volatile("kmsxda32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
15311     return t;
15312 }
15313 /* ===== Inline Function End for 4.15.2. KMSXDA32 ===== */
15314 
15315 /* ===== Inline Function Start for 4.16. KSLL32 ===== */
15316 /**
15317  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
15318  * \brief KSLL32 (SIMD 32-bit Saturating Shift Left Logical)
15319  * \details
15320  * **Type**: SIMD (RV64 Only)
15321  *
15322  * **Syntax**:\n
15323  * ~~~
15324  * KSLL32 Rd, Rs1, Rs2
15325  * ~~~
15326  *
15327  * **Purpose**:\n
15328  * Do 32-bit elements logical left shift operations with saturation simultaneously. The shift
15329  * amount is a variable from a GPR.
15330  *
15331  * **Description**:\n
15332  * The 32-bit data elements in Rs1 are left-shifted logically. The vacated low-order bits are filled
15333  * with zero and the shift amount is specified by the low-order 5-bits of the value in the Rs2 register.
15334  * Any shifted value greater than 2^31-1 is saturated to 2^31-1. Any shifted value smaller than -2^31 is
15335  * saturated to -2^31. And the saturated results are written to Rd. If any saturation is performed, set OV
15336  * bit to 1.
15337  *
15338  * **Operations**:\n
15339  * ~~~
15340  * sa = Rs2[4:0];
15341  * if (sa != 0) {
15342  *   res[(31+sa):0] = Rs1.W[x] << sa;
15343  *   if (res > (2^31)-1) {
15344  *     res = 0x7fffffff; OV = 1;
15345  *   } else if (res < -2^31) {
15346  *     res = 0x80000000; OV = 1;
15347  *   }
15348  *   Rd.W[x] = res[31:0];
15349  * } else {
15350  *   Rd = Rs1;
15351  * }
15352  * for RV64: x=1...0
15353  * ~~~
15354  *
15355  * \param [in]  a    unsigned long type of value stored in a
15356  * \param [in]  b    unsigned int type of value stored in b
15357  * \return value stored in unsigned long type
15358  */
__RV_KSLL32(unsigned long a,unsigned int b)15359 __STATIC_FORCEINLINE unsigned long __RV_KSLL32(unsigned long a, unsigned int b)
15360 {
15361     register unsigned long result;
15362     __ASM volatile("ksll32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
15363     return result;
15364 }
15365 /* ===== Inline Function End for 4.16. KSLL32 ===== */
15366 
15367 /* ===== Inline Function Start for 4.17. KSLLI32 ===== */
15368 /**
15369  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
15370  * \brief KSLLI32 (SIMD 32-bit Saturating Shift Left Logical Immediate)
15371  * \details
15372  * **Type**: SIMD (RV64 Only)
15373  *
15374  * **Syntax**:\n
15375  * ~~~
15376  * KSLLI32 Rd, Rs1, imm5u
15377  * ~~~
15378  *
15379  * **Purpose**:\n
15380  * Do 32-bit elements logical left shift operations with saturation simultaneously. The shift
15381  * amount is an immediate value.
15382  *
15383  * **Description**:\n
 * The 32-bit data elements in Rs1 are left-shifted logically. The vacated low-order bits are filled
 * with zero and the shift amount is specified by the imm5u constant. Any shifted value greater than
15386  * 2^31-1 is saturated to 2^31-1. Any shifted value smaller than -2^31 is saturated to -2^31. And the saturated
15387  * results are written to Rd. If any saturation is performed, set OV bit to 1.
15388  *
15389  * **Operations**:\n
15390  * ~~~
15391  * sa = imm5u[4:0];
15392  * if (sa != 0) {
15393  *   res[(31+sa):0] = Rs1.W[x] << sa;
15394  *   if (res > (2^31)-1) {
15395  *     res = 0x7fffffff; OV = 1;
15396  *   } else if (res < -2^31) {
15397  *     res = 0x80000000; OV = 1;
15398  *   }
15399  *   Rd.W[x] = res[31:0];
15400  * } else {
15401  *   Rd = Rs1;
15402  * }
15403  * for RV64: x=1...0
15404  * ~~~
15405  *
15406  * \param [in]  a    unsigned long type of value stored in a
15407  * \param [in]  b    unsigned int type of value stored in b
15408  * \return value stored in unsigned long type
15409  */
__RV_KSLLI32(unsigned long a,unsigned int b)15410 __STATIC_FORCEINLINE unsigned long __RV_KSLLI32(unsigned long a, unsigned int b)
15411 {
15412     register unsigned long result;
15413     __ASM volatile("kslli32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
15414     return result;
15415 }
15416 /* ===== Inline Function End for 4.17. KSLLI32 ===== */
15417 
15418 /* ===== Inline Function Start for 4.18.1. KSLRA32 ===== */
15419 /**
15420  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
15421  * \brief KSLRA32 (SIMD 32-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
15422  * \details
15423  * **Type**: SIMD (RV64 Only)
15424  *
15425  * **Syntax**:\n
15426  * ~~~
15427  * KSLRA32 Rd, Rs1, Rs2
15428  * KSLRA32.u Rd, Rs1, Rs2
15429  * ~~~
15430  *
15431  * **Purpose**:\n
15432  * Do 32-bit elements logical left (positive) or arithmetic right (negative) shift operation with
15433  * Q31 saturation for the left shift. The `.u` form performs additional rounding up operations for the
15434  * right shift.
15435  *
15436  * **Description**:\n
15437  * The 32-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
 * based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-2^5, 2^5-1]. A positive Rs2[5:0] means
 * logical left shift and a negative Rs2[5:0] means arithmetic right shift. The shift amount is the
 * absolute value of Rs2[5:0]. However, the behavior of `Rs2[5:0]==-2^5 (0x20)` is defined to be
 * equivalent to the behavior of `Rs2[5:0]==-(2^5-1) (0x21)`.
15442  * The left-shifted results are saturated to the 32-bit signed integer range of [-2^31, 2^31-1]. For the `.u`
15443  * form of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
15444  * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
15445  * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:6] will not affect
15446  * this instruction.
15447  *
15448  * **Operations**:\n
15449  * ~~~
15450  * if (Rs2[5:0] < 0) {
15451  *   sa = -Rs2[5:0];
15452  *   sa = (sa == 32)? 31 : sa;
15453  *   if (`.u` form) {
15454  *     res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
15455  *     Rd.W[x] = res[31:0];
15456  *   } else {
15457  *     Rd.W[x] = SE32(Rs1.W[x][31:sa]);
15458  *   }
15459  * } else {
15460  *   sa = Rs2[4:0];
15461  *   res[(31+sa):0] = Rs1.W[x] <<(logic) sa;
15462  *   if (res > (2^31)-1) {
15463  *     res[31:0] = 0x7fffffff; OV = 1;
15464  *   } else if (res < -2^31) {
15465  *     res[31:0] = 0x80000000; OV = 1;
15466  *   }
15467  *   Rd.W[x] = res[31:0];
15468  * }
15469  * for RV64: x=1...0
15470  * ~~~
15471  *
15472  * \param [in]  a    unsigned long type of value stored in a
15473  * \param [in]  b    int type of value stored in b
15474  * \return value stored in unsigned long type
15475  */
__RV_KSLRA32(unsigned long a,int b)15476 __STATIC_FORCEINLINE unsigned long __RV_KSLRA32(unsigned long a, int b)
15477 {
15478     register unsigned long result;
15479     __ASM volatile("kslra32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
15480     return result;
15481 }
15482 /* ===== Inline Function End for 4.18.1. KSLRA32 ===== */
15483 
15484 /* ===== Inline Function Start for 4.18.2. KSLRA32.u ===== */
15485 /**
15486  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
15487  * \brief KSLRA32.u (SIMD 32-bit Shift Left Logical with Saturation or Rounding Shift Right Arithmetic)
15488  * \details
15489  * **Type**: SIMD (RV64 Only)
15490  *
15491  * **Syntax**:\n
15492  * ~~~
15493  * KSLRA32 Rd, Rs1, Rs2
15494  * KSLRA32.u Rd, Rs1, Rs2
15495  * ~~~
15496  *
15497  * **Purpose**:\n
15498  * Do 32-bit elements logical left (positive) or arithmetic right (negative) shift operation with
15499  * Q31 saturation for the left shift. The `.u` form performs additional rounding up operations for the
15500  * right shift.
15501  *
15502  * **Description**:\n
15503  * The 32-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
 * based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-2^5, 2^5-1]. A positive Rs2[5:0] means
 * logical left shift and a negative Rs2[5:0] means arithmetic right shift. The shift amount is the
 * absolute value of Rs2[5:0]. However, the behavior of `Rs2[5:0]==-2^5 (0x20)` is defined to be
 * equivalent to the behavior of `Rs2[5:0]==-(2^5-1) (0x21)`.
15508  * The left-shifted results are saturated to the 32-bit signed integer range of [-2^31, 2^31-1]. For the `.u`
15509  * form of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
15510  * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
15511  * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:6] will not affect
15512  * this instruction.
15513  *
15514  * **Operations**:\n
15515  * ~~~
15516  * if (Rs2[5:0] < 0) {
15517  *   sa = -Rs2[5:0];
15518  *   sa = (sa == 32)? 31 : sa;
15519  *   if (`.u` form) {
15520  *     res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
15521  *     Rd.W[x] = res[31:0];
15522  *   } else {
15523  *     Rd.W[x] = SE32(Rs1.W[x][31:sa]);
15524  *   }
15525  * } else {
15526  *   sa = Rs2[4:0];
15527  *   res[(31+sa):0] = Rs1.W[x] <<(logic) sa;
15528  *   if (res > (2^31)-1) {
15529  *     res[31:0] = 0x7fffffff; OV = 1;
15530  *   } else if (res < -2^31) {
15531  *     res[31:0] = 0x80000000; OV = 1;
15532  *   }
15533  *   Rd.W[x] = res[31:0];
15534  * }
15535  * for RV64: x=1...0
15536  * ~~~
15537  *
15538  * \param [in]  a    unsigned long type of value stored in a
15539  * \param [in]  b    int type of value stored in b
15540  * \return value stored in unsigned long type
15541  */
__RV_KSLRA32_U(unsigned long a,int b)15542 __STATIC_FORCEINLINE unsigned long __RV_KSLRA32_U(unsigned long a, int b)
15543 {
15544     register unsigned long result;
15545     __ASM volatile("kslra32.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
15546     return result;
15547 }
15548 /* ===== Inline Function End for 4.18.2. KSLRA32.u ===== */
15549 
15550 /* ===== Inline Function Start for 4.19. KSTAS32 ===== */
15551 /**
15552  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
15553  * \brief KSTAS32 (SIMD 32-bit Signed Saturating Straight Addition & Subtraction)
15554  * \details
15555  * **Type**: SIMD (RV64 Only)
15556  *
15557  * **Syntax**:\n
15558  * ~~~
15559  * KSTAS32 Rd, Rs1, Rs2
15560  * ~~~
15561  *
15562  * **Purpose**:\n
15563  * Do 32-bit signed integer element saturating addition and 32-bit signed integer element
15564  * saturating subtraction in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit
15565  * elements.
15566  *
15567  * **Description**:\n
15568  * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit
15569  * integer element in [63:32] of Rs2; at the same time, it subtracts the 32-bit integer element in [31:0] of
15570  * Rs2 from the 32-bit integer element in [31:0] of Rs1. If any of the results are beyond the Q31 number
15571  * range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated
15572  * results are written to [63:32] of Rd for addition and [31:0] of Rd for subtraction.
15573  *
15574  * **Operations**:\n
15575  * ~~~
15576  * res[1] = Rs1.W[1] + Rs2.W[1];
15577  * res[0] = Rs1.W[0] - Rs2.W[0];
15578  * if (res[x] > (2^31)-1) {
15579  *   res[x] = (2^31)-1;
15580  *   OV = 1;
 * } else if (res[x] < -2^31) {
15582  *   res[x] = -2^31;
15583  *   OV = 1;
15584  * }
15585  * Rd.W[1] = res[1];
15586  * Rd.W[0] = res[0];
15587  * for RV64, x=1...0
15588  * ~~~
15589  *
15590  * \param [in]  a    unsigned long type of value stored in a
15591  * \param [in]  b    unsigned long type of value stored in b
15592  * \return value stored in unsigned long type
15593  */
__RV_KSTAS32(unsigned long a,unsigned long b)15594 __STATIC_FORCEINLINE unsigned long __RV_KSTAS32(unsigned long a, unsigned long b)
15595 {
15596     register unsigned long result;
15597     __ASM volatile("kstas32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
15598     return result;
15599 }
15600 /* ===== Inline Function End for 4.19. KSTAS32 ===== */
15601 
15602 /* ===== Inline Function Start for 4.20. KSTSA32 ===== */
15603 /**
15604  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
15605  * \brief KSTSA32 (SIMD 32-bit Signed Saturating Straight Subtraction & Addition)
15606  * \details
 * **Type**: SIMD (RV64 Only)
15608  *
15609  * **Syntax**:\n
15610  * ~~~
15611  * KSTSA32 Rd, Rs1, Rs2
15612  * ~~~
15613  *
15614  * **Purpose**:\n
15615  * Do 32-bit signed integer element saturating subtraction and 32-bit signed integer element
15616  * saturating addition in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit
15617  * elements.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer
 * element in [63:32] of Rs1; at the same time, it adds the 32-bit integer element in [31:0] of Rs1 with
 * the 32-bit integer element in [31:0] of Rs2. If any of the results are beyond the Q31 number range
 * (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated
 * results are written to [63:32] of Rd for subtraction and [31:0] of Rd for addition.
15624  *
15625  * **Operations**:\n
15626  * ~~~
15627  * res[1] = Rs1.W[1] - Rs2.W[1];
15628  * res[0] = Rs1.W[0] + Rs2.W[0];
15629  * if (res[x] > (2^31)-1) {
15630  *   res[x] = (2^31)-1;
15631  *   OV = 1;
 * } else if (res[x] < -2^31) {
15633  *   res[x] = -2^31;
15634  *   OV = 1;
15635  * }
15636  * Rd.W[1] = res[1];
15637  * Rd.W[0] = res[0];
15638  * for RV64, x=1...0
15639  * ~~~
15640  *
15641  * \param [in]  a    unsigned long type of value stored in a
15642  * \param [in]  b    unsigned long type of value stored in b
15643  * \return value stored in unsigned long type
15644  */
__RV_KSTSA32(unsigned long a,unsigned long b)15645 __STATIC_FORCEINLINE unsigned long __RV_KSTSA32(unsigned long a, unsigned long b)
15646 {
15647     register unsigned long result;
15648     __ASM volatile("kstsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
15649     return result;
15650 }
15651 /* ===== Inline Function End for 4.20. KSTSA32 ===== */
15652 
15653 /* ===== Inline Function Start for 4.21. KSUB32 ===== */
15654 /**
15655  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
15656  * \brief KSUB32 (SIMD 32-bit Signed Saturating Subtraction)
15657  * \details
15658  * **Type**: SIMD (RV64 Only)
15659  *
15660  * **Syntax**:\n
15661  * ~~~
15662  * KSUB32 Rd, Rs1, Rs2
15663  * ~~~
15664  *
15665  * **Purpose**:\n
15666  * Do 32-bit signed integer elements saturating subtractions simultaneously.
15667  *
15668  * **Description**:\n
15669  * This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit
15670  * signed integer elements in Rs1. If any of the results are beyond the Q31 number range (-2^31 <= Q31 <=
15671  * 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to
15672  * Rd.
15673  *
15674  * **Operations**:\n
15675  * ~~~
15676  * res[x] = Rs1.W[x] - Rs2.W[x];
15677  * if (res[x] > (2^31)-1) {
15678  *   res[x] = (2^31)-1;
15679  *   OV = 1;
15680  * } else if (res[x] < -2^31) {
15681  *   res[x] = -2^31;
15682  *   OV = 1;
15683  * }
15684  * Rd.W[x] = res[x];
15685  * for RV64: x=1...0
15686  * ~~~
15687  *
15688  * \param [in]  a    unsigned long type of value stored in a
15689  * \param [in]  b    unsigned long type of value stored in b
15690  * \return value stored in unsigned long type
15691  */
__RV_KSUB32(unsigned long a,unsigned long b)15692 __STATIC_FORCEINLINE unsigned long __RV_KSUB32(unsigned long a, unsigned long b)
15693 {
15694     register unsigned long result;
15695     __ASM volatile("ksub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
15696     return result;
15697 }
15698 /* ===== Inline Function End for 4.21. KSUB32 ===== */
15699 
15700 /* ===== Inline Function Start for 4.22.1. PKBB32 ===== */
15701 /**
15702  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK
15703  * \brief PKBB32 (Pack Two 32-bit Data from Both Bottom Half)
15704  * \details
15705  * **Type**: DSP (RV64 Only)
15706  *
15707  * **Syntax**:\n
15708  * ~~~
15709  * PKBB32 Rd, Rs1, Rs2
15710  * PKBT32 Rd, Rs1, Rs2
15711  * PKTT32 Rd, Rs1, Rs2
15712  * PKTB32 Rd, Rs1, Rs2
15713  * ~~~
15714  *
15715  * **Purpose**:\n
15716  * Pack 32-bit data from 64-bit chunks in two registers.
15717  * * PKBB32: bottom.bottom
15718  * * PKBT32: bottom.top
15719  * * PKTT32: top.top
15720  * * PKTB32: top.bottom
15721  *
15722  * **Description**:\n
15723  * (PKBB32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
15724  * (PKBT32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
15725  * (PKTT32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
15726  * (PKTB32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
15727  *
15728  * **Operations**:\n
15729  * ~~~
 * Rd = CONCAT(Rs1.W[0], Rs2.W[0]); // PKBB32
 * Rd = CONCAT(Rs1.W[0], Rs2.W[1]); // PKBT32
 * Rd = CONCAT(Rs1.W[1], Rs2.W[1]); // PKTT32
 * Rd = CONCAT(Rs1.W[1], Rs2.W[0]); // PKTB32
15734  * ~~~
15735  *
15736  * \param [in]  a    unsigned long type of value stored in a
15737  * \param [in]  b    unsigned long type of value stored in b
15738  * \return value stored in unsigned long type
15739  */
__RV_PKBB32(unsigned long a,unsigned long b)15740 __STATIC_FORCEINLINE unsigned long __RV_PKBB32(unsigned long a, unsigned long b)
15741 {
15742     register unsigned long result;
15743     __ASM volatile("pkbb32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
15744     return result;
15745 }
15746 /* ===== Inline Function End for 4.22.1. PKBB32 ===== */
15747 
15748 /* ===== Inline Function Start for 4.22.2. PKBT32 ===== */
15749 /**
15750  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK
15751  * \brief PKBT32 (Pack Two 32-bit Data from Bottom and Top Half)
15752  * \details
15753  * **Type**: DSP (RV64 Only)
15754  *
15755  * **Syntax**:\n
15756  * ~~~
15757  * PKBB32 Rd, Rs1, Rs2
15758  * PKBT32 Rd, Rs1, Rs2
15759  * PKTT32 Rd, Rs1, Rs2
15760  * PKTB32 Rd, Rs1, Rs2
15761  * ~~~
15762  *
15763  * **Purpose**:\n
15764  * Pack 32-bit data from 64-bit chunks in two registers.
15765  * * PKBB32: bottom.bottom
15766  * * PKBT32: bottom.top
15767  * * PKTT32: top.top
15768  * * PKTB32: top.bottom
15769  *
15770  * **Description**:\n
15771  * (PKBB32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
15772  * (PKBT32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
15773  * (PKTT32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
15774  * (PKTB32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
15775  *
15776  * **Operations**:\n
15777  * ~~~
 * Rd = CONCAT(Rs1.W[0], Rs2.W[0]); // PKBB32
 * Rd = CONCAT(Rs1.W[0], Rs2.W[1]); // PKBT32
 * Rd = CONCAT(Rs1.W[1], Rs2.W[1]); // PKTT32
 * Rd = CONCAT(Rs1.W[1], Rs2.W[0]); // PKTB32
15782  * ~~~
15783  *
15784  * \param [in]  a    unsigned long type of value stored in a
15785  * \param [in]  b    unsigned long type of value stored in b
15786  * \return value stored in unsigned long type
15787  */
__RV_PKBT32(unsigned long a,unsigned long b)15788 __STATIC_FORCEINLINE unsigned long __RV_PKBT32(unsigned long a, unsigned long b)
15789 {
15790     register unsigned long result;
15791     __ASM volatile("pkbt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
15792     return result;
15793 }
15794 /* ===== Inline Function End for 4.22.2. PKBT32 ===== */
15795 
15796 /* ===== Inline Function Start for 4.22.3. PKTT32 ===== */
15797 /**
15798  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK
15799  * \brief PKTT32 (Pack Two 32-bit Data from Both Top Half)
15800  * \details
15801  * **Type**: DSP (RV64 Only)
15802  *
15803  * **Syntax**:\n
15804  * ~~~
15805  * PKBB32 Rd, Rs1, Rs2
15806  * PKBT32 Rd, Rs1, Rs2
15807  * PKTT32 Rd, Rs1, Rs2
15808  * PKTB32 Rd, Rs1, Rs2
15809  * ~~~
15810  *
15811  * **Purpose**:\n
15812  * Pack 32-bit data from 64-bit chunks in two registers.
15813  * * PKBB32: bottom.bottom
15814  * * PKBT32: bottom.top
15815  * * PKTT32: top.top
15816  * * PKTB32: top.bottom
15817  *
15818  * **Description**:\n
15819  * (PKBB32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
15820  * (PKBT32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
15821  * (PKTT32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
15822  * (PKTB32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
15823  *
15824  * **Operations**:\n
15825  * ~~~
 * Rd = CONCAT(Rs1.W[0], Rs2.W[0]); // PKBB32
 * Rd = CONCAT(Rs1.W[0], Rs2.W[1]); // PKBT32
 * Rd = CONCAT(Rs1.W[1], Rs2.W[1]); // PKTT32
 * Rd = CONCAT(Rs1.W[1], Rs2.W[0]); // PKTB32
15830  * ~~~
15831  *
15832  * \param [in]  a    unsigned long type of value stored in a
15833  * \param [in]  b    unsigned long type of value stored in b
15834  * \return value stored in unsigned long type
15835  */
__RV_PKTT32(unsigned long a,unsigned long b)15836 __STATIC_FORCEINLINE unsigned long __RV_PKTT32(unsigned long a, unsigned long b)
15837 {
15838     register unsigned long result;
15839     __ASM volatile("pktt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
15840     return result;
15841 }
15842 /* ===== Inline Function End for 4.22.3. PKTT32 ===== */
15843 
15844 /* ===== Inline Function Start for 4.22.4. PKTB32 ===== */
15845 /**
15846  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK
15847  * \brief PKTB32 (Pack Two 32-bit Data from Top and Bottom Half)
15848  * \details
15849  * **Type**: DSP (RV64 Only)
15850  *
15851  * **Syntax**:\n
15852  * ~~~
15853  * PKBB32 Rd, Rs1, Rs2
15854  * PKBT32 Rd, Rs1, Rs2
15855  * PKTT32 Rd, Rs1, Rs2
15856  * PKTB32 Rd, Rs1, Rs2
15857  * ~~~
15858  *
15859  * **Purpose**:\n
15860  * Pack 32-bit data from 64-bit chunks in two registers.
15861  * * PKBB32: bottom.bottom
15862  * * PKBT32: bottom.top
15863  * * PKTT32: top.top
15864  * * PKTB32: top.bottom
15865  *
15866  * **Description**:\n
15867  * (PKBB32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
15868  * (PKBT32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
15869  * (PKTT32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
15870  * (PKTB32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
15871  *
15872  * **Operations**:\n
15873  * ~~~
 * Rd = CONCAT(Rs1.W[0], Rs2.W[0]); // PKBB32
 * Rd = CONCAT(Rs1.W[0], Rs2.W[1]); // PKBT32
 * Rd = CONCAT(Rs1.W[1], Rs2.W[1]); // PKTT32
 * Rd = CONCAT(Rs1.W[1], Rs2.W[0]); // PKTB32
15878  * ~~~
15879  *
15880  * \param [in]  a    unsigned long type of value stored in a
15881  * \param [in]  b    unsigned long type of value stored in b
15882  * \return value stored in unsigned long type
15883  */
__RV_PKTB32(unsigned long a,unsigned long b)15884 __STATIC_FORCEINLINE unsigned long __RV_PKTB32(unsigned long a, unsigned long b)
15885 {
15886     register unsigned long result;
15887     __ASM volatile("pktb32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
15888     return result;
15889 }
15890 /* ===== Inline Function End for 4.22.4. PKTB32 ===== */
15891 
15892 /* ===== Inline Function Start for 4.23. RADD32 ===== */
15893 /**
15894  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
15895  * \brief RADD32 (SIMD 32-bit Signed Halving Addition)
15896  * \details
15897  * **Type**: SIMD (RV64 Only)
15898  *
15899  * **Syntax**:\n
15900  * ~~~
15901  * RADD32 Rd, Rs1, Rs2
15902  * ~~~
15903  *
15904  * **Purpose**:\n
15905  * Do 32-bit signed integer element additions simultaneously. The results are halved to avoid
15906  * overflow or saturation.
15907  *
15908  * **Description**:\n
15909  * This instruction adds the 32-bit signed integer elements in Rs1 with the 32-bit signed
15910  * integer elements in Rs2. The results are first arithmetically right-shifted by 1 bit and then written to
15911  * Rd.
15912  *
15913  * **Examples**:\n
15914  * ~~~
15915  * * Rs1 = 0x7FFFFFFF, Rs2 = 0x7FFFFFFF Rd = 0x7FFFFFFF
15916  * * Rs1 = 0x80000000, Rs2 = 0x80000000 Rd = 0x80000000
15917  * * Rs1 = 0x40000000, Rs2 = 0x80000000 Rd = 0xE0000000
15918  * ~~~
15919  *
15920  * **Operations**:\n
15921  * ~~~
15922  * Rd.W[x] = (Rs1.W[x] + Rs2.W[x]) s>> 1;
15923  * for RV64: x=1...0
15924  * ~~~
15925  *
15926  * \param [in]  a    unsigned long type of value stored in a
15927  * \param [in]  b    unsigned long type of value stored in b
15928  * \return value stored in unsigned long type
15929  */
__RV_RADD32(unsigned long a,unsigned long b)15930 __STATIC_FORCEINLINE unsigned long __RV_RADD32(unsigned long a, unsigned long b)
15931 {
15932     register unsigned long result;
15933     __ASM volatile("radd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
15934     return result;
15935 }
15936 /* ===== Inline Function End for 4.23. RADD32 ===== */
15937 
15938 /* ===== Inline Function Start for 4.24. RCRAS32 ===== */
15939 /**
15940  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
15941  * \brief RCRAS32 (SIMD 32-bit Signed Halving Cross Addition & Subtraction)
15942  * \details
15943  * **Type**: SIMD (RV64 Only)
15944  *
15945  * **Syntax**:\n
15946  * ~~~
15947  * RCRAS32 Rd, Rs1, Rs2
15948  * ~~~
15949  *
15950  * **Purpose**:\n
15951  * Do 32-bit signed integer element addition and 32-bit signed integer element subtraction in
15952  * a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The results are halved to
15953  * avoid overflow or saturation.
15954  *
15955  * **Description**:\n
15956  * This instruction adds the 32-bit signed integer element in [63:32] of Rs1 with the 32-bit
15957  * signed integer element in [31:0] of Rs2, and subtracts the 32-bit signed integer element in [63:32] of
15958  * Rs2 from the 32-bit signed integer element in [31:0] of Rs1. The element results are first
15959  * arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd
15960  * for subtraction.
15961  *
15962  * **Examples**:\n
15963  * ~~~
15964  * Please see `RADD32` and `RSUB32` instructions.
15965  * ~~~
15966  *
15967  * **Operations**:\n
15968  * ~~~
15969  * Rd.W[1] = (Rs1.W[1] + Rs2.W[0]) s>> 1;
15970  * Rd.W[0] = (Rs1.W[0] - Rs2.W[1]) s>> 1;
15971  * ~~~
15972  *
15973  * \param [in]  a    unsigned long type of value stored in a
15974  * \param [in]  b    unsigned long type of value stored in b
15975  * \return value stored in unsigned long type
15976  */
__RV_RCRAS32(unsigned long a,unsigned long b)15977 __STATIC_FORCEINLINE unsigned long __RV_RCRAS32(unsigned long a, unsigned long b)
15978 {
15979     register unsigned long result;
15980     __ASM volatile("rcras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
15981     return result;
15982 }
15983 /* ===== Inline Function End for 4.24. RCRAS32 ===== */
15984 
15985 /* ===== Inline Function Start for 4.25. RCRSA32 ===== */
15986 /**
15987  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
15988  * \brief RCRSA32 (SIMD 32-bit Signed Halving Cross Subtraction & Addition)
15989  * \details
15990  * **Type**: SIMD (RV64 Only)
15991  *
15992  * **Syntax**:\n
15993  * ~~~
15994  * RCRSA32 Rd, Rs1, Rs2
15995  * ~~~
15996  *
15997  * **Purpose**:\n
15998  * Do 32-bit signed integer element subtraction and 32-bit signed integer element addition in
15999  * a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The results are halved to
16000  * avoid overflow or saturation.
16001  *
16002  * **Description**:\n
16003  * This instruction subtracts the 32-bit signed integer element in [31:0] of Rs2 from the
16004  * 32-bit signed integer element in [63:32] of Rs1, and adds the 32-bit signed element integer in [31:0]
16005  * of Rs1 with the 32-bit signed integer element in [63:32] of Rs2. The two results are first
16006  * arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of
16007  * Rd for addition.
16008  *
16009  * **Examples**:\n
16010  * ~~~
16011  * Please see `RADD32` and `RSUB32` instructions.
16012  * ~~~
16013  *
16014  * **Operations**:\n
16015  * ~~~
16016  * Rd.W[1] = (Rs1.W[1] - Rs2.W[0]) s>> 1;
16017  * Rd.W[0] = (Rs1.W[0] + Rs2.W[1]) s>> 1;
16018  * ~~~
16019  *
16020  * \param [in]  a    unsigned long type of value stored in a
16021  * \param [in]  b    unsigned long type of value stored in b
16022  * \return value stored in unsigned long type
16023  */
__RV_RCRSA32(unsigned long a,unsigned long b)16024 __STATIC_FORCEINLINE unsigned long __RV_RCRSA32(unsigned long a, unsigned long b)
16025 {
16026     register unsigned long result;
16027     __ASM volatile("rcrsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16028     return result;
16029 }
16030 /* ===== Inline Function End for 4.25. RCRSA32 ===== */
16031 
16032 /* ===== Inline Function Start for 4.26. RSTAS32 ===== */
16033 /**
16034  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
16035  * \brief RSTAS32 (SIMD 32-bit Signed Halving Straight Addition & Subtraction)
16036  * \details
16037  * **Type**: SIMD (RV64 Only)
16038  *
16039  * **Syntax**:\n
16040  * ~~~
16041  * RSTAS32 Rd, Rs1, Rs2
16042  * ~~~
16043  *
16044  * **Purpose**:\n
16045  * Do 32-bit signed integer element addition and 32-bit signed integer element subtraction in
16046  * a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements. The results are
16047  * halved to avoid overflow or saturation.
16048  *
16049  * **Description**:\n
16050  * This instruction adds the 32-bit signed integer element in [63:32] of Rs1 with the 32-bit
16051  * signed integer element in [63:32] of Rs2, and subtracts the 32-bit signed integer element in [31:0] of
16052  * Rs2 from the 32-bit signed integer element in [31:0] of Rs1. The element results are first
16053  * arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd
16054  * for subtraction.
16055  *
16056  * **Examples**:\n
16057  * ~~~
16058  * Please see `RADD32` and `RSUB32` instructions.
16059  * ~~~
16060  *
16061  * **Operations**:\n
16062  * ~~~
16063  * Rd.W[1] = (Rs1.W[1] + Rs2.W[1]) s>> 1;
16064  * Rd.W[0] = (Rs1.W[0] - Rs2.W[0]) s>> 1;
16065  * ~~~
16066  *
16067  * \param [in]  a    unsigned long type of value stored in a
16068  * \param [in]  b    unsigned long type of value stored in b
16069  * \return value stored in unsigned long type
16070  */
__RV_RSTAS32(unsigned long a,unsigned long b)16071 __STATIC_FORCEINLINE unsigned long __RV_RSTAS32(unsigned long a, unsigned long b)
16072 {
16073     register unsigned long result;
16074     __ASM volatile("rstas32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16075     return result;
16076 }
16077 /* ===== Inline Function End for 4.26. RSTAS32 ===== */
16078 
16079 /* ===== Inline Function Start for 4.27. RSTSA32 ===== */
16080 /**
16081  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
16082  * \brief RSTSA32 (SIMD 32-bit Signed Halving Straight Subtraction & Addition)
16083  * \details
16084  * **Type**: SIMD (RV64 Only)
16085  *
16086  * **Syntax**:\n
16087  * ~~~
16088  * RSTSA32 Rd, Rs1, Rs2
16089  * ~~~
16090  *
16091  * **Purpose**:\n
16092  * Do 32-bit signed integer element subtraction and 32-bit signed integer element addition in
16093  * a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements. The results are
16094  * halved to avoid overflow or saturation.
16095  *
16096  * **Description**:\n
16097  * This instruction subtracts the 32-bit signed integer element in [63:32] of Rs2 from the
16098  * 32-bit signed integer element in [63:32] of Rs1, and adds the 32-bit signed element integer in [31:0]
16099  * of Rs1 with the 32-bit signed integer element in [31:0] of Rs2. The two results are first arithmetically
16100  * right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of Rd for addition.
16101  *
16102  * **Examples**:\n
16103  * ~~~
16104  * Please see `RADD32` and `RSUB32` instructions.
16105  * ~~~
16106  *
16107  * **Operations**:\n
16108  * ~~~
16109  * Rd.W[1] = (Rs1.W[1] - Rs2.W[1]) s>> 1;
16110  * Rd.W[0] = (Rs1.W[0] + Rs2.W[0]) s>> 1;
16111  * ~~~
16112  *
16113  * \param [in]  a    unsigned long type of value stored in a
16114  * \param [in]  b    unsigned long type of value stored in b
16115  * \return value stored in unsigned long type
16116  */
__RV_RSTSA32(unsigned long a,unsigned long b)16117 __STATIC_FORCEINLINE unsigned long __RV_RSTSA32(unsigned long a, unsigned long b)
16118 {
16119     register unsigned long result;
16120     __ASM volatile("rstsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16121     return result;
16122 }
16123 /* ===== Inline Function End for 4.27. RSTSA32 ===== */
16124 
16125 /* ===== Inline Function Start for 4.28. RSUB32 ===== */
16126 /**
16127  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
16128  * \brief RSUB32 (SIMD 32-bit Signed Halving Subtraction)
16129  * \details
16130  * **Type**: SIMD (RV64 Only)
16131  *
16132  * **Syntax**:\n
16133  * ~~~
16134  * RSUB32 Rd, Rs1, Rs2
16135  * ~~~
16136  *
16137  * **Purpose**:\n
16138  * Do 32-bit signed integer element subtractions simultaneously. The results are halved to
16139  * avoid overflow or saturation.
16140  *
16141  * **Description**:\n
16142  * This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit
16143  * signed integer elements in Rs1. The results are first arithmetically right-shifted by 1 bit and then
16144  * written to Rd.
16145  *
16146  * **Examples**:\n
16147  * ~~~
16148  * * Ra = 0x7FFFFFFF, Rb = 0x80000000 Rt = 0x7FFFFFFF
16149  * * Ra = 0x80000000, Rb = 0x7FFFFFFF Rt = 0x80000000
16150  * * Ra = 0x80000000, Rb = 0x40000000 Rt = 0xA0000000
16151  * ~~~
16152  *
16153  * **Operations**:\n
16154  * ~~~
16155  * Rd.W[x] = (Rs1.W[x] - Rs2.W[x]) s>> 1;
16156  * for RV64: x=1...0
16157  * ~~~
16158  *
16159  * \param [in]  a    unsigned long type of value stored in a
16160  * \param [in]  b    unsigned long type of value stored in b
16161  * \return value stored in unsigned long type
16162  */
__RV_RSUB32(unsigned long a,unsigned long b)16163 __STATIC_FORCEINLINE unsigned long __RV_RSUB32(unsigned long a, unsigned long b)
16164 {
16165     register unsigned long result;
16166     __ASM volatile("rsub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16167     return result;
16168 }
16169 /* ===== Inline Function End for 4.28. RSUB32 ===== */
16170 
16171 /* ===== Inline Function Start for 4.29. SLL32 ===== */
16172 /**
16173  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
16174  * \brief SLL32 (SIMD 32-bit Shift Left Logical)
16175  * \details
16176  * **Type**: SIMD (RV64 Only)
16177  *
16178  * **Syntax**:\n
16179  * ~~~
16180  * SLL32 Rd, Rs1, Rs2
16181  * ~~~
16182  *
16183  * **Purpose**:\n
16184  * Do 32-bit elements logical left shift operations simultaneously. The shift amount is a
16185  * variable from a GPR.
16186  *
16187  * **Description**:\n
16188  * The 32-bit elements in Rs1 are left-shifted logically. And the results are written to Rd.
16189  * The shifted out bits are filled with zero and the shift amount is specified by the low-order 5-bits of
16190  * the value in the Rs2 register.
16191  *
16192  * **Operations**:\n
16193  * ~~~
16194  * sa = Rs2[4:0];
16195  * Rd.W[x] = Rs1.W[x] << sa;
16196  * for RV64: x=1...0
16197  * ~~~
16198  *
16199  * \param [in]  a    unsigned long type of value stored in a
16200  * \param [in]  b    unsigned int type of value stored in b
16201  * \return value stored in unsigned long type
16202  */
__RV_SLL32(unsigned long a,unsigned int b)16203 __STATIC_FORCEINLINE unsigned long __RV_SLL32(unsigned long a, unsigned int b)
16204 {
16205     register unsigned long result;
16206     __ASM volatile("sll32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16207     return result;
16208 }
16209 /* ===== Inline Function End for 4.29. SLL32 ===== */
16210 
16211 /* ===== Inline Function Start for 4.30. SLLI32 ===== */
16212 /**
16213  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
16214  * \brief SLLI32 (SIMD 32-bit Shift Left Logical Immediate)
16215  * \details
16216  * **Type**: SIMD (RV64 Only)
16217  *
16218  * **Syntax**:\n
16219  * ~~~
16220  * SLLI32 Rd, Rs1, imm5u[4:0]
16221  * ~~~
16222  *
16223  * **Purpose**:\n
16224  * Do 32-bit element logical left shift operations simultaneously. The shift amount is an
16225  * immediate value.
16226  *
16227  * **Description**:\n
16228  * The 32-bit elements in Rs1 are left-shifted logically. The shifted out bits are filled with
16229  * zero and the shift amount is specified by the imm5u[4:0] constant. And the results are written to Rd.
16230  *
16231  * **Operations**:\n
16232  * ~~~
16233  * sa = imm5u[4:0];
16234  * Rd.W[x] = Rs1.W[x] << sa;
16235  * for RV64: x=1...0
16236  * ~~~
16237  *
16238  * \param [in]  a    unsigned long type of value stored in a
16239  * \param [in]  b    unsigned int type of value stored in b
16240  * \return value stored in unsigned long type
16241  */
/*
 * Implemented as a macro rather than an inline function: SLLI32 encodes its
 * shift amount inside the instruction (imm5u), so `b` must be an integer
 * constant in the range 0..31. The "K" constraint (5-bit unsigned immediate)
 * lets the compiler reject anything else; the previous "r" constraint handed
 * a register name to the assembler where an immediate is required.
 * `a` is evaluated exactly once and converted to unsigned long.
 */
#define __RV_SLLI32(a, b)    \
    ({    \
        unsigned long __rv_a = (unsigned long)(a);    \
        unsigned long __rv_res;    \
        __ASM volatile("slli32 %0, %1, %2" : "=r"(__rv_res) : "r"(__rv_a), "K"(b));    \
        __rv_res;    \
    })
16248 /* ===== Inline Function End for 4.30. SLLI32 ===== */
16249 
16250 /* ===== Inline Function Start for 4.31. SMAX32 ===== */
16251 /**
16252  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
16253  * \brief SMAX32 (SIMD 32-bit Signed Maximum)
16254  * \details
16255  * **Type**: SIMD (RV64 Only)
16256  *
16257  * **Syntax**:\n
16258  * ~~~
16259  * SMAX32 Rd, Rs1, Rs2
16260  * ~~~
16261  *
16262  * **Purpose**:\n
16263  * Do 32-bit signed integer elements finding maximum operations simultaneously.
16264  *
16265  * **Description**:\n
16266  * This instruction compares the 32-bit signed integer elements in Rs1 with the 32-bit
16267  * signed integer elements in Rs2 and selects the numbers that is greater than the other one. The
16268  * selected results are written to Rd.
16269  *
16270  * **Operations**:\n
16271  * ~~~
16272  * Rd.W[x] = (Rs1.W[x] > Rs2.W[x])? Rs1.W[x] : Rs2.W[x];
16273  * for RV64: x=1...0
16274  * ~~~
16275  *
16276  * \param [in]  a    unsigned long type of value stored in a
16277  * \param [in]  b    unsigned long type of value stored in b
16278  * \return value stored in unsigned long type
16279  */
__RV_SMAX32(unsigned long a,unsigned long b)16280 __STATIC_FORCEINLINE unsigned long __RV_SMAX32(unsigned long a, unsigned long b)
16281 {
16282     register unsigned long result;
16283     __ASM volatile("smax32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16284     return result;
16285 }
16286 /* ===== Inline Function End for 4.31. SMAX32 ===== */
16287 
16288 /* ===== Inline Function Start for 4.32.1. SMBB32 ===== */
16289 /**
16290  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT
16291  * \brief SMBB32 (Signed Multiply Bottom Word & Bottom Word)
16292  * \details
16293  * **Type**: DSP (RV64 Only)
16294  *
16295  * **Syntax**:\n
16296  * ~~~
16297  * SMBB32 Rd, Rs1, Rs2
16298  * SMBT32 Rd, Rs1, Rs2
16299  * SMTT32 Rd, Rs1, Rs2
16300  * ~~~
16301  *
16302  * **Purpose**:\n
16303  * Multiply the signed 32-bit element of a register with the signed 32-bit element of another
16304  * register and write the 64-bit result to a third register.
16305  * * SMBB32: bottom*bottom
16306  * * SMBT32: bottom*top
16307  * * SMTT32: top*top
16308  *
16309  * **Description**:\n
16310  * For the `SMBB32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
16311  * element of Rs2. It is actually an alias of `MULSR64` instruction.
16312  * For the `SMBT32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
16313  * element of Rs2.
16314  * For the `SMTT32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit element
16315  * of Rs2.
16316  * The 64-bit multiplication result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as
16317  * signed integers.
16318  *
16319  * **Operations**:\n
16320  * ~~~
 * res = Rs1.W[0] * Rs2.W[0]; // SMBB32
 * res = Rs1.W[0] * Rs2.W[1]; // SMBT32
 * res = Rs1.W[1] * Rs2.W[1]; // SMTT32
 * Rd = res;
16323  * ~~~
16324  *
16325  * \param [in]  a    unsigned long type of value stored in a
16326  * \param [in]  b    unsigned long type of value stored in b
16327  * \return value stored in long type
16328  */
__RV_SMBB32(unsigned long a,unsigned long b)16329 __STATIC_FORCEINLINE long __RV_SMBB32(unsigned long a, unsigned long b)
16330 {
16331     register long result;
16332     __ASM volatile("smbb32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16333     return result;
16334 }
16335 /* ===== Inline Function End for 4.32.1. SMBB32 ===== */
16336 
16337 /* ===== Inline Function Start for 4.32.2. SMBT32 ===== */
16338 /**
16339  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT
16340  * \brief SMBT32 (Signed Multiply Bottom Word & Top Word)
16341  * \details
16342  * **Type**: DSP (RV64 Only)
16343  *
16344  * **Syntax**:\n
16345  * ~~~
16346  * SMBB32 Rd, Rs1, Rs2
16347  * SMBT32 Rd, Rs1, Rs2
16348  * SMTT32 Rd, Rs1, Rs2
16349  * ~~~
16350  *
16351  * **Purpose**:\n
16352  * Multiply the signed 32-bit element of a register with the signed 32-bit element of another
16353  * register and write the 64-bit result to a third register.
16354  * * SMBB32: bottom*bottom
16355  * * SMBT32: bottom*top
16356  * * SMTT32: top*top
16357  *
16358  * **Description**:\n
16359  * For the `SMBB32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
16360  * element of Rs2. It is actually an alias of `MULSR64` instruction.
16361  * For the `SMBT32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
16362  * element of Rs2.
16363  * For the `SMTT32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit element
16364  * of Rs2.
16365  * The 64-bit multiplication result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as
16366  * signed integers.
16367  *
16368  * **Operations**:\n
16369  * ~~~
 * res = Rs1.W[0] * Rs2.W[0]; // SMBB32
 * res = Rs1.W[0] * Rs2.W[1]; // SMBT32
 * res = Rs1.W[1] * Rs2.W[1]; // SMTT32
 * Rd = res;
16372  * ~~~
16373  *
16374  * \param [in]  a    unsigned long type of value stored in a
16375  * \param [in]  b    unsigned long type of value stored in b
16376  * \return value stored in long type
16377  */
__RV_SMBT32(unsigned long a,unsigned long b)16378 __STATIC_FORCEINLINE long __RV_SMBT32(unsigned long a, unsigned long b)
16379 {
16380     register long result;
16381     __ASM volatile("smbt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16382     return result;
16383 }
16384 /* ===== Inline Function End for 4.32.2. SMBT32 ===== */
16385 
16386 /* ===== Inline Function Start for 4.32.3. SMTT32 ===== */
16387 /**
16388  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT
16389  * \brief SMTT32 (Signed Multiply Top Word & Top Word)
16390  * \details
16391  * **Type**: DSP (RV64 Only)
16392  *
16393  * **Syntax**:\n
16394  * ~~~
16395  * SMBB32 Rd, Rs1, Rs2
16396  * SMBT32 Rd, Rs1, Rs2
16397  * SMTT32 Rd, Rs1, Rs2
16398  * ~~~
16399  *
16400  * **Purpose**:\n
16401  * Multiply the signed 32-bit element of a register with the signed 32-bit element of another
16402  * register and write the 64-bit result to a third register.
16403  * * SMBB32: bottom*bottom
16404  * * SMBT32: bottom*top
16405  * * SMTT32: top*top
16406  *
16407  * **Description**:\n
16408  * For the `SMBB32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
16409  * element of Rs2. It is actually an alias of `MULSR64` instruction.
16410  * For the `SMBT32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
16411  * element of Rs2.
16412  * For the `SMTT32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit element
16413  * of Rs2.
16414  * The 64-bit multiplication result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as
16415  * signed integers.
16416  *
16417  * **Operations**:\n
16418  * ~~~
 * res = Rs1.W[0] * Rs2.W[0]; // SMBB32
 * res = Rs1.W[0] * Rs2.W[1]; // SMBT32
 * res = Rs1.W[1] * Rs2.W[1]; // SMTT32
 * Rd = res;
16421  * ~~~
16422  *
16423  * \param [in]  a    unsigned long type of value stored in a
16424  * \param [in]  b    unsigned long type of value stored in b
16425  * \return value stored in long type
16426  */
__RV_SMTT32(unsigned long a,unsigned long b)16427 __STATIC_FORCEINLINE long __RV_SMTT32(unsigned long a, unsigned long b)
16428 {
16429     register long result;
16430     __ASM volatile("smtt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16431     return result;
16432 }
16433 /* ===== Inline Function End for 4.32.3. SMTT32 ===== */
16434 
16435 /* ===== Inline Function Start for 4.33.1. SMDS32 ===== */
16436 /**
16437  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
16438  * \brief SMDS32 (Signed Multiply Two Words and Subtract)
16439  * \details
16440  * **Type**: DSP (RV64 Only)
16441  *
16442  * **Syntax**:\n
16443  * ~~~
16444  * SMDS32 Rd, Rs1, Rs2
16445  * SMDRS32 Rd, Rs1, Rs2
16446  * SMXDS32 Rd, Rs1, Rs2
16447  * ~~~
16448  *
16449  * **Purpose**:\n
 * Do two signed 32-bit multiplications from the 32-bit elements of two registers; and then
16451  * perform a subtraction operation between the two 64-bit results.
16452  * * SMDS32: top*top - bottom*bottom
16453  * * SMDRS32: bottom*bottom - top*top
16454  * * SMXDS32: top*bottom - bottom*top
16455  *
16456  * **Description**:\n
16457  * For the `SMDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
16458  * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
16459  * Rs1 with the top 32-bit element of Rs2.
16460  * For the `SMDRS32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit
16461  * element of Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
16462  * element of Rs1 with the bottom 32-bit element of Rs2.
16463  * For the `SMXDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
16464  * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
16465  * Rs1 with the bottom 32-bit element of Rs2.
16466  * The subtraction result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
16467  * integers.
16468  *
16469  * **Operations**:\n
16470  * ~~~
16471  * Rt = (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // SMDS32
16472  * Rt = (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // SMDRS32
16473  * Rt = (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // SMXDS32
16474  * ~~~
16475  *
16476  * \param [in]  a    unsigned long type of value stored in a
16477  * \param [in]  b    unsigned long type of value stored in b
16478  * \return value stored in long type
16479  */
__RV_SMDS32(unsigned long a,unsigned long b)16480 __STATIC_FORCEINLINE long __RV_SMDS32(unsigned long a, unsigned long b)
16481 {
16482     register long result;
16483     __ASM volatile("smds32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16484     return result;
16485 }
16486 /* ===== Inline Function End for 4.33.1. SMDS32 ===== */
16487 
16488 /* ===== Inline Function Start for 4.33.2. SMDRS32 ===== */
16489 /**
16490  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
16491  * \brief SMDRS32 (Signed Multiply Two Words and Reverse Subtract)
16492  * \details
16493  * **Type**: DSP (RV64 Only)
16494  *
16495  * **Syntax**:\n
16496  * ~~~
16497  * SMDS32 Rd, Rs1, Rs2
16498  * SMDRS32 Rd, Rs1, Rs2
16499  * SMXDS32 Rd, Rs1, Rs2
16500  * ~~~
16501  *
16502  * **Purpose**:\n
 * Do two signed 32-bit multiplications from the 32-bit elements of two registers; and then
16504  * perform a subtraction operation between the two 64-bit results.
16505  * * SMDS32: top*top - bottom*bottom
16506  * * SMDRS32: bottom*bottom - top*top
16507  * * SMXDS32: top*bottom - bottom*top
16508  *
16509  * **Description**:\n
16510  * For the `SMDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
16511  * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
16512  * Rs1 with the top 32-bit element of Rs2.
16513  * For the `SMDRS32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit
16514  * element of Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
16515  * element of Rs1 with the bottom 32-bit element of Rs2.
16516  * For the `SMXDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
16517  * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
16518  * Rs1 with the bottom 32-bit element of Rs2.
16519  * The subtraction result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
16520  * integers.
16521  *
16522  * **Operations**:\n
16523  * ~~~
16524  * Rt = (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // SMDS32
16525  * Rt = (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // SMDRS32
16526  * Rt = (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // SMXDS32
16527  * ~~~
16528  *
16529  * \param [in]  a    unsigned long type of value stored in a
16530  * \param [in]  b    unsigned long type of value stored in b
16531  * \return value stored in long type
16532  */
__RV_SMDRS32(unsigned long a,unsigned long b)16533 __STATIC_FORCEINLINE long __RV_SMDRS32(unsigned long a, unsigned long b)
16534 {
16535     register long result;
16536     __ASM volatile("smdrs32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16537     return result;
16538 }
16539 /* ===== Inline Function End for 4.33.2. SMDRS32 ===== */
16540 
16541 /* ===== Inline Function Start for 4.33.3. SMXDS32 ===== */
16542 /**
16543  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
16544  * \brief SMXDS32 (Signed Crossed Multiply Two Words and Subtract)
16545  * \details
16546  * **Type**: DSP (RV64 Only)
16547  *
16548  * **Syntax**:\n
16549  * ~~~
16550  * SMDS32 Rd, Rs1, Rs2
16551  * SMDRS32 Rd, Rs1, Rs2
16552  * SMXDS32 Rd, Rs1, Rs2
16553  * ~~~
16554  *
16555  * **Purpose**:\n
 * Do two signed 32-bit multiplications from the 32-bit elements of two registers; and then
16557  * perform a subtraction operation between the two 64-bit results.
16558  * * SMDS32: top*top - bottom*bottom
16559  * * SMDRS32: bottom*bottom - top*top
16560  * * SMXDS32: top*bottom - bottom*top
16561  *
16562  * **Description**:\n
16563  * For the `SMDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
16564  * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
16565  * Rs1 with the top 32-bit element of Rs2.
16566  * For the `SMDRS32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit
16567  * element of Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
16568  * element of Rs1 with the bottom 32-bit element of Rs2.
16569  * For the `SMXDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
16570  * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
16571  * Rs1 with the bottom 32-bit element of Rs2.
16572  * The subtraction result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
16573  * integers.
16574  *
16575  * **Operations**:\n
16576  * ~~~
16577  * Rt = (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // SMDS32
16578  * Rt = (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // SMDRS32
16579  * Rt = (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // SMXDS32
16580  * ~~~
16581  *
16582  * \param [in]  a    unsigned long type of value stored in a
16583  * \param [in]  b    unsigned long type of value stored in b
16584  * \return value stored in long type
16585  */
__RV_SMXDS32(unsigned long a,unsigned long b)16586 __STATIC_FORCEINLINE long __RV_SMXDS32(unsigned long a, unsigned long b)
16587 {
16588     register long result;
16589     __ASM volatile("smxds32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16590     return result;
16591 }
16592 /* ===== Inline Function End for 4.33.3. SMXDS32 ===== */
16593 
16594 /* ===== Inline Function Start for 4.34. SMIN32 ===== */
16595 /**
16596  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
16597  * \brief SMIN32 (SIMD 32-bit Signed Minimum)
16598  * \details
16599  * **Type**: SIMD (RV64 Only)
16600  *
16601  * **Syntax**:\n
16602  * ~~~
16603  * SMIN32 Rd, Rs1, Rs2
16604  * ~~~
16605  *
16606  * **Purpose**:\n
16607  * Do 32-bit signed integer elements finding minimum operations simultaneously.
16608  *
16609  * **Description**:\n
16610  * This instruction compares the 32-bit signed integer elements in Rs1 with the 32-bit
16611  * signed integer elements in Rs2 and selects the numbers that is less than the other one. The selected
16612  * results are written to Rd.
16613  *
16614  * **Operations**:\n
16615  * ~~~
16616  * Rd.W[x] = (Rs1.W[x] < Rs2.W[x])? Rs1.W[x] : Rs2.W[x];
16617  * for RV64: x=1...0
16618  * ~~~
16619  *
16620  * \param [in]  a    unsigned long type of value stored in a
16621  * \param [in]  b    unsigned long type of value stored in b
16622  * \return value stored in unsigned long type
16623  */
__RV_SMIN32(unsigned long a,unsigned long b)16624 __STATIC_FORCEINLINE unsigned long __RV_SMIN32(unsigned long a, unsigned long b)
16625 {
16626     register unsigned long result;
16627     __ASM volatile("smin32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16628     return result;
16629 }
16630 /* ===== Inline Function End for 4.34. SMIN32 ===== */
16631 
16632 /* ===== Inline Function Start for 4.35.1. SRA32 ===== */
16633 /**
16634  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
16635  * \brief SRA32 (SIMD 32-bit Shift Right Arithmetic)
16636  * \details
16637  * **Type**: SIMD (RV64 Only)
16638  *
16639  * **Syntax**:\n
16640  * ~~~
16641  * SRA32 Rd, Rs1, Rs2
16642  * SRA32.u Rd, Rs1, Rs2
16643  * ~~~
16644  *
16645  * **Purpose**:\n
16646  * Do 32-bit element arithmetic right shift operations simultaneously. The shift amount is a
16647  * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
16648  * results.
16649  *
16650  * **Description**:\n
16651  * The 32-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
16652  * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
16653  * 5-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
16654  * added to the most significant discarded bit of each 32-bit data element to calculate the final results.
16655  * And the results are written to Rd.
16656  *
16657  * **Operations**:\n
16658  * ~~~
16659  * sa = Rs2[4:0];
16660  * if (sa > 0) {
16661  *   if (`.u` form) { // SRA32.u
16662  *     res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
16663  *     Rd.W[x] = res[31:0];
 *   } else { // SRA32
16665  *     Rd.W[x] = SE32(Rs1.W[x][31:sa])
16666  *   }
16667  * } else {
16668  *   Rd = Rs1;
16669  * }
16670  * for RV64: x=1...0
16671  * ~~~
16672  *
16673  * \param [in]  a    unsigned long type of value stored in a
16674  * \param [in]  b    unsigned int type of value stored in b
16675  * \return value stored in unsigned long type
16676  */
__RV_SRA32(unsigned long a,unsigned int b)16677 __STATIC_FORCEINLINE unsigned long __RV_SRA32(unsigned long a, unsigned int b)
16678 {
16679     register unsigned long result;
16680     __ASM volatile("sra32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16681     return result;
16682 }
16683 /* ===== Inline Function End for 4.35.1. SRA32 ===== */
16684 
16685 /* ===== Inline Function Start for 4.35.2. SRA32.u ===== */
16686 /**
16687  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
16688  * \brief SRA32.u (SIMD 32-bit Rounding Shift Right Arithmetic)
16689  * \details
16690  * **Type**: SIMD (RV64 Only)
16691  *
16692  * **Syntax**:\n
16693  * ~~~
16694  * SRA32 Rd, Rs1, Rs2
16695  * SRA32.u Rd, Rs1, Rs2
16696  * ~~~
16697  *
16698  * **Purpose**:\n
16699  * Do 32-bit element arithmetic right shift operations simultaneously. The shift amount is a
16700  * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
16701  * results.
16702  *
16703  * **Description**:\n
16704  * The 32-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
16705  * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
16706  * 5-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
16707  * added to the most significant discarded bit of each 32-bit data element to calculate the final results.
16708  * And the results are written to Rd.
16709  *
16710  * **Operations**:\n
16711  * ~~~
16712  * sa = Rs2[4:0];
16713  * if (sa > 0) {
16714  *   if (`.u` form) { // SRA32.u
16715  *     res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
16716  *     Rd.W[x] = res[31:0];
 *   } else { // SRA32
16718  *     Rd.W[x] = SE32(Rs1.W[x][31:sa])
16719  *   }
16720  * } else {
16721  *   Rd = Rs1;
16722  * }
16723  * for RV64: x=1...0
16724  * ~~~
16725  *
16726  * \param [in]  a    unsigned long type of value stored in a
16727  * \param [in]  b    unsigned int type of value stored in b
16728  * \return value stored in unsigned long type
16729  */
__RV_SRA32_U(unsigned long a,unsigned int b)16730 __STATIC_FORCEINLINE unsigned long __RV_SRA32_U(unsigned long a, unsigned int b)
16731 {
16732     register unsigned long result;
16733     __ASM volatile("sra32.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16734     return result;
16735 }
16736 /* ===== Inline Function End for 4.35.2. SRA32.u ===== */
16737 
16738 /* ===== Inline Function Start for 4.36.1. SRAI32 ===== */
16739 /**
16740  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
16741  * \brief SRAI32 (SIMD 32-bit Shift Right Arithmetic Immediate)
16742  * \details
16743  * **Type**: DSP (RV64 Only)
16744  *
16745  * **Syntax**:\n
16746  * ~~~
16747  * SRAI32 Rd, Rs1, imm5u
16748  * SRAI32.u Rd, Rs1, imm5u
16749  * ~~~
16750  *
16751  * **Purpose**:\n
16752  * Do 32-bit elements arithmetic right shift operations simultaneously. The shift amount is
16753  * an immediate value. The `.u` form performs additional rounding up operations on the shifted
16754  * results.
16755  *
16756  * **Description**:\n
16757  * The 32-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
16758  * bits are filled with the sign-bit of the 32-bit data elements. The shift amount is specified by the
16759  * imm5u constant. For the rounding operation of the `.u` form, a value of 1 is added to the most
16760  * significant discarded bit of each 32-bit data to calculate the final results. And the results are written
16761  * to Rd.
16762  *
16763  * **Operations**:\n
16764  * ~~~
16765  * sa = imm5u[4:0];
16766  *   if (sa > 0) {
16767  *   if (`.u` form) { // SRAI32.u
16768  *     res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
16769  *     Rd.W[x] = res[31:0];
 *   } else { // SRAI32
16771  *     Rd.W[x] = SE32(Rs1.W[x][31:sa]);
16772  *   }
16773  * } else {
16774  *   Rd = Rs1;
16775  * }
16776  * for RV64: x=1...0
16777  * ~~~
16778  *
16779  * \param [in]  a    unsigned long type of value stored in a
16780  * \param [in]  b    unsigned int type of value stored in b
16781  * \return value stored in unsigned long type
16782  */
/*
 * Implemented as a macro rather than an inline function: SRAI32 encodes its
 * shift amount inside the instruction (imm5u), so `b` must be an integer
 * constant in the range 0..31. The "K" constraint (5-bit unsigned immediate)
 * lets the compiler reject anything else; the previous "r" constraint handed
 * a register name to the assembler where an immediate is required.
 * `a` is evaluated exactly once and converted to unsigned long.
 */
#define __RV_SRAI32(a, b)    \
    ({    \
        unsigned long __rv_a = (unsigned long)(a);    \
        unsigned long __rv_res;    \
        __ASM volatile("srai32 %0, %1, %2" : "=r"(__rv_res) : "r"(__rv_a), "K"(b));    \
        __rv_res;    \
    })
16789 /* ===== Inline Function End for 4.36.1. SRAI32 ===== */
16790 
16791 /* ===== Inline Function Start for 4.36.2. SRAI32.u ===== */
16792 /**
16793  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
16794  * \brief SRAI32.u (SIMD 32-bit Rounding Shift Right Arithmetic Immediate)
16795  * \details
16796  * **Type**: DSP (RV64 Only)
16797  *
16798  * **Syntax**:\n
16799  * ~~~
16800  * SRAI32 Rd, Rs1, imm5u
16801  * SRAI32.u Rd, Rs1, imm5u
16802  * ~~~
16803  *
16804  * **Purpose**:\n
16805  * Do 32-bit elements arithmetic right shift operations simultaneously. The shift amount is
16806  * an immediate value. The `.u` form performs additional rounding up operations on the shifted
16807  * results.
16808  *
16809  * **Description**:\n
16810  * The 32-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
16811  * bits are filled with the sign-bit of the 32-bit data elements. The shift amount is specified by the
16812  * imm5u constant. For the rounding operation of the `.u` form, a value of 1 is added to the most
16813  * significant discarded bit of each 32-bit data to calculate the final results. And the results are written
16814  * to Rd.
16815  *
16816  * **Operations**:\n
16817  * ~~~
16818  * sa = imm5u[4:0];
16819  *   if (sa > 0) {
16820  *   if (`.u` form) { // SRAI32.u
16821  *     res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
16822  *     Rd.W[x] = res[31:0];
 *   } else { // SRAI32
16824  *     Rd.W[x] = SE32(Rs1.W[x][31:sa]);
16825  *   }
16826  * } else {
16827  *   Rd = Rs1;
16828  * }
16829  * for RV64: x=1...0
16830  * ~~~
16831  *
16832  * \param [in]  a    unsigned long type of value stored in a
16833  * \param [in]  b    unsigned int type of value stored in b
16834  * \return value stored in unsigned long type
16835  */
/*
 * Implemented as a macro rather than an inline function: SRAI32.u encodes its
 * shift amount inside the instruction (imm5u), so `b` must be an integer
 * constant in the range 0..31. The "K" constraint (5-bit unsigned immediate)
 * lets the compiler reject anything else; the previous "r" constraint handed
 * a register name to the assembler where an immediate is required.
 * `a` is evaluated exactly once and converted to unsigned long.
 */
#define __RV_SRAI32_U(a, b)    \
    ({    \
        unsigned long __rv_a = (unsigned long)(a);    \
        unsigned long __rv_res;    \
        __ASM volatile("srai32.u %0, %1, %2" : "=r"(__rv_res) : "r"(__rv_a), "K"(b));    \
        __rv_res;    \
    })
16842 /* ===== Inline Function End for 4.36.2. SRAI32.u ===== */
16843 
16844 /* ===== Inline Function Start for 4.37. SRAIW.u ===== */
16845 /**
16846  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_NON_SIMD_32B_SHIFT
16847  * \brief SRAIW.u (Rounding Shift Right Arithmetic Immediate Word)
16848  * \details
16849  * **Type**: DSP (RV64 only)
16850  *
16851  * **Syntax**:\n
16852  * ~~~
16853  * SRAIW.u Rd, Rs1, imm5u
16854  * ~~~
16855  *
16856  * **Purpose**:\n
16857  * Perform a 32-bit arithmetic right shift operation with rounding. The shift amount is an
16858  * immediate value.
16859  *
16860  * **Description**:\n
16861  * This instruction right-shifts the lower 32-bit content of Rs1 arithmetically. The shifted
16862  * out bits are filled with the sign-bit Rs1(31) and the shift amount is specified by the imm5u constant.
16863  * For the rounding operation, a value of 1 is added to the most significant discarded bit of the data to
16864  * calculate the final result. And the result is sign-extended and written to Rd.
16865  *
16866  * **Operations**:\n
16867  * ~~~
16868  * sa = imm5u;
16869  * if (sa != 0) {
16870  *   res[31:-1] = SE33(Rs1[31:(sa-1)]) + 1;
16871  *   Rd = SE32(res[31:0]);
16872  * } else {
16873  *   Rd = SE32(Rs1.W[0]);
16874  * }
16875  * ~~~
16876  *
16877  * \param [in]  a    int type of value stored in a
16878  * \param [in]  b    unsigned int type of value stored in b
16879  * \return value stored in long type
16880  */
/*
 * Implemented as a macro rather than an inline function: SRAIW.u encodes its
 * shift amount inside the instruction (imm5u), so `b` must be an integer
 * constant in the range 0..31. The "K" constraint (5-bit unsigned immediate)
 * lets the compiler reject anything else; the previous "r" constraint handed
 * a register name to the assembler where an immediate is required.
 * `a` is evaluated exactly once and converted to int; the result is long.
 */
#define __RV_SRAIW_U(a, b)    \
    ({    \
        int __rv_a = (int)(a);    \
        long __rv_res;    \
        __ASM volatile("sraiw.u %0, %1, %2" : "=r"(__rv_res) : "r"(__rv_a), "K"(b));    \
        __rv_res;    \
    })
16887 /* ===== Inline Function End for 4.37. SRAIW.u ===== */
16888 
16889 /* ===== Inline Function Start for 4.38.1. SRL32 ===== */
16890 /**
16891  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
16892  * \brief SRL32 (SIMD 32-bit Shift Right Logical)
16893  * \details
16894  * **Type**: SIMD (RV64 Only)
16895  *
16896  * **Syntax**:\n
16897  * ~~~
16898  * SRL32 Rd, Rs1, Rs2
16899  * SRL32.u Rd, Rs1, Rs2
16900  * ~~~
16901  *
16902  * **Purpose**:\n
16903  * Do 32-bit element logical right shift operations simultaneously. The shift amount is a
16904  * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
16905  * results.
16906  *
16907  * **Description**:\n
16908  * The 32-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
16909  * are filled with zero. The shift amount is specified by the low-order 5-bits of the value in the Rs2
16910  * register. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
16911  * discarded bit of each 32-bit data element to calculate the final results. And the results are written to
16912  * Rd.
16913  *
16914  * **Operations**:\n
16915  * ~~~
16916  * sa = Rs2[4:0];
16917  * if (sa > 0) {
 *   if (`.u` form) { // SRL32.u
 *     res[31:-1] = ZE33(Rs1.W[x][31:sa-1]) + 1;
 *     Rd.W[x] = res[31:0];
 *   } else { // SRL32
 *     Rd.W[x] = ZE32(Rs1.W[x][31:sa]);
16923  *   }
16924  * } else {
16925  *   Rd = Rs1;
16926  * }
16927  * for RV64: x=1...0
16928  * ~~~
16929  *
16930  * \param [in]  a    unsigned long type of value stored in a
16931  * \param [in]  b    unsigned int type of value stored in b
16932  * \return value stored in unsigned long type
16933  */
__RV_SRL32(unsigned long a,unsigned int b)16934 __STATIC_FORCEINLINE unsigned long __RV_SRL32(unsigned long a, unsigned int b)
16935 {
16936     register unsigned long result;
16937     __ASM volatile("srl32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16938     return result;
16939 }
16940 /* ===== Inline Function End for 4.38.1. SRL32 ===== */
16941 
16942 /* ===== Inline Function Start for 4.38.2. SRL32.u ===== */
16943 /**
16944  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
16945  * \brief SRL32.u (SIMD 32-bit Rounding Shift Right Logical)
16946  * \details
16947  * **Type**: SIMD (RV64 Only)
16948  *
16949  * **Syntax**:\n
16950  * ~~~
16951  * SRL32 Rd, Rs1, Rs2
16952  * SRL32.u Rd, Rs1, Rs2
16953  * ~~~
16954  *
16955  * **Purpose**:\n
16956  * Do 32-bit element logical right shift operations simultaneously. The shift amount is a
16957  * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
16958  * results.
16959  *
16960  * **Description**:\n
16961  * The 32-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
16962  * are filled with zero. The shift amount is specified by the low-order 5-bits of the value in the Rs2
16963  * register. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
16964  * discarded bit of each 32-bit data element to calculate the final results. And the results are written to
16965  * Rd.
16966  *
16967  * **Operations**:\n
16968  * ~~~
16969  * sa = Rs2[4:0];
16970  * if (sa > 0) {
 *   if (`.u` form) { // SRL32.u
 *     res[31:-1] = ZE33(Rs1.W[x][31:sa-1]) + 1;
 *     Rd.W[x] = res[31:0];
 *   } else { // SRL32
 *     Rd.W[x] = ZE32(Rs1.W[x][31:sa]);
16976  *   }
16977  * } else {
16978  *   Rd = Rs1;
16979  * }
16980  * for RV64: x=1...0
16981  * ~~~
16982  *
16983  * \param [in]  a    unsigned long type of value stored in a
16984  * \param [in]  b    unsigned int type of value stored in b
16985  * \return value stored in unsigned long type
16986  */
__RV_SRL32_U(unsigned long a,unsigned int b)16987 __STATIC_FORCEINLINE unsigned long __RV_SRL32_U(unsigned long a, unsigned int b)
16988 {
16989     register unsigned long result;
16990     __ASM volatile("srl32.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16991     return result;
16992 }
16993 /* ===== Inline Function End for 4.38.2. SRL32.u ===== */
16994 
16995 /* ===== Inline Function Start for 4.39.1. SRLI32 ===== */
16996 /**
16997  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
16998  * \brief SRLI32 (SIMD 32-bit Shift Right Logical Immediate)
16999  * \details
17000  * **Type**: SIMD (RV64 Only)
17001  *
17002  * **Syntax**:\n
17003  * ~~~
17004  * SRLI32 Rd, Rs1, imm5u
17005  * SRLI32.u Rd, Rs1, imm5u
17006  * ~~~
17007  *
17008  * **Purpose**:\n
17009  * Do 32-bit elements logical right shift operations simultaneously. The shift amount is an
17010  * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
17011  *
17012  * **Description**:\n
17013  * The 32-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
17014  * are filled with zero. The shift amount is specified by the imm5u constant. For the rounding
17015  * operation of the `.u` form, a value of 1 is added to the most significant discarded bit of each 32-bit
17016  * data to calculate the final results. And the results are written to Rd.
17017  *
17018  * **Operations**:\n
17019  * ~~~
17020  * sa = imm5u[4:0];
17021  * if (sa > 0) {
17022  *   if (`.u` form) { // SRLI32.u
17023  *     res[31:-1] = ZE33(Rs1.W[x][31:sa-1]) + 1;
17024  *     Rd.W[x] = res[31:0];
 *   } else { // SRLI32
17026  *     Rd.W[x] = ZE32(Rs1.W[x][31:sa]);
17027  *   }
17028  * } else {
17029  *   Rd = Rs1;
17030  * }
17031  * for RV64: x=1...0
17032  * ~~~
17033  *
17034  * \param [in]  a    unsigned long type of value stored in a
17035  * \param [in]  b    unsigned int type of value stored in b
17036  * \return value stored in unsigned long type
17037  */
/*
 * SRLI32 encodes its shift amount as a 5-bit unsigned immediate (imm5u), so
 * `b` must reach the assembler as a literal rather than as a register name.
 * The "K" constraint accepts only a compile-time constant in the range 0..31.
 * This is a macro (GNU statement expression) so that the constant stays
 * visible to the asm statement at every optimization level.
 *
 * a: packed 32-bit elements; converted to unsigned long exactly once.
 * b: shift amount; must be an integer constant expression in 0..31.
 * Yields the instruction result as an unsigned long.
 */
#define __RV_SRLI32(a, b)    \
    ({    \
        register unsigned long __srli32_rs1 = (unsigned long)(a);    \
        register unsigned long __srli32_rd;    \
        __ASM volatile("srli32 %0, %1, %2"    \
                       : "=r"(__srli32_rd)    \
                       : "r"(__srli32_rs1), "K"(b));    \
        __srli32_rd;    \
    })
17044 /* ===== Inline Function End for 4.39.1. SRLI32 ===== */
17045 
17046 /* ===== Inline Function Start for 4.39.2. SRLI32.u ===== */
17047 /**
17048  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
17049  * \brief SRLI32.u (SIMD 32-bit Rounding Shift Right Logical Immediate)
17050  * \details
17051  * **Type**: SIMD (RV64 Only)
17052  *
17053  * **Syntax**:\n
17054  * ~~~
17055  * SRLI32 Rd, Rs1, imm5u
17056  * SRLI32.u Rd, Rs1, imm5u
17057  * ~~~
17058  *
17059  * **Purpose**:\n
17060  * Do 32-bit elements logical right shift operations simultaneously. The shift amount is an
17061  * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
17062  *
17063  * **Description**:\n
17064  * The 32-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
17065  * are filled with zero. The shift amount is specified by the imm5u constant. For the rounding
17066  * operation of the `.u` form, a value of 1 is added to the most significant discarded bit of each 32-bit
17067  * data to calculate the final results. And the results are written to Rd.
17068  *
17069  * **Operations**:\n
17070  * ~~~
17071  * sa = imm5u[4:0];
17072  * if (sa > 0) {
17073  *   if (`.u` form) { // SRLI32.u
17074  *     res[31:-1] = ZE33(Rs1.W[x][31:sa-1]) + 1;
17075  *     Rd.W[x] = res[31:0];
 *   } else { // SRLI32
17077  *     Rd.W[x] = ZE32(Rs1.W[x][31:sa]);
17078  *   }
17079  * } else {
17080  *   Rd = Rs1;
17081  * }
17082  * for RV64: x=1...0
17083  * ~~~
17084  *
17085  * \param [in]  a    unsigned long type of value stored in a
17086  * \param [in]  b    unsigned int type of value stored in b
17087  * \return value stored in unsigned long type
17088  */
/*
 * SRLI32.u encodes its shift amount as a 5-bit unsigned immediate (imm5u), so
 * `b` must reach the assembler as a literal rather than as a register name.
 * The "K" constraint accepts only a compile-time constant in the range 0..31.
 * This is a macro (GNU statement expression) so that the constant stays
 * visible to the asm statement at every optimization level.
 *
 * a: packed 32-bit elements; converted to unsigned long exactly once.
 * b: shift amount; must be an integer constant expression in 0..31.
 * Yields the instruction result as an unsigned long.
 */
#define __RV_SRLI32_U(a, b)    \
    ({    \
        register unsigned long __srli32_u_rs1 = (unsigned long)(a);    \
        register unsigned long __srli32_u_rd;    \
        __ASM volatile("srli32.u %0, %1, %2"    \
                       : "=r"(__srli32_u_rd)    \
                       : "r"(__srli32_u_rs1), "K"(b));    \
        __srli32_u_rd;    \
    })
17095 /* ===== Inline Function End for 4.39.2. SRLI32.u ===== */
17096 
17097 /* ===== Inline Function Start for 4.40. STAS32 ===== */
17098 /**
17099  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
17100  * \brief STAS32 (SIMD 32-bit Straight Addition & Subtraction)
17101  * \details
17102  * **Type**: SIMD (RV64 Only)
17103  *
17104  * **Syntax**:\n
17105  * ~~~
17106  * STAS32 Rd, Rs1, Rs2
17107  * ~~~
17108  *
17109  * **Purpose**:\n
17110  * Do 32-bit integer element addition and 32-bit integer element subtraction in a 64-bit
17111  * chunk simultaneously. Operands are from corresponding 32-bit elements.
17112  *
17113  * **Description**:\n
17114  * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit
17115  * integer element in [63:32] of Rs2, and writes the result to [63:32] of Rd; at the same time, it subtracts
17116  * the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element in [31:0] of Rs1, and
17117  * writes the result to [31:0] of Rd.
17118  *
17119  * **Note**:\n
17120  * This instruction can be used for either signed or unsigned operations.
17121  *
17122  * **Operations**:\n
17123  * ~~~
17124  * Rd.W[1] = Rs1.W[1] + Rs2.W[1];
17125  * Rd.W[0] = Rs1.W[0] - Rs2.W[0];
17126  * ~~~
17127  *
17128  * \param [in]  a    unsigned long type of value stored in a
17129  * \param [in]  b    unsigned long type of value stored in b
17130  * \return value stored in unsigned long type
17131  */
__RV_STAS32(unsigned long a,unsigned long b)17132 __STATIC_FORCEINLINE unsigned long __RV_STAS32(unsigned long a, unsigned long b)
17133 {
17134     register unsigned long result;
17135     __ASM volatile("stas32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17136     return result;
17137 }
17138 /* ===== Inline Function End for 4.40. STAS32 ===== */
17139 
17140 /* ===== Inline Function Start for 4.41. STSA32 ===== */
17141 /**
17142  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
17143  * \brief STSA32 (SIMD 32-bit Straight Subtraction & Addition)
17144  * \details
17145  * **Type**: SIMD (RV64 Only)
17146  *
17147  * **Syntax**:\n
17148  * ~~~
17149  * STSA32 Rd, Rs1, Rs2
17150  * ~~~
17151  *
17152  * **Purpose**:\n
17153  * Do 32-bit integer element subtraction and 32-bit integer element addition in a 64-bit
17154  * chunk simultaneously. Operands are from corresponding 32-bit elements.
 *
 * **Description**:\n
17156  * This instruction subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer
17157  * element in [63:32] of Rs1, and writes the result to [63:32] of Rd; at the same time, it adds the 32-bit
17158  * integer element in [31:0] of Rs1 with the 32-bit integer element in [31:0] of Rs2, and writes the result
 * to [31:0] of Rd.
17160  *
17161  * **Note**:\n
17162  * This instruction can be used for either signed or unsigned operations.
17163  *
17164  * **Operations**:\n
17165  * ~~~
17166  * Rd.W[1] = Rs1.W[1] - Rs2.W[1];
17167  * Rd.W[0] = Rs1.W[0] + Rs2.W[0];
17168  * ~~~
17169  *
17170  * \param [in]  a    unsigned long type of value stored in a
17171  * \param [in]  b    unsigned long type of value stored in b
17172  * \return value stored in unsigned long type
17173  */
__RV_STSA32(unsigned long a,unsigned long b)17174 __STATIC_FORCEINLINE unsigned long __RV_STSA32(unsigned long a, unsigned long b)
17175 {
17176     register unsigned long result;
17177     __ASM volatile("stsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17178     return result;
17179 }
17180 /* ===== Inline Function End for 4.41. STSA32 ===== */
17181 
17182 /* ===== Inline Function Start for 4.42. SUB32 ===== */
17183 /**
17184  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
17185  * \brief SUB32 (SIMD 32-bit Subtraction)
17186  * \details
17187  * **Type**: DSP (RV64 Only)
17188  *
17189  * **Syntax**:\n
17190  * ~~~
17191  * SUB32 Rd, Rs1, Rs2
17192  * ~~~
17193  *
17194  * **Purpose**:\n
17195  * Do 32-bit integer element subtractions simultaneously.
17196  *
17197  * **Description**:\n
17198  * This instruction subtracts the 32-bit integer elements in Rs2 from the 32-bit integer
17199  * elements in Rs1, and then writes the results to Rd.
17200  *
17201  * **Note**:\n
17202  * This instruction can be used for either signed or unsigned subtraction.
17203  *
17204  * **Operations**:\n
17205  * ~~~
17206  * Rd.W[x] = Rs1.W[x] - Rs2.W[x];
17207  * for RV64: x=1...0
17208  * ~~~
17209  *
17210  * \param [in]  a    unsigned long type of value stored in a
17211  * \param [in]  b    unsigned long type of value stored in b
17212  * \return value stored in unsigned long type
17213  */
__RV_SUB32(unsigned long a,unsigned long b)17214 __STATIC_FORCEINLINE unsigned long __RV_SUB32(unsigned long a, unsigned long b)
17215 {
17216     register unsigned long result;
17217     __ASM volatile("sub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17218     return result;
17219 }
17220 /* ===== Inline Function End for 4.42. SUB32 ===== */
17221 
17222 /* ===== Inline Function Start for 4.43. UKADD32 ===== */
17223 /**
17224  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
17225  * \brief UKADD32 (SIMD 32-bit Unsigned Saturating Addition)
17226  * \details
17227  * **Type**: SIMD (RV64 Only)
17228  *
17229  * **Syntax**:\n
17230  * ~~~
17231  * UKADD32 Rd, Rs1, Rs2
17232  * ~~~
17233  *
17234  * **Purpose**:\n
17235  * Do 32-bit unsigned integer element saturating additions simultaneously.
17236  *
17237  * **Description**:\n
17238  * This instruction adds the 32-bit unsigned integer elements in Rs1 with the 32-bit
17239  * unsigned integer elements in Rs2. If any of the results are beyond the 32-bit unsigned number
17240  * range (0 <= RES <= 2^32-1), they are saturated to the range and the OV bit is set to 1. The saturated
17241  * results are written to Rd.
17242  *
17243  * **Operations**:\n
17244  * ~~~
17245  * res[x] = Rs1.W[x] + Rs2.W[x];
17246  * if (res[x] > (2^32)-1) {
17247  *   res[x] = (2^32)-1;
17248  *   OV = 1;
17249  * }
17250  * Rd.W[x] = res[x];
17251  * for RV64: x=1...0
17252  * ~~~
17253  *
17254  * \param [in]  a    unsigned long type of value stored in a
17255  * \param [in]  b    unsigned long type of value stored in b
17256  * \return value stored in unsigned long type
17257  */
__RV_UKADD32(unsigned long a,unsigned long b)17258 __STATIC_FORCEINLINE unsigned long __RV_UKADD32(unsigned long a, unsigned long b)
17259 {
17260     register unsigned long result;
17261     __ASM volatile("ukadd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17262     return result;
17263 }
17264 /* ===== Inline Function End for 4.43. UKADD32 ===== */
17265 
17266 /* ===== Inline Function Start for 4.44. UKCRAS32 ===== */
17267 /**
17268  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
17269  * \brief UKCRAS32 (SIMD 32-bit Unsigned Saturating Cross Addition & Subtraction)
17270  * \details
17271  * **Type**: SIMD (RV64 Only)
17272  *
17273  * **Syntax**:\n
17274  * ~~~
17275  * UKCRAS32 Rd, Rs1, Rs2
17276  * ~~~
17277  *
17278  * **Purpose**:\n
17279  * Do one 32-bit unsigned integer element saturating addition and one 32-bit unsigned
17280  * integer element saturating subtraction in a 64-bit chunk simultaneously. Operands are from crossed
17281  * 32-bit elements.
17282  *
17283  * **Description**:\n
17284  * This instruction adds the 32-bit unsigned integer element in [63:32] of Rs1 with the 32-
17285  * bit unsigned integer element in [31:0] of Rs2; at the same time, it subtracts the 32-bit unsigned
17286  * integer element in [63:32] of Rs2 from the 32-bit unsigned integer element in [31:0] Rs1. If any of the
17287  * results are beyond the 32-bit unsigned number range (0 <= RES <= 2^32-1), they are saturated to the
17288  * range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for addition and
17289  * [31:0] of Rd for subtraction.
17290  *
17291  * **Operations**:\n
17292  * ~~~
17293  * res1 = Rs1.W[1] + Rs2.W[0];
17294  * res2 = Rs1.W[0] - Rs2.W[1];
17295  * if (res1 > (2^32)-1) {
17296  *   res1 = (2^32)-1;
17297  *   OV = 1;
17298  * }
17299  * if (res2 < 0) {
17300  *   res2 = 0;
17301  *   OV = 1;
17302  * }
17303  * Rd.W[1] = res1;
17304  * Rd.W[0] = res2;
17305  * ~~~
17306  *
17307  * \param [in]  a    unsigned long type of value stored in a
17308  * \param [in]  b    unsigned long type of value stored in b
17309  * \return value stored in unsigned long type
17310  */
__RV_UKCRAS32(unsigned long a,unsigned long b)17311 __STATIC_FORCEINLINE unsigned long __RV_UKCRAS32(unsigned long a, unsigned long b)
17312 {
17313     register unsigned long result;
17314     __ASM volatile("ukcras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17315     return result;
17316 }
17317 /* ===== Inline Function End for 4.44. UKCRAS32 ===== */
17318 
17319 /* ===== Inline Function Start for 4.45. UKCRSA32 ===== */
17320 /**
17321  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
17322  * \brief UKCRSA32 (SIMD 32-bit Unsigned Saturating Cross Subtraction & Addition)
17323  * \details
17324  * **Type**: SIMD (RV64 Only)
17325  *
17326  * **Syntax**:\n
17327  * ~~~
17328  * UKCRSA32 Rd, Rs1, Rs2
17329  * ~~~
17330  *
17331  * **Purpose**:\n
17332  * Do one 32-bit unsigned integer element saturating subtraction and one 32-bit unsigned
17333  * integer element saturating addition in a 64-bit chunk simultaneously. Operands are from crossed
17334  * 32-bit elements.
17335  *
17336  * **Description**:\n
17337  * This instruction subtracts the 32-bit unsigned integer element in [31:0] of Rs2 from the
17338  * 32-bit unsigned integer element in [63:32] of Rs1; at the same time, it adds the 32-bit unsigned
17339  * integer element in [63:32] of Rs2 with the 32-bit unsigned integer element in [31:0] Rs1. If any of the
17340  * results are beyond the 32-bit unsigned number range (0 <= RES <= 2^32-1), they are saturated to the
17341  * range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for subtraction and
17342  * [31:0] of Rd for addition.
17343  *
17344  * **Operations**:\n
17345  * ~~~
17346  * res1 = Rs1.W[1] - Rs2.W[0];
17347  * res2 = Rs1.W[0] + Rs2.W[1];
17348  * if (res1 < 0) {
17349  *   res1 = 0;
17350  *   OV = 1;
 * }
 * if (res2 > (2^32)-1) {
17352  *   res2 = (2^32)-1;
17353  *   OV = 1;
17354  * }
17355  * Rd.W[1] = res1;
17356  * Rd.W[0] = res2;
17357  * ~~~
17358  *
17359  * \param [in]  a    unsigned long type of value stored in a
17360  * \param [in]  b    unsigned long type of value stored in b
17361  * \return value stored in unsigned long type
17362  */
__RV_UKCRSA32(unsigned long a,unsigned long b)17363 __STATIC_FORCEINLINE unsigned long __RV_UKCRSA32(unsigned long a, unsigned long b)
17364 {
17365     register unsigned long result;
17366     __ASM volatile("ukcrsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17367     return result;
17368 }
17369 /* ===== Inline Function End for 4.45. UKCRSA32 ===== */
17370 
17371 /* ===== Inline Function Start for 4.46. UKSTAS32 ===== */
17372 /**
17373  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
17374  * \brief UKSTAS32 (SIMD 32-bit Unsigned Saturating Straight Addition & Subtraction)
17375  * \details
17376  * **Type**: SIMD (RV64 Only)
17377  *
17378  * **Syntax**:\n
17379  * ~~~
17380  * UKSTAS32 Rd, Rs1, Rs2
17381  * ~~~
17382  *
17383  * **Purpose**:\n
17384  * Do one 32-bit unsigned integer element saturating addition and one 32-bit unsigned
17385  * integer element saturating subtraction in a 64-bit chunk simultaneously. Operands are from
17386  * corresponding 32-bit elements.
17387  *
17388  * **Description**:\n
17389  * This instruction adds the 32-bit unsigned integer element in [63:32] of Rs1 with the 32-
17390  * bit unsigned integer element in [63:32] of Rs2; at the same time, it subtracts the 32-bit unsigned
17391  * integer element in [31:0] of Rs2 from the 32-bit unsigned integer element in [31:0] Rs1. If any of the
17392  * results are beyond the 32-bit unsigned number range (0 <= RES <= 2^32-1), they are saturated to the
17393  * range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for addition and
17394  * [31:0] of Rd for subtraction.
17395  *
17396  * **Operations**:\n
17397  * ~~~
17398  * res1 = Rs1.W[1] + Rs2.W[1];
17399  * res2 = Rs1.W[0] - Rs2.W[0];
17400  * if (res1 > (2^32)-1) {
17401  *   res1 = (2^32)-1;
17402  *   OV = 1;
17403  * }
17404  * if (res2 < 0) {
17405  *   res2 = 0;
17406  *   OV = 1;
17407  * }
17408  * Rd.W[1] = res1;
17409  * Rd.W[0] = res2;
17410  * ~~~
17411  *
17412  * \param [in]  a    unsigned long type of value stored in a
17413  * \param [in]  b    unsigned long type of value stored in b
17414  * \return value stored in unsigned long type
17415  */
__RV_UKSTAS32(unsigned long a,unsigned long b)17416 __STATIC_FORCEINLINE unsigned long __RV_UKSTAS32(unsigned long a, unsigned long b)
17417 {
17418     register unsigned long result;
17419     __ASM volatile("ukstas32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17420     return result;
17421 }
17422 /* ===== Inline Function End for 4.46. UKSTAS32 ===== */
17423 
17424 /* ===== Inline Function Start for 4.47. UKSTSA32 ===== */
17425 /**
17426  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
17427  * \brief UKSTSA32 (SIMD 32-bit Unsigned Saturating Straight Subtraction & Addition)
17428  * \details
17429  * **Type**: SIMD (RV64 Only)
17430  *
17431  * **Syntax**:\n
17432  * ~~~
17433  * UKSTSA32 Rd, Rs1, Rs2
17434  * ~~~
17435  *
17436  * **Purpose**:\n
17437  * Do one 32-bit unsigned integer element saturating subtraction and one 32-bit unsigned
17438  * integer element saturating addition in a 64-bit chunk simultaneously. Operands are from
17439  * corresponding 32-bit elements.
17440  *
17441  * **Description**:\n
17442  * This instruction subtracts the 32-bit unsigned integer element in [63:32] of Rs2 from
17443  * the 32-bit unsigned integer element in [63:32] of Rs1; at the same time, it adds the 32-bit unsigned
17444  * integer element in [31:0] of Rs2 with the 32-bit unsigned integer element in [31:0] Rs1. If any of the
17445  * results are beyond the 32-bit unsigned number range (0 <= RES <= 2^32-1), they are saturated to the
17446  * range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for subtraction and
17447  * [31:0] of Rd for addition.
17448  *
17449  * **Operations**:\n
17450  * ~~~
17451  * res1 = Rs1.W[1] - Rs2.W[1];
17452  * res2 = Rs1.W[0] + Rs2.W[0];
17453  * if (res1 < 0) {
17454  *   res1 = 0;
17455  *   OV = 1;
 * }
 * if (res2 > (2^32)-1) {
17457  *   res2 = (2^32)-1;
17458  *   OV = 1;
17459  * }
17460  * Rd.W[1] = res1;
17461  * Rd.W[0] = res2;
17462  * ~~~
17463  *
17464  * \param [in]  a    unsigned long type of value stored in a
17465  * \param [in]  b    unsigned long type of value stored in b
17466  * \return value stored in unsigned long type
17467  */
__RV_UKSTSA32(unsigned long a,unsigned long b)17468 __STATIC_FORCEINLINE unsigned long __RV_UKSTSA32(unsigned long a, unsigned long b)
17469 {
17470     register unsigned long result;
17471     __ASM volatile("ukstsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17472     return result;
17473 }
17474 /* ===== Inline Function End for 4.47. UKSTSA32 ===== */
17475 
17476 /* ===== Inline Function Start for 4.48. UKSUB32 ===== */
17477 /**
17478  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
17479  * \brief UKSUB32 (SIMD 32-bit Unsigned Saturating Subtraction)
17480  * \details
17481  * **Type**: SIMD (RV64 Only)
17482  *
17483  * **Syntax**:\n
17484  * ~~~
17485  * UKSUB32 Rd, Rs1, Rs2
17486  * ~~~
17487  *
17488  * **Purpose**:\n
17489  * Do 32-bit unsigned integer elements saturating subtractions simultaneously.
17490  *
17491  * **Description**:\n
17492  * This instruction subtracts the 32-bit unsigned integer elements in Rs2 from the 32-bit
17493  * unsigned integer elements in Rs1. If any of the results are beyond the 32-bit unsigned number
17494  * range (0 <= RES <= 2^32-1), they are saturated to the range and the OV bit is set to 1. The saturated
17495  * results are written to Rd.
17496  *
17497  * **Operations**:\n
17498  * ~~~
17499  * res[x] = Rs1.W[x] - Rs2.W[x];
17500  * if (res[x] < 0) {
17501  *   res[x] = 0;
17502  *   OV = 1;
17503  * }
17504  * Rd.W[x] = res[x];
17505  * for RV64: x=1...0
17506  * ~~~
17507  *
17508  * \param [in]  a    unsigned long type of value stored in a
17509  * \param [in]  b    unsigned long type of value stored in b
17510  * \return value stored in unsigned long type
17511  */
__RV_UKSUB32(unsigned long a,unsigned long b)17512 __STATIC_FORCEINLINE unsigned long __RV_UKSUB32(unsigned long a, unsigned long b)
17513 {
17514     register unsigned long result;
17515     __ASM volatile("uksub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17516     return result;
17517 }
17518 /* ===== Inline Function End for 4.48. UKSUB32 ===== */
17519 
17520 /* ===== Inline Function Start for 4.49. UMAX32 ===== */
17521 /**
17522  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
17523  * \brief UMAX32 (SIMD 32-bit Unsigned Maximum)
17524  * \details
17525  * **Type**: SIMD (RV64 Only)
17526  *
17527  * **Syntax**:\n
17528  * ~~~
17529  * UMAX32 Rd, Rs1, Rs2
17530  * ~~~
17531  *
17532  * **Purpose**:\n
17533  * Do 32-bit unsigned integer elements finding maximum operations simultaneously.
17534  *
17535  * **Description**:\n
17536  * This instruction compares the 32-bit unsigned integer elements in Rs1 with the 32-bit
 * unsigned integer elements in Rs2 and selects, for each pair, the number that is greater. The
17538  * selected results are written to Rd.
17539  *
17540  * **Operations**:\n
17541  * ~~~
17542  * Rd.W[x] = (Rs1.W[x] u> Rs2.W[x])? Rs1.W[x] : Rs2.W[x];
17543  * for RV64: x=1...0
17544  * ~~~
17545  *
17546  * \param [in]  a    unsigned long type of value stored in a
17547  * \param [in]  b    unsigned long type of value stored in b
17548  * \return value stored in unsigned long type
17549  */
__RV_UMAX32(unsigned long a,unsigned long b)17550 __STATIC_FORCEINLINE unsigned long __RV_UMAX32(unsigned long a, unsigned long b)
17551 {
17552     register unsigned long result;
17553     __ASM volatile("umax32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17554     return result;
17555 }
17556 /* ===== Inline Function End for 4.49. UMAX32 ===== */
17557 
17558 /* ===== Inline Function Start for 4.50. UMIN32 ===== */
17559 /**
17560  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
17561  * \brief UMIN32 (SIMD 32-bit Unsigned Minimum)
17562  * \details
17563  * **Type**: SIMD (RV64 Only)
17564  *
17565  * **Syntax**:\n
17566  * ~~~
17567  * UMIN32 Rd, Rs1, Rs2
17568  * ~~~
17569  *
17570  * **Purpose**:\n
17571  * Do 32-bit unsigned integer elements finding minimum operations simultaneously.
17572  *
17573  * **Description**:\n
17574  * This instruction compares the 32-bit unsigned integer elements in Rs1 with the 32-bit
 * unsigned integer elements in Rs2 and selects, for each pair, the number that is smaller. The
17576  * selected results are written to Rd.
17577  *
17578  * **Operations**:\n
17579  * ~~~
17580  * Rd.W[x] = (Rs1.W[x] <u Rs2.W[x])? Rs1.W[x] : Rs2.W[x];
17581  * for RV64: x=1...0
17582  * ~~~
17583  *
17584  * \param [in]  a    unsigned long type of value stored in a
17585  * \param [in]  b    unsigned long type of value stored in b
17586  * \return value stored in unsigned long type
17587  */
__RV_UMIN32(unsigned long a,unsigned long b)17588 __STATIC_FORCEINLINE unsigned long __RV_UMIN32(unsigned long a, unsigned long b)
17589 {
17590     register unsigned long result;
17591     __ASM volatile("umin32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17592     return result;
17593 }
17594 /* ===== Inline Function End for 4.50. UMIN32 ===== */
17595 
17596 /* ===== Inline Function Start for 4.51. URADD32 ===== */
17597 /**
17598  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
17599  * \brief URADD32 (SIMD 32-bit Unsigned Halving Addition)
17600  * \details
17601  * **Type**: SIMD (RV64 Only)
17602  *
17603  * **Syntax**:\n
17604  * ~~~
17605  * URADD32 Rd, Rs1, Rs2
17606  * ~~~
17607  *
17608  * **Purpose**:\n
17609  * Do 32-bit unsigned integer element additions simultaneously. The results are halved to
17610  * avoid overflow or saturation.
17611  *
17612  * **Description**:\n
17613  * This instruction adds the 32-bit unsigned integer elements in Rs1 with the 32-bit
17614  * unsigned integer elements in Rs2. The results are first logically right-shifted by 1 bit and then
17615  * written to Rd.
17616  *
17617  * **Examples**:\n
17618  * ~~~
17619  * * Ra = 0x7FFFFFFF, Rb = 0x7FFFFFFF Rt = 0x7FFFFFFF
17620  * * Ra = 0x80000000, Rb = 0x80000000 Rt = 0x80000000
17621  * * Ra = 0x40000000, Rb = 0x80000000 Rt = 0x60000000
17622  * ~~~
17623  *
17624  * **Operations**:\n
17625  * ~~~
17626  * Rd.W[x] = (Rs1.W[x] + Rs2.W[x]) u>> 1;
17627  * for RV64: x=1...0
17628  * ~~~
17629  *
17630  * \param [in]  a    unsigned long type of value stored in a
17631  * \param [in]  b    unsigned long type of value stored in b
17632  * \return value stored in unsigned long type
17633  */
__RV_URADD32(unsigned long a,unsigned long b)17634 __STATIC_FORCEINLINE unsigned long __RV_URADD32(unsigned long a, unsigned long b)
17635 {
17636     register unsigned long result;
17637     __ASM volatile("uradd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17638     return result;
17639 }
17640 /* ===== Inline Function End for 4.51. URADD32 ===== */
17641 
17642 /* ===== Inline Function Start for 4.52. URCRAS32 ===== */
17643 /**
17644  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
17645  * \brief URCRAS32 (SIMD 32-bit Unsigned Halving Cross Addition & Subtraction)
17646  * \details
17647  * **Type**: SIMD (RV64 Only)
17648  *
17649  * **Syntax**:\n
17650  * ~~~
17651  * URCRAS32 Rd, Rs1, Rs2
17652  * ~~~
17653  *
17654  * **Purpose**:\n
17655  * Do 32-bit unsigned integer element addition and 32-bit unsigned integer element
17656  * subtraction in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The
17657  * results are halved to avoid overflow or saturation.
17658  *
17659  * **Description**:\n
17660  * This instruction adds the 32-bit unsigned integer element in [63:32] of Rs1 with the 32-
17661  * bit unsigned integer element in [31:0] of Rs2, and subtracts the 32-bit unsigned integer element in
17662  * [63:32] of Rs2 from the 32-bit unsigned integer element in [31:0] of Rs1. The element results are first
17663  * logically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd for
17664  * subtraction.
17665  *
17666  * **Examples**:\n
17667  * ~~~
17668  * Please see `URADD32` and `URSUB32` instructions.
17669  * ~~~
17670  *
17671  * **Operations**:\n
17672  * ~~~
17673  * Rd.W[1] = (Rs1.W[1] + Rs2.W[0]) u>> 1;
17674  * Rd.W[0] = (Rs1.W[0] - Rs2.W[1]) u>> 1;
17675  * ~~~
17676  *
17677  * \param [in]  a    unsigned long type of value stored in a
17678  * \param [in]  b    unsigned long type of value stored in b
17679  * \return value stored in unsigned long type
17680  */
__RV_URCRAS32(unsigned long a,unsigned long b)17681 __STATIC_FORCEINLINE unsigned long __RV_URCRAS32(unsigned long a, unsigned long b)
17682 {
17683     register unsigned long result;
17684     __ASM volatile("urcras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17685     return result;
17686 }
17687 /* ===== Inline Function End for 4.52. URCRAS32 ===== */
17688 
17689 /* ===== Inline Function Start for 4.53. URCRSA32 ===== */
17690 /**
17691  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
17692  * \brief URCRSA32 (SIMD 32-bit Unsigned Halving Cross Subtraction & Addition)
17693  * \details
17694  * **Type**: SIMD (RV64 Only)
17695  *
17696  * **Syntax**:\n
17697  * ~~~
17698  * URCRSA32 Rd, Rs1, Rs2
17699  * ~~~
17700  *
17701  * **Purpose**:\n
17702  * Do 32-bit unsigned integer element subtraction and 32-bit unsigned integer element
17703  * addition in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The results
17704  * are halved to avoid overflow or saturation.
17705  *
17706  * **Description**:\n
17707  * This instruction subtracts the 32-bit unsigned integer element in [31:0] of Rs2 from the
 * 32-bit unsigned integer element in [63:32] of Rs1, and adds the 32-bit unsigned integer element in
17709  * [31:0] of Rs1 with the 32-bit unsigned integer element in [63:32] of Rs2. The two results are first
17710  * logically right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of Rd for
17711  * addition.
17712  *
17713  * **Examples**:\n
17714  * ~~~
17715  * Please see `URADD32` and `URSUB32` instructions.
17716  * ~~~
17717  *
17718  * **Operations**:\n
17719  * ~~~
17720  * Rd.W[1] = (Rs1.W[1] - Rs2.W[0]) u>> 1;
17721  * Rd.W[0] = (Rs1.W[0] + Rs2.W[1]) u>> 1;
17722  * ~~~
17723  *
17724  * \param [in]  a    unsigned long type of value stored in a
17725  * \param [in]  b    unsigned long type of value stored in b
17726  * \return value stored in unsigned long type
17727  */
__RV_URCRSA32(unsigned long a,unsigned long b)17728 __STATIC_FORCEINLINE unsigned long __RV_URCRSA32(unsigned long a, unsigned long b)
17729 {
17730     register unsigned long result;
17731     __ASM volatile("urcrsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17732     return result;
17733 }
17734 /* ===== Inline Function End for 4.53. URCRSA32 ===== */
17735 
17736 /* ===== Inline Function Start for 4.54. URSTAS32 ===== */
17737 /**
17738  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
17739  * \brief URSTAS32 (SIMD 32-bit Unsigned Halving Straight Addition & Subtraction)
17740  * \details
17741  * **Type**: SIMD (RV64 Only)
17742  *
17743  * **Syntax**:\n
17744  * ~~~
17745  * URSTAS32 Rd, Rs1, Rs2
17746  * ~~~
17747  *
17748  * **Purpose**:\n
17749  * Do 32-bit unsigned integer element addition and 32-bit unsigned integer element
17750  * subtraction in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements.
17751  * The results are halved to avoid overflow or saturation.
17752  *
17753  * **Description**:\n
17754  * This instruction adds the 32-bit unsigned integer element in [63:32] of Rs1 with the 32-
17755  * bit unsigned integer element in [63:32] of Rs2, and subtracts the 32-bit unsigned integer element in
17756  * [31:0] of Rs2 from the 32-bit unsigned integer element in [31:0] of Rs1. The element results are first
17757  * logically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd for
17758  * subtraction.
17759  *
17760  * **Examples**:\n
17761  * ~~~
17762  * Please see `URADD32` and `URSUB32` instructions.
17763  * ~~~
17764  *
17765  * **Operations**:\n
17766  * ~~~
17767  * Rd.W[1] = (Rs1.W[1] + Rs2.W[1]) u>> 1;
17768  * Rd.W[0] = (Rs1.W[0] - Rs2.W[0]) u>> 1;
17769  * ~~~
17770  *
17771  * \param [in]  a    unsigned long type of value stored in a
17772  * \param [in]  b    unsigned long type of value stored in b
17773  * \return value stored in unsigned long type
17774  */
__RV_URSTAS32(unsigned long a,unsigned long b)17775 __STATIC_FORCEINLINE unsigned long __RV_URSTAS32(unsigned long a, unsigned long b)
17776 {
17777     register unsigned long result;
17778     __ASM volatile("urstas32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17779     return result;
17780 }
17781 /* ===== Inline Function End for 4.54. URSTAS32 ===== */
17782 
17783 /* ===== Inline Function Start for 4.55. URSTSA32 ===== */
17784 /**
17785  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
17786  * \brief URSTSA32 (SIMD 32-bit Unsigned Halving Straight Subtraction & Addition)
17787  * \details
17788  * **Type**: SIMD (RV64 Only)
17789  *
17790  * **Syntax**:\n
17791  * ~~~
17792  * URSTSA32 Rd, Rs1, Rs2
17793  * ~~~
17794  *
17795  * **Purpose**:\n
17796  * Do 32-bit unsigned integer element subtraction and 32-bit unsigned integer element
17797  * addition in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements. The
17798  * results are halved to avoid overflow or saturation.
17799  *
17800  * **Description**:\n
17801  * This instruction subtracts the 32-bit unsigned integer element in [63:32] of Rs2 from
17802  * the 32-bit unsigned integer element in [63:32] of Rs1, and adds the 32-bit unsigned element integer
17803  * in [31:0] of Rs1 with the 32-bit unsigned integer element in [31:0] of Rs2. The two results are first
17804  * logically right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of Rd for
17805  * addition.
17806  *
17807  * **Examples**:\n
17808  * ~~~
17809  * Please see `URADD32` and `URSUB32` instructions.
17810  * ~~~
17811  *
17812  * **Operations**:\n
17813  * ~~~
17814  * Rd.W[1] = (Rs1.W[1] - Rs2.W[1]) u>> 1;
17815  * Rd.W[0] = (Rs1.W[0] + Rs2.W[0]) u>> 1;
17816  * ~~~
17817  *
17818  * \param [in]  a    unsigned long type of value stored in a
17819  * \param [in]  b    unsigned long type of value stored in b
17820  * \return value stored in unsigned long type
17821  */
__RV_URSTSA32(unsigned long a,unsigned long b)17822 __STATIC_FORCEINLINE unsigned long __RV_URSTSA32(unsigned long a, unsigned long b)
17823 {
17824     register unsigned long result;
17825     __ASM volatile("urstsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17826     return result;
17827 }
17828 /* ===== Inline Function End for 4.55. URSTSA32 ===== */
17829 
17830 /* ===== Inline Function Start for 4.56. URSUB32 ===== */
17831 /**
17832  * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
17833  * \brief URSUB32 (SIMD 32-bit Unsigned Halving Subtraction)
17834  * \details
17835  * **Type**: SIMD (RV64 Only)
17836  *
17837  * **Syntax**:\n
17838  * ~~~
17839  * URSUB32 Rd, Rs1, Rs2
17840  * ~~~
17841  *
17842  * **Purpose**:\n
17843  * Do 32-bit unsigned integer element subtractions simultaneously. The results are halved to
17844  * avoid overflow or saturation.
17845  *
17846  * **Description**:\n
17847  * This instruction subtracts the 32-bit unsigned integer elements in Rs2 from the 32-bit
17848  * unsigned integer elements in Rs1. The results are first logically right-shifted by 1 bit and then
17849  * written to Rd.
17850  *
17851  * **Examples**:\n
17852  * ~~~
17853  * * Ra = 0x7FFFFFFF, Rb = 0x80000000, Rt = 0xFFFFFFFF
17854  * * Ra = 0x80000000, Rb = 0x7FFFFFFF, Rt = 0x00000000
17855  * * Ra = 0x80000000, Rb = 0x40000000, Rt = 0x20000000
17856  * ~~~
17857  *
17858  * **Operations**:\n
17859  * ~~~
17860  * Rd.W[x] = (Rs1.W[x] - Rs2.W[x]) u>> 1;
17861  * for RV64: x=1...0
17862  * ~~~
17863  *
17864  * \param [in]  a    unsigned long type of value stored in a
17865  * \param [in]  b    unsigned long type of value stored in b
17866  * \return value stored in unsigned long type
17867  */
__RV_URSUB32(unsigned long a,unsigned long b)17868 __STATIC_FORCEINLINE unsigned long __RV_URSUB32(unsigned long a, unsigned long b)
17869 {
17870     register unsigned long result;
17871     __ASM volatile("ursub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17872     return result;
17873 }
17874 /* ===== Inline Function End for 4.56. URSUB32 ===== */
17875 
17876 #endif /* __RISCV_XLEN == 64 */
17877 
17878 
17879 #if (__RISCV_XLEN == 32) || defined(__ONLY_FOR_DOXYGEN_DOCUMENT_GENERATION__)
17880 /* XXXXX Nuclei Extended DSP Instructions for RV32 XXXXX */
17881 /**
17882  * \defgroup NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM      Nuclei Customized DSP Instructions
17883  * \ingroup  NMSIS_Core_DSP_Intrinsic
17884  * \brief    (RV32 only)Nuclei Customized DSP Instructions
17885  * \details  This is Nuclei customized DSP instructions only for RV32
17886  */
17887 /* ===== Inline Function Start for A.1. DKHM8 ===== */
17888 /**
17889  * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM
17890  * \brief DKHM8 (64-bit SIMD Signed Saturating Q7 Multiply)
17891  * \details
17892  * **Type**: SIMD
17893  *
17894  * **Syntax**:\n
17895  * ~~~
17896  * DKHM8 Rd, Rs1, Rs2
17897  * # Rd, Rs1, Rs2 are all even/odd pair of registers
17898  * ~~~
17899  *
17900  * **Purpose**:\n
17901  * Do Q7xQ7 element multiplications simultaneously. The Q14 results are then reduced to Q7
17902  * numbers again.
17903  *
17904  * **Description**:\n
17905  * For the `DKHM8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1
17906  * with the top 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
17907  * content of 16-bit chunks in Rs1 with the bottom 8-bit Q7 content of 16-bit chunks in Rs2.
17908  *
17909  * The Q14 results are then right-shifted 7-bits and saturated into Q7 values. The Q7 results are then
17910  * written into Rd. When both the two Q7 inputs of a multiplication are 0x80, saturation will happen.
17911  * The result will be saturated to 0x7F and the overflow flag OV will be set.
17912  *
17913  * **Operations**:\n
17914  * ~~~
17915  * op1t = Rs1.B[x+1]; op2t = Rs2.B[x+1]; // top
17916  * op1b = Rs1.B[x]; op2b = Rs2.B[x]; // bottom
17917  * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
17918  *   if (0x80 != aop | 0x80 != bop) {
17919  *     res = (aop s* bop) >> 7;
17920  *   } else {
17921  *     res= 0x7F;
17922  *     OV = 1;
17923  *   }
17924  * }
17925  * Rd.H[x/2] = concat(rest, resb);
17926  * for RV32, x=0,2,4,6
17927  * ~~~
17928  *
17929  * \param [in]  a unsigned long long type of value stored in a
17930  * \param [in]  b unsigned long long type of value stored in b
17931  * \return value stored in unsigned long long type
17932  */
__RV_DKHM8(unsigned long long a,unsigned long long b)17933 __STATIC_FORCEINLINE unsigned long long __RV_DKHM8(unsigned long long a, unsigned long long b)
17934 {
17935     unsigned long long result;
17936     __ASM volatile("dkhm8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17937     return result;
17938 }
17939 /* ===== Inline Function End for A.1. DKHM8 ===== */
17940 
17941 /* ===== Inline Function Start for A.2. DKHM16 ===== */
17942 /**
17943  * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM
17944  * \brief DKHM16 (64-bit SIMD Signed Saturating Q15 Multiply)
17945  * \details
17946  * **Type**: SIMD
17947  *
17948  * **Syntax**:\n
17949  * ~~~
17950  * DKHM16 Rd, Rs1, Rs2
17951  * # Rd, Rs1, Rs2 are all even/odd pair of registers
17952  * ~~~
17953  *
17954  * **Purpose**:\n
17955  * Do Q15xQ15 element multiplications simultaneously. The Q30 results are then reduced to
17956  * Q15 numbers again.
17957  *
17958  * **Description**:\n
17959  * For the `DKHM16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in
17960  * Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom
17961  * 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit chunks in
17962  * Rs2.
17963  *
17964  * The Q30 results are then right-shifted 15-bits and saturated into Q15 values. The Q15 results are
17965  * then written into Rd. When both the two Q15 inputs of a multiplication are 0x8000, saturation will
17966  * happen. The result will be saturated to 0x7FFF and the overflow flag OV will be set.
17967  *
17968  * **Operations**:\n
17969  * ~~~
17970  * op1t = Rs1.H[x+1]; op2t = Rs2.H[x+1]; // top
17971  * op1b = Rs1.H[x]; op2b = Rs2.H[x]; // bottom
17972  * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
17973  *   if (0x8000 != aop | 0x8000 != bop) {
17974  *     res = (aop s* bop) >> 15;
17975  *   } else {
17976  *     res= 0x7FFF;
17977  *     OV = 1;
17978  *   }
17979  * }
17980  * Rd.W[x/2] = concat(rest, resb);
17981  * for RV32: x=0, 2
17982  * ~~~
17983  *
17984  * \param [in]  a unsigned long long type of value stored in a
17985  * \param [in]  b unsigned long long type of value stored in b
17986  * \return value stored in unsigned long long type
17987  */
__RV_DKHM16(unsigned long long a,unsigned long long b)17988 __STATIC_FORCEINLINE unsigned long long __RV_DKHM16(unsigned long long a, unsigned long long b)
17989 {
17990     unsigned long long result;
17991     __ASM volatile("dkhm16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17992     return result;
17993 }
17994 /* ===== Inline Function End for A.2. DKHM16 ===== */
17995 
17996 /* ===== Inline Function Start for A.3. DKABS8 ===== */
17997 /**
17998  * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM
17999  * \brief DKABS8 (64-bit SIMD 8-bit Saturating Absolute)
18000  * \details
18001  * **Type**: SIMD
18002  *
18003  * **Syntax**:\n
18004  * ~~~
18005  * DKABS8 Rd, Rs1
18006  * # Rd, Rs1 are all even/odd pair of registers
18007  * ~~~
18008  *
18009  * **Purpose**:\n
18010  * Get the absolute value of 8-bit signed integer elements simultaneously.
18011  *
18012  * **Description**:\n
18013  * This instruction calculates the absolute value of 8-bit signed integer elements stored
18014  * in Rs1 and writes the element results to Rd. If the input number is 0x80, this instruction generates
18015  * 0x7f as the output and sets the OV bit to 1.
18016  *
18017  * **Operations**:\n
18018  * ~~~
18019  * src = Rs1.B[x];
18020  * if (src == 0x80) {
18021  *   src = 0x7f;
18022  *   OV = 1;
 * } else if (src[7] == 1) {
18024  *   src = -src;
18025  * }
18026  * Rd.B[x] = src;
18027  * for RV32: x=7...0,
18028  * ~~~
18029  *
18030  * \param [in]  a unsigned long long type of value stored in a
18031  * \return value stored in unsigned long long type
18032  */
__RV_DKABS8(unsigned long long a)18033 __STATIC_FORCEINLINE unsigned long long __RV_DKABS8(unsigned long long a)
18034 {
18035     unsigned long long result;
18036     __ASM volatile("dkabs8 %0, %1" : "=r"(result) : "r"(a));
18037     return result;
18038 }
18039 /* ===== Inline Function End for A.3. DKABS8 ===== */
18040 
18041 /* ===== Inline Function Start for A.4. DKABS16 ===== */
18042 /**
18043  * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM
18044  * \brief DKABS16 (64-bit SIMD 16-bit Saturating Absolute)
18045  * \details
18046  * **Type**: SIMD
18047  *
18048  * **Syntax**:\n
18049  * ~~~
18050  * DKABS16 Rd, Rs1
18051  * # Rd, Rs1 are all even/odd pair of registers
18052  * ~~~
18053  *
18054  * **Purpose**:\n
18055  * Get the absolute value of 16-bit signed integer elements simultaneously.
18056  *
18057  * **Description**:\n
18058  * This instruction calculates the absolute value of 16-bit signed integer elements stored
18059  * in Rs1 and writes the element results to Rd. If the input number is 0x8000, this instruction
18060  * generates 0x7fff as the output and sets the OV bit to 1.
18061  *
18062  * **Operations**:\n
18063  * ~~~
18064  * src = Rs1.H[x];
18065  * if (src == 0x8000) {
18066  *   src = 0x7fff;
18067  *   OV = 1;
 * } else if (src[15] == 1) {
18069  *   src = -src;
18070  * }
18071  * Rd.H[x] = src;
18072  * for RV32: x=3...0,
18073  * ~~~
18074  *
18075  * \param [in]  a unsigned long long type of value stored in a
18076  * \return value stored in unsigned long long type
18077  */
__RV_DKABS16(unsigned long long a)18078 __STATIC_FORCEINLINE unsigned long long __RV_DKABS16(unsigned long long a)
18079 {
18080     unsigned long long result;
18081     __ASM volatile("dkabs16 %0, %1" : "=r"(result) : "r"(a));
18082     return result;
18083 }
18084 /* ===== Inline Function End for A.4. DKABS16 ===== */
18085 
18086 /* ===== Inline Function Start for A.5. DKSLRA8 ===== */
18087 /**
18088  * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM
18089  * \brief DKSLRA8 (64-bit SIMD 8-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
18090  * \details
18091  * **Type**: SIMD
18092  *
18093  * **Syntax**:\n
18094  * ~~~
18095  * DKSLRA8 Rd, Rs1, Rs2
18096  * # Rd, Rs1 are all even/odd pair of registers
18097  * ~~~
18098  *
18099  * **Purpose**:\n
18100  * Do 8-bit elements logical left (positive) or arithmetic right (negative) shift operation with
18101  * Q7 saturation for the left shift.
18102  *
18103  * **Description**:\n
18104  * The 8-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
18105  * based on the value of Rs2[3:0]. Rs2[3:0] is in the signed range of [-2^3, 2^3-1]. A positive Rs2[3:0] means
18106  * logical left shift and a negative Rs2[3:0] means arithmetic right shift. The shift amount is the
18107  * absolute value of Rs2[3:0]. However, the behavior of `Rs2[3:0]==-2^3 (0x8)` is defined to be
18108  * equivalent to the behavior of `Rs2[3:0]==-(2^3-1) (0x9)`.
18109  * The left-shifted results are saturated to the 8-bit signed integer range of [-2^7, 2^7-1].
18110  * If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:4] will not affect
18111  * this instruction.
18112  *
18113  * **Operations**:\n
18114  * ~~~
18115  * if (Rs2[3:0] < 0) {
18116  *   sa = -Rs2[3:0];
18117  *   sa = (sa == 8)? 7 : sa;
18118  *   Rd.B[x] = SE8(Rs1.B[x][7:sa]);
18119  * } else {
18120  *   sa = Rs2[2:0];
18121  *   res[(7+sa):0] = Rs1.B[x] <<(logic) sa;
18122  *   if (res > (2^7)-1) {
18123  *     res[7:0] = 0x7f; OV = 1;
18124  *   } else if (res < -2^7) {
18125  *     res[7:0] = 0x80; OV = 1;
18126  *   }
18127  *   Rd.B[x] = res[7:0];
18128  * }
18129  * for RV32: x=7...0,
18130  * ~~~
18131  *
18132  * \param [in]  a unsigned long long type of value stored in a
18133  * \param [in]  b int type of value stored in b
18134  * \return value stored in unsigned long long type
18135  */
__RV_DKSLRA8(unsigned long long a,int b)18136 __STATIC_FORCEINLINE unsigned long long __RV_DKSLRA8(unsigned long long a, int b)
18137 {
18138     unsigned long long result;
18139     __ASM volatile("dkslra8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
18140     return result;
18141 }
18142 /* ===== Inline Function End for A.5. DKSLRA8 ===== */
18143 
18144 /* ===== Inline Function Start for A.6. DKSLRA16 ===== */
18145 /**
18146  * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM
18147  * \brief DKSLRA16 (64-bit SIMD 16-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
18148  * \details
18149  * **Type**: SIMD
18150  *
18151  * **Syntax**:\n
18152  * ~~~
18153  * DKSLRA16 Rd, Rs1, Rs2
18154  * # Rd, Rs1 are all even/odd pair of registers
18155  * ~~~
18156  *
18157  * **Purpose**:\n
18158  * Do 16-bit elements logical left (positive) or arithmetic right (negative) shift operation with
18159  * Q15 saturation for the left shift.
18160  *
18161  * **Description**:\n
18162  * The 16-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
18163  * based on the value of Rs2[4:0]. Rs2[4:0] is in the signed range of [-2^4, 2^4-1]. A positive Rs2[4:0] means
18164  * logical left shift and a negative Rs2[4:0] means arithmetic right shift. The shift amount is the
18165  * absolute value of Rs2[4:0]. However, the behavior of `Rs2[4:0]==-2^4 (0x10)` is defined to be
18166  * equivalent to the behavior of `Rs2[4:0]==-(2^4-1) (0x11)`.
18167  * The left-shifted results are saturated to the 16-bit signed integer range of [-2^15, 2^15-1].
18168  * After the shift, saturation, or rounding, the final results are written to
18169  * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:5] will not affect
18170  * this instruction.
18171  *
18172  * **Operations**:\n
18173  * ~~~
18174  * if (Rs2[4:0] < 0) {
18175  *   sa = -Rs2[4:0];
18176  *   sa = (sa == 16)? 15 : sa;
18177  *   Rd.H[x] = SE16(Rs1.H[x][15:sa]);
18178  * } else {
18179  *   sa = Rs2[3:0];
18180  *   res[(15+sa):0] = Rs1.H[x] <<(logic) sa;
18181  *   if (res > (2^15)-1) {
18182  *     res[15:0] = 0x7fff; OV = 1;
18183  *   } else if (res < -2^15) {
18184  *     res[15:0] = 0x8000; OV = 1;
18185  *   }
 *   Rd.H[x] = res[15:0];
18187  * }
18188  * for RV32: x=3...0,
18189  * ~~~
18190  *
18191  * \param [in]  a unsigned long long type of value stored in a
18192  * \param [in]  b int type of value stored in b
18193  * \return value stored in unsigned long long type
18194  */
__RV_DKSLRA16(unsigned long long a,int b)18195 __STATIC_FORCEINLINE unsigned long long __RV_DKSLRA16(unsigned long long a, int b)
18196 {
18197     unsigned long long result;
18198     __ASM volatile("dkslra16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
18199     return result;
18200 }
18201 /* ===== Inline Function End for A.6. DKSLRA16 ===== */
18202 
18203 /* ===== Inline Function Start for A.7. DKADD8 ===== */
18204 /**
18205  * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM
18206  * \brief DKADD8 (64-bit SIMD 8-bit Signed Saturating Addition)
18207  * \details
18208  * **Type**: SIMD
18209  *
18210  * **Syntax**:\n
18211  * ~~~
18212  * DKADD8 Rd, Rs1, Rs2
18213  * # Rd, Rs1, Rs2 are all even/odd pair of registers
18214  * ~~~
18215  *
18216  * **Purpose**:\n
18217  * Do 8-bit signed integer element saturating additions simultaneously.
18218  *
18219  * **Description**:\n
18220  * This instruction adds the 8-bit signed integer elements in Rs1 with the 8-bit signed
18221  * integer elements in Rs2. If any of the results are beyond the Q7 number range (-2^7 <= Q7 <= 2^7-1), they
18222  * are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
18223  *
18224  * **Operations**:\n
18225  * ~~~
18226  * res[x] = Rs1.B[x] + Rs2.B[x];
18227  * if (res[x] > 127) {
18228  *   res[x] = 127;
18229  *   OV = 1;
18230  * } else if (res[x] < -128) {
18231  *   res[x] = -128;
18232  *   OV = 1;
18233  * }
18234  * Rd.B[x] = res[x];
18235  * for RV32: x=7...0,
18236  * ~~~
18237  *
18238  * \param [in]  a unsigned long long type of value stored in a
18239  * \param [in]  b unsigned long long type of value stored in b
18240  * \return value stored in unsigned long long type
18241  */
__RV_DKADD8(unsigned long long a,unsigned long long b)18242 __STATIC_FORCEINLINE unsigned long long __RV_DKADD8(unsigned long long a, unsigned long long b)
18243 {
18244     unsigned long long result;
18245     __ASM volatile("dkadd8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
18246     return result;
18247 }
18248 /* ===== Inline Function End for A.7. DKADD8 ===== */
18249 
18250 /* ===== Inline Function Start for A.8. DKADD16 ===== */
18251 /**
18252  * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM
18253  * \brief DKADD16 (64-bit SIMD 16-bit Signed Saturating Addition)
18254  * \details
18255  * **Type**: SIMD
18256  *
18257  * **Syntax**:\n
18258  * ~~~
18259  * DKADD16 Rd, Rs1, Rs2
18260  * # Rd, Rs1, Rs2 are all even/odd pair of registers
18261  * ~~~
18262  *
18263  * **Purpose**:\n
18264  * Do 16-bit signed integer element saturating additions simultaneously.
18265  *
18266  * **Description**:\n
18267  * This instruction adds the 16-bit signed integer elements in Rs1 with the 16-bit signed
18268  * integer elements in Rs2. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1),
18269  * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
18270  *
18271  * **Operations**:\n
18272  * ~~~
18273  * res[x] = Rs1.H[x] + Rs2.H[x];
18274  * if (res[x] > 32767) {
18275  *   res[x] = 32767;
18276  *   OV = 1;
18277  * } else if (res[x] < -32768) {
18278  *   res[x] = -32768;
18279  *   OV = 1;
18280  * }
18281  * Rd.H[x] = res[x];
18282  * for RV32: x=3...0,
18283  * ~~~
18284  *
18285  * \param [in]  a unsigned long long type of value stored in a
18286  * \param [in]  b unsigned long long type of value stored in b
18287  * \return value stored in unsigned long long type
18288  */
__RV_DKADD16(unsigned long long a,unsigned long long b)18289 __STATIC_FORCEINLINE unsigned long long __RV_DKADD16(unsigned long long a, unsigned long long b)
18290 {
18291     unsigned long long result;
18292     __ASM volatile("dkadd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
18293     return result;
18294 }
18295 /* ===== Inline Function End for A.8. DKADD16 ===== */
18296 
/* ===== Inline Function Start for A.9. DKSUB8 ===== */
18298 /**
18299  * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM
18300  * \brief DKSUB8 (64-bit SIMD 8-bit Signed Saturating Subtraction)
18301  * \details
18302  * **Type**: SIMD
18303  *
18304  * **Syntax**:\n
18305  * ~~~
18306  * DKSUB8 Rd, Rs1, Rs2
18307  * # Rd, Rs1, Rs2 are all even/odd pair of registers
18308  * ~~~
18309  *
18310  * **Purpose**:\n
18311  * Do 8-bit signed elements saturating subtractions simultaneously.
18312  *
18313  * **Description**:\n
18314  * This instruction subtracts the 8-bit signed integer elements in Rs2 from the 8-bit
18315  * signed integer elements in Rs1. If any of the results are beyond the Q7 number range (-2^7 <= Q7 <= 2^7-1),
18316  * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
18317  *
18318  * **Operations**:\n
18319  * ~~~
18320  * res[x] = Rs1.B[x] - Rs2.B[x];
18321  * if (res[x] > (2^7)-1) {
18322  *   res[x] = (2^7)-1;
18323  *   OV = 1;
18324  * } else if (res[x] < -2^7) {
18325  *   res[x] = -2^7;
18326  *   OV = 1;
18327  * }
18328  * Rd.B[x] = res[x];
18329  * for RV32: x=7...0,
18330  * ~~~
18331  *
18332  * \param [in]  a unsigned long long type of value stored in a
18333  * \param [in]  b unsigned long long type of value stored in b
18334  * \return value stored in unsigned long long type
18335  */
__RV_DKSUB8(unsigned long long a,unsigned long long b)18336 __STATIC_FORCEINLINE unsigned long long __RV_DKSUB8(unsigned long long a, unsigned long long b)
18337 {
18338     unsigned long long result;
18339     __ASM volatile("dksub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
18340     return result;
18341 }
18342 /* ===== Inline Function End for A.9. DKSUB8 ===== */
18343 
18344 /* ===== Inline Function Start for A.10. DKSUB16 ===== */
18345 /**
18346  * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM
18347  * \brief DKSUB16 (64-bit SIMD 16-bit Signed Saturating Subtraction)
18348  * \details
18349  * **Type**: SIMD
18350  *
18351  * **Syntax**:\n
18352  * ~~~
18353  * DKSUB16 Rd, Rs1, Rs2
18354  * # Rd, Rs1, Rs2 are all even/odd pair of registers
18355  * ~~~
18356  *
18357  * **Purpose**:\n
18358  * Do 16-bit signed integer elements saturating subtractions simultaneously.
18359  *
18360  * **Description**:\n
18361  * This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit
18362  * signed integer elements in Rs1. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <=
18363  * 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to
18364  * Rd.
18365  *
18366  * **Operations**:\n
18367  * ~~~
18368  * res[x] = Rs1.H[x] - Rs2.H[x];
18369  * if (res[x] > (2^15)-1) {
18370  *   res[x] = (2^15)-1;
18371  *   OV = 1;
18372  * } else if (res[x] < -2^15) {
18373  *   res[x] = -2^15;
18374  *   OV = 1;
18375  * }
18376  * Rd.H[x] = res[x];
18377  * for RV32: x=3...0,
18378  * ~~~
18379  *
18380  * \param [in]  a unsigned long long type of value stored in a
18381  * \param [in]  b unsigned long long type of value stored in b
18382  * \return value stored in unsigned long long type
18383  */
__RV_DKSUB16(unsigned long long a,unsigned long long b)18384 __STATIC_FORCEINLINE unsigned long long __RV_DKSUB16(unsigned long long a, unsigned long long b)
18385 {
18386     unsigned long long result;
18387     __ASM volatile("dksub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
18388     return result;
18389 }
18390 /* ===== Inline Function End for A.10. DKSUB16 ===== */
18391 
18392 /* ===== Inline Function Start for A.11.1. EXPD80 ===== */
18393 /**
18394  * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM
18395  * \brief EXPD80 (Expand and Copy Byte 0 to 32bit)
18396  * \details
18397  * **Type**: DSP
18398  *
18399  * **Syntax**:\n
18400  * ~~~
18401  * EXPD80 Rd, Rs1
18402  * ~~~
18403  *
18404  * **Purpose**:\n
18405  * Copy 8-bit data from 32-bit chunks into 4 bytes in a register.
18406  *
18407  * **Description**:\n
 * Moves Rs1.B[0][7:0] to Rd.B[0][7:0], Rd.B[1][7:0], Rd.B[2][7:0], Rd.B[3][7:0]
18409  *
18410  * **Operations**:\n
18411  * ~~~
18412  * Rd.W[x][31:0] = CONCAT(Rs1.B[0][7:0], Rs1.B[0][7:0], Rs1.B[0][7:0], Rs1.B[0][7:0]);
18413  * for RV32: x=0
18414  * ~~~
18415  *
18416  * \param [in]  a unsigned long type of value stored in a
18417  * \return value stored in unsigned long type
18418  */
__RV_EXPD80(unsigned long a)18419 __STATIC_FORCEINLINE unsigned long __RV_EXPD80(unsigned long a)
18420 {
18421     unsigned long result;
18422     __ASM volatile("expd80 %0, %1" : "=r"(result) : "r"(a));
18423     return result;
18424 }
/* ===== Inline Function End for A.11.1. EXPD80 ===== */
18426 
18427 /* ===== Inline Function Start for A.11.2. EXPD81 ===== */
18428 /**
18429  * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM
18430  * \brief EXPD81 (Expand and Copy Byte 1 to 32bit)
18431  * \details
18432  * **Type**: DSP
18433  *
18434  * **Syntax**:\n
18435  * ~~~
18436  * EXPD81 Rd, Rs1
18437  * ~~~
18438  *
18439  * **Purpose**:\n
18440  * Copy 8-bit data from 32-bit chunks into 4 bytes in a register.
18441  *
18442  * **Description**:\n
 * Moves Rs1.B[1][7:0] to Rd.B[0][7:0], Rd.B[1][7:0], Rd.B[2][7:0], Rd.B[3][7:0]
18444  *
18445  * **Operations**:\n
18446  * ~~~
18447  * Rd.W[x][31:0] = CONCAT(Rs1.B[1][7:0], Rs1.B[1][7:0], Rs1.B[1][7:0], Rs1.B[1][7:0]);
18448  * for RV32: x=0
18449  * ~~~
18450  *
18451  * \param [in]  a unsigned long type of value stored in a
18452  * \return value stored in unsigned long type
18453  */
__RV_EXPD81(unsigned long a)18454 __STATIC_FORCEINLINE unsigned long __RV_EXPD81(unsigned long a)
18455 {
18456     unsigned long result;
18457     __ASM volatile("expd81 %0, %1" : "=r"(result) : "r"(a));
18458     return result;
18459 }
/* ===== Inline Function End for A.11.2. EXPD81 ===== */
18461 
18462 /* ===== Inline Function Start for A.11.3. EXPD82 ===== */
18463 /**
18464  * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM
18465  * \brief EXPD82 (Expand and Copy Byte 2 to 32bit)
18466  * \details
18467  * **Type**: DSP
18468  *
18469  * **Syntax**:\n
18470  * ~~~
18471  * EXPD82 Rd, Rs1
18472  * ~~~
18473  *
18474  * **Purpose**:\n
18475  * Copy 8-bit data from 32-bit chunks into 4 bytes in a register.
18476  *
18477  * **Description**:\n
 * Moves Rs1.B[2][7:0] to Rd.B[0][7:0], Rd.B[1][7:0], Rd.B[2][7:0], Rd.B[3][7:0]
18479  *
18480  * **Operations**:\n
18481  * ~~~
18482  * Rd.W[x][31:0] = CONCAT(Rs1.B[2][7:0], Rs1.B[2][7:0], Rs1.B[2][7:0], Rs1.B[2][7:0]);
18483  * for RV32: x=0
18484  * ~~~
18485  *
18486  * \param [in]  a unsigned long type of value stored in a
18487  * \return value stored in unsigned long type
18488  */
__RV_EXPD82(unsigned long a)18489 __STATIC_FORCEINLINE unsigned long __RV_EXPD82(unsigned long a)
18490 {
18491     unsigned long result;
18492     __ASM volatile("expd82 %0, %1" : "=r"(result) : "r"(a));
18493     return result;
18494 }
/* ===== Inline Function End for A.11.3. EXPD82 ===== */
18496 
18497 /* ===== Inline Function Start for A.11.4. EXPD83 ===== */
18498 /**
18499  * \ingroup  NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM
18500  * \brief EXPD83 (Expand and Copy Byte 3 to 32bit)
18501  * \details
18502  * **Type**: DSP
18503  *
18504  * **Syntax**:\n
18505  * ~~~
18506  * EXPD83 Rd, Rs1
18507  * ~~~
18508  *
18509  * **Purpose**:\n
18510  * Copy 8-bit data from 32-bit chunks into 4 bytes in a register.
18511  *
18512  * **Description**:\n
 * Moves Rs1.B[3][7:0] to Rd.B[0][7:0], Rd.B[1][7:0], Rd.B[2][7:0], Rd.B[3][7:0]
18514  *
18515  * **Operations**:\n
18516  * ~~~
18517  * Rd.W[x][31:0] = CONCAT(Rs1.B[3][7:0], Rs1.B[3][7:0], Rs1.B[3][7:0], Rs1.B[3][7:0]);
18518  * for RV32: x=0
18519  * ~~~
18520  *
18521  * \param [in]  a unsigned long type of value stored in a
18522  * \return value stored in unsigned long type
18523  */
__RV_EXPD83(unsigned long a)18524 __STATIC_FORCEINLINE unsigned long __RV_EXPD83(unsigned long a)
18525 {
18526     unsigned long result;
18527     __ASM volatile("expd83 %0, %1" : "=r"(result) : "r"(a));
18528     return result;
18529 }
/* ===== Inline Function End for A.11.4. EXPD83 ===== */
18531 #endif /* __RISCV_XLEN == 32 */
18532 
18533 #if defined(__RISCV_FEATURE_DSP) && (__RISCV_FEATURE_DSP == 1)
/* XXXXX ARM Compatible SIMD API XXXXX */
/** \brief Q setting quad 8-bit saturating addition. */
#define __QADD8(x, y)               __RV_KADD8((x), (y))
/** \brief Q setting quad 8-bit saturating subtract. */
#define __QSUB8(x, y)               __RV_KSUB8((x), (y))
/** \brief Q setting dual 16-bit saturating addition. */
#define __QADD16(x, y)              __RV_KADD16((x), (y))
/** \brief Dual 16-bit signed addition with halved results. */
#define __SHADD16(x, y)             __RV_RADD16((x), (y))
/** \brief Q setting dual 16-bit saturating subtract. */
#define __QSUB16(x, y)              __RV_KSUB16((x), (y))
/** \brief Dual 16-bit signed subtraction with halved results. */
#define __SHSUB16(x, y)             __RV_RSUB16((x), (y))
/** \brief Q setting dual 16-bit add and subtract with exchange. */
#define __QASX(x, y)                __RV_KCRAS16((x), (y))
/** \brief Dual 16-bit signed addition and subtraction with halved results.*/
#define __SHASX(x, y)               __RV_RCRAS16((x), (y))
/** \brief Q setting dual 16-bit subtract and add with exchange. */
#define __QSAX(x, y)                __RV_KCRSA16((x), (y))
/** \brief Dual 16-bit signed subtraction and addition with halved results.*/
#define __SHSAX(x, y)               __RV_RCRSA16((x), (y))
/**
 * \brief Dual 16-bit signed multiply with exchange returning difference.
 * \note  The arguments are forwarded to SMXDS in (y, x) order, unlike the
 *        other mappings here. Presumably this matches the sign convention of
 *        ARM's SMUSDX; verify against the SMXDS definition before changing it.
 */
#define __SMUSDX(x, y)              __RV_SMXDS((y), (x))
18557 /** \brief Q setting sum of dual 16-bit signed multiply with exchange. */
__SMUADX(int32_t op1,int32_t op2)18558 __STATIC_FORCEINLINE int32_t __SMUADX (int32_t op1, int32_t op2)
18559 {
18560     return (int32_t)__RV_KMXDA(op1, op2);
18561 }
/** \brief Q setting saturating add; forwards (a, b) to __RV_KADDW. */
#define __QADD(a, b)                __RV_KADDW((a), (b))
/** \brief Q setting saturating subtract; forwards (a, b) to __RV_KSUBW. */
#define __QSUB(a, b)                __RV_KSUBW((a), (b))
18566 /** \brief Q setting dual 16-bit signed multiply with single 32-bit accumulator. */
__SMLAD(int32_t op1,int32_t op2,int32_t op3)18567 __STATIC_FORCEINLINE int32_t __SMLAD(int32_t op1, int32_t op2, int32_t op3)
18568 {
18569     return (int32_t)__RV_KMADA(op3, op1, op2);
18570 }
18571 /** \brief Q setting pre-exchanged dual 16-bit signed multiply with single 32-bit accumulator.  */
__SMLADX(int32_t op1,int32_t op2,int32_t op3)18572 __STATIC_FORCEINLINE int32_t __SMLADX(int32_t op1, int32_t op2, int32_t op3)
18573 {
18574     return (int32_t)__RV_KMAXDA(op3, op1, op2);
18575 }
18576 /** \brief Q setting dual 16-bit signed multiply with exchange subtract with 32-bit accumulate.  */
__SMLSDX(int32_t op1,int32_t op2,int32_t op3)18577 __STATIC_FORCEINLINE int32_t __SMLSDX(int32_t op1, int32_t op2, int32_t op3)
18578 {
18579     return (op3 - (int32_t)__RV_SMXDS(op1, op2));
18580 }
18581 /** \brief Dual 16-bit signed multiply with single 64-bit accumulator. */
__SMLALD(int32_t op1,int32_t op2,int64_t acc)18582 __STATIC_FORCEINLINE int64_t __SMLALD(int32_t op1, int32_t op2, int64_t acc)
18583 {
18584     return (int64_t)__RV_SMALDA(acc, op1, op2);
18585 }
18586 /** \brief Dual 16-bit signed multiply with exchange with single 64-bit accumulator.  */
__SMLALDX(int32_t op1,int32_t op2,int64_t acc)18587 __STATIC_FORCEINLINE int64_t __SMLALDX(int32_t op1, int32_t op2, int64_t acc)
18588 {
18589     return (int64_t)__RV_SMALXDA(acc, op1, op2);
18590 }
18591 /** \brief Q setting sum of dual 16-bit signed multiply. */
__SMUAD(int32_t op1,int32_t op2)18592 __STATIC_FORCEINLINE int32_t __SMUAD(int32_t op1, int32_t op2)
18593 {
18594     return (int32_t)__RV_KMDA(op1, op2);
18595 }
18596 /** \brief Dual 16-bit signed multiply returning difference. */
__SMUSD(int32_t op1,int32_t op2)18597 __STATIC_FORCEINLINE int32_t __SMUSD(int32_t op1, int32_t op2)
18598 {
18599     return (int32_t)__RV_SMDRS(op1, op2);
18600 }
/** \brief Dual extract 8-bits and sign extend each to 16-bits; forwards to __RV_SUNPKD820. */
#define __SXTB16(val)           __RV_SUNPKD820(val)
18603 /** \brief Dual extracted 8-bit to 16-bit signed addition. TODO Need test */
__SXTAB16(uint32_t op1,uint32_t op2)18604 __STATIC_FORCEINLINE int32_t __SXTAB16(uint32_t op1, uint32_t op2)
18605 {
18606     return __RV_ADD16(op1, __RV_SUNPKD830(op2));
18607 }
18608 /** \brief 32-bit signed multiply with 32-bit truncated accumulator. */
__SMMLA(int32_t op1,int32_t op2,int32_t op3)18609 __STATIC_FORCEINLINE int32_t __SMMLA(int32_t op1, int32_t op2, int32_t op3)
18610 {
18611     int32_t mul;
18612     mul = (int32_t)__RV_SMMUL(op1, op2);
18613     return (op3 + mul);
18614 }
/*
 * Plain name aliases: each macro expands to the __RV_ intrinsic of the same
 * name, so arguments and return value are exactly those of the intrinsic.
 * Listed alphabetically.
 */
#define __ADD64                 __RV_ADD64
#define __DKABS16               __RV_DKABS16
#define __DKABS8                __RV_DKABS8
#define __DKADD16               __RV_DKADD16
#define __DKHM16                __RV_DKHM16
#define __DKHM8                 __RV_DKHM8
#define __DKSLRA16              __RV_DKSLRA16
#define __DKSLRA8               __RV_DKSLRA8
#define __DKSUB16               __RV_DKSUB16
#define __EXPD80                __RV_EXPD80
#define __KABSW                 __RV_KABSW
#define __KADD16                __RV_KADD16
#define __MULSR64               __RV_MULSR64
#define __PKBB16                __RV_PKBB16
#define __PKTT16                __RV_PKTT16
#define __SMALBB                __RV_SMALBB
#define __SMALDA                __RV_SMALDA
#define __SMAQA                 __RV_SMAQA
#define __SMAX16                __RV_SMAX16
#define __SMAX8                 __RV_SMAX8
#define __SMBB16                __RV_SMBB16
#define __SMBT16                __RV_SMBT16
#define __SMSLDA                __RV_SMSLDA
#define __SMTT16                __RV_SMTT16
#define __SUB64                 __RV_SUB64

/* Aliases whose name differs from the intrinsic they expand to. */
#define __DQADD8                __RV_DKADD8
#define __DQSUB8                __RV_DKSUB8
#define __SADD16                __RV_ADD16

18643 
18644 #endif /* (__RISCV_FEATURE_DSP == 1) */
18645 
18646 #endif /* defined(__DSP_PRESENT) && (__DSP_PRESENT == 1) */
18647 
/**
 * \brief Halfword packing instruction. Combines bits[15:0] of val1 with
 *        bits[31:16] of val2 left-shifted by val3.
 * \details Both val1 and val2 are converted to uint32_t before masking and
 *          shifting; each argument is evaluated exactly once.
 */
#define __PKHBT(val1, val2, val3)                                   \
    ( (((uint32_t)(val1))             & 0x0000FFFFUL) |             \
      ((((uint32_t)(val2)) << (val3)) & 0xFFFF0000UL) )
/**
 * \brief Halfword packing instruction. Combines bits[31:16] of val1 with
 *        bits[15:0] of val2 right-shifted by val3.
 * \details Both val1 and val2 are converted to uint32_t before masking and
 *          shifting, so the right shift is a logical (zero-filling) shift;
 *          each argument is evaluated exactly once.
 */
#define __PKHTB(val1, val2, val3)                                   \
    ( (((uint32_t)(val1))             & 0xFFFF0000UL) |             \
      ((((uint32_t)(val2)) >> (val3)) & 0x0000FFFFUL) )
18654 
18655 #ifdef __cplusplus
18656 }
18657 #endif
18658 
18659 #endif /* __CORE_FEATURE_DSP__ */
18660