1 /*
2 * Copyright (c) 2019 Nuclei Limited. All rights reserved.
3 *
4 * SPDX-License-Identifier: Apache-2.0
5 *
6 * Licensed under the Apache License, Version 2.0 (the License); you may
7 * not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
14 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18 #ifndef __CORE_FEATURE_DSP__
19 #define __CORE_FEATURE_DSP__
20
21 /*!
22 * @file core_feature_dsp.h
23 * @brief DSP feature API header file for Nuclei N/NX Core
24 */
25 /*
26 * DSP Feature Configuration Macro:
27 * 1. __DSP_PRESENT: Define whether Digital Signal Processing Unit(DSP) is present or not
28 * * 0: Not present
29 * * 1: Present
30 */
31 #ifdef __cplusplus
32 extern "C" {
33 #endif
34
35 #if defined(__DSP_PRESENT) && (__DSP_PRESENT == 1)
36
37 /* ########################### CPU SIMD DSP Intrinsic Functions ########################### */
38 /**
39 * \defgroup NMSIS_Core_DSP_Intrinsic Intrinsic Functions for SIMD Instructions
40 * \ingroup NMSIS_Core
41 * \brief Functions that generate RISC-V DSP SIMD instructions.
42 * \details
43 *
44 * The following functions generate specified RISC-V SIMD instructions that cannot be directly accessed by compiler.
45 * * **DSP ISA Extension Instruction Summary**
46 * + **Shorthand Definitions**
47 * - r.H == r.H1: r[31:16], r.L == r.H0: r[15:0]
48 * - r.B3: r[31:24], r.B2: r[23:16], r.B1: r[15:8], r.B0: r[7:0]
49 * - r.B[x]: r[(x*8+7):(x*8+0)]
50 * - r.H[x]: r[(x*16+15):(x*16+0)]
51 * - r.W[x]: r[(x*32+31):(x*32+0)]
52 * - r[xU]: the upper 32-bit of a 64-bit number; xU represents the GPR number that contains this upper part 32-bit value.
53 * - r[xL]: the lower 32-bit of a 64-bit number; xL represents the GPR number that contains this lower part 32-bit value.
54 * - r[xU].r[xL]: a 64-bit number that is formed from a pair of GPRs.
55 * - s>>: signed arithmetic right shift
56 * - u>>: unsigned logical right shift
57 * - SAT.Qn(): Saturate to the range of [-2^n, 2^n-1], if saturation happens, set PSW.OV.
58 * - SAT.Um(): Saturate to the range of [0, 2^m-1], if saturation happens, set PSW.OV.
59 * - RUND(): Indicate `rounding`, i.e., add 1 to the most significant discarded bit for right shift or MSW-type multiplication instructions.
60 * - Sign or Zero Extending functions:
61 * - SEm(data): Sign-Extend data to m-bit.
62 * - ZEm(data): Zero-Extend data to m-bit.
63 * - ABS(x): Calculate the absolute value of `x`.
64 * - CONCAT(x,y): Concatenate `x` and `y` to form a value.
65 * - u<: Unsigned less than comparison.
66 * - u<=: Unsigned less than or equal comparison.
67 * - u>: Unsigned greater than comparison.
68 * - s*: Signed multiplication.
69 * - u*: Unsigned multiplication.
70 *
71 * @{
72 */
73 /** @} */ /* End of Doxygen Group NMSIS_Core_DSP_Intrinsic */
74
75
76 /**
77 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS SIMD Data Processing Instructions
78 * \ingroup NMSIS_Core_DSP_Intrinsic
79 * \brief SIMD Data Processing Instructions
80 * \details
81 */
82
83 /**
84 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB SIMD 16-bit Add/Subtract Instructions
85 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
86 * \brief SIMD 16-bit Add/Subtract Instructions
87 * \details
88 * Based on the combination of the types of the two 16-bit arithmetic operations, the SIMD 16-bit
89 * add/subtract instructions can be classified into 6 main categories: Addition (two 16-bit addition),
90 * Subtraction (two 16-bit subtraction), Crossed Add & Sub (one addition and one subtraction), and
91 * Crossed Sub & Add (one subtraction and one addition), Straight Add & Sub (one addition and one
92 * subtraction), and Straight Sub & Add (one subtraction and one addition).
93 * Based on the way of how an overflow condition is handled, the SIMD 16-bit add/subtract
94 * instructions can be classified into 5 groups: Wrap-around (dropping overflow), Signed Halving
95 * (keeping overflow by dropping 1 LSB bit), Unsigned Halving, Signed Saturation (clipping overflow),
96 * and Unsigned Saturation.
97 * Together, there are 30 SIMD 16-bit add/subtract instructions.
98 */
99
100 /**
101 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB SIMD 8-bit Addition & Subtraction Instructions
102 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
103 * \brief SIMD 8-bit Addition & Subtraction Instructions
104 * \details
105 * Based on the types of the four 8-bit arithmetic operations, the SIMD 8-bit add/subtract instructions
106 * can be classified into 2 main categories: Addition (four 8-bit addition), and Subtraction (four 8-bit
107 * subtraction).
108 * Based on the way of how an overflow condition is handled for signed or unsigned operation, the
109 * SIMD 8-bit add/subtract instructions can be classified into 5 groups: Wrap-around (dropping
110 * overflow), Signed Halving (keeping overflow by dropping 1 LSB bit), Unsigned Halving, Signed
111 * Saturation (clipping overflow), and Unsigned Saturation.
112 * Together, there are 10 SIMD 8-bit add/subtract instructions.
113 */
114
115 /**
116 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT SIMD 16-bit Shift Instructions
117 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
118 * \brief SIMD 16-bit Shift Instructions
119 * \details
120 * there are 14 SIMD 16-bit shift instructions.
121 */
122
123 /**
124 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT SIMD 8-bit Shift Instructions
125 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
126 * \brief SIMD 8-bit Shift Instructions
127 * \details
128 * there are 14 SIMD 8-bit shift instructions.
129 */
130
131 /**
132 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP SIMD 16-bit Compare Instructions
133 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
134 * \brief SIMD 16-bit Compare Instructions
135 * \details
136 * there are 5 SIMD 16-bit Compare instructions.
137 */
138
139 /**
140 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP SIMD 8-bit Compare Instructions
141 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
142 * \brief SIMD 8-bit Compare Instructions
143 * \details
144 * there are 5 SIMD 8-bit Compare instructions.
145 */
146
147 /**
148 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY SIMD 16-bit Multiply Instructions
149 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
150 * \brief SIMD 16-bit Multiply Instructions
151 * \details
152 * there are 6 SIMD 16-bit Multiply instructions.
153 */
154
155 /**
156 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY SIMD 8-bit Multiply Instructions
157 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
158 * \brief SIMD 8-bit Multiply Instructions
159 * \details
160 * there are 6 SIMD 8-bit Multiply instructions.
161 */
162
163 /**
164 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC SIMD 16-bit Miscellaneous Instructions
165 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
166 * \brief SIMD 16-bit Miscellaneous Instructions
167 * \details
168 * there are 10 SIMD 16-bit Misc instructions.
169 */
170
171 /**
172 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC SIMD 8-bit Miscellaneous Instructions
173 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
174 * \brief SIMD 8-bit Miscellaneous Instructions
175 * \details
176 * there are 10 SIMD 8-bit Miscellaneous instructions.
177 */
178
179 /**
180 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK SIMD 8-bit Unpacking Instructions
181 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
182 * \brief SIMD 8-bit Unpacking Instructions
183 * \details
184 * there are 8 SIMD 8-bit Unpacking instructions.
185 */
186
187 /**
188 * \defgroup NMSIS_Core_DSP_Intrinsic_NON_SIMD Non-SIMD Instructions
189 * \ingroup NMSIS_Core_DSP_Intrinsic
190 * \brief Non-SIMD Instructions
191 * \details
192 */
193
194 /**
195 * \defgroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU Non-SIMD Q15 saturation ALU Instructions
196 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD
197 * \brief Non-SIMD Q15 saturation ALU Instructions
198 * \details
199 * there are 7 Non-SIMD Q15 saturation ALU Instructions
200 */
201
202 /**
203 * \defgroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU Non-SIMD Q31 saturation ALU Instructions
204 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD
205 * \brief Non-SIMD Q31 saturation ALU Instructions
206 * \details
207 * there are Non-SIMD Q31 saturation ALU Instructions
208 */
209
210 /**
211 * \defgroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION 32-bit Computation Instructions
212 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD
213 * \brief 32-bit Computation Instructions
214 * \details
215 * there are 8 32-bit Computation Instructions
216 */
217
218 /**
219 * \defgroup NMSIS_Core_DSP_Intrinsic_OV_FLAG_SC OV (Overflow) flag Set/Clear Instructions
220 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD
221 * \brief OV (Overflow) flag Set/Clear Instructions
222 * \details
223 * The following table lists the user instructions related to Overflow (OV) flag manipulation. There are 2 OV (Overflow) flag Set/Clear Instructions.
224 */
225
226 /**
227 * \defgroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC Non-SIMD Miscellaneous Instructions
228 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD
229 * \brief Non-SIMD Miscellaneous Instructions
230 * \details
231 * There are 13 Miscellaneous Instructions here.
232 */
233
234 /**
235 * \defgroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS Partial-SIMD Data Processing Instructions
236 * \ingroup NMSIS_Core_DSP_Intrinsic
237 * \brief Partial-SIMD Data Processing Instructions
238 * \details
239 */
240
241 /**
242 * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK SIMD 16-bit Packing Instructions
243 * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
244 * \brief SIMD 16-bit Packing Instructions
245 * \details
246 * there are 4 SIMD 16-bit Packing Instructions.
247 */
248
249 /**
250 * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC Signed MSW 32x32 Multiply and Add Instructions
251 * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
252 * \brief Signed MSW 32x32 Multiply and Add Instructions
253 * \details
254 * there are 8 Signed MSW 32x32 Multiply and Add Instructions
255 */
256
257 /**
258 * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC Signed MSW 32x16 Multiply and Add Instructions
259 * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
260 * \brief Signed MSW 32x16 Multiply and Add Instructions
261 * \details
262 * there are 15 Signed MSW 32x16 Multiply and Add Instructions
263 */
264
265 /**
266 * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB Signed 16-bit Multiply 32-bit Add/Subtract Instructions
267 * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
268 * \brief Signed 16-bit Multiply 32-bit Add/Subtract Instructions
269 * \details
270 * there are 18 Signed 16-bit Multiply 32-bit Add/Subtract Instructions
271 */
272
273 /**
274 * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB Signed 16-bit Multiply 64-bit Add/Subtract Instructions
275 * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
276 * \brief Signed 16-bit Multiply 64-bit Add/Subtract Instructions
277 * \details
278 * there is Signed 16-bit Multiply 64-bit Add/Subtract Instructions
279 */
280
281 /**
282 * \defgroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC Partial-SIMD Miscellaneous Instructions
283 * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
284 * \brief Partial-SIMD Miscellaneous Instructions
285 * \details
286 * there are 7 Partial-SIMD Miscellaneous Instructions
287 */
288
289 /**
290 * \defgroup NMSIS_Core_DSP_Intrinsic_8B_MULT_32B_ADD 8-bit Multiply with 32-bit Add Instructions
291 * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
292 * \brief 8-bit Multiply with 32-bit Add Instructions
293 * \details
294 * there are 3 8-bit Multiply with 32-bit Add Instructions
295 */
296
297 /**
298 * \defgroup NMSIS_Core_DSP_Intrinsic_64B_PROFILE 64-bit Profile Instructions
299 * \ingroup NMSIS_Core_DSP_Intrinsic
300 * \brief 64-bit Profile Instructions
301 * \details
302 */
303
304 /**
305 * \defgroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB 64-bit Addition & Subtraction Instructions
306 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_PROFILE
307 * \brief 64-bit Addition & Subtraction Instructions
308 * \details
309 * there are 10 64-bit Addition & Subtraction Instructions.
310 */
311
312 /**
313 * \defgroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB 32-bit Multiply with 64-bit Add/Subtract Instructions
314 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_PROFILE
315 * \brief 32-bit Multiply with 64-bit Add/Subtract Instructions
316 * \details
317 * there are 32-bit Multiply 64-bit Add/Subtract Instructions
318 */
319
320 /**
321 * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB Signed 16-bit Multiply with 64-bit Add/Subtract Instructions
322 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_PROFILE
323 * \brief Signed 16-bit Multiply with 64-bit Add/Subtract Instructions
324 * \details
325 * there are 10 Signed 16-bit Multiply with 64-bit Add/Subtract Instructions
326 */
327
328 /**
329 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY RV64 Only Instructions
330 * \ingroup NMSIS_Core_DSP_Intrinsic
331 * \brief RV64 Only Instructions
332 * \details
333 */
334
335 /**
336 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB (RV64 Only) SIMD 32-bit Add/Subtract Instructions
337 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
338 * \brief (RV64 Only) SIMD 32-bit Add/Subtract Instructions
339 * \details
340 * The following tables list instructions that are only present in RV64.
341 * There are 30 SIMD 32-bit addition or subtraction instructions. There are 4 SIMD 16-bit Packing Instructions.
342 */
343
344 /**
345 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT (RV64 Only) SIMD 32-bit Shift Instructions
346 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
347 * \brief (RV64 Only) SIMD 32-bit Shift Instructions
348 * \details
349 * there are 14 (RV64 Only) SIMD 32-bit Shift Instructions
350 */
351
352 /**
353 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC (RV64 Only) SIMD 32-bit Miscellaneous Instructions
354 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
355 * \brief (RV64 Only) SIMD 32-bit Miscellaneous Instructions
356 * \details
357 * there are 5 (RV64 Only) SIMD 32-bit Miscellaneous Instructions
358 */
359
360 /**
361 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT (RV64 Only) SIMD Q15 Saturating Multiply Instructions
362 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
363 * \brief (RV64 Only) SIMD Q15 Saturating Multiply Instructions
364 * \details
365 * there are 9 (RV64 Only) SIMD Q15 saturating Multiply Instructions
366 */
367
368 /**
369 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT (RV64 Only) 32-bit Multiply Instructions
370 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
371 * \brief (RV64 Only) 32-bit Multiply Instructions
372 * \details
373 * there are 3 (RV64 Only) 32-bit Multiply Instructions
374 */
375
376 /**
377 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT_ADD (RV64 Only) 32-bit Multiply & Add Instructions
378 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
379 * \brief (RV64 Only) 32-bit Multiply & Add Instructions
380 * \details
381 * there are 3 (RV64 Only) 32-bit Multiply & Add Instructions
382 */
383
384 /**
385 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC (RV64 Only) 32-bit Parallel Multiply & Add Instructions
386 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
387 * \brief (RV64 Only) 32-bit Parallel Multiply & Add Instructions
388 * \details
389 * there are 12 (RV64 Only) 32-bit Parallel Multiply & Add Instructions
390 */
391
392 /**
393 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_NON_SIMD_32B_SHIFT (RV64 Only) Non-SIMD 32-bit Shift Instructions
394 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
395 * \brief (RV64 Only) Non-SIMD 32-bit Shift Instructions
396 * \details
397 * there is 1 (RV64 Only) Non-SIMD 32-bit Shift Instruction
398 */
399
400 /**
401 * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK 32-bit Packing Instructions
402 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
403 * \brief 32-bit Packing Instructions
404 * \details
405 * There are four 32-bit packing instructions here
406 */
407
408 /* ===== Inline Function Start for 3.1. ADD8 ===== */
409 /**
410 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
411 * \brief ADD8 (SIMD 8-bit Addition)
412 * \details
413 * **Type**: SIMD
414 *
415 * **Syntax**:\n
416 * ~~~
417 * ADD8 Rd, Rs1, Rs2
418 * ~~~
419 *
420 * **Purpose**:\n
421 * Do 8-bit integer element additions simultaneously.
422 *
423 * **Description**:\n
424 * This instruction adds the 8-bit integer elements in Rs1 with the 8-bit integer elements
425 * in Rs2, and then writes the 8-bit element results to Rd.
426 *
427 * **Note**:\n
428 * This instruction can be used for either signed or unsigned addition.
429 *
430 * **Operations**:\n
431 * ~~~
432 * Rd.B[x] = Rs1.B[x] + Rs2.B[x];
433 * for RV32: x=3...0,
434 * for RV64: x=7...0
435 * ~~~
436 *
437 * \param [in] a unsigned long type of value stored in a
438 * \param [in] b unsigned long type of value stored in b
439 * \return value stored in unsigned long type
440 */
__RV_ADD8(unsigned long a,unsigned long b)441 __STATIC_FORCEINLINE unsigned long __RV_ADD8(unsigned long a, unsigned long b)
442 {
443 register unsigned long result;
444 __ASM volatile("add8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
445 return result;
446 }
447 /* ===== Inline Function End for 3.1. ADD8 ===== */
448
449 /* ===== Inline Function Start for 3.2. ADD16 ===== */
450 /**
451 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
452 * \brief ADD16 (SIMD 16-bit Addition)
453 * \details
454 * **Type**: SIMD
455 *
456 * **Syntax**:\n
457 * ~~~
458 * ADD16 Rd, Rs1, Rs2
459 * ~~~
460 *
461 * **Purpose**:\n
462 * Do 16-bit integer element additions simultaneously.
463 *
464 * **Description**:\n
465 * This instruction adds the 16-bit integer elements in Rs1 with the 16-bit integer
466 * elements in Rs2, and then writes the 16-bit element results to Rd.
467 *
468 * **Note**:\n
469 * This instruction can be used for either signed or unsigned addition.
470 *
471 * **Operations**:\n
472 * ~~~
473 * Rd.H[x] = Rs1.H[x] + Rs2.H[x];
474 * for RV32: x=1...0,
475 * for RV64: x=3...0
476 * ~~~
477 *
478 * \param [in] a unsigned long type of value stored in a
479 * \param [in] b unsigned long type of value stored in b
480 * \return value stored in unsigned long type
481 */
__RV_ADD16(unsigned long a,unsigned long b)482 __STATIC_FORCEINLINE unsigned long __RV_ADD16(unsigned long a, unsigned long b)
483 {
484 register unsigned long result;
485 __ASM volatile("add16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
486 return result;
487 }
488 /* ===== Inline Function End for 3.2. ADD16 ===== */
489
490 /* ===== Inline Function Start for 3.3. ADD64 ===== */
491 /**
492 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
493 * \brief ADD64 (64-bit Addition)
494 * \details
495 * **Type**: 64-bit Profile
496 *
497 * **Syntax**:\n
498 * ~~~
499 * ADD64 Rd, Rs1, Rs2
500 * ~~~
501 *
502 * **Purpose**:\n
503 * Add two 64-bit signed or unsigned integers.
504 *
505 * **RV32 Description**:\n
506 * This instruction adds the 64-bit integer of an even/odd pair of registers specified
507 * by Rs1(4,1) with the 64-bit integer of an even/odd pair of registers specified by Rs2(4,1), and then
508 * writes the 64-bit result to an even/odd pair of registers specified by Rd(4,1).
509 * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register
510 * pair includes register 2d and 2d+1.
511 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
512 * of the pair contains the low 32-bit of the result.
513 *
514 * **RV64 Description**:\n
515 * This instruction has the same behavior as the ADD instruction in RV64I.
516 *
517 * **Note**:\n
518 * This instruction can be used for either signed or unsigned addition.
519 *
520 * **Operations**:\n
521 * ~~~
522 * RV32:
523 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
524 * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
525 * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
526 * R[t_H].R[t_L] = R[a_H].R[a_L] + R[b_H].R[b_L];
527 * RV64:
528 * Rd = Rs1 + Rs2;
529 * ~~~
530 *
531 * \param [in] a unsigned long long type of value stored in a
532 * \param [in] b unsigned long long type of value stored in b
533 * \return value stored in unsigned long long type
534 */
__RV_ADD64(unsigned long long a,unsigned long long b)535 __STATIC_FORCEINLINE unsigned long long __RV_ADD64(unsigned long long a, unsigned long long b)
536 {
537 register unsigned long long result;
538 __ASM volatile("add64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
539 return result;
540 }
541 /* ===== Inline Function End for 3.3. ADD64 ===== */
542
543 /* ===== Inline Function Start for 3.4. AVE ===== */
544 /**
545 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
546 * \brief AVE (Average with Rounding)
547 * \details
548 * **Type**: DSP
549 *
550 * **Syntax**:\n
551 * ~~~
552 * AVE Rd, Rs1, Rs2
553 * ~~~
554 *
555 * **Purpose**:\n
556 * Calculate the average of the contents of two general registers.
557 *
558 * **Description**:\n
559 * This instruction calculates the average value of two signed integers stored in Rs1 and
560 * Rs2, rounds up a half-integer result to the nearest integer, and writes the result to Rd.
561 *
562 * **Operations**:\n
563 * ~~~
564 * Sum = CONCAT(Rs1[MSB],Rs1[MSB:0]) + CONCAT(Rs2[MSB],Rs2[MSB:0]) + 1;
565 * Rd = Sum[(MSB+1):1];
566 * for RV32: MSB=31,
567 * for RV64: MSB=63
568 * ~~~
569 *
570 * \param [in] a long type of value stored in a
571 * \param [in] b long type of value stored in b
572 * \return value stored in long type
573 */
__RV_AVE(long a,long b)574 __STATIC_FORCEINLINE long __RV_AVE(long a, long b)
575 {
576 register long result;
577 __ASM volatile("ave %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
578 return result;
579 }
580 /* ===== Inline Function End for 3.4. AVE ===== */
581
582 /* ===== Inline Function Start for 3.5. BITREV ===== */
583 /**
584 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
585 * \brief BITREV (Bit Reverse)
586 * \details
587 * **Type**: DSP
588 *
589 * **Syntax**:\n
590 * ~~~
591 * BITREV Rd, Rs1, Rs2
592 * ~~~
593 *
594 * **Purpose**:\n
595 * Reverse the bit positions of the source operand within a specified width starting from bit
596 * 0. The reversed width is a variable from a GPR.
597 *
598 * **Description**:\n
599 * This instruction reverses the bit positions of the content of Rs1. The reversed bit width
600 * is calculated as Rs2[4:0]+1 (RV32) or Rs2[5:0]+1 (RV64). The upper bits beyond the reversed width
601 * are filled with zeros. After the bit reverse operation, the result is written to Rd.
602 *
603 * **Operations**:\n
604 * ~~~
605 * msb = Rs2[4:0]; (for RV32)
606 * msb = Rs2[5:0]; (for RV64)
607 * rev[0:msb] = Rs1[msb:0];
608 * Rd = ZE(rev[msb:0]);
609 * ~~~
610 *
611 * \param [in] a unsigned long type of value stored in a
612 * \param [in] b unsigned long type of value stored in b
613 * \return value stored in unsigned long type
614 */
__RV_BITREV(unsigned long a,unsigned long b)615 __STATIC_FORCEINLINE unsigned long __RV_BITREV(unsigned long a, unsigned long b)
616 {
617 register unsigned long result;
618 __ASM volatile("bitrev %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
619 return result;
620 }
621 /* ===== Inline Function End for 3.5. BITREV ===== */
622
623 /* ===== Inline Function Start for 3.6. BITREVI ===== */
624 /**
625 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
626 * \brief BITREVI (Bit Reverse Immediate)
627 * \details
628 * **Type**: DSP
629 *
630 * **Syntax**:\n
631 * ~~~
632 * (RV32) BITREVI Rd, Rs1, imm[4:0]
633 * (RV64) BITREVI Rd, Rs1, imm[5:0]
634 * ~~~
635 *
636 * **Purpose**:\n
637 * Reverse the bit positions of the source operand within a specified width starting from bit
638 * 0. The reversed width is an immediate value.
639 *
640 * **Description**:\n
641 * This instruction reverses the bit positions of the content of Rs1. The reversed bit width
642 * is calculated as imm[4:0]+1 (RV32) or imm[5:0]+1 (RV64). The upper bits beyond the reversed width
643 * are filled with zeros. After the bit reverse operation, the result is written to Rd.
644 *
645 * **Operations**:\n
646 * ~~~
647 * msb = imm[4:0]; (RV32)
648 * msb = imm[5:0]; (RV64)
649 * rev[0:msb] = Rs1[msb:0];
650 * Rd = ZE(rev[msb:0]);
651 * ~~~
652 *
653 * \param [in] a unsigned long type of value stored in a
654 * \param [in] b immediate selecting the most significant bit of the reversed field (msb); must be a compile-time constant
655 * \return value stored in unsigned long type
656 */
#define __RV_BITREVI(a, b)    \
    ({    \
        /* Capture `a` before declaring the output local, and use names a   \
         * caller is unlikely to have: with the output declared first, an   \
         * argument spelled like that local would read the macro's own      \
         * uninitialised variable instead of the caller's.                  \
         * No `register`: it is ill-formed in C++17 and only a hint in C. */ \
        unsigned long __rv_bitrevi_src = (unsigned long)(a);    \
        unsigned long __rv_bitrevi_dst;    \
        /* `b` goes through the "K" immediate constraint, so it must be a   \
         * compile-time constant.                                           \
         * NOTE(review): GCC documents RISC-V "K" as a 5-bit unsigned       \
         * immediate, which would cap b at 31 even on RV64 - confirm. */    \
        __ASM volatile("bitrevi %0, %1, %2" : "=r"(__rv_bitrevi_dst) : "r"(__rv_bitrevi_src), "K"(b));    \
        __rv_bitrevi_dst;    \
    })
664 /* ===== Inline Function End for 3.6. BITREVI ===== */
665
666 /* ===== Inline Function Start for 3.7. BPICK ===== */
667 /**
668 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
669 * \brief BPICK (Bit-wise Pick)
670 * \details
671 * **Type**: DSP
672 *
673 * **Syntax**:\n
674 * ~~~
675 * BPICK Rd, Rs1, Rs2, Rc
676 * ~~~
677 *
678 * **Purpose**:\n
679 * Select from two source operands based on a bit mask in the third operand.
680 *
681 * **Description**:\n
682 * This instruction selects individual bits from Rs1 or Rs2, based on the bit mask value in
683 * Rc. If a bit in Rc is 1, the corresponding bit is from Rs1; otherwise, the corresponding bit is from Rs2.
684 * The selection results are written to Rd.
685 *
686 * **Operations**:\n
687 * ~~~
688 * Rd[x] = Rc[x]? Rs1[x] : Rs2[x];
689 * for RV32, x=31...0
690 * for RV64, x=63...0
691 * ~~~
692 *
693 * \param [in] a unsigned long type of value stored in a
694 * \param [in] b unsigned long type of value stored in b
695 * \param [in] c unsigned long type of value stored in c
696 * \return value stored in unsigned long type
697 */
__RV_BPICK(unsigned long a,unsigned long b,unsigned long c)698 __STATIC_FORCEINLINE unsigned long __RV_BPICK(unsigned long a, unsigned long b, unsigned long c)
699 {
700 register unsigned long result;
701 __ASM volatile("bpick %0, %1, %2, %3" : "=r"(result) : "r"(a), "r"(b), "r"(c));
702 return result;
703 }
704 /* ===== Inline Function End for 3.7. BPICK ===== */
705
706 /* ===== Inline Function Start for 3.8. CLROV ===== */
707 /**
708 * \ingroup NMSIS_Core_DSP_Intrinsic_OV_FLAG_SC
709 * \brief CLROV (Clear OV flag)
710 * \details
711 * **Type**: DSP
712 *
713 * **Syntax**:\n
714 * ~~~
715 * CLROV # pseudo mnemonic
716 * ~~~
717 *
718 * **Purpose**:\n
719 * This pseudo instruction is an alias to `CSRRCI x0, ucode, 1` instruction.
720 *
721 *
722 */
__RV_CLROV(void)723 __STATIC_FORCEINLINE void __RV_CLROV(void)
724 {
725 __ASM volatile("clrov ");
726 }
727 /* ===== Inline Function End for 3.8. CLROV ===== */
728
729 /* ===== Inline Function Start for 3.9. CLRS8 ===== */
730 /**
731 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
732 * \brief CLRS8 (SIMD 8-bit Count Leading Redundant Sign)
733 * \details
734 * **Type**: SIMD
735 *
736 * **Syntax**:\n
737 * ~~~
738 * CLRS8 Rd, Rs1
739 * ~~~
740 *
741 * **Purpose**:\n
742 * Count the number of redundant sign bits of the 8-bit elements of a general register.
743 *
744 * **Description**:\n
745 * Starting from the bits next to the sign bits of the 8-bit elements of Rs1, this instruction
746 * counts the number of redundant sign bits and writes the result to the corresponding 8-bit elements
747 * of Rd.
748 *
749 * **Operations**:\n
750 * ~~~
751 * snum[x] = Rs1.B[x];
752 * cnt[x] = 0;
753 * for (i = 6 to 0) {
754 * if (snum[x](i) == snum[x](7)) {
755 * cnt[x] = cnt[x] + 1;
756 * } else {
757 * break;
758 * }
759 * }
760 * Rd.B[x] = cnt[x];
761 * for RV32: x=3...0
762 * for RV64: x=7...0
763 * ~~~
764 *
765 * \param [in] a unsigned long type of value stored in a
766 * \return value stored in unsigned long type
767 */
__RV_CLRS8(unsigned long a)768 __STATIC_FORCEINLINE unsigned long __RV_CLRS8(unsigned long a)
769 {
770 register unsigned long result;
771 __ASM volatile("clrs8 %0, %1" : "=r"(result) : "r"(a));
772 return result;
773 }
774 /* ===== Inline Function End for 3.9. CLRS8 ===== */
775
776 /* ===== Inline Function Start for 3.10. CLRS16 ===== */
777 /**
778 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
779 * \brief CLRS16 (SIMD 16-bit Count Leading Redundant Sign)
780 * \details
781 * **Type**: SIMD
782 *
783 * **Syntax**:\n
784 * ~~~
785 * CLRS16 Rd, Rs1
786 * ~~~
787 *
788 * **Purpose**:\n
789 * Count the number of redundant sign bits of the 16-bit elements of a general register.
790 *
791 * **Description**:\n
792 * Starting from the bits next to the sign bits of the 16-bit elements of Rs1, this
793 * instruction counts the number of redundant sign bits and writes the result to the corresponding 16-
794 * bit elements of Rd.
795 *
796 * **Operations**:\n
797 * ~~~
798 * snum[x] = Rs1.H[x];
799 * cnt[x] = 0;
800 * for (i = 14 to 0) {
801 * if (snum[x](i) == snum[x](15)) {
802 * cnt[x] = cnt[x] + 1;
803 * } else {
804 * break;
805 * }
806 * }
807 * Rd.H[x] = cnt[x];
808 * for RV32: x=1...0
809 * for RV64: x=3...0
810 * ~~~
811 *
812 * \param [in] a unsigned long type of value stored in a
813 * \return value stored in unsigned long type
814 */
__RV_CLRS16(unsigned long a)815 __STATIC_FORCEINLINE unsigned long __RV_CLRS16(unsigned long a)
816 {
817 register unsigned long result;
818 __ASM volatile("clrs16 %0, %1" : "=r"(result) : "r"(a));
819 return result;
820 }
821 /* ===== Inline Function End for 3.10. CLRS16 ===== */
822
823 /* ===== Inline Function Start for 3.11. CLRS32 ===== */
824 /**
825 * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
826 * \brief CLRS32 (SIMD 32-bit Count Leading Redundant Sign)
827 * \details
828 * **Type**: SIMD
829 *
830 * **Syntax**:\n
831 * ~~~
832 * CLRS32 Rd, Rs1
833 * ~~~
834 *
835 * **Purpose**:\n
836 * Count the number of redundant sign bits of the 32-bit elements of a general register.
837 *
838 * **Description**:\n
839 * Starting from the bits next to the sign bits of the 32-bit elements of Rs1, this
840 * instruction counts the number of redundant sign bits and writes the result to the corresponding 32-
841 * bit elements of Rd.
842 *
843 * **Operations**:\n
844 * ~~~
845 * snum[x] = Rs1.W[x];
846 * cnt[x] = 0;
847 * for (i = 30 to 0) {
848 * if (snum[x](i) == snum[x](31)) {
849 * cnt[x] = cnt[x] + 1;
850 * } else {
851 * break;
852 * }
853 * }
854 * Rd.W[x] = cnt[x];
855 * for RV32: x=0
856 * for RV64: x=1...0
857 * ~~~
858 *
859 * \param [in] a unsigned long type of value stored in a
860 * \return value stored in unsigned long type
861 */
__RV_CLRS32(unsigned long a)862 __STATIC_FORCEINLINE unsigned long __RV_CLRS32(unsigned long a)
863 {
864 register unsigned long result;
865 __ASM volatile("clrs32 %0, %1" : "=r"(result) : "r"(a));
866 return result;
867 }
868 /* ===== Inline Function End for 3.11. CLRS32 ===== */
869
870 /* ===== Inline Function Start for 3.12. CLO8 ===== */
871 /**
872 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
873 * \brief CLO8 (SIMD 8-bit Count Leading One)
874 * \details
875 * **Type**: SIMD
876 *
877 * **Syntax**:\n
878 * ~~~
879 * CLO8 Rd, Rs1
880 * ~~~
881 *
882 * **Purpose**:\n
883 * Count the number of leading one bits of the 8-bit elements of a general register.
884 *
885 * **Description**:\n
886 * Starting from the most significant bits of the 8-bit elements of Rs1, this instruction
887 * counts the number of leading one bits and writes the results to the corresponding 8-bit elements of
888 * Rd.
889 *
890 * **Operations**:\n
891 * ~~~
892 * snum[x] = Rs1.B[x];
893 * cnt[x] = 0;
894 * for (i = 7 to 0) {
895 * if (snum[x](i) == 1) {
896 * cnt[x] = cnt[x] + 1;
897 * } else {
898 * break;
899 * }
900 * }
901 * Rd.B[x] = cnt[x];
902 * for RV32: x=3...0
903 * for RV64: x=7...0
904 * ~~~
905 *
906 * \param [in] a unsigned long type of value stored in a
907 * \return value stored in unsigned long type
908 */
__RV_CLO8(unsigned long a)909 __STATIC_FORCEINLINE unsigned long __RV_CLO8(unsigned long a)
910 {
911 register unsigned long result;
912 __ASM volatile("clo8 %0, %1" : "=r"(result) : "r"(a));
913 return result;
914 }
915 /* ===== Inline Function End for 3.12. CLO8 ===== */
916
917 /* ===== Inline Function Start for 3.13. CLO16 ===== */
918 /**
919 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
920 * \brief CLO16 (SIMD 16-bit Count Leading One)
921 * \details
922 * **Type**: SIMD
923 *
924 * **Syntax**:\n
925 * ~~~
926 * CLO16 Rd, Rs1
927 * ~~~
928 *
929 * **Purpose**:\n
930 * Count the number of leading one bits of the 16-bit elements of a general register.
931 *
932 * **Description**:\n
933 * Starting from the most significant bits of the 16-bit elements of Rs1, this instruction
934 * counts the number of leading one bits and writes the results to the corresponding 16-bit elements
935 * of Rd.
936 *
937 * **Operations**:\n
938 * ~~~
939 * snum[x] = Rs1.H[x];
940 * cnt[x] = 0;
941 * for (i = 15 to 0) {
942 * if (snum[x](i) == 1) {
943 * cnt[x] = cnt[x] + 1;
944 * } else {
945 * break;
946 * }
947 * }
948 * Rd.H[x] = cnt[x];
949 * for RV32: x=1...0
950 * for RV64: x=3...0
951 * ~~~
952 *
953 * \param [in] a unsigned long type of value stored in a
954 * \return value stored in unsigned long type
955 */
__RV_CLO16(unsigned long a)956 __STATIC_FORCEINLINE unsigned long __RV_CLO16(unsigned long a)
957 {
958 register unsigned long result;
959 __ASM volatile("clo16 %0, %1" : "=r"(result) : "r"(a));
960 return result;
961 }
962 /* ===== Inline Function End for 3.13. CLO16 ===== */
963
964 /* ===== Inline Function Start for 3.14. CLO32 ===== */
965 /**
966 * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
967 * \brief CLO32 (SIMD 32-bit Count Leading One)
968 * \details
969 * **Type**: SIMD
970 *
971 * **Syntax**:\n
972 * ~~~
973 * CLO32 Rd, Rs1
974 * ~~~
975 *
976 * **Purpose**:\n
977 * Count the number of leading one bits of the 32-bit elements of a general register.
978 *
979 * **Description**:\n
980 * Starting from the most significant bits of the 32-bit elements of Rs1, this instruction
981 * counts the number of leading one bits and writes the results to the corresponding 32-bit elements
982 * of Rd.
983 *
984 * **Operations**:\n
985 * ~~~
986 * snum[x] = Rs1.W[x];
987 * cnt[x] = 0;
988 * for (i = 31 to 0) {
989 * if (snum[x](i) == 1) {
990 * cnt[x] = cnt[x] + 1;
991 * } else {
992 * break;
993 * }
994 * }
995 * Rd.W[x] = cnt[x];
996 * for RV32: x=0
997 * for RV64: x=1...0
998 * ~~~
999 *
1000 * \param [in] a unsigned long type of value stored in a
1001 * \return value stored in unsigned long type
1002 */
__RV_CLO32(unsigned long a)1003 __STATIC_FORCEINLINE unsigned long __RV_CLO32(unsigned long a)
1004 {
1005 register unsigned long result;
1006 __ASM volatile("clo32 %0, %1" : "=r"(result) : "r"(a));
1007 return result;
1008 }
1009 /* ===== Inline Function End for 3.14. CLO32 ===== */
1010
1011 /* ===== Inline Function Start for 3.15. CLZ8 ===== */
1012 /**
1013 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
1014 * \brief CLZ8 (SIMD 8-bit Count Leading Zero)
1015 * \details
1016 * **Type**: SIMD
1017 *
1018 * **Syntax**:\n
1019 * ~~~
1020 * CLZ8 Rd, Rs1
1021 * ~~~
1022 *
1023 * **Purpose**:\n
1024 * Count the number of leading zero bits of the 8-bit elements of a general register.
1025 *
1026 * **Description**:\n
1027 * Starting from the most significant bits of the 8-bit elements of Rs1, this instruction
1028 * counts the number of leading zero bits and writes the results to the corresponding 8-bit elements of
1029 * Rd.
1030 *
1031 * **Operations**:\n
1032 * ~~~
1033 * snum[x] = Rs1.B[x];
1034 * cnt[x] = 0;
1035 * for (i = 7 to 0) {
1036 * if (snum[x](i) == 0) {
1037 * cnt[x] = cnt[x] + 1;
1038 * } else {
1039 * break;
1040 * }
1041 * }
1042 * Rd.B[x] = cnt[x];
1043 * for RV32: x=3...0
1044 * for RV64: x=7...0
1045 * ~~~
1046 *
1047 * \param [in] a unsigned long type of value stored in a
1048 * \return value stored in unsigned long type
1049 */
__RV_CLZ8(unsigned long a)1050 __STATIC_FORCEINLINE unsigned long __RV_CLZ8(unsigned long a)
1051 {
1052 register unsigned long result;
1053 __ASM volatile("clz8 %0, %1" : "=r"(result) : "r"(a));
1054 return result;
1055 }
1056 /* ===== Inline Function End for 3.15. CLZ8 ===== */
1057
1058 /* ===== Inline Function Start for 3.16. CLZ16 ===== */
1059 /**
1060 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
1061 * \brief CLZ16 (SIMD 16-bit Count Leading Zero)
1062 * \details
1063 * **Type**: SIMD
1064 *
1065 * **Syntax**:\n
1066 * ~~~
1067 * CLZ16 Rd, Rs1
1068 * ~~~
1069 *
1070 * **Purpose**:\n
1071 * Count the number of leading zero bits of the 16-bit elements of a general register.
1072 *
1073 * **Description**:\n
1074 * Starting from the most significant bits of the 16-bit elements of Rs1, this instruction
1075 * counts the number of leading zero bits and writes the results to the corresponding 16-bit elements
1076 * of Rd.
1077 *
1078 * **Operations**:\n
1079 * ~~~
1080 * snum[x] = Rs1.H[x];
1081 * cnt[x] = 0;
1082 * for (i = 15 to 0) {
1083 * if (snum[x](i) == 0) {
1084 * cnt[x] = cnt[x] + 1;
1085 * } else {
1086 * break;
1087 * }
1088 * }
1089 * Rd.H[x] = cnt[x];
1090 * for RV32: x=1...0
1091 * for RV64: x=3...0
1092 * ~~~
1093 *
1094 * \param [in] a unsigned long type of value stored in a
1095 * \return value stored in unsigned long type
1096 */
__RV_CLZ16(unsigned long a)1097 __STATIC_FORCEINLINE unsigned long __RV_CLZ16(unsigned long a)
1098 {
1099 register unsigned long result;
1100 __ASM volatile("clz16 %0, %1" : "=r"(result) : "r"(a));
1101 return result;
1102 }
1103 /* ===== Inline Function End for 3.16. CLZ16 ===== */
1104
1105 /* ===== Inline Function Start for 3.17. CLZ32 ===== */
1106 /**
1107 * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
1108 * \brief CLZ32 (SIMD 32-bit Count Leading Zero)
1109 * \details
1110 * **Type**: SIMD
1111 *
1112 * **Syntax**:\n
1113 * ~~~
1114 * CLZ32 Rd, Rs1
1115 * ~~~
1116 *
1117 * **Purpose**:\n
1118 * Count the number of leading zero bits of the 32-bit elements of a general register.
1119 *
1120 * **Description**:\n
1121 * Starting from the most significant bits of the 32-bit elements of Rs1, this instruction
1122 * counts the number of leading zero bits and writes the results to the corresponding 32-bit elements
1123 * of Rd.
1124 *
1125 * **Operations**:\n
1126 * ~~~
1127 * snum[x] = Rs1.W[x];
1128 * cnt[x] = 0;
1129 * for (i = 31 to 0) {
1130 * if (snum[x](i) == 0) {
1131 * cnt[x] = cnt[x] + 1;
1132 * } else {
1133 * break;
1134 * }
1135 * }
1136 * Rd.W[x] = cnt[x];
1137 * for RV32: x=0
1138 * for RV64: x=1...0
1139 * ~~~
1140 *
1141 * \param [in] a unsigned long type of value stored in a
1142 * \return value stored in unsigned long type
1143 */
__RV_CLZ32(unsigned long a)1144 __STATIC_FORCEINLINE unsigned long __RV_CLZ32(unsigned long a)
1145 {
1146 register unsigned long result;
1147 __ASM volatile("clz32 %0, %1" : "=r"(result) : "r"(a));
1148 return result;
1149 }
1150 /* ===== Inline Function End for 3.17. CLZ32 ===== */
1151
1152 /* ===== Inline Function Start for 3.18. CMPEQ8 ===== */
1153 /**
1154 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
1155 * \brief CMPEQ8 (SIMD 8-bit Integer Compare Equal)
1156 * \details
1157 * **Type**: SIMD
1158 *
1159 * **Syntax**:\n
1160 * ~~~
1161 * CMPEQ8 Rd, Rs1, Rs2
1162 * ~~~
1163 *
1164 * **Purpose**:\n
1165 * Do 8-bit integer elements equal comparisons simultaneously.
1166 *
1167 * **Description**:\n
1168 * This instruction compares the 8-bit integer elements in Rs1 with the 8-bit integer
1169 * elements in Rs2 to see if they are equal. If they are equal, the result is 0xFF; otherwise, the result is
1170 * 0x0. The 8-bit element comparison results are written to Rd.
1171 *
1172 * **Note**:\n
1173 * This instruction can be used for either signed or unsigned numbers.
1174 *
1175 * **Operations**:\n
1176 * ~~~
1177 * Rd.B[x] = (Rs1.B[x] == Rs2.B[x])? 0xff : 0x0;
1178 * for RV32: x=3...0,
1179 * for RV64: x=7...0
1180 * ~~~
1181 *
1182 * \param [in] a unsigned long type of value stored in a
1183 * \param [in] b unsigned long type of value stored in b
1184 * \return value stored in unsigned long type
1185 */
__RV_CMPEQ8(unsigned long a,unsigned long b)1186 __STATIC_FORCEINLINE unsigned long __RV_CMPEQ8(unsigned long a, unsigned long b)
1187 {
1188 register unsigned long result;
1189 __ASM volatile("cmpeq8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
1190 return result;
1191 }
1192 /* ===== Inline Function End for 3.18. CMPEQ8 ===== */
1193
1194 /* ===== Inline Function Start for 3.19. CMPEQ16 ===== */
1195 /**
1196 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP
1197 * \brief CMPEQ16 (SIMD 16-bit Integer Compare Equal)
1198 * \details
1199 * **Type**: SIMD
1200 *
1201 * **Syntax**:\n
1202 * ~~~
1203 * CMPEQ16 Rd, Rs1, Rs2
1204 * ~~~
1205 *
1206 * **Purpose**:\n
1207 * Do 16-bit integer elements equal comparisons simultaneously.
1208 *
1209 * **Description**:\n
1210 * This instruction compares the 16-bit integer elements in Rs1 with the 16-bit integer
1211 * elements in Rs2 to see if they are equal. If they are equal, the result is 0xFFFF; otherwise, the result
1212 * is 0x0. The 16-bit element comparison results are written to Rd.
1213 *
1214 * **Note**:\n
1215 * This instruction can be used for either signed or unsigned numbers.
1216 *
1217 * **Operations**:\n
1218 * ~~~
1219 * Rd.H[x] = (Rs1.H[x] == Rs2.H[x])? 0xffff : 0x0;
1220 * for RV32: x=1...0,
1221 * for RV64: x=3...0
1222 * ~~~
1223 *
1224 * \param [in] a unsigned long type of value stored in a
1225 * \param [in] b unsigned long type of value stored in b
1226 * \return value stored in unsigned long type
1227 */
__RV_CMPEQ16(unsigned long a,unsigned long b)1228 __STATIC_FORCEINLINE unsigned long __RV_CMPEQ16(unsigned long a, unsigned long b)
1229 {
1230 register unsigned long result;
1231 __ASM volatile("cmpeq16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
1232 return result;
1233 }
1234 /* ===== Inline Function End for 3.19. CMPEQ16 ===== */
1235
1236 /* ===== Inline Function Start for 3.20. CRAS16 ===== */
1237 /**
1238 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
1239 * \brief CRAS16 (SIMD 16-bit Cross Addition & Subtraction)
1240 * \details
1241 * **Type**: SIMD
1242 *
1243 * **Syntax**:\n
1244 * ~~~
1245 * CRAS16 Rd, Rs1, Rs2
1246 * ~~~
1247 *
1248 * **Purpose**:\n
1249 * Do 16-bit integer element addition and 16-bit integer element subtraction in a 32-bit
1250 * chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
1251 *
1252 * **Description**:\n
1253 * This instruction adds the 16-bit integer element in [31:16] of 32-bit chunks in Rs1 with
1254 * the 16-bit integer element in [15:0] of 32-bit chunks in Rs2, and writes the result to [31:16] of 32-bit
1255 * chunks in Rd; at the same time, it subtracts the 16-bit integer element in [31:16] of 32-bit chunks in
1256 * Rs2 from the 16-bit integer element in [15:0] of 32-bit chunks in Rs1, and writes the result to [15:0] of 32-
1257 * bit chunks in Rd.
1258 *
1259 * **Note**:\n
1260 * This instruction can be used for either signed or unsigned operations.
1261 *
1262 * **Operations**:\n
1263 * ~~~
1264 * Rd.W[x][31:16] = Rs1.W[x][31:16] + Rs2.W[x][15:0];
1265 * Rd.W[x][15:0] = Rs1.W[x][15:0] - Rs2.W[x][31:16];
1266 * for RV32, x=0
1267 * for RV64, x=1...0
1268 * ~~~
1269 *
1270 * \param [in] a unsigned long type of value stored in a
1271 * \param [in] b unsigned long type of value stored in b
1272 * \return value stored in unsigned long type
1273 */
__RV_CRAS16(unsigned long a,unsigned long b)1274 __STATIC_FORCEINLINE unsigned long __RV_CRAS16(unsigned long a, unsigned long b)
1275 {
1276 register unsigned long result;
1277 __ASM volatile("cras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
1278 return result;
1279 }
1280 /* ===== Inline Function End for 3.20. CRAS16 ===== */
1281
1282 /* ===== Inline Function Start for 3.21. CRSA16 ===== */
1283 /**
1284 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
1285 * \brief CRSA16 (SIMD 16-bit Cross Subtraction & Addition)
1286 * \details
1287 * **Type**: SIMD
1288 *
1289 * **Syntax**:\n
1290 * ~~~
1291 * CRSA16 Rd, Rs1, Rs2
1292 * ~~~
1293 *
1294 * **Purpose**:\n
1295 * Do 16-bit integer element subtraction and 16-bit integer element addition in a 32-bit
1296 * chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
1297 *
1298 * **Description**:\n
1299 * This instruction subtracts the 16-bit integer element in [15:0] of 32-bit chunks in Rs2
1300 * from the 16-bit integer element in [31:16] of 32-bit chunks in Rs1, and writes the result to [31:16] of
1301 * 32-bit chunks in Rd; at the same time, it adds the 16-bit integer element in [31:16] of 32-bit chunks
1302 * in Rs2 with the 16-bit integer element in [15:0] of 32-bit chunks in Rs1, and writes the result to
1303 * [15:0] of 32-bit chunks in Rd.
1304 *
1305 * **Note**:\n
1306 * This instruction can be used for either signed or unsigned operations.
1307 *
1308 * **Operations**:\n
1309 * ~~~
1310 * Rd.W[x][31:16] = Rs1.W[x][31:16] - Rs2.W[x][15:0];
1311 * Rd.W[x][15:0] = Rs1.W[x][15:0] + Rs2.W[x][31:16];
1312 * for RV32, x=0
1313 * for RV64, x=1...0
1314 * ~~~
1315 *
1316 * \param [in] a unsigned long type of value stored in a
1317 * \param [in] b unsigned long type of value stored in b
1318 * \return value stored in unsigned long type
1319 */
__RV_CRSA16(unsigned long a,unsigned long b)1320 __STATIC_FORCEINLINE unsigned long __RV_CRSA16(unsigned long a, unsigned long b)
1321 {
1322 register unsigned long result;
1323 __ASM volatile("crsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
1324 return result;
1325 }
1326 /* ===== Inline Function End for 3.21. CRSA16 ===== */
1327
1328 /* ===== Inline Function Start for 3.22. INSB ===== */
1329 /**
1330 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
1331 * \brief INSB (Insert Byte)
1332 * \details
1333 * **Type**: DSP
1334 *
1335 * **Syntax**:\n
1336 * ~~~
1337 * (RV32) INSB Rd, Rs1, imm[1:0]
1338 * (RV64) INSB Rd, Rs1, imm[2:0]
1339 * ~~~
1340 *
1341 * **Purpose**:\n
1342 * Insert byte 0 of a 32-bit or 64-bit register into one of the byte elements of another register.
1343 *
1344 * **Description**:\n
1345 * This instruction inserts byte 0 of Rs1 into byte `imm[1:0]` (RV32) or `imm[2:0]` (RV64)
1346 * of Rd.
1347 *
1348 * **Operations**:\n
1349 * ~~~
1350 * bpos = imm[1:0]; (RV32)
1351 * bpos = imm[2:0]; (RV64)
1352 * Rd.B[bpos] = Rs1.B[0]
1353 * ~~~
1354 *
1355 * \param [in] t unsigned long type of value stored in t
1356 * \param [in] a unsigned long type of value stored in a
1357 * \param [in] b immediate byte position (compile-time constant): imm[1:0] on RV32, imm[2:0] on RV64
1358 * \return value stored in unsigned long type
1359 */
#define __RV_INSB(t, a, b)    \
    ({    \
        /* Plain locals: `register` is ill-formed in C++17 and adds nothing in C. */    \
        unsigned long __t = (unsigned long)(t);    \
        unsigned long __a = (unsigned long)(a);    \
        /* "+r": only one byte of t is replaced, so t is both input and output. */    \
        /* "K": b is encoded as the instruction immediate and must be a constant. */    \
        __ASM volatile("insb %0, %1, %2" : "+r"(__t) : "r"(__a), "K"(b));    \
        __t;    \
    })
1367 /* ===== Inline Function End for 3.22. INSB ===== */
1368
1369 /* ===== Inline Function Start for 3.23. KABS8 ===== */
1370 /**
1371 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
1372 * \brief KABS8 (SIMD 8-bit Saturating Absolute)
1373 * \details
1374 * **Type**: SIMD
1375 *
1376 * **Syntax**:\n
1377 * ~~~
1378 * KABS8 Rd, Rs1
1379 * ~~~
1380 *
1381 * **Purpose**:\n
1382 * Get the absolute value of 8-bit signed integer elements simultaneously.
1383 *
1384 * **Description**:\n
1385 * This instruction calculates the absolute value of 8-bit signed integer elements stored
1386 * in Rs1 and writes the element results to Rd. If the input number is 0x80, this instruction generates
1387 * 0x7f as the output and sets the OV bit to 1.
1388 *
1389 * **Operations**:\n
1390 * ~~~
1391 * src = Rs1.B[x];
1392 * if (src == 0x80) {
1393 * src = 0x7f;
1394 * OV = 1;
1395 * } else if (src[7] == 1) {
1396 * src = -src;
1397 * }
1398 * Rd.B[x] = src;
1399 * for RV32: x=3...0,
1400 * for RV64: x=7...0
1401 * ~~~
1402 *
1403 * \param [in] a unsigned long type of value stored in a
1404 * \return value stored in unsigned long type
1405 */
__RV_KABS8(unsigned long a)1406 __STATIC_FORCEINLINE unsigned long __RV_KABS8(unsigned long a)
1407 {
1408 register unsigned long result;
1409 __ASM volatile("kabs8 %0, %1" : "=r"(result) : "r"(a));
1410 return result;
1411 }
1412 /* ===== Inline Function End for 3.23. KABS8 ===== */
1413
1414 /* ===== Inline Function Start for 3.24. KABS16 ===== */
1415 /**
1416 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
1417 * \brief KABS16 (SIMD 16-bit Saturating Absolute)
1418 * \details
1419 * **Type**: SIMD
1420 *
1421 * **Syntax**:\n
1422 * ~~~
1423 * KABS16 Rd, Rs1
1424 * ~~~
1425 *
1426 * **Purpose**:\n
1427 * Get the absolute value of 16-bit signed integer elements simultaneously.
1428 *
1429 * **Description**:\n
1430 * This instruction calculates the absolute value of 16-bit signed integer elements stored
1431 * in Rs1 and writes the element results to Rd. If the input number is 0x8000, this instruction
1432 * generates 0x7fff as the output and sets the OV bit to 1.
1433 *
1434 * **Operations**:\n
1435 * ~~~
1436 * src = Rs1.H[x];
1437 * if (src == 0x8000) {
1438 * src = 0x7fff;
1439 * OV = 1;
1440 * } else if (src[15] == 1) {
1441 * src = -src;
1442 * }
1443 * Rd.H[x] = src;
1444 * for RV32: x=1...0,
1445 * for RV64: x=3...0
1446 * ~~~
1447 *
1448 * \param [in] a unsigned long type of value stored in a
1449 * \return value stored in unsigned long type
1450 */
__RV_KABS16(unsigned long a)1451 __STATIC_FORCEINLINE unsigned long __RV_KABS16(unsigned long a)
1452 {
1453 register unsigned long result;
1454 __ASM volatile("kabs16 %0, %1" : "=r"(result) : "r"(a));
1455 return result;
1456 }
1457 /* ===== Inline Function End for 3.24. KABS16 ===== */
1458
1459 /* ===== Inline Function Start for 3.25. KABSW ===== */
1460 /**
1461 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
1462 * \brief KABSW (Scalar 32-bit Absolute Value with Saturation)
1463 * \details
1464 * **Type**: DSP
1465 *
1466 * **Syntax**:\n
1467 * ~~~
1468 * KABSW Rd, Rs1
1469 * ~~~
1470 *
1471 * **Purpose**:\n
1472 * Get the absolute value of a signed 32-bit integer in a general register.
1473 *
1474 * **Description**:\n
1475 * This instruction calculates the absolute value of a signed 32-bit integer stored in Rs1.
1476 * The result is sign-extended (for RV64) and written to Rd. This instruction with the minimum
1477 * negative integer input of 0x80000000 will produce a saturated output of maximum positive integer
1478 * of 0x7fffffff and the OV flag will be set to 1.
1479 *
1480 * **Operations**:\n
1481 * ~~~
1482 * if (Rs1.W[0] >= 0) {
1483 * res = Rs1.W[0];
1484 * } else {
1485 * if (Rs1.W[0] == 0x80000000) {
1486 * res = 0x7fffffff;
1487 * OV = 1;
1488 * } else {
1489 * res = -Rs1.W[0];
1490 * }
1491 * }
1492 * Rd = SE32(res);
1493 * ~~~
1494 *
1495 * \param [in] a signed long type of value stored in a
1496 * \return value stored in unsigned long type
1497 */
__RV_KABSW(signed long a)1498 __STATIC_FORCEINLINE unsigned long __RV_KABSW(signed long a)
1499 {
1500 register unsigned long result;
1501 __ASM volatile("kabsw %0, %1" : "=r"(result) : "r"(a));
1502 return result;
1503 }
1504 /* ===== Inline Function End for 3.25. KABSW ===== */
1505
1506 /* ===== Inline Function Start for 3.26. KADD8 ===== */
1507 /**
1508 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
1509 * \brief KADD8 (SIMD 8-bit Signed Saturating Addition)
1510 * \details
1511 * **Type**: SIMD
1512 *
1513 * **Syntax**:\n
1514 * ~~~
1515 * KADD8 Rd, Rs1, Rs2
1516 * ~~~
1517 *
1518 * **Purpose**:\n
1519 * Do 8-bit signed integer element saturating additions simultaneously.
1520 *
1521 * **Description**:\n
1522 * This instruction adds the 8-bit signed integer elements in Rs1 with the 8-bit signed
1523 * integer elements in Rs2. If any of the results are beyond the Q7 number range (-2^7 <= Q7 <= 2^7-1), they
1524 * are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
1525 *
1526 * **Operations**:\n
1527 * ~~~
1528 * res[x] = Rs1.B[x] + Rs2.B[x];
1529 * if (res[x] > 127) {
1530 * res[x] = 127;
1531 * OV = 1;
1532 * } else if (res[x] < -128) {
1533 * res[x] = -128;
1534 * OV = 1;
1535 * }
1536 * Rd.B[x] = res[x];
1537 * for RV32: x=3...0,
1538 * for RV64: x=7...0
1539 * ~~~
1540 *
1541 * \param [in] a unsigned long type of value stored in a
1542 * \param [in] b unsigned long type of value stored in b
1543 * \return value stored in unsigned long type
1544 */
__RV_KADD8(unsigned long a,unsigned long b)1545 __STATIC_FORCEINLINE unsigned long __RV_KADD8(unsigned long a, unsigned long b)
1546 {
1547 register unsigned long result;
1548 __ASM volatile("kadd8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
1549 return result;
1550 }
1551 /* ===== Inline Function End for 3.26. KADD8 ===== */
1552
1553 /* ===== Inline Function Start for 3.27. KADD16 ===== */
1554 /**
1555 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
1556 * \brief KADD16 (SIMD 16-bit Signed Saturating Addition)
1557 * \details
1558 * **Type**: SIMD
1559 *
1560 * **Syntax**:\n
1561 * ~~~
1562 * KADD16 Rd, Rs1, Rs2
1563 * ~~~
1564 *
1565 * **Purpose**:\n
1566 * Do 16-bit signed integer element saturating additions simultaneously.
1567 *
1568 * **Description**:\n
1569 * This instruction adds the 16-bit signed integer elements in Rs1 with the 16-bit signed
1570 * integer elements in Rs2. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1),
1571 * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
1572 *
1573 * **Operations**:\n
1574 * ~~~
1575 * res[x] = Rs1.H[x] + Rs2.H[x];
1576 * if (res[x] > 32767) {
1577 * res[x] = 32767;
1578 * OV = 1;
1579 * } else if (res[x] < -32768) {
1580 * res[x] = -32768;
1581 * OV = 1;
1582 * }
1583 * Rd.H[x] = res[x];
1584 * for RV32: x=1...0,
1585 * for RV64: x=3...0
1586 * ~~~
1587 *
1588 * \param [in] a unsigned long type of value stored in a
1589 * \param [in] b unsigned long type of value stored in b
1590 * \return value stored in unsigned long type
1591 */
__RV_KADD16(unsigned long a,unsigned long b)1592 __STATIC_FORCEINLINE unsigned long __RV_KADD16(unsigned long a, unsigned long b)
1593 {
1594 register unsigned long result;
1595 __ASM volatile("kadd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
1596 return result;
1597 }
1598 /* ===== Inline Function End for 3.27. KADD16 ===== */
1599
1600 /* ===== Inline Function Start for 3.28. KADD64 ===== */
1601 /**
1602 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
1603 * \brief KADD64 (64-bit Signed Saturating Addition)
1604 * \details
1605 * **Type**: DSP (64-bit Profile)
1606 *
1607 * **Syntax**:\n
1608 * ~~~
1609 * KADD64 Rd, Rs1, Rs2
1610 * ~~~
1611 *
1612 * **Purpose**:\n
1613 * Add two 64-bit signed integers. The result is saturated to the Q63 range.
1614 *
1615 * **RV32 Description**:\n
1616 * This instruction adds the 64-bit signed integer of an even/odd pair of registers
1617 * specified by Rs1(4,1) with the 64-bit signed integer of an even/odd pair of registers specified by
1618 * Rs2(4,1). If the 64-bit result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the
1619 * range and the OV bit is set to 1. The saturated result is written to an even/odd pair of registers
1620 * specified by Rd(4,1).
1621 * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register
1622 * pair includes register 2d and 2d+1.
1623 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
1624 * of the pair contains the low 32-bit of the result.
1625 *
1626 * **RV64 Description**:\n
1627 * This instruction adds the 64-bit signed integer in Rs1 with the 64-bit signed
1628 * integer in Rs2. If the result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the
1629 * range and the OV bit is set to 1. The saturated result is written to Rd.
1630 *
1631 * **Operations**:\n
1632 * ~~~
1633 * RV32:
1634 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
1635 * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
1636 * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
1637 * result = R[a_H].R[a_L] + R[b_H].R[b_L];
1638 * if (result > (2^63)-1) {
1639 * result = (2^63)-1; OV = 1;
1640 * } else if (result < -2^63) {
1641 * result = -2^63; OV = 1;
1642 * }
1643 * R[t_H].R[t_L] = result;
1644 * RV64:
1645 * result = Rs1 + Rs2;
1646 * if (result > (2^63)-1) {
1647 * result = (2^63)-1; OV = 1;
1648 * } else if (result < -2^63) {
1649 * result = -2^63; OV = 1;
1650 * }
1651 * Rd = result;
1652 * ~~~
1653 *
1654 * \param [in] a long long type of value stored in a
1655 * \param [in] b long long type of value stored in b
1656 * \return value stored in long long type
1657 */
__RV_KADD64(long long a,long long b)1658 __STATIC_FORCEINLINE long long __RV_KADD64(long long a, long long b)
1659 {
1660 register long long result;
1661 __ASM volatile("kadd64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
1662 return result;
1663 }
1664 /* ===== Inline Function End for 3.28. KADD64 ===== */
1665
1666 /* ===== Inline Function Start for 3.29. KADDH ===== */
1667 /**
1668 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
1669 * \brief KADDH (Signed Addition with Q15 Saturation)
1670 * \details
1671 * **Type**: DSP
1672 *
1673 * **Syntax**:\n
1674 * ~~~
1675 * KADDH Rd, Rs1, Rs2
1676 * ~~~
1677 *
1678 * **Purpose**:\n
1679 * Add the signed lower 32-bit content of two registers with Q15 saturation.
1680 *
1681 * **Description**:\n
1682 * The signed lower 32-bit content of Rs1 is added with the signed lower 32-bit content of
1683 * Rs2. And the result is saturated to the 16-bit signed integer range of [-2^15, 2^15-1] and then sign-
1684 * extended and written to Rd. If saturation happens, this instruction sets the OV flag.
1685 *
1686 * **Operations**:\n
1687 * ~~~
1688 * tmp = Rs1.W[0] + Rs2.W[0];
1689 * if (tmp > 32767) {
1690 * res = 32767;
1691 * OV = 1;
1692 * } else if (tmp < -32768) {
1693 * res = -32768;
1694 * OV = 1;
1695 * } else {
1696 * res = tmp;
1697 * }
1698 * Rd = SE(res[15:0]);
1699 * ~~~
1700 *
1701 * \param [in] a int type of value stored in a
1702 * \param [in] b int type of value stored in b
1703 * \return value stored in long type
1704 */
__RV_KADDH(int a,int b)1705 __STATIC_FORCEINLINE long __RV_KADDH(int a, int b)
1706 {
1707 register long result;
1708 __ASM volatile("kaddh %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
1709 return result;
1710 }
1711 /* ===== Inline Function End for 3.29. KADDH ===== */
1712
1713 /* ===== Inline Function Start for 3.30. KADDW ===== */
1714 /**
1715 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
1716 * \brief KADDW (Signed Addition with Q31 Saturation)
1717 * \details
1718 * **Type**: DSP
1719 *
1720 * **Syntax**:\n
1721 * ~~~
1722 * KADDW Rd, Rs1, Rs2
1723 * ~~~
1724 *
1725 * **Purpose**:\n
1726 * Add the lower 32-bit signed content of two registers with Q31 saturation.
1727 *
1728 * **Description**:\n
1729 * The lower 32-bit signed content of Rs1 is added with the lower 32-bit signed content of
1730 * Rs2. And the result is saturated to the 32-bit signed integer range of [-2^31, 2^31-1] and then sign-
1731 * extended and written to Rd. If saturation happens, this instruction sets the OV flag.
1732 *
1733 * **Operations**:\n
1734 * ~~~
1735 * tmp = Rs1.W[0] + Rs2.W[0];
1736 * if (tmp > (2^31)-1) {
1737 * res = (2^31)-1;
1738 * OV = 1;
1739 * } else if (tmp < -2^31) {
1740 * res = -2^31;
1741 * OV = 1;
1742 * } else {
1743 * res = tmp;
1744 * }
1745 * Rd = res[31:0]; // RV32
1746 * Rd = SE(res[31:0]) // RV64
1747 * ~~~
1748 *
1749 * \param [in] a int type of value stored in a
1750 * \param [in] b int type of value stored in b
1751 * \return value stored in long type
1752 */
__RV_KADDW(int a,int b)1753 __STATIC_FORCEINLINE long __RV_KADDW(int a, int b)
1754 {
1755 register long result;
1756 __ASM volatile("kaddw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
1757 return result;
1758 }
1759 /* ===== Inline Function End for 3.30. KADDW ===== */
1760
1761 /* ===== Inline Function Start for 3.31. KCRAS16 ===== */
1762 /**
1763 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
1764 * \brief KCRAS16 (SIMD 16-bit Signed Saturating Cross Addition & Subtraction)
1765 * \details
1766 * **Type**: SIMD
1767 *
1768 * **Syntax**:\n
1769 * ~~~
1770 * KCRAS16 Rd, Rs1, Rs2
1771 * ~~~
1772 *
1773 * **Purpose**:\n
1774 * Do 16-bit signed integer element saturating addition and 16-bit signed integer element
1775 * saturating subtraction in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-
1776 * bit chunks.
1777 *
1778 * **Description**:\n
1779 * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in
1780 * Rs1 with the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2; at the same time, it
1781 * subtracts the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2 from the 16-bit signed
1782 * integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number
1783 * range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated
1784 * results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit chunks in Rd for
1785 * subtraction.
1786 *
1787 * **Operations**:\n
1788 * ~~~
1789 * res1 = Rs1.W[x][31:16] + Rs2.W[x][15:0];
1790 * res2 = Rs1.W[x][15:0] - Rs2.W[x][31:16];
1791 * for (res in [res1, res2]) {
1792 * if (res > (2^15)-1) {
1793 * res = (2^15)-1;
1794 * OV = 1;
1795 * } else if (res < -2^15) {
1796 * res = -2^15;
1797 * OV = 1;
1798 * }
1799 * }
1800 * Rd.W[x][31:16] = res1;
1801 * Rd.W[x][15:0] = res2;
1802 * for RV32, x=0
1803 * for RV64, x=1...0
1804 * ~~~
1805 *
1806 * \param [in] a unsigned long type of value stored in a
1807 * \param [in] b unsigned long type of value stored in b
1808 * \return value stored in unsigned long type
1809 */
__RV_KCRAS16(unsigned long a,unsigned long b)1810 __STATIC_FORCEINLINE unsigned long __RV_KCRAS16(unsigned long a, unsigned long b)
1811 {
1812 register unsigned long result;
1813 __ASM volatile("kcras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
1814 return result;
1815 }
1816 /* ===== Inline Function End for 3.31. KCRAS16 ===== */
1817
1818 /* ===== Inline Function Start for 3.32. KCRSA16 ===== */
1819 /**
1820 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
1821 * \brief KCRSA16 (SIMD 16-bit Signed Saturating Cross Subtraction & Addition)
1822 * \details
1823 * **Type**: SIMD
1824 *
1825 * **Syntax**:\n
1826 * ~~~
1827 * KCRSA16 Rd, Rs1, Rs2
1828 * ~~~
1829 *
1830 * **Purpose**:\n
1831 * Do 16-bit signed integer element saturating subtraction and 16-bit signed integer element
1832 * saturating addition in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit
1833 * chunks.
1834 *
1835 * **Description**:\n
1836 * This instruction subtracts the 16-bit signed integer element in [15:0] of 32-bit chunks
1837 * in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1; at the same time, it
1838 * adds the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2 with the 16-bit signed
1839 * integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number
1840 * range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated
1841 * results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks in Rd
1842 * for addition.
1843 *
1844 * **Operations**:\n
1845 * ~~~
1846 * res1 = Rs1.W[x][31:16] - Rs2.W[x][15:0];
1847 * res2 = Rs1.W[x][15:0] + Rs2.W[x][31:16];
1848 * for (res in [res1, res2]) {
1849 * if (res > (2^15)-1) {
1850 * res = (2^15)-1;
1851 * OV = 1;
1852 * } else if (res < -2^15) {
1853 * res = -2^15;
1854 * OV = 1;
1855 * }
1856 * }
1857 * Rd.W[x][31:16] = res1;
1858 * Rd.W[x][15:0] = res2;
1859 * for RV32, x=0
1860 * for RV64, x=1...0
1861 * ~~~
1862 *
1863 * \param [in] a unsigned long type of value stored in a
1864 * \param [in] b unsigned long type of value stored in b
1865 * \return value stored in unsigned long type
1866 */
__RV_KCRSA16(unsigned long a,unsigned long b)1867 __STATIC_FORCEINLINE unsigned long __RV_KCRSA16(unsigned long a, unsigned long b)
1868 {
1869 register unsigned long result;
1870 __ASM volatile("kcrsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
1871 return result;
1872 }
1873 /* ===== Inline Function End for 3.32. KCRSA16 ===== */
1874
1875 /* ===== Inline Function Start for 3.33.1. KDMBB ===== */
1876 /**
1877 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
1878 * \brief KDMBB (Signed Saturating Double Multiply B16 x B16)
1879 * \details
1880 * **Type**: DSP
1881 *
1882 * **Syntax**:\n
1883 * ~~~
1884 * KDMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
1885 * ~~~
1886 *
1887 * **Purpose**:\n
1888 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
1889 * of the lower 32-bit chunk in registers and then double and saturate the Q31 result. The result is
1890 * written into the destination register for RV32 or sign-extended to 64-bits and written into the
1891 * destination register for RV64. If saturation happens, an overflow flag OV will be set.
1892 *
1893 * **Description**:\n
1894 * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
1895 * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
1896 * doubled and saturated into a Q31 value. The Q31 value is then written into Rd (sign-extended in
1897 * RV64). When both the two Q15 inputs are 0x8000, saturation will happen. The result will be
1898 * saturated to 0x7FFFFFFF and the overflow flag OV will be set.
1899 *
1900 * **Operations**:\n
1901 * ~~~
1902 * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMBB
1903 * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMBT
1904 * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMTT
1905 * If (0x8000 != aop | 0x8000 != bop) {
1906 * Mresult = aop * bop;
1907 * resQ31 = Mresult << 1;
1908 * Rd = resQ31; // RV32
1909 * Rd = SE(resQ31); // RV64
1910 * } else {
1911 * resQ31 = 0x7FFFFFFF;
1912 * Rd = resQ31; // RV32
1913 * Rd = SE(resQ31); // RV64
1914 * OV = 1;
1915 * }
1916 * ~~~
1917 *
1918 * \param [in] a unsigned int type of value stored in a
1919 * \param [in] b unsigned int type of value stored in b
1920 * \return value stored in long type
1921 */
__RV_KDMBB(unsigned int a,unsigned int b)1922 __STATIC_FORCEINLINE long __RV_KDMBB(unsigned int a, unsigned int b)
1923 {
1924 register long result;
1925 __ASM volatile("kdmbb %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
1926 return result;
1927 }
1928 /* ===== Inline Function End for 3.33.1. KDMBB ===== */
1929
1930 /* ===== Inline Function Start for 3.33.2. KDMBT ===== */
1931 /**
1932 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
1933 * \brief KDMBT (Signed Saturating Double Multiply B16 x T16)
1934 * \details
1935 * **Type**: DSP
1936 *
1937 * **Syntax**:\n
1938 * ~~~
1939 * KDMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
1940 * ~~~
1941 *
1942 * **Purpose**:\n
1943 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
1944 * of the lower 32-bit chunk in registers and then double and saturate the Q31 result. The result is
1945 * written into the destination register for RV32 or sign-extended to 64-bits and written into the
1946 * destination register for RV64. If saturation happens, an overflow flag OV will be set.
1947 *
1948 * **Description**:\n
1949 * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
1950 * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
1951 * doubled and saturated into a Q31 value. The Q31 value is then written into Rd (sign-extended in
1952 * RV64). When both the two Q15 inputs are 0x8000, saturation will happen. The result will be
1953 * saturated to 0x7FFFFFFF and the overflow flag OV will be set.
1954 *
1955 * **Operations**:\n
1956 * ~~~
1957 * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMBB
1958 * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMBT
1959 * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMTT
1960 * If (0x8000 != aop | 0x8000 != bop) {
1961 * Mresult = aop * bop;
1962 * resQ31 = Mresult << 1;
1963 * Rd = resQ31; // RV32
1964 * Rd = SE(resQ31); // RV64
1965 * } else {
1966 * resQ31 = 0x7FFFFFFF;
1967 * Rd = resQ31; // RV32
1968 * Rd = SE(resQ31); // RV64
1969 * OV = 1;
1970 * }
1971 * ~~~
1972 *
1973 * \param [in] a unsigned int type of value stored in a
1974 * \param [in] b unsigned int type of value stored in b
1975 * \return value stored in long type
1976 */
__RV_KDMBT(unsigned int a,unsigned int b)1977 __STATIC_FORCEINLINE long __RV_KDMBT(unsigned int a, unsigned int b)
1978 {
1979 register long result;
1980 __ASM volatile("kdmbt %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
1981 return result;
1982 }
1983 /* ===== Inline Function End for 3.33.2. KDMBT ===== */
1984
1985 /* ===== Inline Function Start for 3.33.3. KDMTT ===== */
1986 /**
1987 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
1988 * \brief KDMTT (Signed Saturating Double Multiply T16 x T16)
1989 * \details
1990 * **Type**: DSP
1991 *
1992 * **Syntax**:\n
1993 * ~~~
1994 * KDMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
1995 * ~~~
1996 *
1997 * **Purpose**:\n
1998 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
1999 * of the lower 32-bit chunk in registers and then double and saturate the Q31 result. The result is
2000 * written into the destination register for RV32 or sign-extended to 64-bits and written into the
2001 * destination register for RV64. If saturation happens, an overflow flag OV will be set.
2002 *
2003 * **Description**:\n
2004 * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
2005 * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
2006 * doubled and saturated into a Q31 value. The Q31 value is then written into Rd (sign-extended in
2007 * RV64). When both the two Q15 inputs are 0x8000, saturation will happen. The result will be
2008 * saturated to 0x7FFFFFFF and the overflow flag OV will be set.
2009 *
2010 * **Operations**:\n
2011 * ~~~
2012 * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMBB
2013 * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMBT
2014 * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMTT
2015 * If (0x8000 != aop | 0x8000 != bop) {
2016 * Mresult = aop * bop;
2017 * resQ31 = Mresult << 1;
2018 * Rd = resQ31; // RV32
2019 * Rd = SE(resQ31); // RV64
2020 * } else {
2021 * resQ31 = 0x7FFFFFFF;
2022 * Rd = resQ31; // RV32
2023 * Rd = SE(resQ31); // RV64
2024 * OV = 1;
2025 * }
2026 * ~~~
2027 *
2028 * \param [in] a unsigned int type of value stored in a
2029 * \param [in] b unsigned int type of value stored in b
2030 * \return value stored in long type
2031 */
__RV_KDMTT(unsigned int a,unsigned int b)2032 __STATIC_FORCEINLINE long __RV_KDMTT(unsigned int a, unsigned int b)
2033 {
2034 register long result;
2035 __ASM volatile("kdmtt %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
2036 return result;
2037 }
2038 /* ===== Inline Function End for 3.33.3. KDMTT ===== */
2039
2040 /* ===== Inline Function Start for 3.34.1. KDMABB ===== */
2041 /**
2042 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
2043 * \brief KDMABB (Signed Saturating Double Multiply Addition B16 x B16)
2044 * \details
2045 * **Type**: DSP
2046 *
2047 * **Syntax**:\n
2048 * ~~~
2049 * KDMAxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
2050 * ~~~
2051 *
2052 * **Purpose**:\n
2053 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
2054 * of the lower 32-bit chunk in registers and then double and saturate the Q31 result, add the result
2055 * with the sign-extended lower 32-bit chunk destination register and write the saturated addition
2056 * result into the destination register. If saturation happens, an overflow flag OV will be set.
2057 *
2058 * **Description**:\n
2059 * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
2060 * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
2061 * doubled and saturated into a Q31 value. The Q31 value is then added with the content of Rd. If the
2062 * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
2063 * the OV flag is set to 1. The result after saturation is written to Rd.
2064 * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
2065 * set.
2066 *
2067 * **Operations**:\n
2068 * ~~~
2069 * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMABB
2070 * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMABT
2071 * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMATT
2072 * If (0x8000 != aop | 0x8000 != bop) {
2073 * Mresult = aop * bop;
2074 * resQ31 = Mresult << 1;
2075 * } else {
2076 * resQ31 = 0x7FFFFFFF;
2077 * OV = 1;
2078 * }
2079 * resadd = Rd + resQ31; // RV32
2080 * resadd = Rd.W[0] + resQ31; // RV64
2081 * if (resadd > (2^31)-1) {
2082 * resadd = (2^31)-1;
2083 * OV = 1;
2084 * } else if (resadd < -2^31) {
2085 * resadd = -2^31;
2086 * OV = 1;
2087 * }
2088 * Rd = resadd; // RV32
2089 * Rd = SE(resadd); // RV64
2090 * ~~~
2091 *
2092 * \param [in] t long type of value stored in t
2093 * \param [in] a unsigned int type of value stored in a
2094 * \param [in] b unsigned int type of value stored in b
2095 * \return value stored in long type
2096 */
__RV_KDMABB(long t,unsigned int a,unsigned int b)2097 __STATIC_FORCEINLINE long __RV_KDMABB(long t, unsigned int a, unsigned int b)
2098 {
2099 __ASM volatile("kdmabb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
2100 return t;
2101 }
2102 /* ===== Inline Function End for 3.34.1. KDMABB ===== */
2103
2104 /* ===== Inline Function Start for 3.34.2. KDMABT ===== */
2105 /**
2106 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
2107 * \brief KDMABT (Signed Saturating Double Multiply Addition B16 x T16)
2108 * \details
2109 * **Type**: DSP
2110 *
2111 * **Syntax**:\n
2112 * ~~~
2113 * KDMAxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
2114 * ~~~
2115 *
2116 * **Purpose**:\n
2117 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
2118 * of the lower 32-bit chunk in registers and then double and saturate the Q31 result, add the result
2119 * with the sign-extended lower 32-bit chunk destination register and write the saturated addition
2120 * result into the destination register. If saturation happens, an overflow flag OV will be set.
2121 *
2122 * **Description**:\n
2123 * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
2124 * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
2125 * doubled and saturated into a Q31 value. The Q31 value is then added with the content of Rd. If the
2126 * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
2127 * the OV flag is set to 1. The result after saturation is written to Rd.
2128 * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
2129 * set.
2130 *
2131 * **Operations**:\n
2132 * ~~~
2133 * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMABB
2134 * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMABT
2135 * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMATT
2136 * If (0x8000 != aop | 0x8000 != bop) {
2137 * Mresult = aop * bop;
2138 * resQ31 = Mresult << 1;
2139 * } else {
2140 * resQ31 = 0x7FFFFFFF;
2141 * OV = 1;
2142 * }
2143 * resadd = Rd + resQ31; // RV32
2144 * resadd = Rd.W[0] + resQ31; // RV64
2145 * if (resadd > (2^31)-1) {
2146 * resadd = (2^31)-1;
2147 * OV = 1;
2148 * } else if (resadd < -2^31) {
2149 * resadd = -2^31;
2150 * OV = 1;
2151 * }
2152 * Rd = resadd; // RV32
2153 * Rd = SE(resadd); // RV64
2154 * ~~~
2155 *
2156 * \param [in] t long type of value stored in t
2157 * \param [in] a unsigned int type of value stored in a
2158 * \param [in] b unsigned int type of value stored in b
2159 * \return value stored in long type
2160 */
__RV_KDMABT(long t,unsigned int a,unsigned int b)2161 __STATIC_FORCEINLINE long __RV_KDMABT(long t, unsigned int a, unsigned int b)
2162 {
2163 __ASM volatile("kdmabt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
2164 return t;
2165 }
2166 /* ===== Inline Function End for 3.34.2. KDMABT ===== */
2167
2168 /* ===== Inline Function Start for 3.34.3. KDMATT ===== */
2169 /**
2170 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
2171 * \brief KDMATT (Signed Saturating Double Multiply Addition T16 x T16)
2172 * \details
2173 * **Type**: DSP
2174 *
2175 * **Syntax**:\n
2176 * ~~~
2177 * KDMAxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
2178 * ~~~
2179 *
2180 * **Purpose**:\n
2181 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
2182 * of the lower 32-bit chunk in registers and then double and saturate the Q31 result, add the result
2183 * with the sign-extended lower 32-bit chunk destination register and write the saturated addition
2184 * result into the destination register. If saturation happens, an overflow flag OV will be set.
2185 *
2186 * **Description**:\n
2187 * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
2188 * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
2189 * doubled and saturated into a Q31 value. The Q31 value is then added with the content of Rd. If the
2190 * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
2191 * the OV flag is set to 1. The result after saturation is written to Rd.
2192 * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
2193 * set.
2194 *
2195 * **Operations**:\n
2196 * ~~~
2197 * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMABB
2198 * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMABT
2199 * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMATT
2200 * If (0x8000 != aop | 0x8000 != bop) {
2201 * Mresult = aop * bop;
2202 * resQ31 = Mresult << 1;
2203 * } else {
2204 * resQ31 = 0x7FFFFFFF;
2205 * OV = 1;
2206 * }
2207 * resadd = Rd + resQ31; // RV32
2208 * resadd = Rd.W[0] + resQ31; // RV64
2209 * if (resadd > (2^31)-1) {
2210 * resadd = (2^31)-1;
2211 * OV = 1;
2212 * } else if (resadd < -2^31) {
2213 * resadd = -2^31;
2214 * OV = 1;
2215 * }
2216 * Rd = resadd; // RV32
2217 * Rd = SE(resadd); // RV64
2218 * ~~~
2219 *
2220 * \param [in] t long type of value stored in t
2221 * \param [in] a unsigned int type of value stored in a
2222 * \param [in] b unsigned int type of value stored in b
2223 * \return value stored in long type
2224 */
__RV_KDMATT(long t,unsigned int a,unsigned int b)2225 __STATIC_FORCEINLINE long __RV_KDMATT(long t, unsigned int a, unsigned int b)
2226 {
2227 __ASM volatile("kdmatt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
2228 return t;
2229 }
2230 /* ===== Inline Function End for 3.34.3. KDMATT ===== */
2231
2232 /* ===== Inline Function Start for 3.35.1. KHM8 ===== */
2233 /**
2234 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
2235 * \brief KHM8 (SIMD Signed Saturating Q7 Multiply)
2236 * \details
2237 * **Type**: SIMD
2238 *
2239 * **Syntax**:\n
2240 * ~~~
2241 * KHM8 Rd, Rs1, Rs2
2242 * KHMX8 Rd, Rs1, Rs2
2243 * ~~~
2244 *
2245 * **Purpose**:\n
2246 * Do Q7xQ7 element multiplications simultaneously. The Q14 results are then reduced to Q7
2247 * numbers again.
2248 *
2249 * **Description**:\n
2250 * For the `KHM8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1
2251 * with the top 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
2252 * content of 16-bit chunks in Rs1 with the bottom 8-bit Q7 content of 16-bit chunks in Rs2.
 * For the `KHMX8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1 with the
2254 * bottom 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
2255 * content of 16-bit chunks in Rs1 with the top 8-bit Q7 content of 16-bit chunks in Rs2.
2256 * The Q14 results are then right-shifted 7-bits and saturated into Q7 values. The Q7 results are then
2257 * written into Rd. When both the two Q7 inputs of a multiplication are 0x80, saturation will happen.
2258 * The result will be saturated to 0x7F and the overflow flag OV will be set.
2259 *
2260 * **Operations**:\n
2261 * ~~~
2262 * if (is `KHM8`) {
2263 * op1t = Rs1.B[x+1]; op2t = Rs2.B[x+1]; // top
2264 * op1b = Rs1.B[x]; op2b = Rs2.B[x]; // bottom
2265 * } else if (is `KHMX8`) {
 * op1t = Rs1.B[x+1]; op2t = Rs2.B[x]; // Rs1 top
 * op1b = Rs1.B[x]; op2b = Rs2.B[x+1]; // Rs1 bottom
2268 * }
2269 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
2270 * if (0x80 != aop | 0x80 != bop) {
2271 * res = (aop s* bop) >> 7;
2272 * } else {
2273 * res= 0x7F;
2274 * OV = 1;
2275 * }
2276 * }
2277 * Rd.H[x/2] = concat(rest, resb);
2278 * for RV32, x=0,2
2279 * for RV64, x=0,2,4,6
2280 * ~~~
2281 *
2282 * \param [in] a unsigned long type of value stored in a
2283 * \param [in] b unsigned long type of value stored in b
2284 * \return value stored in unsigned long type
2285 */
__RV_KHM8(unsigned long a,unsigned long b)2286 __STATIC_FORCEINLINE unsigned long __RV_KHM8(unsigned long a, unsigned long b)
2287 {
2288 register unsigned long result;
2289 __ASM volatile("khm8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
2290 return result;
2291 }
2292 /* ===== Inline Function End for 3.35.1. KHM8 ===== */
2293
2294 /* ===== Inline Function Start for 3.35.2. KHMX8 ===== */
2295 /**
2296 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
2297 * \brief KHMX8 (SIMD Signed Saturating Crossed Q7 Multiply)
2298 * \details
2299 * **Type**: SIMD
2300 *
2301 * **Syntax**:\n
2302 * ~~~
2303 * KHM8 Rd, Rs1, Rs2
2304 * KHMX8 Rd, Rs1, Rs2
2305 * ~~~
2306 *
2307 * **Purpose**:\n
2308 * Do Q7xQ7 element multiplications simultaneously. The Q14 results are then reduced to Q7
2309 * numbers again.
2310 *
2311 * **Description**:\n
2312 * For the `KHM8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1
2313 * with the top 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
2314 * content of 16-bit chunks in Rs1 with the bottom 8-bit Q7 content of 16-bit chunks in Rs2.
 * For the `KHMX8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1 with the
2316 * bottom 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
2317 * content of 16-bit chunks in Rs1 with the top 8-bit Q7 content of 16-bit chunks in Rs2.
2318 * The Q14 results are then right-shifted 7-bits and saturated into Q7 values. The Q7 results are then
2319 * written into Rd. When both the two Q7 inputs of a multiplication are 0x80, saturation will happen.
2320 * The result will be saturated to 0x7F and the overflow flag OV will be set.
2321 *
2322 * **Operations**:\n
2323 * ~~~
2324 * if (is `KHM8`) {
2325 * op1t = Rs1.B[x+1]; op2t = Rs2.B[x+1]; // top
2326 * op1b = Rs1.B[x]; op2b = Rs2.B[x]; // bottom
2327 * } else if (is `KHMX8`) {
 * op1t = Rs1.B[x+1]; op2t = Rs2.B[x]; // Rs1 top
 * op1b = Rs1.B[x]; op2b = Rs2.B[x+1]; // Rs1 bottom
2330 * }
2331 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
2332 * if (0x80 != aop | 0x80 != bop) {
2333 * res = (aop s* bop) >> 7;
2334 * } else {
2335 * res= 0x7F;
2336 * OV = 1;
2337 * }
2338 * }
2339 * Rd.H[x/2] = concat(rest, resb);
2340 * for RV32, x=0,2
2341 * for RV64, x=0,2,4,6
2342 * ~~~
2343 *
2344 * \param [in] a unsigned long type of value stored in a
2345 * \param [in] b unsigned long type of value stored in b
2346 * \return value stored in unsigned long type
2347 */
__RV_KHMX8(unsigned long a,unsigned long b)2348 __STATIC_FORCEINLINE unsigned long __RV_KHMX8(unsigned long a, unsigned long b)
2349 {
2350 register unsigned long result;
2351 __ASM volatile("khmx8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
2352 return result;
2353 }
2354 /* ===== Inline Function End for 3.35.2. KHMX8 ===== */
2355
2356 /* ===== Inline Function Start for 3.36.1. KHM16 ===== */
2357 /**
2358 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
2359 * \brief KHM16 (SIMD Signed Saturating Q15 Multiply)
2360 * \details
2361 * **Type**: SIMD
2362 *
2363 * **Syntax**:\n
2364 * ~~~
2365 * KHM16 Rd, Rs1, Rs2
2366 * KHMX16 Rd, Rs1, Rs2
2367 * ~~~
2368 *
2369 * **Purpose**:\n
2370 * Do Q15xQ15 element multiplications simultaneously. The Q30 results are then reduced to
2371 * Q15 numbers again.
2372 *
2373 * **Description**:\n
2374 * For the `KHM16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in
2375 * Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom
2376 * 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit chunks in
2377 * Rs2.
2378 * For the `KHMX16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the
2379 * bottom 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom 16-bit Q15
2380 * content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2.
2381 * The Q30 results are then right-shifted 15-bits and saturated into Q15 values. The Q15 results are
2382 * then written into Rd. When both the two Q15 inputs of a multiplication are 0x8000, saturation will
2383 * happen. The result will be saturated to 0x7FFF and the overflow flag OV will be set.
2384 *
2385 * **Operations**:\n
2386 * ~~~
2387 * if (is `KHM16`) {
2388 * op1t = Rs1.H[x+1]; op2t = Rs2.H[x+1]; // top
2389 * op1b = Rs1.H[x]; op2b = Rs2.H[x]; // bottom
2390 * } else if (is `KHMX16`) {
2391 * op1t = Rs1.H[x+1]; op2t = Rs2.H[x]; // Rs1 top
2392 * op1b = Rs1.H[x]; op2b = Rs2.H[x+1]; // Rs1 bottom
2393 * }
2394 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
2395 * if (0x8000 != aop | 0x8000 != bop) {
2396 * res = (aop s* bop) >> 15;
2397 * } else {
2398 * res= 0x7FFF;
2399 * OV = 1;
2400 * }
2401 * }
2402 * Rd.W[x/2] = concat(rest, resb);
2403 * for RV32: x=0
2404 * for RV64: x=0,2
2405 * ~~~
2406 *
2407 * \param [in] a unsigned long type of value stored in a
2408 * \param [in] b unsigned long type of value stored in b
2409 * \return value stored in unsigned long type
2410 */
__RV_KHM16(unsigned long a,unsigned long b)2411 __STATIC_FORCEINLINE unsigned long __RV_KHM16(unsigned long a, unsigned long b)
2412 {
2413 register unsigned long result;
2414 __ASM volatile("khm16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
2415 return result;
2416 }
2417 /* ===== Inline Function End for 3.36.1. KHM16 ===== */
2418
2419 /* ===== Inline Function Start for 3.36.2. KHMX16 ===== */
2420 /**
2421 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
2422 * \brief KHMX16 (SIMD Signed Saturating Crossed Q15 Multiply)
2423 * \details
2424 * **Type**: SIMD
2425 *
2426 * **Syntax**:\n
2427 * ~~~
2428 * KHM16 Rd, Rs1, Rs2
2429 * KHMX16 Rd, Rs1, Rs2
2430 * ~~~
2431 *
2432 * **Purpose**:\n
2433 * Do Q15xQ15 element multiplications simultaneously. The Q30 results are then reduced to
2434 * Q15 numbers again.
2435 *
2436 * **Description**:\n
2437 * For the `KHM16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in
2438 * Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom
2439 * 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit chunks in
2440 * Rs2.
2441 * For the `KHMX16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the
2442 * bottom 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom 16-bit Q15
2443 * content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2.
2444 * The Q30 results are then right-shifted 15-bits and saturated into Q15 values. The Q15 results are
2445 * then written into Rd. When both the two Q15 inputs of a multiplication are 0x8000, saturation will
2446 * happen. The result will be saturated to 0x7FFF and the overflow flag OV will be set.
2447 *
2448 * **Operations**:\n
2449 * ~~~
2450 * if (is `KHM16`) {
2451 * op1t = Rs1.H[x+1]; op2t = Rs2.H[x+1]; // top
2452 * op1b = Rs1.H[x]; op2b = Rs2.H[x]; // bottom
2453 * } else if (is `KHMX16`) {
2454 * op1t = Rs1.H[x+1]; op2t = Rs2.H[x]; // Rs1 top
2455 * op1b = Rs1.H[x]; op2b = Rs2.H[x+1]; // Rs1 bottom
2456 * }
2457 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
2458 * if (0x8000 != aop | 0x8000 != bop) {
2459 * res = (aop s* bop) >> 15;
2460 * } else {
2461 * res= 0x7FFF;
2462 * OV = 1;
2463 * }
2464 * }
2465 * Rd.W[x/2] = concat(rest, resb);
2466 * for RV32: x=0
2467 * for RV64: x=0,2
2468 * ~~~
2469 *
2470 * \param [in] a unsigned long type of value stored in a
2471 * \param [in] b unsigned long type of value stored in b
2472 * \return value stored in unsigned long type
2473 */
__RV_KHMX16(unsigned long a,unsigned long b)2474 __STATIC_FORCEINLINE unsigned long __RV_KHMX16(unsigned long a, unsigned long b)
2475 {
2476 register unsigned long result;
2477 __ASM volatile("khmx16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
2478 return result;
2479 }
2480 /* ===== Inline Function End for 3.36.2. KHMX16 ===== */
2481
2482 /* ===== Inline Function Start for 3.37.1. KHMBB ===== */
2483 /**
2484 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
2485 * \brief KHMBB (Signed Saturating Half Multiply B16 x B16)
2486 * \details
2487 * **Type**: DSP
2488 *
2489 * **Syntax**:\n
2490 * ~~~
2491 * KHMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
2492 * ~~~
2493 *
2494 * **Purpose**:\n
2495 * Multiply the signed Q15 number contents of two 16-bit data in the corresponding portion
2496 * of the lower 32-bit chunk in registers and then right-shift 15 bits to turn the Q30 result into a Q15
2497 * number again and saturate the Q15 result into the destination register. If saturation happens, an
2498 * overflow flag OV will be set.
2499 *
2500 * **Description**:\n
2501 * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
2502 * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then right-
 * shifted 15-bits and saturated into a Q15 value. The Q15 value is then sign-extended and written into
2504 * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
2505 * to 0x7FFF and the overflow flag OV will be set.
2506 *
2507 * **Operations**:\n
2508 * ~~~
2509 * aop = Rs1.H[0]; bop = Rs2.H[0]; // KHMBB
2510 * aop = Rs1.H[0]; bop = Rs2.H[1]; // KHMBT
2511 * aop = Rs1.H[1]; bop = Rs2.H[1]; // KHMTT
2512 * If (0x8000 != aop | 0x8000 != bop) {
2513 * Mresult[31:0] = aop * bop;
2514 * res[15:0] = Mresult[30:15];
2515 * } else {
2516 * res[15:0] = 0x7FFF;
2517 * OV = 1;
2518 * }
 * Rd = SE32(res[15:0]); // RV32
2520 * Rd = SE64(res[15:0]); // RV64
2521 * ~~~
2522 *
2523 * \param [in] a unsigned int type of value stored in a
2524 * \param [in] b unsigned int type of value stored in b
2525 * \return value stored in long type
2526 */
__RV_KHMBB(unsigned int a,unsigned int b)2527 __STATIC_FORCEINLINE long __RV_KHMBB(unsigned int a, unsigned int b)
2528 {
2529 register long result;
2530 __ASM volatile("khmbb %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
2531 return result;
2532 }
2533 /* ===== Inline Function End for 3.37.1. KHMBB ===== */
2534
2535 /* ===== Inline Function Start for 3.37.2. KHMBT ===== */
2536 /**
2537 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
2538 * \brief KHMBT (Signed Saturating Half Multiply B16 x T16)
2539 * \details
2540 * **Type**: DSP
2541 *
2542 * **Syntax**:\n
2543 * ~~~
2544 * KHMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
2545 * ~~~
2546 *
2547 * **Purpose**:\n
2548 * Multiply the signed Q15 number contents of two 16-bit data in the corresponding portion
2549 * of the lower 32-bit chunk in registers and then right-shift 15 bits to turn the Q30 result into a Q15
2550 * number again and saturate the Q15 result into the destination register. If saturation happens, an
2551 * overflow flag OV will be set.
2552 *
2553 * **Description**:\n
2554 * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
2555 * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then right-
 * shifted 15-bits and saturated into a Q15 value. The Q15 value is then sign-extended and written into
2557 * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
2558 * to 0x7FFF and the overflow flag OV will be set.
2559 *
2560 * **Operations**:\n
2561 * ~~~
2562 * aop = Rs1.H[0]; bop = Rs2.H[0]; // KHMBB
2563 * aop = Rs1.H[0]; bop = Rs2.H[1]; // KHMBT
2564 * aop = Rs1.H[1]; bop = Rs2.H[1]; // KHMTT
2565 * If (0x8000 != aop | 0x8000 != bop) {
2566 * Mresult[31:0] = aop * bop;
2567 * res[15:0] = Mresult[30:15];
2568 * } else {
2569 * res[15:0] = 0x7FFF;
2570 * OV = 1;
2571 * }
 * Rd = SE32(res[15:0]); // RV32
2573 * Rd = SE64(res[15:0]); // RV64
2574 * ~~~
2575 *
2576 * \param [in] a unsigned int type of value stored in a
2577 * \param [in] b unsigned int type of value stored in b
2578 * \return value stored in long type
2579 */
__RV_KHMBT(unsigned int a,unsigned int b)2580 __STATIC_FORCEINLINE long __RV_KHMBT(unsigned int a, unsigned int b)
2581 {
2582 register long result;
2583 __ASM volatile("khmbt %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
2584 return result;
2585 }
2586 /* ===== Inline Function End for 3.37.2. KHMBT ===== */
2587
2588 /* ===== Inline Function Start for 3.37.3. KHMTT ===== */
2589 /**
2590 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
2591 * \brief KHMTT (Signed Saturating Half Multiply T16 x T16)
2592 * \details
2593 * **Type**: DSP
2594 *
2595 * **Syntax**:\n
2596 * ~~~
2597 * KHMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
2598 * ~~~
2599 *
2600 * **Purpose**:\n
2601 * Multiply the signed Q15 number contents of two 16-bit data in the corresponding portion
2602 * of the lower 32-bit chunk in registers and then right-shift 15 bits to turn the Q30 result into a Q15
2603 * number again and saturate the Q15 result into the destination register. If saturation happens, an
2604 * overflow flag OV will be set.
2605 *
2606 * **Description**:\n
2607 * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
2608 * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then right-
 * shifted 15-bits and saturated into a Q15 value. The Q15 value is then sign-extended and written into
2610 * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
2611 * to 0x7FFF and the overflow flag OV will be set.
2612 *
2613 * **Operations**:\n
2614 * ~~~
2615 * aop = Rs1.H[0]; bop = Rs2.H[0]; // KHMBB
2616 * aop = Rs1.H[0]; bop = Rs2.H[1]; // KHMBT
2617 * aop = Rs1.H[1]; bop = Rs2.H[1]; // KHMTT
2618 * If (0x8000 != aop | 0x8000 != bop) {
2619 * Mresult[31:0] = aop * bop;
2620 * res[15:0] = Mresult[30:15];
2621 * } else {
2622 * res[15:0] = 0x7FFF;
2623 * OV = 1;
2624 * }
 * Rd = SE32(res[15:0]); // RV32
2626 * Rd = SE64(res[15:0]); // RV64
2627 * ~~~
2628 *
2629 * \param [in] a unsigned int type of value stored in a
2630 * \param [in] b unsigned int type of value stored in b
2631 * \return value stored in long type
2632 */
__RV_KHMTT(unsigned int a,unsigned int b)2633 __STATIC_FORCEINLINE long __RV_KHMTT(unsigned int a, unsigned int b)
2634 {
2635 register long result;
2636 __ASM volatile("khmtt %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
2637 return result;
2638 }
2639 /* ===== Inline Function End for 3.37.3. KHMTT ===== */
2640
2641 /* ===== Inline Function Start for 3.38.1. KMABB ===== */
2642 /**
2643 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
2644 * \brief KMABB (SIMD Saturating Signed Multiply Bottom Halfs & Add)
2645 * \details
2646 * **Type**: SIMD
2647 *
2648 * **Syntax**:\n
2649 * ~~~
2650 * KMABB Rd, Rs1, Rs2
2651 * KMABT Rd, Rs1, Rs2
2652 * KMATT Rd, Rs1, Rs2
2653 * ~~~
2654 *
2655 * **Purpose**:\n
2656 * Multiply the signed 16-bit content of 32-bit elements in a register with the 16-bit content
2657 * of 32-bit elements in another register and add the result to the content of 32-bit elements in the
2658 * third register. The addition result may be saturated and is written to the third register.
2659 * * KMABB: rd.W[x] + bottom*bottom (per 32-bit element)
 * * KMABT: rd.W[x] + bottom*top (per 32-bit element)
 * * KMATT: rd.W[x] + top*top (per 32-bit element)
2662 *
2663 * **Description**:\n
2664 * For the `KMABB` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
2665 * the bottom 16-bit content of 32-bit elements in Rs2.
2666 * For the `KMABT` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
2667 * the top 16-bit content of 32-bit elements in Rs2.
2668 * For the `KMATT` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
2669 * top 16-bit content of 32-bit elements in Rs2.
2670 * The multiplication result is added to the content of 32-bit elements in Rd. If the addition result is
2671 * beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to
2672 * 1. The results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as
2673 * signed integers.
2674 *
2675 * **Operations**:\n
2676 * ~~~
2677 * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]); // KMABB
2678 * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[1]); // KMABT
2679 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]); // KMATT
2680 * if (res[x] > (2^31)-1) {
2681 * res[x] = (2^31)-1;
2682 * OV = 1;
2683 * } else if (res[x] < -2^31) {
2684 * res[x] = -2^31;
2685 * OV = 1;
2686 * }
2687 * Rd.W[x] = res[x];
2688 * for RV32: x=0
2689 * for RV64: x=1...0
2690 * ~~~
2691 *
2692 * \param [in] t long type of value stored in t
2693 * \param [in] a unsigned long type of value stored in a
2694 * \param [in] b unsigned long type of value stored in b
2695 * \return value stored in long type
2696 */
__RV_KMABB(long t,unsigned long a,unsigned long b)2697 __STATIC_FORCEINLINE long __RV_KMABB(long t, unsigned long a, unsigned long b)
2698 {
2699 __ASM volatile("kmabb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
2700 return t;
2701 }
2702 /* ===== Inline Function End for 3.38.1. KMABB ===== */
2703
2704 /* ===== Inline Function Start for 3.38.2. KMABT ===== */
2705 /**
2706 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
2707 * \brief KMABT (SIMD Saturating Signed Multiply Bottom & Top Halfs & Add)
2708 * \details
2709 * **Type**: SIMD
2710 *
2711 * **Syntax**:\n
2712 * ~~~
2713 * KMABB Rd, Rs1, Rs2
2714 * KMABT Rd, Rs1, Rs2
2715 * KMATT Rd, Rs1, Rs2
2716 * ~~~
2717 *
2718 * **Purpose**:\n
2719 * Multiply the signed 16-bit content of 32-bit elements in a register with the 16-bit content
2720 * of 32-bit elements in another register and add the result to the content of 32-bit elements in the
2721 * third register. The addition result may be saturated and is written to the third register.
2722 * * KMABB: rd.W[x] + bottom*bottom (per 32-bit element)
 * * KMABT: rd.W[x] + bottom*top (per 32-bit element)
 * * KMATT: rd.W[x] + top*top (per 32-bit element)
2725 *
2726 * **Description**:\n
2727 * For the `KMABB` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
2728 * the bottom 16-bit content of 32-bit elements in Rs2.
2729 * For the `KMABT` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
2730 * the top 16-bit content of 32-bit elements in Rs2.
2731 * For the `KMATT` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
2732 * top 16-bit content of 32-bit elements in Rs2.
2733 * The multiplication result is added to the content of 32-bit elements in Rd. If the addition result is
2734 * beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to
2735 * 1. The results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as
2736 * signed integers.
2737 *
2738 * **Operations**:\n
2739 * ~~~
2740 * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]); // KMABB
2741 * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[1]); // KMABT
2742 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]); // KMATT
2743 * if (res[x] > (2^31)-1) {
2744 * res[x] = (2^31)-1;
2745 * OV = 1;
2746 * } else if (res[x] < -2^31) {
2747 * res[x] = -2^31;
2748 * OV = 1;
2749 * }
2750 * Rd.W[x] = res[x];
2751 * for RV32: x=0
2752 * for RV64: x=1...0
2753 * ~~~
2754 *
2755 * \param [in] t long type of value stored in t
2756 * \param [in] a unsigned long type of value stored in a
2757 * \param [in] b unsigned long type of value stored in b
2758 * \return value stored in long type
2759 */
__RV_KMABT(long t,unsigned long a,unsigned long b)2760 __STATIC_FORCEINLINE long __RV_KMABT(long t, unsigned long a, unsigned long b)
2761 {
2762 __ASM volatile("kmabt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
2763 return t;
2764 }
2765 /* ===== Inline Function End for 3.38.2. KMABT ===== */
2766
2767 /* ===== Inline Function Start for 3.38.3. KMATT ===== */
2768 /**
2769 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
2770 * \brief KMATT (SIMD Saturating Signed Multiply Top Halfs & Add)
2771 * \details
2772 * **Type**: SIMD
2773 *
2774 * **Syntax**:\n
2775 * ~~~
2776 * KMABB Rd, Rs1, Rs2
2777 * KMABT Rd, Rs1, Rs2
2778 * KMATT Rd, Rs1, Rs2
2779 * ~~~
2780 *
2781 * **Purpose**:\n
2782 * Multiply the signed 16-bit content of 32-bit elements in a register with the 16-bit content
2783 * of 32-bit elements in another register and add the result to the content of 32-bit elements in the
2784 * third register. The addition result may be saturated and is written to the third register.
2785 * * KMABB: rd.W[x] + bottom*bottom (per 32-bit element)
 * * KMABT: rd.W[x] + bottom*top (per 32-bit element)
 * * KMATT: rd.W[x] + top*top (per 32-bit element)
2788 *
2789 * **Description**:\n
2790 * For the `KMABB` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
2791 * the bottom 16-bit content of 32-bit elements in Rs2.
2792 * For the `KMABT` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
2793 * the top 16-bit content of 32-bit elements in Rs2.
2794 * For the `KMATT` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
2795 * top 16-bit content of 32-bit elements in Rs2.
2796 * The multiplication result is added to the content of 32-bit elements in Rd. If the addition result is
2797 * beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to
2798 * 1. The results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as
2799 * signed integers.
2800 *
2801 * **Operations**:\n
2802 * ~~~
2803 * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]); // KMABB
2804 * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[1]); // KMABT
2805 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]); // KMATT
2806 * if (res[x] > (2^31)-1) {
2807 * res[x] = (2^31)-1;
2808 * OV = 1;
2809 * } else if (res[x] < -2^31) {
2810 * res[x] = -2^31;
2811 * OV = 1;
2812 * }
2813 * Rd.W[x] = res[x];
2814 * for RV32: x=0
2815 * for RV64: x=1...0
2816 * ~~~
2817 *
2818 * \param [in] t long type of value stored in t
2819 * \param [in] a unsigned long type of value stored in a
2820 * \param [in] b unsigned long type of value stored in b
2821 * \return value stored in long type
2822 */
__RV_KMATT(long t,unsigned long a,unsigned long b)2823 __STATIC_FORCEINLINE long __RV_KMATT(long t, unsigned long a, unsigned long b)
2824 {
2825 __ASM volatile("kmatt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
2826 return t;
2827 }
2828 /* ===== Inline Function End for 3.38.3. KMATT ===== */
2829
2830 /* ===== Inline Function Start for 3.39.1. KMADA ===== */
2831 /**
2832 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
2833 * \brief KMADA (SIMD Saturating Signed Multiply Two Halfs and Two Adds)
2834 * \details
2835 * **Type**: SIMD
2836 *
2837 * **Syntax**:\n
2838 * ~~~
2839 * KMADA Rd, Rs1, Rs2
2840 * KMAXDA Rd, Rs1, Rs2
2841 * ~~~
2842 *
2843 * **Purpose**:\n
2844 * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then adds
2845 * the two 32-bit results and 32-bit elements in a third register together. The addition result may be
2846 * saturated.
2847 * * KMADA: rd.W[x] + top*top + bottom*bottom (per 32-bit element)
2848 * * KMAXDA: rd.W[x] + top*bottom + bottom*top (per 32-bit element)
2849 *
2850 * **Description**:\n
 * For the `KMADA` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
2852 * the bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of
2853 * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
2854 * elements in Rs2.
2855 * For the `KMAXDA` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
2856 * bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of multiplying
2857 * the bottom 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit elements in
2858 * Rs2.
2859 * The result is added to the content of 32-bit elements in Rd. If the addition result is beyond the Q31
2860 * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The 32-bit
2861 * results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed
2862 * integers.
2863 *
2864 * **Operations**:\n
2865 * ~~~
2866 * // KMADA
2867 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
2868 * // KMAXDA
2869 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
2870 * if (res[x] > (2^31)-1) {
2871 * res[x] = (2^31)-1;
2872 * OV = 1;
2873 * } else if (res[x] < -2^31) {
2874 * res[x] = -2^31;
2875 * OV = 1;
2876 * }
2877 * Rd.W[x] = res[x];
2878 * for RV32: x=0
2879 * for RV64: x=1...0
2880 * ~~~
2881 *
2882 * \param [in] t long type of value stored in t
2883 * \param [in] a unsigned long type of value stored in a
2884 * \param [in] b unsigned long type of value stored in b
2885 * \return value stored in long type
2886 */
__RV_KMADA(long t,unsigned long a,unsigned long b)2887 __STATIC_FORCEINLINE long __RV_KMADA(long t, unsigned long a, unsigned long b)
2888 {
2889 __ASM volatile("kmada %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
2890 return t;
2891 }
2892 /* ===== Inline Function End for 3.39.1. KMADA ===== */
2893
2894 /* ===== Inline Function Start for 3.39.2. KMAXDA ===== */
2895 /**
2896 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
2897 * \brief KMAXDA (SIMD Saturating Signed Crossed Multiply Two Halfs and Two Adds)
2898 * \details
2899 * **Type**: SIMD
2900 *
2901 * **Syntax**:\n
2902 * ~~~
2903 * KMADA Rd, Rs1, Rs2
2904 * KMAXDA Rd, Rs1, Rs2
2905 * ~~~
2906 *
2907 * **Purpose**:\n
2908 * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then adds
2909 * the two 32-bit results and 32-bit elements in a third register together. The addition result may be
2910 * saturated.
2911 * * KMADA: rd.W[x] + top*top + bottom*bottom (per 32-bit element)
2912 * * KMAXDA: rd.W[x] + top*bottom + bottom*top (per 32-bit element)
2913 *
2914 * **Description**:\n
 * For the `KMADA` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
2916 * the bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of
2917 * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
2918 * elements in Rs2.
2919 * For the `KMAXDA` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
2920 * bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of multiplying
2921 * the bottom 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit elements in
2922 * Rs2.
2923 * The result is added to the content of 32-bit elements in Rd. If the addition result is beyond the Q31
2924 * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The 32-bit
2925 * results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed
2926 * integers.
2927 *
2928 * **Operations**:\n
2929 * ~~~
2930 * // KMADA
2931 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
2932 * // KMAXDA
2933 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
2934 * if (res[x] > (2^31)-1) {
2935 * res[x] = (2^31)-1;
2936 * OV = 1;
2937 * } else if (res[x] < -2^31) {
2938 * res[x] = -2^31;
2939 * OV = 1;
2940 * }
2941 * Rd.W[x] = res[x];
2942 * for RV32: x=0
2943 * for RV64: x=1...0
2944 * ~~~
2945 *
2946 * \param [in] t long type of value stored in t
2947 * \param [in] a unsigned long type of value stored in a
2948 * \param [in] b unsigned long type of value stored in b
2949 * \return value stored in long type
2950 */
__RV_KMAXDA(long t,unsigned long a,unsigned long b)2951 __STATIC_FORCEINLINE long __RV_KMAXDA(long t, unsigned long a, unsigned long b)
2952 {
2953 __ASM volatile("kmaxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
2954 return t;
2955 }
2956 /* ===== Inline Function End for 3.39.2. KMAXDA ===== */
2957
2958 /* ===== Inline Function Start for 3.40.1. KMADS ===== */
2959 /**
2960 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
2961 * \brief KMADS (SIMD Saturating Signed Multiply Two Halfs & Subtract & Add)
2962 * \details
2963 * **Type**: SIMD
2964 *
2965 * **Syntax**:\n
2966 * ~~~
2967 * KMADS Rd, Rs1, Rs2
2968 * KMADRS Rd, Rs1, Rs2
2969 * KMAXDS Rd, Rs1, Rs2
2970 * ~~~
2971 *
2972 * **Purpose**:\n
2973 * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then
2974 * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
2975 * the corresponding 32-bit elements in a third register. The addition result may be saturated.
2976 * * KMADS: rd.W[x] + (top*top - bottom*bottom) (per 32-bit element)
2977 * * KMADRS: rd.W[x] + (bottom*bottom - top*top) (per 32-bit element)
2978 * * KMAXDS: rd.W[x] + (top*bottom - bottom*top) (per 32-bit element)
2979 *
2980 * **Description**:\n
2981 * For the `KMADS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
2982 * the bottom 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
2983 * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
2984 * elements in Rs2.
2985 * For the `KMADRS` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
2986 * top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
2987 * multiplying the bottom 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-
2988 * bit elements in Rs2.
2989 * For the `KMAXDS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
2990 * the top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
2991 * multiplying the top 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-bit
2992 * elements in Rs2.
2993 * The subtraction result is then added to the content of the corresponding 32-bit elements in Rd. If the
2994 * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
2995 * the OV bit is set to 1. The 32-bit results after saturation are written to Rd. The 16-bit contents of Rs1
2996 * and Rs2 are treated as signed integers.
2997 *
2998 * **Operations**:\n
2999 * ~~~
3000 * // KMADS
3001 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
3002 * // KMADRS
3003 * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
3004 * // KMAXDS
3005 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
3006 * if (res[x] > (2^31)-1) {
3007 * res[x] = (2^31)-1;
3008 * OV = 1;
3009 * } else if (res[x] < -2^31) {
3010 * res[x] = -2^31;
3011 * OV = 1;
3012 * }
3013 * Rd.W[x] = res[x];
3014 * for RV32: x=0
3015 * for RV64: x=1...0
3016 * ~~~
3017 *
3018 * \param [in] t long type of value stored in t
3019 * \param [in] a unsigned long type of value stored in a
3020 * \param [in] b unsigned long type of value stored in b
3021 * \return value stored in long type
3022 */
__RV_KMADS(long t,unsigned long a,unsigned long b)3023 __STATIC_FORCEINLINE long __RV_KMADS(long t, unsigned long a, unsigned long b)
3024 {
3025 __ASM volatile("kmads %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
3026 return t;
3027 }
3028 /* ===== Inline Function End for 3.40.1. KMADS ===== */
3029
3030 /* ===== Inline Function Start for 3.40.2. KMADRS ===== */
3031 /**
3032 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
3033 * \brief KMADRS (SIMD Saturating Signed Multiply Two Halfs & Reverse Subtract & Add)
3034 * \details
3035 * **Type**: SIMD
3036 *
3037 * **Syntax**:\n
3038 * ~~~
3039 * KMADS Rd, Rs1, Rs2
3040 * KMADRS Rd, Rs1, Rs2
3041 * KMAXDS Rd, Rs1, Rs2
3042 * ~~~
3043 *
3044 * **Purpose**:\n
3045 * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then
3046 * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
3047 * the corresponding 32-bit elements in a third register. The addition result may be saturated.
3048 * * KMADS: rd.W[x] + (top*top - bottom*bottom) (per 32-bit element)
3049 * * KMADRS: rd.W[x] + (bottom*bottom - top*top) (per 32-bit element)
3050 * * KMAXDS: rd.W[x] + (top*bottom - bottom*top) (per 32-bit element)
3051 *
3052 * **Description**:\n
3053 * For the `KMADS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
3054 * the bottom 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
3055 * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
3056 * elements in Rs2.
3057 * For the `KMADRS` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
3058 * top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
3059 * multiplying the bottom 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-
3060 * bit elements in Rs2.
3061 * For the `KMAXDS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
3062 * the top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
3063 * multiplying the top 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-bit
3064 * elements in Rs2.
3065 * The subtraction result is then added to the content of the corresponding 32-bit elements in Rd. If the
3066 * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
3067 * the OV bit is set to 1. The 32-bit results after saturation are written to Rd. The 16-bit contents of Rs1
3068 * and Rs2 are treated as signed integers.
3069 *
3070 * **Operations**:\n
3071 * ~~~
3072 * // KMADS
3073 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
3074 * // KMADRS
3075 * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
3076 * // KMAXDS
3077 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
3078 * if (res[x] > (2^31)-1) {
3079 * res[x] = (2^31)-1;
3080 * OV = 1;
3081 * } else if (res[x] < -2^31) {
3082 * res[x] = -2^31;
3083 * OV = 1;
3084 * }
3085 * Rd.W[x] = res[x];
3086 * for RV32: x=0
3087 * for RV64: x=1...0
3088 * ~~~
3089 *
3090 * \param [in] t long type of value stored in t
3091 * \param [in] a unsigned long type of value stored in a
3092 * \param [in] b unsigned long type of value stored in b
3093 * \return value stored in long type
3094 */
__RV_KMADRS(long t,unsigned long a,unsigned long b)3095 __STATIC_FORCEINLINE long __RV_KMADRS(long t, unsigned long a, unsigned long b)
3096 {
3097 __ASM volatile("kmadrs %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
3098 return t;
3099 }
3100 /* ===== Inline Function End for 3.40.2. KMADRS ===== */
3101
3102 /* ===== Inline Function Start for 3.40.3. KMAXDS ===== */
3103 /**
3104 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
3105 * \brief KMAXDS (SIMD Saturating Signed Crossed Multiply Two Halfs & Subtract & Add)
3106 * \details
3107 * **Type**: SIMD
3108 *
3109 * **Syntax**:\n
3110 * ~~~
3111 * KMADS Rd, Rs1, Rs2
3112 * KMADRS Rd, Rs1, Rs2
3113 * KMAXDS Rd, Rs1, Rs2
3114 * ~~~
3115 *
3116 * **Purpose**:\n
3117 * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then
3118 * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
3119 * the corresponding 32-bit elements in a third register. The addition result may be saturated.
3120 * * KMADS: rd.W[x] + (top*top - bottom*bottom) (per 32-bit element)
3121 * * KMADRS: rd.W[x] + (bottom*bottom - top*top) (per 32-bit element)
3122 * * KMAXDS: rd.W[x] + (top*bottom - bottom*top) (per 32-bit element)
3123 *
3124 * **Description**:\n
3125 * For the `KMADS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
3126 * the bottom 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
3127 * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
3128 * elements in Rs2.
3129 * For the `KMADRS` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
3130 * top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
3131 * multiplying the bottom 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-
3132 * bit elements in Rs2.
3133 * For the `KMAXDS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
3134 * the top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
3135 * multiplying the top 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-bit
3136 * elements in Rs2.
3137 * The subtraction result is then added to the content of the corresponding 32-bit elements in Rd. If the
3138 * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
3139 * the OV bit is set to 1. The 32-bit results after saturation are written to Rd. The 16-bit contents of Rs1
3140 * and Rs2 are treated as signed integers.
3141 *
3142 * **Operations**:\n
3143 * ~~~
3144 * // KMADS
3145 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
3146 * // KMADRS
3147 * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
3148 * // KMAXDS
3149 * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
3150 * if (res[x] > (2^31)-1) {
3151 * res[x] = (2^31)-1;
3152 * OV = 1;
3153 * } else if (res[x] < -2^31) {
3154 * res[x] = -2^31;
3155 * OV = 1;
3156 * }
3157 * Rd.W[x] = res[x];
3158 * for RV32: x=0
3159 * for RV64: x=1...0
3160 * ~~~
3161 *
3162 * \param [in] t long type of value stored in t
3163 * \param [in] a unsigned long type of value stored in a
3164 * \param [in] b unsigned long type of value stored in b
3165 * \return value stored in long type
3166 */
__RV_KMAXDS(long t,unsigned long a,unsigned long b)3167 __STATIC_FORCEINLINE long __RV_KMAXDS(long t, unsigned long a, unsigned long b)
3168 {
3169 __ASM volatile("kmaxds %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
3170 return t;
3171 }
3172 /* ===== Inline Function End for 3.40.3. KMAXDS ===== */
3173
3174 /* ===== Inline Function Start for 3.41. KMAR64 ===== */
3175 /**
3176 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
3177 * \brief KMAR64 (Signed Multiply and Saturating Add to 64-Bit Data)
3178 * \details
3179 * **Type**: DSP (64-bit Profile)
3180 *
3181 * **Syntax**:\n
3182 * ~~~
3183 * KMAR64 Rd, Rs1, Rs2
3184 * ~~~
3185 *
3186 * **Purpose**:\n
3187 * Multiply the 32-bit signed elements in two registers and add the 64-bit multiplication
3188 * results to the 64-bit signed data of a pair of registers (RV32) or a register (RV64). The result is
3189 * saturated to the Q63 range and written back to the pair of registers (RV32) or the register (RV64).
3190 *
3191 * **RV32 Description**:\n
3192 * This instruction multiplies the 32-bit signed data of Rs1 with that of Rs2. It adds
3193 * the 64-bit multiplication result to the 64-bit signed data of an even/odd pair of registers specified by
3194 * Rd(4,1) with unlimited precision. If the 64-bit addition result is beyond the Q63 number range (-2^63 <=
3195 * Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The saturated result is written back
3196 * to the even/odd pair of registers specified by Rd(4,1).
3197 * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register
3198 * pair includes register 2d and 2d+1.
3199 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
3200 * of the pair contains the low 32-bit of the result.
3201 *
3202 * **RV64 Description**:\n
3203 * This instruction multiplies the 32-bit signed elements of Rs1 with that of Rs2. It
3204 * adds the 64-bit multiplication results to the 64-bit signed data of Rd with unlimited precision. If the
3205 * 64-bit addition result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range
3206 * and the OV bit is set to 1. The saturated result is written back to Rd.
3207 *
3208 * **Operations**:\n
3209 * ~~~
3210 * RV32:
3211 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
3212 * result = R[t_H].R[t_L] + (Rs1 * Rs2);
3213 * if (result > (2^63)-1) {
3214 * result = (2^63)-1; OV = 1;
3215 * } else if (result < -2^63) {
3216 * result = -2^63; OV = 1;
3217 * }
3218 * R[t_H].R[t_L] = result;
3219 * RV64:
3220 * // `result` has unlimited precision
3221 * result = Rd + (Rs1.W[0] * Rs2.W[0]) + (Rs1.W[1] * Rs2.W[1]);
3222 * if (result > (2^63)-1) {
3223 * result = (2^63)-1; OV = 1;
3224 * } else if (result < -2^63) {
3225 * result = -2^63; OV = 1;
3226 * }
3227 * Rd = result;
3228 * ~~~
3229 *
3230 * \param [in] t long long type of value stored in t
3231 * \param [in] a long type of value stored in a
3232 * \param [in] b long type of value stored in b
3233 * \return value stored in long long type
3234 */
__RV_KMAR64(long long t,long a,long b)3235 __STATIC_FORCEINLINE long long __RV_KMAR64(long long t, long a, long b)
3236 {
3237 __ASM volatile("kmar64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
3238 return t;
3239 }
3240 /* ===== Inline Function End for 3.41. KMAR64 ===== */
3241
3242 /* ===== Inline Function Start for 3.42.1. KMDA ===== */
3243 /**
3244 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
3245 * \brief KMDA (SIMD Signed Multiply Two Halfs and Add)
3246 * \details
3247 * **Type**: SIMD
3248 *
3249 * **Syntax**:\n
3250 * ~~~
3251 * KMDA Rd, Rs1, Rs2
3252 * KMXDA Rd, Rs1, Rs2
3253 * ~~~
3254 *
3255 * **Purpose**:\n
3256 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
3257 * adds the two 32-bit results together. The addition result may be saturated.
3258 * * KMDA: top*top + bottom*bottom (per 32-bit element)
3259 * * KMXDA: top*bottom + bottom*top (per 32-bit element)
3260 *
3261 * **Description**:\n
3262 * For the `KMDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
3263 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
3264 * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-
3265 * bit elements of Rs2.
3266 * For the `KMXDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
3267 * with the top 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
3268 * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of the
3269 * 32-bit elements of Rs2.
3270 * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^31-1.
3271 * The final results are written to Rd. The 16-bit contents are treated as signed integers.
3272 *
3273 * **Operations**:\n
3274 * ~~~
 * if (Rs1.W[x] != 0x80008000) or (Rs2.W[x] != 0x80008000) {
 *   // KMDA
 *   Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
 *   // KMXDA
 *   Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
 * } else {
 *   Rd.W[x] = 0x7fffffff;
 *   OV = 1;
 * }
 * for RV32: x=0
 * for RV64: x=1...0
3279 * ~~~
3280 *
3281 * \param [in] a unsigned long type of value stored in a
3282 * \param [in] b unsigned long type of value stored in b
3283 * \return value stored in long type
3284 */
__RV_KMDA(unsigned long a,unsigned long b)3285 __STATIC_FORCEINLINE long __RV_KMDA(unsigned long a, unsigned long b)
3286 {
3287 register long result;
3288 __ASM volatile("kmda %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
3289 return result;
3290 }
3291 /* ===== Inline Function End for 3.42.1. KMDA ===== */
3292
3293 /* ===== Inline Function Start for 3.42.2. KMXDA ===== */
3294 /**
3295 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
3296 * \brief KMXDA (SIMD Signed Crossed Multiply Two Halfs and Add)
3297 * \details
3298 * **Type**: SIMD
3299 *
3300 * **Syntax**:\n
3301 * ~~~
3302 * KMDA Rd, Rs1, Rs2
3303 * KMXDA Rd, Rs1, Rs2
3304 * ~~~
3305 *
3306 * **Purpose**:\n
3307 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
3308 * adds the two 32-bit results together. The addition result may be saturated.
3309 * * KMDA: top*top + bottom*bottom (per 32-bit element)
3310 * * KMXDA: top*bottom + bottom*top (per 32-bit element)
3311 *
3312 * **Description**:\n
3313 * For the `KMDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
3314 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
3315 * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-
3316 * bit elements of Rs2.
3317 * For the `KMXDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
3318 * with the top 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
3319 * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of the
3320 * 32-bit elements of Rs2.
3321 * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^31-1.
3322 * The final results are written to Rd. The 16-bit contents are treated as signed integers.
3323 *
3324 * **Operations**:\n
3325 * ~~~
 * if ((Rs1.W[x] != 0x80008000) or (Rs2.W[x] != 0x80008000)) {
 *   // KMDA
 *   Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
 *   // KMXDA
 *   Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
 * } else {
 *   Rd.W[x] = 0x7fffffff;
 *   OV = 1;
 * }
 * for RV32: x=0
 * for RV64: x=1...0
3330 * ~~~
3331 *
3332 * \param [in] a unsigned long type of value stored in a
3333 * \param [in] b unsigned long type of value stored in b
3334 * \return value stored in long type
3335 */
__RV_KMXDA(unsigned long a,unsigned long b)3336 __STATIC_FORCEINLINE long __RV_KMXDA(unsigned long a, unsigned long b)
3337 {
3338 register long result;
3339 __ASM volatile("kmxda %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
3340 return result;
3341 }
3342 /* ===== Inline Function End for 3.42.2. KMXDA ===== */
3343
3344 /* ===== Inline Function Start for 3.43.1. KMMAC ===== */
3345 /**
3346 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
3347 * \brief KMMAC (SIMD Saturating MSW Signed Multiply Word and Add)
3348 * \details
3349 * **Type**: SIMD
3350 *
3351 * **Syntax**:\n
3352 * ~~~
3353 * KMMAC Rd, Rs1, Rs2
3354 * KMMAC.u Rd, Rs1, Rs2
3355 * ~~~
3356 *
3357 * **Purpose**:\n
3358 * Multiply the signed 32-bit integer elements of two registers and add the most significant
3359 * 32-bit results with the signed 32-bit integer elements of a third register. The addition results are
3360 * saturated first and then written back to the third register. The `.u` form performs an additional
3361 * rounding up operation on the multiplication results before adding the most significant 32-bit part
3362 * of the results.
3363 *
3364 * **Description**:\n
3365 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
3366 * and adds the most significant 32-bit multiplication results with the signed 32-bit elements of Rd. If
3367 * the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range
3368 * and the OV bit is set to 1. The results after saturation are written to Rd. The `.u` form of the
3369 * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
3370 * adding a 1 to bit 31 of the results.
3371 *
3372 * **Operations**:\n
3373 * ~~~
3374 * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
3375 * if (`.u` form) {
3376 * Round[x][32:0] = Mres[x][63:31] + 1;
3377 * res[x] = Rd.W[x] + Round[x][32:1];
3378 * } else {
3379 * res[x] = Rd.W[x] + Mres[x][63:32];
3380 * }
3381 * if (res[x] > (2^31)-1) {
3382 * res[x] = (2^31)-1;
3383 * OV = 1;
3384 * } else if (res[x] < -2^31) {
3385 * res[x] = -2^31;
3386 * OV = 1;
3387 * }
3388 * Rd.W[x] = res[x];
3389 * for RV32: x=0
3390 * for RV64: x=1...0
3391 * ~~~
3392 *
3393 * \param [in] t long type of value stored in t
3394 * \param [in] a long type of value stored in a
3395 * \param [in] b long type of value stored in b
3396 * \return value stored in long type
3397 */
__RV_KMMAC(long t,long a,long b)3398 __STATIC_FORCEINLINE long __RV_KMMAC(long t, long a, long b)
3399 {
3400 __ASM volatile("kmmac %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
3401 return t;
3402 }
3403 /* ===== Inline Function End for 3.43.1. KMMAC ===== */
3404
3405 /* ===== Inline Function Start for 3.43.2. KMMAC.u ===== */
3406 /**
3407 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
3408 * \brief KMMAC.u (SIMD Saturating MSW Signed Multiply Word and Add with Rounding)
3409 * \details
3410 * **Type**: SIMD
3411 *
3412 * **Syntax**:\n
3413 * ~~~
3414 * KMMAC Rd, Rs1, Rs2
3415 * KMMAC.u Rd, Rs1, Rs2
3416 * ~~~
3417 *
3418 * **Purpose**:\n
3419 * Multiply the signed 32-bit integer elements of two registers and add the most significant
3420 * 32-bit results with the signed 32-bit integer elements of a third register. The addition results are
3421 * saturated first and then written back to the third register. The `.u` form performs an additional
3422 * rounding up operation on the multiplication results before adding the most significant 32-bit part
3423 * of the results.
3424 *
3425 * **Description**:\n
3426 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
3427 * and adds the most significant 32-bit multiplication results with the signed 32-bit elements of Rd. If
3428 * the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range
3429 * and the OV bit is set to 1. The results after saturation are written to Rd. The `.u` form of the
3430 * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
3431 * adding a 1 to bit 31 of the results.
3432 *
3433 * **Operations**:\n
3434 * ~~~
3435 * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
3436 * if (`.u` form) {
3437 * Round[x][32:0] = Mres[x][63:31] + 1;
3438 * res[x] = Rd.W[x] + Round[x][32:1];
3439 * } else {
3440 * res[x] = Rd.W[x] + Mres[x][63:32];
3441 * }
3442 * if (res[x] > (2^31)-1) {
3443 * res[x] = (2^31)-1;
3444 * OV = 1;
3445 * } else if (res[x] < -2^31) {
3446 * res[x] = -2^31;
3447 * OV = 1;
3448 * }
3449 * Rd.W[x] = res[x];
3450 * for RV32: x=0
3451 * for RV64: x=1...0
3452 * ~~~
3453 *
3454 * \param [in] t long type of value stored in t
3455 * \param [in] a long type of value stored in a
3456 * \param [in] b long type of value stored in b
3457 * \return value stored in long type
3458 */
__RV_KMMAC_U(long t,long a,long b)3459 __STATIC_FORCEINLINE long __RV_KMMAC_U(long t, long a, long b)
3460 {
3461 __ASM volatile("kmmac.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
3462 return t;
3463 }
3464 /* ===== Inline Function End for 3.43.2. KMMAC.u ===== */
3465
3466 /* ===== Inline Function Start for 3.44.1. KMMAWB ===== */
3467 /**
3468 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
3469 * \brief KMMAWB (SIMD Saturating MSW Signed Multiply Word and Bottom Half and Add)
3470 * \details
3471 * **Type**: SIMD
3472 *
3473 * **Syntax**:\n
3474 * ~~~
3475 * KMMAWB Rd, Rs1, Rs2
3476 * KMMAWB.u Rd, Rs1, Rs2
3477 * ~~~
3478 *
3479 * **Purpose**:\n
3480 * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
3481 * corresponding 32-bit elements of another register and add the most significant 32-bit results with
3482 * the corresponding signed 32-bit elements of a third register. The addition result is written to the
3483 * corresponding 32-bit elements of the third register. The `.u` form rounds up the multiplication
3484 * results from the most significant discarded bit before the addition operations.
3485 *
3486 * **Description**:\n
3487 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed bottom 16-bit content
3488 * of the corresponding 32-bit elements of Rs2 and adds the most significant 32-bit multiplication
3489 * results with the corresponding signed 32-bit elements of Rd. If the addition result is beyond the Q31
3490 * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The results
3491 * after saturation are written to the corresponding 32-bit elements of Rd. The `.u` form of the
3492 * instruction rounds up the most significant 32-bit of the 48-bit multiplication results by adding a 1 to
3493 * bit 15 of the result before the addition operations.
3494 *
3495 * **Operations**:\n
3496 * ~~~
3497 * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[0];
3498 * if (`.u` form) {
3499 * Round[x][32:0] = Mres[x][47:15] + 1;
3500 * res[x] = Rd.W[x] + Round[x][32:1];
3501 * } else {
3502 * res[x] = Rd.W[x] + Mres[x][47:16];
3503 * }
3504 * if (res[x] > (2^31)-1) {
3505 * res[x] = (2^31)-1;
3506 * OV = 1;
3507 * } else if (res[x] < -2^31) {
3508 * res[x] = -2^31;
3509 * OV = 1;
3510 * }
3511 * Rd.W[x] = res[x];
3512 * for RV32: x=0
3513 * for RV64: x=1...0
3514 * ~~~
3515 *
3516 * \param [in] t long type of value stored in t
3517 * \param [in] a unsigned long type of value stored in a
3518 * \param [in] b unsigned long type of value stored in b
3519 * \return value stored in long type
3520 */
__RV_KMMAWB(long t,unsigned long a,unsigned long b)3521 __STATIC_FORCEINLINE long __RV_KMMAWB(long t, unsigned long a, unsigned long b)
3522 {
3523 __ASM volatile("kmmawb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
3524 return t;
3525 }
3526 /* ===== Inline Function End for 3.44.1. KMMAWB ===== */
3527
3528 /* ===== Inline Function Start for 3.44.2. KMMAWB.u ===== */
3529 /**
3530 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
3531 * \brief KMMAWB.u (SIMD Saturating MSW Signed Multiply Word and Bottom Half and Add with Rounding)
3532 * \details
3533 * **Type**: SIMD
3534 *
3535 * **Syntax**:\n
3536 * ~~~
3537 * KMMAWB Rd, Rs1, Rs2
3538 * KMMAWB.u Rd, Rs1, Rs2
3539 * ~~~
3540 *
3541 * **Purpose**:\n
3542 * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
3543 * corresponding 32-bit elements of another register and add the most significant 32-bit results with
3544 * the corresponding signed 32-bit elements of a third register. The addition result is written to the
3545 * corresponding 32-bit elements of the third register. The `.u` form rounds up the multiplication
3546 * results from the most significant discarded bit before the addition operations.
3547 *
3548 * **Description**:\n
3549 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed bottom 16-bit content
3550 * of the corresponding 32-bit elements of Rs2 and adds the most significant 32-bit multiplication
3551 * results with the corresponding signed 32-bit elements of Rd. If the addition result is beyond the Q31
3552 * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The results
3553 * after saturation are written to the corresponding 32-bit elements of Rd. The `.u` form of the
3554 * instruction rounds up the most significant 32-bit of the 48-bit multiplication results by adding a 1 to
3555 * bit 15 of the result before the addition operations.
3556 *
3557 * **Operations**:\n
3558 * ~~~
3559 * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[0];
3560 * if (`.u` form) {
3561 * Round[x][32:0] = Mres[x][47:15] + 1;
3562 * res[x] = Rd.W[x] + Round[x][32:1];
3563 * } else {
3564 * res[x] = Rd.W[x] + Mres[x][47:16];
3565 * }
3566 * if (res[x] > (2^31)-1) {
3567 * res[x] = (2^31)-1;
3568 * OV = 1;
3569 * } else if (res[x] < -2^31) {
3570 * res[x] = -2^31;
3571 * OV = 1;
3572 * }
3573 * Rd.W[x] = res[x];
3574 * for RV32: x=0
3575 * for RV64: x=1...0
3576 * ~~~
3577 *
3578 * \param [in] t long type of value stored in t
3579 * \param [in] a unsigned long type of value stored in a
3580 * \param [in] b unsigned long type of value stored in b
3581 * \return value stored in long type
3582 */
__RV_KMMAWB_U(long t,unsigned long a,unsigned long b)3583 __STATIC_FORCEINLINE long __RV_KMMAWB_U(long t, unsigned long a, unsigned long b)
3584 {
3585 __ASM volatile("kmmawb.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
3586 return t;
3587 }
3588 /* ===== Inline Function End for 3.44.2. KMMAWB.u ===== */
3589
3590 /* ===== Inline Function Start for 3.45.1. KMMAWB2 ===== */
3591 /**
3592 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
3593 * \brief KMMAWB2 (SIMD Saturating MSW Signed Multiply Word and Bottom Half & 2 and Add)
3594 * \details
3595 * **Type**: SIMD
3596 *
3597 * **Syntax**:\n
3598 * ~~~
3599 * KMMAWB2 Rd, Rs1, Rs2
3600 * KMMAWB2.u Rd, Rs1, Rs2
3601 * ~~~
3602 *
3603 * **Purpose**:\n
3604 * Multiply the signed 32-bit elements of one register and the bottom 16-bit of the
3605 * corresponding 32-bit elements of another register, double the multiplication results and add the
3606 * saturated most significant 32-bit results with the corresponding signed 32-bit elements of a third
3607 * register. The saturated addition result is written to the corresponding 32-bit elements of the third
3608 * register. The `.u` form rounds up the multiplication results from the most significant discarded bit
3609 * before the addition operations.
3610 *
3611 * **Description**:\n
3612 * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed bottom 16-bit Q15
3613 * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
3614 * adds the saturated most significant 32-bit Q31 multiplication results with the corresponding signed
3615 * 32-bit elements of Rd. If the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
3616 * saturated to the range and the OV bit is set to 1. The results after saturation are written to the
3617 * corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the most significant
3618 * 32-bit of the 48-bit Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of
3619 * the result before the addition operations.
3620 *
3621 * **Operations**:\n
3622 * ~~~
3623 * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[0] == 0x8000)) {
3624 * addop.W[x] = 0x7fffffff;
3625 * OV = 1;
3626 * } else {
3627 * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[0];
3628 * if (`.u` form) {
3629 * Mres[x][47:14] = Mres[x][47:14] + 1;
3630 * }
3631 * addop.W[x] = Mres[x][46:15]; // doubling
3632 * }
3633 * res[x] = Rd.W[x] + addop.W[x];
3634 * if (res[x] > (2^31)-1) {
3635 * res[x] = (2^31)-1;
3636 * OV = 1;
3637 * } else if (res[x] < -2^31) {
3638 * res[x] = -2^31;
3639 * OV = 1;
3640 * }
3641 * Rd.W[x] = res[x];
3642 * for RV32: x=0
3643 * for RV64: x=1...0
3644 * ~~~
3645 *
3646 * \param [in] t long type of value stored in t
3647 * \param [in] a unsigned long type of value stored in a
3648 * \param [in] b unsigned long type of value stored in b
3649 * \return value stored in long type
3650 */
__RV_KMMAWB2(long t,unsigned long a,unsigned long b)3651 __STATIC_FORCEINLINE long __RV_KMMAWB2(long t, unsigned long a, unsigned long b)
3652 {
3653 __ASM volatile("kmmawb2 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
3654 return t;
3655 }
3656 /* ===== Inline Function End for 3.45.1. KMMAWB2 ===== */
3657
3658 /* ===== Inline Function Start for 3.45.2. KMMAWB2.u ===== */
3659 /**
3660 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
3661 * \brief KMMAWB2.u (SIMD Saturating MSW Signed Multiply Word and Bottom Half & 2 and Add with Rounding)
3662 * \details
3663 * **Type**: SIMD
3664 *
3665 * **Syntax**:\n
3666 * ~~~
3667 * KMMAWB2 Rd, Rs1, Rs2
3668 * KMMAWB2.u Rd, Rs1, Rs2
3669 * ~~~
3670 *
3671 * **Purpose**:\n
3672 * Multiply the signed 32-bit elements of one register and the bottom 16-bit of the
3673 * corresponding 32-bit elements of another register, double the multiplication results and add the
3674 * saturated most significant 32-bit results with the corresponding signed 32-bit elements of a third
3675 * register. The saturated addition result is written to the corresponding 32-bit elements of the third
3676 * register. The `.u` form rounds up the multiplication results from the most significant discarded bit
3677 * before the addition operations.
3678 *
3679 * **Description**:\n
3680 * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed bottom 16-bit Q15
3681 * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
3682 * adds the saturated most significant 32-bit Q31 multiplication results with the corresponding signed
3683 * 32-bit elements of Rd. If the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
3684 * saturated to the range and the OV bit is set to 1. The results after saturation are written to the
3685 * corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the most significant
3686 * 32-bit of the 48-bit Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of
3687 * the result before the addition operations.
3688 *
3689 * **Operations**:\n
3690 * ~~~
3691 * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[0] == 0x8000)) {
3692 * addop.W[x] = 0x7fffffff;
3693 * OV = 1;
3694 * } else {
3695 * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[0];
3696 * if (`.u` form) {
3697 * Mres[x][47:14] = Mres[x][47:14] + 1;
3698 * }
3699 * addop.W[x] = Mres[x][46:15]; // doubling
3700 * }
3701 * res[x] = Rd.W[x] + addop.W[x];
3702 * if (res[x] > (2^31)-1) {
3703 * res[x] = (2^31)-1;
3704 * OV = 1;
3705 * } else if (res[x] < -2^31) {
3706 * res[x] = -2^31;
3707 * OV = 1;
3708 * }
3709 * Rd.W[x] = res[x];
3710 * for RV32: x=0
3711 * for RV64: x=1...0
3712 * ~~~
3713 *
3714 * \param [in] t long type of value stored in t
3715 * \param [in] a unsigned long type of value stored in a
3716 * \param [in] b unsigned long type of value stored in b
3717 * \return value stored in long type
3718 */
__RV_KMMAWB2_U(long t,unsigned long a,unsigned long b)3719 __STATIC_FORCEINLINE long __RV_KMMAWB2_U(long t, unsigned long a, unsigned long b)
3720 {
3721 __ASM volatile("kmmawb2.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
3722 return t;
3723 }
3724 /* ===== Inline Function End for 3.45.2. KMMAWB2.u ===== */
3725
3726 /* ===== Inline Function Start for 3.46.1. KMMAWT ===== */
3727 /**
3728 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
3729 * \brief KMMAWT (SIMD Saturating MSW Signed Multiply Word and Top Half and Add)
3730 * \details
3731 * **Type**: SIMD
3732 *
3733 * **Syntax**:\n
3734 * ~~~
3735 * KMMAWT Rd, Rs1, Rs2
 * KMMAWT.u Rd, Rs1, Rs2
3737 * ~~~
3738 *
3739 * **Purpose**:\n
3740 * Multiply the signed 32-bit integer elements of one register and the signed top 16-bit of the
3741 * corresponding 32-bit elements of another register and add the most significant 32-bit results with
3742 * the corresponding signed 32-bit elements of a third register. The addition results are written to the
3743 * corresponding 32-bit elements of the third register. The `.u` form rounds up the multiplication
3744 * results from the most significant discarded bit before the addition operations.
3745 *
3746 * **Description**:\n
3747 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed top 16-bit of the
3748 * corresponding 32-bit elements of Rs2 and adds the most significant 32-bit multiplication results
3749 * with the corresponding signed 32-bit elements of Rd. If the addition result is beyond the Q31
3750 * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The results
3751 * after saturation are written to the corresponding 32-bit elements of Rd. The `.u` form of the
3752 * instruction rounds up the most significant 32-bit of the 48-bit multiplication results by adding a 1 to
3753 * bit 15 of the result before the addition operations.
3754 *
3755 * **Operations**:\n
3756 * ~~~
3757 * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[1];
3758 * if (`.u` form) {
3759 * Round[x][32:0] = Mres[x][47:15] + 1;
3760 * res[x] = Rd.W[x] + Round[x][32:1];
3761 * } else {
3762 * res[x] = Rd.W[x] + Mres[x][47:16];
3763 * }
3764 * if (res[x] > (2^31)-1) {
3765 * res[x] = (2^31)-1;
3766 * OV = 1;
3767 * } else if (res[x] < -2^31) {
3768 * res[x] = -2^31;
3769 * OV = 1;
3770 * }
3771 * Rd.W[x] = res[x];
3772 * for RV32: x=0
3773 * for RV64: x=1...0
3774 * ~~~
3775 *
3776 * \param [in] t long type of value stored in t
3777 * \param [in] a unsigned long type of value stored in a
3778 * \param [in] b unsigned long type of value stored in b
3779 * \return value stored in long type
3780 */
__RV_KMMAWT(long t,unsigned long a,unsigned long b)3781 __STATIC_FORCEINLINE long __RV_KMMAWT(long t, unsigned long a, unsigned long b)
3782 {
3783 __ASM volatile("kmmawt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
3784 return t;
3785 }
3786 /* ===== Inline Function End for 3.46.1. KMMAWT ===== */
3787
3788 /* ===== Inline Function Start for 3.46.2. KMMAWT.u ===== */
3789 /**
3790 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
3791 * \brief KMMAWT.u (SIMD Saturating MSW Signed Multiply Word and Top Half and Add with Rounding)
3792 * \details
3793 * **Type**: SIMD
3794 *
3795 * **Syntax**:\n
3796 * ~~~
3797 * KMMAWT Rd, Rs1, Rs2
 * KMMAWT.u Rd, Rs1, Rs2
3799 * ~~~
3800 *
3801 * **Purpose**:\n
3802 * Multiply the signed 32-bit integer elements of one register and the signed top 16-bit of the
3803 * corresponding 32-bit elements of another register and add the most significant 32-bit results with
3804 * the corresponding signed 32-bit elements of a third register. The addition results are written to the
3805 * corresponding 32-bit elements of the third register. The `.u` form rounds up the multiplication
3806 * results from the most significant discarded bit before the addition operations.
3807 *
3808 * **Description**:\n
3809 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed top 16-bit of the
3810 * corresponding 32-bit elements of Rs2 and adds the most significant 32-bit multiplication results
3811 * with the corresponding signed 32-bit elements of Rd. If the addition result is beyond the Q31
3812 * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The results
3813 * after saturation are written to the corresponding 32-bit elements of Rd. The `.u` form of the
3814 * instruction rounds up the most significant 32-bit of the 48-bit multiplication results by adding a 1 to
3815 * bit 15 of the result before the addition operations.
3816 *
3817 * **Operations**:\n
3818 * ~~~
3819 * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[1];
3820 * if (`.u` form) {
3821 * Round[x][32:0] = Mres[x][47:15] + 1;
3822 * res[x] = Rd.W[x] + Round[x][32:1];
3823 * } else {
3824 * res[x] = Rd.W[x] + Mres[x][47:16];
3825 * }
3826 * if (res[x] > (2^31)-1) {
3827 * res[x] = (2^31)-1;
3828 * OV = 1;
3829 * } else if (res[x] < -2^31) {
3830 * res[x] = -2^31;
3831 * OV = 1;
3832 * }
3833 * Rd.W[x] = res[x];
3834 * for RV32: x=0
3835 * for RV64: x=1...0
3836 * ~~~
3837 *
3838 * \param [in] t long type of value stored in t
3839 * \param [in] a unsigned long type of value stored in a
3840 * \param [in] b unsigned long type of value stored in b
3841 * \return value stored in long type
3842 */
__RV_KMMAWT_U(long t,unsigned long a,unsigned long b)3843 __STATIC_FORCEINLINE long __RV_KMMAWT_U(long t, unsigned long a, unsigned long b)
3844 {
3845 __ASM volatile("kmmawt.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
3846 return t;
3847 }
3848 /* ===== Inline Function End for 3.46.2. KMMAWT.u ===== */
3849
3850 /* ===== Inline Function Start for 3.47.1. KMMAWT2 ===== */
3851 /**
3852 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
3853 * \brief KMMAWT2 (SIMD Saturating MSW Signed Multiply Word and Top Half & 2 and Add)
3854 * \details
3855 * **Type**: SIMD
3856 *
3857 * **Syntax**:\n
3858 * ~~~
3859 * KMMAWT2 Rd, Rs1, Rs2
3860 * KMMAWT2.u Rd, Rs1, Rs2
3861 * ~~~
3862 *
3863 * **Purpose**:\n
3864 * Multiply the signed 32-bit elements of one register and the top 16-bit of the
3865 * corresponding 32-bit elements of another register, double the multiplication results and add the
3866 * saturated most significant 32-bit results with the corresponding signed 32-bit elements of a third
3867 * register. The saturated addition result is written to the corresponding 32-bit elements of the third
3868 * register. The `.u` form rounds up the multiplication results from the most significant discarded bit
3869 * before the addition operations.
3870 *
3871 * **Description**:\n
3872 * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed top 16-bit Q15
3873 * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
3874 * adds the saturated most significant 32-bit Q31 multiplication results with the corresponding signed
3875 * 32-bit elements of Rd. If the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
3876 * saturated to the range and the OV bit is set to 1. The results after saturation are written to the
3877 * corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the most significant
3878 * 32-bit of the 48-bit Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of
3879 * the result before the addition operations.
3880 *
3881 * **Operations**:\n
3882 * ~~~
3883 * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[1] == 0x8000)) {
3884 * addop.W[x] = 0x7fffffff;
3885 * OV = 1;
3886 * } else {
3887 * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[1];
3888 * if (`.u` form) {
3889 * Mres[x][47:14] = Mres[x][47:14] + 1;
3890 * }
3891 * addop.W[x] = Mres[x][46:15]; // doubling
3892 * }
3893 * res[x] = Rd.W[x] + addop.W[x];
3894 * if (res[x] > (2^31)-1) {
3895 * res[x] = (2^31)-1;
3896 * OV = 1;
3897 * } else if (res[x] < -2^31) {
3898 * res[x] = -2^31;
3899 * OV = 1;
3900 * }
3901 * Rd.W[x] = res[x];
3902 * for RV32: x=0
3903 * for RV64: x=1...0
3904 * ~~~
3905 *
3906 * \param [in] t long type of value stored in t
3907 * \param [in] a unsigned long type of value stored in a
3908 * \param [in] b unsigned long type of value stored in b
3909 * \return value stored in long type
3910 */
__RV_KMMAWT2(long t,unsigned long a,unsigned long b)3911 __STATIC_FORCEINLINE long __RV_KMMAWT2(long t, unsigned long a, unsigned long b)
3912 {
3913 __ASM volatile("kmmawt2 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
3914 return t;
3915 }
3916 /* ===== Inline Function End for 3.47.1. KMMAWT2 ===== */
3917
3918 /* ===== Inline Function Start for 3.47.2. KMMAWT2.u ===== */
3919 /**
3920 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
3921 * \brief KMMAWT2.u (SIMD Saturating MSW Signed Multiply Word and Top Half & 2 and Add with Rounding)
3922 * \details
3923 * **Type**: SIMD
3924 *
3925 * **Syntax**:\n
3926 * ~~~
3927 * KMMAWT2 Rd, Rs1, Rs2
3928 * KMMAWT2.u Rd, Rs1, Rs2
3929 * ~~~
3930 *
3931 * **Purpose**:\n
3932 * Multiply the signed 32-bit elements of one register and the top 16-bit of the
3933 * corresponding 32-bit elements of another register, double the multiplication results and add the
3934 * saturated most significant 32-bit results with the corresponding signed 32-bit elements of a third
3935 * register. The saturated addition result is written to the corresponding 32-bit elements of the third
3936 * register. The `.u` form rounds up the multiplication results from the most significant discarded bit
3937 * before the addition operations.
3938 *
3939 * **Description**:\n
3940 * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed top 16-bit Q15
3941 * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
3942 * adds the saturated most significant 32-bit Q31 multiplication results with the corresponding signed
3943 * 32-bit elements of Rd. If the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
3944 * saturated to the range and the OV bit is set to 1. The results after saturation are written to the
3945 * corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the most significant
3946 * 32-bit of the 48-bit Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of
3947 * the result before the addition operations.
3948 *
3949 * **Operations**:\n
3950 * ~~~
3951 * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[1] == 0x8000)) {
3952 * addop.W[x] = 0x7fffffff;
3953 * OV = 1;
3954 * } else {
3955 * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[1];
3956 * if (`.u` form) {
3957 * Mres[x][47:14] = Mres[x][47:14] + 1;
3958 * }
3959 * addop.W[x] = Mres[x][46:15]; // doubling
3960 * }
3961 * res[x] = Rd.W[x] + addop.W[x];
3962 * if (res[x] > (2^31)-1) {
3963 * res[x] = (2^31)-1;
3964 * OV = 1;
3965 * } else if (res[x] < -2^31) {
3966 * res[x] = -2^31;
3967 * OV = 1;
3968 * }
3969 * Rd.W[x] = res[x];
3970 * for RV32: x=0
3971 * for RV64: x=1...0
3972 * ~~~
3973 *
3974 * \param [in] t long type of value stored in t
3975 * \param [in] a unsigned long type of value stored in a
3976 * \param [in] b unsigned long type of value stored in b
3977 * \return value stored in long type
3978 */
__RV_KMMAWT2_U(long t,unsigned long a,unsigned long b)3979 __STATIC_FORCEINLINE long __RV_KMMAWT2_U(long t, unsigned long a, unsigned long b)
3980 {
3981 __ASM volatile("kmmawt2.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
3982 return t;
3983 }
3984 /* ===== Inline Function End for 3.47.2. KMMAWT2.u ===== */
3985
3986 /* ===== Inline Function Start for 3.48.1. KMMSB ===== */
3987 /**
3988 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
3989 * \brief KMMSB (SIMD Saturating MSW Signed Multiply Word and Subtract)
3990 * \details
3991 * **Type**: SIMD
3992 *
3993 * **Syntax**:\n
3994 * ~~~
3995 * KMMSB Rd, Rs1, Rs2
3996 * KMMSB.u Rd, Rs1, Rs2
3997 * ~~~
3998 *
3999 * **Purpose**:\n
4000 * Multiply the signed 32-bit integer elements of two registers and subtract the most
4001 * significant 32-bit results from the signed 32-bit elements of a third register. The subtraction results
4002 * are written to the third register. The `.u` form performs an additional rounding up operation on
4003 * the multiplication results before subtracting the most significant 32-bit part of the results.
4004 *
4005 * **Description**:\n
4006 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
4007 * and subtracts the most significant 32-bit multiplication results from the signed 32-bit elements of
4008 * Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the
4009 * range and the OV bit is set to 1. The results after saturation are written to Rd. The `.u` form of the
4010 * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
4011 * adding a 1 to bit 31 of the results.
4012 *
4013 * **Operations**:\n
4014 * ~~~
4015 * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
4016 * if (`.u` form) {
4017 * Round[x][32:0] = Mres[x][63:31] + 1;
4018 * res[x] = Rd.W[x] - Round[x][32:1];
4019 * } else {
4020 * res[x] = Rd.W[x] - Mres[x][63:32];
4021 * }
4022 * if (res[x] > (2^31)-1) {
4023 * res[x] = (2^31)-1;
4024 * OV = 1;
4025 * } else if (res[x] < -2^31) {
4026 * res[x] = -2^31;
4027 * OV = 1;
4028 * }
4029 * Rd.W[x] = res[x];
4030 * for RV32: x=0
4031 * for RV64: x=1...0
4032 * ~~~
4033 *
4034 * \param [in] t long type of value stored in t
4035 * \param [in] a long type of value stored in a
4036 * \param [in] b long type of value stored in b
4037 * \return value stored in long type
4038 */
__RV_KMMSB(long t,long a,long b)4039 __STATIC_FORCEINLINE long __RV_KMMSB(long t, long a, long b)
4040 {
4041 __ASM volatile("kmmsb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
4042 return t;
4043 }
4044 /* ===== Inline Function End for 3.48.1. KMMSB ===== */
4045
4046 /* ===== Inline Function Start for 3.48.2. KMMSB.u ===== */
4047 /**
4048 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
 * \brief KMMSB.u (SIMD Saturating MSW Signed Multiply Word and Subtract with Rounding)
4050 * \details
4051 * **Type**: SIMD
4052 *
4053 * **Syntax**:\n
4054 * ~~~
4055 * KMMSB Rd, Rs1, Rs2
4056 * KMMSB.u Rd, Rs1, Rs2
4057 * ~~~
4058 *
4059 * **Purpose**:\n
4060 * Multiply the signed 32-bit integer elements of two registers and subtract the most
4061 * significant 32-bit results from the signed 32-bit elements of a third register. The subtraction results
4062 * are written to the third register. The `.u` form performs an additional rounding up operation on
4063 * the multiplication results before subtracting the most significant 32-bit part of the results.
4064 *
4065 * **Description**:\n
4066 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
4067 * and subtracts the most significant 32-bit multiplication results from the signed 32-bit elements of
4068 * Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the
4069 * range and the OV bit is set to 1. The results after saturation are written to Rd. The `.u` form of the
4070 * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
4071 * adding a 1 to bit 31 of the results.
4072 *
4073 * **Operations**:\n
4074 * ~~~
4075 * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
4076 * if (`.u` form) {
4077 * Round[x][32:0] = Mres[x][63:31] + 1;
4078 * res[x] = Rd.W[x] - Round[x][32:1];
4079 * } else {
4080 * res[x] = Rd.W[x] - Mres[x][63:32];
4081 * }
4082 * if (res[x] > (2^31)-1) {
4083 * res[x] = (2^31)-1;
4084 * OV = 1;
4085 * } else if (res[x] < -2^31) {
4086 * res[x] = -2^31;
4087 * OV = 1;
4088 * }
4089 * Rd.W[x] = res[x];
4090 * for RV32: x=0
4091 * for RV64: x=1...0
4092 * ~~~
4093 *
4094 * \param [in] t long type of value stored in t
4095 * \param [in] a long type of value stored in a
4096 * \param [in] b long type of value stored in b
4097 * \return value stored in long type
4098 */
__RV_KMMSB_U(long t,long a,long b)4099 __STATIC_FORCEINLINE long __RV_KMMSB_U(long t, long a, long b)
4100 {
4101 __ASM volatile("kmmsb.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
4102 return t;
4103 }
4104 /* ===== Inline Function End for 3.48.2. KMMSB.u ===== */
4105
4106 /* ===== Inline Function Start for 3.49.1. KMMWB2 ===== */
4107 /**
4108 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
4109 * \brief KMMWB2 (SIMD Saturating MSW Signed Multiply Word and Bottom Half & 2)
4110 * \details
4111 * **Type**: SIMD
4112 *
4113 * **Syntax**:\n
4114 * ~~~
4115 * KMMWB2 Rd, Rs1, Rs2
4116 * KMMWB2.u Rd, Rs1, Rs2
4117 * ~~~
4118 *
4119 * **Purpose**:\n
4120 * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
4121 * corresponding 32-bit elements of another register, double the multiplication results and write the
4122 * saturated most significant 32-bit results to the corresponding 32-bit elements of a register. The `.u`
4123 * form rounds up the results from the most significant discarded bit.
4124 *
4125 * **Description**:\n
4126 * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed bottom 16-bit Q15
4127 * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
4128 * writes the saturated most significant 32-bit Q31 multiplication results to the corresponding 32-bit
4129 * elements of Rd. The `.u` form of the instruction rounds up the most significant 32-bit of the 48-bit
4130 * Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of the results.
4131 *
4132 * **Operations**:\n
4133 * ~~~
4134 * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[0] == 0x8000)) {
4135 * Rd.W[x] = 0x7fffffff;
4136 * OV = 1;
4137 * } else {
4138 * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[0];
4139 * if (`.u` form) {
4140 * Round[x][32:0] = Mres[x][46:14] + 1;
4141 * Rd.W[x] = Round[x][32:1];
4142 * } else {
4143 * Rd.W[x] = Mres[x][46:15];
4144 * }
4145 * }
4146 * for RV32: x=0
4147 * for RV64: x=1...0
4148 * ~~~
4149 *
4150 * \param [in] a long type of value stored in a
4151 * \param [in] b unsigned long type of value stored in b
4152 * \return value stored in long type
4153 */
__RV_KMMWB2(long a,unsigned long b)4154 __STATIC_FORCEINLINE long __RV_KMMWB2(long a, unsigned long b)
4155 {
4156 register long result;
4157 __ASM volatile("kmmwb2 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
4158 return result;
4159 }
4160 /* ===== Inline Function End for 3.49.1. KMMWB2 ===== */
4161
4162 /* ===== Inline Function Start for 3.49.2. KMMWB2.u ===== */
4163 /**
4164 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
4165 * \brief KMMWB2.u (SIMD Saturating MSW Signed Multiply Word and Bottom Half & 2 with Rounding)
4166 * \details
4167 * **Type**: SIMD
4168 *
4169 * **Syntax**:\n
4170 * ~~~
4171 * KMMWB2 Rd, Rs1, Rs2
4172 * KMMWB2.u Rd, Rs1, Rs2
4173 * ~~~
4174 *
4175 * **Purpose**:\n
4176 * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
4177 * corresponding 32-bit elements of another register, double the multiplication results and write the
4178 * saturated most significant 32-bit results to the corresponding 32-bit elements of a register. The `.u`
4179 * form rounds up the results from the most significant discarded bit.
4180 *
4181 * **Description**:\n
4182 * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed bottom 16-bit Q15
4183 * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
4184 * writes the saturated most significant 32-bit Q31 multiplication results to the corresponding 32-bit
4185 * elements of Rd. The `.u` form of the instruction rounds up the most significant 32-bit of the 48-bit
4186 * Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of the results.
4187 *
4188 * **Operations**:\n
4189 * ~~~
4190 * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[0] == 0x8000)) {
4191 * Rd.W[x] = 0x7fffffff;
4192 * OV = 1;
4193 * } else {
4194 * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[0];
4195 * if (`.u` form) {
4196 * Round[x][32:0] = Mres[x][46:14] + 1;
4197 * Rd.W[x] = Round[x][32:1];
4198 * } else {
4199 * Rd.W[x] = Mres[x][46:15];
4200 * }
4201 * }
4202 * for RV32: x=0
4203 * for RV64: x=1...0
4204 * ~~~
4205 *
4206 * \param [in] a long type of value stored in a
4207 * \param [in] b unsigned long type of value stored in b
4208 * \return value stored in long type
4209 */
__RV_KMMWB2_U(long a,unsigned long b)4210 __STATIC_FORCEINLINE long __RV_KMMWB2_U(long a, unsigned long b)
4211 {
4212 register long result;
4213 __ASM volatile("kmmwb2.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
4214 return result;
4215 }
4216 /* ===== Inline Function End for 3.49.2. KMMWB2.u ===== */
4217
4218 /* ===== Inline Function Start for 3.50.1. KMMWT2 ===== */
4219 /**
4220 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
4221 * \brief KMMWT2 (SIMD Saturating MSW Signed Multiply Word and Top Half & 2)
4222 * \details
4223 * **Type**: SIMD
4224 *
4225 * **Syntax**:\n
4226 * ~~~
4227 * KMMWT2 Rd, Rs1, Rs2
4228 * KMMWT2.u Rd, Rs1, Rs2
4229 * ~~~
4230 *
4231 * **Purpose**:\n
4232 * Multiply the signed 32-bit integer elements of one register and the top 16-bit of the
4233 * corresponding 32-bit elements of another register, double the multiplication results and write the
4234 * saturated most significant 32-bit results to the corresponding 32-bit elements of a register. The `.u`
4235 * form rounds up the results from the most significant discarded bit.
4236 *
4237 * **Description**:\n
4238 * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed top 16-bit Q15
4239 * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
4240 * writes the saturated most significant 32-bit Q31 multiplication results to the corresponding 32-bit
4241 * elements of Rd. The `.u` form of the instruction rounds up the most significant 32-bit of the 48-bit
4242 * Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of the results.
4243 *
4244 * **Operations**:\n
4245 * ~~~
4246 * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[1] == 0x8000)) {
4247 * Rd.W[x] = 0x7fffffff;
4248 * OV = 1;
4249 * } else {
4250 * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[1];
4251 * if (`.u` form) {
4252 * Round[x][32:0] = Mres[x][46:14] + 1;
4253 * Rd.W[x] = Round[x][32:1];
4254 * } else {
4255 * Rd.W[x] = Mres[x][46:15];
4256 * }
4257 * }
4258 * for RV32: x=0
4259 * for RV64: x=1...0
4260 * ~~~
4261 *
4262 * \param [in] a long type of value stored in a
4263 * \param [in] b unsigned long type of value stored in b
4264 * \return value stored in long type
4265 */
__RV_KMMWT2(long a,unsigned long b)4266 __STATIC_FORCEINLINE long __RV_KMMWT2(long a, unsigned long b)
4267 {
4268 register long result;
4269 __ASM volatile("kmmwt2 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
4270 return result;
4271 }
4272 /* ===== Inline Function End for 3.50.1. KMMWT2 ===== */
4273
4274 /* ===== Inline Function Start for 3.50.2. KMMWT2.u ===== */
4275 /**
4276 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
4277 * \brief KMMWT2.u (SIMD Saturating MSW Signed Multiply Word and Top Half & 2 with Rounding)
4278 * \details
4279 * **Type**: SIMD
4280 *
4281 * **Syntax**:\n
4282 * ~~~
4283 * KMMWT2 Rd, Rs1, Rs2
4284 * KMMWT2.u Rd, Rs1, Rs2
4285 * ~~~
4286 *
4287 * **Purpose**:\n
4288 * Multiply the signed 32-bit integer elements of one register and the top 16-bit of the
4289 * corresponding 32-bit elements of another register, double the multiplication results and write the
4290 * saturated most significant 32-bit results to the corresponding 32-bit elements of a register. The `.u`
4291 * form rounds up the results from the most significant discarded bit.
4292 *
4293 * **Description**:\n
4294 * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed top 16-bit Q15
4295 * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
4296 * writes the saturated most significant 32-bit Q31 multiplication results to the corresponding 32-bit
4297 * elements of Rd. The `.u` form of the instruction rounds up the most significant 32-bit of the 48-bit
4298 * Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of the results.
4299 *
4300 * **Operations**:\n
4301 * ~~~
4302 * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[1] == 0x8000)) {
4303 * Rd.W[x] = 0x7fffffff;
4304 * OV = 1;
4305 * } else {
4306 * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[1];
4307 * if (`.u` form) {
4308 * Round[x][32:0] = Mres[x][46:14] + 1;
4309 * Rd.W[x] = Round[x][32:1];
4310 * } else {
4311 * Rd.W[x] = Mres[x][46:15];
4312 * }
4313 * }
4314 * for RV32: x=0
4315 * for RV64: x=1...0
4316 * ~~~
4317 *
4318 * \param [in] a long type of value stored in a
4319 * \param [in] b unsigned long type of value stored in b
4320 * \return value stored in long type
4321 */
__RV_KMMWT2_U(long a,unsigned long b)4322 __STATIC_FORCEINLINE long __RV_KMMWT2_U(long a, unsigned long b)
4323 {
4324 register long result;
4325 __ASM volatile("kmmwt2.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
4326 return result;
4327 }
4328 /* ===== Inline Function End for 3.50.2. KMMWT2.u ===== */
4329
4330 /* ===== Inline Function Start for 3.51.1. KMSDA ===== */
4331 /**
4332 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
4333 * \brief KMSDA (SIMD Saturating Signed Multiply Two Halfs & Add & Subtract)
4334 * \details
4335 * **Type**: SIMD
4336 *
4337 * **Syntax**:\n
4338 * ~~~
4339 * KMSDA Rd, Rs1, Rs2
4340 * KMSXDA Rd, Rs1, Rs2
4341 * ~~~
4342 *
4343 * **Purpose**:\n
4344 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
4345 * subtract the two 32-bit results from the corresponding 32-bit elements of a third register. The
4346 * subtraction result may be saturated.
4347 * * KMSDA: rd.W[x] - top*top - bottom*bottom (per 32-bit element)
4348 * * KMSXDA: rd.W[x] - top*bottom - bottom*top (per 32-bit element)
4349 *
4350 * **Description**:\n
4351 * For the `KMSDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
4352 * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of
4353 * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
4354 * For the `KMSXDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
4355 * with the top 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of the
4356 * 32-bit elements of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2.
4357 * The two 32-bit multiplication results are then subtracted from the content of the corresponding 32-
4358 * bit elements of Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
4359 * saturated to the range and the OV bit is set to 1. The results after saturation are written to Rd. The
4360 * 16-bit contents are treated as signed integers.
4361 *
4362 * **Operations**:\n
4363 * ~~~
4364 * // KMSDA
4365 * res[x] = Rd.W[x] - (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
4366 * // KMSXDA
4367 * res[x] = Rd.W[x] - (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
4368 * if (res[x] > (2^31)-1) {
4369 * res[x] = (2^31)-1;
4370 * OV = 1;
4371 * } else if (res[x] < -2^31) {
4372 * res[x] = -2^31;
4373 * OV = 1;
4374 * }
4375 * Rd.W[x] = res[x];
4376 * for RV32: x=0
4377 * for RV64: x=1...0
4378 * ~~~
4379 *
4380 * \param [in] t long type of value stored in t
4381 * \param [in] a unsigned long type of value stored in a
4382 * \param [in] b unsigned long type of value stored in b
4383 * \return value stored in long type
4384 */
__RV_KMSDA(long t,unsigned long a,unsigned long b)4385 __STATIC_FORCEINLINE long __RV_KMSDA(long t, unsigned long a, unsigned long b)
4386 {
4387 __ASM volatile("kmsda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
4388 return t;
4389 }
4390 /* ===== Inline Function End for 3.51.1. KMSDA ===== */
4391
4392 /* ===== Inline Function Start for 3.51.2. KMSXDA ===== */
4393 /**
4394 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
4395 * \brief KMSXDA (SIMD Saturating Signed Crossed Multiply Two Halfs & Add & Subtract)
4396 * \details
4397 * **Type**: SIMD
4398 *
4399 * **Syntax**:\n
4400 * ~~~
4401 * KMSDA Rd, Rs1, Rs2
4402 * KMSXDA Rd, Rs1, Rs2
4403 * ~~~
4404 *
4405 * **Purpose**:\n
4406 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
4407 * subtract the two 32-bit results from the corresponding 32-bit elements of a third register. The
4408 * subtraction result may be saturated.
4409 * * KMSDA: rd.W[x] - top*top - bottom*bottom (per 32-bit element)
4410 * * KMSXDA: rd.W[x] - top*bottom - bottom*top (per 32-bit element)
4411 *
4412 * **Description**:\n
4413 * For the `KMSDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
4414 * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of
4415 * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
4416 * For the `KMSXDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
4417 * with the top 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of the
4418 * 32-bit elements of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2.
4419 * The two 32-bit multiplication results are then subtracted from the content of the corresponding 32-
4420 * bit elements of Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
4421 * saturated to the range and the OV bit is set to 1. The results after saturation are written to Rd. The
4422 * 16-bit contents are treated as signed integers.
4423 *
4424 * **Operations**:\n
4425 * ~~~
4426 * // KMSDA
4427 * res[x] = Rd.W[x] - (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
4428 * // KMSXDA
4429 * res[x] = Rd.W[x] - (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
4430 * if (res[x] > (2^31)-1) {
4431 * res[x] = (2^31)-1;
4432 * OV = 1;
4433 * } else if (res[x] < -2^31) {
4434 * res[x] = -2^31;
4435 * OV = 1;
4436 * }
4437 * Rd.W[x] = res[x];
4438 * for RV32: x=0
4439 * for RV64: x=1...0
4440 * ~~~
4441 *
4442 * \param [in] t long type of value stored in t
4443 * \param [in] a unsigned long type of value stored in a
4444 * \param [in] b unsigned long type of value stored in b
4445 * \return value stored in long type
4446 */
__RV_KMSXDA(long t,unsigned long a,unsigned long b)4447 __STATIC_FORCEINLINE long __RV_KMSXDA(long t, unsigned long a, unsigned long b)
4448 {
4449 __ASM volatile("kmsxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
4450 return t;
4451 }
4452 /* ===== Inline Function End for 3.51.2. KMSXDA ===== */
4453
4454 /* ===== Inline Function Start for 3.52. KMSR64 ===== */
4455 /**
4456 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
4457 * \brief KMSR64 (Signed Multiply and Saturating Subtract from 64-Bit Data)
4458 * \details
4459 * **Type**: DSP (64-bit Profile)
4460 *
4461 * **Syntax**:\n
4462 * ~~~
4463 * KMSR64 Rd, Rs1, Rs2
4464 * ~~~
4465 *
4466 * **Purpose**:\n
4467 * Multiply the 32-bit signed elements in two registers and subtract the 64-bit multiplication
4468 * results from the 64-bit signed data of a pair of registers (RV32) or a register (RV64). The result is
4469 * saturated to the Q63 range and written back to the pair of registers (RV32) or the register (RV64).
4470 *
4471 * **RV32 Description**:\n
4472 * This instruction multiplies the 32-bit signed data of Rs1 with that of Rs2. It
4473 * subtracts the 64-bit multiplication result from the 64-bit signed data of an even/odd pair of registers
4474 * specified by Rd(4,1) with unlimited precision. If the 64-bit subtraction result is beyond the Q63
4475 * number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The saturated
4476 * result is written back to the even/odd pair of registers specified by Rd(4,1).
4477 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
4478 * includes register 2d and 2d+1.
4479 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
4480 * of the pair contains the low 32-bit of the result.
4481 *
4482 * **RV64 Description**:\n
4483 * This instruction multiplies the 32-bit signed elements of Rs1 with that of Rs2. It
4484 * subtracts the 64-bit multiplication results from the 64-bit signed data in Rd with unlimited
4485 * precision. If the 64-bit subtraction result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is
4486 * saturated to the range and the OV bit is set to 1. The saturated result is written back to Rd.
4487 *
4488 * **Operations**:\n
4489 * ~~~
4490 * RV32:
4491 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
4492 * result = R[t_H].R[t_L] - (Rs1 * Rs2);
4493 * if (result > (2^63)-1) {
4494 * result = (2^63)-1; OV = 1;
4495 * } else if (result < -2^63) {
4496 * result = -2^63; OV = 1;
4497 * }
4498 * R[t_H].R[t_L] = result;
4499 * RV64:
4500 * // `result` has unlimited precision
4501 * result = Rd - (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]);
4502 * if (result > (2^63)-1) {
4503 * result = (2^63)-1; OV = 1;
4504 * } else if (result < -2^63) {
4505 * result = -2^63; OV = 1;
4506 * }
4507 * Rd = result;
4508 * ~~~
4509 *
4510 * \param [in] t long long type of value stored in t
4511 * \param [in] a long type of value stored in a
4512 * \param [in] b long type of value stored in b
4513 * \return value stored in long long type
4514 */
__RV_KMSR64(long long t,long a,long b)4515 __STATIC_FORCEINLINE long long __RV_KMSR64(long long t, long a, long b)
4516 {
4517 __ASM volatile("kmsr64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
4518 return t;
4519 }
4520 /* ===== Inline Function End for 3.52. KMSR64 ===== */
4521
4522 /* ===== Inline Function Start for 3.53. KSLLW ===== */
4523 /**
4524 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
4525 * \brief KSLLW (Saturating Shift Left Logical for Word)
4526 * \details
4527 * **Type**: DSP
4528 *
4529 * **Syntax**:\n
4530 * ~~~
4531 * KSLLW Rd, Rs1, Rs2
4532 * ~~~
4533 *
4534 * **Purpose**:\n
4535 * Do logical left shift operation with saturation on a 32-bit word. The shift amount is a
4536 * variable from a GPR.
4537 *
4538 * **Description**:\n
4539 * The first word data in Rs1 is left-shifted logically. The shifted out bits are filled with
4540 * zero and the shift amount is specified by the low-order 5-bits of the value in the Rs2 register. Any
4541 * shifted value greater than 2^31-1 is saturated to 2^31-1. Any shifted value smaller than -2^31 is saturated
4542 * to -2^31. And the saturated result is sign-extended and written to Rd. If any saturation is performed,
4543 * set OV bit to 1.
4544 *
4545 * **Operations**:\n
4546 * ~~~
4547 * sa = Rs2[4:0];
4548 * res[(31+sa):0] = Rs1.W[0] << sa;
4549 * if (res > (2^31)-1) {
4550 * res = 0x7fffffff; OV = 1;
4551 * } else if (res < -2^31) {
4552 * res = 0x80000000; OV = 1;
4553 * }
4554 * Rd[31:0] = res[31:0]; // RV32
4555 * Rd[63:0] = SE(res[31:0]); // RV64
4556 * ~~~
4557 *
4558 * \param [in] a long type of value stored in a
4559 * \param [in] b unsigned int type of value stored in b
4560 * \return value stored in long type
4561 */
__RV_KSLLW(long a,unsigned int b)4562 __STATIC_FORCEINLINE long __RV_KSLLW(long a, unsigned int b)
4563 {
4564 register long result;
4565 __ASM volatile("ksllw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
4566 return result;
4567 }
4568 /* ===== Inline Function End for 3.53. KSLLW ===== */
4569
4570 /* ===== Inline Function Start for 3.54. KSLLIW ===== */
4571 /**
4572 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
4573 * \brief KSLLIW (Saturating Shift Left Logical Immediate for Word)
4574 * \details
4575 * **Type**: DSP
4576 *
4577 * **Syntax**:\n
4578 * ~~~
4579 * KSLLIW Rd, Rs1, imm5u
4580 * ~~~
4581 *
4582 * **Purpose**:\n
4583 * Do logical left shift operation with saturation on a 32-bit word. The shift amount is an
4584 * immediate value.
4585 *
4586 * **Description**:\n
4587 * The first word data in Rs1 is left-shifted logically. The shifted out bits are filled with
4588 * zero and the shift amount is specified by the imm5u constant. Any shifted value greater than 2^31-1 is
4589 * saturated to 2^31-1. Any shifted value smaller than -2^31 is saturated to -2^31. And the saturated result is
4590 * sign-extended and written to Rd. If any saturation is performed, set OV bit to 1.
4591 *
4592 * **Operations**:\n
4593 * ~~~
4594 * sa = imm5u;
4595 * res[(31+sa):0] = Rs1.W[0] << sa;
4596 * if (res > (2^31)-1) {
4597 * res = 0x7fffffff; OV = 1;
4598 * } else if (res < -2^31) {
4599 * res = 0x80000000; OV = 1;
4600 * }
4601 * Rd[31:0] = res[31:0]; // RV32
4602 * Rd[63:0] = SE(res[31:0]); // RV64
4603 * ~~~
4604 *
4605 * \param [in] a long type of value stored in a
4606 * \param [in] b unsigned int type of value stored in b
4607 * \return value stored in long type
4608 */
/*
 * __RV_KSLLIW(a, b): saturating left shift of the low word of `a` by the
 * immediate `b` (instruction syntax: KSLLIW Rd, Rs1, imm5u). `b` is passed
 * with the "K" immediate constraint, so it must be a compile-time constant.
 *
 * `a` is evaluated exactly once and BEFORE the result temporary is declared;
 * together with the macro-specific temporary names this keeps a caller
 * expression such as __RV_KSLLIW(result, 3) from being captured by (and
 * reading) an uninitialized macro-local variable. `register` is not used
 * because it is ill-formed in C++17.
 */
#define __RV_KSLLIW(a, b)    \
    ({    \
        long __kslliw_rs1 = (long)(a);    \
        long __kslliw_rd;    \
        __ASM volatile("kslliw %0, %1, %2" : "=r"(__kslliw_rd) : "r"(__kslliw_rs1), "K"(b));    \
        __kslliw_rd;    \
    })
4616 /* ===== Inline Function End for 3.54. KSLLIW ===== */
4617
4618 /* ===== Inline Function Start for 3.55. KSLL8 ===== */
4619 /**
4620 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
4621 * \brief KSLL8 (SIMD 8-bit Saturating Shift Left Logical)
4622 * \details
4623 * **Type**: SIMD
4624 *
4625 * **Syntax**:\n
4626 * ~~~
4627 * KSLL8 Rd, Rs1, Rs2
4628 * ~~~
4629 *
4630 * **Purpose**:\n
4631 * Do 8-bit elements logical left shift operations with saturation simultaneously. The shift
4632 * amount is a variable from a GPR.
4633 *
4634 * **Description**:\n
4635 * The 8-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled
4636 * with zero and the shift amount is specified by the low-order 3-bits of the value in the Rs2 register.
4637 * Any shifted value greater than 2^7-1 is saturated to 2^7-1. Any shifted value smaller than -2^7 is
4638 * saturated to -2^7. And the saturated results are written to Rd. If any saturation is performed, set OV
4639 * bit to 1.
4640 *
4641 * **Operations**:\n
4642 * ~~~
4643 * sa = Rs2[2:0];
4644 * if (sa != 0) {
4645 * res[(7+sa):0] = Rs1.B[x] << sa;
4646 * if (res > (2^7)-1) {
4647 * res = 0x7f; OV = 1;
4648 * } else if (res < -2^7) {
4649 * res = 0x80; OV = 1;
4650 * }
4651 * Rd.B[x] = res[7:0];
4652 * } else {
4653 * Rd = Rs1;
4654 * }
4655 * for RV32: x=3...0,
4656 * for RV64: x=7...0
4657 * ~~~
4658 *
4659 * \param [in] a unsigned long type of value stored in a
4660 * \param [in] b unsigned int type of value stored in b
4661 * \return value stored in unsigned long type
4662 */
__RV_KSLL8(unsigned long a,unsigned int b)4663 __STATIC_FORCEINLINE unsigned long __RV_KSLL8(unsigned long a, unsigned int b)
4664 {
4665 register unsigned long result;
4666 __ASM volatile("ksll8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
4667 return result;
4668 }
4669 /* ===== Inline Function End for 3.55. KSLL8 ===== */
4670
4671 /* ===== Inline Function Start for 3.56. KSLLI8 ===== */
4672 /**
4673 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
4674 * \brief KSLLI8 (SIMD 8-bit Saturating Shift Left Logical Immediate)
4675 * \details
4676 * **Type**: SIMD
4677 *
4678 * **Syntax**:\n
4679 * ~~~
4680 * KSLLI8 Rd, Rs1, imm3u
4681 * ~~~
4682 *
4683 * **Purpose**:\n
4684 * Do 8-bit elements logical left shift operations with saturation simultaneously. The shift
4685 * amount is an immediate value.
4686 *
4687 * **Description**:\n
4688 * The 8-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled
4689 * with zero and the shift amount is specified by the imm3u constant. Any shifted value greater than
4690 * 2^7-1 is saturated to 2^7-1. Any shifted value smaller than -2^7 is saturated to -2^7. And the saturated
4691 * results are written to Rd. If any saturation is performed, set OV bit to 1.
4692 *
4693 * **Operations**:\n
4694 * ~~~
4695 * sa = imm3u[2:0];
4696 * if (sa != 0) {
4697 * res[(7+sa):0] = Rs1.B[x] << sa;
4698 * if (res > (2^7)-1) {
4699 * res = 0x7f; OV = 1;
4700 * } else if (res < -2^7) {
4701 * res = 0x80; OV = 1;
4702 * }
4703 * Rd.B[x] = res[7:0];
4704 * } else {
4705 * Rd = Rs1;
4706 * }
4707 * for RV32: x=3...0,
4708 * for RV64: x=7...0
4709 * ~~~
4710 *
4711 * \param [in] a unsigned long type of value stored in a
4712 * \param [in] b unsigned int type of value stored in b
4713 * \return value stored in unsigned long type
4714 */
/*
 * __RV_KSLLI8(a, b): saturating left shift of every 8-bit element of `a` by
 * the immediate `b` (instruction syntax: KSLLI8 Rd, Rs1, imm3u). `b` is passed
 * with the "K" immediate constraint, so it must be a compile-time constant.
 *
 * `a` is evaluated exactly once and BEFORE the result temporary is declared;
 * together with the macro-specific temporary names this keeps a caller
 * expression such as __RV_KSLLI8(result, 3) from being captured by (and
 * reading) an uninitialized macro-local variable. `register` is not used
 * because it is ill-formed in C++17.
 */
#define __RV_KSLLI8(a, b)    \
    ({    \
        unsigned long __kslli8_rs1 = (unsigned long)(a);    \
        unsigned long __kslli8_rd;    \
        __ASM volatile("kslli8 %0, %1, %2" : "=r"(__kslli8_rd) : "r"(__kslli8_rs1), "K"(b));    \
        __kslli8_rd;    \
    })
4722 /* ===== Inline Function End for 3.56. KSLLI8 ===== */
4723
4724 /* ===== Inline Function Start for 3.57. KSLL16 ===== */
4725 /**
4726 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
4727 * \brief KSLL16 (SIMD 16-bit Saturating Shift Left Logical)
4728 * \details
4729 * **Type**: SIMD
4730 *
4731 * **Syntax**:\n
4732 * ~~~
4733 * KSLL16 Rd, Rs1, Rs2
4734 * ~~~
4735 *
4736 * **Purpose**:\n
4737 * Do 16-bit elements logical left shift operations with saturation simultaneously. The shift
4738 * amount is a variable from a GPR.
4739 *
4740 * **Description**:\n
4741 * The 16-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled
4742 * with zero and the shift amount is specified by the low-order 4-bits of the value in the Rs2 register.
4743 * Any shifted value greater than 2^15-1 is saturated to 2^15-1. Any shifted value smaller than -2^15 is
4744 * saturated to -2^15. And the saturated results are written to Rd. If any saturation is performed, set OV
4745 * bit to 1.
4746 *
4747 * **Operations**:\n
4748 * ~~~
4749 * sa = Rs2[3:0];
4750 * if (sa != 0) {
4751 * res[(15+sa):0] = Rs1.H[x] << sa;
4752 * if (res > (2^15)-1) {
4753 * res = 0x7fff; OV = 1;
4754 * } else if (res < -2^15) {
4755 * res = 0x8000; OV = 1;
4756 * }
4757 * Rd.H[x] = res[15:0];
4758 * } else {
4759 * Rd = Rs1;
4760 * }
4761 * for RV32: x=1...0,
4762 * for RV64: x=3...0
4763 * ~~~
4764 *
4765 * \param [in] a unsigned long type of value stored in a
4766 * \param [in] b unsigned int type of value stored in b
4767 * \return value stored in unsigned long type
4768 */
__RV_KSLL16(unsigned long a,unsigned int b)4769 __STATIC_FORCEINLINE unsigned long __RV_KSLL16(unsigned long a, unsigned int b)
4770 {
4771 register unsigned long result;
4772 __ASM volatile("ksll16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
4773 return result;
4774 }
4775 /* ===== Inline Function End for 3.57. KSLL16 ===== */
4776
4777 /* ===== Inline Function Start for 3.58. KSLLI16 ===== */
4778 /**
4779 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
4780 * \brief KSLLI16 (SIMD 16-bit Saturating Shift Left Logical Immediate)
4781 * \details
4782 * **Type**: SIMD
4783 *
4784 * **Syntax**:\n
4785 * ~~~
4786 * KSLLI16 Rd, Rs1, imm4u
4787 * ~~~
4788 *
4789 * **Purpose**:\n
4790 * Do 16-bit elements logical left shift operations with saturation simultaneously. The shift
4791 * amount is an immediate value.
4792 *
4793 * **Description**:\n
4794 * The 16-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled
4795 * with zero and the shift amount is specified by the imm4u constant. Any shifted value greater than
4796 * 2^15-1 is saturated to 2^15-1. Any shifted value smaller than -2^15 is saturated to -2^15. And the saturated
4797 * results are written to Rd. If any saturation is performed, set OV bit to 1.
4798 *
4799 * **Operations**:\n
4800 * ~~~
4801 * sa = imm4u[3:0];
4802 * if (sa != 0) {
4803 * res[(15+sa):0] = Rs1.H[x] << sa;
4804 * if (res > (2^15)-1) {
4805 * res = 0x7fff; OV = 1;
4806 * } else if (res < -2^15) {
4807 * res = 0x8000; OV = 1;
4808 * }
4809 * Rd.H[x] = res[15:0];
4810 * } else {
4811 * Rd = Rs1;
4812 * }
4813 * for RV32: x=1...0,
4814 * for RV64: x=3...0
4815 * ~~~
4816 *
4817 * \param [in] a unsigned long type of value stored in a
4818 * \param [in] b unsigned int type of value stored in b
4819 * \return value stored in unsigned long type
4820 */
/*
 * __RV_KSLLI16(a, b): saturating left shift of every 16-bit element of `a` by
 * the immediate `b` (instruction syntax: KSLLI16 Rd, Rs1, imm4u). `b` is
 * passed with the "K" immediate constraint, so it must be a compile-time
 * constant.
 *
 * `a` is evaluated exactly once and BEFORE the result temporary is declared;
 * together with the macro-specific temporary names this keeps a caller
 * expression such as __RV_KSLLI16(result, 3) from being captured by (and
 * reading) an uninitialized macro-local variable. `register` is not used
 * because it is ill-formed in C++17.
 */
#define __RV_KSLLI16(a, b)    \
    ({    \
        unsigned long __kslli16_rs1 = (unsigned long)(a);    \
        unsigned long __kslli16_rd;    \
        __ASM volatile("kslli16 %0, %1, %2" : "=r"(__kslli16_rd) : "r"(__kslli16_rs1), "K"(b));    \
        __kslli16_rd;    \
    })
4828 /* ===== Inline Function End for 3.58. KSLLI16 ===== */
4829
4830 /* ===== Inline Function Start for 3.59.1. KSLRA8 ===== */
4831 /**
4832 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
4833 * \brief KSLRA8 (SIMD 8-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
4834 * \details
4835 * **Type**: SIMD
4836 *
4837 * **Syntax**:\n
4838 * ~~~
4839 * KSLRA8 Rd, Rs1, Rs2
4840 * KSLRA8.u Rd, Rs1, Rs2
4841 * ~~~
4842 *
4843 * **Purpose**:\n
4844 * Do 8-bit elements logical left (positive) or arithmetic right (negative) shift operation with
4845 * Q7 saturation for the left shift. The `.u` form performs additional rounding up operations for the
4846 * right shift.
4847 *
4848 * **Description**:\n
4849 * The 8-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
4850 * based on the value of Rs2[3:0]. Rs2[3:0] is in the signed range of [-2^3, 2^3-1]. A positive Rs2[3:0] means
4851 * logical left shift and a negative Rs2[3:0] means arithmetic right shift. The shift amount is the
4852 * absolute value of Rs2[3:0]. However, the behavior of `Rs2[3:0]==-2^3 (0x8)` is defined to be
4853 * equivalent to the behavior of `Rs2[3:0]==-(2^3-1) (0x9)`.
4854 * The left-shifted results are saturated to the 8-bit signed integer range of [-2^7, 2^7-1]. For the `.u` form
4855 * of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
4856 * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
4857 * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:4] will not affect
4858 * this instruction.
4859 *
4860 * **Operations**:\n
4861 * ~~~
4862 * if (Rs2[3:0] < 0) {
4863 * sa = -Rs2[3:0];
4864 * sa = (sa == 8)? 7 : sa;
4865 * if (`.u` form) {
4866 * res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
4867 * Rd.B[x] = res[7:0];
4868 * } else {
4869 * Rd.B[x] = SE8(Rs1.B[x][7:sa]);
4870 * }
4871 * } else {
4872 * sa = Rs2[2:0];
4873 * res[(7+sa):0] = Rs1.B[x] <<(logic) sa;
4874 * if (res > (2^7)-1) {
4875 * res[7:0] = 0x7f; OV = 1;
4876 * } else if (res < -2^7) {
4877 * res[7:0] = 0x80; OV = 1;
4878 * }
4879 * Rd.B[x] = res[7:0];
4880 * }
4881 * for RV32: x=3...0,
4882 * for RV64: x=7...0
4883 * ~~~
4884 *
4885 * \param [in] a unsigned long type of value stored in a
4886 * \param [in] b int type of value stored in b
4887 * \return value stored in unsigned long type
4888 */
__RV_KSLRA8(unsigned long a,int b)4889 __STATIC_FORCEINLINE unsigned long __RV_KSLRA8(unsigned long a, int b)
4890 {
4891 register unsigned long result;
4892 __ASM volatile("kslra8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
4893 return result;
4894 }
4895 /* ===== Inline Function End for 3.59.1. KSLRA8 ===== */
4896
4897 /* ===== Inline Function Start for 3.59.2. KSLRA8.u ===== */
4898 /**
4899 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
4900 * \brief KSLRA8.u (SIMD 8-bit Shift Left Logical with Saturation or Rounding Shift Right Arithmetic)
4901 * \details
4902 * **Type**: SIMD
4903 *
4904 * **Syntax**:\n
4905 * ~~~
4906 * KSLRA8 Rd, Rs1, Rs2
4907 * KSLRA8.u Rd, Rs1, Rs2
4908 * ~~~
4909 *
4910 * **Purpose**:\n
4911 * Do 8-bit elements logical left (positive) or arithmetic right (negative) shift operation with
4912 * Q7 saturation for the left shift. The `.u` form performs additional rounding up operations for the
4913 * right shift.
4914 *
4915 * **Description**:\n
4916 * The 8-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
4917 * based on the value of Rs2[3:0]. Rs2[3:0] is in the signed range of [-2^3, 2^3-1]. A positive Rs2[3:0] means
4918 * logical left shift and a negative Rs2[3:0] means arithmetic right shift. The shift amount is the
4919 * absolute value of Rs2[3:0]. However, the behavior of `Rs2[3:0]==-2^3 (0x8)` is defined to be
4920 * equivalent to the behavior of `Rs2[3:0]==-(2^3-1) (0x9)`.
4921 * The left-shifted results are saturated to the 8-bit signed integer range of [-2^7, 2^7-1]. For the `.u` form
4922 * of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
4923 * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
4924 * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:4] will not affect
4925 * this instruction.
4926 *
4927 * **Operations**:\n
4928 * ~~~
4929 * if (Rs2[3:0] < 0) {
4930 * sa = -Rs2[3:0];
4931 * sa = (sa == 8)? 7 : sa;
4932 * if (`.u` form) {
4933 * res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
4934 * Rd.B[x] = res[7:0];
4935 * } else {
4936 * Rd.B[x] = SE8(Rs1.B[x][7:sa]);
4937 * }
4938 * } else {
4939 * sa = Rs2[2:0];
4940 * res[(7+sa):0] = Rs1.B[x] <<(logic) sa;
4941 * if (res > (2^7)-1) {
4942 * res[7:0] = 0x7f; OV = 1;
4943 * } else if (res < -2^7) {
4944 * res[7:0] = 0x80; OV = 1;
4945 * }
4946 * Rd.B[x] = res[7:0];
4947 * }
4948 * for RV32: x=3...0,
4949 * for RV64: x=7...0
4950 * ~~~
4951 *
4952 * \param [in] a unsigned long type of value stored in a
4953 * \param [in] b int type of value stored in b
4954 * \return value stored in unsigned long type
4955 */
__RV_KSLRA8_U(unsigned long a,int b)4956 __STATIC_FORCEINLINE unsigned long __RV_KSLRA8_U(unsigned long a, int b)
4957 {
4958 register unsigned long result;
4959 __ASM volatile("kslra8.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
4960 return result;
4961 }
4962 /* ===== Inline Function End for 3.59.2. KSLRA8.u ===== */
4963
4964 /* ===== Inline Function Start for 3.60.1. KSLRA16 ===== */
4965 /**
4966 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
4967 * \brief KSLRA16 (SIMD 16-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
4968 * \details
4969 * **Type**: SIMD
4970 *
4971 * **Syntax**:\n
4972 * ~~~
4973 * KSLRA16 Rd, Rs1, Rs2
4974 * KSLRA16.u Rd, Rs1, Rs2
4975 * ~~~
4976 *
4977 * **Purpose**:\n
4978 * Do 16-bit elements logical left (positive) or arithmetic right (negative) shift operation with
4979 * Q15 saturation for the left shift. The `.u` form performs additional rounding up operations for the
4980 * right shift.
4981 *
4982 * **Description**:\n
4983 * The 16-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
4984 * based on the value of Rs2[4:0]. Rs2[4:0] is in the signed range of [-2^4, 2^4-1]. A positive Rs2[4:0] means
4985 * logical left shift and a negative Rs2[4:0] means arithmetic right shift. The shift amount is the
4986 * absolute value of Rs2[4:0]. However, the behavior of `Rs2[4:0]==-2^4 (0x10)` is defined to be
4987 * equivalent to the behavior of `Rs2[4:0]==-(2^4-1) (0x11)`.
4988 * The left-shifted results are saturated to the 16-bit signed integer range of [-2^15, 2^15-1]. For the `.u`
4989 * form of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
4990 * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
4991 * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:5] will not affect
4992 * this instruction.
4993 *
4994 * **Operations**:\n
4995 * ~~~
4996 * if (Rs2[4:0] < 0) {
4997 * sa = -Rs2[4:0];
4998 * sa = (sa == 16)? 15 : sa;
4999 * if (`.u` form) {
5000 * res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
5001 * Rd.H[x] = res[15:0];
5002 * } else {
5003 * Rd.H[x] = SE16(Rs1.H[x][15:sa]);
5004 * }
5005 * } else {
5006 * sa = Rs2[3:0];
5007 * res[(15+sa):0] = Rs1.H[x] <<(logic) sa;
5008 * if (res > (2^15)-1) {
5009 * res[15:0] = 0x7fff; OV = 1;
5010 * } else if (res < -2^15) {
5011 * res[15:0] = 0x8000; OV = 1;
5012 * }
5013 * Rd.H[x] = res[15:0];
5014 * }
5015 * for RV32: x=1...0,
5016 * for RV64: x=3...0
5017 * ~~~
5018 *
5019 * \param [in] a unsigned long type of value stored in a
5020 * \param [in] b int type of value stored in b
5021 * \return value stored in unsigned long type
5022 */
__RV_KSLRA16(unsigned long a,int b)5023 __STATIC_FORCEINLINE unsigned long __RV_KSLRA16(unsigned long a, int b)
5024 {
5025 register unsigned long result;
5026 __ASM volatile("kslra16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5027 return result;
5028 }
5029 /* ===== Inline Function End for 3.60.1. KSLRA16 ===== */
5030
5031 /* ===== Inline Function Start for 3.60.2. KSLRA16.u ===== */
5032 /**
5033 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
5034 * \brief KSLRA16.u (SIMD 16-bit Shift Left Logical with Saturation or Rounding Shift Right Arithmetic)
5035 * \details
5036 * **Type**: SIMD
5037 *
5038 * **Syntax**:\n
5039 * ~~~
5040 * KSLRA16 Rd, Rs1, Rs2
5041 * KSLRA16.u Rd, Rs1, Rs2
5042 * ~~~
5043 *
5044 * **Purpose**:\n
5045 * Do 16-bit elements logical left (positive) or arithmetic right (negative) shift operation with
5046 * Q15 saturation for the left shift. The `.u` form performs additional rounding up operations for the
5047 * right shift.
5048 *
5049 * **Description**:\n
5050 * The 16-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
5051 * based on the value of Rs2[4:0]. Rs2[4:0] is in the signed range of [-2^4, 2^4-1]. A positive Rs2[4:0] means
5052 * logical left shift and a negative Rs2[4:0] means arithmetic right shift. The shift amount is the
5053 * absolute value of Rs2[4:0]. However, the behavior of `Rs2[4:0]==-2^4 (0x10)` is defined to be
5054 * equivalent to the behavior of `Rs2[4:0]==-(2^4-1) (0x11)`.
5055 * The left-shifted results are saturated to the 16-bit signed integer range of [-2^15, 2^15-1]. For the `.u`
5056 * form of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
5057 * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
5058 * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:5] will not affect
5059 * this instruction.
5060 *
5061 * **Operations**:\n
5062 * ~~~
5063 * if (Rs2[4:0] < 0) {
5064 * sa = -Rs2[4:0];
5065 * sa = (sa == 16)? 15 : sa;
5066 * if (`.u` form) {
5067 * res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
5068 * Rd.H[x] = res[15:0];
5069 * } else {
5070 * Rd.H[x] = SE16(Rs1.H[x][15:sa]);
5071 * }
5072 * } else {
5073 * sa = Rs2[3:0];
5074 * res[(15+sa):0] = Rs1.H[x] <<(logic) sa;
5075 * if (res > (2^15)-1) {
5076 * res[15:0] = 0x7fff; OV = 1;
5077 * } else if (res < -2^15) {
5078 * res[15:0] = 0x8000; OV = 1;
5079 * }
5080 * Rd.H[x] = res[15:0];
5081 * }
5082 * for RV32: x=1...0,
5083 * for RV64: x=3...0
5084 * ~~~
5085 *
5086 * \param [in] a unsigned long type of value stored in a
5087 * \param [in] b int type of value stored in b
5088 * \return value stored in unsigned long type
5089 */
__RV_KSLRA16_U(unsigned long a,int b)5090 __STATIC_FORCEINLINE unsigned long __RV_KSLRA16_U(unsigned long a, int b)
5091 {
5092 register unsigned long result;
5093 __ASM volatile("kslra16.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5094 return result;
5095 }
5096 /* ===== Inline Function End for 3.60.2. KSLRA16.u ===== */
5097
5098 /* ===== Inline Function Start for 3.61. KSLRAW ===== */
5099 /**
5100 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
5101 * \brief KSLRAW (Shift Left Logical with Q31 Saturation or Shift Right Arithmetic)
5102 * \details
5103 * **Type**: DSP
5104 *
5105 * **Syntax**:\n
5106 * ~~~
5107 * KSLRAW Rd, Rs1, Rs2
5108 * ~~~
5109 *
5110 * **Purpose**:\n
5111 * Perform a logical left (positive) or arithmetic right (negative) shift operation with Q31
5112 * saturation for the left shift on a 32-bit data.
5113 *
5114 * **Description**:\n
5115 * The lower 32-bit content of Rs1 is left-shifted logically or right-shifted arithmetically
5116 * based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-2^5, 2^5-1]. A positive Rs2[5:0] means
5117 * logical left shift and a negative Rs2[5:0] means arithmetic right shift. The shift amount is the
5118 * absolute value of Rs2[5:0] clamped to the actual shift range of [0, 31].
5119 * The left-shifted result is saturated to the 32-bit signed integer range of [-2^31, 2^31-1]. After the shift
5120 * operation, the final result is bit-31 sign-extended and written to Rd. If any saturation happens, this
5121 * instruction sets the OV flag. The value of Rs2[31:6] will not affect the operation of this instruction.
5122 *
5123 * **Operations**:\n
5124 * ~~~
5125 * if (Rs2[5:0] < 0) {
5126 * sa = -Rs2[5:0];
5127 * sa = (sa == 32)? 31 : sa;
5128 * res[31:0] = Rs1.W[0] >>(arith) sa;
5129 * } else {
5130 * sa = Rs2[5:0];
5131 * tmp = Rs1.W[0] <<(logic) sa;
5132 * if (tmp > (2^31)-1) {
5133 * res[31:0] = (2^31)-1;
5134 * OV = 1;
5135 * } else if (tmp < -2^31) {
5136 * res[31:0] = -2^31;
5137 * OV = 1;
5138 * } else {
5139 * res[31:0] = tmp[31:0];
5140 * }
5141 * }
5142 * Rd = res[31:0]; // RV32
5143 * Rd = SE64(res[31:0]); // RV64
5144 * ~~~
5145 *
5146 * \param [in] a int type of value stored in a
5147 * \param [in] b int type of value stored in b
5148 * \return value stored in long type
5149 */
__RV_KSLRAW(int a,int b)5150 __STATIC_FORCEINLINE long __RV_KSLRAW(int a, int b)
5151 {
5152 register long result;
5153 __ASM volatile("kslraw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5154 return result;
5155 }
5156 /* ===== Inline Function End for 3.61. KSLRAW ===== */
5157
5158 /* ===== Inline Function Start for 3.62. KSLRAW.u ===== */
5159 /**
5160 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
5161 * \brief KSLRAW.u (Shift Left Logical with Q31 Saturation or Rounding Shift Right Arithmetic)
5162 * \details
5163 * **Type**: DSP
5164 *
5165 * **Syntax**:\n
5166 * ~~~
5167 * KSLRAW.u Rd, Rs1, Rs2
5168 * ~~~
5169 *
5170 * **Purpose**:\n
5171 * Perform a logical left (positive) or arithmetic right (negative) shift operation with Q31
5172 * saturation for the left shift and a rounding up operation for the right shift on a 32-bit data.
5173 *
5174 * **Description**:\n
5175 * The lower 32-bit content of Rs1 is left-shifted logically or right-shifted arithmetically
5176 * based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-2^5, 2^5-1]. A positive Rs2[5:0] means
5177 * logical left shift and a negative Rs2[5:0] means arithmetic right shift. The shift amount is the
5178 * absolute value of Rs2[5:0] clamped to the actual shift range of [0, 31].
5179 * The left-shifted result is saturated to the 32-bit signed integer range of [-2^31, 2^31-1]. The right-shifted
5180 * result is added a 1 to the most significant discarded bit position for rounding effect. After the shift,
5181 * saturation, or rounding, the final result is bit-31 sign-extended and written to Rd. If any saturation
5182 * happens, this instruction sets the OV flag. The value of Rs2[31:6] will not affect the operation of this
5183 * instruction.
5184 *
5185 * **Operations**:\n
5186 * ~~~
5187 * if (Rs2[5:0] < 0) {
5188 * sa = -Rs2[5:0];
5189 * sa = (sa == 32)? 31 : sa;
5190 * res[31:-1] = SE33(Rs1[31:(sa-1)]) + 1;
5191 * rst[31:0] = res[31:0];
5192 * } else {
5193 * sa = Rs2[5:0];
5194 * tmp = Rs1.W[0] <<(logic) sa;
5195 * if (tmp > (2^31)-1) {
5196 * rst[31:0] = (2^31)-1;
5197 * OV = 1;
5198 * } else if (tmp < -2^31) {
5199 * rst[31:0] = -2^31;
5200 * OV = 1;
5201 * } else {
5202 * rst[31:0] = tmp[31:0];
5203 * }
5204 * }
5205 * Rd = rst[31:0]; // RV32
5206 * Rd = SE64(rst[31:0]); // RV64
5207 * ~~~
5208 *
5209 * \param [in] a int type of value stored in a
5210 * \param [in] b int type of value stored in b
5211 * \return value stored in long type
5212 */
__RV_KSLRAW_U(int a,int b)5213 __STATIC_FORCEINLINE long __RV_KSLRAW_U(int a, int b)
5214 {
5215 register long result;
5216 __ASM volatile("kslraw.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5217 return result;
5218 }
5219 /* ===== Inline Function End for 3.62. KSLRAW.u ===== */
5220
5221 /* ===== Inline Function Start for 3.63. KSTAS16 ===== */
5222 /**
5223 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
5224 * \brief KSTAS16 (SIMD 16-bit Signed Saturating Straight Addition & Subtraction)
5225 * \details
5226 * **Type**: SIMD
5227 *
5228 * **Syntax**:\n
5229 * ~~~
5230 * KSTAS16 Rd, Rs1, Rs2
5231 * ~~~
5232 *
5233 * **Purpose**:\n
5234 * Do 16-bit signed integer element saturating addition and 16-bit signed integer element
5235 * saturating subtraction in a 32-bit chunk simultaneously. Operands are from corresponding
5236 * positions in 32-bit chunks.
5237 *
5238 * **Description**:\n
5239 * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in
5240 * Rs1 with the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2; at the same time, it
5241 * subtracts the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2 from the 16-bit signed
5242 * integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number
5243 * range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated
5244 * results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit chunks in Rd for
5245 * subtraction.
5246 *
5247 * **Operations**:\n
5248 * ~~~
5249 * res1 = Rs1.W[x][31:16] + Rs2.W[x][31:16];
5250 * res2 = Rs1.W[x][15:0] - Rs2.W[x][15:0];
5251 * for (res in [res1, res2]) {
5252 * if (res > (2^15)-1) {
5253 * res = (2^15)-1;
5254 * OV = 1;
5255 * } else if (res < -2^15) {
5256 * res = -2^15;
5257 * OV = 1;
5258 * }
5259 * }
5260 * Rd.W[x][31:16] = res1;
5261 * Rd.W[x][15:0] = res2;
5262 * for RV32, x=0
5263 * for RV64, x=1...0
5264 * ~~~
5265 *
5266 * \param [in] a unsigned long type of value stored in a
5267 * \param [in] b unsigned long type of value stored in b
5268 * \return value stored in unsigned long type
5269 */
__RV_KSTAS16(unsigned long a,unsigned long b)5270 __STATIC_FORCEINLINE unsigned long __RV_KSTAS16(unsigned long a, unsigned long b)
5271 {
5272 register unsigned long result;
5273 __ASM volatile("kstas16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5274 return result;
5275 }
5276 /* ===== Inline Function End for 3.63. KSTAS16 ===== */
5277
5278 /* ===== Inline Function Start for 3.64. KSTSA16 ===== */
5279 /**
5280 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
5281 * \brief KSTSA16 (SIMD 16-bit Signed Saturating Straight Subtraction & Addition)
5282 * \details
5283 * **Type**: SIMD
5284 *
5285 * **Syntax**:\n
5286 * ~~~
5287 * KSTSA16 Rd, Rs1, Rs2
5288 * ~~~
5289 *
5290 * **Purpose**:\n
5291 * Do 16-bit signed integer element saturating subtraction and 16-bit signed integer element
5292 * saturating addition in a 32-bit chunk simultaneously. Operands are from corresponding positions in
5293 * 32-bit chunks.
5294 *
5295 * **Description**:\n
5296 * This instruction subtracts the 16-bit signed integer element in [31:16] of 32-bit chunks
5297 * in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1; at the same time, it
5298 * adds the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2 with the 16-bit signed integer
5299 * element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number range (-2^15
5300 * <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated results are
5301 * written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks in Rd for
5302 * addition.
5303 *
5304 * **Operations**:\n
5305 * ~~~
5306 * res1 = Rs1.W[x][31:16] - Rs2.W[x][31:16];
5307 * res2 = Rs1.W[x][15:0] + Rs2.W[x][15:0];
5308 * for (res in [res1, res2]) {
5309 * if (res > (2^15)-1) {
5310 * res = (2^15)-1;
5311 * OV = 1;
5312 * } else if (res < -2^15) {
5313 * res = -2^15;
5314 * OV = 1;
5315 * }
5316 * }
5317 * Rd.W[x][31:16] = res1;
5318 * Rd.W[x][15:0] = res2;
5319 * for RV32, x=0
5320 * for RV64, x=1...0
5321 * ~~~
5322 *
5323 * \param [in] a unsigned long type of value stored in a
5324 * \param [in] b unsigned long type of value stored in b
5325 * \return value stored in unsigned long type
5326 */
__RV_KSTSA16(unsigned long a,unsigned long b)5327 __STATIC_FORCEINLINE unsigned long __RV_KSTSA16(unsigned long a, unsigned long b)
5328 {
5329 register unsigned long result;
5330 __ASM volatile("kstsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5331 return result;
5332 }
5333 /* ===== Inline Function End for 3.64. KSTSA16 ===== */
5334
5335 /* ===== Inline Function Start for 3.65. KSUB8 ===== */
5336 /**
5337 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
5338 * \brief KSUB8 (SIMD 8-bit Signed Saturating Subtraction)
5339 * \details
5340 * **Type**: SIMD
5341 *
5342 * **Syntax**:\n
5343 * ~~~
5344 * KSUB8 Rd, Rs1, Rs2
5345 * ~~~
5346 *
5347 * **Purpose**:\n
5348 * Do 8-bit signed elements saturating subtractions simultaneously.
5349 *
5350 * **Description**:\n
5351 * This instruction subtracts the 8-bit signed integer elements in Rs2 from the 8-bit
5352 * signed integer elements in Rs1. If any of the results are beyond the Q7 number range (-2^7 <= Q7 <= 2^7-1),
5353 * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
5354 *
5355 * **Operations**:\n
5356 * ~~~
5357 * res[x] = Rs1.B[x] - Rs2.B[x];
5358 * if (res[x] > (2^7)-1) {
5359 * res[x] = (2^7)-1;
5360 * OV = 1;
5361 * } else if (res[x] < -2^7) {
5362 * res[x] = -2^7;
5363 * OV = 1;
5364 * }
5365 * Rd.B[x] = res[x];
5366 * for RV32: x=3...0,
5367 * for RV64: x=7...0
5368 * ~~~
5369 *
5370 * \param [in] a unsigned long type of value stored in a
5371 * \param [in] b unsigned long type of value stored in b
5372 * \return value stored in unsigned long type
5373 */
__RV_KSUB8(unsigned long a,unsigned long b)5374 __STATIC_FORCEINLINE unsigned long __RV_KSUB8(unsigned long a, unsigned long b)
5375 {
5376 register unsigned long result;
5377 __ASM volatile("ksub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5378 return result;
5379 }
5380 /* ===== Inline Function End for 3.65. KSUB8 ===== */
5381
5382 /* ===== Inline Function Start for 3.66. KSUB16 ===== */
5383 /**
5384 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
5385 * \brief KSUB16 (SIMD 16-bit Signed Saturating Subtraction)
5386 * \details
5387 * **Type**: SIMD
5388 *
5389 * **Syntax**:\n
5390 * ~~~
5391 * KSUB16 Rd, Rs1, Rs2
5392 * ~~~
5393 *
5394 * **Purpose**:\n
5395 * Do 16-bit signed integer elements saturating subtractions simultaneously.
5396 *
5397 * **Description**:\n
5398 * This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit
5399 * signed integer elements in Rs1. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <=
5400 * 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to
5401 * Rd.
5402 *
5403 * **Operations**:\n
5404 * ~~~
5405 * res[x] = Rs1.H[x] - Rs2.H[x];
5406 * if (res[x] > (2^15)-1) {
5407 * res[x] = (2^15)-1;
5408 * OV = 1;
5409 * } else if (res[x] < -2^15) {
5410 * res[x] = -2^15;
5411 * OV = 1;
5412 * }
5413 * Rd.H[x] = res[x];
5414 * for RV32: x=1...0,
5415 * for RV64: x=3...0
5416 * ~~~
5417 *
5418 * \param [in] a unsigned long type of value stored in a
5419 * \param [in] b unsigned long type of value stored in b
5420 * \return value stored in unsigned long type
5421 */
__RV_KSUB16(unsigned long a,unsigned long b)5422 __STATIC_FORCEINLINE unsigned long __RV_KSUB16(unsigned long a, unsigned long b)
5423 {
5424 register unsigned long result;
5425 __ASM volatile("ksub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5426 return result;
5427 }
5428 /* ===== Inline Function End for 3.66. KSUB16 ===== */
5429
5430 /* ===== Inline Function Start for 3.67. KSUB64 ===== */
5431 /**
5432 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
5433 * \brief KSUB64 (64-bit Signed Saturating Subtraction)
5434 * \details
5435 * **Type**: DSP (64-bit Profile)
5436 *
5437 * **Syntax**:\n
5438 * ~~~
5439 * KSUB64 Rd, Rs1, Rs2
5440 * ~~~
5441 *
5442 * **Purpose**:\n
5443 * Perform a 64-bit signed integer subtraction. The result is saturated to the Q63 range.
5444 *
5445 * **RV32 Description**:\n
5446 * This instruction subtracts the 64-bit signed integer of an even/odd pair of
5447 * registers specified by Rs2(4,1) from the 64-bit signed integer of an even/odd pair of registers
5448 * specified by Rs1(4,1). If the 64-bit result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is
5449 * saturated to the range and the OV bit is set to 1. The saturated result is then written to an even/odd
5450 * pair of registers specified by Rd(4,1).
5451 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
5452 * includes register 2d and 2d+1.
5453 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
5454 * register of the pair contains the low 32-bit of the operand.
5455 *
5456 * **RV64 Description**:\n
5457 * This instruction subtracts the 64-bit signed integer of Rs2 from the 64-bit signed
5458 * integer of Rs1. If the 64-bit result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated
5459 * to the range and the OV bit is set to 1. The saturated result is then written to Rd.
5460 *
5461 * **Operations**:\n
5462 * ~~~
5463 * RV32:
5464 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
5465 * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
5466 * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
5467 * result = R[a_H].R[a_L] - R[b_H].R[b_L];
5468 * if (result > (2^63)-1) {
5469 * result = (2^63)-1; OV = 1;
5470 * } else if (result < -2^63) {
5471 * result = -2^63; OV = 1;
5472 * }
5473 * R[t_H].R[t_L] = result;
5474 * RV64:
5475 * result = Rs1 - Rs2;
5476 * if (result > (2^63)-1) {
5477 * result = (2^63)-1; OV = 1;
5478 * } else if (result < -2^63) {
5479 * result = -2^63; OV = 1;
5480 * }
5481 * Rd = result;
5482 * ~~~
5483 *
5484 * \param [in] a long long type of value stored in a
5485 * \param [in] b long long type of value stored in b
5486 * \return value stored in long long type
5487 */
__RV_KSUB64(long long a,long long b)5488 __STATIC_FORCEINLINE long long __RV_KSUB64(long long a, long long b)
5489 {
5490 register long long result;
5491 __ASM volatile("ksub64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5492 return result;
5493 }
5494 /* ===== Inline Function End for 3.67. KSUB64 ===== */
5495
5496 /* ===== Inline Function Start for 3.68. KSUBH ===== */
5497 /**
5498 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
5499 * \brief KSUBH (Signed Subtraction with Q15 Saturation)
5500 * \details
5501 * **Type**: DSP
5502 *
5503 * **Syntax**:\n
5504 * ~~~
5505 * KSUBH Rd, Rs1, Rs2
5506 * ~~~
5507 *
5508 * **Purpose**:\n
5509 * Subtract the signed lower 32-bit content of two registers with Q15 saturation.
5510 *
5511 * **Description**:\n
5512 * The signed lower 32-bit content of Rs2 is subtracted from the signed lower 32-bit
5513 * content of Rs1. And the result is saturated to the 16-bit signed integer range of [-2^15, 2^15-1] and then
5514 * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
5515 *
5516 * **Operations**:\n
5517 * ~~~
5518 * tmp = Rs1.W[0] - Rs2.W[0];
5519 * if (tmp > (2^15)-1) {
5520 * res = (2^15)-1;
5521 * OV = 1;
5522 * } else if (tmp < -2^15) {
5523 * res = -2^15;
5524 * OV = 1;
5525 * } else {
5526 * res = tmp;
5527 * }
5528 * Rd = SE(res[15:0]);
5529 * ~~~
5530 *
5531 * \param [in] a int type of value stored in a
5532 * \param [in] b int type of value stored in b
5533 * \return value stored in long type
5534 */
__RV_KSUBH(int a,int b)5535 __STATIC_FORCEINLINE long __RV_KSUBH(int a, int b)
5536 {
5537 register long result;
5538 __ASM volatile("ksubh %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5539 return result;
5540 }
5541 /* ===== Inline Function End for 3.68. KSUBH ===== */
5542
5543 /* ===== Inline Function Start for 3.69. KSUBW ===== */
5544 /**
5545 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
5546 * \brief KSUBW (Signed Subtraction with Q31 Saturation)
5547 * \details
5548 * **Type**: DSP
5549 *
5550 * **Syntax**:\n
5551 * ~~~
5552 * KSUBW Rd, Rs1, Rs2
5553 * ~~~
5554 *
5555 * **Purpose**:\n
5556 * Subtract the signed lower 32-bit content of two registers with Q31 saturation.
5557 *
5558 * **Description**:\n
5559 * The signed lower 32-bit content of Rs2 is subtracted from the signed lower 32-bit
5560 * content of Rs1. And the result is saturated to the 32-bit signed integer range of [-2^31, 2^31-1] and then
5561 * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
5562 *
5563 * **Operations**:\n
5564 * ~~~
5565 * tmp = Rs1.W[0] - Rs2.W[0];
5566 * if (tmp > (2^31)-1) {
5567 * res = (2^31)-1;
5568 * OV = 1;
5569 * } else if (tmp < -2^31) {
5570 * res = -2^31;
5571 * OV = 1;
5572 * } else {
5573 * res = tmp;
5574 * }
5575 * Rd = res[31:0]; // RV32
5576 * Rd = SE(res[31:0]); // RV64
5577 * ~~~
5578 *
5579 * \param [in] a int type of value stored in a
5580 * \param [in] b int type of value stored in b
5581 * \return value stored in long type
5582 */
__RV_KSUBW(int a,int b)5583 __STATIC_FORCEINLINE long __RV_KSUBW(int a, int b)
5584 {
5585 register long result;
5586 __ASM volatile("ksubw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5587 return result;
5588 }
5589 /* ===== Inline Function End for 3.69. KSUBW ===== */
5590
5591 /* ===== Inline Function Start for 3.70.1. KWMMUL ===== */
5592 /**
5593 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
5594 * \brief KWMMUL (SIMD Saturating MSW Signed Multiply Word & Double)
5595 * \details
5596 * **Type**: SIMD
5597 *
5598 * **Syntax**:\n
5599 * ~~~
5600 * KWMMUL Rd, Rs1, Rs2
5601 * KWMMUL.u Rd, Rs1, Rs2
5602 * ~~~
5603 *
5604 * **Purpose**:\n
5605 * Multiply the signed 32-bit integer elements of two registers, shift the results left 1-bit,
5606 * saturate, and write the most significant 32-bit results to a register. The `.u` form additionally
5607 * rounds up the multiplication results from the most significant discarded bit.
5608 *
5609 * **Description**:\n
5610 * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2. It then shifts
5611 * the multiplication results one bit to the left and takes the most significant 32-bit results. If the
5612 * shifted result is greater than 2^31-1, it is saturated to 2^31-1 and the OV flag is set to 1. The final element
5613 * result is written to Rd. The 32-bit elements of Rs1 and Rs2 are treated as signed integers. The `.u`
5614 * form of the instruction additionally rounds up the 64-bit multiplication results by adding a 1 to bit
5615 * 30 before the shift and saturation operations.
5616 *
5617 * **Operations**:\n
5618 * ~~~
5619 * if ((0x80000000 != Rs1.W[x]) | (0x80000000 != Rs2.W[x])) {
5620 * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
5621 * if (`.u` form) {
5622 * Round[x][33:0] = Mres[x][63:30] + 1;
5623 * Rd.W[x] = Round[x][32:1];
5624 * } else {
5625 * Rd.W[x] = Mres[x][62:31];
5626 * }
5627 * } else {
5628 * Rd.W[x] = 0x7fffffff;
5629 * OV = 1;
5630 * }
5631 * for RV32: x=0
5632 * for RV64: x=1...0
5633 * ~~~
5634 *
5635 * \param [in] a long type of value stored in a
5636 * \param [in] b long type of value stored in b
5637 * \return value stored in long type
5638 */
__RV_KWMMUL(long a,long b)5639 __STATIC_FORCEINLINE long __RV_KWMMUL(long a, long b)
5640 {
5641 register long result;
5642 __ASM volatile("kwmmul %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5643 return result;
5644 }
5645 /* ===== Inline Function End for 3.70.1. KWMMUL ===== */
5646
5647 /* ===== Inline Function Start for 3.70.2. KWMMUL.u ===== */
5648 /**
5649 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
5650 * \brief KWMMUL.u (SIMD Saturating MSW Signed Multiply Word & Double with Rounding)
5651 * \details
5652 * **Type**: SIMD
5653 *
5654 * **Syntax**:\n
5655 * ~~~
5656 * KWMMUL Rd, Rs1, Rs2
5657 * KWMMUL.u Rd, Rs1, Rs2
5658 * ~~~
5659 *
5660 * **Purpose**:\n
5661 * Multiply the signed 32-bit integer elements of two registers, shift the results left 1-bit,
5662 * saturate, and write the most significant 32-bit results to a register. The `.u` form additionally
5663 * rounds up the multiplication results from the most significant discarded bit.
5664 *
5665 * **Description**:\n
5666 * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2. It then shifts
5667 * the multiplication results one bit to the left and takes the most significant 32-bit results. If the
5668 * shifted result is greater than 2^31-1, it is saturated to 2^31-1 and the OV flag is set to 1. The final element
5669 * result is written to Rd. The 32-bit elements of Rs1 and Rs2 are treated as signed integers. The `.u`
5670 * form of the instruction additionally rounds up the 64-bit multiplication results by adding a 1 to bit
5671 * 30 before the shift and saturation operations.
5672 *
5673 * **Operations**:\n
5674 * ~~~
5675 * if ((0x80000000 != Rs1.W[x]) | (0x80000000 != Rs2.W[x])) {
5676 * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
5677 * if (`.u` form) {
5678 * Round[x][33:0] = Mres[x][63:30] + 1;
5679 * Rd.W[x] = Round[x][32:1];
5680 * } else {
5681 * Rd.W[x] = Mres[x][62:31];
5682 * }
5683 * } else {
5684 * Rd.W[x] = 0x7fffffff;
5685 * OV = 1;
5686 * }
5687 * for RV32: x=0
5688 * for RV64: x=1...0
5689 * ~~~
5690 *
5691 * \param [in] a long type of value stored in a
5692 * \param [in] b long type of value stored in b
5693 * \return value stored in long type
5694 */
__RV_KWMMUL_U(long a,long b)5695 __STATIC_FORCEINLINE long __RV_KWMMUL_U(long a, long b)
5696 {
5697 register long result;
5698 __ASM volatile("kwmmul.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5699 return result;
5700 }
5701 /* ===== Inline Function End for 3.70.2. KWMMUL.u ===== */
5702
5703 /* ===== Inline Function Start for 3.71. MADDR32 ===== */
5704 /**
5705 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
5706 * \brief MADDR32 (Multiply and Add to 32-Bit Word)
5707 * \details
5708 * **Type**: DSP
5709 *
5710 * **Syntax**:\n
5711 * ~~~
5712 * MADDR32 Rd, Rs1, Rs2
5713 * ~~~
5714 *
5715 * **Purpose**:\n
5716 * Multiply the 32-bit contents of two registers and add the lower 32-bit multiplication result
5717 * to the 32-bit content of a destination register. Write the final result back to the destination register.
5718 *
5719 * **Description**:\n
5720 * This instruction multiplies the lower 32-bit content of Rs1 with that of Rs2. It adds the
5721 * lower 32-bit multiplication result to the lower 32-bit content of Rd and writes the final result (RV32)
5722 * or sign-extended result (RV64) back to Rd. The contents of Rs1 and Rs2 can be either signed or
5723 * unsigned integers.
5724 *
5725 * **Operations**:\n
5726 * ~~~
5727 * RV32:
5728 * Mresult = Rs1 * Rs2;
5729 * Rd = Rd + Mresult.W[0];
5730 * RV64:
5731 * Mresult = Rs1.W[0] * Rs2.W[0];
5732 * tres[31:0] = Rd.W[0] + Mresult.W[0];
5733 * Rd = SE64(tres[31:0]);
5734 * ~~~
5735 *
5736 * \param [in] t unsigned long type of value stored in t
5737 * \param [in] a unsigned long type of value stored in a
5738 * \param [in] b unsigned long type of value stored in b
5739 * \return value stored in unsigned long type
5740 */
__RV_MADDR32(unsigned long t,unsigned long a,unsigned long b)5741 __STATIC_FORCEINLINE unsigned long __RV_MADDR32(unsigned long t, unsigned long a, unsigned long b)
5742 {
5743 __ASM volatile("maddr32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
5744 return t;
5745 }
5746 /* ===== Inline Function End for 3.71. MADDR32 ===== */
5747
5748 /* ===== Inline Function Start for 3.72. MAXW ===== */
5749 /**
5750 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
5751 * \brief MAXW (32-bit Signed Word Maximum)
5752 * \details
5753 * **Type**: DSP
5754 *
5755 * **Syntax**:\n
5756 * ~~~
5757 * MAXW Rd, Rs1, Rs2
5758 * ~~~
5759 *
5760 * **Purpose**:\n
5761 * Get the larger value from the 32-bit contents of two general registers.
5762 *
5763 * **Description**:\n
5764 * This instruction compares two signed 32-bit integers stored in Rs1 and Rs2, picks the
5765 * larger value as the result, and writes the result to Rd.
5766 *
5767 * **Operations**:\n
5768 * ~~~
5769 * if (Rs1.W[0] >= Rs2.W[0]) {
5770 * Rd = SE(Rs1.W[0]);
5771 * } else {
5772 * Rd = SE(Rs2.W[0]);
5773 * }
5774 * ~~~
5775 *
5776 * \param [in] a int type of value stored in a
5777 * \param [in] b int type of value stored in b
5778 * \return value stored in long type
5779 */
__RV_MAXW(int a,int b)5780 __STATIC_FORCEINLINE long __RV_MAXW(int a, int b)
5781 {
5782 register long result;
5783 __ASM volatile("maxw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5784 return result;
5785 }
5786 /* ===== Inline Function End for 3.72. MAXW ===== */
5787
5788 /* ===== Inline Function Start for 3.73. MINW ===== */
5789 /**
5790 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
5791 * \brief MINW (32-bit Signed Word Minimum)
5792 * \details
5793 * **Type**: DSP
5794 *
5795 * **Syntax**:\n
5796 * ~~~
5797 * MINW Rd, Rs1, Rs2
5798 * ~~~
5799 *
5800 * **Purpose**:\n
5801 * Get the smaller value from the 32-bit contents of two general registers.
5802 *
5803 * **Description**:\n
5804 * This instruction compares two signed 32-bit integers stored in Rs1 and Rs2, picks the
5805 * smaller value as the result, and writes the result to Rd.
5806 *
5807 * **Operations**:\n
5808 * ~~~
5809 * if (Rs1.W[0] >= Rs2.W[0]) { Rd = SE(Rs2.W[0]); } else { Rd = SE(Rs1.W[0]); }
5810 * ~~~
5811 *
5812 * \param [in] a int type of value stored in a
5813 * \param [in] b int type of value stored in b
5814 * \return value stored in long type
5815 */
__RV_MINW(int a,int b)5816 __STATIC_FORCEINLINE long __RV_MINW(int a, int b)
5817 {
5818 register long result;
5819 __ASM volatile("minw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5820 return result;
5821 }
5822 /* ===== Inline Function End for 3.73. MINW ===== */
5823
5824 /* ===== Inline Function Start for 3.74. MSUBR32 ===== */
5825 /**
5826 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
5827 * \brief MSUBR32 (Multiply and Subtract from 32-Bit Word)
5828 * \details
5829 * **Type**: DSP
5830 *
5831 * **Syntax**:\n
5832 * ~~~
5833 * MSUBR32 Rd, Rs1, Rs2
5834 * ~~~
5835 *
5836 * **Purpose**:\n
5837 * Multiply the 32-bit contents of two registers and subtract the lower 32-bit multiplication
5838 * result from the 32-bit content of a destination register. Write the final result back to the destination
5839 * register.
5840 *
5841 * **Description**:\n
5842 * This instruction multiplies the lower 32-bit content of Rs1 with that of Rs2, subtracts
5843 * the lower 32-bit multiplication result from the lower 32-bit content of Rd, then writes the final
5844 * result (RV32) or sign-extended result (RV64) back to Rd. The contents of Rs1 and Rs2 can be either
5845 * signed or unsigned integers.
5846 *
5847 * **Operations**:\n
5848 * ~~~
5849 * RV32:
5850 * Mresult = Rs1 * Rs2;
5851 * Rd = Rd - Mresult.W[0];
5852 * RV64:
5853 * Mresult = Rs1.W[0] * Rs2.W[0];
5854 * tres[31:0] = Rd.W[0] - Mresult.W[0];
5855 * Rd = SE64(tres[31:0]);
5856 * ~~~
5857 *
5858 * \param [in] t unsigned long type of value stored in t
5859 * \param [in] a unsigned long type of value stored in a
5860 * \param [in] b unsigned long type of value stored in b
5861 * \return value stored in unsigned long type
5862 */
__RV_MSUBR32(unsigned long t,unsigned long a,unsigned long b)5863 __STATIC_FORCEINLINE unsigned long __RV_MSUBR32(unsigned long t, unsigned long a, unsigned long b)
5864 {
5865 __ASM volatile("msubr32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
5866 return t;
5867 }
5868 /* ===== Inline Function End for 3.74. MSUBR32 ===== */
5869
5870 /* ===== Inline Function Start for 3.75. MULR64 ===== */
5871 /**
5872 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
5873 * \brief MULR64 (Multiply Word Unsigned to 64-bit Data)
5874 * \details
5875 * **Type**: DSP
5876 *
5877 * **Syntax**:\n
5878 * ~~~
5879 * MULR64 Rd, Rs1, Rs2
5880 * ~~~
5881 *
5882 * **Purpose**:\n
5883 * Multiply the 32-bit unsigned integer contents of two registers and write the 64-bit result.
5884 *
5885 * **RV32 Description**:\n
5886 * This instruction multiplies the 32-bit content of Rs1 with that of Rs2 and writes the 64-bit
5887 * multiplication result to an even/odd pair of registers containing Rd. Rd(4,1) index d determines the
5888 * even/odd pair group of the two registers. Specifically, the register pair includes register 2d and
5889 * 2d+1.
5890 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
5891 * of the pair contains the low 32-bit of the result.
5892 * The lower 32-bit contents of Rs1 and Rs2 are treated as unsigned integers.
5893 *
5894 * **RV64 Description**:\n
5895 * This instruction multiplies the lower 32-bit content of Rs1 with that of Rs2 and writes the 64-bit
5896 * multiplication result to Rd.
5897 * The lower 32-bit contents of Rs1 and Rs2 are treated as unsigned integers.
5898 *
5899 * **Operations**:\n
5900 * ~~~
5901 * RV32:
5902 * Mresult = CONCAT(1`b0,Rs1) u* CONCAT(1`b0,Rs2);
5903 * R[Rd(4,1).1(0)][31:0] = Mresult[63:32];
5904 * R[Rd(4,1).0(0)][31:0] = Mresult[31:0];
5905 * RV64:
5906 * Rd = Mresult[63:0];
5907 * Mresult = CONCAT(1`b0,Rs1.W[0]) u* CONCAT(1`b0,Rs2.W[0]);
5908 * ~~~
5909 *
5910 * \param [in] a unsigned long type of value stored in a
5911 * \param [in] b unsigned long type of value stored in b
5912 * \return value stored in unsigned long long type
5913 */
__RV_MULR64(unsigned long a,unsigned long b)5914 __STATIC_FORCEINLINE unsigned long long __RV_MULR64(unsigned long a, unsigned long b)
5915 {
5916 register unsigned long long result;
5917 __ASM volatile("mulr64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5918 return result;
5919 }
5920 /* ===== Inline Function End for 3.75. MULR64 ===== */
5921
5922 /* ===== Inline Function Start for 3.76. MULSR64 ===== */
5923 /**
5924 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
5925 * \brief MULSR64 (Multiply Word Signed to 64-bit Data)
5926 * \details
5927 * **Type**: DSP
5928 *
5929 * **Syntax**:\n
5930 * ~~~
5931 * MULSR64 Rd, Rs1, Rs2
5932 * ~~~
5933 *
5934 * **Purpose**:\n
5935 * Multiply the 32-bit signed integer contents of two registers and write the 64-bit result.
5936 *
5937 * **RV32 Description**:\n
5938 * This instruction multiplies the lower 32-bit content of Rs1 with the lower 32-bit content of Rs2 and
5939 * writes the 64-bit multiplication result to an even/odd pair of registers containing Rd. Rd(4,1) index d
5940 * determines the even/odd pair group of the two registers. Specifically, the register pair includes
5941 * register 2d and 2d+1.
5942 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
5943 * of the pair contains the low 32-bit of the result.
5944 * The lower 32-bit contents of Rs1 and Rs2 are treated as signed integers.
5945 *
5946 * **RV64 Description**:\n
5947 * This instruction multiplies the lower 32-bit content of Rs1 with the lower 32-bit content of Rs2 and
5948 * writes the 64-bit multiplication result to Rd.
5949 * The lower 32-bit contents of Rs1 and Rs2 are treated as signed integers.
5950 *
5951 * **Operations**:\n
5952 * ~~~
5953 * RV32:
5954 * Mresult = Ra s* Rb;
5955 * R[Rd(4,1).1(0)][31:0] = Mresult[63:32];
5956 * R[Rd(4,1).0(0)][31:0] = Mresult[31:0];
5957 * RV64:
5958 * Mresult = Ra.W[0] s* Rb.W[0];
5959 * Rd = Mresult[63:0];
5960 * ~~~
5961 *
5962 * \param [in] a long type of value stored in a
5963 * \param [in] b long type of value stored in b
5964 * \return value stored in long long type
5965 */
__RV_MULSR64(long a,long b)5966 __STATIC_FORCEINLINE long long __RV_MULSR64(long a, long b)
5967 {
5968 register long long result;
5969 __ASM volatile("mulsr64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
5970 return result;
5971 }
5972 /* ===== Inline Function End for 3.76. MULSR64 ===== */
5973
5974 /* ===== Inline Function Start for 3.77. PBSAD ===== */
5975 /**
5976 * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
5977 * \brief PBSAD (Parallel Byte Sum of Absolute Difference)
5978 * \details
5979 * **Type**: DSP
5980 *
5981 * **Syntax**:\n
5982 * ~~~
5983 * PBSAD Rd, Rs1, Rs2
5984 * ~~~
5985 *
5986 * **Purpose**:\n
5987 * Calculate the sum of absolute difference of unsigned 8-bit data elements.
5988 *
5989 * **Description**:\n
5990 * This instruction subtracts the un-signed 8-bit elements of Rs2 from those of Rs1. Then
5991 * it adds the absolute value of each difference together and writes the result to Rd.
5992 *
5993 * **Operations**:\n
5994 * ~~~
5995 * absdiff[x] = ABS(Rs1.B[x] - Rs2.B[x]);
5996 * Rd = SUM(absdiff[x]);
5997 * for RV32: x=3...0,
5998 * for RV64: x=7...0
5999 * ~~~
6000 *
6001 * \param [in] a unsigned long type of value stored in a
6002 * \param [in] b unsigned long type of value stored in b
6003 * \return value stored in unsigned long type
6004 */
__RV_PBSAD(unsigned long a,unsigned long b)6005 __STATIC_FORCEINLINE unsigned long __RV_PBSAD(unsigned long a, unsigned long b)
6006 {
6007 register unsigned long result;
6008 __ASM volatile("pbsad %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6009 return result;
6010 }
6011 /* ===== Inline Function End for 3.77. PBSAD ===== */
6012
6013 /* ===== Inline Function Start for 3.78. PBSADA ===== */
6014 /**
6015 * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
6016 * \brief PBSADA (Parallel Byte Sum of Absolute Difference Accum)
6017 * \details
6018 * **Type**: DSP
6019 *
6020 * **Syntax**:\n
6021 * ~~~
6022 * PBSADA Rd, Rs1, Rs2
6023 * ~~~
6024 *
6025 * **Purpose**:\n
6026 * Calculate the sum of absolute difference of four unsigned 8-bit data elements and
6027 * accumulate it into a register.
6028 *
6029 * **Description**:\n
6030 * This instruction subtracts the un-signed 8-bit elements of Rs2 from those of Rs1. It
6031 * then adds the absolute value of each difference together along with the content of Rd and writes the
6032 * accumulated result back to Rd.
6033 *
6034 * **Operations**:\n
6035 * ~~~
6036 * absdiff[x] = ABS(Rs1.B[x] - Rs2.B[x]);
6037 * Rd = Rd + SUM(absdiff[x]);
6038 * for RV32: x=3...0,
6039 * for RV64: x=7...0
6040 * ~~~
6041 *
6042 * \param [in] t unsigned long type of value stored in t
6043 * \param [in] a unsigned long type of value stored in a
6044 * \param [in] b unsigned long type of value stored in b
6045 * \return value stored in unsigned long type
6046 */
__RV_PBSADA(unsigned long t,unsigned long a,unsigned long b)6047 __STATIC_FORCEINLINE unsigned long __RV_PBSADA(unsigned long t, unsigned long a, unsigned long b)
6048 {
6049 __ASM volatile("pbsada %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
6050 return t;
6051 }
6052 /* ===== Inline Function End for 3.78. PBSADA ===== */
6053
6054 /* ===== Inline Function Start for 3.79.1. PKBB16 ===== */
6055 /**
6056 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK
6057 * \brief PKBB16 (Pack Two 16-bit Data from Both Bottom Half)
6058 * \details
6059 * **Type**: DSP
6060 *
6061 * **Syntax**:\n
6062 * ~~~
6063 * PKBB16 Rd, Rs1, Rs2
6064 * PKBT16 Rd, Rs1, Rs2
6065 * PKTT16 Rd, Rs1, Rs2
6066 * PKTB16 Rd, Rs1, Rs2
6067 * ~~~
6068 *
6069 * **Purpose**:\n
6070 * Pack 16-bit data from 32-bit chunks in two registers.
6071 * * PKBB16: bottom.bottom
6072 * * PKBT16 bottom.top
6073 * * PKTT16 top.top
6074 * * PKTB16 top.bottom
6075 *
6076 * **Description**:\n
6077 * (PKBB16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to
6078 * Rd.W[x] [15:0].
6079 * (PKBT16) moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
6080 * (PKTT16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
6081 * (PKTB16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].
6082 *
6083 * **Operations**:\n
6084 * ~~~
6085 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); // PKBB16
6086 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); // PKBT16
6087 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); // PKTB16
6088 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); // PKTT16
6089 * for RV32: x=0,
6090 * for RV64: x=1...0
6091 * ~~~
6092 *
6093 * \param [in] a unsigned long type of value stored in a
6094 * \param [in] b unsigned long type of value stored in b
6095 * \return value stored in unsigned long type
6096 */
__RV_PKBB16(unsigned long a,unsigned long b)6097 __STATIC_FORCEINLINE unsigned long __RV_PKBB16(unsigned long a, unsigned long b)
6098 {
6099 register unsigned long result;
6100 __ASM volatile("pkbb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6101 return result;
6102 }
6103 /* ===== Inline Function End for 3.79.1. PKBB16 ===== */
6104
6105 /* ===== Inline Function Start for 3.79.2. PKBT16 ===== */
6106 /**
6107 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK
6108 * \brief PKBT16 (Pack Two 16-bit Data from Bottom and Top Half)
6109 * \details
6110 * **Type**: DSP
6111 *
6112 * **Syntax**:\n
6113 * ~~~
6114 * PKBB16 Rd, Rs1, Rs2
6115 * PKBT16 Rd, Rs1, Rs2
6116 * PKTT16 Rd, Rs1, Rs2
6117 * PKTB16 Rd, Rs1, Rs2
6118 * ~~~
6119 *
6120 * **Purpose**:\n
6121 * Pack 16-bit data from 32-bit chunks in two registers.
6122 * * PKBB16: bottom.bottom
6123 * * PKBT16 bottom.top
6124 * * PKTT16 top.top
6125 * * PKTB16 top.bottom
6126 *
6127 * **Description**:\n
6128 * (PKBB16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to
6129 * Rd.W[x] [15:0].
6130 * (PKBT16) moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
6131 * (PKTT16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
6132 * (PKTB16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].
6133 *
6134 * **Operations**:\n
6135 * ~~~
6136 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); // PKBB16
6137 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); // PKBT16
6138 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); // PKTB16
6139 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); // PKTT16
6140 * for RV32: x=0,
6141 * for RV64: x=1...0
6142 * ~~~
6143 *
6144 * \param [in] a unsigned long type of value stored in a
6145 * \param [in] b unsigned long type of value stored in b
6146 * \return value stored in unsigned long type
6147 */
__RV_PKBT16(unsigned long a,unsigned long b)6148 __STATIC_FORCEINLINE unsigned long __RV_PKBT16(unsigned long a, unsigned long b)
6149 {
6150 register unsigned long result;
6151 __ASM volatile("pkbt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6152 return result;
6153 }
6154 /* ===== Inline Function End for 3.79.2. PKBT16 ===== */
6155
6156 /* ===== Inline Function Start for 3.79.3. PKTT16 ===== */
6157 /**
6158 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK
6159 * \brief PKTT16 (Pack Two 16-bit Data from Both Top Half)
6160 * \details
6161 * **Type**: DSP
6162 *
6163 * **Syntax**:\n
6164 * ~~~
6165 * PKBB16 Rd, Rs1, Rs2
6166 * PKBT16 Rd, Rs1, Rs2
6167 * PKTT16 Rd, Rs1, Rs2
6168 * PKTB16 Rd, Rs1, Rs2
6169 * ~~~
6170 *
6171 * **Purpose**:\n
6172 * Pack 16-bit data from 32-bit chunks in two registers.
6173 * * PKBB16: bottom.bottom
6174 * * PKBT16 bottom.top
6175 * * PKTT16 top.top
6176 * * PKTB16 top.bottom
6177 *
6178 * **Description**:\n
6179 * (PKBB16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to
6180 * Rd.W[x] [15:0].
6181 * (PKBT16) moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
6182 * (PKTT16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
6183 * (PKTB16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].
6184 *
6185 * **Operations**:\n
6186 * ~~~
6187 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); // PKBB16
6188 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); // PKBT16
6189 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); // PKTB16
6190 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); // PKTT16
6191 * for RV32: x=0,
6192 * for RV64: x=1...0
6193 * ~~~
6194 *
6195 * \param [in] a unsigned long type of value stored in a
6196 * \param [in] b unsigned long type of value stored in b
6197 * \return value stored in unsigned long type
6198 */
__RV_PKTT16(unsigned long a,unsigned long b)6199 __STATIC_FORCEINLINE unsigned long __RV_PKTT16(unsigned long a, unsigned long b)
6200 {
6201 register unsigned long result;
6202 __ASM volatile("pktt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6203 return result;
6204 }
6205 /* ===== Inline Function End for 3.79.3. PKTT16 ===== */
6206
6207 /* ===== Inline Function Start for 3.79.4. PKTB16 ===== */
6208 /**
6209 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK
6210 * \brief PKTB16 (Pack Two 16-bit Data from Top and Bottom Half)
6211 * \details
6212 * **Type**: DSP
6213 *
6214 * **Syntax**:\n
6215 * ~~~
6216 * PKBB16 Rd, Rs1, Rs2
6217 * PKBT16 Rd, Rs1, Rs2
6218 * PKTT16 Rd, Rs1, Rs2
6219 * PKTB16 Rd, Rs1, Rs2
6220 * ~~~
6221 *
6222 * **Purpose**:\n
6223 * Pack 16-bit data from 32-bit chunks in two registers.
6224 * * PKBB16: bottom.bottom
6225 * * PKBT16 bottom.top
6226 * * PKTT16 top.top
6227 * * PKTB16 top.bottom
6228 *
6229 * **Description**:\n
6230 * (PKBB16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to
6231 * Rd.W[x] [15:0].
6232 * (PKBT16) moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
6233 * (PKTT16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
6234 * (PKTB16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].
6235 *
6236 * **Operations**:\n
6237 * ~~~
6238 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); // PKBB16
6239 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); // PKBT16
6240 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); // PKTB16
6241 * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); // PKTT16
6242 * for RV32: x=0,
6243 * for RV64: x=1...0
6244 * ~~~
6245 *
6246 * \param [in] a unsigned long type of value stored in a
6247 * \param [in] b unsigned long type of value stored in b
6248 * \return value stored in unsigned long type
6249 */
__RV_PKTB16(unsigned long a,unsigned long b)6250 __STATIC_FORCEINLINE unsigned long __RV_PKTB16(unsigned long a, unsigned long b)
6251 {
6252 register unsigned long result;
6253 __ASM volatile("pktb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6254 return result;
6255 }
6256 /* ===== Inline Function End for 3.79.4. PKTB16 ===== */
6257
6258 /* ===== Inline Function Start for 3.80. RADD8 ===== */
6259 /**
6260 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
6261 * \brief RADD8 (SIMD 8-bit Signed Halving Addition)
6262 * \details
6263 * **Type**: SIMD
6264 *
6265 * **Syntax**:\n
6266 * ~~~
6267 * RADD8 Rd, Rs1, Rs2
6268 * ~~~
6269 *
6270 * **Purpose**:\n
6271 * Do 8-bit signed integer element additions simultaneously. The element results are halved
6272 * to avoid overflow or saturation.
6273 *
6274 * **Description**:\n
6275 * This instruction adds the 8-bit signed integer elements in Rs1 with the 8-bit signed
6276 * integer elements in Rs2. The results are first arithmetically right-shifted by 1 bit and then written to
6277 * Rd.
6278 *
6279 * **Examples**:\n
6280 * ~~~
6281 * * Rs1 = 0x7F, Rs2 = 0x7F, Rd = 0x7F
6282 * * Rs1 = 0x80, Rs2 = 0x80, Rd = 0x80
6283 * * Rs1 = 0x40, Rs2 = 0x80, Rd = 0xE0
6284 * ~~~
6285 *
6286 * **Operations**:\n
6287 * ~~~
6288 * Rd.B[x] = (Rs1.B[x] + Rs2.B[x]) s>> 1; for RV32: x=3...0, for RV64: x=7...0
6289 * ~~~
6290 *
6291 * \param [in] a unsigned long type of value stored in a
6292 * \param [in] b unsigned long type of value stored in b
6293 * \return value stored in unsigned long type
6294 */
__RV_RADD8(unsigned long a,unsigned long b)6295 __STATIC_FORCEINLINE unsigned long __RV_RADD8(unsigned long a, unsigned long b)
6296 {
6297 register unsigned long result;
6298 __ASM volatile("radd8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6299 return result;
6300 }
6301 /* ===== Inline Function End for 3.80. RADD8 ===== */
6302
6303 /* ===== Inline Function Start for 3.81. RADD16 ===== */
6304 /**
6305 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
6306 * \brief RADD16 (SIMD 16-bit Signed Halving Addition)
6307 * \details
6308 * **Type**: SIMD
6309 *
6310 * **Syntax**:\n
6311 * ~~~
6312 * RADD16 Rd, Rs1, Rs2
6313 * ~~~
6314 *
6315 * **Purpose**:\n
6316 * Do 16-bit signed integer element additions simultaneously. The results are halved to avoid
6317 * overflow or saturation.
6318 *
6319 * **Description**:\n
6320 * This instruction adds the 16-bit signed integer elements in Rs1 with the 16-bit signed
6321 * integer elements in Rs2. The results are first arithmetically right-shifted by 1 bit and then written to
6322 * Rd.
6323 *
6324 * **Examples**:\n
6325 * ~~~
6326 * * Rs1 = 0x7FFF, Rs2 = 0x7FFF, Rd = 0x7FFF
6327 * * Rs1 = 0x8000, Rs2 = 0x8000, Rd = 0x8000
6328 * * Rs1 = 0x4000, Rs2 = 0x8000, Rd = 0xE000
6329 * ~~~
6330 *
6331 * **Operations**:\n
6332 * ~~~
6333 * Rd.H[x] = (Rs1.H[x] + Rs2.H[x]) s>> 1; for RV32: x=1...0, for RV64: x=3...0
6334 * ~~~
6335 *
6336 * \param [in] a unsigned long type of value stored in a
6337 * \param [in] b unsigned long type of value stored in b
6338 * \return value stored in unsigned long type
6339 */
__RV_RADD16(unsigned long a,unsigned long b)6340 __STATIC_FORCEINLINE unsigned long __RV_RADD16(unsigned long a, unsigned long b)
6341 {
6342 register unsigned long result;
6343 __ASM volatile("radd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6344 return result;
6345 }
6346 /* ===== Inline Function End for 3.81. RADD16 ===== */
6347
6348 /* ===== Inline Function Start for 3.82. RADD64 ===== */
6349 /**
6350 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
6351 * \brief RADD64 (64-bit Signed Halving Addition)
6352 * \details
6353 * **Type**: DSP (64-bit Profile)
6354 *
6355 * **Syntax**:\n
6356 * ~~~
6357 * RADD64 Rd, Rs1, Rs2
6358 * ~~~
6359 *
6360 * **Purpose**:\n
6361 * Add two 64-bit signed integers. The result is halved to avoid overflow or saturation.
6362 *
6363 * **RV32 Description**:\n
6364 * This instruction adds the 64-bit signed integer of an even/odd pair of registers
6365 * specified by Rs1(4,1) with the 64-bit signed integer of an even/odd pair of registers specified by
6366 * Rs2(4,1). The 64-bit addition result is first arithmetically right-shifted by 1 bit and then written to an
6367 * even/odd pair of registers specified by Rd(4,1).
6368 * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register
6369 * pair includes register 2d and 2d+1.
6370 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
6371 * of the pair contains the low 32-bit of the result.
6372 *
6373 * **RV64 Description**:\n
6374 * This instruction adds the 64-bit signed integer in Rs1 with the 64-bit signed
6375 * integer in Rs2. The 64-bit addition result is first arithmetically right-shifted by 1 bit and then
6376 * written to Rd.
6377 *
6378 * **Operations**:\n
6379 * ~~~
6380 * RV32:
6381 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
6382 * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
6383 * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
6384 * R[t_H].R[t_L] = (R[a_H].R[a_L] + R[b_H].R[b_L]) s>> 1;
6385 * RV64:
6386 * Rd = (Rs1 + Rs2) s>> 1;
6387 * ~~~
6388 *
6389 * \param [in] a long long type of value stored in a
6390 * \param [in] b long long type of value stored in b
6391 * \return value stored in long long type
6392 */
__RV_RADD64(long long a,long long b)6393 __STATIC_FORCEINLINE long long __RV_RADD64(long long a, long long b)
6394 {
6395 register long long result;
6396 __ASM volatile("radd64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6397 return result;
6398 }
6399 /* ===== Inline Function End for 3.82. RADD64 ===== */
6400
6401 /* ===== Inline Function Start for 3.83. RADDW ===== */
6402 /**
6403 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
6404 * \brief RADDW (32-bit Signed Halving Addition)
6405 * \details
6406 * **Type**: DSP
6407 *
6408 * **Syntax**:\n
6409 * ~~~
6410 * RADDW Rd, Rs1, Rs2
6411 * ~~~
6412 *
6413 * **Purpose**:\n
6414 * Add 32-bit signed integers and the results are halved to avoid overflow or saturation.
6415 *
6416 * **Description**:\n
6417 * This instruction adds the first 32-bit signed integer in Rs1 with the first 32-bit signed
6418 * integer in Rs2. The result is first arithmetically right-shifted by 1 bit and then sign-extended and
6419 * written to Rd.
6420 *
6421 * **Examples**:\n
6422 * ~~~
6423 * * Rs1 = 0x7FFFFFFF, Rs2 = 0x7FFFFFFF, Rd = 0x7FFFFFFF
6424 * * Rs1 = 0x80000000, Rs2 = 0x80000000, Rd = 0x80000000
6425 * * Rs1 = 0x40000000, Rs2 = 0x80000000, Rd = 0xE0000000
6426 * ~~~
6427 *
6428 * **Operations**:\n
6429 * ~~~
6430 * RV32:
6431 * Rd[31:0] = (Rs1[31:0] + Rs2[31:0]) s>> 1;
6432 * RV64:
6433 * resw[31:0] = (Rs1[31:0] + Rs2[31:0]) s>> 1;
6434 * Rd[63:0] = SE(resw[31:0]);
6435 * ~~~
6436 *
6437 * \param [in] a int type of value stored in a
6438 * \param [in] b int type of value stored in b
6439 * \return value stored in long type
6440 */
__RV_RADDW(int a,int b)6441 __STATIC_FORCEINLINE long __RV_RADDW(int a, int b)
6442 {
6443 register long result;
6444 __ASM volatile("raddw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6445 return result;
6446 }
6447 /* ===== Inline Function End for 3.83. RADDW ===== */
6448
6449 /* ===== Inline Function Start for 3.84. RCRAS16 ===== */
6450 /**
6451 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
6452 * \brief RCRAS16 (SIMD 16-bit Signed Halving Cross Addition & Subtraction)
6453 * \details
6454 * **Type**: SIMD
6455 *
6456 * **Syntax**:\n
6457 * ~~~
6458 * RCRAS16 Rd, Rs1, Rs2
6459 * ~~~
6460 *
6461 * **Purpose**:\n
6462 * Do 16-bit signed integer element addition and 16-bit signed integer element subtraction in
6463 * a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks. The results
6464 * are halved to avoid overflow or saturation.
6465 *
6466 * **Description**:\n
6467 * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in
6468 * Rs1 with the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2, and subtracts the 16-bit
6469 * signed integer element in [31:16] of 32-bit chunks in Rs2 from the 16-bit signed integer element in
6470 * [15:0] of 32-bit chunks in Rs1. The element results are first arithmetically right-shifted by 1 bit and
6471 * then written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
6472 *
6473 * **Examples**:\n
6474 * ~~~
6475 * Please see `RADD16` and `RSUB16` instructions.
6476 * ~~~
6477 *
6478 * **Operations**:\n
6479 * ~~~
6480 * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][15:0]) s>> 1;
6481 * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][31:16]) s>> 1;
6482 * for RV32, x=0
6483 * for RV64, x=1...0
6484 * ~~~
6485 *
6486 * \param [in] a unsigned long type of value stored in a
6487 * \param [in] b unsigned long type of value stored in b
6488 * \return value stored in unsigned long type
6489 */
__RV_RCRAS16(unsigned long a,unsigned long b)6490 __STATIC_FORCEINLINE unsigned long __RV_RCRAS16(unsigned long a, unsigned long b)
6491 {
6492 register unsigned long result;
6493 __ASM volatile("rcras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6494 return result;
6495 }
6496 /* ===== Inline Function End for 3.84. RCRAS16 ===== */
6497
6498 /* ===== Inline Function Start for 3.85. RCRSA16 ===== */
6499 /**
6500 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
6501 * \brief RCRSA16 (SIMD 16-bit Signed Halving Cross Subtraction & Addition)
6502 * \details
6503 * **Type**: SIMD
6504 *
6505 * **Syntax**:\n
6506 * ~~~
6507 * RCRSA16 Rd, Rs1, Rs2
6508 * ~~~
6509 *
6510 * **Purpose**:\n
6511 * Do 16-bit signed integer element subtraction and 16-bit signed integer element addition in
6512 * a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks. The results
6513 * are halved to avoid overflow or saturation.
6514 *
6515 * **Description**:\n
6516 * This instruction subtracts the 16-bit signed integer element in [15:0] of 32-bit chunks
6517 * in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1, and adds the 16-bit
6518 * signed element integer in [15:0] of 32-bit chunks in Rs1 with the 16-bit signed integer element in
6519 * [31:16] of 32-bit chunks in Rs2. The two results are first arithmetically right-shifted by 1 bit and
6520 * then written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
6521 *
6522 * **Examples**:\n
6523 * ~~~
6524 * Please see `RADD16` and `RSUB16` instructions.
6525 * ~~~
6526 *
6527 * **Operations**:\n
6528 * ~~~
6529 * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][15:0]) s>> 1;
6530 * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][31:16]) s>> 1;
6531 * for RV32, x=0
6532 * for RV64, x=1...0
6533 * ~~~
6534 *
6535 * \param [in] a unsigned long type of value stored in a
6536 * \param [in] b unsigned long type of value stored in b
6537 * \return value stored in unsigned long type
6538 */
__RV_RCRSA16(unsigned long a,unsigned long b)6539 __STATIC_FORCEINLINE unsigned long __RV_RCRSA16(unsigned long a, unsigned long b)
6540 {
6541 register unsigned long result;
6542 __ASM volatile("rcrsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6543 return result;
6544 }
6545 /* ===== Inline Function End for 3.85. RCRSA16 ===== */
6546
6547 /* ===== Inline Function Start for 3.86. RDOV ===== */
6548 /**
6549 * \ingroup NMSIS_Core_DSP_Intrinsic_OV_FLAG_SC
6550 * \brief RDOV (Read OV flag)
6551 * \details
6552 * **Type**: DSP
6553 *
6554 * **Syntax**:\n
6555 * ~~~
6556 * RDOV Rd # pseudo mnemonic
6557 * ~~~
6558 *
6559 * **Purpose**:\n
6560 * This pseudo instruction is an alias to `CSRR Rd, ucode` instruction which maps to the real
6561 * instruction of `CSRRS Rd, ucode, x0`.
6562 *
6563 *
6564 * \return value stored in unsigned long type
6565 */
__RV_RDOV(void)6566 __STATIC_FORCEINLINE unsigned long __RV_RDOV(void)
6567 {
6568 register unsigned long result;
6569 __ASM volatile("rdov %0" : "=r"(result));
6570 return result;
6571 }
6572 /* ===== Inline Function End for 3.86. RDOV ===== */
6573
6574 /* ===== Inline Function Start for 3.87. RSTAS16 ===== */
6575 /**
6576 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
6577 * \brief RSTAS16 (SIMD 16-bit Signed Halving Straight Addition & Subtraction)
6578 * \details
6579 * **Type**: SIMD
6580 *
6581 * **Syntax**:\n
6582 * ~~~
6583 * RSTAS16 Rd, Rs1, Rs2
6584 * ~~~
6585 *
6586 * **Purpose**:\n
6587 * Do 16-bit signed integer element addition and 16-bit signed integer element subtraction in
6588 * a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit chunks. The
6589 * results are halved to avoid overflow or saturation.
6590 *
6591 * **Description**:\n
6592 * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in
6593 * Rs1 with the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2, and subtracts the 16-bit
6594 * signed integer element in [15:0] of 32-bit chunks in Rs2 from the 16-bit signed integer element in
6595 * [15:0] of 32-bit chunks in Rs1. The element results are first arithmetically right-shifted by 1 bit and
6596 * then written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
6597 *
6598 * **Examples**:\n
6599 * ~~~
6600 * Please see `RADD16` and `RSUB16` instructions.
6601 * ~~~
6602 *
6603 * **Operations**:\n
6604 * ~~~
6605 * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][31:16]) s>> 1;
6606 * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][15:0]) s>> 1;
6607 * for RV32, x=0
6608 * for RV64, x=1...0
6609 * ~~~
6610 *
6611 * \param [in] a unsigned long type of value stored in a
6612 * \param [in] b unsigned long type of value stored in b
6613 * \return value stored in unsigned long type
6614 */
__RV_RSTAS16(unsigned long a,unsigned long b)6615 __STATIC_FORCEINLINE unsigned long __RV_RSTAS16(unsigned long a, unsigned long b)
6616 {
6617 register unsigned long result;
6618 __ASM volatile("rstas16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6619 return result;
6620 }
6621 /* ===== Inline Function End for 3.87. RSTAS16 ===== */
6622
6623 /* ===== Inline Function Start for 3.88. RSTSA16 ===== */
6624 /**
6625 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
6626 * \brief RSTSA16 (SIMD 16-bit Signed Halving Straight Subtraction & Addition)
6627 * \details
6628 * **Type**: SIMD
6629 *
6630 * **Syntax**:\n
6631 * ~~~
6632 * RSTSA16 Rd, Rs1, Rs2
6633 * ~~~
6634 *
6635 * **Purpose**:\n
6636 * Do 16-bit signed integer element subtraction and 16-bit signed integer element addition in
6637 * a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit chunks. The
6638 * results are halved to avoid overflow or saturation.
6639 *
6640 * **Description**:\n
6641 * This instruction subtracts the 16-bit signed integer element in [31:16] of 32-bit chunks
6642 * in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1, and adds the 16-bit
 * signed integer element in [15:0] of 32-bit chunks in Rs1 with the 16-bit signed integer element in
6644 * [15:0] of 32-bit chunks in Rs2. The two results are first arithmetically right-shifted by 1 bit and then
6645 * written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
6646 *
6647 * **Examples**:\n
6648 * ~~~
6649 * Please see `RADD16` and `RSUB16` instructions.
6650 * ~~~
6651 *
6652 * **Operations**:\n
6653 * ~~~
6654 * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][31:16]) s>> 1;
6655 * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][15:0]) s>> 1;
6656 * for RV32, x=0
6657 * for RV64, x=1...0
6658 * ~~~
6659 *
6660 * \param [in] a unsigned long type of value stored in a
6661 * \param [in] b unsigned long type of value stored in b
6662 * \return value stored in unsigned long type
6663 */
__RV_RSTSA16(unsigned long a,unsigned long b)6664 __STATIC_FORCEINLINE unsigned long __RV_RSTSA16(unsigned long a, unsigned long b)
6665 {
6666 register unsigned long result;
6667 __ASM volatile("rstsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6668 return result;
6669 }
6670 /* ===== Inline Function End for 3.88. RSTSA16 ===== */
6671
6672 /* ===== Inline Function Start for 3.89. RSUB8 ===== */
6673 /**
6674 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
6675 * \brief RSUB8 (SIMD 8-bit Signed Halving Subtraction)
6676 * \details
6677 * **Type**: SIMD
6678 *
6679 * **Syntax**:\n
6680 * ~~~
6681 * RSUB8 Rd, Rs1, Rs2
6682 * ~~~
6683 *
6684 * **Purpose**:\n
6685 * Do 8-bit signed integer element subtractions simultaneously. The results are halved to
6686 * avoid overflow or saturation.
6687 *
6688 * **Description**:\n
6689 * This instruction subtracts the 8-bit signed integer elements in Rs2 from the 8-bit
6690 * signed integer elements in Rs1. The results are first arithmetically right-shifted by 1 bit and then
6691 * written to Rd.
6692 *
6693 * **Examples**:\n
6694 * ~~~
6695 * * Rs1 = 0x7F, Rs2 = 0x80, Rd = 0x7F
6696 * * Rs1 = 0x80, Rs2 = 0x7F, Rd = 0x80
 * * Rs1 = 0x80, Rs2 = 0x40, Rd = 0xA0
6698 * ~~~
6699 *
6700 * **Operations**:\n
6701 * ~~~
6702 * Rd.B[x] = (Rs1.B[x] - Rs2.B[x]) s>> 1;
6703 * for RV32: x=3...0,
6704 * for RV64: x=7...0
6705 * ~~~
6706 *
6707 * \param [in] a unsigned long type of value stored in a
6708 * \param [in] b unsigned long type of value stored in b
6709 * \return value stored in unsigned long type
6710 */
__RV_RSUB8(unsigned long a,unsigned long b)6711 __STATIC_FORCEINLINE unsigned long __RV_RSUB8(unsigned long a, unsigned long b)
6712 {
6713 register unsigned long result;
6714 __ASM volatile("rsub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6715 return result;
6716 }
6717 /* ===== Inline Function End for 3.89. RSUB8 ===== */
6718
6719 /* ===== Inline Function Start for 3.90. RSUB16 ===== */
6720 /**
6721 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
6722 * \brief RSUB16 (SIMD 16-bit Signed Halving Subtraction)
6723 * \details
6724 * **Type**: SIMD
6725 *
6726 * **Syntax**:\n
6727 * ~~~
6728 * RSUB16 Rd, Rs1, Rs2
6729 * ~~~
6730 *
6731 * **Purpose**:\n
6732 * Do 16-bit signed integer element subtractions simultaneously. The results are halved to
6733 * avoid overflow or saturation.
6734 *
6735 * **Description**:\n
6736 * This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit
6737 * signed integer elements in Rs1. The results are first arithmetically right-shifted by 1 bit and then
6738 * written to Rd.
6739 *
6740 * **Examples**:\n
6741 * ~~~
 * * Rs1 = 0x7FFF, Rs2 = 0x8000, Rd = 0x7FFF
 * * Rs1 = 0x8000, Rs2 = 0x7FFF, Rd = 0x8000
 * * Rs1 = 0x8000, Rs2 = 0x4000, Rd = 0xA000
6745 * ~~~
6746 *
6747 * **Operations**:\n
6748 * ~~~
6749 * Rd.H[x] = (Rs1.H[x] - Rs2.H[x]) s>> 1;
6750 * for RV32: x=1...0,
6751 * for RV64: x=3...0
6752 * ~~~
6753 *
6754 * \param [in] a unsigned long type of value stored in a
6755 * \param [in] b unsigned long type of value stored in b
6756 * \return value stored in unsigned long type
6757 */
__RV_RSUB16(unsigned long a,unsigned long b)6758 __STATIC_FORCEINLINE unsigned long __RV_RSUB16(unsigned long a, unsigned long b)
6759 {
6760 register unsigned long result;
6761 __ASM volatile("rsub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6762 return result;
6763 }
6764 /* ===== Inline Function End for 3.90. RSUB16 ===== */
6765
6766 /* ===== Inline Function Start for 3.91. RSUB64 ===== */
6767 /**
6768 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
6769 * \brief RSUB64 (64-bit Signed Halving Subtraction)
6770 * \details
6771 * **Type**: DSP (64-bit Profile)
6772 *
6773 * **Syntax**:\n
6774 * ~~~
6775 * RSUB64 Rd, Rs1, Rs2
6776 * ~~~
6777 *
6778 * **Purpose**:\n
6779 * Perform a 64-bit signed integer subtraction. The result is halved to avoid overflow or
6780 * saturation.
6781 *
6782 * **RV32 Description**:\n
 * This instruction subtracts the 64-bit signed integer of an even/odd pair of
 * registers specified by Rs2(4,1) from the 64-bit signed integer of an even/odd pair of registers
 * specified by Rs1(4,1). The subtraction result is first arithmetically right-shifted by 1 bit and then
 * written to an even/odd pair of registers specified by Rd(4,1).
6787 * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register
6788 * pair includes register 2d and 2d+1.
6789 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
6790 * of the pair contains the low 32-bit of the result.
6791 *
6792 * **RV64 Description**:\n
6793 * This instruction subtracts the 64-bit signed integer in Rs2 from the 64-bit signed
6794 * integer in Rs1. The 64-bit subtraction result is first arithmetically right-shifted by 1 bit and then
6795 * written to Rd.
6796 *
6797 * **Operations**:\n
6798 * ~~~
6799 * RV32:
6800 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
6801 * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
6802 * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
6803 * R[t_H].R[t_L] = (R[a_H].R[a_L] - R[b_H].R[b_L]) s>> 1;
6804 * RV64:
6805 * Rd = (Rs1 - Rs2) s>> 1;
6806 * ~~~
6807 *
6808 * \param [in] a long long type of value stored in a
6809 * \param [in] b long long type of value stored in b
6810 * \return value stored in long long type
6811 */
__RV_RSUB64(long long a,long long b)6812 __STATIC_FORCEINLINE long long __RV_RSUB64(long long a, long long b)
6813 {
6814 register long long result;
6815 __ASM volatile("rsub64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6816 return result;
6817 }
6818 /* ===== Inline Function End for 3.91. RSUB64 ===== */
6819
6820 /* ===== Inline Function Start for 3.92. RSUBW ===== */
6821 /**
6822 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
6823 * \brief RSUBW (32-bit Signed Halving Subtraction)
6824 * \details
6825 * **Type**: DSP
6826 *
6827 * **Syntax**:\n
6828 * ~~~
6829 * RSUBW Rd, Rs1, Rs2
6830 * ~~~
6831 *
6832 * **Purpose**:\n
6833 * Subtract 32-bit signed integers and the result is halved to avoid overflow or saturation.
6834 *
6835 * **Description**:\n
6836 * This instruction subtracts the first 32-bit signed integer in Rs2 from the first 32-bit
6837 * signed integer in Rs1. The result is first arithmetically right-shifted by 1 bit and then sign-extended
6838 * and written to Rd.
6839 *
6840 * **Examples**:\n
6841 * ~~~
6842 * * Rs1 = 0x7FFFFFFF, Rs2 = 0x80000000, Rd = 0x7FFFFFFF
6843 * * Rs1 = 0x80000000, Rs2 = 0x7FFFFFFF, Rd = 0x80000000
6844 * * Rs1 = 0x80000000, Rs2 = 0x40000000, Rd = 0xA0000000
6845 * ~~~
6846 *
6847 * **Operations**:\n
6848 * ~~~
6849 * RV32:
6850 * Rd[31:0] = (Rs1[31:0] - Rs2[31:0]) s>> 1;
6851 * RV64:
6852 * resw[31:0] = (Rs1[31:0] - Rs2[31:0]) s>> 1;
6853 * Rd[63:0] = SE(resw[31:0]);
6854 * ~~~
6855 *
6856 * \param [in] a int type of value stored in a
6857 * \param [in] b int type of value stored in b
6858 * \return value stored in long type
6859 */
__RV_RSUBW(int a,int b)6860 __STATIC_FORCEINLINE long __RV_RSUBW(int a, int b)
6861 {
6862 register long result;
6863 __ASM volatile("rsubw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
6864 return result;
6865 }
6866 /* ===== Inline Function End for 3.92. RSUBW ===== */
6867
6868 /* ===== Inline Function Start for 3.93. SCLIP8 ===== */
6869 /**
6870 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
6871 * \brief SCLIP8 (SIMD 8-bit Signed Clip Value)
6872 * \details
6873 * **Type**: SIMD
6874 *
6875 * **Syntax**:\n
6876 * ~~~
6877 * SCLIP8 Rd, Rs1, imm3u[2:0]
6878 * ~~~
6879 *
6880 * **Purpose**:\n
6881 * Limit the 8-bit signed integer elements of a register into a signed range simultaneously.
6882 *
6883 * **Description**:\n
6884 * This instruction limits the 8-bit signed integer elements stored in Rs1 into a signed
6885 * integer range between 2^imm3u-1 and -2^imm3u, and writes the limited results to Rd. For example, if
6886 * imm3u is 3, the 8-bit input values should be saturated between 7 and -8. If saturation is performed,
6887 * set OV bit to 1.
6888 *
6889 * **Operations**:\n
6890 * ~~~
6891 * src = Rs1.B[x];
6892 * if (src > (2^imm3u)-1) {
6893 * src = (2^imm3u)-1;
6894 * OV = 1;
6895 * } else if (src < -2^imm3u) {
6896 * src = -2^imm3u;
6897 * OV = 1;
6898 * }
6899 * Rd.B[x] = src
6900 * for RV32: x=3...0,
6901 * for RV64: x=7...0
6902 * ~~~
6903 *
6904 * \param [in] a unsigned long type of value stored in a
6905 * \param [in] b unsigned int type of value stored in b
6906 * \return value stored in unsigned long type
6907 */
/* `a` is captured into __a before the local `result` is declared, so an
 * argument expression that mentions a caller variable named `result` is
 * evaluated against the caller's variable, not the macro's uninitialised local.
 * `b` is passed through the "K" immediate constraint, so it has to be a
 * constant expression. */
#define __RV_SCLIP8(a, b)    \
    ({    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long result;    \
        __ASM volatile("sclip8 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b));    \
        result;    \
    })
6915 /* ===== Inline Function End for 3.93. SCLIP8 ===== */
6916
6917 /* ===== Inline Function Start for 3.94. SCLIP16 ===== */
6918 /**
6919 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
6920 * \brief SCLIP16 (SIMD 16-bit Signed Clip Value)
6921 * \details
6922 * **Type**: SIMD
6923 *
6924 * **Syntax**:\n
6925 * ~~~
6926 * SCLIP16 Rd, Rs1, imm4u[3:0]
6927 * ~~~
6928 *
6929 * **Purpose**:\n
6930 * Limit the 16-bit signed integer elements of a register into a signed range simultaneously.
6931 *
6932 * **Description**:\n
6933 * This instruction limits the 16-bit signed integer elements stored in Rs1 into a signed
 * integer range between 2^imm4u-1 and -2^imm4u, and writes the limited results to Rd. For example, if
6935 * imm4u is 3, the 16-bit input values should be saturated between 7 and -8. If saturation is performed,
6936 * set OV bit to 1.
6937 *
6938 * **Operations**:\n
6939 * ~~~
6940 * src = Rs1.H[x];
6941 * if (src > (2^imm4u)-1) {
6942 * src = (2^imm4u)-1;
6943 * OV = 1;
6944 * } else if (src < -2^imm4u) {
6945 * src = -2^imm4u;
6946 * OV = 1;
6947 * }
6948 * Rd.H[x] = src
6949 * for RV32: x=1...0,
6950 * for RV64: x=3...0
6951 * ~~~
6952 *
6953 * \param [in] a unsigned long type of value stored in a
6954 * \param [in] b unsigned int type of value stored in b
6955 * \return value stored in unsigned long type
6956 */
/* `a` is captured into __a before the local `result` is declared, so an
 * argument expression that mentions a caller variable named `result` is
 * evaluated against the caller's variable, not the macro's uninitialised local.
 * `b` is passed through the "K" immediate constraint, so it has to be a
 * constant expression. */
#define __RV_SCLIP16(a, b)    \
    ({    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long result;    \
        __ASM volatile("sclip16 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b));    \
        result;    \
    })
6964 /* ===== Inline Function End for 3.94. SCLIP16 ===== */
6965
6966 /* ===== Inline Function Start for 3.95. SCLIP32 ===== */
6967 /**
6968 * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
6969 * \brief SCLIP32 (SIMD 32-bit Signed Clip Value)
6970 * \details
6971 * **Type**: DSP
6972 *
6973 * **Syntax**:\n
6974 * ~~~
6975 * SCLIP32 Rd, Rs1, imm5u[4:0]
6976 * ~~~
6977 *
6978 * **Purpose**:\n
6979 * Limit the 32-bit signed integer elements of a register into a signed range simultaneously.
6980 *
6981 * **Description**:\n
6982 * This instruction limits the 32-bit signed integer elements stored in Rs1 into a signed
 * integer range between 2^imm5u-1 and -2^imm5u, and writes the limited results to Rd. For example, if
6984 * imm5u is 3, the 32-bit input values should be saturated between 7 and -8. If saturation is performed,
6985 * set OV bit to 1.
6986 *
6987 * **Operations**:\n
6988 * ~~~
6989 * src = Rs1.W[x];
6990 * if (src > (2^imm5u)-1) {
6991 * src = (2^imm5u)-1;
6992 * OV = 1;
6993 * } else if (src < -2^imm5u) {
6994 * src = -2^imm5u;
6995 * OV = 1;
6996 * }
6997 * Rd.W[x] = src
6998 * for RV32: x=0,
6999 * for RV64: x=1...0
7000 * ~~~
7001 *
7002 * \param [in] a long type of value stored in a
7003 * \param [in] b unsigned int type of value stored in b
7004 * \return value stored in long type
7005 */
/* `a` is captured into __a before the local `result` is declared, so an
 * argument expression that mentions a caller variable named `result` is
 * evaluated against the caller's variable, not the macro's uninitialised local.
 * `b` is passed through the "K" immediate constraint, so it has to be a
 * constant expression. */
#define __RV_SCLIP32(a, b)    \
    ({    \
        long __a = (long)(a);    \
        long result;    \
        __ASM volatile("sclip32 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b));    \
        result;    \
    })
7013 /* ===== Inline Function End for 3.95. SCLIP32 ===== */
7014
7015 /* ===== Inline Function Start for 3.96. SCMPLE8 ===== */
7016 /**
7017 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
7018 * \brief SCMPLE8 (SIMD 8-bit Signed Compare Less Than & Equal)
7019 * \details
7020 * **Type**: SIMD
7021 *
7022 * **Syntax**:\n
7023 * ~~~
7024 * SCMPLE8 Rd, Rs1, Rs2
7025 * ~~~
7026 *
7027 * **Purpose**:\n
7028 * Do 8-bit signed integer elements less than & equal comparisons simultaneously.
7029 *
7030 * **Description**:\n
7031 * This instruction compares the 8-bit signed integer elements in Rs1 with the 8-bit
7032 * signed integer elements in Rs2 to see if the one in Rs1 is less than or equal to the one in Rs2. If it is
7033 * true, the result is 0xFF; otherwise, the result is 0x0. The element comparison results are written to
 * Rd.
7035 *
7036 * **Operations**:\n
7037 * ~~~
7038 * Rd.B[x] = (Rs1.B[x] {le} Rs2.B[x])? 0xff : 0x0;
7039 * for RV32: x=3...0,
7040 * for RV64: x=7...0
7041 * ~~~
7042 *
7043 * \param [in] a unsigned long type of value stored in a
7044 * \param [in] b unsigned long type of value stored in b
7045 * \return value stored in unsigned long type
7046 */
__RV_SCMPLE8(unsigned long a,unsigned long b)7047 __STATIC_FORCEINLINE unsigned long __RV_SCMPLE8(unsigned long a, unsigned long b)
7048 {
7049 register unsigned long result;
7050 __ASM volatile("scmple8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
7051 return result;
7052 }
7053 /* ===== Inline Function End for 3.96. SCMPLE8 ===== */
7054
7055 /* ===== Inline Function Start for 3.97. SCMPLE16 ===== */
7056 /**
7057 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP
7058 * \brief SCMPLE16 (SIMD 16-bit Signed Compare Less Than & Equal)
7059 * \details
7060 * **Type**: SIMD
7061 *
7062 * **Syntax**:\n
7063 * ~~~
7064 * SCMPLE16 Rd, Rs1, Rs2
7065 * ~~~
7066 *
7067 * **Purpose**:\n
7068 * Do 16-bit signed integer elements less than & equal comparisons simultaneously.
7069 *
7070 * **Description**:\n
7071 * This instruction compares the 16-bit signed integer elements in Rs1 with the 16-bit
7072 * signed integer elements in Rs2 to see if the one in Rs1 is less than or equal to the one in Rs2. If it is
7073 * true, the result is 0xFFFF; otherwise, the result is 0x0. The element comparison results are written
7074 * to Rd.
7075 *
7076 * **Operations**:\n
7077 * ~~~
7078 * Rd.H[x] = (Rs1.H[x] {le} Rs2.H[x])? 0xffff : 0x0;
7079 * for RV32: x=1...0,
7080 * for RV64: x=3...0
7081 * ~~~
7082 *
7083 * \param [in] a unsigned long type of value stored in a
7084 * \param [in] b unsigned long type of value stored in b
7085 * \return value stored in unsigned long type
7086 */
__RV_SCMPLE16(unsigned long a,unsigned long b)7087 __STATIC_FORCEINLINE unsigned long __RV_SCMPLE16(unsigned long a, unsigned long b)
7088 {
7089 register unsigned long result;
7090 __ASM volatile("scmple16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
7091 return result;
7092 }
7093 /* ===== Inline Function End for 3.97. SCMPLE16 ===== */
7094
7095 /* ===== Inline Function Start for 3.98. SCMPLT8 ===== */
7096 /**
7097 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
7098 * \brief SCMPLT8 (SIMD 8-bit Signed Compare Less Than)
7099 * \details
7100 * **Type**: SIMD
7101 *
7102 * **Syntax**:\n
7103 * ~~~
7104 * SCMPLT8 Rd, Rs1, Rs2
7105 * ~~~
7106 *
7107 * **Purpose**:\n
7108 * Do 8-bit signed integer elements less than comparisons simultaneously.
7109 *
7110 * **Description**:\n
7111 * This instruction compares the 8-bit signed integer elements in Rs1 with the 8-bit
7112 * signed integer elements in Rs2 to see if the one in Rs1 is less than the one in Rs2. If it is true, the
7113 * result is 0xFF; otherwise, the result is 0x0. The element comparison results are written to Rd.
7114 *
7115 * **Operations**:\n
7116 * ~~~
7117 * Rd.B[x] = (Rs1.B[x] < Rs2.B[x])? 0xff : 0x0;
7118 * for RV32: x=3...0,
7119 * for RV64: x=7...0
7120 * ~~~
7121 *
7122 * \param [in] a unsigned long type of value stored in a
7123 * \param [in] b unsigned long type of value stored in b
7124 * \return value stored in unsigned long type
7125 */
__RV_SCMPLT8(unsigned long a,unsigned long b)7126 __STATIC_FORCEINLINE unsigned long __RV_SCMPLT8(unsigned long a, unsigned long b)
7127 {
7128 register unsigned long result;
7129 __ASM volatile("scmplt8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
7130 return result;
7131 }
7132 /* ===== Inline Function End for 3.98. SCMPLT8 ===== */
7133
7134 /* ===== Inline Function Start for 3.99. SCMPLT16 ===== */
7135 /**
7136 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP
7137 * \brief SCMPLT16 (SIMD 16-bit Signed Compare Less Than)
7138 * \details
7139 * **Type**: SIMD
7140 *
7141 * **Syntax**:\n
7142 * ~~~
7143 * SCMPLT16 Rd, Rs1, Rs2
7144 * ~~~
7145 *
7146 * **Purpose**:\n
7147 * Do 16-bit signed integer elements less than comparisons simultaneously.
7148 *
7149 * **Description**:\n
 * This instruction compares the 16-bit signed integer elements in Rs1 with the 16-bit
 * signed integer elements in Rs2 to see if the one in Rs1 is less than the one in Rs2. If it is true, the
7152 * result is 0xFFFF; otherwise, the result is 0x0. The element comparison results are written to Rd.
7153 *
7154 * **Operations**:\n
7155 * ~~~
7156 * Rd.H[x] = (Rs1.H[x] < Rs2.H[x])? 0xffff : 0x0;
7157 * for RV32: x=1...0,
7158 * for RV64: x=3...0
7159 * ~~~
7160 *
7161 * \param [in] a unsigned long type of value stored in a
7162 * \param [in] b unsigned long type of value stored in b
7163 * \return value stored in unsigned long type
7164 */
__RV_SCMPLT16(unsigned long a,unsigned long b)7165 __STATIC_FORCEINLINE unsigned long __RV_SCMPLT16(unsigned long a, unsigned long b)
7166 {
7167 register unsigned long result;
7168 __ASM volatile("scmplt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
7169 return result;
7170 }
7171 /* ===== Inline Function End for 3.99. SCMPLT16 ===== */
7172
7173 /* ===== Inline Function Start for 3.100. SLL8 ===== */
7174 /**
7175 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
7176 * \brief SLL8 (SIMD 8-bit Shift Left Logical)
7177 * \details
7178 * **Type**: SIMD
7179 *
7180 * **Syntax**:\n
7181 * ~~~
7182 * SLL8 Rd, Rs1, Rs2
7183 * ~~~
7184 *
7185 * **Purpose**:\n
7186 * Do 8-bit elements logical left shift operations simultaneously. The shift amount is a
7187 * variable from a GPR.
7188 *
7189 * **Description**:\n
7190 * The 8-bit elements in Rs1 are left-shifted logically. And the results are written to Rd.
7191 * The shifted out bits are filled with zero and the shift amount is specified by the low-order 3-bits of
7192 * the value in the Rs2 register.
7193 *
7194 * **Operations**:\n
7195 * ~~~
7196 * sa = Rs2[2:0];
7197 * Rd.B[x] = Rs1.B[x] << sa;
7198 * for RV32: x=3...0,
7199 * for RV64: x=7...0
7200 * ~~~
7201 *
7202 * \param [in] a unsigned long type of value stored in a
7203 * \param [in] b unsigned int type of value stored in b
7204 * \return value stored in unsigned long type
7205 */
__RV_SLL8(unsigned long a,unsigned int b)7206 __STATIC_FORCEINLINE unsigned long __RV_SLL8(unsigned long a, unsigned int b)
7207 {
7208 register unsigned long result;
7209 __ASM volatile("sll8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
7210 return result;
7211 }
7212 /* ===== Inline Function End for 3.100. SLL8 ===== */
7213
7214 /* ===== Inline Function Start for 3.101. SLLI8 ===== */
7215 /**
7216 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
7217 * \brief SLLI8 (SIMD 8-bit Shift Left Logical Immediate)
7218 * \details
7219 * **Type**: SIMD
7220 *
7221 * **Syntax**:\n
7222 * ~~~
7223 * SLLI8 Rd, Rs1, imm3u
7224 * ~~~
7225 *
7226 * **Purpose**:\n
7227 * Do 8-bit elements logical left shift operations simultaneously. The shift amount is an
7228 * immediate value.
7229 *
7230 * **Description**:\n
7231 * The 8-bit elements in Rs1 are left-shifted logically. And the results are written to Rd.
7232 * The shifted out bits are filled with zero and the shift amount is specified by the imm3u constant.
7233 *
7234 * **Operations**:\n
7235 * ~~~
7236 * sa = imm3u[2:0];
7237 * Rd.B[x] = Rs1.B[x] << sa;
7238 * for RV32: x=3...0,
7239 * for RV64: x=7...0
7240 * ~~~
7241 *
7242 * \param [in] a unsigned long type of value stored in a
7243 * \param [in] b unsigned int type of value stored in b
7244 * \return value stored in unsigned long type
7245 */
/* `a` is captured into __a before the local `result` is declared, so an
 * argument expression that mentions a caller variable named `result` is
 * evaluated against the caller's variable, not the macro's uninitialised local.
 * `b` is passed through the "K" immediate constraint, so it has to be a
 * constant expression. */
#define __RV_SLLI8(a, b)    \
    ({    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long result;    \
        __ASM volatile("slli8 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b));    \
        result;    \
    })
7253 /* ===== Inline Function End for 3.101. SLLI8 ===== */
7254
7255 /* ===== Inline Function Start for 3.102. SLL16 ===== */
7256 /**
7257 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
7258 * \brief SLL16 (SIMD 16-bit Shift Left Logical)
7259 * \details
7260 * **Type**: SIMD
7261 *
7262 * **Syntax**:\n
7263 * ~~~
7264 * SLL16 Rd, Rs1, Rs2
7265 * ~~~
7266 *
7267 * **Purpose**:\n
7268 * Do 16-bit elements logical left shift operations simultaneously. The shift amount is a
7269 * variable from a GPR.
7270 *
7271 * **Description**:\n
7272 * The 16-bit elements in Rs1 are left-shifted logically. And the results are written to Rd.
7273 * The shifted out bits are filled with zero and the shift amount is specified by the low-order 4-bits of
7274 * the value in the Rs2 register.
7275 *
7276 * **Operations**:\n
7277 * ~~~
7278 * sa = Rs2[3:0];
7279 * Rd.H[x] = Rs1.H[x] << sa;
7280 * for RV32: x=1...0,
7281 * for RV64: x=3...0
7282 * ~~~
7283 *
7284 * \param [in] a unsigned long type of value stored in a
7285 * \param [in] b unsigned int type of value stored in b
7286 * \return value stored in unsigned long type
7287 */
__RV_SLL16(unsigned long a,unsigned int b)7288 __STATIC_FORCEINLINE unsigned long __RV_SLL16(unsigned long a, unsigned int b)
7289 {
7290 register unsigned long result;
7291 __ASM volatile("sll16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
7292 return result;
7293 }
7294 /* ===== Inline Function End for 3.102. SLL16 ===== */
7295
7296 /* ===== Inline Function Start for 3.103. SLLI16 ===== */
7297 /**
7298 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
7299 * \brief SLLI16 (SIMD 16-bit Shift Left Logical Immediate)
7300 * \details
7301 * **Type**: SIMD
7302 *
7303 * **Syntax**:\n
7304 * ~~~
7305 * SLLI16 Rd, Rs1, imm4[3:0]
7306 * ~~~
7307 *
7308 * **Purpose**:\n
7309 * Do 16-bit element logical left shift operations simultaneously. The shift amount is an
7310 * immediate value.
7311 *
7312 * **Description**:\n
7313 * The 16-bit elements in Rs1 are left-shifted logically. The shifted out bits are filled with
7314 * zero and the shift amount is specified by the imm4[3:0] constant. And the results are written to Rd.
7315 *
7316 * **Operations**:\n
7317 * ~~~
7318 * sa = imm4[3:0];
7319 * Rd.H[x] = Rs1.H[x] << sa;
7320 * for RV32: x=1...0,
7321 * for RV64: x=3...0
7322 * ~~~
7323 *
7324 * \param [in] a unsigned long type of value stored in a
7325 * \param [in] b unsigned int type of value stored in b
7326 * \return value stored in unsigned long type
7327 */
/* `a` is captured into __a before the local `result` is declared, so an
 * argument expression that mentions a caller variable named `result` is
 * evaluated against the caller's variable, not the macro's uninitialised local.
 * `b` is passed through the "K" immediate constraint, so it has to be a
 * constant expression. */
#define __RV_SLLI16(a, b)    \
    ({    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long result;    \
        __ASM volatile("slli16 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b));    \
        result;    \
    })
7335 /* ===== Inline Function End for 3.103. SLLI16 ===== */
7336
7337 /* ===== Inline Function Start for 3.104. SMAL ===== */
7338 /**
7339 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
7340 * \brief SMAL (Signed Multiply Halfs & Add 64-bit)
7341 * \details
7342 * **Type**: Partial-SIMD
7343 *
7344 * **Syntax**:\n
7345 * ~~~
7346 * SMAL Rd, Rs1, Rs2
7347 * ~~~
7348 *
7349 * **Purpose**:\n
7350 * Multiply the signed bottom 16-bit content of the 32-bit elements of a register with the top
7351 * 16-bit content of the same 32-bit elements of the same register, and add the results with a 64-bit
7352 * value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is written back
7353 * to another even/odd pair of registers (RV32) or a register (RV64).
7354 *
7355 * **RV32 Description**:\n
7356 * This instruction multiplies the bottom 16-bit content of the lower 32-bit of Rs2 with the top 16-bit
7357 * content of the lower 32-bit of Rs2 and adds the result with the 64-bit value of an even/odd pair of
7358 * registers specified by Rs1(4,1). The 64-bit addition result is written back to an even/odd pair of
 * registers specified by Rd(4,1). The 16-bit values of Rs2, and the 64-bit value of the Rs1(4,1)
 * register-pair are treated as signed integers.
7361 * Rx(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
7362 * includes register 2d and 2d+1.
7363 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
7364 * register of the pair contains the low 32-bit of the operand.
7365 *
7366 * **RV64 Description**:\n
7367 * This instruction multiplies the bottom 16-bit content of the 32-bit elements of Rs2 with the top 16-bit
 * content of the same 32-bit elements of Rs2 and adds the results with the 64-bit value of Rs1. The
 * 64-bit addition result is written back to Rd. The 16-bit values of Rs2, and the 64-bit value of Rs1 are
7370 * treated as signed integers.
7371 *
7372 * **Operations**:\n
7373 * ~~~
7374 * RV32:
7375 * Mres[31:0] = Rs2.H[1] * Rs2.H[0];
 * Idx0 = CONCAT(Rs1(4,1),1'b0); Idx1 = CONCAT(Rs1(4,1),1'b1);
7377 * Idx2 = CONCAT(Rd(4,1),1'b0); Idx3 = CONCAT(Rd(4,1),1'b1);
7378 * R[Idx3].R[Idx2] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
7379 * RV64:
7380 * Mres[0][31:0] = Rs2.W[0].H[1] * Rs2.W[0].H[0];
7381 * Mres[1][31:0] = Rs2.W[1].H[1] * Rs2.W[1].H[0];
7382 * Rd = Rs1 + SE64(Mres[1][31:0]) + SE64(Mres[0][31:0]);
7383 * ~~~
7384 *
7385 * \param [in] a long long type of value stored in a
7386 * \param [in] b unsigned long type of value stored in b
7387 * \return value stored in long long type
7388 */
__RV_SMAL(long long a,unsigned long b)7389 __STATIC_FORCEINLINE long long __RV_SMAL(long long a, unsigned long b)
7390 {
7391 register long long result;
7392 __ASM volatile("smal %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
7393 return result;
7394 }
7395 /* ===== Inline Function End for 3.104. SMAL ===== */
7396
7397 /* ===== Inline Function Start for 3.105.1. SMALBB ===== */
7398 /**
7399 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
7400 * \brief SMALBB (Signed Multiply Bottom Halfs & Add 64-bit)
7401 * \details
7402 * **Type**: DSP (64-bit Profile)
7403 *
7404 * **Syntax**:\n
7405 * ~~~
7406 * SMALBB Rd, Rs1, Rs2
7407 * SMALBT Rd, Rs1, Rs2
7408 * SMALTT Rd, Rs1, Rs2
7409 * ~~~
7410 *
7411 * **Purpose**:\n
7412 * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit
7413 * content of the corresponding 32-bit elements of another register and add the results with a 64-bit
7414 * value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is written back
7415 * to the register-pair (RV32) or the register (RV64).
7416 * * SMALBB: rt pair + bottom*bottom (all 32-bit elements)
 * * SMALBT: rt pair + bottom*top (all 32-bit elements)
 * * SMALTT: rt pair + top*top (all 32-bit elements)
7419 *
7420 * **RV32 Description**:\n
7421 * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
7422 * content of Rs2.
7423 * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
7424 * content of Rs2.
7425 * For the `SMALTT` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
7426 * of Rs2.
7427 * The multiplication result is added with the 64-bit value of an even/odd pair of registers specified by
7428 * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
7429 * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
7430 * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
7431 * includes register 2d and 2d+1.
7432 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
7433 * register of the pair contains the low 32-bit of the operand.
7434 *
7435 * **RV64 Description**:\n
7436 * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
7437 * with the bottom 16-bit content of the 32-bit elements of Rs2.
7438 * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
7439 * with the top 16-bit content of the 32-bit elements of Rs2.
7440 * For the `SMALTT` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
7441 * the top 16-bit content of the 32-bit elements of Rs2.
7442 * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written
7443 * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
7444 * integers.
7445 *
7446 * **Operations**:\n
7447 * ~~~
7448 * RV32:
7449 * Mres[31:0] = Rs1.H[0] * Rs2.H[0]; // SMALBB
7450 * Mres[31:0] = Rs1.H[0] * Rs2.H[1]; // SMALBT
7451 * Mres[31:0] = Rs1.H[1] * Rs2.H[1]; // SMALTT
7452 * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
7453 * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
7454 * RV64:
7455 * // SMALBB
7456 * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[0];
7457 * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[0];
7458 * // SMALBT
7459 * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[1];
7460 * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[1];
7461 * // SMALTT
7462 * Mres[0][31:0] = Rs1.W[0].H[1] * Rs2.W[0].H[1];
7463 * Mres[1][31:0] = Rs1.W[1].H[1] * Rs2.W[1].H[1];
7464 * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
7465 * ~~~
7466 *
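 * \param [in] t 64-bit signed accumulator (Rd) to which the multiplication result(s) are added
 * \param [in] a first source operand (Rs1); the bottom signed 16-bit half of each 32-bit element is used
 * \param [in] b second source operand (Rs2); the bottom signed 16-bit half of each 32-bit element is used
 * \return 64-bit signed accumulator after the addition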
7471 */
__RV_SMALBB(long long t,unsigned long a,unsigned long b)7472 __STATIC_FORCEINLINE long long __RV_SMALBB(long long t, unsigned long a, unsigned long b)
7473 {
7474 __ASM volatile("smalbb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
7475 return t;
7476 }
7477 /* ===== Inline Function End for 3.105.1. SMALBB ===== */
7478
7479 /* ===== Inline Function Start for 3.105.2. SMALBT ===== */
7480 /**
7481 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
7482 * \brief SMALBT (Signed Multiply Bottom Half & Top Half & Add 64-bit)
7483 * \details
7484 * **Type**: DSP (64-bit Profile)
7485 *
7486 * **Syntax**:\n
7487 * ~~~
7488 * SMALBB Rd, Rs1, Rs2
7489 * SMALBT Rd, Rs1, Rs2
7490 * SMALTT Rd, Rs1, Rs2
7491 * ~~~
7492 *
7493 * **Purpose**:\n
7494 * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit
7495 * content of the corresponding 32-bit elements of another register and add the results with a 64-bit
7496 * value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is written back
7497 * to the register-pair (RV32) or the register (RV64).
7498 * * SMALBB: rt pair + bottom*bottom (all 32-bit elements)
 * * SMALBT: rt pair + bottom*top (all 32-bit elements)
 * * SMALTT: rt pair + top*top (all 32-bit elements)
7501 *
7502 * **RV32 Description**:\n
7503 * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
7504 * content of Rs2.
7505 * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
7506 * content of Rs2.
7507 * For the `SMALTT` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
7508 * of Rs2.
7509 * The multiplication result is added with the 64-bit value of an even/odd pair of registers specified by
7510 * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
7511 * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
7512 * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
7513 * includes register 2d and 2d+1.
7514 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
7515 * register of the pair contains the low 32-bit of the operand.
7516 *
7517 * **RV64 Description**:\n
7518 * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
7519 * with the bottom 16-bit content of the 32-bit elements of Rs2.
7520 * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
7521 * with the top 16-bit content of the 32-bit elements of Rs2.
7522 * For the `SMALTT` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
7523 * the top 16-bit content of the 32-bit elements of Rs2.
7524 * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written
7525 * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
7526 * integers.
7527 *
7528 * **Operations**:\n
7529 * ~~~
7530 * RV32:
7531 * Mres[31:0] = Rs1.H[0] * Rs2.H[0]; // SMALBB
7532 * Mres[31:0] = Rs1.H[0] * Rs2.H[1]; // SMALBT
7533 * Mres[31:0] = Rs1.H[1] * Rs2.H[1]; // SMALTT
7534 * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
7535 * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
7536 * RV64:
7537 * // SMALBB
7538 * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[0];
7539 * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[0];
7540 * // SMALBT
7541 * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[1];
7542 * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[1];
7543 * // SMALTT
7544 * Mres[0][31:0] = Rs1.W[0].H[1] * Rs2.W[0].H[1];
7545 * Mres[1][31:0] = Rs1.W[1].H[1] * Rs2.W[1].H[1];
7546 * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
7547 * ~~~
7548 *
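 * \param [in] t 64-bit signed accumulator (Rd) to which the multiplication result(s) are added
 * \param [in] a first source operand (Rs1); the bottom signed 16-bit half of each 32-bit element is used
 * \param [in] b second source operand (Rs2); the top signed 16-bit half of each 32-bit element is used
 * \return 64-bit signed accumulator after the addition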
7553 */
__RV_SMALBT(long long t,unsigned long a,unsigned long b)7554 __STATIC_FORCEINLINE long long __RV_SMALBT(long long t, unsigned long a, unsigned long b)
7555 {
7556 __ASM volatile("smalbt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
7557 return t;
7558 }
7559 /* ===== Inline Function End for 3.105.2. SMALBT ===== */
7560
7561 /* ===== Inline Function Start for 3.105.3. SMALTT ===== */
7562 /**
7563 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
7564 * \brief SMALTT (Signed Multiply Top Halfs & Add 64-bit)
7565 * \details
7566 * **Type**: DSP (64-bit Profile)
7567 *
7568 * **Syntax**:\n
7569 * ~~~
7570 * SMALBB Rd, Rs1, Rs2
7571 * SMALBT Rd, Rs1, Rs2
7572 * SMALTT Rd, Rs1, Rs2
7573 * ~~~
7574 *
7575 * **Purpose**:\n
7576 * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit
7577 * content of the corresponding 32-bit elements of another register and add the results with a 64-bit
7578 * value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is written back
7579 * to the register-pair (RV32) or the register (RV64).
7580 * * SMALBB: rt pair + bottom*bottom (all 32-bit elements)
 * * SMALBT: rt pair + bottom*top (all 32-bit elements)
 * * SMALTT: rt pair + top*top (all 32-bit elements)
7583 *
7584 * **RV32 Description**:\n
7585 * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
7586 * content of Rs2.
7587 * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
7588 * content of Rs2.
7589 * For the `SMALTT` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
7590 * of Rs2.
7591 * The multiplication result is added with the 64-bit value of an even/odd pair of registers specified by
7592 * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
7593 * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
7594 * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
7595 * includes register 2d and 2d+1.
7596 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
7597 * register of the pair contains the low 32-bit of the operand.
7598 *
7599 * **RV64 Description**:\n
7600 * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
7601 * with the bottom 16-bit content of the 32-bit elements of Rs2.
7602 * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
7603 * with the top 16-bit content of the 32-bit elements of Rs2.
7604 * For the `SMALTT` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
7605 * the top 16-bit content of the 32-bit elements of Rs2.
7606 * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written
7607 * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
7608 * integers.
7609 *
7610 * **Operations**:\n
7611 * ~~~
7612 * RV32:
7613 * Mres[31:0] = Rs1.H[0] * Rs2.H[0]; // SMALBB
7614 * Mres[31:0] = Rs1.H[0] * Rs2.H[1]; // SMALBT
7615 * Mres[31:0] = Rs1.H[1] * Rs2.H[1]; // SMALTT
7616 * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
7617 * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
7618 * RV64:
7619 * // SMALBB
7620 * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[0];
7621 * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[0];
7622 * // SMALBT
7623 * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[1];
7624 * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[1];
7625 * // SMALTT
7626 * Mres[0][31:0] = Rs1.W[0].H[1] * Rs2.W[0].H[1];
7627 * Mres[1][31:0] = Rs1.W[1].H[1] * Rs2.W[1].H[1];
7628 * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
7629 * ~~~
7630 *
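 * \param [in] t 64-bit signed accumulator (Rd) to which the multiplication result(s) are added
 * \param [in] a first source operand (Rs1); the top signed 16-bit half of each 32-bit element is used
 * \param [in] b second source operand (Rs2); the top signed 16-bit half of each 32-bit element is used
 * \return 64-bit signed accumulator after the addition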
7635 */
__RV_SMALTT(long long t,unsigned long a,unsigned long b)7636 __STATIC_FORCEINLINE long long __RV_SMALTT(long long t, unsigned long a, unsigned long b)
7637 {
7638 __ASM volatile("smaltt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
7639 return t;
7640 }
7641 /* ===== Inline Function End for 3.105.3. SMALTT ===== */
7642
7643 /* ===== Inline Function Start for 3.106.1. SMALDA ===== */
7644 /**
7645 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
7646 * \brief SMALDA (Signed Multiply Two Halfs and Two Adds 64-bit)
7647 * \details
7648 * **Type**: DSP (64-bit Profile)
7649 *
7650 * **Syntax**:\n
7651 * ~~~
7652 * SMALDA Rd, Rs1, Rs2
7653 * SMALXDA Rd, Rs1, Rs2
7654 * ~~~
7655 *
7656 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers, and then
 * add the two 32-bit results and the 64-bit value of an even/odd pair of registers (RV32) or a
 * register (RV64) together.
 * * SMALDA: rt pair + top*top + bottom*bottom (all 32-bit elements)
 * * SMALXDA: rt pair + top*bottom + bottom*top (all 32-bit elements)
7661 *
7662 * **RV32 Description**:\n
7663 * For the `SMALDA` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
7664 * content of Rs2 and then adds the result to the result of multiplying the top 16-bit content of Rs1 with
7665 * the top 16-bit content of Rs2 with unlimited precision.
7666 * For the `SMALXDA` instruction, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit
7667 * content of Rs2 and then adds the result to the result of multiplying the bottom 16-bit content of Rs1
7668 * with the top 16-bit content of Rs2 with unlimited precision.
7669 * The result is added to the 64-bit value of an even/odd pair of registers specified by Rd(4,1). The 64-
7670 * bit addition result is written back to the register-pair. The 16-bit values of Rs1 and Rs2, and the 64-
7671 * bit value of the register-pair are treated as signed integers.
7672 * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
7673 * includes register 2d and 2d+1.
7674 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
7675 * register of the pair contains the low 32-bit of the operand.
7676 *
7677 * **RV64 Description**:\n
7678 * For the `SMALDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
7679 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
7680 * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-
7681 * bit elements of Rs2 with unlimited precision.
7682 * For the `SMALXDA` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1
7683 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
7684 * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the
7685 * 32-bit elements of Rs2 with unlimited precision.
7686 * The results are added to the 64-bit value of Rd. The 64-bit addition result is written back to Rd. The
7687 * 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed integers.
7688 *
7689 * **Operations**:\n
7690 * ~~~
7691 * RV32:
7692 * // SMALDA
7693 * Mres0[31:0] = (Rs1.H[0] * Rs2.H[0]);
7694 * Mres1[31:0] = (Rs1.H[1] * Rs2.H[1]);
7695 * // SMALXDA
7696 * Mres0[31:0] = (Rs1.H[0] * Rs2.H[1]);
7697 * Mres1[31:0] = (Rs1.H[1] * Rs2.H[0]);
7698 * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
7699 * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres0[31:0]) + SE64(Mres1[31:0]);
7700 * RV64:
7701 * // SMALDA
7702 * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
7703 * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
7704 * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
7705 * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
7706 * // SMALXDA
7707 * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
7708 * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]);
7709 * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
7710 * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]);
7711 * Rd = Rd + SE64(Mres0[0][31:0]) + SE64(Mres1[0][31:0]) + SE64(Mres0[1][31:0]) +
7712 * SE64(Mres1[1][31:0]);
7713 * ~~~
7714 *
 * \param [in] t 64-bit signed accumulator (Rd) to which the two products are added
 * \param [in] a first source operand (Rs1) holding signed 16-bit halves in each 32-bit element
 * \param [in] b second source operand (Rs2) holding signed 16-bit halves in each 32-bit element
 * \return 64-bit signed accumulator after the addition
7719 */
__RV_SMALDA(long long t,unsigned long a,unsigned long b)7720 __STATIC_FORCEINLINE long long __RV_SMALDA(long long t, unsigned long a, unsigned long b)
7721 {
7722 __ASM volatile("smalda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
7723 return t;
7724 }
7725 /* ===== Inline Function End for 3.106.1. SMALDA ===== */
7726
7727 /* ===== Inline Function Start for 3.106.2. SMALXDA ===== */
7728 /**
7729 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
7730 * \brief SMALXDA (Signed Crossed Multiply Two Halfs and Two Adds 64-bit)
7731 * \details
7732 * **Type**: DSP (64-bit Profile)
7733 *
7734 * **Syntax**:\n
7735 * ~~~
7736 * SMALDA Rd, Rs1, Rs2
7737 * SMALXDA Rd, Rs1, Rs2
7738 * ~~~
7739 *
7740 * **Purpose**:\n
 * Do two signed 16-bit multiplications from the 32-bit elements of two registers, and then
 * add the two 32-bit results and the 64-bit value of an even/odd pair of registers (RV32) or a
 * register (RV64) together.
 * * SMALDA: rt pair + top*top + bottom*bottom (all 32-bit elements)
 * * SMALXDA: rt pair + top*bottom + bottom*top (all 32-bit elements)
7745 *
7746 * **RV32 Description**:\n
7747 * For the `SMALDA` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
7748 * content of Rs2 and then adds the result to the result of multiplying the top 16-bit content of Rs1 with
7749 * the top 16-bit content of Rs2 with unlimited precision.
7750 * For the `SMALXDA` instruction, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit
7751 * content of Rs2 and then adds the result to the result of multiplying the bottom 16-bit content of Rs1
7752 * with the top 16-bit content of Rs2 with unlimited precision.
7753 * The result is added to the 64-bit value of an even/odd pair of registers specified by Rd(4,1). The 64-
7754 * bit addition result is written back to the register-pair. The 16-bit values of Rs1 and Rs2, and the 64-
7755 * bit value of the register-pair are treated as signed integers.
7756 * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
7757 * includes register 2d and 2d+1.
7758 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
7759 * register of the pair contains the low 32-bit of the operand.
7760 *
7761 * **RV64 Description**:\n
7762 * For the `SMALDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
7763 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
7764 * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-
7765 * bit elements of Rs2 with unlimited precision.
7766 * For the `SMALXDA` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1
7767 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
7768 * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the
7769 * 32-bit elements of Rs2 with unlimited precision.
7770 * The results are added to the 64-bit value of Rd. The 64-bit addition result is written back to Rd. The
7771 * 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed integers.
7772 *
7773 * **Operations**:\n
7774 * ~~~
7775 * RV32:
7776 * // SMALDA
7777 * Mres0[31:0] = (Rs1.H[0] * Rs2.H[0]);
7778 * Mres1[31:0] = (Rs1.H[1] * Rs2.H[1]);
7779 * // SMALXDA
7780 * Mres0[31:0] = (Rs1.H[0] * Rs2.H[1]);
7781 * Mres1[31:0] = (Rs1.H[1] * Rs2.H[0]);
7782 * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
7783 * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres0[31:0]) + SE64(Mres1[31:0]);
7784 * RV64:
7785 * // SMALDA
7786 * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
7787 * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
7788 * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
7789 * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
7790 * // SMALXDA
7791 * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
7792 * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]);
7793 * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
7794 * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]);
7795 * Rd = Rd + SE64(Mres0[0][31:0]) + SE64(Mres1[0][31:0]) + SE64(Mres0[1][31:0]) +
7796 * SE64(Mres1[1][31:0]);
7797 * ~~~
7798 *
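 * \param [in] t 64-bit signed accumulator (Rd) to which the two crossed products are added
 * \param [in] a first source operand (Rs1) holding signed 16-bit halves in each 32-bit element
 * \param [in] b second source operand (Rs2) holding signed 16-bit halves in each 32-bit element
 * \return 64-bit signed accumulator after the addition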
7803 */
__RV_SMALXDA(long long t,unsigned long a,unsigned long b)7804 __STATIC_FORCEINLINE long long __RV_SMALXDA(long long t, unsigned long a, unsigned long b)
7805 {
7806 __ASM volatile("smalxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
7807 return t;
7808 }
7809 /* ===== Inline Function End for 3.106.2. SMALXDA ===== */
7810
7811 /* ===== Inline Function Start for 3.107.1. SMALDS ===== */
7812 /**
7813 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
7814 * \brief SMALDS (Signed Multiply Two Halfs & Subtract & Add 64-bit)
7815 * \details
7816 * **Type**: DSP (64-bit Profile)
7817 *
7818 * **Syntax**:\n
7819 * ~~~
7820 * SMALDS Rd, Rs1, Rs2
7821 * SMALDRS Rd, Rs1, Rs2
7822 * SMALXDS Rd, Rs1, Rs2
7823 * ~~~
7824 *
7825 * **Purpose**:\n
7826 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
7827 * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
 * the 64-bit value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is
 * written back to the register-pair (RV32) or the register (RV64).
7830 * * SMALDS: rt pair + (top*top - bottom*bottom) (all 32-bit elements)
7831 * * SMALDRS: rt pair + (bottom*bottom - top*top) (all 32-bit elements)
7832 * * SMALXDS: rt pair + (top*bottom - bottom*top) (all 32-bit elements)
7833 *
7834 * **RV32 Description**:\n
7835 * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
7836 * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
7837 * Rs1 with the top 16-bit content of Rs2.
7838 * For the `SMALDRS` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
7839 * of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of Rs1
7840 * with the bottom 16-bit content of Rs2.
7841 * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
7842 * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
7843 * Rs1 with the bottom 16-bit content of Rs2.
7844 * The subtraction result is then added to the 64-bit value of an even/odd pair of registers specified by
7845 * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
7846 * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
7847 * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
7848 * includes register 2d and 2d+1.
7849 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
7850 * register of the pair contains the low 32-bit of the operand.
7851 *
7852 * **RV64 Description**:\n
7853 * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
7854 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
7855 * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content
7856 * of the 32-bit elements of Rs2.
7857 * For the `SMALDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
7858 * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
7859 * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
7860 * the 32-bit elements of Rs2.
7861 * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
7862 * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
7863 * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
7864 * content of the 32-bit elements of Rs2.
7865 * The subtraction results are then added to the 64-bit value of Rd. The 64-bit addition result is written
7866 * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
7867 * integers.
7868 *
7869 * **Operations**:\n
7870 * ~~~
 * RV32:
 * Mres[31:0] = (Rs1.H[1] * Rs2.H[1]) - (Rs1.H[0] * Rs2.H[0]); // SMALDS
 * Mres[31:0] = (Rs1.H[0] * Rs2.H[0]) - (Rs1.H[1] * Rs2.H[1]); // SMALDRS
 * Mres[31:0] = (Rs1.H[1] * Rs2.H[0]) - (Rs1.H[0] * Rs2.H[1]); // SMALXDS
 * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
 * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
 * RV64:
 * // SMALDS
 * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]) - (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
 * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]) - (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
 * // SMALDRS
 * Mres[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]) - (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
 * Mres[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]) - (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
 * // SMALXDS
 * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]) - (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
 * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]) - (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
 * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
7888 * ~~~
7889 *
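 * \param [in] t 64-bit signed accumulator (Rd) to which the difference of the two products is added
 * \param [in] a first source operand (Rs1) holding signed 16-bit halves in each 32-bit element
 * \param [in] b second source operand (Rs2) holding signed 16-bit halves in each 32-bit element
 * \return 64-bit signed accumulator after the addition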
7894 */
__RV_SMALDS(long long t,unsigned long a,unsigned long b)7895 __STATIC_FORCEINLINE long long __RV_SMALDS(long long t, unsigned long a, unsigned long b)
7896 {
7897 __ASM volatile("smalds %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
7898 return t;
7899 }
7900 /* ===== Inline Function End for 3.107.1. SMALDS ===== */
7901
7902 /* ===== Inline Function Start for 3.107.2. SMALDRS ===== */
7903 /**
7904 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
 * \brief SMALDRS (Signed Multiply Two Halfs & Reverse Subtract & Add 64-bit)
7906 * \details
7907 * **Type**: DSP (64-bit Profile)
7908 *
7909 * **Syntax**:\n
7910 * ~~~
7911 * SMALDS Rd, Rs1, Rs2
7912 * SMALDRS Rd, Rs1, Rs2
7913 * SMALXDS Rd, Rs1, Rs2
7914 * ~~~
7915 *
7916 * **Purpose**:\n
7917 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
7918 * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
 * the 64-bit value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is
 * written back to the register-pair (RV32) or the register (RV64).
7921 * * SMALDS: rt pair + (top*top - bottom*bottom) (all 32-bit elements)
7922 * * SMALDRS: rt pair + (bottom*bottom - top*top) (all 32-bit elements)
7923 * * SMALXDS: rt pair + (top*bottom - bottom*top) (all 32-bit elements)
7924 *
7925 * **RV32 Description**:\n
7926 * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
7927 * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
7928 * Rs1 with the top 16-bit content of Rs2.
7929 * For the `SMALDRS` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
7930 * of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of Rs1
7931 * with the bottom 16-bit content of Rs2.
7932 * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
7933 * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
7934 * Rs1 with the bottom 16-bit content of Rs2.
7935 * The subtraction result is then added to the 64-bit value of an even/odd pair of registers specified by
7936 * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
7937 * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
7938 * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
7939 * includes register 2d and 2d+1.
7940 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
7941 * register of the pair contains the low 32-bit of the operand.
7942 *
7943 * **RV64 Description**:\n
7944 * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
7945 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
7946 * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content
7947 * of the 32-bit elements of Rs2.
7948 * For the `SMALDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
7949 * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
7950 * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
7951 * the 32-bit elements of Rs2.
7952 * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
7953 * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
7954 * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
7955 * content of the 32-bit elements of Rs2.
7956 * The subtraction results are then added to the 64-bit value of Rd. The 64-bit addition result is written
7957 * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
7958 * integers.
7959 *
7960 * **Operations**:\n
7961 * ~~~
 * RV32:
 * Mres[31:0] = (Rs1.H[1] * Rs2.H[1]) - (Rs1.H[0] * Rs2.H[0]); // SMALDS
 * Mres[31:0] = (Rs1.H[0] * Rs2.H[0]) - (Rs1.H[1] * Rs2.H[1]); // SMALDRS
 * Mres[31:0] = (Rs1.H[1] * Rs2.H[0]) - (Rs1.H[0] * Rs2.H[1]); // SMALXDS
 * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
 * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
 * RV64:
 * // SMALDS
 * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]) - (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
 * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]) - (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
 * // SMALDRS
 * Mres[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]) - (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
 * Mres[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]) - (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
 * // SMALXDS
 * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]) - (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
 * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]) - (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
 * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
7979 * ~~~
7980 *
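 * \param [in] t 64-bit signed accumulator (Rd) to which the difference of the two products is added
 * \param [in] a first source operand (Rs1) holding signed 16-bit halves in each 32-bit element
 * \param [in] b second source operand (Rs2) holding signed 16-bit halves in each 32-bit element
 * \return 64-bit signed accumulator after the addition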
7985 */
__RV_SMALDRS(long long t,unsigned long a,unsigned long b)7986 __STATIC_FORCEINLINE long long __RV_SMALDRS(long long t, unsigned long a, unsigned long b)
7987 {
7988 __ASM volatile("smaldrs %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
7989 return t;
7990 }
7991 /* ===== Inline Function End for 3.107.2. SMALDRS ===== */
7992
7993 /* ===== Inline Function Start for 3.107.3. SMALXDS ===== */
7994 /**
7995 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
 * \brief SMALXDS (Signed Crossed Multiply Two Halfs & Subtract & Add 64-bit)
7997 * \details
7998 * **Type**: DSP (64-bit Profile)
7999 *
8000 * **Syntax**:\n
8001 * ~~~
8002 * SMALDS Rd, Rs1, Rs2
8003 * SMALDRS Rd, Rs1, Rs2
8004 * SMALXDS Rd, Rs1, Rs2
8005 * ~~~
8006 *
8007 * **Purpose**:\n
8008 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
8009 * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
 * the 64-bit value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is
 * written back to the register-pair (RV32) or the register (RV64).
8012 * * SMALDS: rt pair + (top*top - bottom*bottom) (all 32-bit elements)
8013 * * SMALDRS: rt pair + (bottom*bottom - top*top) (all 32-bit elements)
8014 * * SMALXDS: rt pair + (top*bottom - bottom*top) (all 32-bit elements)
8015 *
8016 * **RV32 Description**:\n
8017 * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
8018 * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
8019 * Rs1 with the top 16-bit content of Rs2.
8020 * For the `SMALDRS` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
8021 * of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of Rs1
8022 * with the bottom 16-bit content of Rs2.
8023 * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
8024 * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
8025 * Rs1 with the bottom 16-bit content of Rs2.
8026 * The subtraction result is then added to the 64-bit value of an even/odd pair of registers specified by
8027 * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
8028 * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
8029 * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
8030 * includes register 2d and 2d+1.
8031 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
8032 * register of the pair contains the low 32-bit of the operand.
8033 *
8034 * **RV64 Description**:\n
8035 * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
8036 * with the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
8037 * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content
8038 * of the 32-bit elements of Rs2.
8039 * For the `SMALDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
8040 * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
8041 * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
8042 * the 32-bit elements of Rs2.
8043 * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
8044 * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
8045 * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
8046 * content of the 32-bit elements of Rs2.
8047 * The subtraction results are then added to the 64-bit value of Rd. The 64-bit addition result is written
8048 * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
8049 * integers.
8050 *
8051 * **Operations**:\n
8052 * ~~~
8053 * * RV32:
8054 * Mres[31:0] = (Rs1.H[1] * Rs2.H[1]) - (Rs1.H[0] * Rs2.H[0]); // SMALDS
8055 * Mres[31:0] = (Rs1.H[0] * Rs2.H[0]) - (Rs1.H[1] * Rs2.H[1]); // SMALDRS
8056 * Mres[31:0] = (Rs1.H[1] * Rs2.H[0]) - (Rs1.H[0] * Rs2.H[1]); // SMALXDS
8057 * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
8058 * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
8059 * * RV64:
8060 * // SMALDS
8061 * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]) - (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
 * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]) - (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
8063 * // SMALDRS
8064 * Mres[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]) - (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
 * Mres[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]) - (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
8066 * // SMALXDS
8067 * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]) - (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
 * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]) - (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
8069 * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
8070 * ~~~
8071 *
8072 * \param [in] t long long type of value stored in t
8073 * \param [in] a unsigned long type of value stored in a
8074 * \param [in] b unsigned long type of value stored in b
8075 * \return value stored in long long type
8076 */
__RV_SMALXDS(long long t,unsigned long a,unsigned long b)8077 __STATIC_FORCEINLINE long long __RV_SMALXDS(long long t, unsigned long a, unsigned long b)
8078 {
8079 __ASM volatile("smalxds %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
8080 return t;
8081 }
8082 /* ===== Inline Function End for 3.107.3. SMALXDS ===== */
8083
8084 /* ===== Inline Function Start for 3.108. SMAR64 ===== */
8085 /**
8086 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
8087 * \brief SMAR64 (Signed Multiply and Add to 64-Bit Data)
8088 * \details
8089 * **Type**: DSP (64-bit Profile)
8090 *
8091 * **Syntax**:\n
8092 * ~~~
8093 * SMAR64 Rd, Rs1, Rs2
8094 * ~~~
8095 *
8096 * **Purpose**:\n
8097 * Multiply the 32-bit signed elements in two registers and add the 64-bit multiplication
8098 * result to the 64-bit signed data of a pair of registers (RV32) or a register (RV64). The result is written
8099 * back to the pair of registers (RV32) or a register (RV64).
8100 *
8101 * **RV32 Description**:\n
8102 * This instruction multiplies the 32-bit signed data of Rs1 with that of Rs2. It adds
8103 * the 64-bit multiplication result to the 64-bit signed data of an even/odd pair of registers specified by
8104 * Rd(4,1). The addition result is written back to the even/odd pair of registers specified by Rd(4,1).
 * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
8106 * includes register 2d and 2d+1.
8107 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
8108 * of the pair contains the low 32-bit of the result.
8109 *
8110 * **RV64 Description**:\n
8111 * This instruction multiplies the 32-bit signed elements of Rs1 with that of Rs2. It
8112 * adds the 64-bit multiplication results to the 64-bit signed data of Rd. The addition result is written
8113 * back to Rd.
8114 *
8115 * **Operations**:\n
8116 * ~~~
8117 * * RV32:
8118 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
8119 * R[t_H].R[t_L] = R[t_H].R[t_L] + (Rs1 * Rs2);
8120 * * RV64:
8121 * Rd = Rd + (Rs1.W[0] * Rs2.W[0]) + (Rs1.W[1] * Rs2.W[1]);
8122 * ~~~
8123 *
8124 * \param [in] t long long type of value stored in t
8125 * \param [in] a long type of value stored in a
8126 * \param [in] b long type of value stored in b
8127 * \return value stored in long long type
8128 */
__RV_SMAR64(long long t,long a,long b)8129 __STATIC_FORCEINLINE long long __RV_SMAR64(long long t, long a, long b)
8130 {
8131 __ASM volatile("smar64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
8132 return t;
8133 }
8134 /* ===== Inline Function End for 3.108. SMAR64 ===== */
8135
8136 /* ===== Inline Function Start for 3.109. SMAQA ===== */
8137 /**
8138 * \ingroup NMSIS_Core_DSP_Intrinsic_8B_MULT_32B_ADD
8139 * \brief SMAQA (Signed Multiply Four Bytes with 32-bit Adds)
8140 * \details
8141 * **Type**: Partial-SIMD (Reduction)
8142 *
8143 * **Syntax**:\n
8144 * ~~~
8145 * SMAQA Rd, Rs1, Rs2
8146 * ~~~
8147 *
8148 * **Purpose**:\n
8149 * Do four signed 8-bit multiplications from 32-bit chunks of two registers; and then adds
8150 * the four 16-bit results and the content of corresponding 32-bit chunks of a third register together.
8151 *
8152 * **Description**:\n
8153 * This instruction multiplies the four signed 8-bit elements of 32-bit chunks of Rs1 with the four
8154 * signed 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the signed
8155 * content of the corresponding 32-bit chunks of Rd. The final results are written back to the
8156 * corresponding 32-bit chunks in Rd.
8157 *
8158 * **Operations**:\n
8159 * ~~~
8160 * res[x] = Rd.W[x] +
8161 * (Rs1.W[x].B[3] s* Rs2.W[x].B[3]) + (Rs1.W[x].B[2] s* Rs2.W[x].B[2]) +
8162 * (Rs1.W[x].B[1] s* Rs2.W[x].B[1]) + (Rs1.W[x].B[0] s* Rs2.W[x].B[0]);
8163 * Rd.W[x] = res[x];
8164 * for RV32: x=0,
 * for RV64: x=1...0
8166 * ~~~
8167 *
8168 * \param [in] t long type of value stored in t
8169 * \param [in] a unsigned long type of value stored in a
8170 * \param [in] b unsigned long type of value stored in b
8171 * \return value stored in long type
8172 */
__RV_SMAQA(long t,unsigned long a,unsigned long b)8173 __STATIC_FORCEINLINE long __RV_SMAQA(long t, unsigned long a, unsigned long b)
8174 {
8175 __ASM volatile("smaqa %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
8176 return t;
8177 }
8178 /* ===== Inline Function End for 3.109. SMAQA ===== */
8179
8180 /* ===== Inline Function Start for 3.110. SMAQA.SU ===== */
8181 /**
8182 * \ingroup NMSIS_Core_DSP_Intrinsic_8B_MULT_32B_ADD
8183 * \brief SMAQA.SU (Signed and Unsigned Multiply Four Bytes with 32-bit Adds)
8184 * \details
8185 * **Type**: Partial-SIMD (Reduction)
8186 *
8187 * **Syntax**:\n
8188 * ~~~
8189 * SMAQA.SU Rd, Rs1, Rs2
8190 * ~~~
8191 *
8192 * **Purpose**:\n
8193 * Do four `signed x unsigned` 8-bit multiplications from 32-bit chunks of two registers; and
8194 * then adds the four 16-bit results and the content of corresponding 32-bit chunks of a third register
8195 * together.
8196 *
8197 * **Description**:\n
8198 * This instruction multiplies the four signed 8-bit elements of 32-bit chunks of Rs1 with the four
8199 * unsigned 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the
8200 * signed content of the corresponding 32-bit chunks of Rd. The final results are written back to the
8201 * corresponding 32-bit chunks in Rd.
8202 *
8203 * **Operations**:\n
8204 * ~~~
8205 * res[x] = Rd.W[x] +
8206 * (Rs1.W[x].B[3] su* Rs2.W[x].B[3]) + (Rs1.W[x].B[2] su* Rs2.W[x].B[2]) +
8207 * (Rs1.W[x].B[1] su* Rs2.W[x].B[1]) + (Rs1.W[x].B[0] su* Rs2.W[x].B[0]);
8208 * Rd.W[x] = res[x];
8209 * for RV32: x=0,
8210 * for RV64: x=1...0
8211 * ~~~
8212 *
8213 * \param [in] t long type of value stored in t
8214 * \param [in] a unsigned long type of value stored in a
8215 * \param [in] b unsigned long type of value stored in b
8216 * \return value stored in long type
8217 */
__RV_SMAQA_SU(long t,unsigned long a,unsigned long b)8218 __STATIC_FORCEINLINE long __RV_SMAQA_SU(long t, unsigned long a, unsigned long b)
8219 {
8220 __ASM volatile("smaqa.su %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
8221 return t;
8222 }
8223 /* ===== Inline Function End for 3.110. SMAQA.SU ===== */
8224
8225 /* ===== Inline Function Start for 3.111. SMAX8 ===== */
8226 /**
8227 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
8228 * \brief SMAX8 (SIMD 8-bit Signed Maximum)
8229 * \details
8230 * **Type**: SIMD
8231 *
8232 * **Syntax**:\n
8233 * ~~~
8234 * SMAX8 Rd, Rs1, Rs2
8235 * ~~~
8236 *
8237 * **Purpose**:\n
8238 * Do 8-bit signed integer elements finding maximum operations simultaneously.
8239 *
8240 * **Description**:\n
 * This instruction compares the 8-bit signed integer elements in Rs1 with the 8-bit
 * signed integer elements in Rs2 and, for each pair of elements, selects the greater of the two. The
 * selected results are written to Rd.
8244 *
8245 * **Operations**:\n
8246 * ~~~
8247 * Rd.B[x] = (Rs1.B[x] > Rs2.B[x])? Rs1.B[x] : Rs2.B[x];
8248 * for RV32: x=3...0,
8249 * for RV64: x=7...0
8250 * ~~~
8251 *
8252 * \param [in] a unsigned long type of value stored in a
8253 * \param [in] b unsigned long type of value stored in b
8254 * \return value stored in unsigned long type
8255 */
__RV_SMAX8(unsigned long a,unsigned long b)8256 __STATIC_FORCEINLINE unsigned long __RV_SMAX8(unsigned long a, unsigned long b)
8257 {
8258 register unsigned long result;
8259 __ASM volatile("smax8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
8260 return result;
8261 }
8262 /* ===== Inline Function End for 3.111. SMAX8 ===== */
8263
8264 /* ===== Inline Function Start for 3.112. SMAX16 ===== */
8265 /**
8266 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
8267 * \brief SMAX16 (SIMD 16-bit Signed Maximum)
8268 * \details
8269 * **Type**: SIMD
8270 *
8271 * **Syntax**:\n
8272 * ~~~
8273 * SMAX16 Rd, Rs1, Rs2
8274 * ~~~
8275 *
8276 * **Purpose**:\n
8277 * Do 16-bit signed integer elements finding maximum operations simultaneously.
8278 *
8279 * **Description**:\n
 * This instruction compares the 16-bit signed integer elements in Rs1 with the 16-bit
 * signed integer elements in Rs2 and, for each pair of elements, selects the greater of the two. The
 * selected results are written to Rd.
8283 *
8284 * **Operations**:\n
8285 * ~~~
8286 * Rd.H[x] = (Rs1.H[x] > Rs2.H[x])? Rs1.H[x] : Rs2.H[x];
8287 * for RV32: x=1...0,
8288 * for RV64: x=3...0
8289 * ~~~
8290 *
8291 * \param [in] a unsigned long type of value stored in a
8292 * \param [in] b unsigned long type of value stored in b
8293 * \return value stored in unsigned long type
8294 */
__RV_SMAX16(unsigned long a,unsigned long b)8295 __STATIC_FORCEINLINE unsigned long __RV_SMAX16(unsigned long a, unsigned long b)
8296 {
8297 register unsigned long result;
8298 __ASM volatile("smax16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
8299 return result;
8300 }
8301 /* ===== Inline Function End for 3.112. SMAX16 ===== */
8302
8303 /* ===== Inline Function Start for 3.113.1. SMBB16 ===== */
8304 /**
8305 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
8306 * \brief SMBB16 (SIMD Signed Multiply Bottom Half & Bottom Half)
8307 * \details
8308 * **Type**: SIMD
8309 *
8310 * **Syntax**:\n
8311 * ~~~
8312 * SMBB16 Rd, Rs1, Rs2
8313 * SMBT16 Rd, Rs1, Rs2
8314 * SMTT16 Rd, Rs1, Rs2
8315 * ~~~
8316 *
8317 * **Purpose**:\n
8318 * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed 16-
8319 * bit content of the 32-bit elements of another register and write the result to a third register.
8320 * * SMBB16: W[x].bottom*W[x].bottom
8321 * * SMBT16: W[x].bottom *W[x].top
8322 * * SMTT16: W[x].top * W[x].top
8323 *
8324 * **Description**:\n
8325 * For the `SMBB16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
8326 * with the bottom 16-bit content of the 32-bit elements of Rs2.
8327 * For the `SMBT16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
8328 * with the top 16-bit content of the 32-bit elements of Rs2.
8329 * For the `SMTT16` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
8330 * the top 16-bit content of the 32-bit elements of Rs2.
8331 * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed
8332 * integers.
8333 *
8334 * **Operations**:\n
8335 * ~~~
8336 * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[0]; // SMBB16
8337 * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[1]; // SMBT16
8338 * Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[1]; // SMTT16
8339 * for RV32: x=0,
8340 * for RV64: x=1...0
8341 * ~~~
8342 *
8343 * \param [in] a unsigned long type of value stored in a
8344 * \param [in] b unsigned long type of value stored in b
8345 * \return value stored in long type
8346 */
__RV_SMBB16(unsigned long a,unsigned long b)8347 __STATIC_FORCEINLINE long __RV_SMBB16(unsigned long a, unsigned long b)
8348 {
8349 register long result;
8350 __ASM volatile("smbb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
8351 return result;
8352 }
8353 /* ===== Inline Function End for 3.113.1. SMBB16 ===== */
8354
8355 /* ===== Inline Function Start for 3.113.2. SMBT16 ===== */
8356 /**
8357 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
8358 * \brief SMBT16 (SIMD Signed Multiply Bottom Half & Top Half)
8359 * \details
8360 * **Type**: SIMD
8361 *
8362 * **Syntax**:\n
8363 * ~~~
8364 * SMBB16 Rd, Rs1, Rs2
8365 * SMBT16 Rd, Rs1, Rs2
8366 * SMTT16 Rd, Rs1, Rs2
8367 * ~~~
8368 *
8369 * **Purpose**:\n
8370 * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed 16-
8371 * bit content of the 32-bit elements of another register and write the result to a third register.
8372 * * SMBB16: W[x].bottom*W[x].bottom
8373 * * SMBT16: W[x].bottom *W[x].top
8374 * * SMTT16: W[x].top * W[x].top
8375 *
8376 * **Description**:\n
8377 * For the `SMBB16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
8378 * with the bottom 16-bit content of the 32-bit elements of Rs2.
8379 * For the `SMBT16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
8380 * with the top 16-bit content of the 32-bit elements of Rs2.
8381 * For the `SMTT16` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
8382 * the top 16-bit content of the 32-bit elements of Rs2.
8383 * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed
8384 * integers.
8385 *
8386 * **Operations**:\n
8387 * ~~~
8388 * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[0]; // SMBB16
8389 * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[1]; // SMBT16
8390 * Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[1]; // SMTT16
8391 * for RV32: x=0,
8392 * for RV64: x=1...0
8393 * ~~~
8394 *
8395 * \param [in] a unsigned long type of value stored in a
8396 * \param [in] b unsigned long type of value stored in b
8397 * \return value stored in long type
8398 */
__RV_SMBT16(unsigned long a,unsigned long b)8399 __STATIC_FORCEINLINE long __RV_SMBT16(unsigned long a, unsigned long b)
8400 {
8401 register long result;
8402 __ASM volatile("smbt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
8403 return result;
8404 }
8405 /* ===== Inline Function End for 3.113.2. SMBT16 ===== */
8406
8407 /* ===== Inline Function Start for 3.113.3. SMTT16 ===== */
8408 /**
8409 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
8410 * \brief SMTT16 (SIMD Signed Multiply Top Half & Top Half)
8411 * \details
8412 * **Type**: SIMD
8413 *
8414 * **Syntax**:\n
8415 * ~~~
8416 * SMBB16 Rd, Rs1, Rs2
8417 * SMBT16 Rd, Rs1, Rs2
8418 * SMTT16 Rd, Rs1, Rs2
8419 * ~~~
8420 *
8421 * **Purpose**:\n
8422 * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed 16-
8423 * bit content of the 32-bit elements of another register and write the result to a third register.
8424 * * SMBB16: W[x].bottom*W[x].bottom
8425 * * SMBT16: W[x].bottom *W[x].top
8426 * * SMTT16: W[x].top * W[x].top
8427 *
8428 * **Description**:\n
8429 * For the `SMBB16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
8430 * with the bottom 16-bit content of the 32-bit elements of Rs2.
8431 * For the `SMBT16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
8432 * with the top 16-bit content of the 32-bit elements of Rs2.
8433 * For the `SMTT16` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
8434 * the top 16-bit content of the 32-bit elements of Rs2.
8435 * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed
8436 * integers.
8437 *
8438 * **Operations**:\n
8439 * ~~~
8440 * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[0]; // SMBB16
8441 * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[1]; // SMBT16
8442 * Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[1]; // SMTT16
8443 * for RV32: x=0,
8444 * for RV64: x=1...0
8445 * ~~~
8446 *
8447 * \param [in] a unsigned long type of value stored in a
8448 * \param [in] b unsigned long type of value stored in b
8449 * \return value stored in long type
8450 */
__RV_SMTT16(unsigned long a,unsigned long b)8451 __STATIC_FORCEINLINE long __RV_SMTT16(unsigned long a, unsigned long b)
8452 {
8453 register long result;
8454 __ASM volatile("smtt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
8455 return result;
8456 }
8457 /* ===== Inline Function End for 3.113.3. SMTT16 ===== */
8458
8459 /* ===== Inline Function Start for 3.114.1. SMDS ===== */
8460 /**
8461 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
8462 * \brief SMDS (SIMD Signed Multiply Two Halfs and Subtract)
8463 * \details
8464 * **Type**: SIMD
8465 *
8466 * **Syntax**:\n
8467 * ~~~
8468 * SMDS Rd, Rs1, Rs2
8469 * SMDRS Rd, Rs1, Rs2
8470 * SMXDS Rd, Rs1, Rs2
8471 * ~~~
8472 *
8473 * **Purpose**:\n
8474 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
8475 * perform a subtraction operation between the two 32-bit results.
8476 * * SMDS: top*top - bottom*bottom (per 32-bit element)
8477 * * SMDRS: bottom*bottom - top*top (per 32-bit element)
8478 * * SMXDS: top*bottom - bottom*top (per 32-bit element)
8479 *
8480 * **Description**:\n
8481 * For the `SMDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with
8482 * the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result
8483 * of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the
8484 * 32-bit elements of Rs2.
8485 * For the `SMDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
8486 * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
8487 * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
8488 * the 32-bit elements of Rs2.
8489 * For the `SMXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
8490 * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
8491 * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
8492 * content of the 32-bit elements of Rs2.
8493 * The subtraction result is written to the corresponding 32-bit element of Rd. The 16-bit contents of
8494 * multiplication are treated as signed integers.
8495 *
8496 * **Operations**:\n
8497 * ~~~
8498 * * SMDS:
8499 * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
8500 * * SMDRS:
8501 * Rd.W[x] = (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
8502 * * SMXDS:
8503 * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
8504 * ~~~
8505 *
8506 * \param [in] a unsigned long type of value stored in a
8507 * \param [in] b unsigned long type of value stored in b
8508 * \return value stored in long type
8509 */
__RV_SMDS(unsigned long a,unsigned long b)8510 __STATIC_FORCEINLINE long __RV_SMDS(unsigned long a, unsigned long b)
8511 {
8512 register long result;
8513 __ASM volatile("smds %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
8514 return result;
8515 }
8516 /* ===== Inline Function End for 3.114.1. SMDS ===== */
8517
8518 /* ===== Inline Function Start for 3.114.2. SMDRS ===== */
8519 /**
8520 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
8521 * \brief SMDRS (SIMD Signed Multiply Two Halfs and Reverse Subtract)
8522 * \details
8523 * **Type**: SIMD
8524 *
8525 * **Syntax**:\n
8526 * ~~~
8527 * SMDS Rd, Rs1, Rs2
8528 * SMDRS Rd, Rs1, Rs2
8529 * SMXDS Rd, Rs1, Rs2
8530 * ~~~
8531 *
8532 * **Purpose**:\n
8533 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
8534 * perform a subtraction operation between the two 32-bit results.
8535 * * SMDS: top*top - bottom*bottom (per 32-bit element)
8536 * * SMDRS: bottom*bottom - top*top (per 32-bit element)
8537 * * SMXDS: top*bottom - bottom*top (per 32-bit element)
8538 *
8539 * **Description**:\n
8540 * For the `SMDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with
8541 * the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result
8542 * of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the
8543 * 32-bit elements of Rs2.
8544 * For the `SMDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
8545 * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
8546 * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
8547 * the 32-bit elements of Rs2.
8548 * For the `SMXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
8549 * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
8550 * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
8551 * content of the 32-bit elements of Rs2.
8552 * The subtraction result is written to the corresponding 32-bit element of Rd. The 16-bit contents of
8553 * multiplication are treated as signed integers.
8554 *
8555 * **Operations**:\n
8556 * ~~~
8557 * * SMDS:
8558 * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
8559 * * SMDRS:
8560 * Rd.W[x] = (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
8561 * * SMXDS:
8562 * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
8563 * ~~~
8564 *
8565 * \param [in] a unsigned long type of value stored in a
8566 * \param [in] b unsigned long type of value stored in b
8567 * \return value stored in long type
8568 */
__RV_SMDRS(unsigned long a,unsigned long b)8569 __STATIC_FORCEINLINE long __RV_SMDRS(unsigned long a, unsigned long b)
8570 {
8571 register long result;
8572 __ASM volatile("smdrs %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
8573 return result;
8574 }
8575 /* ===== Inline Function End for 3.114.2. SMDRS ===== */
8576
8577 /* ===== Inline Function Start for 3.114.3. SMXDS ===== */
8578 /**
8579 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
8580 * \brief SMXDS (SIMD Signed Crossed Multiply Two Halfs and Subtract)
8581 * \details
8582 * **Type**: SIMD
8583 *
8584 * **Syntax**:\n
8585 * ~~~
8586 * SMDS Rd, Rs1, Rs2
8587 * SMDRS Rd, Rs1, Rs2
8588 * SMXDS Rd, Rs1, Rs2
8589 * ~~~
8590 *
8591 * **Purpose**:\n
8592 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
8593 * perform a subtraction operation between the two 32-bit results.
8594 * * SMDS: top*top - bottom*bottom (per 32-bit element)
8595 * * SMDRS: bottom*bottom - top*top (per 32-bit element)
8596 * * SMXDS: top*bottom - bottom*top (per 32-bit element)
8597 *
8598 * **Description**:\n
8599 * For the `SMDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with
8600 * the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result
8601 * of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the
8602 * 32-bit elements of Rs2.
8603 * For the `SMDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
8604 * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
8605 * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
8606 * the 32-bit elements of Rs2.
8607 * For the `SMXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
8608 * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
8609 * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
8610 * content of the 32-bit elements of Rs2.
8611 * The subtraction result is written to the corresponding 32-bit element of Rd. The 16-bit contents of
8612 * multiplication are treated as signed integers.
8613 *
8614 * **Operations**:\n
8615 * ~~~
8616 * * SMDS:
8617 * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
8618 * * SMDRS:
8619 * Rd.W[x] = (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
8620 * * SMXDS:
8621 * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
8622 * ~~~
8623 *
8624 * \param [in] a unsigned long type of value stored in a
8625 * \param [in] b unsigned long type of value stored in b
8626 * \return value stored in long type
8627 */
__RV_SMXDS(unsigned long a,unsigned long b)8628 __STATIC_FORCEINLINE long __RV_SMXDS(unsigned long a, unsigned long b)
8629 {
8630 register long result;
8631 __ASM volatile("smxds %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
8632 return result;
8633 }
8634 /* ===== Inline Function End for 3.114.3. SMXDS ===== */
8635
8636 /* ===== Inline Function Start for 3.115. SMIN8 ===== */
8637 /**
8638 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
8639 * \brief SMIN8 (SIMD 8-bit Signed Minimum)
8640 * \details
8641 * **Type**: SIMD
8642 *
8643 * **Syntax**:\n
8644 * ~~~
8645 * SMIN8 Rd, Rs1, Rs2
8646 * ~~~
8647 *
8648 * **Purpose**:\n
8649 * Do 8-bit signed integer elements finding minimum operations simultaneously.
8650 *
8651 * **Description**:\n
 * This instruction compares the 8-bit signed integer elements in Rs1 with the 8-bit
 * signed integer elements in Rs2 and, for each pair of elements, selects the lesser of the two. The
 * selected results are written to Rd.
8655 *
8656 * **Operations**:\n
8657 * ~~~
8658 * Rd.B[x] = (Rs1.B[x] < Rs2.B[x])? Rs1.B[x] : Rs2.B[x];
8659 * for RV32: x=3...0,
8660 * for RV64: x=7...0
8661 * ~~~
8662 *
8663 * \param [in] a unsigned long type of value stored in a
8664 * \param [in] b unsigned long type of value stored in b
8665 * \return value stored in unsigned long type
8666 */
__RV_SMIN8(unsigned long a,unsigned long b)8667 __STATIC_FORCEINLINE unsigned long __RV_SMIN8(unsigned long a, unsigned long b)
8668 {
8669 register unsigned long result;
8670 __ASM volatile("smin8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
8671 return result;
8672 }
8673 /* ===== Inline Function End for 3.115. SMIN8 ===== */
8674
8675 /* ===== Inline Function Start for 3.116. SMIN16 ===== */
8676 /**
8677 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
8678 * \brief SMIN16 (SIMD 16-bit Signed Minimum)
8679 * \details
8680 * **Type**: SIMD
8681 *
8682 * **Syntax**:\n
8683 * ~~~
8684 * SMIN16 Rd, Rs1, Rs2
8685 * ~~~
8686 *
8687 * **Purpose**:\n
8688 * Do 16-bit signed integer elements finding minimum operations simultaneously.
8689 *
8690 * **Description**:\n
 * This instruction compares the 16-bit signed integer elements in Rs1 with the 16-bit
 * signed integer elements in Rs2 and, for each pair of elements, selects the lesser of the two. The
 * selected results are written to Rd.
8694 *
8695 * **Operations**:\n
8696 * ~~~
8697 * Rd.H[x] = (Rs1.H[x] < Rs2.H[x])? Rs1.H[x] : Rs2.H[x];
8698 * for RV32: x=1...0,
8699 * for RV64: x=3...0
8700 * ~~~
8701 *
8702 * \param [in] a unsigned long type of value stored in a
8703 * \param [in] b unsigned long type of value stored in b
8704 * \return value stored in unsigned long type
8705 */
__RV_SMIN16(unsigned long a,unsigned long b)8706 __STATIC_FORCEINLINE unsigned long __RV_SMIN16(unsigned long a, unsigned long b)
8707 {
8708 register unsigned long result;
8709 __ASM volatile("smin16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
8710 return result;
8711 }
8712 /* ===== Inline Function End for 3.116. SMIN16 ===== */
8713
8714 /* ===== Inline Function Start for 3.117.1. SMMUL ===== */
8715 /**
8716 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
8717 * \brief SMMUL (SIMD MSW Signed Multiply Word)
8718 * \details
8719 * **Type**: SIMD
8720 *
8721 * **Syntax**:\n
8722 * ~~~
8723 * SMMUL Rd, Rs1, Rs2
8724 * SMMUL.u Rd, Rs1, Rs2
8725 * ~~~
8726 *
8727 * **Purpose**:\n
8728 * Multiply the 32-bit signed integer elements of two registers and write the most significant
8729 * 32-bit results to the corresponding 32-bit elements of a register. The `.u` form performs an
8730 * additional rounding up operation on the multiplication results before taking the most significant
8731 * 32-bit part of the results.
8732 *
8733 * **Description**:\n
8734 * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2 and writes the
8735 * most significant 32-bit multiplication results to the corresponding 32-bit elements of Rd. The 32-bit
8736 * elements of Rs1 and Rs2 are treated as signed integers. The `.u` form of the instruction rounds up
8737 * the most significant 32-bit of the 64-bit multiplication results by adding a 1 to bit 31 of the results.
8738 * * For `smmul/RV32` instruction, it is an alias to `mulh/RV32` instruction.
8739 *
8740 * **Operations**:\n
8741 * ~~~
8742 * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
8743 * if (`.u` form) {
8744 * Round[x][32:0] = Mres[x][63:31] + 1;
8745 * Rd.W[x] = Round[x][32:1];
8746 * } else {
8747 * Rd.W[x] = Mres[x][63:32];
8748 * }
8749 * for RV32: x=0
8750 * for RV64: x=1...0
8751 * ~~~
8752 *
8753 * \param [in] a long type of value stored in a
8754 * \param [in] b long type of value stored in b
8755 * \return value stored in long type
8756 */
__RV_SMMUL(long a,long b)8757 __STATIC_FORCEINLINE long __RV_SMMUL(long a, long b)
8758 {
8759 register long result;
8760 __ASM volatile("smmul %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
8761 return result;
8762 }
8763 /* ===== Inline Function End for 3.117.1. SMMUL ===== */
8764
8765 /* ===== Inline Function Start for 3.117.2. SMMUL.u ===== */
8766 /**
8767 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
8768 * \brief SMMUL.u (SIMD MSW Signed Multiply Word with Rounding)
8769 * \details
8770 * **Type**: SIMD
8771 *
8772 * **Syntax**:\n
8773 * ~~~
8774 * SMMUL Rd, Rs1, Rs2
8775 * SMMUL.u Rd, Rs1, Rs2
8776 * ~~~
8777 *
8778 * **Purpose**:\n
8779 * Multiply the 32-bit signed integer elements of two registers and write the most significant
8780 * 32-bit results to the corresponding 32-bit elements of a register. The `.u` form performs an
8781 * additional rounding up operation on the multiplication results before taking the most significant
8782 * 32-bit part of the results.
8783 *
8784 * **Description**:\n
8785 * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2 and writes the
8786 * most significant 32-bit multiplication results to the corresponding 32-bit elements of Rd. The 32-bit
8787 * elements of Rs1 and Rs2 are treated as signed integers. The `.u` form of the instruction rounds up
8788 * the most significant 32-bit of the 64-bit multiplication results by adding a 1 to bit 31 of the results.
8789 * * For `smmul/RV32` instruction, it is an alias to `mulh/RV32` instruction.
8790 *
8791 * **Operations**:\n
8792 * ~~~
8793 * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
8794 * if (`.u` form) {
8795 * Round[x][32:0] = Mres[x][63:31] + 1;
8796 * Rd.W[x] = Round[x][32:1];
8797 * } else {
8798 * Rd.W[x] = Mres[x][63:32];
8799 * }
8800 * for RV32: x=0
8801 * for RV64: x=1...0
8802 * ~~~
8803 *
8804 * \param [in] a long type of value stored in a
8805 * \param [in] b long type of value stored in b
8806 * \return value stored in long type
8807 */
__RV_SMMUL_U(long a,long b)8808 __STATIC_FORCEINLINE long __RV_SMMUL_U(long a, long b)
8809 {
8810 register long result;
8811 __ASM volatile("smmul.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
8812 return result;
8813 }
8814 /* ===== Inline Function End for 3.117.2. SMMUL.u ===== */
8815
8816 /* ===== Inline Function Start for 3.118.1. SMMWB ===== */
8817 /**
8818 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
8819 * \brief SMMWB (SIMD MSW Signed Multiply Word and Bottom Half)
8820 * \details
8821 * **Type**: SIMD
8822 *
8823 * **Syntax**:\n
8824 * ~~~
8825 * SMMWB Rd, Rs1, Rs2
8826 * SMMWB.u Rd, Rs1, Rs2
8827 * ~~~
8828 *
8829 * **Purpose**:\n
8830 * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
8831 * corresponding 32-bit elements of another register, and write the most significant 32-bit results to
8832 * the corresponding 32-bit elements of a register. The `.u` form rounds up the results from the most
8833 * significant discarded bit.
8834 *
8835 * **Description**:\n
8836 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed bottom 16-bit content
8837 * of the corresponding 32-bit elements of Rs2 and writes the most significant 32-bit multiplication
8838 * results to the corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the
8839 * most significant 32-bit of the 48-bit multiplication results by adding a 1 to bit 15 of the results.
8840 *
8841 * **Operations**:\n
8842 * ~~~
8843 * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[0];
8844 * if (`.u` form) {
8845 * Round[x][32:0] = Mres[x][47:15] + 1;
8846 * Rd.W[x] = Round[x][32:1];
8847 * } else {
8848 * Rd.W[x] = Mres[x][47:16];
8849 * }
8850 * for RV32: x=0
8851 * for RV64: x=1...0
8852 * ~~~
8853 *
8854 * \param [in] a long type of value stored in a
8855 * \param [in] b unsigned long type of value stored in b
8856 * \return value stored in long type
8857 */
__RV_SMMWB(long a,unsigned long b)8858 __STATIC_FORCEINLINE long __RV_SMMWB(long a, unsigned long b)
8859 {
8860 register long result;
8861 __ASM volatile("smmwb %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
8862 return result;
8863 }
8864 /* ===== Inline Function End for 3.118.1. SMMWB ===== */
8865
8866 /* ===== Inline Function Start for 3.118.2. SMMWB.u ===== */
8867 /**
8868 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
8869 * \brief SMMWB.u (SIMD MSW Signed Multiply Word and Bottom Half with Rounding)
8870 * \details
8871 * **Type**: SIMD
8872 *
8873 * **Syntax**:\n
8874 * ~~~
8875 * SMMWB Rd, Rs1, Rs2
8876 * SMMWB.u Rd, Rs1, Rs2
8877 * ~~~
8878 *
8879 * **Purpose**:\n
8880 * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
8881 * corresponding 32-bit elements of another register, and write the most significant 32-bit results to
8882 * the corresponding 32-bit elements of a register. The `.u` form rounds up the results from the most
8883 * significant discarded bit.
8884 *
8885 * **Description**:\n
8886 * This instruction multiplies the signed 32-bit elements of Rs1 with the signed bottom 16-bit content
8887 * of the corresponding 32-bit elements of Rs2 and writes the most significant 32-bit multiplication
8888 * results to the corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the
8889 * most significant 32-bit of the 48-bit multiplication results by adding a 1 to bit 15 of the results.
8890 *
8891 * **Operations**:\n
8892 * ~~~
8893 * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[0];
8894 * if (`.u` form) {
8895 * Round[x][32:0] = Mres[x][47:15] + 1;
8896 * Rd.W[x] = Round[x][32:1];
8897 * } else {
8898 * Rd.W[x] = Mres[x][47:16];
8899 * }
8900 * for RV32: x=0
8901 * for RV64: x=1...0
8902 * ~~~
8903 *
8904 * \param [in] a long type of value stored in a
8905 * \param [in] b unsigned long type of value stored in b
8906 * \return value stored in long type
8907 */
__RV_SMMWB_U(long a,unsigned long b)8908 __STATIC_FORCEINLINE long __RV_SMMWB_U(long a, unsigned long b)
8909 {
8910 register long result;
8911 __ASM volatile("smmwb.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
8912 return result;
8913 }
8914 /* ===== Inline Function End for 3.118.2. SMMWB.u ===== */
8915
8916 /* ===== Inline Function Start for 3.119.1. SMMWT ===== */
8917 /**
8918 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
8919 * \brief SMMWT (SIMD MSW Signed Multiply Word and Top Half)
8920 * \details
8921 * **Type**: SIMD
8922 *
8923 * **Syntax**:\n
8924 * ~~~
8925 * SMMWT Rd, Rs1, Rs2
8926 * SMMWT.u Rd, Rs1, Rs2
8927 * ~~~
8928 *
8929 * **Purpose**:\n
8930 * Multiply the signed 32-bit integer elements of one register and the top 16-bit of the
8931 * corresponding 32-bit elements of another register, and write the most significant 32-bit results to
8932 * the corresponding 32-bit elements of a register. The `.u` form rounds up the results from the most
8933 * significant discarded bit.
8934 *
8935 * **Description**:\n
8936 * This instruction multiplies the signed 32-bit elements of Rs1 with the top signed 16-bit content of
8937 * the corresponding 32-bit elements of Rs2 and writes the most significant 32-bit multiplication
8938 * results to the corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the
8939 * most significant 32-bit of the 48-bit multiplication results by adding a 1 to bit 15 of the results.
8940 *
8941 * **Operations**:\n
8942 * ~~~
8943 * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[1];
8944 * if (`.u` form) {
8945 * Round[x][32:0] = Mres[x][47:15] + 1;
8946 * Rd.W[x] = Round[x][32:1];
8947 * } else {
8948 * Rd.W[x] = Mres[x][47:16];
8949 * }
8950 * for RV32: x=0
8951 * for RV64: x=1...0
8952 * ~~~
8953 *
8954 * \param [in] a long type of value stored in a
8955 * \param [in] b unsigned long type of value stored in b
8956 * \return value stored in long type
8957 */
__RV_SMMWT(long a,unsigned long b)8958 __STATIC_FORCEINLINE long __RV_SMMWT(long a, unsigned long b)
8959 {
8960 register long result;
8961 __ASM volatile("smmwt %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
8962 return result;
8963 }
8964 /* ===== Inline Function End for 3.119.1. SMMWT ===== */
8965
8966 /* ===== Inline Function Start for 3.119.2. SMMWT.u ===== */
8967 /**
8968 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
8969 * \brief SMMWT.u (SIMD MSW Signed Multiply Word and Top Half with Rounding)
8970 * \details
8971 * **Type**: SIMD
8972 *
8973 * **Syntax**:\n
8974 * ~~~
8975 * SMMWT Rd, Rs1, Rs2
8976 * SMMWT.u Rd, Rs1, Rs2
8977 * ~~~
8978 *
8979 * **Purpose**:\n
8980 * Multiply the signed 32-bit integer elements of one register and the top 16-bit of the
8981 * corresponding 32-bit elements of another register, and write the most significant 32-bit results to
8982 * the corresponding 32-bit elements of a register. The `.u` form rounds up the results from the most
8983 * significant discarded bit.
8984 *
8985 * **Description**:\n
8986 * This instruction multiplies the signed 32-bit elements of Rs1 with the top signed 16-bit content of
8987 * the corresponding 32-bit elements of Rs2 and writes the most significant 32-bit multiplication
8988 * results to the corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the
8989 * most significant 32-bit of the 48-bit multiplication results by adding a 1 to bit 15 of the results.
8990 *
8991 * **Operations**:\n
8992 * ~~~
8993 * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[1];
8994 * if (`.u` form) {
8995 * Round[x][32:0] = Mres[x][47:15] + 1;
8996 * Rd.W[x] = Round[x][32:1];
8997 * } else {
8998 * Rd.W[x] = Mres[x][47:16];
8999 * }
9000 * for RV32: x=0
9001 * for RV64: x=1...0
9002 * ~~~
9003 *
9004 * \param [in] a long type of value stored in a
9005 * \param [in] b unsigned long type of value stored in b
9006 * \return value stored in long type
9007 */
__RV_SMMWT_U(long a,unsigned long b)9008 __STATIC_FORCEINLINE long __RV_SMMWT_U(long a, unsigned long b)
9009 {
9010 register long result;
9011 __ASM volatile("smmwt.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
9012 return result;
9013 }
9014 /* ===== Inline Function End for 3.119.2. SMMWT.u ===== */
9015
9016 /* ===== Inline Function Start for 3.120.1. SMSLDA ===== */
9017 /**
9018 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
9019 * \brief SMSLDA (Signed Multiply Two Halfs & Add & Subtract 64-bit)
9020 * \details
9021 * **Type**: DSP (64-bit Profile)
9022 *
9023 * **Syntax**:\n
9024 * ~~~
9025 * SMSLDA Rd, Rs1, Rs2
9026 * SMSLXDA Rd, Rs1, Rs2
9027 * ~~~
9028 *
9029 * **Purpose**:\n
9030 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
9031 * subtracts the two 32-bit results from the 64-bit value of an even/odd pair of registers (RV32) or a
9032 * register (RV64). The subtraction result is written back to the register-pair.
9033 * * SMSLDA: rd pair - top*top - bottom*bottom (all 32-bit elements)
9034 * * SMSLXDA: rd pair - top*bottom - bottom*top (all 32-bit elements)
9035 *
9036 * **RV32 Description**:\n
9037 * For the `SMSLDA` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and multiplies the top 16-bit content of Rs1 with the top 16-bit content of Rs2.
9039 * For the `SMSLXDA` instruction, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit
9040 * content of Rs2 and multiplies the bottom 16-bit content of Rs1 with the top 16-bit content of Rs2.
9041 * The two multiplication results are subtracted from the 64-bit value of an even/odd pair of registers
9042 * specified by Rd(4,1). The 64-bit subtraction result is written back to the register-pair. The 16-bit
9043 * values of Rs1 and Rs2, and the 64-bit value of the register-pair are treated as signed integers.
9044 * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
9045 * includes register 2d and 2d+1.
9046 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
9047 * of the pair contains the low 32-bit of the result.
9048 *
9049 * **RV64 Description**:\n
9050 * For the `SMSLDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
9051 * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of
9052 * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
9053 * For the `SMSLXDA` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
9054 * the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the bottom 16-bit content of
9055 * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
9056 * The four multiplication results are subtracted from the 64-bit value of Rd. The 64-bit subtraction
9057 * result is written back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated
9058 * as signed integers.
9059 *
9060 * **Operations**:\n
9061 * ~~~
9062 * * RV32:
9063 * // SMSLDA
9064 * Mres0[31:0] = (Rs1.H[0] * Rs2.H[0]);
9065 * Mres1[31:0] = (Rs1.H[1] * Rs2.H[1]);
9066 * // SMSLXDA
9067 * Mres0[31:0] = (Rs1.H[0] * Rs2.H[1]);
9068 * Mres1[31:0] = (Rs1.H[1] * Rs2.H[0]);
9069 * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
9070 * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] - SE64(Mres0[31:0]) - SE64(Mres1[31:0]);
9071 * * RV64:
9072 * // SMSLDA
9073 * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
9074 * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
9075 * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
9076 * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
9077 * // SMSLXDA
9078 * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
9079 * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]);
9080 * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
9081 * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]);
9082 * Rd = Rd - SE64(Mres0[0][31:0]) - SE64(Mres1[0][31:0]) - SE64(Mres0[1][31:0]) -
9083 * SE64(Mres1[1][31:0]);
9084 * ~~~
9085 *
9086 * \param [in] t long long type of value stored in t
9087 * \param [in] a unsigned long type of value stored in a
9088 * \param [in] b unsigned long type of value stored in b
9089 * \return value stored in long long type
9090 */
__RV_SMSLDA(long long t,unsigned long a,unsigned long b)9091 __STATIC_FORCEINLINE long long __RV_SMSLDA(long long t, unsigned long a, unsigned long b)
9092 {
9093 __ASM volatile("smslda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
9094 return t;
9095 }
9096 /* ===== Inline Function End for 3.120.1. SMSLDA ===== */
9097
9098 /* ===== Inline Function Start for 3.120.2. SMSLXDA ===== */
9099 /**
9100 * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
 * \brief      SMSLXDA (Signed Crossed Multiply Two Halfs & Add & Subtract 64-bit)
9102 * \details
9103 * **Type**: DSP (64-bit Profile)
9104 *
9105 * **Syntax**:\n
9106 * ~~~
9107 * SMSLDA Rd, Rs1, Rs2
9108 * SMSLXDA Rd, Rs1, Rs2
9109 * ~~~
9110 *
9111 * **Purpose**:\n
9112 * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
9113 * subtracts the two 32-bit results from the 64-bit value of an even/odd pair of registers (RV32) or a
9114 * register (RV64). The subtraction result is written back to the register-pair.
9115 * * SMSLDA: rd pair - top*top - bottom*bottom (all 32-bit elements)
9116 * * SMSLXDA: rd pair - top*bottom - bottom*top (all 32-bit elements)
9117 *
9118 * **RV32 Description**:\n
9119 * For the `SMSLDA` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
 * content of Rs2 and multiplies the top 16-bit content of Rs1 with the top 16-bit content of Rs2.
9121 * For the `SMSLXDA` instruction, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit
9122 * content of Rs2 and multiplies the bottom 16-bit content of Rs1 with the top 16-bit content of Rs2.
9123 * The two multiplication results are subtracted from the 64-bit value of an even/odd pair of registers
9124 * specified by Rd(4,1). The 64-bit subtraction result is written back to the register-pair. The 16-bit
9125 * values of Rs1 and Rs2, and the 64-bit value of the register-pair are treated as signed integers.
9126 * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
9127 * includes register 2d and 2d+1.
9128 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
9129 * of the pair contains the low 32-bit of the result.
9130 *
9131 * **RV64 Description**:\n
9132 * For the `SMSLDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
9133 * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of
9134 * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
9135 * For the `SMSLXDA` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
9136 * the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the bottom 16-bit content of
9137 * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
9138 * The four multiplication results are subtracted from the 64-bit value of Rd. The 64-bit subtraction
9139 * result is written back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated
9140 * as signed integers.
9141 *
9142 * **Operations**:\n
9143 * ~~~
9144 * * RV32:
9145 * // SMSLDA
9146 * Mres0[31:0] = (Rs1.H[0] * Rs2.H[0]);
9147 * Mres1[31:0] = (Rs1.H[1] * Rs2.H[1]);
9148 * // SMSLXDA
9149 * Mres0[31:0] = (Rs1.H[0] * Rs2.H[1]);
9150 * Mres1[31:0] = (Rs1.H[1] * Rs2.H[0]);
9151 * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
9152 * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] - SE64(Mres0[31:0]) - SE64(Mres1[31:0]);
9153 * * RV64:
9154 * // SMSLDA
9155 * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
9156 * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
9157 * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
9158 * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
9159 * // SMSLXDA
9160 * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
9161 * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]);
9162 * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
9163 * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]);
9164 * Rd = Rd - SE64(Mres0[0][31:0]) - SE64(Mres1[0][31:0]) - SE64(Mres0[1][31:0]) -
9165 * SE64(Mres1[1][31:0]);
9166 * ~~~
9167 *
9168 * \param [in] t long long type of value stored in t
9169 * \param [in] a unsigned long type of value stored in a
9170 * \param [in] b unsigned long type of value stored in b
9171 * \return value stored in long long type
9172 */
__RV_SMSLXDA(long long t,unsigned long a,unsigned long b)9173 __STATIC_FORCEINLINE long long __RV_SMSLXDA(long long t, unsigned long a, unsigned long b)
9174 {
9175 __ASM volatile("smslxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
9176 return t;
9177 }
9178 /* ===== Inline Function End for 3.120.2. SMSLXDA ===== */
9179
9180 /* ===== Inline Function Start for 3.121. SMSR64 ===== */
9181 /**
9182 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
 * \brief      SMSR64 (Signed Multiply and Subtract from 64-Bit Data)
9184 * \details
9185 * **Type**: DSP (64-bit Profile)
9186 *
9187 * **Syntax**:\n
9188 * ~~~
9189 * SMSR64 Rd, Rs1, Rs2
9190 * ~~~
9191 *
9192 * **Purpose**:\n
9193 * Multiply the 32-bit signed elements in two registers and subtract the 64-bit multiplication
9194 * results from the 64-bit signed data of a pair of registers (RV32) or a register (RV64). The result is
9195 * written back to the pair of registers (RV32) or a register (RV64).
9196 *
9197 * **RV32 Description**:\n
9198 * This instruction multiplies the 32-bit signed data of Rs1 with that of Rs2. It
9199 * subtracts the 64-bit multiplication result from the 64-bit signed data of an even/odd pair of registers
9200 * specified by Rd(4,1). The subtraction result is written back to the even/odd pair of registers
9201 * specified by Rd(4,1).
 * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
9203 * includes register 2d and 2d+1.
9204 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
9205 * of the pair contains the low 32-bit of the result.
9206 *
9207 * **RV64 Description**:\n
9208 * This instruction multiplies the 32-bit signed elements of Rs1 with that of Rs2. It
9209 * subtracts the 64-bit multiplication results from the 64-bit signed data of Rd. The subtraction result is
9210 * written back to Rd.
9211 *
9212 * **Operations**:\n
9213 * ~~~
9214 * * RV32:
9215 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
9216 * R[t_H].R[t_L] = R[t_H].R[t_L] - (Rs1 * Rs2);
9217 * * RV64:
9218 * Rd = Rd - (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]);
9219 * ~~~
9220 *
9221 * \param [in] t long long type of value stored in t
9222 * \param [in] a long type of value stored in a
9223 * \param [in] b long type of value stored in b
9224 * \return value stored in long long type
9225 */
__RV_SMSR64(long long t,long a,long b)9226 __STATIC_FORCEINLINE long long __RV_SMSR64(long long t, long a, long b)
9227 {
9228 __ASM volatile("smsr64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
9229 return t;
9230 }
9231 /* ===== Inline Function End for 3.121. SMSR64 ===== */
9232
9233 /* ===== Inline Function Start for 3.122.1. SMUL8 ===== */
9234 /**
9235 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
9236 * \brief SMUL8 (SIMD Signed 8-bit Multiply)
9237 * \details
9238 * **Type**: SIMD
9239 *
9240 * **Syntax**:\n
9241 * ~~~
9242 * SMUL8 Rd, Rs1, Rs2
9243 * SMULX8 Rd, Rs1, Rs2
9244 * ~~~
9245 *
9246 * **Purpose**:\n
9247 * Do signed 8-bit multiplications and generate four 16-bit results simultaneously.
9248 *
9249 * **RV32 Description**:\n
9250 * For the `SMUL8` instruction, multiply the 8-bit data elements of Rs1 with the
9251 * corresponding 8-bit data elements of Rs2.
9252 * For the `SMULX8` instruction, multiply the first and second 8-bit data elements of Rs1 with the
9253 * second and first 8-bit data elements of Rs2. At the same time, multiply the third and fourth 8-bit data
9254 * elements of Rs1 with the fourth and third 8-bit data elements of Rs2.
9255 * The four 16-bit results are then written into an even/odd pair of registers specified by Rd(4,1).
9256 * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
9257 * includes register 2d and 2d+1.
9258 * The odd `2d+1` register of the pair contains the two 16-bit results calculated from the top part of
9259 * Rs1 and the even `2d` register of the pair contains the two 16-bit results calculated from the bottom
9260 * part of Rs1.
9261 *
9262 * **RV64 Description**:\n
9263 * For the `SMUL8` instruction, multiply the 8-bit data elements of Rs1 with the
9264 * corresponding 8-bit data elements of Rs2.
9265 * For the `SMULX8` instruction, multiply the first and second 8-bit data elements of Rs1 with the
9266 * second and first 8-bit data elements of Rs2. At the same time, multiply the third and fourth 8-bit data
9267 * elements of Rs1 with the fourth and third 8-bit data elements of Rs2.
9268 * The four 16-bit results are then written into Rd. The Rd.W[1] contains the two 16-bit results
9269 * calculated from the top part of Rs1 and the Rd.W[0] contains the two 16-bit results calculated from
9270 * the bottom part of Rs1.
9271 *
9272 * **Operations**:\n
9273 * ~~~
9274 * * RV32:
9275 * if (is `SMUL8`) {
9276 * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
9277 * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
9278 * } else if (is `SMULX8`) {
9279 * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
9280 * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
9281 * }
9282 * rest[x/2] = op1t[x/2] s* op2t[x/2];
9283 * resb[x/2] = op1b[x/2] s* op2b[x/2];
9284 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
9285 * R[t_H].H[1] = rest[1]; R[t_H].H[0] = resb[1];
9286 * R[t_L].H[1] = rest[0]; R[t_L].H[0] = resb[0];
9287 * x = 0 and 2
9288 * * RV64:
9289 * if (is `SMUL8`) {
9290 * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
9291 * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
9292 * } else if (is `SMULX8`) {
9293 * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
9294 * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
9295 * }
9296 * rest[x/2] = op1t[x/2] s* op2t[x/2];
9297 * resb[x/2] = op1b[x/2] s* op2b[x/2];
9298 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
9299 * Rd.W[1].H[1] = rest[1]; Rd.W[1].H[0] = resb[1];
9300 * Rd.W[0].H[1] = rest[0]; Rd.W[0].H[0] = resb[0];
9301 * x = 0 and 2
9302 * ~~~
9303 *
9304 * \param [in] a unsigned int type of value stored in a
9305 * \param [in] b unsigned int type of value stored in b
9306 * \return value stored in unsigned long long type
9307 */
__RV_SMUL8(unsigned int a,unsigned int b)9308 __STATIC_FORCEINLINE unsigned long long __RV_SMUL8(unsigned int a, unsigned int b)
9309 {
9310 register unsigned long long result;
9311 __ASM volatile("smul8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
9312 return result;
9313 }
9314 /* ===== Inline Function End for 3.122.1. SMUL8 ===== */
9315
9316 /* ===== Inline Function Start for 3.122.2. SMULX8 ===== */
9317 /**
9318 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
9319 * \brief SMULX8 (SIMD Signed Crossed 8-bit Multiply)
9320 * \details
9321 * **Type**: SIMD
9322 *
9323 * **Syntax**:\n
9324 * ~~~
9325 * SMUL8 Rd, Rs1, Rs2
9326 * SMULX8 Rd, Rs1, Rs2
9327 * ~~~
9328 *
9329 * **Purpose**:\n
9330 * Do signed 8-bit multiplications and generate four 16-bit results simultaneously.
9331 *
9332 * **RV32 Description**:\n
9333 * For the `SMUL8` instruction, multiply the 8-bit data elements of Rs1 with the
9334 * corresponding 8-bit data elements of Rs2.
9335 * For the `SMULX8` instruction, multiply the first and second 8-bit data elements of Rs1 with the
9336 * second and first 8-bit data elements of Rs2. At the same time, multiply the third and fourth 8-bit data
9337 * elements of Rs1 with the fourth and third 8-bit data elements of Rs2.
9338 * The four 16-bit results are then written into an even/odd pair of registers specified by Rd(4,1).
9339 * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
9340 * includes register 2d and 2d+1.
9341 * The odd `2d+1` register of the pair contains the two 16-bit results calculated from the top part of
9342 * Rs1 and the even `2d` register of the pair contains the two 16-bit results calculated from the bottom
9343 * part of Rs1.
9344 *
9345 * **RV64 Description**:\n
9346 * For the `SMUL8` instruction, multiply the 8-bit data elements of Rs1 with the
9347 * corresponding 8-bit data elements of Rs2.
9348 * For the `SMULX8` instruction, multiply the first and second 8-bit data elements of Rs1 with the
9349 * second and first 8-bit data elements of Rs2. At the same time, multiply the third and fourth 8-bit data
9350 * elements of Rs1 with the fourth and third 8-bit data elements of Rs2.
9351 * The four 16-bit results are then written into Rd. The Rd.W[1] contains the two 16-bit results
9352 * calculated from the top part of Rs1 and the Rd.W[0] contains the two 16-bit results calculated from
9353 * the bottom part of Rs1.
9354 *
9355 * **Operations**:\n
9356 * ~~~
9357 * * RV32:
9358 * if (is `SMUL8`) {
9359 * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
9360 * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
9361 * } else if (is `SMULX8`) {
9362 * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
9363 * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
9364 * }
9365 * rest[x/2] = op1t[x/2] s* op2t[x/2];
9366 * resb[x/2] = op1b[x/2] s* op2b[x/2];
9367 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
9368 * R[t_H].H[1] = rest[1]; R[t_H].H[0] = resb[1];
9369 * R[t_L].H[1] = rest[0]; R[t_L].H[0] = resb[0];
9370 * x = 0 and 2
9371 * * RV64:
9372 * if (is `SMUL8`) {
9373 * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
9374 * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
9375 * } else if (is `SMULX8`) {
9376 * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
9377 * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
9378 * }
9379 * rest[x/2] = op1t[x/2] s* op2t[x/2];
9380 * resb[x/2] = op1b[x/2] s* op2b[x/2];
9381 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
9382 * Rd.W[1].H[1] = rest[1]; Rd.W[1].H[0] = resb[1];
9383 * Rd.W[0].H[1] = rest[0]; Rd.W[0].H[0] = resb[0];
9384 * x = 0 and 2
9385 * ~~~
9386 *
9387 * \param [in] a unsigned int type of value stored in a
9388 * \param [in] b unsigned int type of value stored in b
9389 * \return value stored in unsigned long long type
9390 */
__RV_SMULX8(unsigned int a,unsigned int b)9391 __STATIC_FORCEINLINE unsigned long long __RV_SMULX8(unsigned int a, unsigned int b)
9392 {
9393 register unsigned long long result;
9394 __ASM volatile("smulx8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
9395 return result;
9396 }
9397 /* ===== Inline Function End for 3.122.2. SMULX8 ===== */
9398
9399 /* ===== Inline Function Start for 3.123.1. SMUL16 ===== */
9400 /**
9401 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
9402 * \brief SMUL16 (SIMD Signed 16-bit Multiply)
9403 * \details
9404 * **Type**: SIMD
9405 *
9406 * **Syntax**:\n
9407 * ~~~
9408 * SMUL16 Rd, Rs1, Rs2
9409 * SMULX16 Rd, Rs1, Rs2
9410 * ~~~
9411 *
9412 * **Purpose**:\n
9413 * Do signed 16-bit multiplications and generate two 32-bit results simultaneously.
9414 *
9415 * **RV32 Description**:\n
9416 * For the `SMUL16` instruction, multiply the top 16-bit Q15 content of Rs1 with
9417 * the top 16-bit Q15 content of Rs2. At the same time, multiply the bottom 16-bit Q15 content of Rs1
9418 * with the bottom 16-bit Q15 content of Rs2.
9419 * For the `SMULX16` instruction, multiply the top 16-bit Q15 content of Rs1 with the bottom 16-bit
 * Q15 content of Rs2. At the same time, multiply the bottom 16-bit Q15 content of Rs1 with the top
 * 16-bit Q15 content of Rs2.
9422 * The two Q30 results are then written into an even/odd pair of registers specified by Rd(4,1). Rd(4,1),
9423 * i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair includes
9424 * register 2d and 2d+1.
9425 * The odd `2d+1` register of the pair contains the 32-bit result calculated from the top part of Rs1 and
9426 * the even `2d` register of the pair contains the 32-bit result calculated from the bottom part of Rs1.
9427 *
9428 * **RV64 Description**:\n
9429 * For the `SMUL16` instruction, multiply the top 16-bit Q15 content of the lower
9430 * 32-bit word in Rs1 with the top 16-bit Q15 content of the lower 32-bit word in Rs2. At the same time,
9431 * multiply the bottom 16-bit Q15 content of the lower 32-bit word in Rs1 with the bottom 16-bit Q15
9432 * content of the lower 32-bit word in Rs2.
9433 * For the `SMULX16` instruction, multiply the top 16-bit Q15 content of the lower 32-bit word in Rs1
9434 * with the bottom 16-bit Q15 content of the lower 32-bit word in Rs2. At the same time, multiply the
9435 * bottom 16-bit Q15 content of the lower 32-bit word in Rs1 with the top 16-bit Q15 content of the
9436 * lower 32-bit word in Rs2.
9437 * The two 32-bit Q30 results are then written into Rd. The result calculated from the top 16-bit of the
9438 * lower 32-bit word in Rs1 is written to Rd.W[1]. And the result calculated from the bottom 16-bit of
 * the lower 32-bit word in Rs1 is written to Rd.W[0].
9440 *
9441 * **Operations**:\n
9442 * ~~~
9443 * * RV32:
9444 * if (is `SMUL16`) {
9445 * op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
9446 * op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
9447 * } else if (is `SMULX16`) {
9448 * op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
9449 * op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
9450 * }
9451 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
9452 * res = aop s* bop;
9453 * }
9454 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
9455 * R[t_H] = rest;
9456 * R[t_L] = resb;
9457 * * RV64:
9458 * if (is `SMUL16`) {
9459 * op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
9460 * op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
9461 * } else if (is `SMULX16`) {
9462 * op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
9463 * op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
9464 * }
9465 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
9466 * res = aop s* bop;
9467 * }
9468 * Rd.W[1] = rest;
9469 * Rd.W[0] = resb;
9470 * ~~~
9471 *
9472 * \param [in] a unsigned int type of value stored in a
9473 * \param [in] b unsigned int type of value stored in b
9474 * \return value stored in unsigned long long type
9475 */
__RV_SMUL16(unsigned int a,unsigned int b)9476 __STATIC_FORCEINLINE unsigned long long __RV_SMUL16(unsigned int a, unsigned int b)
9477 {
9478 register unsigned long long result;
9479 __ASM volatile("smul16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
9480 return result;
9481 }
9482 /* ===== Inline Function End for 3.123.1. SMUL16 ===== */
9483
9484 /* ===== Inline Function Start for 3.123.2. SMULX16 ===== */
9485 /**
9486 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
9487 * \brief SMULX16 (SIMD Signed Crossed 16-bit Multiply)
9488 * \details
9489 * **Type**: SIMD
9490 *
9491 * **Syntax**:\n
9492 * ~~~
9493 * SMUL16 Rd, Rs1, Rs2
9494 * SMULX16 Rd, Rs1, Rs2
9495 * ~~~
9496 *
9497 * **Purpose**:\n
9498 * Do signed 16-bit multiplications and generate two 32-bit results simultaneously.
9499 *
9500 * **RV32 Description**:\n
9501 * For the `SMUL16` instruction, multiply the top 16-bit Q15 content of Rs1 with
9502 * the top 16-bit Q15 content of Rs2. At the same time, multiply the bottom 16-bit Q15 content of Rs1
9503 * with the bottom 16-bit Q15 content of Rs2.
9504 * For the `SMULX16` instruction, multiply the top 16-bit Q15 content of Rs1 with the bottom 16-bit
9505 * Q15 content of Rs2. At the same time, multiply the bottom 16-bit Q15 content of Rs1 with the top 16-
9506 * bit Q15 content of Rs2.
9507 * The two Q30 results are then written into an even/odd pair of registers specified by Rd(4,1). Rd(4,1),
9508 * i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair includes
9509 * register 2d and 2d+1.
9510 * The odd `2d+1` register of the pair contains the 32-bit result calculated from the top part of Rs1 and
9511 * the even `2d` register of the pair contains the 32-bit result calculated from the bottom part of Rs1.
9512 *
9513 * **RV64 Description**:\n
9514 * For the `SMUL16` instruction, multiply the top 16-bit Q15 content of the lower
9515 * 32-bit word in Rs1 with the top 16-bit Q15 content of the lower 32-bit word in Rs2. At the same time,
9516 * multiply the bottom 16-bit Q15 content of the lower 32-bit word in Rs1 with the bottom 16-bit Q15
9517 * content of the lower 32-bit word in Rs2.
9518 * For the `SMULX16` instruction, multiply the top 16-bit Q15 content of the lower 32-bit word in Rs1
9519 * with the bottom 16-bit Q15 content of the lower 32-bit word in Rs2. At the same time, multiply the
9520 * bottom 16-bit Q15 content of the lower 32-bit word in Rs1 with the top 16-bit Q15 content of the
9521 * lower 32-bit word in Rs2.
9522 * The two 32-bit Q30 results are then written into Rd. The result calculated from the top 16-bit of the
9523 * lower 32-bit word in Rs1 is written to Rd.W[1]. And the result calculated from the bottom 16-bit of
 * the lower 32-bit word in Rs1 is written to Rd.W[0].
9525 *
9526 * **Operations**:\n
9527 * ~~~
9528 * * RV32:
9529 * if (is `SMUL16`) {
9530 * op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
9531 * op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
9532 * } else if (is `SMULX16`) {
9533 * op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
9534 * op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
9535 * }
9536 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
9537 * res = aop s* bop;
9538 * }
9539 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
9540 * R[t_H] = rest;
9541 * R[t_L] = resb;
9542 * * RV64:
9543 * if (is `SMUL16`) {
9544 * op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
9545 * op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
9546 * } else if (is `SMULX16`) {
9547 * op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
9548 * op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
9549 * }
9550 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
9551 * res = aop s* bop;
9552 * }
9553 * Rd.W[1] = rest;
9554 * Rd.W[0] = resb;
9555 * ~~~
9556 *
9557 * \param [in] a unsigned int type of value stored in a
9558 * \param [in] b unsigned int type of value stored in b
9559 * \return value stored in unsigned long long type
9560 */
__RV_SMULX16(unsigned int a,unsigned int b)9561 __STATIC_FORCEINLINE unsigned long long __RV_SMULX16(unsigned int a, unsigned int b)
9562 {
9563 register unsigned long long result;
9564 __ASM volatile("smulx16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
9565 return result;
9566 }
9567 /* ===== Inline Function End for 3.123.2. SMULX16 ===== */
9568
9569 /* ===== Inline Function Start for 3.124. SRA.u ===== */
9570 /**
9571 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
9572 * \brief SRA.u (Rounding Shift Right Arithmetic)
9573 * \details
9574 * **Type**: DSP
9575 *
9576 * **Syntax**:\n
9577 * ~~~
9578 * SRA.u Rd, Rs1, Rs2
9579 * ~~~
9580 *
9581 * **Purpose**:\n
9582 * Perform an arithmetic right shift operation with rounding. The shift amount is a variable
9583 * from a GPR.
9584 *
9585 * **Description**:\n
9586 * This instruction right-shifts the content of Rs1 arithmetically. The shifted out bits are
9587 * filled with the sign-bit and the shift amount is specified by the low-order 5-bits (RV32) or 6-bits
9588 * (RV64) of the Rs2 register. For the rounding operation, a value of 1 is added to the most significant
9589 * discarded bit of the data to calculate the final result. And the result is written to Rd.
9590 *
9591 * **Operations**:\n
9592 * ~~~
9593 * * RV32:
9594 * sa = Rs2[4:0];
9595 * if (sa > 0) {
9596 * res[31:-1] = SE33(Rs1[31:(sa-1)]) + 1;
9597 * Rd = res[31:0];
9598 * } else {
9599 * Rd = Rs1;
9600 * }
9601 * * RV64:
9602 * sa = Rs2[5:0];
9603 * if (sa > 0) {
9604 * res[63:-1] = SE65(Rs1[63:(sa-1)]) + 1;
9605 * Rd = res[63:0];
9606 * } else {
9607 * Rd = Rs1;
9608 * }
9609 * ~~~
9610 *
9611 * \param [in] a long type of value stored in a
9612 * \param [in] b unsigned int type of value stored in b
9613 * \return value stored in long type
9614 */
__RV_SRA_U(long a,unsigned int b)9615 __STATIC_FORCEINLINE long __RV_SRA_U(long a, unsigned int b)
9616 {
9617 register long result;
9618 __ASM volatile("sra.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
9619 return result;
9620 }
9621 /* ===== Inline Function End for 3.124. SRA.u ===== */
9622
9623 /* ===== Inline Function Start for 3.125. SRAI.u ===== */
9624 /**
9625 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
9626 * \brief SRAI.u (Rounding Shift Right Arithmetic Immediate)
9627 * \details
9628 * **Type**: DSP
9629 *
9630 * **Syntax**:\n
9631 * ~~~
9632 * SRAI.u Rd, Rs1, imm6u[4:0] (RV32)
9633 * SRAI.u Rd, Rs1, imm6u[5:0] (RV64)
9634 * ~~~
9635 *
9636 * **Purpose**:\n
9637 * Perform an arithmetic right shift operation with rounding. The shift amount is an
9638 * immediate value.
9639 *
9640 * **Description**:\n
9641 * This instruction right-shifts the content of Rs1 arithmetically. The shifted out bits are
9642 * filled with the sign-bit and the shift amount is specified by the imm6u[4:0] (RV32) or imm6u[5:0]
 * (RV64) constant. For the rounding operation, a value of 1 is added to the most significant discarded
9644 * bit of the data to calculate the final result. And the result is written to Rd.
9645 *
9646 * **Operations**:\n
9647 * ~~~
9648 * * RV32:
9649 * sa = imm6u[4:0];
9650 * if (sa > 0) {
9651 * res[31:-1] = SE33(Rs1[31:(sa-1)]) + 1;
9652 * Rd = res[31:0];
9653 * } else {
9654 * Rd = Rs1;
9655 * }
9656 * * RV64:
9657 * sa = imm6u[5:0];
9658 * if (sa > 0) {
9659 * res[63:-1] = SE65(Rs1[63:(sa-1)]) + 1;
9660 * Rd = res[63:0];
9661 * } else {
9662 * Rd = Rs1;
9663 * }
9664 * ~~~
9665 *
9666 * \param [in] a long type of value stored in a
9667 * \param [in] b unsigned int type of value stored in b
9668 * \return value stored in long type
9669 */
#define __RV_SRAI_U(a, b)    \
    ({    \
        /* Evaluate (a) first and keep it under a macro-private name, so a  */    \
        /* caller argument spelled `result` or `__a` is never shadowed by   */    \
        /* a local of this statement expression.                            */    \
        long __srai_u_src = (long)(a);    \
        long __srai_u_dst;    \
        /* (b) is the shift amount; it is passed through the "K" immediate  */    \
        /* constraint and therefore has to be a constant expression.        */    \
        /* NOTE(review): GCC documents "K" as a 5-bit unsigned immediate;   */    \
        /* confirm that shift amounts 32..63 are accepted on RV64.          */    \
        __ASM volatile("srai.u %0, %1, %2" : "=r"(__srai_u_dst) : "r"(__srai_u_src), "K"(b));    \
        __srai_u_dst;    \
    })
9677 /* ===== Inline Function End for 3.125. SRAI.u ===== */
9678
9679 /* ===== Inline Function Start for 3.126.1. SRA8 ===== */
9680 /**
9681 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
9682 * \brief SRA8 (SIMD 8-bit Shift Right Arithmetic)
9683 * \details
9684 * **Type**: SIMD
9685 *
9686 * **Syntax**:\n
9687 * ~~~
9688 * SRA8 Rd, Rs1, Rs2
9689 * SRA8.u Rd, Rs1, Rs2
9690 * ~~~
9691 *
9692 * **Purpose**:\n
9693 * Do 8-bit element arithmetic right shift operations simultaneously. The shift amount is a
9694 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
9695 * results.
9696 *
9697 * **Description**:\n
9698 * The 8-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
9699 * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
9700 * 3-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
9701 * added to the most significant discarded bit of each 8-bit data element to calculate the final results.
9702 * And the results are written to Rd.
9703 *
9704 * **Operations**:\n
9705 * ~~~
9706 * sa = Rs2[2:0];
9707 * if (sa > 0) {
9708 * if (`.u` form) { // SRA8.u
9709 * res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
9710 * Rd.B[x] = res[7:0];
9711 * } else { // SRA8
 * Rd.B[x] = SE8(Rs1.B[x][7:sa])
9713 * }
9714 * } else {
9715 * Rd = Rs1;
9716 * }
9717 * for RV32: x=3...0,
9718 * for RV64: x=7...0
9719 * ~~~
9720 *
9721 * \param [in] a unsigned long type of value stored in a
9722 * \param [in] b unsigned int type of value stored in b
9723 * \return value stored in unsigned long type
9724 */
__RV_SRA8(unsigned long a,unsigned int b)9725 __STATIC_FORCEINLINE unsigned long __RV_SRA8(unsigned long a, unsigned int b)
9726 {
9727 register unsigned long result;
9728 __ASM volatile("sra8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
9729 return result;
9730 }
9731 /* ===== Inline Function End for 3.126.1. SRA8 ===== */
9732
9733 /* ===== Inline Function Start for 3.126.2. SRA8.u ===== */
9734 /**
9735 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
9736 * \brief SRA8.u (SIMD 8-bit Rounding Shift Right Arithmetic)
9737 * \details
9738 * **Type**: SIMD
9739 *
9740 * **Syntax**:\n
9741 * ~~~
9742 * SRA8 Rd, Rs1, Rs2
9743 * SRA8.u Rd, Rs1, Rs2
9744 * ~~~
9745 *
9746 * **Purpose**:\n
9747 * Do 8-bit element arithmetic right shift operations simultaneously. The shift amount is a
9748 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
9749 * results.
9750 *
9751 * **Description**:\n
9752 * The 8-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
9753 * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
9754 * 3-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
9755 * added to the most significant discarded bit of each 8-bit data element to calculate the final results.
9756 * And the results are written to Rd.
9757 *
9758 * **Operations**:\n
9759 * ~~~
9760 * sa = Rs2[2:0];
9761 * if (sa > 0) {
9762 * if (`.u` form) { // SRA8.u
9763 * res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
9764 * Rd.B[x] = res[7:0];
9765 * } else { // SRA8
 * Rd.B[x] = SE8(Rs1.B[x][7:sa])
9767 * }
9768 * } else {
9769 * Rd = Rs1;
9770 * }
9771 * for RV32: x=3...0,
9772 * for RV64: x=7...0
9773 * ~~~
9774 *
9775 * \param [in] a unsigned long type of value stored in a
9776 * \param [in] b unsigned int type of value stored in b
9777 * \return value stored in unsigned long type
9778 */
__RV_SRA8_U(unsigned long a,unsigned int b)9779 __STATIC_FORCEINLINE unsigned long __RV_SRA8_U(unsigned long a, unsigned int b)
9780 {
9781 register unsigned long result;
9782 __ASM volatile("sra8.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
9783 return result;
9784 }
9785 /* ===== Inline Function End for 3.126.2. SRA8.u ===== */
9786
9787 /* ===== Inline Function Start for 3.127.1. SRAI8 ===== */
9788 /**
9789 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
9790 * \brief SRAI8 (SIMD 8-bit Shift Right Arithmetic Immediate)
9791 * \details
9792 * **Type**: SIMD
9793 *
9794 * **Syntax**:\n
9795 * ~~~
9796 * SRAI8 Rd, Rs1, imm3u
9797 * SRAI8.u Rd, Rs1, imm3u
9798 * ~~~
9799 *
9800 * **Purpose**:\n
9801 * Do 8-bit element arithmetic right shift operations simultaneously. The shift amount is an
9802 * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
9803 *
9804 * **Description**:\n
9805 * The 8-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
9806 * bits are filled with the sign-bit of the data elements. The shift amount is specified by the imm3u
9807 * constant. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
9808 * discarded bit of each 8-bit data element to calculate the final results. And the results are written to
9809 * Rd.
9810 *
9811 * **Operations**:\n
9812 * ~~~
9813 * sa = imm3u[2:0];
9814 * if (sa > 0) {
 * if (`.u` form) { // SRAI8.u
 * res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
 * Rd.B[x] = res[7:0];
 * } else { // SRAI8
 * Rd.B[x] = SE8(Rs1.B[x][7:sa])
9820 * }
9821 * } else {
9822 * Rd = Rs1;
9823 * }
9824 * for RV32: x=3...0,
9825 * for RV64: x=7...0
9826 * ~~~
9827 *
9828 * \param [in] a unsigned long type of value stored in a
9829 * \param [in] b unsigned int type of value stored in b
9830 * \return value stored in unsigned long type
9831 */
#define __RV_SRAI8(a, b)    \
    ({    \
        /* Evaluate (a) first and keep it under a macro-private name, so a  */    \
        /* caller argument spelled `result` or `__a` is never shadowed by   */    \
        /* a local of this statement expression.                            */    \
        unsigned long __srai8_src = (unsigned long)(a);    \
        unsigned long __srai8_dst;    \
        /* (b) is the shift amount; it is passed through the "K" immediate  */    \
        /* constraint and therefore has to be a constant expression.        */    \
        __ASM volatile("srai8 %0, %1, %2" : "=r"(__srai8_dst) : "r"(__srai8_src), "K"(b));    \
        __srai8_dst;    \
    })
9839 /* ===== Inline Function End for 3.127.1. SRAI8 ===== */
9840
9841 /* ===== Inline Function Start for 3.127.2. SRAI8.u ===== */
9842 /**
9843 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
9844 * \brief SRAI8.u (SIMD 8-bit Rounding Shift Right Arithmetic Immediate)
9845 * \details
9846 * **Type**: SIMD
9847 *
9848 * **Syntax**:\n
9849 * ~~~
9850 * SRAI8 Rd, Rs1, imm3u
9851 * SRAI8.u Rd, Rs1, imm3u
9852 * ~~~
9853 *
9854 * **Purpose**:\n
9855 * Do 8-bit element arithmetic right shift operations simultaneously. The shift amount is an
9856 * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
9857 *
9858 * **Description**:\n
9859 * The 8-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
9860 * bits are filled with the sign-bit of the data elements. The shift amount is specified by the imm3u
9861 * constant. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
9862 * discarded bit of each 8-bit data element to calculate the final results. And the results are written to
9863 * Rd.
9864 *
9865 * **Operations**:\n
9866 * ~~~
9867 * sa = imm3u[2:0];
9868 * if (sa > 0) {
 * if (`.u` form) { // SRAI8.u
 * res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
 * Rd.B[x] = res[7:0];
 * } else { // SRAI8
 * Rd.B[x] = SE8(Rs1.B[x][7:sa])
9874 * }
9875 * } else {
9876 * Rd = Rs1;
9877 * }
9878 * for RV32: x=3...0,
9879 * for RV64: x=7...0
9880 * ~~~
9881 *
9882 * \param [in] a unsigned long type of value stored in a
9883 * \param [in] b unsigned int type of value stored in b
9884 * \return value stored in unsigned long type
9885 */
#define __RV_SRAI8_U(a, b)    \
    ({    \
        /* Evaluate (a) first and keep it under a macro-private name, so a  */    \
        /* caller argument spelled `result` or `__a` is never shadowed by   */    \
        /* a local of this statement expression.                            */    \
        unsigned long __srai8_u_src = (unsigned long)(a);    \
        unsigned long __srai8_u_dst;    \
        /* (b) is the shift amount; it is passed through the "K" immediate  */    \
        /* constraint and therefore has to be a constant expression.        */    \
        __ASM volatile("srai8.u %0, %1, %2" : "=r"(__srai8_u_dst) : "r"(__srai8_u_src), "K"(b));    \
        __srai8_u_dst;    \
    })
9893 /* ===== Inline Function End for 3.127.2. SRAI8.u ===== */
9894
9895 /* ===== Inline Function Start for 3.128.1. SRA16 ===== */
9896 /**
9897 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
9898 * \brief SRA16 (SIMD 16-bit Shift Right Arithmetic)
9899 * \details
9900 * **Type**: SIMD
9901 *
9902 * **Syntax**:\n
9903 * ~~~
9904 * SRA16 Rd, Rs1, Rs2
9905 * SRA16.u Rd, Rs1, Rs2
9906 * ~~~
9907 *
9908 * **Purpose**:\n
9909 * Do 16-bit element arithmetic right shift operations simultaneously. The shift amount is a
9910 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
9911 * results.
9912 *
9913 * **Description**:\n
9914 * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
9915 * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
9916 * 4-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
9917 * added to the most significant discarded bit of each 16-bit data element to calculate the final results.
9918 * And the results are written to Rd.
9919 *
9920 * **Operations**:\n
9921 * ~~~
9922 * sa = Rs2[3:0];
9923 * if (sa != 0) {
9924 * if (`.u` form) { // SRA16.u
9925 * res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
9926 * Rd.H[x] = res[15:0];
9927 * } else { // SRA16
9928 * Rd.H[x] = SE16(Rs1.H[x][15:sa])
9929 * }
9930 * } else {
9931 * Rd = Rs1;
9932 * }
9933 * for RV32: x=1...0,
9934 * for RV64: x=3...0
9935 * ~~~
9936 *
9937 * \param [in] a unsigned long type of value stored in a
9938 * \param [in] b unsigned long type of value stored in b
9939 * \return value stored in unsigned long type
9940 */
__RV_SRA16(unsigned long a,unsigned long b)9941 __STATIC_FORCEINLINE unsigned long __RV_SRA16(unsigned long a, unsigned long b)
9942 {
9943 register unsigned long result;
9944 __ASM volatile("sra16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
9945 return result;
9946 }
9947 /* ===== Inline Function End for 3.128.1. SRA16 ===== */
9948
9949 /* ===== Inline Function Start for 3.128.2. SRA16.u ===== */
9950 /**
9951 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
9952 * \brief SRA16.u (SIMD 16-bit Rounding Shift Right Arithmetic)
9953 * \details
9954 * **Type**: SIMD
9955 *
9956 * **Syntax**:\n
9957 * ~~~
9958 * SRA16 Rd, Rs1, Rs2
9959 * SRA16.u Rd, Rs1, Rs2
9960 * ~~~
9961 *
9962 * **Purpose**:\n
9963 * Do 16-bit element arithmetic right shift operations simultaneously. The shift amount is a
9964 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
9965 * results.
9966 *
9967 * **Description**:\n
9968 * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
9969 * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
9970 * 4-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
9971 * added to the most significant discarded bit of each 16-bit data element to calculate the final results.
9972 * And the results are written to Rd.
9973 *
9974 * **Operations**:\n
9975 * ~~~
9976 * sa = Rs2[3:0];
9977 * if (sa != 0) {
9978 * if (`.u` form) { // SRA16.u
9979 * res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
9980 * Rd.H[x] = res[15:0];
9981 * } else { // SRA16
9982 * Rd.H[x] = SE16(Rs1.H[x][15:sa])
9983 * }
9984 * } else {
9985 * Rd = Rs1;
9986 * }
9987 * for RV32: x=1...0,
9988 * for RV64: x=3...0
9989 * ~~~
9990 *
9991 * \param [in] a unsigned long type of value stored in a
9992 * \param [in] b unsigned long type of value stored in b
9993 * \return value stored in unsigned long type
9994 */
__RV_SRA16_U(unsigned long a,unsigned long b)9995 __STATIC_FORCEINLINE unsigned long __RV_SRA16_U(unsigned long a, unsigned long b)
9996 {
9997 register unsigned long result;
9998 __ASM volatile("sra16.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
9999 return result;
10000 }
10001 /* ===== Inline Function End for 3.128.2. SRA16.u ===== */
10002
10003 /* ===== Inline Function Start for 3.129.1. SRAI16 ===== */
10004 /**
10005 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
10006 * \brief SRAI16 (SIMD 16-bit Shift Right Arithmetic Immediate)
10007 * \details
10008 * **Type**: SIMD
10009 *
10010 * **Syntax**:\n
10011 * ~~~
10012 * SRAI16 Rd, Rs1, imm4u
10013 * SRAI16.u Rd, Rs1, imm4u
10014 * ~~~
10015 *
10016 * **Purpose**:\n
10017 * Do 16-bit elements arithmetic right shift operations simultaneously. The shift amount is
10018 * an immediate value. The `.u` form performs additional rounding up operations on the shifted
10019 * results.
10020 *
10021 * **Description**:\n
10022 * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
10023 * bits are filled with the sign-bit of the 16-bit data elements. The shift amount is specified by the
10024 * imm4u constant. For the rounding operation of the `.u` form, a value of 1 is added to the most
10025 * significant discarded bit of each 16-bit data to calculate the final results. And the results are written
10026 * to Rd.
10027 *
10028 * **Operations**:\n
10029 * ~~~
10030 * sa = imm4u[3:0];
10031 * if (sa > 0) {
10032 * if (`.u` form) { // SRAI16.u
10033 * res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
10034 * Rd.H[x] = res[15:0];
10035 * } else { // SRAI16
10036 * Rd.H[x] = SE16(Rs1.H[x][15:sa]);
10037 * }
10038 * } else {
10039 * Rd = Rs1;
10040 * }
10041 * for RV32: x=1...0,
10042 * for RV64: x=3...0
10043 * ~~~
10044 *
10045 * \param [in] a unsigned long type of value stored in a
10046 * \param [in] b unsigned long type of value stored in b
10047 * \return value stored in unsigned long type
10048 */
#define __RV_SRAI16(a, b)    \
    ({    \
        /* Evaluate (a) first and keep it under a macro-private name, so a  */    \
        /* caller argument spelled `result` or `__a` is never shadowed by   */    \
        /* a local of this statement expression.                            */    \
        unsigned long __srai16_src = (unsigned long)(a);    \
        unsigned long __srai16_dst;    \
        /* (b) is the shift amount; it is passed through the "K" immediate  */    \
        /* constraint and therefore has to be a constant expression.        */    \
        __ASM volatile("srai16 %0, %1, %2" : "=r"(__srai16_dst) : "r"(__srai16_src), "K"(b));    \
        __srai16_dst;    \
    })
10056 /* ===== Inline Function End for 3.129.1. SRAI16 ===== */
10057
10058 /* ===== Inline Function Start for 3.129.2. SRAI16.u ===== */
10059 /**
10060 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
10061 * \brief SRAI16.u (SIMD 16-bit Rounding Shift Right Arithmetic Immediate)
10062 * \details
10063 * **Type**: SIMD
10064 *
10065 * **Syntax**:\n
10066 * ~~~
10067 * SRAI16 Rd, Rs1, imm4u
10068 * SRAI16.u Rd, Rs1, imm4u
10069 * ~~~
10070 *
10071 * **Purpose**:\n
10072 * Do 16-bit elements arithmetic right shift operations simultaneously. The shift amount is
10073 * an immediate value. The `.u` form performs additional rounding up operations on the shifted
10074 * results.
10075 *
10076 * **Description**:\n
10077 * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
10078 * bits are filled with the sign-bit of the 16-bit data elements. The shift amount is specified by the
10079 * imm4u constant. For the rounding operation of the `.u` form, a value of 1 is added to the most
10080 * significant discarded bit of each 16-bit data to calculate the final results. And the results are written
10081 * to Rd.
10082 *
10083 * **Operations**:\n
10084 * ~~~
10085 * sa = imm4u[3:0];
10086 * if (sa > 0) {
10087 * if (`.u` form) { // SRAI16.u
10088 * res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
10089 * Rd.H[x] = res[15:0];
10090 * } else { // SRAI16
10091 * Rd.H[x] = SE16(Rs1.H[x][15:sa]);
10092 * }
10093 * } else {
10094 * Rd = Rs1;
10095 * }
10096 * for RV32: x=1...0,
10097 * for RV64: x=3...0
10098 * ~~~
10099 *
10100 * \param [in] a unsigned long type of value stored in a
10101 * \param [in] b unsigned long type of value stored in b
10102 * \return value stored in unsigned long type
10103 */
#define __RV_SRAI16_U(a, b)    \
    ({    \
        /* Evaluate (a) first and keep it under a macro-private name, so a  */    \
        /* caller argument spelled `result` or `__a` is never shadowed by   */    \
        /* a local of this statement expression.                            */    \
        unsigned long __srai16_u_src = (unsigned long)(a);    \
        unsigned long __srai16_u_dst;    \
        /* (b) is the shift amount; it is passed through the "K" immediate  */    \
        /* constraint and therefore has to be a constant expression.        */    \
        __ASM volatile("srai16.u %0, %1, %2" : "=r"(__srai16_u_dst) : "r"(__srai16_u_src), "K"(b));    \
        __srai16_u_dst;    \
    })
10111 /* ===== Inline Function End for 3.129.2. SRAI16.u ===== */
10112
10113 /* ===== Inline Function Start for 3.130.1. SRL8 ===== */
10114 /**
10115 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
10116 * \brief SRL8 (SIMD 8-bit Shift Right Logical)
10117 * \details
10118 * **Type**: SIMD
10119 *
10120 * **Syntax**:\n
10121 * ~~~
 * SRL8 Rd, Rs1, Rs2
 * SRL8.u Rd, Rs1, Rs2
10124 * ~~~
10125 *
10126 * **Purpose**:\n
10127 * Do 8-bit elements logical right shift operations simultaneously. The shift amount is a
10128 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
10129 * results.
10130 *
10131 * **Description**:\n
10132 * The 8-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits are
10133 * filled with zero. The shift amount is specified by the low-order 3-bits of the value in the Rs2 register.
10134 * For the rounding operation of the `.u` form, a value of 1 is added to the most significant discarded
10135 * bit of each 8-bit data element to calculate the final results. And the results are written to Rd.
10136 *
10137 * **Operations**:\n
10138 * ~~~
10139 * sa = Rs2[2:0];
10140 * if (sa > 0) {
10141 * if (`.u` form) { // SRL8.u
10142 * res[8:0] = ZE9(Rs1.B[x][7:sa-1]) + 1;
10143 * Rd.B[x] = res[8:1];
10144 * } else { // SRL8
10145 * Rd.B[x] = ZE8(Rs1.B[x][7:sa]);
10146 * }
10147 * } else {
10148 * Rd = Rs1;
10149 * }
10150 * for RV32: x=3...0,
10151 * for RV64: x=7...0
10152 * ~~~
10153 *
10154 * \param [in] a unsigned long type of value stored in a
10155 * \param [in] b unsigned int type of value stored in b
10156 * \return value stored in unsigned long type
10157 */
__RV_SRL8(unsigned long a,unsigned int b)10158 __STATIC_FORCEINLINE unsigned long __RV_SRL8(unsigned long a, unsigned int b)
10159 {
10160 register unsigned long result;
10161 __ASM volatile("srl8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
10162 return result;
10163 }
10164 /* ===== Inline Function End for 3.130.1. SRL8 ===== */
10165
10166 /* ===== Inline Function Start for 3.130.2. SRL8.u ===== */
10167 /**
10168 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
10169 * \brief SRL8.u (SIMD 8-bit Rounding Shift Right Logical)
10170 * \details
10171 * **Type**: SIMD
10172 *
10173 * **Syntax**:\n
10174 * ~~~
 * SRL8 Rd, Rs1, Rs2
 * SRL8.u Rd, Rs1, Rs2
10177 * ~~~
10178 *
10179 * **Purpose**:\n
10180 * Do 8-bit elements logical right shift operations simultaneously. The shift amount is a
10181 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
10182 * results.
10183 *
10184 * **Description**:\n
10185 * The 8-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits are
10186 * filled with zero. The shift amount is specified by the low-order 3-bits of the value in the Rs2 register.
10187 * For the rounding operation of the `.u` form, a value of 1 is added to the most significant discarded
10188 * bit of each 8-bit data element to calculate the final results. And the results are written to Rd.
10189 *
10190 * **Operations**:\n
10191 * ~~~
10192 * sa = Rs2[2:0];
10193 * if (sa > 0) {
10194 * if (`.u` form) { // SRL8.u
10195 * res[8:0] = ZE9(Rs1.B[x][7:sa-1]) + 1;
10196 * Rd.B[x] = res[8:1];
10197 * } else { // SRL8
10198 * Rd.B[x] = ZE8(Rs1.B[x][7:sa]);
10199 * }
10200 * } else {
10201 * Rd = Rs1;
10202 * }
10203 * for RV32: x=3...0,
10204 * for RV64: x=7...0
10205 * ~~~
10206 *
10207 * \param [in] a unsigned long type of value stored in a
10208 * \param [in] b unsigned int type of value stored in b
10209 * \return value stored in unsigned long type
10210 */
__RV_SRL8_U(unsigned long a,unsigned int b)10211 __STATIC_FORCEINLINE unsigned long __RV_SRL8_U(unsigned long a, unsigned int b)
10212 {
10213 register unsigned long result;
10214 __ASM volatile("srl8.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
10215 return result;
10216 }
10217 /* ===== Inline Function End for 3.130.2. SRL8.u ===== */
10218
10219 /* ===== Inline Function Start for 3.131.1. SRLI8 ===== */
10220 /**
10221 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
10222 * \brief SRLI8 (SIMD 8-bit Shift Right Logical Immediate)
10223 * \details
10224 * **Type**: SIMD
10225 *
10226 * **Syntax**:\n
10227 * ~~~
 * SRLI8 Rd, Rs1, imm3u
 * SRLI8.u Rd, Rs1, imm3u
10230 * ~~~
10231 *
10232 * **Purpose**:\n
10233 * Do 8-bit elements logical right shift operations simultaneously. The shift amount is an
10234 * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
10235 *
10236 * **Description**:\n
10237 * The 8-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits are
10238 * filled with zero. The shift amount is specified by the imm3u constant. For the rounding operation of
10239 * the `.u` form, a value of 1 is added to the most significant discarded bit of each 8-bit data element to
10240 * calculate the final results. And the results are written to Rd.
10241 *
10242 * **Operations**:\n
10243 * ~~~
10244 * sa = imm3u[2:0];
10245 * if (sa > 0) {
10246 * if (`.u` form) { // SRLI8.u
10247 * res[8:0] = ZE9(Rs1.B[x][7:sa-1]) + 1;
10248 * Rd.B[x] = res[8:1];
10249 * } else { // SRLI8
10250 * Rd.B[x] = ZE8(Rs1.B[x][7:sa]);
10251 * }
10252 * } else {
10253 * Rd = Rs1;
10254 * }
10255 * for RV32: x=3...0,
10256 * for RV64: x=7...0
10257 * ~~~
10258 *
10259 * \param [in] a unsigned long type of value stored in a
10260 * \param [in] b unsigned int type of value stored in b
10261 * \return value stored in unsigned long type
10262 */
#define __RV_SRLI8(a, b)    \
    ({    \
        /* Evaluate (a) first and keep it under a macro-private name, so a  */    \
        /* caller argument spelled `result` or `__a` is never shadowed by   */    \
        /* a local of this statement expression.                            */    \
        unsigned long __srli8_src = (unsigned long)(a);    \
        unsigned long __srli8_dst;    \
        /* (b) is the shift amount; it is passed through the "K" immediate  */    \
        /* constraint and therefore has to be a constant expression.        */    \
        __ASM volatile("srli8 %0, %1, %2" : "=r"(__srli8_dst) : "r"(__srli8_src), "K"(b));    \
        __srli8_dst;    \
    })
10270 /* ===== Inline Function End for 3.131.1. SRLI8 ===== */
10271
10272 /* ===== Inline Function Start for 3.131.2. SRLI8.u ===== */
10273 /**
10274 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
10275 * \brief SRLI8.u (SIMD 8-bit Rounding Shift Right Logical Immediate)
10276 * \details
10277 * **Type**: SIMD
10278 *
10279 * **Syntax**:\n
10280 * ~~~
 * SRLI8 Rd, Rs1, imm3u
 * SRLI8.u Rd, Rs1, imm3u
10283 * ~~~
10284 *
10285 * **Purpose**:\n
10286 * Do 8-bit elements logical right shift operations simultaneously. The shift amount is an
10287 * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
10288 *
10289 * **Description**:\n
10290 * The 8-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits are
10291 * filled with zero. The shift amount is specified by the imm3u constant. For the rounding operation of
10292 * the `.u` form, a value of 1 is added to the most significant discarded bit of each 8-bit data element to
10293 * calculate the final results. And the results are written to Rd.
10294 *
10295 * **Operations**:\n
10296 * ~~~
10297 * sa = imm3u[2:0];
10298 * if (sa > 0) {
10299 * if (`.u` form) { // SRLI8.u
10300 * res[8:0] = ZE9(Rs1.B[x][7:sa-1]) + 1;
10301 * Rd.B[x] = res[8:1];
10302 * } else { // SRLI8
10303 * Rd.B[x] = ZE8(Rs1.B[x][7:sa]);
10304 * }
10305 * } else {
10306 * Rd = Rs1;
10307 * }
10308 * for RV32: x=3...0,
10309 * for RV64: x=7...0
10310 * ~~~
10311 *
10312 * \param [in] a unsigned long type of value stored in a
10313 * \param [in] b unsigned int type of value stored in b
10314 * \return value stored in unsigned long type
10315 */
#define __RV_SRLI8_U(a, b)    \
    ({    \
        /* Evaluate (a) first and keep it under a macro-private name, so a  */    \
        /* caller argument spelled `result` or `__a` is never shadowed by   */    \
        /* a local of this statement expression.                            */    \
        unsigned long __srli8_u_src = (unsigned long)(a);    \
        unsigned long __srli8_u_dst;    \
        /* (b) is the shift amount; it is passed through the "K" immediate  */    \
        /* constraint and therefore has to be a constant expression.        */    \
        __ASM volatile("srli8.u %0, %1, %2" : "=r"(__srli8_u_dst) : "r"(__srli8_u_src), "K"(b));    \
        __srli8_u_dst;    \
    })
10323 /* ===== Inline Function End for 3.131.2. SRLI8.u ===== */
10324
10325 /* ===== Inline Function Start for 3.132.1. SRL16 ===== */
10326 /**
10327 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
10328 * \brief SRL16 (SIMD 16-bit Shift Right Logical)
10329 * \details
10330 * **Type**: SIMD
10331 *
10332 * **Syntax**:\n
10333 * ~~~
10334 * SRL16 Rt, Ra, Rb
10335 * SRL16.u Rt, Ra, Rb
10336 * ~~~
10337 *
10338 * **Purpose**:\n
10339 * Do 16-bit elements logical right shift operations simultaneously. The shift amount is a variable from a GPR. The `.u` form performs additional rounding up operations on the shifted results.
10340 *
10341 * **Description**:\n
10342 * The 16-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
10343 * are filled with zero. The shift amount is specified by the low-order 4-bits of the value in the Rs2
10344 * register. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
10345 * discarded bit of each 16-bit data element to calculate the final results. And the results are written to
10346 * Rd.
10347 *
10348 * **Operations**:\n
10349 * ~~~
10350 * sa = Rs2[3:0];
10351 * if (sa > 0) {
10352 * if (`.u` form) { // SRL16.u
10353 * res[16:0] = ZE17(Rs1.H[x][15:sa-1]) + 1;
10354 * Rd.H[x] = res[16:1];
10355 * } else { // SRL16
10356 * Rd.H[x] = ZE16(Rs1.H[x][15:sa]);
10357 * }
10358 * } else {
10359 * Rd = Rs1;
10360 * }
10361 * for RV32: x=1...0,
10362 * for RV64: x=3...0
10363 * ~~~
10364 *
10365 * \param [in] a unsigned long type of value stored in a
10366 * \param [in] b unsigned int type of value stored in b
10367 * \return value stored in unsigned long type
10368 */
__RV_SRL16(unsigned long a,unsigned int b)10369 __STATIC_FORCEINLINE unsigned long __RV_SRL16(unsigned long a, unsigned int b)
10370 {
10371 register unsigned long result;
10372 __ASM volatile("srl16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
10373 return result;
10374 }
10375 /* ===== Inline Function End for 3.132.1. SRL16 ===== */
10376
10377 /* ===== Inline Function Start for 3.132.2. SRL16.u ===== */
10378 /**
10379 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
10380 * \brief SRL16.u (SIMD 16-bit Rounding Shift Right Logical)
10381 * \details
10382 * **Type**: SIMD
10383 *
10384 * **Syntax**:\n
10385 * ~~~
10386 * SRL16 Rt, Ra, Rb
10387 * SRL16.u Rt, Ra, Rb
10388 * ~~~
10389 *
10390 * **Purpose**:\n
10391 * Do 16-bit elements logical right shift operations simultaneously. The shift amount is a variable from a GPR. The `.u` form performs additional rounding up operations on the shifted results.
10392 *
10393 * **Description**:\n
10394 * The 16-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
10395 * are filled with zero. The shift amount is specified by the low-order 4-bits of the value in the Rs2
10396 * register. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
10397 * discarded bit of each 16-bit data element to calculate the final results. And the results are written to
10398 * Rd.
10399 *
10400 * **Operations**:\n
10401 * ~~~
10402 * sa = Rs2[3:0];
10403 * if (sa > 0) {
10404 * if (`.u` form) { // SRL16.u
10405 * res[16:0] = ZE17(Rs1.H[x][15:sa-1]) + 1;
10406 * Rd.H[x] = res[16:1];
10407 * } else { // SRL16
10408 * Rd.H[x] = ZE16(Rs1.H[x][15:sa]);
10409 * }
10410 * } else {
10411 * Rd = Rs1;
10412 * }
10413 * for RV32: x=1...0,
10414 * for RV64: x=3...0
10415 * ~~~
10416 *
10417 * \param [in] a unsigned long type of value stored in a
10418 * \param [in] b unsigned int type of value stored in b
10419 * \return value stored in unsigned long type
10420 */
__RV_SRL16_U(unsigned long a,unsigned int b)10421 __STATIC_FORCEINLINE unsigned long __RV_SRL16_U(unsigned long a, unsigned int b)
10422 {
10423 register unsigned long result;
10424 __ASM volatile("srl16.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
10425 return result;
10426 }
10427 /* ===== Inline Function End for 3.132.2. SRL16.u ===== */
10428
10429 /* ===== Inline Function Start for 3.133.1. SRLI16 ===== */
10430 /**
10431 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
10432 * \brief SRLI16 (SIMD 16-bit Shift Right Logical Immediate)
10433 * \details
10434 * **Type**: SIMD
10435 *
10436 * **Syntax**:\n
10437 * ~~~
10438 * SRLI16 Rt, Ra, imm4u
10439 * SRLI16.u Rt, Ra, imm4u
10440 * ~~~
10441 *
10442 * **Purpose**:\n
10443 * Do 16-bit elements logical right shift operations simultaneously. The shift amount is an
10444 * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
10445 *
10446 * **Description**:\n
10447 * The 16-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
10448 * are filled with zero. The shift amount is specified by the imm4u constant. For the rounding
10449 * operation of the `.u` form, a value of 1 is added to the most significant discarded bit of each 16-bit
10450 * data element to calculate the final results. And the results are written to Rd.
10451 *
10452 * **Operations**:\n
10453 * ~~~
10454 * sa = imm4u;
10455 * if (sa > 0) {
10456 * if (`.u` form) { // SRLI16.u
10457 * res[16:0] = ZE17(Rs1.H[x][15:sa-1]) + 1;
10458 * Rd.H[x] = res[16:1];
10459 * } else { // SRLI16
10460 * Rd.H[x] = ZE16(Rs1.H[x][15:sa]);
10461 * }
10462 * } else {
10463 * Rd = Rs1;
10464 * }
10465 * for RV32: x=1...0,
10466 * for RV64: x=3...0
10467 * ~~~
10468 *
10469 * \param [in] a unsigned long type of value stored in a
10470 * \param [in] b unsigned int type of value stored in b
10471 * \return value stored in unsigned long type
10472 */
#define __RV_SRLI16(a, b)    \
    ({    \
        /* Read `a` before any other local is declared: a caller argument */    \
        /* spelled `result` must not bind to this macro's own variable.   */    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long __result;    \
        /* `b` is bound with the "K" immediate constraint, so it has to be */    \
        /* a compile-time constant.                                        */    \
        __ASM volatile("srli16 %0, %1, %2" : "=r"(__result) : "r"(__a), "K"(b));    \
        __result;    \
    })
10480 /* ===== Inline Function End for 3.133.1. SRLI16 ===== */
10481
10482 /* ===== Inline Function Start for 3.133.2. SRLI16.u ===== */
10483 /**
10484 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
10485 * \brief SRLI16.u (SIMD 16-bit Rounding Shift Right Logical Immediate)
10486 * \details
10487 * **Type**: SIMD
10488 *
10489 * **Syntax**:\n
10490 * ~~~
10491 * SRLI16 Rt, Ra, imm4u
10492 * SRLI16.u Rt, Ra, imm4u
10493 * ~~~
10494 *
10495 * **Purpose**:\n
10496 * Do 16-bit elements logical right shift operations simultaneously. The shift amount is an
10497 * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
10498 *
10499 * **Description**:\n
10500 * The 16-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
10501 * are filled with zero. The shift amount is specified by the imm4u constant. For the rounding
10502 * operation of the `.u` form, a value of 1 is added to the most significant discarded bit of each 16-bit
10503 * data element to calculate the final results. And the results are written to Rd.
10504 *
10505 * **Operations**:\n
10506 * ~~~
10507 * sa = imm4u;
10508 * if (sa > 0) {
10509 * if (`.u` form) { // SRLI16.u
10510 * res[16:0] = ZE17(Rs1.H[x][15:sa-1]) + 1;
10511 * Rd.H[x] = res[16:1];
10512 * } else { // SRLI16
10513 * Rd.H[x] = ZE16(Rs1.H[x][15:sa]);
10514 * }
10515 * } else {
10516 * Rd = Rs1;
10517 * }
10518 * for RV32: x=1...0,
10519 * for RV64: x=3...0
10520 * ~~~
10521 *
10522 * \param [in] a unsigned long type of value stored in a
10523 * \param [in] b unsigned int type of value stored in b
10524 * \return value stored in unsigned long type
10525 */
#define __RV_SRLI16_U(a, b)    \
    ({    \
        /* Read `a` before any other local is declared: a caller argument */    \
        /* spelled `result` must not bind to this macro's own variable.   */    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long __result;    \
        /* `b` is bound with the "K" immediate constraint, so it has to be */    \
        /* a compile-time constant.                                        */    \
        __ASM volatile("srli16.u %0, %1, %2" : "=r"(__result) : "r"(__a), "K"(b));    \
        __result;    \
    })
10533 /* ===== Inline Function End for 3.133.2. SRLI16.u ===== */
10534
10535 /* ===== Inline Function Start for 3.134. STAS16 ===== */
10536 /**
10537 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
10538 * \brief STAS16 (SIMD 16-bit Straight Addition & Subtraction)
10539 * \details
10540 * **Type**: SIMD
10541 *
10542 * **Syntax**:\n
10543 * ~~~
10544 * STAS16 Rd, Rs1, Rs2
10545 * ~~~
10546 *
10547 * **Purpose**:\n
10548 * Do 16-bit integer element addition and 16-bit integer element subtraction in a 32-bit
10549 * chunk simultaneously. Operands are from corresponding positions in 32-bit chunks.
10550 *
10551 * **Description**:\n
10552 * This instruction adds the 16-bit integer element in [31:16] of 32-bit chunks in Rs1 with
10553 * the 16-bit integer element in [31:16] of 32-bit chunks in Rs2, and writes the result to [31:16] of 32-bit
10554 * chunks in Rd; at the same time, it subtracts the 16-bit integer element in [15:0] of 32-bit chunks in
10555 * Rs2 from the 16-bit integer element in [15:0] of 32-bit chunks in Rs1, and writes the result to [15:0] of 32-
10556 * bit chunks in Rd.
10557 *
10558 * **Note**:\n
10559 * This instruction can be used for either signed or unsigned operations.
10560 *
10561 * **Operations**:\n
10562 * ~~~
10563 * Rd.W[x][31:16] = Rs1.W[x][31:16] + Rs2.W[x][31:16];
10564 * Rd.W[x][15:0] = Rs1.W[x][15:0] - Rs2.W[x][15:0];
10565 * for RV32, x=0
10566 * for RV64, x=1...0
10567 * ~~~
10568 *
10569 * \param [in] a unsigned long type of value stored in a
10570 * \param [in] b unsigned long type of value stored in b
10571 * \return value stored in unsigned long type
10572 */
__RV_STAS16(unsigned long a,unsigned long b)10573 __STATIC_FORCEINLINE unsigned long __RV_STAS16(unsigned long a, unsigned long b)
10574 {
10575 register unsigned long result;
10576 __ASM volatile("stas16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
10577 return result;
10578 }
10579 /* ===== Inline Function End for 3.134. STAS16 ===== */
10580
10581 /* ===== Inline Function Start for 3.135. STSA16 ===== */
10582 /**
10583 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
10584 * \brief STSA16 (SIMD 16-bit Straight Subtraction & Addition)
10585 * \details
10586 * **Type**: SIMD
10587 *
10588 * **Syntax**:\n
10589 * ~~~
10590 * STSA16 Rd, Rs1, Rs2
10591 * ~~~
10592 *
10593 * **Purpose**:\n
10594 * Do 16-bit integer element subtraction and 16-bit integer element addition in a 32-bit
10595 * chunk simultaneously. Operands are from corresponding positions in 32-bit chunks.
10596 *
10597 * **Description**:\n
10598 * This instruction subtracts the 16-bit integer element in [31:16] of 32-bit chunks in Rs2
10599 * from the 16-bit integer element in [31:16] of 32-bit chunks in Rs1, and writes the result to [31:16] of
10600 * 32-bit chunks in Rd; at the same time, it adds the 16-bit integer element in [15:0] of 32-bit chunks in
10601 * Rs2 with the 16-bit integer element in [15:0] of 32-bit chunks in Rs1, and writes the result to [15:0] of
10602 * 32-bit chunks in Rd.
10603 *
10604 * **Note**:\n
10605 * This instruction can be used for either signed or unsigned operations.
10606 *
10607 * **Operations**:\n
10608 * ~~~
10609 * Rd.W[x][31:16] = Rs1.W[x][31:16] - Rs2.W[x][31:16];
10610 * Rd.W[x][15:0] = Rs1.W[x][15:0] + Rs2.W[x][15:0];
10611 * for RV32, x=0
10612 * for RV64, x=1...0
10613 * ~~~
10614 *
10615 * \param [in] a unsigned long type of value stored in a
10616 * \param [in] b unsigned long type of value stored in b
10617 * \return value stored in unsigned long type
10618 */
__RV_STSA16(unsigned long a,unsigned long b)10619 __STATIC_FORCEINLINE unsigned long __RV_STSA16(unsigned long a, unsigned long b)
10620 {
10621 register unsigned long result;
10622 __ASM volatile("stsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
10623 return result;
10624 }
10625 /* ===== Inline Function End for 3.135. STSA16 ===== */
10626
10627 /* ===== Inline Function Start for 3.136. SUB8 ===== */
10628 /**
10629 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
10630 * \brief SUB8 (SIMD 8-bit Subtraction)
10631 * \details
10632 * **Type**: SIMD
10633 *
10634 * **Syntax**:\n
10635 * ~~~
10636 * SUB8 Rd, Rs1, Rs2
10637 * ~~~
10638 *
10639 * **Purpose**:\n
10640 * Do 8-bit integer element subtractions simultaneously.
10641 *
10642 * **Description**:\n
10643 * This instruction subtracts the 8-bit integer elements in Rs2 from the 8-bit integer
10644 * elements in Rs1, and then writes the result to Rd.
10645 *
10646 * **Note**:\n
10647 * This instruction can be used for either signed or unsigned subtraction.
10648 *
10649 * **Operations**:\n
10650 * ~~~
10651 * Rd.B[x] = Rs1.B[x] - Rs2.B[x];
10652 * for RV32: x=3...0,
10653 * for RV64: x=7...0
10654 * ~~~
10655 *
10656 * \param [in] a unsigned long type of value stored in a
10657 * \param [in] b unsigned long type of value stored in b
10658 * \return value stored in unsigned long type
10659 */
__RV_SUB8(unsigned long a,unsigned long b)10660 __STATIC_FORCEINLINE unsigned long __RV_SUB8(unsigned long a, unsigned long b)
10661 {
10662 register unsigned long result;
10663 __ASM volatile("sub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
10664 return result;
10665 }
10666 /* ===== Inline Function End for 3.136. SUB8 ===== */
10667
10668 /* ===== Inline Function Start for 3.137. SUB16 ===== */
10669 /**
10670 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
10671 * \brief SUB16 (SIMD 16-bit Subtraction)
10672 * \details
10673 * **Type**: SIMD
10674 *
10675 * **Syntax**:\n
10676 * ~~~
10677 * SUB16 Rd, Rs1, Rs2
10678 * ~~~
10679 *
10680 * **Purpose**:\n
10681 * Do 16-bit integer element subtractions simultaneously.
10682 *
10683 * **Description**:\n
10684 * This instruction subtracts the 16-bit integer elements in Rs2 from the 16-bit integer
10685 * elements in Rs1, and then writes the result to Rd.
10686 *
10687 * **Note**:\n
10688 * This instruction can be used for either signed or unsigned subtraction.
10689 *
10690 * **Operations**:\n
10691 * ~~~
10692 * Rd.H[x] = Rs1.H[x] - Rs2.H[x];
10693 * for RV32: x=1...0,
10694 * for RV64: x=3...0
10695 * ~~~
10696 *
10697 * \param [in] a unsigned long type of value stored in a
10698 * \param [in] b unsigned long type of value stored in b
10699 * \return value stored in unsigned long type
10700 */
__RV_SUB16(unsigned long a,unsigned long b)10701 __STATIC_FORCEINLINE unsigned long __RV_SUB16(unsigned long a, unsigned long b)
10702 {
10703 register unsigned long result;
10704 __ASM volatile("sub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
10705 return result;
10706 }
10707 /* ===== Inline Function End for 3.137. SUB16 ===== */
10708
10709 /* ===== Inline Function Start for 3.138. SUB64 ===== */
10710 /**
10711 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
10712 * \brief SUB64 (64-bit Subtraction)
10713 * \details
10714 * **Type**: DSP (64-bit Profile)
10715 *
10716 * **Syntax**:\n
10717 * ~~~
10718 * SUB64 Rd, Rs1, Rs2
10719 * ~~~
10720 *
10721 * **Purpose**:\n
10722 * Perform a 64-bit signed or unsigned integer subtraction.
10723 *
10724 * **RV32 Description**:\n
10725 * This instruction subtracts the 64-bit integer of an even/odd pair of registers
10726 * specified by Rs2(4,1) from the 64-bit integer of an even/odd pair of registers specified by Rs1(4,1),
10727 * and then writes the 64-bit result to an even/odd pair of registers specified by Rd(4,1).
10728 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
10729 * includes register 2d and 2d+1.
10730 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
10731 * register of the pair contains the low 32-bit of the operand.
10732 *
10733 * **RV64 Description**:\n
10734 * This instruction subtracts the 64-bit integer of Rs2 from the 64-bit integer of Rs1,
10735 * and then writes the 64-bit result to Rd.
10736 *
10737 * **Note**:\n
10738 * This instruction can be used for either signed or unsigned subtraction.
10739 *
10740 * **Operations**:\n
10741 * ~~~
10742 * * RV32:
10743 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
10744 * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
10745 * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
10746 * R[t_H].R[t_L] = R[a_H].R[a_L] - R[b_H].R[b_L];
10747 * * RV64:
10748 * Rd = Rs1 - Rs2;
10749 * ~~~
10750 *
10751 * \param [in] a unsigned long long type of value stored in a
10752 * \param [in] b unsigned long long type of value stored in b
10753 * \return value stored in unsigned long long type
10754 */
__RV_SUB64(unsigned long long a,unsigned long long b)10755 __STATIC_FORCEINLINE unsigned long long __RV_SUB64(unsigned long long a, unsigned long long b)
10756 {
10757 register unsigned long long result;
10758 __ASM volatile("sub64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
10759 return result;
10760 }
10761 /* ===== Inline Function End for 3.138. SUB64 ===== */
10762
10763 /* ===== Inline Function Start for 3.139.1. SUNPKD810 ===== */
10764 /**
10765 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
10766 * \brief SUNPKD810 (Signed Unpacking Bytes 1 & 0)
10767 * \details
10768 * **Type**: DSP
10769 *
10770 * **Syntax**:\n
10771 * ~~~
10772 * SUNPKD8xy Rd, Rs1
10773 * xy = {10, 20, 30, 31, 32}
10774 * ~~~
10775 *
10776 * **Purpose**:\n
10777 * Unpack byte *x* and byte *y* of 32-bit chunks in a register into two 16-bit signed halfwords
10778 * of 32-bit chunks in a register.
10779 *
10780 * **Description**:\n
10781 * For the `SUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
10782 * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
10783 * chunks in Rd.
10784 *
10785 * **Operations**:\n
10786 * ~~~
10787 * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x])
10788 * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y])
10789 * // SUNPKD810, x=1,y=0
10790 * // SUNPKD820, x=2,y=0
10791 * // SUNPKD830, x=3,y=0
10792 * // SUNPKD831, x=3,y=1
10793 * // SUNPKD832, x=3,y=2
10794 * for RV32: m=0,
10795 * for RV64: m=1...0
10796 * ~~~
10797 *
10798 * \param [in] a unsigned long type of value stored in a
10799 * \return value stored in unsigned long type
10800 */
__RV_SUNPKD810(unsigned long a)10801 __STATIC_FORCEINLINE unsigned long __RV_SUNPKD810(unsigned long a)
10802 {
10803 register unsigned long result;
10804 __ASM volatile("sunpkd810 %0, %1" : "=r"(result) : "r"(a));
10805 return result;
10806 }
10807 /* ===== Inline Function End for 3.139.1. SUNPKD810 ===== */
10808
10809 /* ===== Inline Function Start for 3.139.2. SUNPKD820 ===== */
10810 /**
10811 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
10812 * \brief SUNPKD820 (Signed Unpacking Bytes 2 & 0)
10813 * \details
10814 * **Type**: DSP
10815 *
10816 * **Syntax**:\n
10817 * ~~~
10818 * SUNPKD8xy Rd, Rs1
10819 * xy = {10, 20, 30, 31, 32}
10820 * ~~~
10821 *
10822 * **Purpose**:\n
10823 * Unpack byte *x* and byte *y* of 32-bit chunks in a register into two 16-bit signed halfwords
10824 * of 32-bit chunks in a register.
10825 *
10826 * **Description**:\n
10827 * For the `SUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
10828 * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
10829 * chunks in Rd.
10830 *
10831 * **Operations**:\n
10832 * ~~~
10833 * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x])
10834 * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y])
10835 * // SUNPKD810, x=1,y=0
10836 * // SUNPKD820, x=2,y=0
10837 * // SUNPKD830, x=3,y=0
10838 * // SUNPKD831, x=3,y=1
10839 * // SUNPKD832, x=3,y=2
10840 * for RV32: m=0,
10841 * for RV64: m=1...0
10842 * ~~~
10843 *
10844 * \param [in] a unsigned long type of value stored in a
10845 * \return value stored in unsigned long type
10846 */
__RV_SUNPKD820(unsigned long a)10847 __STATIC_FORCEINLINE unsigned long __RV_SUNPKD820(unsigned long a)
10848 {
10849 register unsigned long result;
10850 __ASM volatile("sunpkd820 %0, %1" : "=r"(result) : "r"(a));
10851 return result;
10852 }
10853 /* ===== Inline Function End for 3.139.2. SUNPKD820 ===== */
10854
10855 /* ===== Inline Function Start for 3.139.3. SUNPKD830 ===== */
10856 /**
10857 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
10858 * \brief SUNPKD830 (Signed Unpacking Bytes 3 & 0)
10859 * \details
10860 * **Type**: DSP
10861 *
10862 * **Syntax**:\n
10863 * ~~~
10864 * SUNPKD8xy Rd, Rs1
10865 * xy = {10, 20, 30, 31, 32}
10866 * ~~~
10867 *
10868 * **Purpose**:\n
10869 * Unpack byte *x* and byte *y* of 32-bit chunks in a register into two 16-bit signed halfwords
10870 * of 32-bit chunks in a register.
10871 *
10872 * **Description**:\n
10873 * For the `SUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
10874 * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
10875 * chunks in Rd.
10876 *
10877 * **Operations**:\n
10878 * ~~~
10879 * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x])
10880 * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y])
10881 * // SUNPKD810, x=1,y=0
10882 * // SUNPKD820, x=2,y=0
10883 * // SUNPKD830, x=3,y=0
10884 * // SUNPKD831, x=3,y=1
10885 * // SUNPKD832, x=3,y=2
10886 * for RV32: m=0,
10887 * for RV64: m=1...0
10888 * ~~~
10889 *
10890 * \param [in] a unsigned long type of value stored in a
10891 * \return value stored in unsigned long type
10892 */
__RV_SUNPKD830(unsigned long a)10893 __STATIC_FORCEINLINE unsigned long __RV_SUNPKD830(unsigned long a)
10894 {
10895 register unsigned long result;
10896 __ASM volatile("sunpkd830 %0, %1" : "=r"(result) : "r"(a));
10897 return result;
10898 }
10899 /* ===== Inline Function End for 3.139.3. SUNPKD830 ===== */
10900
10901 /* ===== Inline Function Start for 3.139.4. SUNPKD831 ===== */
10902 /**
10903 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
10904 * \brief SUNPKD831 (Signed Unpacking Bytes 3 & 1)
10905 * \details
10906 * **Type**: DSP
10907 *
10908 * **Syntax**:\n
10909 * ~~~
10910 * SUNPKD8xy Rd, Rs1
10911 * xy = {10, 20, 30, 31, 32}
10912 * ~~~
10913 *
10914 * **Purpose**:\n
10915 * Unpack byte *x* and byte *y* of 32-bit chunks in a register into two 16-bit signed halfwords
10916 * of 32-bit chunks in a register.
10917 *
10918 * **Description**:\n
10919 * For the `SUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
10920 * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
10921 * chunks in Rd.
10922 *
10923 * **Operations**:\n
10924 * ~~~
10925 * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x])
10926 * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y])
10927 * // SUNPKD810, x=1,y=0
10928 * // SUNPKD820, x=2,y=0
10929 * // SUNPKD830, x=3,y=0
10930 * // SUNPKD831, x=3,y=1
10931 * // SUNPKD832, x=3,y=2
10932 * for RV32: m=0,
10933 * for RV64: m=1...0
10934 * ~~~
10935 *
10936 * \param [in] a unsigned long type of value stored in a
10937 * \return value stored in unsigned long type
10938 */
__RV_SUNPKD831(unsigned long a)10939 __STATIC_FORCEINLINE unsigned long __RV_SUNPKD831(unsigned long a)
10940 {
10941 register unsigned long result;
10942 __ASM volatile("sunpkd831 %0, %1" : "=r"(result) : "r"(a));
10943 return result;
10944 }
10945 /* ===== Inline Function End for 3.139.4. SUNPKD831 ===== */
10946
10947 /* ===== Inline Function Start for 3.139.5. SUNPKD832 ===== */
10948 /**
10949 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
10950 * \brief SUNPKD832 (Signed Unpacking Bytes 3 & 2)
10951 * \details
10952 * **Type**: DSP
10953 *
10954 * **Syntax**:\n
10955 * ~~~
10956 * SUNPKD8xy Rd, Rs1
10957 * xy = {10, 20, 30, 31, 32}
10958 * ~~~
10959 *
10960 * **Purpose**:\n
10961 * Unpack byte *x* and byte *y* of 32-bit chunks in a register into two 16-bit signed halfwords
10962 * of 32-bit chunks in a register.
10963 *
10964 * **Description**:\n
10965 * For the `SUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
10966 * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
10967 * chunks in Rd.
10968 *
10969 * **Operations**:\n
10970 * ~~~
10971 * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x])
10972 * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y])
10973 * // SUNPKD810, x=1,y=0
10974 * // SUNPKD820, x=2,y=0
10975 * // SUNPKD830, x=3,y=0
10976 * // SUNPKD831, x=3,y=1
10977 * // SUNPKD832, x=3,y=2
10978 * for RV32: m=0,
10979 * for RV64: m=1...0
10980 * ~~~
10981 *
10982 * \param [in] a unsigned long type of value stored in a
10983 * \return value stored in unsigned long type
10984 */
__RV_SUNPKD832(unsigned long a)10985 __STATIC_FORCEINLINE unsigned long __RV_SUNPKD832(unsigned long a)
10986 {
10987 register unsigned long result;
10988 __ASM volatile("sunpkd832 %0, %1" : "=r"(result) : "r"(a));
10989 return result;
10990 }
10991 /* ===== Inline Function End for 3.139.5. SUNPKD832 ===== */
10992
10993 /* ===== Inline Function Start for 3.140. SWAP8 ===== */
10994 /**
10995 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
10996 * \brief SWAP8 (Swap Byte within Halfword)
10997 * \details
10998 * **Type**: DSP
10999 *
11000 * **Syntax**:\n
11001 * ~~~
11002 * SWAP8 Rd, Rs1
11003 * ~~~
11004 *
11005 * **Purpose**:\n
11006 * Swap the bytes within each halfword of a register.
11007 *
11008 * **Description**:\n
11009 * This instruction swaps the bytes within each halfword of Rs1 and writes the result to
11010 * Rd.
11011 *
11012 * **Operations**:\n
11013 * ~~~
11014 * Rd.H[x] = CONCAT(Rs1.H[x][7:0],Rs1.H[x][15:8]);
11015 * for RV32: x=1...0,
11016 * for RV64: x=3...0
11017 * ~~~
11018 *
11019 * \param [in] a unsigned long type of value stored in a
11020 * \return value stored in unsigned long type
11021 */
__RV_SWAP8(unsigned long a)11022 __STATIC_FORCEINLINE unsigned long __RV_SWAP8(unsigned long a)
11023 {
11024 register unsigned long result;
11025 __ASM volatile("swap8 %0, %1" : "=r"(result) : "r"(a));
11026 return result;
11027 }
11028 /* ===== Inline Function End for 3.140. SWAP8 ===== */
11029
11030 /* ===== Inline Function Start for 3.141. SWAP16 ===== */
11031 /**
11032 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
11033 * \brief SWAP16 (Swap Halfword within Word)
11034 * \details
11035 * **Type**: DSP
11036 *
11037 * **Syntax**:\n
11038 * ~~~
11039 * SWAP16 Rd, Rs1
11040 * ~~~
11041 *
11042 * **Purpose**:\n
11043 * Swap the 16-bit halfwords within each word of a register.
11044 *
11045 * **Description**:\n
11046 * This instruction swaps the 16-bit halfwords within each word of Rs1 and writes the
11047 * result to Rd.
11048 *
11049 * **Operations**:\n
11050 * ~~~
11051 * Rd.W[x] = CONCAT(Rs1.W[x][15:0],Rs1.W[x][31:16]);
11052 * for RV32: x=0,
11053 * for RV64: x=1...0
11054 * ~~~
11055 *
11056 * \param [in] a unsigned long type of value stored in a
11057 * \return value stored in unsigned long type
11058 */
__RV_SWAP16(unsigned long a)11059 __STATIC_FORCEINLINE unsigned long __RV_SWAP16(unsigned long a)
11060 {
11061 register unsigned long result;
11062 __ASM volatile("swap16 %0, %1" : "=r"(result) : "r"(a));
11063 return result;
11064 }
11065 /* ===== Inline Function End for 3.141. SWAP16 ===== */
11066
11067 /* ===== Inline Function Start for 3.142. UCLIP8 ===== */
11068 /**
11069 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
11070 * \brief UCLIP8 (SIMD 8-bit Unsigned Clip Value)
11071 * \details
11072 * **Type**: SIMD
11073 *
11074 * **Syntax**:\n
11075 * ~~~
11076 * UCLIP8 Rt, Ra, imm3u
11077 * ~~~
11078 *
11079 * **Purpose**:\n
11080 * Limit the 8-bit signed elements of a register into an unsigned range simultaneously.
11081 *
11082 * **Description**:\n
11083 * This instruction limits the 8-bit signed elements stored in Rs1 into an unsigned integer
11084 * range between 2^imm3u-1 and 0, and writes the limited results to Rd. For example, if imm3u is 3, the 8-
11085 * bit input values should be saturated between 7 and 0. If saturation is performed, set OV bit to 1.
11086 *
11087 * **Operations**:\n
11088 * ~~~
11089 * src = Rs1.B[x];
11090 * if (src > (2^imm3u)-1) {
11091 * src = (2^imm3u)-1;
11092 * OV = 1;
11093 * } else if (src < 0) {
11094 * src = 0;
11095 * OV = 1;
11096 * }
11097 * Rd.B[x] = src;
11098 * for RV32: x=3...0,
11099 * for RV64: x=7...0
11100 * ~~~
11101 *
11102 * \param [in] a unsigned long type of value stored in a
11103 * \param [in] b unsigned int type of value stored in b
11104 * \return value stored in unsigned long type
11105 */
#define __RV_UCLIP8(a, b)    \
    ({    \
        /* Read `a` before any other local is declared: a caller argument */    \
        /* spelled `result` must not bind to this macro's own variable.   */    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long __result;    \
        /* `b` is bound with the "K" immediate constraint, so it has to be */    \
        /* a compile-time constant.                                        */    \
        __ASM volatile("uclip8 %0, %1, %2" : "=r"(__result) : "r"(__a), "K"(b));    \
        __result;    \
    })
11113 /* ===== Inline Function End for 3.142. UCLIP8 ===== */
11114
11115 /* ===== Inline Function Start for 3.143. UCLIP16 ===== */
11116 /**
11117 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
11118 * \brief UCLIP16 (SIMD 16-bit Unsigned Clip Value)
11119 * \details
11120 * **Type**: SIMD
11121 *
11122 * **Syntax**:\n
11123 * ~~~
11124 * UCLIP16 Rt, Ra, imm4u
11125 * ~~~
11126 *
11127 * **Purpose**:\n
11128 * Limit the 16-bit signed elements of a register into an unsigned range simultaneously.
11129 *
11130 * **Description**:\n
11131 * This instruction limits the 16-bit signed elements stored in Rs1 into an unsigned
11132 * integer range between 2^imm4u-1 and 0, and writes the limited results to Rd. For example, if imm4u is
11133 * 3, the 16-bit input values should be saturated between 7 and 0. If saturation is performed, set OV bit
11134 * to 1.
11135 *
11136 * **Operations**:\n
11137 * ~~~
11138 * src = Rs1.H[x];
11139 * if (src > (2^imm4u)-1) {
11140 * src = (2^imm4u)-1;
11141 * OV = 1;
11142 * } else if (src < 0) {
11143 * src = 0;
11144 * OV = 1;
11145 * }
11146 * Rd.H[x] = src;
11147 * for RV32: x=1...0,
11148 * for RV64: x=3...0
11149 * ~~~
11150 *
11151 * \param [in] a unsigned long type of value stored in a
11152 * \param [in] b unsigned int type of value stored in b
11153 * \return value stored in unsigned long type
11154 */
#define __RV_UCLIP16(a, b)    \
    ({    \
        /* Read `a` before any other local is declared: a caller argument */    \
        /* spelled `result` must not bind to this macro's own variable.   */    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long __result;    \
        /* `b` is bound with the "K" immediate constraint, so it has to be */    \
        /* a compile-time constant.                                        */    \
        __ASM volatile("uclip16 %0, %1, %2" : "=r"(__result) : "r"(__a), "K"(b));    \
        __result;    \
    })
11162 /* ===== Inline Function End for 3.143. UCLIP16 ===== */
11163
11164 /* ===== Inline Function Start for 3.144. UCLIP32 ===== */
11165 /**
11166 * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
11167 * \brief UCLIP32 (SIMD 32-bit Unsigned Clip Value)
11168 * \details
11169 * **Type**: SIMD
11170 *
11171 * **Syntax**:\n
11172 * ~~~
11173 * UCLIP32 Rd, Rs1, imm5u[4:0]
11174 * ~~~
11175 *
11176 * **Purpose**:\n
11177 * Limit the 32-bit signed integer elements of a register into an unsigned range
11178 * simultaneously.
11179 *
11180 * **Description**:\n
11181 * This instruction limits the 32-bit signed integer elements stored in Rs1 into an
11182 * unsigned integer range between 2^imm5u-1 and 0, and writes the limited results to Rd. For example, if
11183 * imm5u is 3, the 32-bit input values should be saturated between 7 and 0. If saturation is performed,
11184 * set OV bit to 1.
11185 *
11186 * **Operations**:\n
11187 * ~~~
11188 * src = Rs1.W[x];
11189 * if (src > (2^imm5u)-1) {
11190 * src = (2^imm5u)-1;
11191 * OV = 1;
11192 * } else if (src < 0) {
11193 * src = 0;
11194 * OV = 1;
11195 * }
11196 * Rd.W[x] = src
11197 * for RV32: x=0,
11198 * for RV64: x=1...0
11199 * ~~~
11200 *
11201 * \param [in] a unsigned long type of value stored in a
11202 * \param [in] b unsigned int type of value stored in b
11203 * \return value stored in unsigned long type
11204 */
#define __RV_UCLIP32(a, b)    \
    ({    \
        /* Read `a` before any other local is declared: a caller argument */    \
        /* spelled `result` must not bind to this macro's own variable.   */    \
        unsigned long __a = (unsigned long)(a);    \
        unsigned long __result;    \
        /* `b` is bound with the "K" immediate constraint, so it has to be */    \
        /* a compile-time constant.                                        */    \
        __ASM volatile("uclip32 %0, %1, %2" : "=r"(__result) : "r"(__a), "K"(b));    \
        __result;    \
    })
11212 /* ===== Inline Function End for 3.144. UCLIP32 ===== */
11213
11214 /* ===== Inline Function Start for 3.145. UCMPLE8 ===== */
11215 /**
11216 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
11217 * \brief UCMPLE8 (SIMD 8-bit Unsigned Compare Less Than & Equal)
11218 * \details
11219 * **Type**: SIMD
11220 *
11221 * **Syntax**:\n
11222 * ~~~
11223 * UCMPLE8 Rd, Rs1, Rs2
11224 * ~~~
11225 *
11226 * **Purpose**:\n
11227 * Do 8-bit unsigned integer elements less than & equal comparisons simultaneously.
11228 *
11229 * **Description**:\n
11230 * This instruction compares the 8-bit unsigned integer elements in Rs1 with the 8-bit
11231 * unsigned integer elements in Rs2 to see if the one in Rs1 is less than or equal to the one in Rs2. If it
 * is true, the result is 0xFF; otherwise, the result is 0x0. The element comparison results are written to
11233 * Rd.
11234 *
11235 * **Operations**:\n
11236 * ~~~
11237 * Rd.B[x] = (Rs1.B[x] <=u Rs2.B[x])? 0xff : 0x0;
11238 * for RV32: x=3...0,
11239 * for RV64: x=7...0
11240 * ~~~
11241 *
11242 * \param [in] a unsigned long type of value stored in a
11243 * \param [in] b unsigned long type of value stored in b
11244 * \return value stored in unsigned long type
11245 */
__RV_UCMPLE8(unsigned long a,unsigned long b)11246 __STATIC_FORCEINLINE unsigned long __RV_UCMPLE8(unsigned long a, unsigned long b)
11247 {
11248 register unsigned long result;
11249 __ASM volatile("ucmple8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
11250 return result;
11251 }
11252 /* ===== Inline Function End for 3.145. UCMPLE8 ===== */
11253
11254 /* ===== Inline Function Start for 3.146. UCMPLE16 ===== */
11255 /**
11256 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP
11257 * \brief UCMPLE16 (SIMD 16-bit Unsigned Compare Less Than & Equal)
11258 * \details
11259 * **Type**: SIMD
11260 *
11261 * **Syntax**:\n
11262 * ~~~
11263 * UCMPLE16 Rd, Rs1, Rs2
11264 * ~~~
11265 *
11266 * **Purpose**:\n
11267 * Do 16-bit unsigned integer elements less than & equal comparisons simultaneously.
11268 *
11269 * **Description**:\n
11270 * This instruction compares the 16-bit unsigned integer elements in Rs1 with the 16-bit
11271 * unsigned integer elements in Rs2 to see if the one in Rs1 is less than or equal to the one in Rs2. If it
11272 * is true, the result is 0xFFFF; otherwise, the result is 0x0. The element comparison results are
11273 * written to Rd.
11274 *
11275 * **Operations**:\n
11276 * ~~~
11277 * Rd.H[x] = (Rs1.H[x] <=u Rs2.H[x])? 0xffff : 0x0;
11278 * for RV32: x=1...0,
11279 * for RV64: x=3...0
11280 * ~~~
11281 *
11282 * \param [in] a unsigned long type of value stored in a
11283 * \param [in] b unsigned long type of value stored in b
11284 * \return value stored in unsigned long type
11285 */
__RV_UCMPLE16(unsigned long a,unsigned long b)11286 __STATIC_FORCEINLINE unsigned long __RV_UCMPLE16(unsigned long a, unsigned long b)
11287 {
11288 register unsigned long result;
11289 __ASM volatile("ucmple16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
11290 return result;
11291 }
11292 /* ===== Inline Function End for 3.146. UCMPLE16 ===== */
11293
11294 /* ===== Inline Function Start for 3.147. UCMPLT8 ===== */
11295 /**
11296 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
11297 * \brief UCMPLT8 (SIMD 8-bit Unsigned Compare Less Than)
11298 * \details
11299 * **Type**: SIMD
11300 *
11301 * **Syntax**:\n
11302 * ~~~
11303 * UCMPLT8 Rd, Rs1, Rs2
11304 * ~~~
11305 *
11306 * **Purpose**:\n
11307 * Do 8-bit unsigned integer elements less than comparisons simultaneously.
11308 *
11309 * **Description**:\n
11310 * This instruction compares the 8-bit unsigned integer elements in Rs1 with the 8-bit
11311 * unsigned integer elements in Rs2 to see if the one in Rs1 is less than the one in Rs2. If it is true, the
11312 * result is 0xFF; otherwise, the result is 0x0. The element comparison results are written to Rd.
11313 *
11314 * **Operations**:\n
11315 * ~~~
11316 * Rd.B[x] = (Rs1.B[x] <u Rs2.B[x])? 0xff : 0x0;
11317 * for RV32: x=3...0,
11318 * for RV64: x=7...0
11319 * ~~~
11320 *
11321 * \param [in] a unsigned long type of value stored in a
11322 * \param [in] b unsigned long type of value stored in b
11323 * \return value stored in unsigned long type
11324 */
__RV_UCMPLT8(unsigned long a,unsigned long b)11325 __STATIC_FORCEINLINE unsigned long __RV_UCMPLT8(unsigned long a, unsigned long b)
11326 {
11327 register unsigned long result;
11328 __ASM volatile("ucmplt8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
11329 return result;
11330 }
11331 /* ===== Inline Function End for 3.147. UCMPLT8 ===== */
11332
11333 /* ===== Inline Function Start for 3.148. UCMPLT16 ===== */
11334 /**
11335 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP
11336 * \brief UCMPLT16 (SIMD 16-bit Unsigned Compare Less Than)
11337 * \details
11338 * **Type**: SIMD
11339 *
11340 * **Syntax**:\n
11341 * ~~~
11342 * UCMPLT16 Rd, Rs1, Rs2
11343 * ~~~
11344 *
11345 * **Purpose**:\n
11346 * Do 16-bit unsigned integer elements less than comparisons simultaneously.
11347 *
11348 * **Description**:\n
11349 * This instruction compares the 16-bit unsigned integer elements in Rs1 with the 16-bit
11350 * unsigned integer elements in Rs2 to see if the one in Rs1 is less than the one in Rs2. If it is true, the
11351 * result is 0xFFFF; otherwise, the result is 0x0. The element comparison results are written to Rd.
11352 *
11353 * **Operations**:\n
11354 * ~~~
11355 * Rd.H[x] = (Rs1.H[x] <u Rs2.H[x])? 0xffff : 0x0;
11356 * for RV32: x=1...0,
11357 * for RV64: x=3...0
11358 * ~~~
11359 *
11360 * \param [in] a unsigned long type of value stored in a
11361 * \param [in] b unsigned long type of value stored in b
11362 * \return value stored in unsigned long type
11363 */
__RV_UCMPLT16(unsigned long a,unsigned long b)11364 __STATIC_FORCEINLINE unsigned long __RV_UCMPLT16(unsigned long a, unsigned long b)
11365 {
11366 register unsigned long result;
11367 __ASM volatile("ucmplt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
11368 return result;
11369 }
11370 /* ===== Inline Function End for 3.148. UCMPLT16 ===== */
11371
11372 /* ===== Inline Function Start for 3.149. UKADD8 ===== */
11373 /**
11374 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
11375 * \brief UKADD8 (SIMD 8-bit Unsigned Saturating Addition)
11376 * \details
11377 * **Type**: SIMD
11378 *
11379 * **Syntax**:\n
11380 * ~~~
11381 * UKADD8 Rd, Rs1, Rs2
11382 * ~~~
11383 *
11384 * **Purpose**:\n
11385 * Do 8-bit unsigned integer element saturating additions simultaneously.
11386 *
11387 * **Description**:\n
11388 * This instruction adds the 8-bit unsigned integer elements in Rs1 with the 8-bit
11389 * unsigned integer elements in Rs2. If any of the results are beyond the 8-bit unsigned number range
 * (0 <= RES <= 2^8-1), they are saturated to the range and the OV bit is set to 1. The saturated results are
11391 * written to Rd.
11392 *
11393 * **Operations**:\n
11394 * ~~~
11395 * res[x] = Rs1.B[x] + Rs2.B[x];
11396 * if (res[x] > (2^8)-1) {
11397 * res[x] = (2^8)-1;
11398 * OV = 1;
11399 * }
11400 * Rd.B[x] = res[x];
11401 * for RV32: x=3...0,
11402 * for RV64: x=7...0
11403 * ~~~
11404 *
11405 * \param [in] a unsigned long type of value stored in a
11406 * \param [in] b unsigned long type of value stored in b
11407 * \return value stored in unsigned long type
11408 */
__RV_UKADD8(unsigned long a,unsigned long b)11409 __STATIC_FORCEINLINE unsigned long __RV_UKADD8(unsigned long a, unsigned long b)
11410 {
11411 register unsigned long result;
11412 __ASM volatile("ukadd8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
11413 return result;
11414 }
11415 /* ===== Inline Function End for 3.149. UKADD8 ===== */
11416
11417 /* ===== Inline Function Start for 3.150. UKADD16 ===== */
11418 /**
11419 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
11420 * \brief UKADD16 (SIMD 16-bit Unsigned Saturating Addition)
11421 * \details
11422 * **Type**: SIMD
11423 *
11424 * **Syntax**:\n
11425 * ~~~
11426 * UKADD16 Rd, Rs1, Rs2
11427 * ~~~
11428 *
11429 * **Purpose**:\n
11430 * Do 16-bit unsigned integer element saturating additions simultaneously.
11431 *
11432 * **Description**:\n
11433 * This instruction adds the 16-bit unsigned integer elements in Rs1 with the 16-bit
11434 * unsigned integer elements in Rs2. If any of the results are beyond the 16-bit unsigned number
11435 * range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set to 1. The saturated
11436 * results are written to Rd.
11437 *
11438 * **Operations**:\n
11439 * ~~~
11440 * res[x] = Rs1.H[x] + Rs2.H[x];
11441 * if (res[x] > (2^16)-1) {
11442 * res[x] = (2^16)-1;
11443 * OV = 1;
11444 * }
11445 * Rd.H[x] = res[x];
11446 * for RV32: x=1...0,
11447 * for RV64: x=3...0
11448 * ~~~
11449 *
11450 * \param [in] a unsigned long type of value stored in a
11451 * \param [in] b unsigned long type of value stored in b
11452 * \return value stored in unsigned long type
11453 */
__RV_UKADD16(unsigned long a,unsigned long b)11454 __STATIC_FORCEINLINE unsigned long __RV_UKADD16(unsigned long a, unsigned long b)
11455 {
11456 register unsigned long result;
11457 __ASM volatile("ukadd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
11458 return result;
11459 }
11460 /* ===== Inline Function End for 3.150. UKADD16 ===== */
11461
11462 /* ===== Inline Function Start for 3.151. UKADD64 ===== */
11463 /**
11464 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
11465 * \brief UKADD64 (64-bit Unsigned Saturating Addition)
11466 * \details
11467 * **Type**: DSP (64-bit Profile)
11468 *
11469 * **Syntax**:\n
11470 * ~~~
11471 * UKADD64 Rd, Rs1, Rs2
11472 * ~~~
11473 *
11474 * **Purpose**:\n
11475 * Add two 64-bit unsigned integers. The result is saturated to the U64 range.
11476 *
11477 * **RV32 Description**:\n
11478 * This instruction adds the 64-bit unsigned integer of an even/odd pair of registers
11479 * specified by Rs1(4,1) with the 64-bit unsigned integer of an even/odd pair of registers specified by
11480 * Rs2(4,1). If the 64-bit result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is saturated to the
11481 * range and the OV bit is set to 1. The saturated result is written to an even/odd pair of registers
11482 * specified by Rd(4,1).
11483 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
11484 * includes register 2d and 2d+1.
11485 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
11486 * of the pair contains the low 32-bit of the result.
11487 *
11488 * **RV64 Description**:\n
11489 * This instruction adds the 64-bit unsigned integer in Rs1 with the 64-bit unsigned
11490 * integer in Rs2. If the 64-bit result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is saturated to
11491 * the range and the OV bit is set to 1. The saturated result is written to Rd.
11492 *
11493 * **Operations**:\n
11494 * ~~~
11495 * * RV32:
11496 * t_L = CONCAT(Rt(4,1),1'b0); t_H = CONCAT(Rt(4,1),1'b1);
11497 * a_L = CONCAT(Ra(4,1),1'b0); a_H = CONCAT(Ra(4,1),1'b1);
11498 * b_L = CONCAT(Rb(4,1),1'b0); b_H = CONCAT(Rb(4,1),1'b1);
11499 * result = R[a_H].R[a_L] + R[b_H].R[b_L];
11500 * if (result > (2^64)-1) {
11501 * result = (2^64)-1; OV = 1;
11502 * }
11503 * R[t_H].R[t_L] = result;
11504 * * RV64:
11505 * result = Rs1 + Rs2;
11506 * if (result > (2^64)-1) {
11507 * result = (2^64)-1; OV = 1;
11508 * }
11509 * Rd = result;
11510 * ~~~
11511 *
11512 * \param [in] a unsigned long long type of value stored in a
11513 * \param [in] b unsigned long long type of value stored in b
11514 * \return value stored in unsigned long long type
11515 */
__RV_UKADD64(unsigned long long a,unsigned long long b)11516 __STATIC_FORCEINLINE unsigned long long __RV_UKADD64(unsigned long long a, unsigned long long b)
11517 {
11518 register unsigned long long result;
11519 __ASM volatile("ukadd64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
11520 return result;
11521 }
11522 /* ===== Inline Function End for 3.151. UKADD64 ===== */
11523
11524 /* ===== Inline Function Start for 3.152. UKADDH ===== */
11525 /**
11526 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
11527 * \brief UKADDH (Unsigned Addition with U16 Saturation)
11528 * \details
11529 * **Type**: DSP
11530 *
11531 * **Syntax**:\n
11532 * ~~~
11533 * UKADDH Rd, Rs1, Rs2
11534 * ~~~
11535 *
11536 * **Purpose**:\n
11537 * Add the unsigned lower 32-bit content of two registers with U16 saturation.
11538 *
11539 * **Description**:\n
11540 * The unsigned lower 32-bit content of Rs1 is added with the unsigned lower 32-bit
11541 * content of Rs2. And the result is saturated to the 16-bit unsigned integer range of [0, 2^16-1] and then
11542 * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
11543 *
11544 * **Operations**:\n
11545 * ~~~
11546 * tmp = Rs1.W[0] + Rs2.W[0];
11547 * if (tmp > (2^16)-1) {
11548 * tmp = (2^16)-1;
11549 * OV = 1;
11550 * }
11551 * Rd = SE(tmp[15:0]);
11552 * ~~~
11553 *
11554 * \param [in] a unsigned int type of value stored in a
11555 * \param [in] b unsigned int type of value stored in b
11556 * \return value stored in unsigned long type
11557 */
__RV_UKADDH(unsigned int a,unsigned int b)11558 __STATIC_FORCEINLINE unsigned long __RV_UKADDH(unsigned int a, unsigned int b)
11559 {
11560 register unsigned long result;
11561 __ASM volatile("ukaddh %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
11562 return result;
11563 }
11564 /* ===== Inline Function End for 3.152. UKADDH ===== */
11565
11566 /* ===== Inline Function Start for 3.153. UKADDW ===== */
11567 /**
11568 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
11569 * \brief UKADDW (Unsigned Addition with U32 Saturation)
11570 * \details
11571 * **Type**: DSP
11572 *
11573 * **Syntax**:\n
11574 * ~~~
11575 * UKADDW Rd, Rs1, Rs2
11576 * ~~~
11577 *
11578 * **Purpose**:\n
11579 * Add the unsigned lower 32-bit content of two registers with U32 saturation.
11580 *
11581 * **Description**:\n
11582 * The unsigned lower 32-bit content of Rs1 is added with the unsigned lower 32-bit
11583 * content of Rs2. And the result is saturated to the 32-bit unsigned integer range of [0, 2^32-1] and then
11584 * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
11585 *
11586 * **Operations**:\n
11587 * ~~~
11588 * tmp = Rs1.W[0] + Rs2.W[0];
11589 * if (tmp > (2^32)-1) {
11590 * tmp[31:0] = (2^32)-1;
11591 * OV = 1;
11592 * }
11593 * Rd = tmp[31:0]; // RV32
11594 * Rd = SE(tmp[31:0]); // RV64
11595 * ~~~
11596 *
11597 * \param [in] a unsigned int type of value stored in a
11598 * \param [in] b unsigned int type of value stored in b
11599 * \return value stored in unsigned long type
11600 */
__RV_UKADDW(unsigned int a,unsigned int b)11601 __STATIC_FORCEINLINE unsigned long __RV_UKADDW(unsigned int a, unsigned int b)
11602 {
11603 register unsigned long result;
11604 __ASM volatile("ukaddw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
11605 return result;
11606 }
11607 /* ===== Inline Function End for 3.153. UKADDW ===== */
11608
11609 /* ===== Inline Function Start for 3.154. UKCRAS16 ===== */
11610 /**
11611 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
11612 * \brief UKCRAS16 (SIMD 16-bit Unsigned Saturating Cross Addition & Subtraction)
11613 * \details
11614 * **Type**: SIMD
11615 *
11616 * **Syntax**:\n
11617 * ~~~
11618 * UKCRAS16 Rd, Rs1, Rs2
11619 * ~~~
11620 *
11621 * **Purpose**:\n
11622 * Do one 16-bit unsigned integer element saturating addition and one 16-bit unsigned
11623 * integer element saturating subtraction in a 32-bit chunk simultaneously. Operands are from crossed
11624 * positions in 32-bit chunks.
11625 *
11626 * **Description**:\n
11627 * This instruction adds the 16-bit unsigned integer element in [31:16] of 32-bit chunks in
11628 * Rs1 with the 16-bit unsigned integer element in [15:0] of 32-bit chunks in Rs2; at the same time, it
11629 * subtracts the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs2 from the 16-bit
11630 * unsigned integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the 16-bit
11631 * unsigned number range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set to 1.
11632 * The saturated results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit
11633 * chunks in Rd for subtraction.
11634 *
11635 * **Operations**:\n
11636 * ~~~
11637 * res1 = Rs1.W[x][31:16] + Rs2.W[x][15:0];
11638 * res2 = Rs1.W[x][15:0] - Rs2.W[x][31:16];
11639 * if (res1 > (2^16)-1) {
11640 * res1 = (2^16)-1;
11641 * OV = 1;
11642 * }
11643 * if (res2 < 0) {
11644 * res2 = 0;
11645 * OV = 1;
11646 * }
11647 * Rd.W[x][31:16] = res1;
11648 * Rd.W[x][15:0] = res2;
11649 * for RV32, x=0
11650 * for RV64, x=1...0
11651 * ~~~
11652 *
11653 * \param [in] a unsigned long type of value stored in a
11654 * \param [in] b unsigned long type of value stored in b
11655 * \return value stored in unsigned long type
11656 */
__RV_UKCRAS16(unsigned long a,unsigned long b)11657 __STATIC_FORCEINLINE unsigned long __RV_UKCRAS16(unsigned long a, unsigned long b)
11658 {
11659 register unsigned long result;
11660 __ASM volatile("ukcras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
11661 return result;
11662 }
11663 /* ===== Inline Function End for 3.154. UKCRAS16 ===== */
11664
11665 /* ===== Inline Function Start for 3.155. UKCRSA16 ===== */
11666 /**
11667 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
11668 * \brief UKCRSA16 (SIMD 16-bit Unsigned Saturating Cross Subtraction & Addition)
11669 * \details
11670 * **Type**: SIMD
11671 *
11672 * **Syntax**:\n
11673 * ~~~
11674 * UKCRSA16 Rd, Rs1, Rs2
11675 * ~~~
11676 *
11677 * **Purpose**:\n
11678 * Do one 16-bit unsigned integer element saturating subtraction and one 16-bit unsigned
11679 * integer element saturating addition in a 32-bit chunk simultaneously. Operands are from crossed
11680 * positions in 32-bit chunks.
11681 *
11682 * **Description**:\n
11683 * This instruction subtracts the 16-bit unsigned integer element in [15:0] of 32-bit
11684 * chunks in Rs2 from the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs1; at the
11685 * same time, it adds the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs2 with the 16-
11686 * bit unsigned integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the
11687 * 16-bit unsigned number range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set
11688 * to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of
11689 * 32-bit chunks in Rd for addition.
11690 *
11691 * **Operations**:\n
11692 * ~~~
11693 * res1 = Rs1.W[x][31:16] - Rs2.W[x][15:0];
11694 * res2 = Rs1.W[x][15:0] + Rs2.W[x][31:16];
11695 * if (res1 < 0) {
11696 * res1 = 0;
11697 * OV = 1;
 * }
 * if (res2 > (2^16)-1) {
11699 * res2 = (2^16)-1;
11700 * OV = 1;
11701 * }
11702 * Rd.W[x][31:16] = res1;
11703 * Rd.W[x][15:0] = res2;
11704 * for RV32, x=0
11705 * for RV64, x=1...0
11706 * ~~~
11707 *
11708 * \param [in] a unsigned long type of value stored in a
11709 * \param [in] b unsigned long type of value stored in b
11710 * \return value stored in unsigned long type
11711 */
__RV_UKCRSA16(unsigned long a,unsigned long b)11712 __STATIC_FORCEINLINE unsigned long __RV_UKCRSA16(unsigned long a, unsigned long b)
11713 {
11714 register unsigned long result;
11715 __ASM volatile("ukcrsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
11716 return result;
11717 }
11718 /* ===== Inline Function End for 3.155. UKCRSA16 ===== */
11719
11720 /* ===== Inline Function Start for 3.156. UKMAR64 ===== */
11721 /**
11722 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
11723 * \brief UKMAR64 (Unsigned Multiply and Saturating Add to 64-Bit Data)
11724 * \details
11725 * **Type**: DSP (64-bit Profile)
11726 *
11727 * **Syntax**:\n
11728 * ~~~
11729 * UKMAR64 Rd, Rs1, Rs2
11730 * ~~~
11731 *
11732 * **Purpose**:\n
11733 * Multiply the 32-bit unsigned elements in two registers and add the 64-bit multiplication
11734 * results to the 64-bit unsigned data of a pair of registers (RV32) or a register (RV64). The result is
11735 * saturated to the U64 range and written back to the pair of registers (RV32) or the register (RV64).
11736 *
11737 * **RV32 Description**:\n
11738 * This instruction multiplies the 32-bit unsigned data of Rs1 with that of Rs2. It
11739 * adds the 64-bit multiplication result to the 64-bit unsigned data of an even/odd pair of registers
11740 * specified by Rd(4,1) with unlimited precision. If the 64-bit addition result is beyond the U64 number
11741 * range (0 <= U64 <= 2^64-1), it is saturated to the range and the OV bit is set to 1. The saturated result is
11742 * written back to the even/odd pair of registers specified by Rd(4,1).
11743 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
11744 * includes register 2d and 2d+1.
11745 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
11746 * of the pair contains the low 32-bit of the result.
11747 *
11748 * **RV64 Description**:\n
11749 * This instruction multiplies the 32-bit unsigned elements of Rs1 with that of Rs2.
11750 * It adds the 64-bit multiplication results to the 64-bit unsigned data in Rd with unlimited precision. If
11751 * the 64-bit addition result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is saturated to the
11752 * range and the OV bit is set to 1. The saturated result is written back to Rd.
11753 *
11754 * **Operations**:\n
11755 * ~~~
11756 * * RV32:
11757 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
 * result = R[t_H].R[t_L] + (Rs1 u* Rs2);
11759 * if (result > (2^64)-1) {
11760 * result = (2^64)-1; OV = 1;
11761 * }
11762 * R[t_H].R[t_L] = result;
11763 * * RV64:
11764 * // `result` has unlimited precision
11765 * result = Rd + (Rs1.W[0] u* Rs2.W[0]) + (Rs1.W[1] u* Rs2.W[1]);
11766 * if (result > (2^64)-1) {
11767 * result = (2^64)-1; OV = 1;
11768 * }
11769 * Rd = result;
11770 * ~~~
11771 *
11772 * \param [in] t unsigned long long type of value stored in t
11773 * \param [in] a unsigned long type of value stored in a
11774 * \param [in] b unsigned long type of value stored in b
11775 * \return value stored in unsigned long long type
11776 */
__RV_UKMAR64(unsigned long long t,unsigned long a,unsigned long b)11777 __STATIC_FORCEINLINE unsigned long long __RV_UKMAR64(unsigned long long t, unsigned long a, unsigned long b)
11778 {
11779 __ASM volatile("ukmar64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
11780 return t;
11781 }
11782 /* ===== Inline Function End for 3.156. UKMAR64 ===== */
11783
11784 /* ===== Inline Function Start for 3.157. UKMSR64 ===== */
11785 /**
11786 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
11787 * \brief UKMSR64 (Unsigned Multiply and Saturating Subtract from 64-Bit Data)
11788 * \details
11789 * **Type**: DSP (64-bit Profile)
11790 *
11791 * **Syntax**:\n
11792 * ~~~
11793 * UKMSR64 Rd, Rs1, Rs2
11794 * ~~~
11795 *
11796 * **Purpose**:\n
11797 * Multiply the 32-bit unsigned elements in two registers and subtract the 64-bit
11798 * multiplication results from the 64-bit unsigned data of a pair of registers (RV32) or a register (RV64).
11799 * The result is saturated to the U64 range and written back to the pair of registers (RV32) or a register
11800 * (RV64).
11801 *
11802 * **RV32 Description**:\n
11803 * This instruction multiplies the 32-bit unsigned data of Rs1 with that of Rs2. It
11804 * subtracts the 64-bit multiplication result from the 64-bit unsigned data of an even/odd pair of
11805 * registers specified by Rd(4,1) with unlimited precision. If the 64-bit subtraction result is beyond the
11806 * U64 number range (0 <= U64 <= 2^64-1), it is saturated to the range and the OV bit is set to 1. The
11807 * saturated result is written back to the even/odd pair of registers specified by Rd(4,1).
11808 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
11809 * includes register 2d and 2d+1.
11810 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
11811 * of the pair contains the low 32-bit of the result.
11812 *
11813 * **RV64 Description**:\n
11814 * This instruction multiplies the 32-bit unsigned elements of Rs1 with that of Rs2.
11815 * It subtracts the 64-bit multiplication results from the 64-bit unsigned data of Rd with unlimited
11816 * precision. If the 64-bit subtraction result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is
11817 * saturated to the range and the OV bit is set to 1. The saturated result is written back to Rd.
11818 *
11819 * **Operations**:\n
11820 * ~~~
11821 * * RV32:
11822 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
11823 * result = R[t_H].R[t_L] - (Rs1 u* Rs2);
11824 * if (result < 0) {
11825 * result = 0; OV = 1;
11826 * }
11827 * R[t_H].R[t_L] = result;
11828 * * RV64:
11829 * // `result` has unlimited precision
11830 * result = Rd - (Rs1.W[0] u* Rs2.W[0]) - (Rs1.W[1] u* Rs2.W[1]);
11831 * if (result < 0) {
11832 * result = 0; OV = 1;
11833 * }
11834 * Rd = result;
11835 * ~~~
11836 *
11837 * \param [in] t unsigned long long type of value stored in t
11838 * \param [in] a unsigned long type of value stored in a
11839 * \param [in] b unsigned long type of value stored in b
11840 * \return value stored in unsigned long long type
11841 */
__RV_UKMSR64(unsigned long long t,unsigned long a,unsigned long b)11842 __STATIC_FORCEINLINE unsigned long long __RV_UKMSR64(unsigned long long t, unsigned long a, unsigned long b)
11843 {
11844 __ASM volatile("ukmsr64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
11845 return t;
11846 }
11847 /* ===== Inline Function End for 3.157. UKMSR64 ===== */
11848
11849 /* ===== Inline Function Start for 3.158. UKSTAS16 ===== */
11850 /**
11851 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
11852 * \brief UKSTAS16 (SIMD 16-bit Unsigned Saturating Straight Addition & Subtraction)
11853 * \details
11854 * **Type**: SIMD
11855 *
11856 * **Syntax**:\n
11857 * ~~~
11858 * UKSTAS16 Rd, Rs1, Rs2
11859 * ~~~
11860 *
11861 * **Purpose**:\n
11862 * Do one 16-bit unsigned integer element saturating addition and one 16-bit unsigned
11863 * integer element saturating subtraction in a 32-bit chunk simultaneously. Operands are from
11864 * corresponding positions in 32-bit chunks.
11865 *
11866 * **Description**:\n
11867 * This instruction adds the 16-bit unsigned integer element in [31:16] of 32-bit chunks in
11868 * Rs1 with the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs2; at the same time, it
11869 * subtracts the 16-bit unsigned integer element in [15:0] of 32-bit chunks in Rs2 from the 16-bit
11870 * unsigned integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the 16-bit
11871 * unsigned number range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set to 1.
11872 * The saturated results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit
11873 * chunks in Rd for subtraction.
11874 *
11875 * **Operations**:\n
11876 * ~~~
11877 * res1 = Rs1.W[x][31:16] + Rs2.W[x][31:16];
11878 * res2 = Rs1.W[x][15:0] - Rs2.W[x][15:0];
11879 * if (res1 > (2^16)-1) {
11880 * res1 = (2^16)-1;
11881 * OV = 1;
11882 * }
11883 * if (res2 < 0) {
11884 * res2 = 0;
11885 * OV = 1;
11886 * }
11887 * Rd.W[x][31:16] = res1;
11888 * Rd.W[x][15:0] = res2;
11889 * for RV32, x=0
11890 * for RV64, x=1...0
11891 * ~~~
11892 *
11893 * \param [in] a unsigned long type of value stored in a
11894 * \param [in] b unsigned long type of value stored in b
11895 * \return value stored in unsigned long type
11896 */
__RV_UKSTAS16(unsigned long a,unsigned long b)11897 __STATIC_FORCEINLINE unsigned long __RV_UKSTAS16(unsigned long a, unsigned long b)
11898 {
11899 register unsigned long result;
11900 __ASM volatile("ukstas16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
11901 return result;
11902 }
11903 /* ===== Inline Function End for 3.158. UKSTAS16 ===== */
11904
11905 /* ===== Inline Function Start for 3.159. UKSTSA16 ===== */
11906 /**
11907 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
11908 * \brief UKSTSA16 (SIMD 16-bit Unsigned Saturating Straight Subtraction & Addition)
11909 * \details
11910 * **Type**: SIMD
11911 *
11912 * **Syntax**:\n
11913 * ~~~
11914 * UKSTSA16 Rd, Rs1, Rs2
11915 * ~~~
11916 *
11917 * **Purpose**:\n
11918 * Do one 16-bit unsigned integer element saturating subtraction and one 16-bit unsigned
11919 * integer element saturating addition in a 32-bit chunk simultaneously. Operands are from
11920 * corresponding positions in 32-bit chunks.
11921 *
11922 * **Description**:\n
11923 * This instruction subtracts the 16-bit unsigned integer element in [31:16] of 32-bit
11924 * chunks in Rs2 from the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs1; at the
11925 * same time, it adds the 16-bit unsigned integer element in [15:0] of 32-bit chunks in Rs2 with the 16-
11926 * bit unsigned integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the
11927 * 16-bit unsigned number range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set
11928 * to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of
11929 * 32-bit chunks in Rd for addition.
11930 *
11931 * **Operations**:\n
11932 * ~~~
11933 * res1 = Rs1.W[x][31:16] - Rs2.W[x][31:16];
11934 * res2 = Rs1.W[x][15:0] + Rs2.W[x][15:0];
11935 * if (res1 < 0) {
11936 * res1 = 0;
11937 * OV = 1;
 * }
 * if (res2 > (2^16)-1) {
11939 * res2 = (2^16)-1;
11940 * OV = 1;
11941 * }
11942 * Rd.W[x][31:16] = res1;
11943 * Rd.W[x][15:0] = res2;
11944 * for RV32, x=0
11945 * for RV64, x=1...0
11946 * ~~~
11947 *
11948 * \param [in] a unsigned long type of value stored in a
11949 * \param [in] b unsigned long type of value stored in b
11950 * \return value stored in unsigned long type
11951 */
__RV_UKSTSA16(unsigned long a,unsigned long b)11952 __STATIC_FORCEINLINE unsigned long __RV_UKSTSA16(unsigned long a, unsigned long b)
11953 {
11954 register unsigned long result;
11955 __ASM volatile("ukstsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
11956 return result;
11957 }
11958 /* ===== Inline Function End for 3.159. UKSTSA16 ===== */
11959
11960 /* ===== Inline Function Start for 3.160. UKSUB8 ===== */
11961 /**
11962 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
11963 * \brief UKSUB8 (SIMD 8-bit Unsigned Saturating Subtraction)
11964 * \details
11965 * **Type**: SIMD
11966 *
11967 * **Syntax**:\n
11968 * ~~~
11969 * UKSUB8 Rd, Rs1, Rs2
11970 * ~~~
11971 *
11972 * **Purpose**:\n
11973 * Do 8-bit unsigned integer elements saturating subtractions simultaneously.
11974 *
11975 * **Description**:\n
11976 * This instruction subtracts the 8-bit unsigned integer elements in Rs2 from the 8-bit
11977 * unsigned integer elements in Rs1. If any of the results are beyond the 8-bit unsigned number range
 * (0 <= RES <= 2^8-1), they are saturated to the range and the OV bit is set to 1. The saturated results are
11979 * written to Rd.
11980 *
11981 * **Operations**:\n
11982 * ~~~
11983 * res[x] = Rs1.B[x] - Rs2.B[x];
11984 * if (res[x] < 0) {
11985 * res[x] = 0;
11986 * OV = 1;
11987 * }
11988 * Rd.B[x] = res[x];
11989 * for RV32: x=3...0,
11990 * for RV64: x=7...0
11991 * ~~~
11992 *
11993 * \param [in] a unsigned long type of value stored in a
11994 * \param [in] b unsigned long type of value stored in b
11995 * \return value stored in unsigned long type
11996 */
__RV_UKSUB8(unsigned long a,unsigned long b)11997 __STATIC_FORCEINLINE unsigned long __RV_UKSUB8(unsigned long a, unsigned long b)
11998 {
11999 register unsigned long result;
12000 __ASM volatile("uksub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
12001 return result;
12002 }
12003 /* ===== Inline Function End for 3.160. UKSUB8 ===== */
12004
12005 /* ===== Inline Function Start for 3.161. UKSUB16 ===== */
12006 /**
12007 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
12008 * \brief UKSUB16 (SIMD 16-bit Unsigned Saturating Subtraction)
12009 * \details
12010 * **Type**: SIMD
12011 *
12012 * **Syntax**:\n
12013 * ~~~
12014 * UKSUB16 Rd, Rs1, Rs2
12015 * ~~~
12016 *
12017 * **Purpose**:\n
12018 * Do 16-bit unsigned integer elements saturating subtractions simultaneously.
12019 *
12020 * **Description**:\n
12021 * This instruction subtracts the 16-bit unsigned integer elements in Rs2 from the 16-bit
12022 * unsigned integer elements in Rs1. If any of the results are beyond the 16-bit unsigned number
12023 * range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set to 1. The saturated
12024 * results are written to Rd.
12025 *
12026 * **Operations**:\n
12027 * ~~~
12028 * res[x] = Rs1.H[x] - Rs2.H[x];
12029 * if (res[x] < 0) {
12030 * res[x] = 0;
12031 * OV = 1;
12032 * }
12033 * Rd.H[x] = res[x];
12034 * for RV32: x=1...0,
12035 * for RV64: x=3...0
12036 * ~~~
12037 *
12038 * \param [in] a unsigned long type of value stored in a
12039 * \param [in] b unsigned long type of value stored in b
12040 * \return value stored in unsigned long type
12041 */
__RV_UKSUB16(unsigned long a,unsigned long b)12042 __STATIC_FORCEINLINE unsigned long __RV_UKSUB16(unsigned long a, unsigned long b)
12043 {
12044 register unsigned long result;
12045 __ASM volatile("uksub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
12046 return result;
12047 }
12048 /* ===== Inline Function End for 3.161. UKSUB16 ===== */
12049
12050 /* ===== Inline Function Start for 3.162. UKSUB64 ===== */
12051 /**
12052 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
12053 * \brief UKSUB64 (64-bit Unsigned Saturating Subtraction)
12054 * \details
12055 * **Type**: DSP (64-bit Profile)
12056 *
12057 * **Syntax**:\n
12058 * ~~~
12059 * UKSUB64 Rd, Rs1, Rs2
12060 * ~~~
12061 *
12062 * **Purpose**:\n
 * Perform a 64-bit unsigned integer subtraction. The result is saturated to the U64 range.
12064 *
12065 * **RV32 Description**:\n
12066 * This instruction subtracts the 64-bit unsigned integer of an even/odd pair of
12067 * registers specified by Rs2(4,1) from the 64-bit unsigned integer of an even/odd pair of registers
12068 * specified by Rs1(4,1). If the 64-bit result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is
12069 * saturated to the range and the OV bit is set to 1. The saturated result is then written to an even/odd
12070 * pair of registers specified by Rd(4,1).
12071 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
12072 * includes register 2d and 2d+1.
12073 * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
12074 * register of the pair contains the low 32-bit of the operand.
12075 *
12076 * **RV64 Description**:\n
 * This instruction subtracts the 64-bit unsigned integer of Rs2 from the 64-bit
 * unsigned integer of Rs1. If the 64-bit result is beyond the U64 number range (0 <=
12079 * U64 <= 2^64-1), it is saturated to the range and the OV bit is set to 1. The saturated result is then written
12080 * to Rd.
12081 *
12082 * **Operations**:\n
12083 * ~~~
12084 * * RV32:
12085 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
12086 * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
12087 * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
12088 * result = R[a_H].R[a_L] - R[b_H].R[b_L];
12089 * if (result < 0) {
12090 * result = 0; OV = 1;
12091 * }
12092 * R[t_H].R[t_L] = result;
12093 * * RV64
12094 * result = Rs1 - Rs2;
12095 * if (result < 0) {
12096 * result = 0; OV = 1;
12097 * }
12098 * Rd = result;
12099 * ~~~
12100 *
12101 * \param [in] a unsigned long long type of value stored in a
12102 * \param [in] b unsigned long long type of value stored in b
12103 * \return value stored in unsigned long long type
12104 */
__RV_UKSUB64(unsigned long long a,unsigned long long b)12105 __STATIC_FORCEINLINE unsigned long long __RV_UKSUB64(unsigned long long a, unsigned long long b)
12106 {
12107 register unsigned long long result;
12108 __ASM volatile("uksub64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
12109 return result;
12110 }
12111 /* ===== Inline Function End for 3.162. UKSUB64 ===== */
12112
12113 /* ===== Inline Function Start for 3.163. UKSUBH ===== */
12114 /**
12115 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
12116 * \brief UKSUBH (Unsigned Subtraction with U16 Saturation)
12117 * \details
12118 * **Type**: DSP
12119 *
12120 * **Syntax**:\n
12121 * ~~~
12122 * UKSUBH Rd, Rs1, Rs2
12123 * ~~~
12124 *
12125 * **Purpose**:\n
12126 * Subtract the unsigned lower 32-bit content of two registers with U16 saturation.
12127 *
12128 * **Description**:\n
12129 * The unsigned lower 32-bit content of Rs2 is subtracted from the unsigned lower 32-bit
12130 * content of Rs1. And the result is saturated to the 16-bit unsigned integer range of [0, 2^16-1] and then
12131 * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
12132 *
12133 * **Operations**:\n
12134 * ~~~
12135 * tmp = Rs1.W[0] - Rs2.W[0];
12136 * if (tmp > (2^16)-1) {
12137 * tmp = (2^16)-1;
12138 * OV = 1;
12139 * }
12140 * else if (tmp < 0) {
12141 * tmp = 0;
12142 * OV = 1;
12143 * }
12144 * Rd = SE(tmp[15:0]);
12145 * ~~~
12146 *
12147 * \param [in] a unsigned int type of value stored in a
12148 * \param [in] b unsigned int type of value stored in b
12149 * \return value stored in unsigned long type
12150 */
__RV_UKSUBH(unsigned int a,unsigned int b)12151 __STATIC_FORCEINLINE unsigned long __RV_UKSUBH(unsigned int a, unsigned int b)
12152 {
12153 register unsigned long result;
12154 __ASM volatile("uksubh %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
12155 return result;
12156 }
12157 /* ===== Inline Function End for 3.163. UKSUBH ===== */
12158
12159 /* ===== Inline Function Start for 3.164. UKSUBW ===== */
12160 /**
12161 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
12162 * \brief UKSUBW (Unsigned Subtraction with U32 Saturation)
12163 * \details
12164 * **Type**: DSP
12165 *
12166 * **Syntax**:\n
12167 * ~~~
12168 * UKSUBW Rd, Rs1, Rs2
12169 * ~~~
12170 *
12171 * **Purpose**:\n
12172 * Subtract the unsigned lower 32-bit content of two registers with unsigned 32-bit
12173 * saturation.
12174 *
12175 * **Description**:\n
12176 * The unsigned lower 32-bit content of Rs2 is subtracted from the unsigned lower 32-bit
12177 * content of Rs1. And the result is saturated to the 32-bit unsigned integer range of [0, 2^32-1] and then
12178 * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
12179 *
12180 * **Operations**:\n
12181 * ~~~
12182 * tmp = Rs1.W[0] - Rs2.W[0];
12183 * if (tmp < 0) {
12184 * tmp[31:0] = 0;
12185 * OV = 1;
12186 * }
12187 * Rd = tmp[31:0]; // RV32
12188 * Rd = SE(tmp[31:0]); // RV64
12189 * ~~~
12190 *
12191 * \param [in] a unsigned int type of value stored in a
12192 * \param [in] b unsigned int type of value stored in b
12193 * \return value stored in unsigned long type
12194 */
__RV_UKSUBW(unsigned int a,unsigned int b)12195 __STATIC_FORCEINLINE unsigned long __RV_UKSUBW(unsigned int a, unsigned int b)
12196 {
12197 register unsigned long result;
12198 __ASM volatile("uksubw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
12199 return result;
12200 }
12201 /* ===== Inline Function End for 3.164. UKSUBW ===== */
12202
12203 /* ===== Inline Function Start for 3.165. UMAR64 ===== */
12204 /**
12205 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
12206 * \brief UMAR64 (Unsigned Multiply and Add to 64-Bit Data)
12207 * \details
12208 * **Type**: DSP (64-bit Profile)
12209 *
12210 * **Syntax**:\n
12211 * ~~~
12212 * UMAR64 Rd, Rs1, Rs2
12213 * ~~~
12214 *
12215 * **Purpose**:\n
12216 * Multiply the 32-bit unsigned elements in two registers and add the 64-bit multiplication
12217 * results to the 64-bit unsigned data of a pair of registers (RV32) or a register (RV64). The result is
12218 * written back to the pair of registers (RV32) or a register (RV64).
12219 *
12220 * **RV32 Description**:\n
12221 * This instruction multiplies the 32-bit unsigned data of Rs1 with that of Rs2. It
12222 * adds the 64-bit multiplication result to the 64-bit unsigned data of an even/odd pair of registers
12223 * specified by Rd(4,1). The addition result is written back to the even/odd pair of registers specified by
12224 * Rd(4,1).
12225 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
12226 * includes register 2d and 2d+1.
12227 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
12228 * of the pair contains the low 32-bit of the result.
12229 *
12230 * **RV64 Description**:\n
12231 * This instruction multiplies the 32-bit unsigned elements of Rs1 with that of Rs2.
12232 * It adds the 64-bit multiplication results to the 64-bit unsigned data of Rd. The addition result is
12233 * written back to Rd.
12234 *
12235 * **Operations**:\n
12236 * ~~~
12237 * * RV32:
12238 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
12239 * R[t_H].R[t_L] = R[t_H].R[t_L] + (Rs1 * Rs2);
12240 * * RV64:
12241 * Rd = Rd + (Rs1.W[0] u* Rs2.W[0]) + (Rs1.W[1] u* Rs2.W[1]);
12242 * ~~~
12243 *
12244 * \param [in] t unsigned long long type of value stored in t
12245 * \param [in] a unsigned long type of value stored in a
12246 * \param [in] b unsigned long type of value stored in b
12247 * \return value stored in unsigned long long type
12248 */
__RV_UMAR64(unsigned long long t,unsigned long a,unsigned long b)12249 __STATIC_FORCEINLINE unsigned long long __RV_UMAR64(unsigned long long t, unsigned long a, unsigned long b)
12250 {
12251 __ASM volatile("umar64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
12252 return t;
12253 }
12254 /* ===== Inline Function End for 3.165. UMAR64 ===== */
12255
12256 /* ===== Inline Function Start for 3.166. UMAQA ===== */
12257 /**
12258 * \ingroup NMSIS_Core_DSP_Intrinsic_8B_MULT_32B_ADD
 * \brief UMAQA (Unsigned Multiply Four Bytes with 32-bit Adds)
12260 * \details
12261 * **Type**: DSP
12262 *
12263 * **Syntax**:\n
12264 * ~~~
12265 * UMAQA Rd, Rs1, Rs2
12266 * ~~~
12267 *
12268 * **Purpose**:\n
12269 * Do four unsigned 8-bit multiplications from 32-bit chunks of two registers; and then adds
12270 * the four 16-bit results and the content of corresponding 32-bit chunks of a third register together.
12271 *
12272 * **Description**:\n
12273 * This instruction multiplies the four unsigned 8-bit elements of 32-bit chunks of Rs1 with the four
12274 * unsigned 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the
12275 * unsigned content of the corresponding 32-bit chunks of Rd. The final results are written back to the
12276 * corresponding 32-bit chunks in Rd.
12277 *
12278 * **Operations**:\n
12279 * ~~~
12280 * res[x] = Rd.W[x] + (Rs1.W[x].B[3] u* Rs2.W[x].B[3]) +
12281 * (Rs1.W[x].B[2] u* Rs2.W[x].B[2]) + (Rs1.W[x].B[1] u* Rs2.W[x].B[1]) +
12282 * (Rs1.W[x].B[0] u* Rs2.W[x].B[0]);
12283 * Rd.W[x] = res[x];
12284 * for RV32: x=0,
12285 * for RV64: x=1...0
12286 * ~~~
12287 *
12288 * \param [in] t unsigned long type of value stored in t
12289 * \param [in] a unsigned long type of value stored in a
12290 * \param [in] b unsigned long type of value stored in b
12291 * \return value stored in unsigned long type
12292 */
__RV_UMAQA(unsigned long t,unsigned long a,unsigned long b)12293 __STATIC_FORCEINLINE unsigned long __RV_UMAQA(unsigned long t, unsigned long a, unsigned long b)
12294 {
12295 __ASM volatile("umaqa %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
12296 return t;
12297 }
12298 /* ===== Inline Function End for 3.166. UMAQA ===== */
12299
12300 /* ===== Inline Function Start for 3.167. UMAX8 ===== */
12301 /**
12302 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
12303 * \brief UMAX8 (SIMD 8-bit Unsigned Maximum)
12304 * \details
12305 * **Type**: SIMD
12306 *
12307 * **Syntax**:\n
12308 * ~~~
12309 * UMAX8 Rd, Rs1, Rs2
12310 * ~~~
12311 *
12312 * **Purpose**:\n
12313 * Do 8-bit unsigned integer elements finding maximum operations simultaneously.
12314 *
12315 * **Description**:\n
 * This instruction compares the 8-bit unsigned integer elements in Rs1 with the 8-bit
 * unsigned integer elements in Rs2 and selects, for each pair, the element that is greater than the
 * other one. The selected results are written to Rd.
12319 *
12320 * **Operations**:\n
12321 * ~~~
12322 * Rd.B[x] = (Rs1.B[x] >u Rs2.B[x])? Rs1.B[x] : Rs2.B[x];
12323 * for RV32: x=3...0,
12324 * for RV64: x=7...0
12325 * ~~~
12326 *
12327 * \param [in] a unsigned long type of value stored in a
12328 * \param [in] b unsigned long type of value stored in b
12329 * \return value stored in unsigned long type
12330 */
__RV_UMAX8(unsigned long a,unsigned long b)12331 __STATIC_FORCEINLINE unsigned long __RV_UMAX8(unsigned long a, unsigned long b)
12332 {
12333 register unsigned long result;
12334 __ASM volatile("umax8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
12335 return result;
12336 }
12337 /* ===== Inline Function End for 3.167. UMAX8 ===== */
12338
12339 /* ===== Inline Function Start for 3.168. UMAX16 ===== */
12340 /**
12341 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
12342 * \brief UMAX16 (SIMD 16-bit Unsigned Maximum)
12343 * \details
12344 * **Type**: SIMD
12345 *
12346 * **Syntax**:\n
12347 * ~~~
12348 * UMAX16 Rd, Rs1, Rs2
12349 * ~~~
12350 *
12351 * **Purpose**:\n
12352 * Do 16-bit unsigned integer elements finding maximum operations simultaneously.
12353 *
12354 * **Description**:\n
 * This instruction compares the 16-bit unsigned integer elements in Rs1 with the 16-bit
 * unsigned integer elements in Rs2 and selects, for each pair, the element that is greater than the
 * other one. The selected results are written to Rd.
12358 *
12359 * **Operations**:\n
12360 * ~~~
12361 * Rd.H[x] = (Rs1.H[x] >u Rs2.H[x])? Rs1.H[x] : Rs2.H[x];
12362 * for RV32: x=1...0,
12363 * for RV64: x=3...0
12364 * ~~~
12365 *
12366 * \param [in] a unsigned long type of value stored in a
12367 * \param [in] b unsigned long type of value stored in b
12368 * \return value stored in unsigned long type
12369 */
__RV_UMAX16(unsigned long a,unsigned long b)12370 __STATIC_FORCEINLINE unsigned long __RV_UMAX16(unsigned long a, unsigned long b)
12371 {
12372 register unsigned long result;
12373 __ASM volatile("umax16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
12374 return result;
12375 }
12376 /* ===== Inline Function End for 3.168. UMAX16 ===== */
12377
12378 /* ===== Inline Function Start for 3.169. UMIN8 ===== */
12379 /**
12380 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
12381 * \brief UMIN8 (SIMD 8-bit Unsigned Minimum)
12382 * \details
12383 * **Type**: SIMD
12384 *
12385 * **Syntax**:\n
12386 * ~~~
12387 * UMIN8 Rd, Rs1, Rs2
12388 * ~~~
12389 *
12390 * **Purpose**:\n
12391 * Do 8-bit unsigned integer elements finding minimum operations simultaneously.
12392 *
12393 * **Description**:\n
 * This instruction compares the 8-bit unsigned integer elements in Rs1 with the 8-bit
 * unsigned integer elements in Rs2 and selects, for each pair, the element that is less than the
 * other one. The selected results are written to Rd.
12397 *
12398 * **Operations**:\n
12399 * ~~~
12400 * Rd.B[x] = (Rs1.B[x] <u Rs2.B[x])? Rs1.B[x] : Rs2.B[x];
12401 * for RV32: x=3...0,
12402 * for RV64: x=7...0
12403 * ~~~
12404 *
12405 * \param [in] a unsigned long type of value stored in a
12406 * \param [in] b unsigned long type of value stored in b
12407 * \return value stored in unsigned long type
12408 */
__RV_UMIN8(unsigned long a,unsigned long b)12409 __STATIC_FORCEINLINE unsigned long __RV_UMIN8(unsigned long a, unsigned long b)
12410 {
12411 register unsigned long result;
12412 __ASM volatile("umin8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
12413 return result;
12414 }
12415 /* ===== Inline Function End for 3.169. UMIN8 ===== */
12416
12417 /* ===== Inline Function Start for 3.170. UMIN16 ===== */
12418 /**
12419 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
12420 * \brief UMIN16 (SIMD 16-bit Unsigned Minimum)
12421 * \details
12422 * **Type**: SIMD
12423 *
12424 * **Syntax**:\n
12425 * ~~~
12426 * UMIN16 Rd, Rs1, Rs2
12427 * ~~~
12428 *
12429 * **Purpose**:\n
12430 * Do 16-bit unsigned integer elements finding minimum operations simultaneously.
12431 *
12432 * **Description**:\n
 * This instruction compares the 16-bit unsigned integer elements in Rs1 with the 16-bit
 * unsigned integer elements in Rs2 and selects, for each pair, the element that is less than the
 * other one. The selected results are written to Rd.
12436 *
12437 * **Operations**:\n
12438 * ~~~
12439 * Rd.H[x] = (Rs1.H[x] <u Rs2.H[x])? Rs1.H[x] : Rs2.H[x];
12440 * for RV32: x=1...0,
12441 * for RV64: x=3...0
12442 * ~~~
12443 *
12444 * \param [in] a unsigned long type of value stored in a
12445 * \param [in] b unsigned long type of value stored in b
12446 * \return value stored in unsigned long type
12447 */
__RV_UMIN16(unsigned long a,unsigned long b)12448 __STATIC_FORCEINLINE unsigned long __RV_UMIN16(unsigned long a, unsigned long b)
12449 {
12450 register unsigned long result;
12451 __ASM volatile("umin16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
12452 return result;
12453 }
12454 /* ===== Inline Function End for 3.170. UMIN16 ===== */
12455
12456 /* ===== Inline Function Start for 3.171. UMSR64 ===== */
12457 /**
12458 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
12459 * \brief UMSR64 (Unsigned Multiply and Subtract from 64-Bit Data)
12460 * \details
12461 * **Type**: DSP (64-bit Profile)
12462 *
12463 * **Syntax**:\n
12464 * ~~~
12465 * UMSR64 Rd, Rs1, Rs2
12466 * ~~~
12467 *
12468 * **Purpose**:\n
12469 * Multiply the 32-bit unsigned elements in two registers and subtract the 64-bit
12470 * multiplication results from the 64-bit unsigned data of a pair of registers (RV32) or a register (RV64).
12471 * The result is written back to the pair of registers (RV32) or a register (RV64).
12472 *
12473 * **RV32 Description**:\n
12474 * This instruction multiplies the 32-bit unsigned data of Rs1 with that of Rs2. It
12475 * subtracts the 64-bit multiplication result from the 64-bit unsigned data of an even/odd pair of
12476 * registers specified by Rd(4,1). The subtraction result is written back to the even/odd pair of registers
12477 * specified by Rd(4,1).
12478 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
12479 * includes register 2d and 2d+1.
12480 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
12481 * of the pair contains the low 32-bit of the result.
12482 *
12483 * **RV64 Description**:\n
12484 * This instruction multiplies the 32-bit unsigned elements of Rs1 with that of Rs2.
12485 * It subtracts the 64-bit multiplication results from the 64-bit unsigned data of Rd. The subtraction
12486 * result is written back to Rd.
12487 *
12488 * **Operations**:\n
12489 * ~~~
12490 * * RV32:
12491 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
12492 * R[t_H].R[t_L] = R[t_H].R[t_L] - (Rs1 * Rs2);
12493 * * RV64:
12494 * Rd = Rd - (Rs1.W[0] u* Rs2.W[0]) - (Rs1.W[1] u* Rs2.W[1]);
12495 * ~~~
12496 *
12497 * \param [in] t unsigned long long type of value stored in t
12498 * \param [in] a unsigned long type of value stored in a
12499 * \param [in] b unsigned long type of value stored in b
12500 * \return value stored in unsigned long long type
12501 */
__RV_UMSR64(unsigned long long t,unsigned long a,unsigned long b)12502 __STATIC_FORCEINLINE unsigned long long __RV_UMSR64(unsigned long long t, unsigned long a, unsigned long b)
12503 {
12504 __ASM volatile("umsr64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
12505 return t;
12506 }
12507 /* ===== Inline Function End for 3.171. UMSR64 ===== */
12508
12509 /* ===== Inline Function Start for 3.172.1. UMUL8 ===== */
12510 /**
12511 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
12512 * \brief UMUL8 (SIMD Unsigned 8-bit Multiply)
12513 * \details
12514 * **Type**: SIMD
12515 *
12516 * **Syntax**:\n
12517 * ~~~
12518 * UMUL8 Rd, Rs1, Rs2
12519 * UMULX8 Rd, Rs1, Rs2
12520 * ~~~
12521 *
12522 * **Purpose**:\n
12523 * Do unsigned 8-bit multiplications and generate four 16-bit results simultaneously.
12524 *
12525 * **RV32 Description**:\n
12526 * For the `UMUL8` instruction, multiply the unsigned 8-bit data elements of Rs1
12527 * with the corresponding unsigned 8-bit data elements of Rs2.
12528 * For the `UMULX8` instruction, multiply the first and second unsigned 8-bit data elements of Rs1
12529 * with the second and first unsigned 8-bit data elements of Rs2. At the same time, multiply the third
12530 * and fourth unsigned 8-bit data elements of Rs1 with the fourth and third unsigned 8-bit data
12531 * elements of Rs2.
12532 * The four 16-bit results are then written into an even/odd pair of registers specified by Rd(4,1).
12533 * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
12534 * includes register 2d and 2d+1.
12535 * The odd `2d+1` register of the pair contains the two 16-bit results calculated from the top part of
12536 * Rs1 and the even `2d` register of the pair contains the two 16-bit results calculated from the bottom
12537 * part of Rs1.
12538 *
12539 * **RV64 Description**:\n
12540 * For the `UMUL8` instruction, multiply the unsigned 8-bit data elements of Rs1
12541 * with the corresponding unsigned 8-bit data elements of Rs2.
12542 * For the `UMULX8` instruction, multiply the first and second unsigned 8-bit data elements of Rs1
12543 * with the second and first unsigned 8-bit data elements of Rs2. At the same time, multiply the third
12544 * and fourth unsigned 8-bit data elements of Rs1 with the fourth and third unsigned 8-bit data
12545 * elements of Rs2.
12546 * The four 16-bit results are then written into Rd. The Rd.W[1] contains the two 16-bit results
12547 * calculated from the top part of Rs1 and the Rd.W[0] contains the two 16-bit results calculated from
12548 * the bottom part of Rs1.
12549 *
12550 * **Operations**:\n
12551 * ~~~
12552 * * RV32:
12553 * if (is `UMUL8`) {
12554 * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
12555 * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
12556 * } else if (is `UMULX8`) {
12557 * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
12558 * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
12559 * }
12560 * rest[x/2] = op1t[x/2] u* op2t[x/2];
12561 * resb[x/2] = op1b[x/2] u* op2b[x/2];
12562 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
12563 * R[t_H].H[1] = rest[1]; R[t_H].H[0] = resb[1];
12564 * R[t_L].H[1] = rest[0]; R[t_L].H[0] = resb[0];
12565 * x = 0 and 2
12566 * * RV64:
12567 * if (is `UMUL8`) {
12568 * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
12569 * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
12570 * } else if (is `UMULX8`) {
12571 * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
12572 * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
12573 * }
12574 * rest[x/2] = op1t[x/2] u* op2t[x/2];
12575 * resb[x/2] = op1b[x/2] u* op2b[x/2];
12576 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
12577 * Rd.W[1].H[1] = rest[1]; Rd.W[1].H[0] = resb[1];
12578 * Rd.W[0].H[1] = rest[0]; Rd.W[0].H[0] = resb[0]; x = 0 and 2
12579 * ~~~
12580 *
12581 * \param [in] a unsigned int type of value stored in a
12582 * \param [in] b unsigned int type of value stored in b
12583 * \return value stored in unsigned long long type
12584 */
__RV_UMUL8(unsigned int a,unsigned int b)12585 __STATIC_FORCEINLINE unsigned long long __RV_UMUL8(unsigned int a, unsigned int b)
12586 {
12587 register unsigned long long result;
12588 __ASM volatile("umul8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
12589 return result;
12590 }
12591 /* ===== Inline Function End for 3.172.1. UMUL8 ===== */
12592
12593 /* ===== Inline Function Start for 3.172.2. UMULX8 ===== */
12594 /**
12595 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
12596 * \brief UMULX8 (SIMD Unsigned Crossed 8-bit Multiply)
12597 * \details
12598 * **Type**: SIMD
12599 *
12600 * **Syntax**:\n
12601 * ~~~
12602 * UMUL8 Rd, Rs1, Rs2
12603 * UMULX8 Rd, Rs1, Rs2
12604 * ~~~
12605 *
12606 * **Purpose**:\n
12607 * Do unsigned 8-bit multiplications and generate four 16-bit results simultaneously.
12608 *
12609 * **RV32 Description**:\n
12610 * For the `UMUL8` instruction, multiply the unsigned 8-bit data elements of Rs1
12611 * with the corresponding unsigned 8-bit data elements of Rs2.
12612 * For the `UMULX8` instruction, multiply the first and second unsigned 8-bit data elements of Rs1
12613 * with the second and first unsigned 8-bit data elements of Rs2. At the same time, multiply the third
12614 * and fourth unsigned 8-bit data elements of Rs1 with the fourth and third unsigned 8-bit data
12615 * elements of Rs2.
12616 * The four 16-bit results are then written into an even/odd pair of registers specified by Rd(4,1).
12617 * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
12618 * includes register 2d and 2d+1.
12619 * The odd `2d+1` register of the pair contains the two 16-bit results calculated from the top part of
12620 * Rs1 and the even `2d` register of the pair contains the two 16-bit results calculated from the bottom
12621 * part of Rs1.
12622 *
12623 * **RV64 Description**:\n
12624 * For the `UMUL8` instruction, multiply the unsigned 8-bit data elements of Rs1
12625 * with the corresponding unsigned 8-bit data elements of Rs2.
12626 * For the `UMULX8` instruction, multiply the first and second unsigned 8-bit data elements of Rs1
12627 * with the second and first unsigned 8-bit data elements of Rs2. At the same time, multiply the third
12628 * and fourth unsigned 8-bit data elements of Rs1 with the fourth and third unsigned 8-bit data
12629 * elements of Rs2.
12630 * The four 16-bit results are then written into Rd. The Rd.W[1] contains the two 16-bit results
12631 * calculated from the top part of Rs1 and the Rd.W[0] contains the two 16-bit results calculated from
12632 * the bottom part of Rs1.
12633 *
12634 * **Operations**:\n
12635 * ~~~
12636 * * RV32:
12637 * if (is `UMUL8`) {
12638 * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
12639 * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
12640 * } else if (is `UMULX8`) {
12641 * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
12642 * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
12643 * }
12644 * rest[x/2] = op1t[x/2] u* op2t[x/2];
12645 * resb[x/2] = op1b[x/2] u* op2b[x/2];
12646 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
12647 * R[t_H].H[1] = rest[1]; R[t_H].H[0] = resb[1];
12648 * R[t_L].H[1] = rest[0]; R[t_L].H[0] = resb[0];
12649 * x = 0 and 2
12650 * * RV64:
12651 * if (is `UMUL8`) {
12652 * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
12653 * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
12654 * } else if (is `UMULX8`) {
12655 * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
12656 * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
12657 * }
12658 * rest[x/2] = op1t[x/2] u* op2t[x/2];
12659 * resb[x/2] = op1b[x/2] u* op2b[x/2];
12660 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
12661 * Rd.W[1].H[1] = rest[1]; Rd.W[1].H[0] = resb[1];
12662 * Rd.W[0].H[1] = rest[0]; Rd.W[0].H[0] = resb[0]; x = 0 and 2
12663 * ~~~
12664 *
12665 * \param [in] a unsigned int type of value stored in a
12666 * \param [in] b unsigned int type of value stored in b
12667 * \return value stored in unsigned long long type
12668 */
__RV_UMULX8(unsigned int a,unsigned int b)12669 __STATIC_FORCEINLINE unsigned long long __RV_UMULX8(unsigned int a, unsigned int b)
12670 {
12671 register unsigned long long result;
12672 __ASM volatile("umulx8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
12673 return result;
12674 }
12675 /* ===== Inline Function End for 3.172.2. UMULX8 ===== */
12676
12677 /* ===== Inline Function Start for 3.173.1. UMUL16 ===== */
12678 /**
12679 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
12680 * \brief UMUL16 (SIMD Unsigned 16-bit Multiply)
12681 * \details
12682 * **Type**: SIMD
12683 *
12684 * **Syntax**:\n
12685 * ~~~
12686 * UMUL16 Rd, Rs1, Rs2
12687 * UMULX16 Rd, Rs1, Rs2
12688 * ~~~
12689 *
12690 * **Purpose**:\n
12691 * Do unsigned 16-bit multiplications and generate two 32-bit results simultaneously.
12692 *
12693 * **RV32 Description**:\n
12694 * For the `UMUL16` instruction, multiply the top 16-bit U16 content of Rs1 with
12695 * the top 16-bit U16 content of Rs2. At the same time, multiply the bottom 16-bit U16 content of Rs1
12696 * with the bottom 16-bit U16 content of Rs2.
12697 * For the `UMULX16` instruction, multiply the top 16-bit U16 content of Rs1 with the bottom 16-bit
12698 * U16 content of Rs2. At the same time, multiply the bottom 16-bit U16 content of Rs1 with the top 16-
12699 * bit U16 content of Rs2.
12700 * The two U32 results are then written into an even/odd pair of registers specified by Rd(4,1). Rd(4,1),
12701 * i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair includes
12702 * register 2d and 2d+1.
12703 * The odd `2d+1` register of the pair contains the 32-bit result calculated from the top part of Rs1 and
12704 * the even `2d` register of the pair contains the 32-bit result calculated from the bottom part of Rs1.
12705 *
12706 * **RV64 Description**:\n
12707 * For the `UMUL16` instruction, multiply the top 16-bit U16 content of the lower
12708 * 32-bit word in Rs1 with the top 16-bit U16 content of the lower 32-bit word in Rs2. At the same time,
12709 * multiply the bottom 16-bit U16 content of the lower 32-bit word in Rs1 with the bottom 16-bit U16
12710 * content of the lower 32-bit word in Rs2.
12711 * For the `UMULX16` instruction, multiply the top 16-bit U16 content of the lower 32-bit word in Rs1
12712 * with the bottom 16-bit U16 content of the lower 32-bit word in Rs2. At the same time, multiply the
12713 * bottom 16-bit U16 content of the lower 32-bit word in Rs1 with the top 16-bit U16 content of the
12714 * lower 32-bit word in Rs2.
12715 * The two 32-bit U32 results are then written into Rd. The result calculated from the top 16-bit of the
12716 * lower 32-bit word in Rs1 is written to Rd.W[1]. And the result calculated from the bottom 16-bit of
12717 * the lower 32-bit word in Rs1 is written to Rd.W[0]
12718 *
12719 * **Operations**:\n
12720 * ~~~
12721 * * RV32:
12722 * if (is `UMUL16`) {
12723 * op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
12724 * op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
12725 * } else if (is `UMULX16`) {
12726 * op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
12727 * op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
12728 * }
12729 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
12730 * res = aop u* bop;
12731 * }
12732 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
12733 * R[t_H] = rest;
12734 * R[t_L] = resb;
12735 * * RV64:
12736 * if (is `UMUL16`) {
12737 * op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
12738 * op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
12739 * } else if (is `UMULX16`) {
12740 * op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
12741 * op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
12742 * }
12743 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
12744 * res = aop u* bop;
12745 * }
12746 * Rd.W[1] = rest;
12747 * Rd.W[0] = resb;
12748 * ~~~
12749 *
12750 * \param [in] a unsigned int type of value stored in a
12751 * \param [in] b unsigned int type of value stored in b
12752 * \return value stored in unsigned long long type
12753 */
__RV_UMUL16(unsigned int a,unsigned int b)12754 __STATIC_FORCEINLINE unsigned long long __RV_UMUL16(unsigned int a, unsigned int b)
12755 {
12756 register unsigned long long result;
12757 __ASM volatile("umul16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
12758 return result;
12759 }
12760 /* ===== Inline Function End for 3.173.1. UMUL16 ===== */
12761
12762 /* ===== Inline Function Start for 3.173.2. UMULX16 ===== */
12763 /**
12764 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
12765 * \brief UMULX16 (SIMD Unsigned Crossed 16-bit Multiply)
12766 * \details
12767 * **Type**: SIMD
12768 *
12769 * **Syntax**:\n
12770 * ~~~
12771 * UMUL16 Rd, Rs1, Rs2
12772 * UMULX16 Rd, Rs1, Rs2
12773 * ~~~
12774 *
12775 * **Purpose**:\n
12776 * Do unsigned 16-bit multiplications and generate two 32-bit results simultaneously.
12777 *
12778 * **RV32 Description**:\n
12779 * For the `UMUL16` instruction, multiply the top 16-bit U16 content of Rs1 with
12780 * the top 16-bit U16 content of Rs2. At the same time, multiply the bottom 16-bit U16 content of Rs1
12781 * with the bottom 16-bit U16 content of Rs2.
12782 * For the `UMULX16` instruction, multiply the top 16-bit U16 content of Rs1 with the bottom 16-bit
12783 * U16 content of Rs2. At the same time, multiply the bottom 16-bit U16 content of Rs1 with the top 16-
12784 * bit U16 content of Rs2.
12785 * The two U32 results are then written into an even/odd pair of registers specified by Rd(4,1). Rd(4,1),
12786 * i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair includes
12787 * register 2d and 2d+1.
12788 * The odd `2d+1` register of the pair contains the 32-bit result calculated from the top part of Rs1 and
12789 * the even `2d` register of the pair contains the 32-bit result calculated from the bottom part of Rs1.
12790 *
12791 * **RV64 Description**:\n
12792 * For the `UMUL16` instruction, multiply the top 16-bit U16 content of the lower
12793 * 32-bit word in Rs1 with the top 16-bit U16 content of the lower 32-bit word in Rs2. At the same time,
12794 * multiply the bottom 16-bit U16 content of the lower 32-bit word in Rs1 with the bottom 16-bit U16
12795 * content of the lower 32-bit word in Rs2.
12796 * For the `UMULX16` instruction, multiply the top 16-bit U16 content of the lower 32-bit word in Rs1
12797 * with the bottom 16-bit U16 content of the lower 32-bit word in Rs2. At the same time, multiply the
12798 * bottom 16-bit U16 content of the lower 32-bit word in Rs1 with the top 16-bit U16 content of the
12799 * lower 32-bit word in Rs2.
12800 * The two 32-bit U32 results are then written into Rd. The result calculated from the top 16-bit of the
12801 * lower 32-bit word in Rs1 is written to Rd.W[1]. And the result calculated from the bottom 16-bit of
12802 * the lower 32-bit word in Rs1 is written to Rd.W[0]
12803 *
12804 * **Operations**:\n
12805 * ~~~
12806 * * RV32:
12807 * if (is `UMUL16`) {
12808 * op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
12809 * op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
12810 * } else if (is `UMULX16`) {
12811 * op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
12812 * op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
12813 * }
12814 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
12815 * res = aop u* bop;
12816 * }
12817 * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
12818 * R[t_H] = rest;
12819 * R[t_L] = resb;
12820 * * RV64:
12821 * if (is `UMUL16`) {
12822 * op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
12823 * op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
12824 * } else if (is `UMULX16`) {
12825 * op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
12826 * op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
12827 * }
12828 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
12829 * res = aop u* bop;
12830 * }
12831 * Rd.W[1] = rest;
12832 * Rd.W[0] = resb;
12833 * ~~~
12834 *
12835 * \param [in] a unsigned int type of value stored in a
12836 * \param [in] b unsigned int type of value stored in b
12837 * \return value stored in unsigned long long type
12838 */
__RV_UMULX16(unsigned int a,unsigned int b)12839 __STATIC_FORCEINLINE unsigned long long __RV_UMULX16(unsigned int a, unsigned int b)
12840 {
12841 register unsigned long long result;
12842 __ASM volatile("umulx16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
12843 return result;
12844 }
12845 /* ===== Inline Function End for 3.173.2. UMULX16 ===== */
12846
12847 /* ===== Inline Function Start for 3.174. URADD8 ===== */
12848 /**
12849 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
12850 * \brief URADD8 (SIMD 8-bit Unsigned Halving Addition)
12851 * \details
12852 * **Type**: SIMD
12853 *
12854 * **Syntax**:\n
12855 * ~~~
12856 * URADD8 Rd, Rs1, Rs2
12857 * ~~~
12858 *
12859 * **Purpose**:\n
12860 * Do 8-bit unsigned integer element additions simultaneously. The results are halved to
12861 * avoid overflow or saturation.
12862 *
12863 * **Description**:\n
12864 * This instruction adds the 8-bit unsigned integer elements in Rs1 with the 8-bit
12865 * unsigned integer elements in Rs2. The results are first logically right-shifted by 1 bit and then
12866 * written to Rd.
12867 *
12868 * **Examples**:\n
12869 * ~~~
12870 * * Ra = 0x7F, Rb = 0x7F, Rt = 0x7F
12871 * * Ra = 0x80, Rb = 0x80, Rt = 0x80
12872 * * Ra = 0x40, Rb = 0x80, Rt = 0x60
12873 * ~~~
12874 *
12875 * **Operations**:\n
12876 * ~~~
12877 * Rd.B[x] = (Rs1.B[x] + Rs2.B[x]) u>> 1;
12878 * for RV32: x=3...0,
12879 * for RV64: x=7...0
12880 * ~~~
12881 *
12882 * \param [in] a unsigned long type of value stored in a
12883 * \param [in] b unsigned long type of value stored in b
12884 * \return value stored in unsigned long type
12885 */
__RV_URADD8(unsigned long a,unsigned long b)12886 __STATIC_FORCEINLINE unsigned long __RV_URADD8(unsigned long a, unsigned long b)
12887 {
12888 register unsigned long result;
12889 __ASM volatile("uradd8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
12890 return result;
12891 }
12892 /* ===== Inline Function End for 3.174. URADD8 ===== */
12893
12894 /* ===== Inline Function Start for 3.175. URADD16 ===== */
12895 /**
12896 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
12897 * \brief URADD16 (SIMD 16-bit Unsigned Halving Addition)
12898 * \details
12899 * **Type**: SIMD
12900 *
12901 * **Syntax**:\n
12902 * ~~~
12903 * URADD16 Rd, Rs1, Rs2
12904 * ~~~
12905 *
12906 * **Purpose**:\n
12907 * Do 16-bit unsigned integer element additions simultaneously. The results are halved to
12908 * avoid overflow or saturation.
12909 *
12910 * **Description**:\n
12911 * This instruction adds the 16-bit unsigned integer elements in Rs1 with the 16-bit
12912 * unsigned integer elements in Rs2. The results are first logically right-shifted by 1 bit and then
12913 * written to Rd.
12914 *
12915 * **Examples**:\n
12916 * ~~~
12917 * * Ra = 0x7FFF, Rb = 0x7FFF Rt = 0x7FFF
12918 * * Ra = 0x8000, Rb = 0x8000 Rt = 0x8000
12919 * * Ra = 0x4000, Rb = 0x8000 Rt = 0x6000
12920 * ~~~
12921 *
12922 * **Operations**:\n
12923 * ~~~
12924 * Rd.H[x] = (Rs1.H[x] + Rs2.H[x]) u>> 1;
12925 * for RV32: x=1...0,
12926 * for RV64: x=3...0
12927 * ~~~
12928 *
12929 * \param [in] a unsigned long type of value stored in a
12930 * \param [in] b unsigned long type of value stored in b
12931 * \return value stored in unsigned long type
12932 */
__RV_URADD16(unsigned long a,unsigned long b)12933 __STATIC_FORCEINLINE unsigned long __RV_URADD16(unsigned long a, unsigned long b)
12934 {
12935 register unsigned long result;
12936 __ASM volatile("uradd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
12937 return result;
12938 }
12939 /* ===== Inline Function End for 3.175. URADD16 ===== */
12940
12941 /* ===== Inline Function Start for 3.176. URADD64 ===== */
12942 /**
12943 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
12944 * \brief URADD64 (64-bit Unsigned Halving Addition)
12945 * \details
12946 * **Type**: DSP (64-bit Profile)
12947 *
12948 * **Syntax**:\n
12949 * ~~~
12950 * URADD64 Rd, Rs1, Rs2
12951 * ~~~
12952 *
12953 * **Purpose**:\n
12954 * Add two 64-bit unsigned integers. The result is halved to avoid overflow or saturation.
12955 *
12956 * **RV32 Description**:\n
12957 * This instruction adds the 64-bit unsigned integer of an even/odd pair of registers
12958 * specified by Rs1(4,1) with the 64-bit unsigned integer of an even/odd pair of registers specified by
12959 * Rs2(4,1). The 64-bit addition result is first logically right-shifted by 1 bit and then written to an
12960 * even/odd pair of registers specified by Rd(4,1).
12961 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
12962 * includes register 2d and 2d+1.
12963 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
12964 * of the pair contains the low 32-bit of the result.
12965 *
12966 * **RV64 Description**:\n
12967 * This instruction adds the 64-bit unsigned integer in Rs1 with the 64-bit unsigned
12968 * integer Rs2. The 64-bit addition result is first logically right-shifted by 1 bit and then written to Rd.
12969 *
12970 * **Operations**:\n
12971 * ~~~
12972 * * RV32:
12973 * t_L = CONCAT(Rt(4,1),1'b0); t_H = CONCAT(Rt(4,1),1'b1);
12974 * a_L = CONCAT(Ra(4,1),1'b0); a_H = CONCAT(Ra(4,1),1'b1);
12975 * b_L = CONCAT(Rb(4,1),1'b0); b_H = CONCAT(Rb(4,1),1'b1);
12976 * R[t_H].R[t_L] = (R[a_H].R[a_L] + R[b_H].R[b_L]) u>> 1;
12977 * * RV64:
12978 * Rd = (Rs1 + Rs2) u>> 1;
12979 * ~~~
12980 *
12981 * \param [in] a unsigned long long type of value stored in a
12982 * \param [in] b unsigned long long type of value stored in b
12983 * \return value stored in unsigned long long type
12984 */
__RV_URADD64(unsigned long long a,unsigned long long b)12985 __STATIC_FORCEINLINE unsigned long long __RV_URADD64(unsigned long long a, unsigned long long b)
12986 {
12987 register unsigned long long result;
12988 __ASM volatile("uradd64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
12989 return result;
12990 }
12991 /* ===== Inline Function End for 3.176. URADD64 ===== */
12992
12993 /* ===== Inline Function Start for 3.177. URADDW ===== */
12994 /**
12995 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
12996 * \brief URADDW (32-bit Unsigned Halving Addition)
12997 * \details
12998 * **Type**: DSP
12999 *
13000 * **Syntax**:\n
13001 * ~~~
13002 * URADDW Rd, Rs1, Rs2
13003 * ~~~
13004 *
13005 * **Purpose**:\n
13006 * Add 32-bit unsigned integers and the results are halved to avoid overflow or saturation.
13007 *
13008 * **Description**:\n
13009 * This instruction adds the first 32-bit unsigned integer in Rs1 with the first 32-bit
13010 * unsigned integer in Rs2. The result is first logically right-shifted by 1 bit and then sign-extended and
13011 * written to Rd.
13012 *
13013 * **Examples**:\n
13014 * ~~~
13015 * * Ra = 0x7FFFFFFF, Rb = 0x7FFFFFFF Rt = 0x7FFFFFFF
13016 * * Ra = 0x80000000, Rb = 0x80000000 Rt = 0x80000000
13017 * * Ra = 0x40000000, Rb = 0x80000000 Rt = 0x60000000
13018 * ~~~
13019 *
13020 * **Operations**:\n
13021 * ~~~
13022 * * RV32:
13023 * Rd[31:0] = (Rs1[31:0] + Rs2[31:0]) u>> 1;
13024 * * RV64:
13025 * resw[31:0] = (Rs1[31:0] + Rs2[31:0]) u>> 1;
13026 * Rd[63:0] = SE(resw[31:0]);
13027 * ~~~
13028 *
13029 * \param [in] a unsigned int type of value stored in a
13030 * \param [in] b unsigned int type of value stored in b
13031 * \return value stored in unsigned long type
13032 */
__RV_URADDW(unsigned int a,unsigned int b)13033 __STATIC_FORCEINLINE unsigned long __RV_URADDW(unsigned int a, unsigned int b)
13034 {
13035 register unsigned long result;
13036 __ASM volatile("uraddw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
13037 return result;
13038 }
13039 /* ===== Inline Function End for 3.177. URADDW ===== */
13040
13041 /* ===== Inline Function Start for 3.178. URCRAS16 ===== */
13042 /**
13043 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
13044 * \brief URCRAS16 (SIMD 16-bit Unsigned Halving Cross Addition & Subtraction)
13045 * \details
13046 * **Type**: SIMD
13047 *
13048 * **Syntax**:\n
13049 * ~~~
13050 * URCRAS16 Rd, Rs1, Rs2
13051 * ~~~
13052 *
13053 * **Purpose**:\n
13054 * Do 16-bit unsigned integer element addition and 16-bit unsigned integer element
13055 * subtraction in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
13056 * The results are halved to avoid overflow or saturation.
13057 *
13058 * **Description**:\n
13059 * This instruction adds the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1
13060 * with the 16-bit unsigned integer in [15:0] of 32-bit chunks in Rs2, and subtracts the 16-bit unsigned
13061 * integer in [31:16] of 32-bit chunks in Rs2 from the 16-bit unsigned integer in [15:0] of 32-bit chunks
13062 * in Rs1. The element results are first logically right-shifted by 1 bit and then written to [31:16] of 32-
13063 * bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
13064 *
13065 * **Examples**:\n
13066 * ~~~
13067 * Please see `URADD16` and `URSUB16` instructions.
13068 * ~~~
13069 *
13070 * **Operations**:\n
13071 * ~~~
13072 * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][15:0]) u>> 1;
13073 * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][31:16]) u>> 1;
13074 * for RV32, x=0
13075 * for RV64, x=1...0
13076 * ~~~
13077 *
13078 * \param [in] a unsigned long type of value stored in a
13079 * \param [in] b unsigned long type of value stored in b
13080 * \return value stored in unsigned long type
13081 */
__RV_URCRAS16(unsigned long a,unsigned long b)13082 __STATIC_FORCEINLINE unsigned long __RV_URCRAS16(unsigned long a, unsigned long b)
13083 {
13084 register unsigned long result;
13085 __ASM volatile("urcras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
13086 return result;
13087 }
13088 /* ===== Inline Function End for 3.178. URCRAS16 ===== */
13089
13090 /* ===== Inline Function Start for 3.179. URCRSA16 ===== */
13091 /**
13092 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
13093 * \brief URCRSA16 (SIMD 16-bit Unsigned Halving Cross Subtraction & Addition)
13094 * \details
13095 * **Type**: SIMD
13096 *
13097 * **Syntax**:\n
13098 * ~~~
13099 * URCRSA16 Rd, Rs1, Rs2
13100 * ~~~
13101 *
13102 * **Purpose**:\n
13103 * Do 16-bit unsigned integer element subtraction and 16-bit unsigned integer element
13104 * addition in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
13105 * The results are halved to avoid overflow or saturation.
13106 *
13107 * **Description**:\n
13108 * This instruction subtracts the 16-bit unsigned integer in [15:0] of 32-bit chunks in Rs2
13109 * from the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1, and adds the 16-bit unsigned
13110 * integer in [15:0] of 32-bit chunks in Rs1 with the 16-bit unsigned integer in [31:16] of 32-bit chunks
13111 * in Rs2. The two results are first logically right-shifted by 1 bit and then written to [31:16] of 32-bit
13112 * chunks in Rd and [15:0] of 32-bit chunks in Rd.
13113 *
13114 * **Examples**:\n
13115 * ~~~
13116 * Please see `URADD16` and `URSUB16` instructions.
13117 * ~~~
13118 *
13119 * **Operations**:\n
13120 * ~~~
13121 * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][15:0]) u>> 1;
13122 * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][31:16]) u>> 1;
13123 * for RV32, x=0
13124 * for RV64, x=1...0
13125 * ~~~
13126 *
13127 * \param [in] a unsigned long type of value stored in a
13128 * \param [in] b unsigned long type of value stored in b
13129 * \return value stored in unsigned long type
13130 */
__RV_URCRSA16(unsigned long a,unsigned long b)13131 __STATIC_FORCEINLINE unsigned long __RV_URCRSA16(unsigned long a, unsigned long b)
13132 {
13133 register unsigned long result;
13134 __ASM volatile("urcrsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
13135 return result;
13136 }
13137 /* ===== Inline Function End for 3.179. URCRSA16 ===== */
13138
13139 /* ===== Inline Function Start for 3.180. URSTAS16 ===== */
13140 /**
13141 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
13142 * \brief URSTAS16 (SIMD 16-bit Unsigned Halving Straight Addition & Subtraction)
13143 * \details
13144 * **Type**: SIMD
13145 *
13146 * **Syntax**:\n
13147 * ~~~
13148 * URSTAS16 Rd, Rs1, Rs2
13149 * ~~~
13150 *
13151 * **Purpose**:\n
13152 * Do 16-bit unsigned integer element addition and 16-bit unsigned integer element
13153 * subtraction in a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit
13154 * chunks. The results are halved to avoid overflow or saturation.
13155 *
13156 * **Description**:\n
13157 * This instruction adds the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1
13158 * with the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs2, and subtracts the 16-bit unsigned
13159 * integer in [15:0] of 32-bit chunks in Rs2 from the 16-bit unsigned integer in [15:0] of 32-bit chunks
13160 * in Rs1. The element results are first logically right-shifted by 1 bit and then written to [31:16] of 32-
13161 * bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
13162 *
13163 * **Examples**:\n
13164 * ~~~
13165 * Please see `URADD16` and `URSUB16` instructions.
13166 * ~~~
13167 *
13168 * **Operations**:\n
13169 * ~~~
13170 * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][31:16]) u>> 1;
13171 * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][15:0]) u>> 1;
13172 * for RV32, x=0
13173 * for RV64, x=1...0
13174 * ~~~
13175 *
13176 * \param [in] a unsigned long type of value stored in a
13177 * \param [in] b unsigned long type of value stored in b
13178 * \return value stored in unsigned long type
13179 */
__RV_URSTAS16(unsigned long a,unsigned long b)13180 __STATIC_FORCEINLINE unsigned long __RV_URSTAS16(unsigned long a, unsigned long b)
13181 {
13182 register unsigned long result;
13183 __ASM volatile("urstas16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
13184 return result;
13185 }
13186 /* ===== Inline Function End for 3.180. URSTAS16 ===== */
13187
13188 /* ===== Inline Function Start for 3.181. URSTSA16 ===== */
13189 /**
13190 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
13191 * \brief URSTSA16 (SIMD 16-bit Unsigned Halving Straight Subtraction & Addition)
13192 * \details
13193 * **Type**: SIMD
13194 *
13195 * **Syntax**:\n
13196 * ~~~
 * URSTSA16 Rd, Rs1, Rs2
13198 * ~~~
13199 *
13200 * **Purpose**:\n
13201 * Do 16-bit unsigned integer element subtraction and 16-bit unsigned integer element
13202 * addition in a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit
13203 * chunks. The results are halved to avoid overflow or saturation.
13204 *
13205 * **Description**:\n
13206 * This instruction subtracts the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs2
13207 * from the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1, and adds the 16-bit unsigned
13208 * integer in [15:0] of 32-bit chunks in Rs1 with the 16-bit unsigned integer in [15:0] of 32-bit chunks in
13209 * Rs2. The two results are first logically right-shifted by 1 bit and then written to [31:16] of 32-bit
13210 * chunks in Rd and [15:0] of 32-bit chunks in Rd.
13211 *
13212 * **Examples**:\n
13213 * ~~~
13214 * Please see `URADD16` and `URSUB16` instructions.
13215 * ~~~
13216 *
13217 * **Operations**:\n
13218 * ~~~
13219 * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][31:16]) u>> 1;
13220 * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][15:0]) u>> 1;
13221 * for RV32, x=0
13222 * for RV64, x=1...0
13223 * ~~~
13224 *
13225 * \param [in] a unsigned long type of value stored in a
13226 * \param [in] b unsigned long type of value stored in b
13227 * \return value stored in unsigned long type
13228 */
__RV_URSTSA16(unsigned long a,unsigned long b)13229 __STATIC_FORCEINLINE unsigned long __RV_URSTSA16(unsigned long a, unsigned long b)
13230 {
13231 register unsigned long result;
13232 __ASM volatile("urstsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
13233 return result;
13234 }
13235 /* ===== Inline Function End for 3.181. URSTSA16 ===== */
13236
13237 /* ===== Inline Function Start for 3.182. URSUB8 ===== */
13238 /**
13239 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
13240 * \brief URSUB8 (SIMD 8-bit Unsigned Halving Subtraction)
13241 * \details
13242 * **Type**: SIMD
13243 *
13244 * **Syntax**:\n
13245 * ~~~
13246 * URSUB8 Rd, Rs1, Rs2
13247 * ~~~
13248 *
13249 * **Purpose**:\n
13250 * Do 8-bit unsigned integer element subtractions simultaneously. The results are halved to
13251 * avoid overflow or saturation.
13252 *
13253 * **Description**:\n
13254 * This instruction subtracts the 8-bit unsigned integer elements in Rs2 from the 8-bit
13255 * unsigned integer elements in Rs1. The results are first logically right-shifted by 1 bit and then
13256 * written to Rd.
13257 *
13258 * **Examples**:\n
13259 * ~~~
13260 * * Ra = 0x7F, Rb = 0x80 Rt = 0xFF
13261 * * Ra = 0x80, Rb = 0x7F Rt = 0x00
13262 * * Ra = 0x80, Rb = 0x40 Rt = 0x20
13263 * ~~~
13264 *
13265 * **Operations**:\n
13266 * ~~~
13267 * Rd.B[x] = (Rs1.B[x] - Rs2.B[x]) u>> 1;
13268 * for RV32: x=3...0,
13269 * for RV64: x=7...0
13270 * ~~~
13271 *
13272 * \param [in] a unsigned long type of value stored in a
13273 * \param [in] b unsigned long type of value stored in b
13274 * \return value stored in unsigned long type
13275 */
__RV_URSUB8(unsigned long a,unsigned long b)13276 __STATIC_FORCEINLINE unsigned long __RV_URSUB8(unsigned long a, unsigned long b)
13277 {
13278 register unsigned long result;
13279 __ASM volatile("ursub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
13280 return result;
13281 }
13282 /* ===== Inline Function End for 3.182. URSUB8 ===== */
13283
13284 /* ===== Inline Function Start for 3.183. URSUB16 ===== */
13285 /**
13286 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
13287 * \brief URSUB16 (SIMD 16-bit Unsigned Halving Subtraction)
13288 * \details
13289 * **Type**: SIMD
13290 *
13291 * **Syntax**:\n
13292 * ~~~
13293 * URSUB16 Rd, Rs1, Rs2
13294 * ~~~
13295 *
13296 * **Purpose**:\n
13297 * Do 16-bit unsigned integer element subtractions simultaneously. The results are halved to
13298 * avoid overflow or saturation.
13299 *
13300 * **Description**:\n
13301 * This instruction subtracts the 16-bit unsigned integer elements in Rs2 from the 16-bit
13302 * unsigned integer elements in Rs1. The results are first logically right-shifted by 1 bit and then
13303 * written to Rd.
13304 *
13305 * **Examples**:\n
13306 * ~~~
13307 * * Ra = 0x7FFF, Rb = 0x8000 Rt = 0xFFFF
13308 * * Ra = 0x8000, Rb = 0x7FFF Rt = 0x0000
13309 * * Ra = 0x8000, Rb = 0x4000 Rt = 0x2000
13310 * ~~~
13311 *
13312 * **Operations**:\n
13313 * ~~~
13314 * Rd.H[x] = (Rs1.H[x] - Rs2.H[x]) u>> 1;
13315 * for RV32: x=1...0,
13316 * for RV64: x=3...0
13317 * ~~~
13318 *
13319 * \param [in] a unsigned long type of value stored in a
13320 * \param [in] b unsigned long type of value stored in b
13321 * \return value stored in unsigned long type
13322 */
__RV_URSUB16(unsigned long a,unsigned long b)13323 __STATIC_FORCEINLINE unsigned long __RV_URSUB16(unsigned long a, unsigned long b)
13324 {
13325 register unsigned long result;
13326 __ASM volatile("ursub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
13327 return result;
13328 }
13329 /* ===== Inline Function End for 3.183. URSUB16 ===== */
13330
13331 /* ===== Inline Function Start for 3.184. URSUB64 ===== */
13332 /**
13333 * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
13334 * \brief URSUB64 (64-bit Unsigned Halving Subtraction)
13335 * \details
13336 * **Type**: DSP (64-bit Profile)
13337 *
13338 * **Syntax**:\n
13339 * ~~~
13340 * URSUB64 Rd, Rs1, Rs2
13341 * ~~~
13342 *
13343 * **Purpose**:\n
13344 * Perform a 64-bit unsigned integer subtraction. The result is halved to avoid overflow or
13345 * saturation.
13346 *
13347 * **RV32 Description**:\n
13348 * This instruction subtracts the 64-bit unsigned integer of an even/odd pair of
13349 * registers specified by Rs2(4,1) from the 64-bit unsigned integer of an even/odd pair of registers
13350 * specified by Rs1(4,1). The subtraction result is first logically right-shifted by 1 bit and then written
13351 * to an even/odd pair of registers specified by Rd(4,1).
13352 * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
13353 * includes register 2d and 2d+1.
13354 * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
13355 * of the pair contains the low 32-bit of the result.
13356 *
13357 * **RV64 Description**:\n
13358 * This instruction subtracts the 64-bit unsigned integer in Rs2 from the 64-bit
13359 * unsigned integer in Rs1. The subtraction result is first logically right-shifted by 1 bit and then
13360 * written to Rd.
13361 *
13362 * **Operations**:\n
13363 * ~~~
13364 * * RV32:
13365 * t_L = CONCAT(Rt(4,1),1'b0); t_H = CONCAT(Rt(4,1),1'b1);
13366 * a_L = CONCAT(Ra(4,1),1'b0); a_H = CONCAT(Ra(4,1),1'b1);
13367 * b_L = CONCAT(Rb(4,1),1'b0); b_H = CONCAT(Rb(4,1),1'b1);
13368 * R[t_H].R[t_L] = (R[a_H].R[a_L] - R[b_H].R[b_L]) u>> 1;
13369 * * RV64:
13370 * Rd = (Rs1 - Rs2) u>> 1;
13371 * ~~~
13372 *
13373 * \param [in] a unsigned long long type of value stored in a
13374 * \param [in] b unsigned long long type of value stored in b
13375 * \return value stored in unsigned long long type
13376 */
__RV_URSUB64(unsigned long long a,unsigned long long b)13377 __STATIC_FORCEINLINE unsigned long long __RV_URSUB64(unsigned long long a, unsigned long long b)
13378 {
13379 register unsigned long long result;
13380 __ASM volatile("ursub64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
13381 return result;
13382 }
13383 /* ===== Inline Function End for 3.184. URSUB64 ===== */
13384
13385 /* ===== Inline Function Start for 3.185. URSUBW ===== */
13386 /**
13387 * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
13388 * \brief URSUBW (32-bit Unsigned Halving Subtraction)
13389 * \details
13390 * **Type**: DSP
13391 *
13392 * **Syntax**:\n
13393 * ~~~
13394 * URSUBW Rd, Rs1, Rs2
13395 * ~~~
13396 *
13397 * **Purpose**:\n
13398 * Subtract 32-bit unsigned integers and the result is halved to avoid overflow or saturation.
13399 *
13400 * **Description**:\n
 * This instruction subtracts the first 32-bit unsigned integer in Rs2 from the first 32-bit
 * unsigned integer in Rs1. The result is first logically right-shifted by 1 bit and then sign-extended and
13403 * written to Rd.
13404 *
13405 * **Examples**:\n
13406 * ~~~
13407 * * Ra = 0x7FFFFFFF, Rb = 0x80000000 Rt = 0xFFFFFFFF
13408 * * Ra = 0x80000000, Rb = 0x7FFFFFFF Rt = 0x00000000
13409 * * Ra = 0x80000000, Rb = 0x40000000 Rt = 0x20000000
13410 * ~~~
13411 *
13412 * **Operations**:\n
13413 * ~~~
13414 * * RV32:
13415 * Rd[31:0] = (Rs1[31:0] - Rs2[31:0]) u>> 1;
13416 * * RV64:
13417 * resw[31:0] = (Rs1[31:0] - Rs2[31:0]) u>> 1;
13418 * Rd[63:0] = SE(resw[31:0]);
13419 * ~~~
13420 *
13421 * \param [in] a unsigned int type of value stored in a
13422 * \param [in] b unsigned int type of value stored in b
13423 * \return value stored in unsigned long type
13424 */
__RV_URSUBW(unsigned int a,unsigned int b)13425 __STATIC_FORCEINLINE unsigned long __RV_URSUBW(unsigned int a, unsigned int b)
13426 {
13427 register unsigned long result;
13428 __ASM volatile("ursubw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
13429 return result;
13430 }
13431 /* ===== Inline Function End for 3.185. URSUBW ===== */
13432
13433 /* ===== Inline Function Start for 3.186. WEXTI ===== */
13434 /**
13435 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
13436 * \brief WEXTI (Extract Word from 64-bit Immediate)
13437 * \details
13438 * **Type**: DSP
13439 *
13440 * **Syntax**:\n
13441 * ~~~
13442 * WEXTI Rd, Rs1, #LSBloc
13443 * ~~~
13444 *
13445 * **Purpose**:\n
13446 * Extract a 32-bit word from a 64-bit value stored in an even/odd pair of registers (RV32) or
13447 * a register (RV64) starting from a specified immediate LSB bit position.
13448 *
13449 * **RV32 Description**:\n
13450 * This instruction extracts a 32-bit word from a 64-bit value of an even/odd pair of registers specified
13451 * by Rs1(4,1) starting from a specified immediate LSB bit position, #LSBloc. The extracted word is
13452 * written to Rd.
13453 * Rs1(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register
13454 * pair includes register 2d and 2d+1.
13455 * The odd `2d+1` register of the pair contains the high 32-bit of the 64-bit value and the even `2d`
13456 * register of the pair contains the low 32-bit of the 64-bit value.
13457 *
13458 * **RV64 Description**:\n
13459 * This instruction extracts a 32-bit word from a 64-bit value in Rs1 starting from a specified
13460 * immediate LSB bit position, #LSBloc. The extracted word is sign-extended and written to lower 32-
13461 * bit of Rd.
13462 *
13463 * **Operations**:\n
13464 * ~~~
13465 * * RV32:
 * Idx0 = CONCAT(Rs1(4,1),1'b0); Idx1 = CONCAT(Rs1(4,1),1'b1);
13467 * src[63:0] = Concat(R[Idx1], R[Idx0]);
13468 * Rd = src[31+LSBloc:LSBloc];
13469 * * RV64:
13470 * ExtractW = Rs1[31+LSBloc:LSBloc];
13471 * Rd = SE(ExtractW)
13472 * ~~~
13473 *
13474 * \param [in] a long long type of value stored in a
13475 * \param [in] b unsigned int type of value stored in b
13476 * \return value stored in unsigned long type
13477 */
/* `a` is evaluated once and converted to long long before being handed to the
 * instruction as a register operand (%1). `b` is passed through the "K"
 * constraint (%2), i.e. as an immediate, so it has to be a constant expression.
 * The statement expression yields the unsigned long written to %0. */
#define __RV_WEXTI(a, b)    \
    ({    \
        unsigned long result;    \
        long long __a = (long long)(a);    \
        __ASM volatile("wexti %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b));    \
        result;    \
    })
13485 /* ===== Inline Function End for 3.186. WEXTI ===== */
13486
13487 /* ===== Inline Function Start for 3.187. WEXT ===== */
13488 /**
13489 * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
13490 * \brief WEXT (Extract Word from 64-bit)
13491 * \details
13492 * **Type**: DSP
13493 *
13494 * **Syntax**:\n
13495 * ~~~
13496 * WEXT Rd, Rs1, Rs2
13497 * ~~~
13498 *
13499 * **Purpose**:\n
13500 * Extract a 32-bit word from a 64-bit value stored in an even/odd pair of registers (RV32) or
13501 * a register (RV64) starting from a specified LSB bit position in a register.
13502 *
13503 * **RV32 Description**:\n
13504 * This instruction extracts a 32-bit word from a 64-bit value of an even/odd pair of registers specified
13505 * by Rs1(4,1) starting from a specified LSB bit position, specified in Rs2[4:0]. The extracted word is
13506 * written to Rd.
13507 * Rs1(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register
13508 * pair includes register 2d and 2d+1.
13509 * The odd `2d+1` register of the pair contains the high 32-bit of the 64-bit value and the even `2d`
13510 * register of the pair contains the low 32-bit of the 64-bit value.
13511 *
13512 * **Operations**:\n
13513 * ~~~
13514 * * RV32:
13515 * Idx0 = CONCAT(Rs1(4,1),1'b0); Idx1 = CONCAT(Rs1(4,1),1'b1);
13516 * src[63:0] = Concat(R[Idx1], R[Idx0]);
13517 * LSBloc = Rs2[4:0];
13518 * Rd = src[31+LSBloc:LSBloc];
13519 * * RV64:
13520 * LSBloc = Rs2[4:0];
13521 * ExtractW = Rs1[31+LSBloc:LSBloc];
13522 * Rd = SE(ExtractW)
13523 * ~~~
13524 *
13525 * \param [in] a long long type of value stored in a
13526 * \param [in] b unsigned int type of value stored in b
13527 * \return value stored in unsigned long type
13528 */
__RV_WEXT(long long a,unsigned int b)13529 __STATIC_FORCEINLINE unsigned long __RV_WEXT(long long a, unsigned int b)
13530 {
13531 register unsigned long result;
13532 __ASM volatile("wext %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
13533 return result;
13534 }
13535 /* ===== Inline Function End for 3.187. WEXT ===== */
13536
13537 /* ===== Inline Function Start for 3.188.1. ZUNPKD810 ===== */
13538 /**
13539 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
13540 * \brief ZUNPKD810 (Unsigned Unpacking Bytes 1 & 0)
13541 * \details
13542 * **Type**: DSP
13543 *
13544 * **Syntax**:\n
13545 * ~~~
13546 * ZUNPKD8xy Rd, Rs1
13547 * xy = {10, 20, 30, 31, 32}
13548 * ~~~
13549 *
13550 * **Purpose**:\n
13551 * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned
13552 * halfwords of 32-bit chunks in a register.
13553 *
13554 * **Description**:\n
13555 * For the `ZUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into
13556 * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit
13557 * chunks in Rd.
13558 *
13559 * **Operations**:\n
13560 * ~~~
13561 * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x])
13562 * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y])
13563 * // ZUNPKD810, x=1,y=0
13564 * // ZUNPKD820, x=2,y=0
13565 * // ZUNPKD830, x=3,y=0
13566 * // ZUNPKD831, x=3,y=1
13567 * // ZUNPKD832, x=3,y=2
13568 * for RV32: m=0,
13569 * for RV64: m=1...0
13570 * ~~~
13571 *
13572 * \param [in] a unsigned long type of value stored in a
13573 * \return value stored in unsigned long type
13574 */
__RV_ZUNPKD810(unsigned long a)13575 __STATIC_FORCEINLINE unsigned long __RV_ZUNPKD810(unsigned long a)
13576 {
13577 register unsigned long result;
13578 __ASM volatile("zunpkd810 %0, %1" : "=r"(result) : "r"(a));
13579 return result;
13580 }
13581 /* ===== Inline Function End for 3.188.1. ZUNPKD810 ===== */
13582
13583 /* ===== Inline Function Start for 3.188.2. ZUNPKD820 ===== */
13584 /**
13585 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
13586 * \brief ZUNPKD820 (Unsigned Unpacking Bytes 2 & 0)
13587 * \details
13588 * **Type**: DSP
13589 *
13590 * **Syntax**:\n
13591 * ~~~
13592 * ZUNPKD8xy Rd, Rs1
13593 * xy = {10, 20, 30, 31, 32}
13594 * ~~~
13595 *
13596 * **Purpose**:\n
13597 * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned
13598 * halfwords of 32-bit chunks in a register.
13599 *
13600 * **Description**:\n
 * For the `ZUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
13602 * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit
13603 * chunks in Rd.
13604 *
13605 * **Operations**:\n
13606 * ~~~
13607 * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x])
13608 * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y])
13609 * // ZUNPKD810, x=1,y=0
13610 * // ZUNPKD820, x=2,y=0
13611 * // ZUNPKD830, x=3,y=0
13612 * // ZUNPKD831, x=3,y=1
13613 * // ZUNPKD832, x=3,y=2
13614 * for RV32: m=0,
13615 * for RV64: m=1...0
13616 * ~~~
13617 *
13618 * \param [in] a unsigned long type of value stored in a
13619 * \return value stored in unsigned long type
13620 */
__RV_ZUNPKD820(unsigned long a)13621 __STATIC_FORCEINLINE unsigned long __RV_ZUNPKD820(unsigned long a)
13622 {
13623 register unsigned long result;
13624 __ASM volatile("zunpkd820 %0, %1" : "=r"(result) : "r"(a));
13625 return result;
13626 }
13627 /* ===== Inline Function End for 3.188.2. ZUNPKD820 ===== */
13628
13629 /* ===== Inline Function Start for 3.188.3. ZUNPKD830 ===== */
13630 /**
13631 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
13632 * \brief ZUNPKD830 (Unsigned Unpacking Bytes 3 & 0)
13633 * \details
13634 * **Type**: DSP
13635 *
13636 * **Syntax**:\n
13637 * ~~~
13638 * ZUNPKD8xy Rd, Rs1
13639 * xy = {10, 20, 30, 31, 32}
13640 * ~~~
13641 *
13642 * **Purpose**:\n
13643 * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned
13644 * halfwords of 32-bit chunks in a register.
13645 *
13646 * **Description**:\n
 * For the `ZUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
13648 * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit
13649 * chunks in Rd.
13650 *
13651 * **Operations**:\n
13652 * ~~~
13653 * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x])
13654 * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y])
13655 * // ZUNPKD810, x=1,y=0
13656 * // ZUNPKD820, x=2,y=0
13657 * // ZUNPKD830, x=3,y=0
13658 * // ZUNPKD831, x=3,y=1
13659 * // ZUNPKD832, x=3,y=2
13660 * for RV32: m=0,
13661 * for RV64: m=1...0
13662 * ~~~
13663 *
13664 * \param [in] a unsigned long type of value stored in a
13665 * \return value stored in unsigned long type
13666 */
__RV_ZUNPKD830(unsigned long a)13667 __STATIC_FORCEINLINE unsigned long __RV_ZUNPKD830(unsigned long a)
13668 {
13669 register unsigned long result;
13670 __ASM volatile("zunpkd830 %0, %1" : "=r"(result) : "r"(a));
13671 return result;
13672 }
13673 /* ===== Inline Function End for 3.188.3. ZUNPKD830 ===== */
13674
13675 /* ===== Inline Function Start for 3.188.4. ZUNPKD831 ===== */
13676 /**
13677 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
13678 * \brief ZUNPKD831 (Unsigned Unpacking Bytes 3 & 1)
13679 * \details
13680 * **Type**: DSP
13681 *
13682 * **Syntax**:\n
13683 * ~~~
13684 * ZUNPKD8xy Rd, Rs1
13685 * xy = {10, 20, 30, 31, 32}
13686 * ~~~
13687 *
13688 * **Purpose**:\n
13689 * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned
13690 * halfwords of 32-bit chunks in a register.
13691 *
13692 * **Description**:\n
 * For the `ZUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
13694 * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit
13695 * chunks in Rd.
13696 *
13697 * **Operations**:\n
13698 * ~~~
13699 * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x])
13700 * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y])
13701 * // ZUNPKD810, x=1,y=0
13702 * // ZUNPKD820, x=2,y=0
13703 * // ZUNPKD830, x=3,y=0
13704 * // ZUNPKD831, x=3,y=1
13705 * // ZUNPKD832, x=3,y=2
13706 * for RV32: m=0,
13707 * for RV64: m=1...0
13708 * ~~~
13709 *
13710 * \param [in] a unsigned long type of value stored in a
13711 * \return value stored in unsigned long type
13712 */
__RV_ZUNPKD831(unsigned long a)13713 __STATIC_FORCEINLINE unsigned long __RV_ZUNPKD831(unsigned long a)
13714 {
13715 register unsigned long result;
13716 __ASM volatile("zunpkd831 %0, %1" : "=r"(result) : "r"(a));
13717 return result;
13718 }
13719 /* ===== Inline Function End for 3.188.4. ZUNPKD831 ===== */
13720
13721 /* ===== Inline Function Start for 3.188.5. ZUNPKD832 ===== */
13722 /**
13723 * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
13724 * \brief ZUNPKD832 (Unsigned Unpacking Bytes 3 & 2)
13725 * \details
13726 * **Type**: DSP
13727 *
13728 * **Syntax**:\n
13729 * ~~~
13730 * ZUNPKD8xy Rd, Rs1
13731 * xy = {10, 20, 30, 31, 32}
13732 * ~~~
13733 *
13734 * **Purpose**:\n
13735 * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned
13736 * halfwords of 32-bit chunks in a register.
13737 *
13738 * **Description**:\n
 * For the `ZUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
13740 * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit
13741 * chunks in Rd.
13742 *
13743 * **Operations**:\n
13744 * ~~~
13745 * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x])
13746 * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y])
13747 * // ZUNPKD810, x=1,y=0
13748 * // ZUNPKD820, x=2,y=0
13749 * // ZUNPKD830, x=3,y=0
13750 * // ZUNPKD831, x=3,y=1
13751 * // ZUNPKD832, x=3,y=2
13752 * for RV32: m=0,
13753 * for RV64: m=1...0
13754 * ~~~
13755 *
13756 * \param [in] a unsigned long type of value stored in a
13757 * \return value stored in unsigned long type
13758 */
__RV_ZUNPKD832(unsigned long a)13759 __STATIC_FORCEINLINE unsigned long __RV_ZUNPKD832(unsigned long a)
13760 {
13761 register unsigned long result;
13762 __ASM volatile("zunpkd832 %0, %1" : "=r"(result) : "r"(a));
13763 return result;
13764 }
13765 /* ===== Inline Function End for 3.188.5. ZUNPKD832 ===== */
13766
13767 #if (__RISCV_XLEN == 64) || defined(__ONLY_FOR_DOXYGEN_DOCUMENT_GENERATION__)
13768
13769 /* ===== Inline Function Start for 4.1. ADD32 ===== */
13770 /**
13771 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
13772 * \brief ADD32 (SIMD 32-bit Addition)
13773 * \details
13774 * **Type**: SIMD (RV64 Only)
13775 *
13776 * **Syntax**:\n
13777 * ~~~
13778 * ADD32 Rd, Rs1, Rs2
13779 * ~~~
13780 *
13781 * **Purpose**:\n
13782 * Do 32-bit integer element additions simultaneously.
13783 *
13784 * **Description**:\n
13785 * This instruction adds the 32-bit integer elements in Rs1 with the 32-bit integer
13786 * elements in Rs2, and then writes the 32-bit element results to Rd.
13787 *
13788 * **Note**:\n
13789 * This instruction can be used for either signed or unsigned addition.
13790 *
13791 * **Operations**:\n
13792 * ~~~
13793 * Rd.W[x] = Rs1.W[x] + Rs2.W[x];
13794 * for RV64: x=1...0
13795 * ~~~
13796 *
13797 * \param [in] a unsigned long type of value stored in a
13798 * \param [in] b unsigned long type of value stored in b
13799 * \return value stored in unsigned long type
13800 */
__RV_ADD32(unsigned long a,unsigned long b)13801 __STATIC_FORCEINLINE unsigned long __RV_ADD32(unsigned long a, unsigned long b)
13802 {
13803 register unsigned long result;
13804 __ASM volatile("add32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
13805 return result;
13806 }
13807 /* ===== Inline Function End for 4.1. ADD32 ===== */
13808
13809 /* ===== Inline Function Start for 4.2. CRAS32 ===== */
13810 /**
13811 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
13812 * \brief CRAS32 (SIMD 32-bit Cross Addition & Subtraction)
13813 * \details
13814 * **Type**: SIMD (RV64 Only)
13815 *
13816 * **Syntax**:\n
13817 * ~~~
13818 * CRAS32 Rd, Rs1, Rs2
13819 * ~~~
13820 *
13821 * **Purpose**:\n
13822 * Do 32-bit integer element addition and 32-bit integer element subtraction in a 64-bit
13823 * chunk simultaneously. Operands are from crossed 32-bit elements.
13824 *
13825 * **Description**:\n
13826 * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit
13827 * integer element in [31:0] of Rs2, and writes the result to [63:32] of Rd; at the same time, it subtracts
13828 * the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer element in [31:0] of Rs1, and
13829 * writes the result to [31:0] of Rd.
13830 *
13831 * **Note**:\n
13832 * This instruction can be used for either signed or unsigned operations.
13833 *
13834 * **Operations**:\n
13835 * ~~~
13836 * Rd.W[1] = Rs1.W[1] + Rs2.W[0];
13837 * Rd.W[0] = Rs1.W[0] - Rs2.W[1];
13838 * ~~~
13839 *
13840 * \param [in] a unsigned long type of value stored in a
13841 * \param [in] b unsigned long type of value stored in b
13842 * \return value stored in unsigned long type
13843 */
__RV_CRAS32(unsigned long a,unsigned long b)13844 __STATIC_FORCEINLINE unsigned long __RV_CRAS32(unsigned long a, unsigned long b)
13845 {
13846 register unsigned long result;
13847 __ASM volatile("cras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
13848 return result;
13849 }
13850 /* ===== Inline Function End for 4.2. CRAS32 ===== */
13851
13852 /* ===== Inline Function Start for 4.3. CRSA32 ===== */
13853 /**
13854 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
13855 * \brief CRSA32 (SIMD 32-bit Cross Subtraction & Addition)
13856 * \details
13857 * **Type**: SIMD (RV64 Only)
13858 *
13859 * **Syntax**:\n
13860 * ~~~
13861 * CRSA32 Rd, Rs1, Rs2
13862 * ~~~
13863 *
13864 * **Purpose**:\n
13865 * Do 32-bit integer element subtraction and 32-bit integer element addition in a 64-bit
13866 * chunk simultaneously. Operands are from crossed 32-bit elements.
 *
 * **Description**:\n
13868 * This instruction subtracts the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element
13869 * in [63:32] of Rs1, and writes the result to [63:32] of Rd; at the same time, it adds the 32-bit integer
13870 * element in [31:0] of Rs1 with the 32-bit integer element in [63:32] of Rs2, and writes the result to
13871 * [31:0] of Rd
13872 *
13873 * **Note**:\n
13874 * This instruction can be used for either signed or unsigned operations.
13875 *
13876 * **Operations**:\n
13877 * ~~~
13878 * Rd.W[1] = Rs1.W[1] - Rs2.W[0];
13879 * Rd.W[0] = Rs1.W[0] + Rs2.W[1];
13880 * ~~~
13881 *
13882 * \param [in] a unsigned long type of value stored in a
13883 * \param [in] b unsigned long type of value stored in b
13884 * \return value stored in unsigned long type
13885 */
__RV_CRSA32(unsigned long a,unsigned long b)13886 __STATIC_FORCEINLINE unsigned long __RV_CRSA32(unsigned long a, unsigned long b)
13887 {
13888 register unsigned long result;
13889 __ASM volatile("crsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
13890 return result;
13891 }
13892 /* ===== Inline Function End for 4.3. CRSA32 ===== */
13893
13894 /* ===== Inline Function Start for 4.4. KABS32 ===== */
13895 /**
13896 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
13897 * \brief KABS32 (Scalar 32-bit Absolute Value with Saturation)
13898 * \details
13899 * **Type**: DSP (RV64 Only)
 *
 * **Encoding** (bit range: field):\n
 * ~~~
 * [24:20]: KABS32 (10010)
 * [19:15]: Rs1
 * [14:12]: 000
 * [11:7]:  Rd
 * [6:0]:   GE80B (1111111)
 * ~~~
13912 *
13913 * **Syntax**:\n
13914 * ~~~
13915 * KABS32 Rd, Rs1
13916 * ~~~
13917 *
13918 * **Purpose**:\n
13919 * Get the absolute value of signed 32-bit integer elements in a general register.
13920 *
13921 * **Description**:\n
13922 * This instruction calculates the absolute value of signed 32-bit integer elements stored
13923 * in Rs1. The results are written to Rd. This instruction with the minimum negative integer input of
13924 * 0x80000000 will produce a saturated output of maximum positive integer of 0x7fffffff and the OV
13925 * flag will be set to 1.
13926 *
13927 * **Operations**:\n
13928 * ~~~
13929 * if (Rs1.W[x] >= 0) {
13930 * res[x] = Rs1.W[x];
13931 * } else {
13932 * If (Rs1.W[x] == 0x80000000) {
13933 * res[x] = 0x7fffffff;
13934 * OV = 1;
13935 * } else {
13936 * res[x] = -Rs1.W[x];
13937 * }
13938 * }
13939 * Rd.W[x] = res[x];
13940 * for RV64: x=1...0
13941 * ~~~
13942 *
13943 * \param [in] a unsigned long type of value stored in a
13944 * \return value stored in unsigned long type
13945 */
__RV_KABS32(unsigned long a)13946 __STATIC_FORCEINLINE unsigned long __RV_KABS32(unsigned long a)
13947 {
13948 register unsigned long result;
13949 __ASM volatile("kabs32 %0, %1" : "=r"(result) : "r"(a));
13950 return result;
13951 }
13952 /* ===== Inline Function End for 4.4. KABS32 ===== */
13953
13954 /* ===== Inline Function Start for 4.5. KADD32 ===== */
13955 /**
13956 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
13957 * \brief KADD32 (SIMD 32-bit Signed Saturating Addition)
13958 * \details
13959 * **Type**: SIMD (RV64 Only)
13960 *
13961 * **Syntax**:\n
13962 * ~~~
13963 * KADD32 Rd, Rs1, Rs2
13964 * ~~~
13965 *
13966 * **Purpose**:\n
13967 * Do 32-bit signed integer element saturating additions simultaneously.
13968 *
13969 * **Description**:\n
13970 * This instruction adds the 32-bit signed integer elements in Rs1 with the 32-bit signed
13971 * integer elements in Rs2. If any of the results are beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1),
13972 * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
13973 *
13974 * **Operations**:\n
13975 * ~~~
13976 * res[x] = Rs1.W[x] + Rs2.W[x];
13977 * if (res[x] > (2^31)-1) {
13978 * res[x] = (2^31)-1;
13979 * OV = 1;
13980 * } else if (res[x] < -2^31) {
13981 * res[x] = -2^31;
13982 * OV = 1;
13983 * }
13984 * Rd.W[x] = res[x];
13985 * for RV64: x=1...0
13986 * ~~~
13987 *
13988 * \param [in] a unsigned long type of value stored in a
13989 * \param [in] b unsigned long type of value stored in b
13990 * \return value stored in unsigned long type
13991 */
__RV_KADD32(unsigned long a,unsigned long b)13992 __STATIC_FORCEINLINE unsigned long __RV_KADD32(unsigned long a, unsigned long b)
13993 {
13994 register unsigned long result;
13995 __ASM volatile("kadd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
13996 return result;
13997 }
13998 /* ===== Inline Function End for 4.5. KADD32 ===== */
13999
14000 /* ===== Inline Function Start for 4.6. KCRAS32 ===== */
14001 /**
14002 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
14003 * \brief KCRAS32 (SIMD 32-bit Signed Saturating Cross Addition & Subtraction)
14004 * \details
 * **Type**: SIMD (RV64 Only)
14006 *
14007 * **Syntax**:\n
14008 * ~~~
14009 * KCRAS32 Rd, Rs1, Rs2
14010 * ~~~
14011 *
14012 * **Purpose**:\n
14013 * Do 32-bit signed integer element saturating addition and 32-bit signed integer element
14014 * saturating subtraction in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements.
14015 *
14016 * **Description**:\n
14017 * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit
14018 * integer element in [31:0] of Rs2; at the same time, it subtracts the 32-bit integer element in [63:32] of
14019 * Rs2 from the 32-bit integer element in [31:0] of Rs1. If any of the results are beyond the Q31 number
14020 * range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated
14021 * results are written to [63:32] of Rd for addition and [31:0] of Rd for subtraction.
14022 *
14023 * **Operations**:\n
14024 * ~~~
14025 * res[1] = Rs1.W[1] + Rs2.W[0];
14026 * res[0] = Rs1.W[0] - Rs2.W[1];
14027 * if (res[x] > (2^31)-1) {
14028 * res[x] = (2^31)-1;
14029 * OV = 1;
 * } else if (res[x] < -2^31) {
14031 * res[x] = -2^31;
14032 * OV = 1;
14033 * }
14034 * Rd.W[1] = res[1];
14035 * Rd.W[0] = res[0];
14036 * for RV64, x=1...0
14037 * ~~~
14038 *
14039 * \param [in] a unsigned long type of value stored in a
14040 * \param [in] b unsigned long type of value stored in b
14041 * \return value stored in unsigned long type
14042 */
__RV_KCRAS32(unsigned long a,unsigned long b)14043 __STATIC_FORCEINLINE unsigned long __RV_KCRAS32(unsigned long a, unsigned long b)
14044 {
14045 register unsigned long result;
14046 __ASM volatile("kcras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
14047 return result;
14048 }
14049 /* ===== Inline Function End for 4.6. KCRAS32 ===== */
14050
14051 /* ===== Inline Function Start for 4.7. KCRSA32 ===== */
14052 /**
14053 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
14054 * \brief KCRSA32 (SIMD 32-bit Signed Saturating Cross Subtraction & Addition)
14055 * \details
14056 * **Type**: SIMD (RV64 Only)
14057 *
14058 * **Syntax**:\n
14059 * ~~~
14060 * KCRSA32 Rd, Rs1, Rs2
14061 * ~~~
14062 *
14063 * **Purpose**:\n
14064 * Do 32-bit signed integer element saturating subtraction and 32-bit signed integer element
14065 * saturating addition in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements.
 *
 * **Description**:\n
14067 * This instruction subtracts the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element
14068 * in [63:32] of Rs1; at the same time, it adds the 32-bit integer element in [31:0] of Rs1 with the 32-bit
14069 * integer element in [63:32] of Rs2. If any of the results are beyond the Q31 number range (-2^31 <= Q31
14070 * <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to
14071 * [63:32] of Rd for subtraction and [31:0] of Rd for addition.
14072 *
14073 * **Operations**:\n
14074 * ~~~
14075 * res[1] = Rs1.W[1] - Rs2.W[0];
14076 * res[0] = Rs1.W[0] + Rs2.W[1];
14077 * if (res[x] > (2^31)-1) {
14078 * res[x] = (2^31)-1;
14079 * OV = 1;
 * } else if (res[x] < -2^31) {
14081 * res[x] = -2^31;
14082 * OV = 1;
14083 * }
14084 * Rd.W[1] = res[1];
14085 * Rd.W[0] = res[0];
14086 * for RV64, x=1...0
14087 * ~~~
14088 *
14089 * \param [in] a unsigned long type of value stored in a
14090 * \param [in] b unsigned long type of value stored in b
14091 * \return value stored in unsigned long type
14092 */
__RV_KCRSA32(unsigned long a,unsigned long b)14093 __STATIC_FORCEINLINE unsigned long __RV_KCRSA32(unsigned long a, unsigned long b)
14094 {
14095 register unsigned long result;
14096 __ASM volatile("kcrsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
14097 return result;
14098 }
14099 /* ===== Inline Function End for 4.7. KCRSA32 ===== */
14100
14101 /* ===== Inline Function Start for 4.8.1. KDMBB16 ===== */
14102 /**
14103 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
14104 * \brief KDMBB16 (SIMD Signed Saturating Double Multiply B16 x B16)
14105 * \details
14106 * **Type**: SIMD (RV64 only)
14107 *
14108 * **Syntax**:\n
14109 * ~~~
14110 * KDMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
14111 * ~~~
14112 *
14113 * **Purpose**:\n
14114 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
14115 * of the 32-bit chunks in registers and then double and saturate the Q31 results into the 32-bit chunks
14116 * in the destination register. If saturation happens, an overflow flag OV will be set.
14117 *
14118 * **Description**:\n
14119 * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
14120 * or bottom 16-bit Q15 content of the 32-bit portions in Rs2. The Q30 results are then doubled and
14121 * saturated into Q31 values. The Q31 values are then written into the 32-bit chunks in Rd. When both
14122 * the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated to 0x7FFFFFFF
14123 * and the overflow flag OV will be set.
14124 *
14125 * **Operations**:\n
14126 * ~~~
14127 * // KDMBB16: (x,y,z)=(0,0,0),(2,2,1)
14128 * // KDMBT16: (x,y,z)=(0,1,0),(2,3,1)
14129 * // KDMTT16: (x,y,z)=(1,1,0),(3,3,1)
14130 * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
14131 * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
14132 * Mresult[z] = aop[z] * bop[z];
14133 * resQ31[z] = Mresult[z] << 1;
14134 * } else {
14135 * resQ31[z] = 0x7FFFFFFF;
14136 * OV = 1;
14137 * }
14138 * Rd.W[z] = resQ31[z];
14139 * ~~~
14140 *
14141 * \param [in] a unsigned long type of value stored in a
14142 * \param [in] b unsigned long type of value stored in b
14143 * \return value stored in unsigned long type
14144 */
__RV_KDMBB16(unsigned long a,unsigned long b)14145 __STATIC_FORCEINLINE unsigned long __RV_KDMBB16(unsigned long a, unsigned long b)
14146 {
14147 register unsigned long result;
14148 __ASM volatile("kdmbb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
14149 return result;
14150 }
14151 /* ===== Inline Function End for 4.8.1. KDMBB16 ===== */
14152
14153 /* ===== Inline Function Start for 4.8.2. KDMBT16 ===== */
14154 /**
14155 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
14156 * \brief KDMBT16 (SIMD Signed Saturating Double Multiply B16 x T16)
14157 * \details
14158 * **Type**: SIMD (RV64 only)
14159 *
14160 * **Syntax**:\n
14161 * ~~~
14162 * KDMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
14163 * ~~~
14164 *
14165 * **Purpose**:\n
14166 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
14167 * of the 32-bit chunks in registers and then double and saturate the Q31 results into the 32-bit chunks
14168 * in the destination register. If saturation happens, an overflow flag OV will be set.
14169 *
14170 * **Description**:\n
14171 * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
14172 * or bottom 16-bit Q15 content of the 32-bit portions in Rs2. The Q30 results are then doubled and
14173 * saturated into Q31 values. The Q31 values are then written into the 32-bit chunks in Rd. When both
14174 * the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated to 0x7FFFFFFF
14175 * and the overflow flag OV will be set.
14176 *
14177 * **Operations**:\n
14178 * ~~~
14179 * // KDMBB16: (x,y,z)=(0,0,0),(2,2,1)
14180 * // KDMBT16: (x,y,z)=(0,1,0),(2,3,1)
14181 * // KDMTT16: (x,y,z)=(1,1,0),(3,3,1)
14182 * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
14183 * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
14184 * Mresult[z] = aop[z] * bop[z];
14185 * resQ31[z] = Mresult[z] << 1;
14186 * } else {
14187 * resQ31[z] = 0x7FFFFFFF;
14188 * OV = 1;
14189 * }
14190 * Rd.W[z] = resQ31[z];
14191 * ~~~
14192 *
14193 * \param [in] a unsigned long type of value stored in a
14194 * \param [in] b unsigned long type of value stored in b
14195 * \return value stored in unsigned long type
14196 */
__RV_KDMBT16(unsigned long a,unsigned long b)14197 __STATIC_FORCEINLINE unsigned long __RV_KDMBT16(unsigned long a, unsigned long b)
14198 {
14199 register unsigned long result;
14200 __ASM volatile("kdmbt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
14201 return result;
14202 }
14203 /* ===== Inline Function End for 4.8.2. KDMBT16 ===== */
14204
14205 /* ===== Inline Function Start for 4.8.3. KDMTT16 ===== */
14206 /**
14207 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
14208 * \brief KDMTT16 (SIMD Signed Saturating Double Multiply T16 x T16)
14209 * \details
14210 * **Type**: SIMD (RV64 only)
14211 *
14212 * **Syntax**:\n
14213 * ~~~
14214 * KDMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
14215 * ~~~
14216 *
14217 * **Purpose**:\n
14218 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
14219 * of the 32-bit chunks in registers and then double and saturate the Q31 results into the 32-bit chunks
14220 * in the destination register. If saturation happens, an overflow flag OV will be set.
14221 *
14222 * **Description**:\n
14223 * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
14224 * or bottom 16-bit Q15 content of the 32-bit portions in Rs2. The Q30 results are then doubled and
14225 * saturated into Q31 values. The Q31 values are then written into the 32-bit chunks in Rd. When both
14226 * the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated to 0x7FFFFFFF
14227 * and the overflow flag OV will be set.
14228 *
14229 * **Operations**:\n
14230 * ~~~
14231 * // KDMBB16: (x,y,z)=(0,0,0),(2,2,1)
14232 * // KDMBT16: (x,y,z)=(0,1,0),(2,3,1)
14233 * // KDMTT16: (x,y,z)=(1,1,0),(3,3,1)
14234 * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
14235 * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
14236 * Mresult[z] = aop[z] * bop[z];
14237 * resQ31[z] = Mresult[z] << 1;
14238 * } else {
14239 * resQ31[z] = 0x7FFFFFFF;
14240 * OV = 1;
14241 * }
14242 * Rd.W[z] = resQ31[z];
14243 * ~~~
14244 *
14245 * \param [in] a unsigned long type of value stored in a
14246 * \param [in] b unsigned long type of value stored in b
14247 * \return value stored in unsigned long type
14248 */
__RV_KDMTT16(unsigned long a,unsigned long b)14249 __STATIC_FORCEINLINE unsigned long __RV_KDMTT16(unsigned long a, unsigned long b)
14250 {
14251 register unsigned long result;
14252 __ASM volatile("kdmtt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
14253 return result;
14254 }
14255 /* ===== Inline Function End for 4.8.3. KDMTT16 ===== */
14256
14257 /* ===== Inline Function Start for 4.9.1. KDMABB16 ===== */
14258 /**
14259 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
14260 * \brief KDMABB16 (SIMD Signed Saturating Double Multiply Addition B16 x B16)
14261 * \details
14262 * **Type**: SIMD (RV64 only)
14263 *
14264 * **Syntax**:\n
14265 * ~~~
14266 * KDMAxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
14267 * ~~~
14268 *
14269 * **Purpose**:\n
14270 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
14271 * of the 32-bit chunks in registers and then double and saturate the Q31 results, add the results with
14272 * the values of the corresponding 32-bit chunks from the destination register and write the saturated
14273 * addition results back into the corresponding 32-bit chunks of the destination register. If saturation
14274 * happens, an overflow flag OV will be set.
14275 *
14276 * **Description**:\n
14277 * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
14278 * or bottom 16-bit Q15 content of the corresponding 32-bit portions in Rs2. The Q30 results are then
14279 * doubled and saturated into Q31 values. The Q31 values are then added with the content of the
14280 * corresponding 32-bit portions of Rd. If the addition results are beyond the Q31 number range (-2^31 <=
14281 * Q31 <= 2^31-1), they are saturated to the range and the OV flag is set to 1. The results after saturation
14282 * are written back to Rd.
14283 * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
14284 * set.
14285 *
14286 * **Operations**:\n
14287 * ~~~
14288 * // KDMABB16: (x,y,z)=(0,0,0),(2,2,1)
14289 * // KDMABT16: (x,y,z)=(0,1,0),(2,3,1)
14290 * // KDMATT16: (x,y,z)=(1,1,0),(3,3,1)
14291 * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
14292 * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
14293 * Mresult[z] = aop[z] * bop[z];
14294 * resQ31[z] = Mresult[z] << 1;
14295 * } else {
14296 * resQ31[z] = 0x7FFFFFFF;
14297 * OV = 1;
14298 * }
14299 * resadd[z] = Rd.W[z] + resQ31[z];
14300 * if (resadd[z] > (2^31)-1) {
14301 * resadd[z] = (2^31)-1;
14302 * OV = 1;
14303 * } else if (resadd[z] < -2^31) {
14304 * resadd[z] = -2^31;
14305 * OV = 1;
14306 * }
14307 * Rd.W[z] = resadd[z];
14308 * ~~~
14309 *
14310 * \param [in] t unsigned long type of value stored in t
14311 * \param [in] a unsigned long type of value stored in a
14312 * \param [in] b unsigned long type of value stored in b
14313 * \return value stored in unsigned long type
14314 */
__RV_KDMABB16(unsigned long t,unsigned long a,unsigned long b)14315 __STATIC_FORCEINLINE unsigned long __RV_KDMABB16(unsigned long t, unsigned long a, unsigned long b)
14316 {
14317 __ASM volatile("kdmabb16 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
14318 return t;
14319 }
14320 /* ===== Inline Function End for 4.9.1. KDMABB16 ===== */
14321
14322 /* ===== Inline Function Start for 4.9.2. KDMABT16 ===== */
14323 /**
14324 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
14325 * \brief KDMABT16 (SIMD Signed Saturating Double Multiply Addition B16 x T16)
14326 * \details
14327 * **Type**: SIMD (RV64 only)
14328 *
14329 * **Syntax**:\n
14330 * ~~~
14331 * KDMAxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
14332 * ~~~
14333 *
14334 * **Purpose**:\n
14335 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
14336 * of the 32-bit chunks in registers and then double and saturate the Q31 results, add the results with
14337 * the values of the corresponding 32-bit chunks from the destination register and write the saturated
14338 * addition results back into the corresponding 32-bit chunks of the destination register. If saturation
14339 * happens, an overflow flag OV will be set.
14340 *
14341 * **Description**:\n
14342 * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
14343 * or bottom 16-bit Q15 content of the corresponding 32-bit portions in Rs2. The Q30 results are then
14344 * doubled and saturated into Q31 values. The Q31 values are then added with the content of the
14345 * corresponding 32-bit portions of Rd. If the addition results are beyond the Q31 number range (-2^31 <=
14346 * Q31 <= 2^31-1), they are saturated to the range and the OV flag is set to 1. The results after saturation
14347 * are written back to Rd.
14348 * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
14349 * set.
14350 *
14351 * **Operations**:\n
14352 * ~~~
14353 * // KDMABB16: (x,y,z)=(0,0,0),(2,2,1)
14354 * // KDMABT16: (x,y,z)=(0,1,0),(2,3,1)
14355 * // KDMATT16: (x,y,z)=(1,1,0),(3,3,1)
14356 * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
14357 * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
14358 * Mresult[z] = aop[z] * bop[z];
14359 * resQ31[z] = Mresult[z] << 1;
14360 * } else {
14361 * resQ31[z] = 0x7FFFFFFF;
14362 * OV = 1;
14363 * }
14364 * resadd[z] = Rd.W[z] + resQ31[z];
14365 * if (resadd[z] > (2^31)-1) {
14366 * resadd[z] = (2^31)-1;
14367 * OV = 1;
14368 * } else if (resadd[z] < -2^31) {
14369 * resadd[z] = -2^31;
14370 * OV = 1;
14371 * }
14372 * Rd.W[z] = resadd[z];
14373 * ~~~
14374 *
14375 * \param [in] t unsigned long type of value stored in t
14376 * \param [in] a unsigned long type of value stored in a
14377 * \param [in] b unsigned long type of value stored in b
14378 * \return value stored in unsigned long type
14379 */
__RV_KDMABT16(unsigned long t,unsigned long a,unsigned long b)14380 __STATIC_FORCEINLINE unsigned long __RV_KDMABT16(unsigned long t, unsigned long a, unsigned long b)
14381 {
14382 __ASM volatile("kdmabt16 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
14383 return t;
14384 }
14385 /* ===== Inline Function End for 4.9.2. KDMABT16 ===== */
14386
14387 /* ===== Inline Function Start for 4.9.3. KDMATT16 ===== */
14388 /**
14389 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
14390 * \brief KDMATT16 (SIMD Signed Saturating Double Multiply Addition T16 x T16)
14391 * \details
14392 * **Type**: SIMD (RV64 only)
14393 *
14394 * **Syntax**:\n
14395 * ~~~
14396 * KDMAxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
14397 * ~~~
14398 *
14399 * **Purpose**:\n
14400 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
14401 * of the 32-bit chunks in registers and then double and saturate the Q31 results, add the results with
14402 * the values of the corresponding 32-bit chunks from the destination register and write the saturated
14403 * addition results back into the corresponding 32-bit chunks of the destination register. If saturation
14404 * happens, an overflow flag OV will be set.
14405 *
14406 * **Description**:\n
14407 * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
14408 * or bottom 16-bit Q15 content of the corresponding 32-bit portions in Rs2. The Q30 results are then
14409 * doubled and saturated into Q31 values. The Q31 values are then added with the content of the
14410 * corresponding 32-bit portions of Rd. If the addition results are beyond the Q31 number range (-2^31 <=
14411 * Q31 <= 2^31-1), they are saturated to the range and the OV flag is set to 1. The results after saturation
14412 * are written back to Rd.
14413 * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
14414 * set.
14415 *
14416 * **Operations**:\n
14417 * ~~~
14418 * // KDMABB16: (x,y,z)=(0,0,0),(2,2,1)
14419 * // KDMABT16: (x,y,z)=(0,1,0),(2,3,1)
14420 * // KDMATT16: (x,y,z)=(1,1,0),(3,3,1)
14421 * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
14422 * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
14423 * Mresult[z] = aop[z] * bop[z];
14424 * resQ31[z] = Mresult[z] << 1;
14425 * } else {
14426 * resQ31[z] = 0x7FFFFFFF;
14427 * OV = 1;
14428 * }
14429 * resadd[z] = Rd.W[z] + resQ31[z];
14430 * if (resadd[z] > (2^31)-1) {
14431 * resadd[z] = (2^31)-1;
14432 * OV = 1;
14433 * } else if (resadd[z] < -2^31) {
14434 * resadd[z] = -2^31;
14435 * OV = 1;
14436 * }
14437 * Rd.W[z] = resadd[z];
14438 * ~~~
14439 *
14440 * \param [in] t unsigned long type of value stored in t
14441 * \param [in] a unsigned long type of value stored in a
14442 * \param [in] b unsigned long type of value stored in b
14443 * \return value stored in unsigned long type
14444 */
__RV_KDMATT16(unsigned long t,unsigned long a,unsigned long b)14445 __STATIC_FORCEINLINE unsigned long __RV_KDMATT16(unsigned long t, unsigned long a, unsigned long b)
14446 {
14447 __ASM volatile("kdmatt16 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
14448 return t;
14449 }
14450 /* ===== Inline Function End for 4.9.3. KDMATT16 ===== */
14451
14452 /* ===== Inline Function Start for 4.10.1. KHMBB16 ===== */
14453 /**
14454 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
14455 * \brief KHMBB16 (SIMD Signed Saturating Half Multiply B16 x B16)
14456 * \details
14457 * **Type**: SIMD (RV64 Only)
14458 *
14459 * **Syntax**:\n
14460 * ~~~
14461 * KHMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
14462 * ~~~
14463 *
14464 * **Purpose**:\n
14465 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
14466 * of the 32-bit chunks in registers and then right-shift 15 bits to turn the Q30 results into Q15
14467 * numbers again and saturate the Q15 results into the destination register. If saturation happens, an
14468 * overflow flag OV will be set.
14469 *
14470 * **Description**:\n
14471 * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
14472 * or bottom 16-bit Q15 content of the 32-bit portion in Rs2. The Q30 results are then right-shifted 15-
14473 * bits and saturated into Q15 values. The 32-bit Q15 values are then written into the 32-bit chunks in
14474 * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
14475 * to 0x7FFF and the overflow flag OV will be set.
14476 *
14477 * **Operations**:\n
14478 * ~~~
14479 * // KHMBB16: (x,y,z)=(0,0,0),(2,2,1)
14480 * // KHMBT16: (x,y,z)=(0,1,0),(2,3,1)
14481 * // KHMTT16: (x,y,z)=(1,1,0),(3,3,1)
14482 * aop = Rs1.H[x]; bop = Rs2.H[y];
14483 * If (0x8000 != aop | 0x8000 != bop) {
14484 * Mresult[31:0] = aop * bop;
14485 * res[15:0] = Mresult[30:15];
14486 * } else {
14487 * res[15:0] = 0x7FFF;
14488 * OV = 1;
14489 * }
14490 * Rd.W[z] = SE32(res[15:0]);
14491 * ~~~
14492 *
14493 * \param [in] a unsigned long type of value stored in a
14494 * \param [in] b unsigned long type of value stored in b
14495 * \return value stored in unsigned long type
14496 */
__RV_KHMBB16(unsigned long a,unsigned long b)14497 __STATIC_FORCEINLINE unsigned long __RV_KHMBB16(unsigned long a, unsigned long b)
14498 {
14499 register unsigned long result;
14500 __ASM volatile("khmbb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
14501 return result;
14502 }
14503 /* ===== Inline Function End for 4.10.1. KHMBB16 ===== */
14504
14505 /* ===== Inline Function Start for 4.10.2. KHMBT16 ===== */
14506 /**
14507 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
14508 * \brief KHMBT16 (SIMD Signed Saturating Half Multiply B16 x T16)
14509 * \details
14510 * **Type**: SIMD (RV64 Only)
14511 *
14512 * **Syntax**:\n
14513 * ~~~
14514 * KHMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
14515 * ~~~
14516 *
14517 * **Purpose**:\n
14518 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
14519 * of the 32-bit chunks in registers and then right-shift 15 bits to turn the Q30 results into Q15
14520 * numbers again and saturate the Q15 results into the destination register. If saturation happens, an
14521 * overflow flag OV will be set.
14522 *
14523 * **Description**:\n
14524 * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
14525 * or bottom 16-bit Q15 content of the 32-bit portion in Rs2. The Q30 results are then right-shifted 15-
14526 * bits and saturated into Q15 values. The 32-bit Q15 values are then written into the 32-bit chunks in
14527 * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
14528 * to 0x7FFF and the overflow flag OV will be set.
14529 *
14530 * **Operations**:\n
14531 * ~~~
14532 * // KHMBB16: (x,y,z)=(0,0,0),(2,2,1)
14533 * // KHMBT16: (x,y,z)=(0,1,0),(2,3,1)
14534 * // KHMTT16: (x,y,z)=(1,1,0),(3,3,1)
14535 * aop = Rs1.H[x]; bop = Rs2.H[y];
14536 * If (0x8000 != aop | 0x8000 != bop) {
14537 * Mresult[31:0] = aop * bop;
14538 * res[15:0] = Mresult[30:15];
14539 * } else {
14540 * res[15:0] = 0x7FFF;
14541 * OV = 1;
14542 * }
14543 * Rd.W[z] = SE32(res[15:0]);
14544 * ~~~
14545 *
14546 * \param [in] a unsigned long type of value stored in a
14547 * \param [in] b unsigned long type of value stored in b
14548 * \return value stored in unsigned long type
14549 */
__RV_KHMBT16(unsigned long a,unsigned long b)14550 __STATIC_FORCEINLINE unsigned long __RV_KHMBT16(unsigned long a, unsigned long b)
14551 {
14552 register unsigned long result;
14553 __ASM volatile("khmbt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
14554 return result;
14555 }
14556 /* ===== Inline Function End for 4.10.2. KHMBT16 ===== */
14557
14558 /* ===== Inline Function Start for 4.10.3. KHMTT16 ===== */
14559 /**
14560 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
14561 * \brief KHMTT16 (SIMD Signed Saturating Half Multiply T16 x T16)
14562 * \details
14563 * **Type**: SIMD (RV64 Only)
14564 *
14565 * **Syntax**:\n
14566 * ~~~
14567 * KHMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
14568 * ~~~
14569 *
14570 * **Purpose**:\n
14571 * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
14572 * of the 32-bit chunks in registers and then right-shift 15 bits to turn the Q30 results into Q15
14573 * numbers again and saturate the Q15 results into the destination register. If saturation happens, an
14574 * overflow flag OV will be set.
14575 *
14576 * **Description**:\n
14577 * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
14578 * or bottom 16-bit Q15 content of the 32-bit portion in Rs2. The Q30 results are then right-shifted 15-
14579 * bits and saturated into Q15 values. The 32-bit Q15 values are then written into the 32-bit chunks in
14580 * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
14581 * to 0x7FFF and the overflow flag OV will be set.
14582 *
14583 * **Operations**:\n
14584 * ~~~
14585 * // KHMBB16: (x,y,z)=(0,0,0),(2,2,1)
14586 * // KHMBT16: (x,y,z)=(0,1,0),(2,3,1)
14587 * // KHMTT16: (x,y,z)=(1,1,0),(3,3,1)
14588 * aop = Rs1.H[x]; bop = Rs2.H[y];
14589 * If (0x8000 != aop | 0x8000 != bop) {
14590 * Mresult[31:0] = aop * bop;
14591 * res[15:0] = Mresult[30:15];
14592 * } else {
14593 * res[15:0] = 0x7FFF;
14594 * OV = 1;
14595 * }
14596 * Rd.W[z] = SE32(res[15:0]);
14597 * ~~~
14598 *
14599 * \param [in] a unsigned long type of value stored in a
14600 * \param [in] b unsigned long type of value stored in b
14601 * \return value stored in unsigned long type
14602 */
__RV_KHMTT16(unsigned long a,unsigned long b)14603 __STATIC_FORCEINLINE unsigned long __RV_KHMTT16(unsigned long a, unsigned long b)
14604 {
14605 register unsigned long result;
14606 __ASM volatile("khmtt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
14607 return result;
14608 }
14609 /* ===== Inline Function End for 4.10.3. KHMTT16 ===== */
14610
14611 /* ===== Inline Function Start for 4.11.1. KMABB32 ===== */
14612 /**
14613 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT_ADD
14614 * \brief KMABB32 (Saturating Signed Multiply Bottom Words & Add)
14615 * \details
14616 * **Type**: DSP (RV64 Only)
14617 *
14618 * **Syntax**:\n
14619 * ~~~
14620 * KMABB32 Rd, Rs1, Rs2
14621 * KMABT32 Rd, Rs1, Rs2
14622 * KMATT32 Rd, Rs1, Rs2
14623 * ~~~
14624 *
14625 * **Purpose**:\n
14626 * Multiply the signed 32-bit element in a register with the 32-bit element in another register
14627 * and add the result to the content of 64-bit data in the third register. The addition result may be
14628 * saturated and is written to the third register.
14629 * * KMABB32: rd + bottom*bottom
14630 * * KMABT32: rd + bottom*top
14631 * * KMATT32: rd + top*top
14632 *
14633 * **Description**:\n
14634 * For the `KMABB32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
14635 * element in Rs2.
14636 * For the `KMABT32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit
14637 * element in Rs2.
14638 * For the `KMATT32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit
14639 * element in Rs2.
14640 * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond
14641 * the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The
14642 * result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
14643 * integers.
14644 *
14645 * **Operations**:\n
14646 * ~~~
14647 * res = Rd + (Rs1.W[0] * Rs2.W[0]); // KMABB32
14648 * res = Rd + (Rs1.W[0] * Rs2.W[1]); // KMABT32
14649 * res = Rd + (Rs1.W[1] * Rs2.W[1]); // KMATT32
14650 * if (res > (2^63)-1) {
14651 * res = (2^63)-1;
14652 * OV = 1;
14653 * } else if (res < -2^63) {
14654 * res = -2^63;
14655 * OV = 1;
14656 * }
14657 * Rd = res;
 * // Exceptions: None
14659 * ~~~
14660 *
14661 * \param [in] t long type of value stored in t
14662 * \param [in] a unsigned long type of value stored in a
14663 * \param [in] b unsigned long type of value stored in b
14664 * \return value stored in long type
14665 */
__RV_KMABB32(long t,unsigned long a,unsigned long b)14666 __STATIC_FORCEINLINE long __RV_KMABB32(long t, unsigned long a, unsigned long b)
14667 {
14668 __ASM volatile("kmabb32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
14669 return t;
14670 }
14671 /* ===== Inline Function End for 4.11.1. KMABB32 ===== */
14672
14673 /* ===== Inline Function Start for 4.11.2. KMABT32 ===== */
14674 /**
14675 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT_ADD
14676 * \brief KMABT32 (Saturating Signed Multiply Bottom & Top Words & Add)
14677 * \details
14678 * **Type**: DSP (RV64 Only)
14679 *
14680 * **Syntax**:\n
14681 * ~~~
14682 * KMABB32 Rd, Rs1, Rs2
14683 * KMABT32 Rd, Rs1, Rs2
14684 * KMATT32 Rd, Rs1, Rs2
14685 * ~~~
14686 *
14687 * **Purpose**:\n
14688 * Multiply the signed 32-bit element in a register with the 32-bit element in another register
14689 * and add the result to the content of 64-bit data in the third register. The addition result may be
14690 * saturated and is written to the third register.
14691 * * KMABB32: rd + bottom*bottom
14692 * * KMABT32: rd + bottom*top
14693 * * KMATT32: rd + top*top
14694 *
14695 * **Description**:\n
14696 * For the `KMABB32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
14697 * element in Rs2.
14698 * For the `KMABT32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit
14699 * element in Rs2.
14700 * For the `KMATT32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit
14701 * element in Rs2.
14702 * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond
14703 * the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The
14704 * result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
14705 * integers.
14706 *
14707 * **Operations**:\n
14708 * ~~~
14709 * res = Rd + (Rs1.W[0] * Rs2.W[0]); // KMABB32
14710 * res = Rd + (Rs1.W[0] * Rs2.W[1]); // KMABT32
14711 * res = Rd + (Rs1.W[1] * Rs2.W[1]); // KMATT32
14712 * if (res > (2^63)-1) {
14713 * res = (2^63)-1;
14714 * OV = 1;
14715 * } else if (res < -2^63) {
14716 * res = -2^63;
14717 * OV = 1;
14718 * }
14719 * Rd = res;
 * // Exceptions: None
14721 * ~~~
14722 *
14723 * \param [in] t long type of value stored in t
14724 * \param [in] a unsigned long type of value stored in a
14725 * \param [in] b unsigned long type of value stored in b
14726 * \return value stored in long type
14727 */
__RV_KMABT32(long t,unsigned long a,unsigned long b)14728 __STATIC_FORCEINLINE long __RV_KMABT32(long t, unsigned long a, unsigned long b)
14729 {
14730 __ASM volatile("kmabt32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
14731 return t;
14732 }
14733 /* ===== Inline Function End for 4.11.2. KMABT32 ===== */
14734
14735 /* ===== Inline Function Start for 4.11.3. KMATT32 ===== */
14736 /**
14737 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT_ADD
14738 * \brief KMATT32 (Saturating Signed Multiply Top Words & Add)
14739 * \details
14740 * **Type**: DSP (RV64 Only)
14741 *
14742 * **Syntax**:\n
14743 * ~~~
14744 * KMABB32 Rd, Rs1, Rs2
14745 * KMABT32 Rd, Rs1, Rs2
14746 * KMATT32 Rd, Rs1, Rs2
14747 * ~~~
14748 *
14749 * **Purpose**:\n
14750 * Multiply the signed 32-bit element in a register with the 32-bit element in another register
14751 * and add the result to the content of 64-bit data in the third register. The addition result may be
14752 * saturated and is written to the third register.
14753 * * KMABB32: rd + bottom*bottom
14754 * * KMABT32: rd + bottom*top
14755 * * KMATT32: rd + top*top
14756 *
14757 * **Description**:\n
14758 * For the `KMABB32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
14759 * element in Rs2.
14760 * For the `KMABT32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit
14761 * element in Rs2.
14762 * For the `KMATT32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit
14763 * element in Rs2.
14764 * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond
14765 * the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The
14766 * result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
14767 * integers.
14768 *
14769 * **Operations**:\n
14770 * ~~~
14771 * res = Rd + (Rs1.W[0] * Rs2.W[0]); // KMABB32
14772 * res = Rd + (Rs1.W[0] * Rs2.W[1]); // KMABT32
14773 * res = Rd + (Rs1.W[1] * Rs2.W[1]); // KMATT32
14774 * if (res > (2^63)-1) {
14775 * res = (2^63)-1;
14776 * OV = 1;
14777 * } else if (res < -2^63) {
14778 * res = -2^63;
14779 * OV = 1;
14780 * }
14781 * Rd = res;
 * // Exceptions: None
14783 * ~~~
14784 *
14785 * \param [in] t long type of value stored in t
14786 * \param [in] a unsigned long type of value stored in a
14787 * \param [in] b unsigned long type of value stored in b
14788 * \return value stored in long type
14789 */
__RV_KMATT32(long t,unsigned long a,unsigned long b)14790 __STATIC_FORCEINLINE long __RV_KMATT32(long t, unsigned long a, unsigned long b)
14791 {
14792 __ASM volatile("kmatt32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
14793 return t;
14794 }
14795 /* ===== Inline Function End for 4.11.3. KMATT32 ===== */
14796
14797 /* ===== Inline Function Start for 4.12.1. KMADA32 ===== */
14798 /**
14799 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
14800 * \brief KMADA32 (Saturating Signed Multiply Two Words and Two Adds)
14801 * \details
14802 * **Type**: DSP (RV64 Only)
14803 *
14804 * **Syntax**:\n
14805 * ~~~
14806 * KMADA32 Rd, Rs1, Rs2
14807 * KMAXDA32 Rd, Rs1, Rs2
14808 * ~~~
14809 *
14810 * **Purpose**:\n
14811 * Do two signed 32-bit multiplications from 32-bit data in two registers; and then adds the
14812 * two 64-bit results and 64-bit data in a third register together. The addition result may be saturated.
14813 * * KMADA32: rd + top*top + bottom*bottom
14814 * * KMAXDA32: rd + top*bottom + bottom*top
14815 *
14816 * **Description**:\n
14817 * For the `KMADA32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-
14818 * bit element in Rs2 and then adds the result to the result of multiplying the top 32-bit element in Rs1
14819 * with the top 32-bit element in Rs2. It is actually an alias of the `KMAR64` instruction.
14820 * For the `KMAXDA32` instruction, it multiplies the top 32-bit element in Rs1 with the bottom 32-bit
14821 * element in Rs2 and then adds the result to the result of multiplying the bottom 32-bit element in Rs1
14822 * with the top 32-bit element in Rs2.
14823 * The result is added to the content of 64-bit data in Rd. If the addition result is beyond the Q63
14824 * number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The 64-bit
14825 * result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
14826 *
14827 * **Operations**:\n
14828 * ~~~
 * res = Rd + (Rs1.W[1] * Rs2.W[1]) + (Rs1.W[0] * Rs2.W[0]); // KMADA32
14830 * res = Rd + (Rs1.W[1] * Rs2.W[0]) + (Rs1.W[0] * Rs2.W[1]); // KMAXDA32
14831 * if (res > (2^63)-1) {
14832 * res = (2^63)-1;
14833 * OV = 1;
14834 * } else if (res < -2^63) {
14835 * res = -2^63;
14836 * OV = 1;
14837 * }
14838 * Rd = res;
14839 * ~~~
14840 *
14841 * \param [in] t long type of value stored in t
14842 * \param [in] a unsigned long type of value stored in a
14843 * \param [in] b unsigned long type of value stored in b
14844 * \return value stored in long type
14845 */
__RV_KMADA32(long t,unsigned long a,unsigned long b)14846 __STATIC_FORCEINLINE long __RV_KMADA32(long t, unsigned long a, unsigned long b)
14847 {
14848 __ASM volatile("kmada32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
14849 return t;
14850 }
14851 /* ===== Inline Function End for 4.12.1. KMADA32 ===== */
14852
14853 /* ===== Inline Function Start for 4.12.2. KMAXDA32 ===== */
14854 /**
14855 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
14856 * \brief KMAXDA32 (Saturating Signed Crossed Multiply Two Words and Two Adds)
14857 * \details
14858 * **Type**: DSP (RV64 Only)
14859 *
14860 * **Syntax**:\n
14861 * ~~~
14862 * KMADA32 Rd, Rs1, Rs2
14863 * KMAXDA32 Rd, Rs1, Rs2
14864 * ~~~
14865 *
14866 * **Purpose**:\n
14867 * Do two signed 32-bit multiplications from 32-bit data in two registers; and then adds the
14868 * two 64-bit results and 64-bit data in a third register together. The addition result may be saturated.
14869 * * KMADA32: rd + top*top + bottom*bottom
14870 * * KMAXDA32: rd + top*bottom + bottom*top
14871 *
14872 * **Description**:\n
14873 * For the `KMADA32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-
14874 * bit element in Rs2 and then adds the result to the result of multiplying the top 32-bit element in Rs1
14875 * with the top 32-bit element in Rs2. It is actually an alias of the `KMAR64` instruction.
14876 * For the `KMAXDA32` instruction, it multiplies the top 32-bit element in Rs1 with the bottom 32-bit
14877 * element in Rs2 and then adds the result to the result of multiplying the bottom 32-bit element in Rs1
14878 * with the top 32-bit element in Rs2.
14879 * The result is added to the content of 64-bit data in Rd. If the addition result is beyond the Q63
14880 * number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The 64-bit
14881 * result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
14882 *
14883 * **Operations**:\n
14884 * ~~~
 * res = Rd + (Rs1.W[1] * Rs2.W[1]) + (Rs1.W[0] * Rs2.W[0]); // KMADA32
14886 * res = Rd + (Rs1.W[1] * Rs2.W[0]) + (Rs1.W[0] * Rs2.W[1]); // KMAXDA32
14887 * if (res > (2^63)-1) {
14888 * res = (2^63)-1;
14889 * OV = 1;
14890 * } else if (res < -2^63) {
14891 * res = -2^63;
14892 * OV = 1;
14893 * }
14894 * Rd = res;
14895 * ~~~
14896 *
14897 * \param [in] t long type of value stored in t
14898 * \param [in] a unsigned long type of value stored in a
14899 * \param [in] b unsigned long type of value stored in b
14900 * \return value stored in long type
14901 */
__RV_KMAXDA32(long t,unsigned long a,unsigned long b)14902 __STATIC_FORCEINLINE long __RV_KMAXDA32(long t, unsigned long a, unsigned long b)
14903 {
14904 __ASM volatile("kmaxda32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
14905 return t;
14906 }
14907 /* ===== Inline Function End for 4.12.2. KMAXDA32 ===== */
14908
14909 /* ===== Inline Function Start for 4.13.1. KMDA32 ===== */
14910 /**
14911 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
14912 * \brief KMDA32 (Signed Multiply Two Words and Add)
14913 * \details
14914 * **Type**: DSP (RV64 Only)
14915 *
14916 * **Syntax**:\n
14917 * ~~~
14918 * KMDA32 Rd, Rs1, Rs2
14919 * KMXDA32 Rd, Rs1, Rs2
14920 * ~~~
14921 *
14922 * **Purpose**:\n
14923 * Do two signed 32-bit multiplications from the 32-bit element of two registers; and then
14924 * adds the two 64-bit results together. The addition result may be saturated.
14925 * * KMDA32: top*top + bottom*bottom
14926 * * KMXDA32: top*bottom + bottom*top
14927 *
14928 * **Description**:\n
14929 * For the `KMDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
14930 * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1
14931 * with the top 32-bit element of Rs2.
14932 * For the `KMXDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
14933 * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1
14934 * with the bottom 32-bit element of Rs2.
14935 * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^63-1.
14936 * The final result is written to Rd. The 32-bit contents are treated as signed integers.
14937 *
14938 * **Operations**:\n
14939 * ~~~
14940 * if ((Rs1 != 0x8000000080000000) or (Rs2 != 0x8000000080000000)) {
14941 * Rd = (Rs1.W[1] * Rs2.W[1]) + (Rs1.W[0] * Rs2.W[0]); // KMDA32
14942 * Rd = (Rs1.W[1] * Rs2.W[0]) + (Rs1.W[0] * Rs2.W[1]); // KMXDA32
14943 * } else {
14944 * Rd = 0x7fffffffffffffff;
14945 * OV = 1;
14946 * }
14947 * ~~~
14948 *
14949 * \param [in] a unsigned long type of value stored in a
14950 * \param [in] b unsigned long type of value stored in b
14951 * \return value stored in long type
14952 */
__RV_KMDA32(unsigned long a,unsigned long b)14953 __STATIC_FORCEINLINE long __RV_KMDA32(unsigned long a, unsigned long b)
14954 {
14955 register long result;
14956 __ASM volatile("kmda32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
14957 return result;
14958 }
14959 /* ===== Inline Function End for 4.13.1. KMDA32 ===== */
14960
14961 /* ===== Inline Function Start for 4.13.2. KMXDA32 ===== */
14962 /**
14963 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
14964 * \brief KMXDA32 (Signed Crossed Multiply Two Words and Add)
14965 * \details
14966 * **Type**: DSP (RV64 Only)
14967 *
14968 * **Syntax**:\n
14969 * ~~~
14970 * KMDA32 Rd, Rs1, Rs2
14971 * KMXDA32 Rd, Rs1, Rs2
14972 * ~~~
14973 *
14974 * **Purpose**:\n
14975 * Do two signed 32-bit multiplications from the 32-bit element of two registers; and then
14976 * adds the two 64-bit results together. The addition result may be saturated.
14977 * * KMDA32: top*top + bottom*bottom
14978 * * KMXDA32: top*bottom + bottom*top
14979 *
14980 * **Description**:\n
14981 * For the `KMDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
14982 * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1
14983 * with the top 32-bit element of Rs2.
14984 * For the `KMXDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
14985 * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1
14986 * with the bottom 32-bit element of Rs2.
14987 * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^63-1.
14988 * The final result is written to Rd. The 32-bit contents are treated as signed integers.
14989 *
14990 * **Operations**:\n
14991 * ~~~
14992 * if ((Rs1 != 0x8000000080000000) or (Rs2 != 0x8000000080000000)) {
14993 * Rd = (Rs1.W[1] * Rs2.W[1]) + (Rs1.W[0] * Rs2.W[0]); // KMDA32
14994 * Rd = (Rs1.W[1] * Rs2.W[0]) + (Rs1.W[0] * Rs2.W[1]); // KMXDA32
14995 * } else {
14996 * Rd = 0x7fffffffffffffff;
14997 * OV = 1;
14998 * }
14999 * ~~~
15000 *
15001 * \param [in] a unsigned long type of value stored in a
15002 * \param [in] b unsigned long type of value stored in b
15003 * \return value stored in long type
15004 */
__RV_KMXDA32(unsigned long a,unsigned long b)15005 __STATIC_FORCEINLINE long __RV_KMXDA32(unsigned long a, unsigned long b)
15006 {
15007 register long result;
15008 __ASM volatile("kmxda32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
15009 return result;
15010 }
15011 /* ===== Inline Function End for 4.13.2. KMXDA32 ===== */
15012
15013 /* ===== Inline Function Start for 4.14.1. KMADS32 ===== */
15014 /**
15015 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
15016 * \brief KMADS32 (Saturating Signed Multiply Two Words & Subtract & Add)
15017 * \details
15018 * **Type**: DSP (RV64 Only)
15019 *
15020 * **Syntax**:\n
15021 * ~~~
15022 * KMADS32 Rd, Rs1, Rs2
15023 * KMADRS32 Rd, Rs1, Rs2
15024 * KMAXDS32 Rd, Rs1, Rs2
15025 * ~~~
15026 *
15027 * **Purpose**:\n
15028 * Do two signed 32-bit multiplications from 32-bit elements in two registers; and then
15029 * perform a subtraction operation between the two 64-bit results. Then add the subtraction result to
15030 * 64-bit data in a third register. The addition result may be saturated.
15031 * * KMADS32: rd + (top*top - bottom*bottom)
15032 * * KMADRS32: rd + (bottom*bottom - top*top)
15033 * * KMAXDS32: rd + (top*bottom - bottom*top)
15034 *
15035 * **Description**:\n
15036 * For the `KMADS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
15037 * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
15038 * Rs1 with the top 32-bit element in Rs2.
15039 * For the `KMADRS32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit
15040 * element in Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
15041 * element in Rs1 with the bottom 32-bit element in Rs2.
15042 * For the `KMAXDS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit
15043 * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
15044 * Rs1 with the bottom 32-bit element in Rs2.
15045 * The subtraction result is then added to the content of 64-bit data in Rd. If the addition result is
15046 * beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to
15047 * 1. The 64-bit result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated
15048 * as signed integers.
15049 *
15050 * **Operations**:\n
15051 * ~~~
15052 * res = Rd + (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMADS32
15053 * res = Rd + (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // KMADRS32
15054 * res = Rd + (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMAXDS32
15055 * if (res > (2^63)-1) {
15056 * res = (2^63)-1;
15057 * OV = 1;
15058 * } else if (res < -2^63) {
15059 * res = -2^63;
15060 * OV = 1;
15061 * }
15062 * Rd = res;
15063 * ~~~
15064 *
15065 * \param [in] t long type of value stored in t
15066 * \param [in] a unsigned long type of value stored in a
15067 * \param [in] b unsigned long type of value stored in b
15068 * \return value stored in long type
15069 */
__RV_KMADS32(long t,unsigned long a,unsigned long b)15070 __STATIC_FORCEINLINE long __RV_KMADS32(long t, unsigned long a, unsigned long b)
15071 {
15072 __ASM volatile("kmads32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
15073 return t;
15074 }
15075 /* ===== Inline Function End for 4.14.1. KMADS32 ===== */
15076
15077 /* ===== Inline Function Start for 4.14.2. KMADRS32 ===== */
15078 /**
15079 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
15080 * \brief KMADRS32 (Saturating Signed Multiply Two Words & Reverse Subtract & Add)
15081 * \details
15082 * **Type**: DSP (RV64 Only)
15083 *
15084 * **Syntax**:\n
15085 * ~~~
15086 * KMADS32 Rd, Rs1, Rs2
15087 * KMADRS32 Rd, Rs1, Rs2
15088 * KMAXDS32 Rd, Rs1, Rs2
15089 * ~~~
15090 *
15091 * **Purpose**:\n
15092 * Do two signed 32-bit multiplications from 32-bit elements in two registers; and then
15093 * perform a subtraction operation between the two 64-bit results. Then add the subtraction result to
15094 * 64-bit data in a third register. The addition result may be saturated.
15095 * * KMADS32: rd + (top*top - bottom*bottom)
15096 * * KMADRS32: rd + (bottom*bottom - top*top)
15097 * * KMAXDS32: rd + (top*bottom - bottom*top)
15098 *
15099 * **Description**:\n
15100 * For the `KMADS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
15101 * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
15102 * Rs1 with the top 32-bit element in Rs2.
15103 * For the `KMADRS32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit
15104 * element in Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
15105 * element in Rs1 with the bottom 32-bit element in Rs2.
15106 * For the `KMAXDS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit
15107 * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
15108 * Rs1 with the bottom 32-bit element in Rs2.
15109 * The subtraction result is then added to the content of 64-bit data in Rd. If the addition result is
15110 * beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to
15111 * 1. The 64-bit result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated
15112 * as signed integers.
15113 *
15114 * **Operations**:\n
15115 * ~~~
15116 * res = Rd + (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMADS32
15117 * res = Rd + (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // KMADRS32
15118 * res = Rd + (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMAXDS32
15119 * if (res > (2^63)-1) {
15120 * res = (2^63)-1;
15121 * OV = 1;
15122 * } else if (res < -2^63) {
15123 * res = -2^63;
15124 * OV = 1;
15125 * }
15126 * Rd = res;
15127 * ~~~
15128 *
15129 * \param [in] t long type of value stored in t
15130 * \param [in] a unsigned long type of value stored in a
15131 * \param [in] b unsigned long type of value stored in b
15132 * \return value stored in long type
15133 */
__RV_KMADRS32(long t,unsigned long a,unsigned long b)15134 __STATIC_FORCEINLINE long __RV_KMADRS32(long t, unsigned long a, unsigned long b)
15135 {
15136 __ASM volatile("kmadrs32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
15137 return t;
15138 }
15139 /* ===== Inline Function End for 4.14.2. KMADRS32 ===== */
15140
15141 /* ===== Inline Function Start for 4.14.3. KMAXDS32 ===== */
15142 /**
15143 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
15144 * \brief KMAXDS32 (Saturating Signed Crossed Multiply Two Words & Subtract & Add)
15145 * \details
15146 * **Type**: DSP (RV64 Only)
15147 *
15148 * **Syntax**:\n
15149 * ~~~
15150 * KMADS32 Rd, Rs1, Rs2
15151 * KMADRS32 Rd, Rs1, Rs2
15152 * KMAXDS32 Rd, Rs1, Rs2
15153 * ~~~
15154 *
15155 * **Purpose**:\n
15156 * Do two signed 32-bit multiplications from 32-bit elements in two registers; and then
15157 * perform a subtraction operation between the two 64-bit results. Then add the subtraction result to
15158 * 64-bit data in a third register. The addition result may be saturated.
15159 * * KMADS32: rd + (top*top - bottom*bottom)
15160 * * KMADRS32: rd + (bottom*bottom - top*top)
15161 * * KMAXDS32: rd + (top*bottom - bottom*top)
15162 *
15163 * **Description**:\n
15164 * For the `KMADS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
15165 * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
15166 * Rs1 with the top 32-bit element in Rs2.
15167 * For the `KMADRS32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit
15168 * element in Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
15169 * element in Rs1 with the bottom 32-bit element in Rs2.
15170 * For the `KMAXDS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit
15171 * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
15172 * Rs1 with the bottom 32-bit element in Rs2.
15173 * The subtraction result is then added to the content of 64-bit data in Rd. If the addition result is
15174 * beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to
15175 * 1. The 64-bit result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated
15176 * as signed integers.
15177 *
15178 * **Operations**:\n
15179 * ~~~
15180 * res = Rd + (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMADS32
15181 * res = Rd + (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // KMADRS32
15182 * res = Rd + (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMAXDS32
15183 * if (res > (2^63)-1) {
15184 * res = (2^63)-1;
15185 * OV = 1;
15186 * } else if (res < -2^63) {
15187 * res = -2^63;
15188 * OV = 1;
15189 * }
15190 * Rd = res;
15191 * ~~~
15192 *
15193 * \param [in] t long type of value stored in t
15194 * \param [in] a unsigned long type of value stored in a
15195 * \param [in] b unsigned long type of value stored in b
15196 * \return value stored in long type
15197 */
__RV_KMAXDS32(long t,unsigned long a,unsigned long b)15198 __STATIC_FORCEINLINE long __RV_KMAXDS32(long t, unsigned long a, unsigned long b)
15199 {
15200 __ASM volatile("kmaxds32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
15201 return t;
15202 }
15203 /* ===== Inline Function End for 4.14.3. KMAXDS32 ===== */
15204
15205 /* ===== Inline Function Start for 4.15.1. KMSDA32 ===== */
15206 /**
15207 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
15208 * \brief KMSDA32 (Saturating Signed Multiply Two Words & Add & Subtract)
15209 * \details
15210 * **Type**: DSP (RV64 Only)
15211 *
15212 * **Syntax**:\n
15213 * ~~~
15214 * KMSDA32 Rd, Rs1, Rs2
15215 * KMSXDA32 Rd, Rs1, Rs2
15216 * ~~~
15217 *
15218 * **Purpose**:\n
15219 * Do two signed 32-bit multiplications from the 32-bit element of two registers; and then
15220 * subtracts the two 64-bit results from a third register. The subtraction result may be saturated.
 * * KMSDA32: rd - top*top - bottom*bottom
 * * KMSXDA32: rd - top*bottom - bottom*top
15223 *
15224 * **Description**:\n
15225 * For the `KMSDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
15226 * element of Rs2 and multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2.
15227 * For the `KMSXDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
15228 * element of Rs2 and multiplies the top 32-bit element of Rs1 with the bottom 32-bit element of Rs2.
15229 * The two 64-bit multiplication results are then subtracted from the content of Rd. If the subtraction
15230 * result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit
15231 * is set to 1. The result after saturation is written to Rd. The 32-bit contents are treated as signed
15232 * integers.
15233 *
15234 * **Operations**:\n
15235 * ~~~
15236 * res = Rd - (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMSDA32
15237 * res = Rd - (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMSXDA32
15238 * if (res > (2^63)-1) {
15239 * res = (2^63)-1;
15240 * OV = 1;
15241 * } else if (res < -2^63) {
15242 * res = -2^63;
15243 * OV = 1;
15244 * }
15245 * Rd = res;
15246 * ~~~
15247 *
15248 * \param [in] t long type of value stored in t
15249 * \param [in] a unsigned long type of value stored in a
15250 * \param [in] b unsigned long type of value stored in b
15251 * \return value stored in long type
15252 */
__RV_KMSDA32(long t,unsigned long a,unsigned long b)15253 __STATIC_FORCEINLINE long __RV_KMSDA32(long t, unsigned long a, unsigned long b)
15254 {
15255 __ASM volatile("kmsda32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
15256 return t;
15257 }
15258 /* ===== Inline Function End for 4.15.1. KMSDA32 ===== */
15259
15260 /* ===== Inline Function Start for 4.15.2. KMSXDA32 ===== */
15261 /**
15262 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
15263 * \brief KMSXDA32 (Saturating Signed Crossed Multiply Two Words & Add & Subtract)
15264 * \details
15265 * **Type**: DSP (RV64 Only)
15266 *
15267 * **Syntax**:\n
15268 * ~~~
15269 * KMSDA32 Rd, Rs1, Rs2
15270 * KMSXDA32 Rd, Rs1, Rs2
15271 * ~~~
15272 *
15273 * **Purpose**:\n
15274 * Do two signed 32-bit multiplications from the 32-bit element of two registers; and then
15275 * subtracts the two 64-bit results from a third register. The subtraction result may be saturated.
 * * KMSDA32: rd - top*top - bottom*bottom
 * * KMSXDA32: rd - top*bottom - bottom*top
15278 *
15279 * **Description**:\n
15280 * For the `KMSDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
15281 * element of Rs2 and multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2.
15282 * For the `KMSXDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
15283 * element of Rs2 and multiplies the top 32-bit element of Rs1 with the bottom 32-bit element of Rs2.
15284 * The two 64-bit multiplication results are then subtracted from the content of Rd. If the subtraction
15285 * result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit
15286 * is set to 1. The result after saturation is written to Rd. The 32-bit contents are treated as signed
15287 * integers.
15288 *
15289 * **Operations**:\n
15290 * ~~~
15291 * res = Rd - (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMSDA32
15292 * res = Rd - (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMSXDA32
15293 * if (res > (2^63)-1) {
15294 * res = (2^63)-1;
15295 * OV = 1;
15296 * } else if (res < -2^63) {
15297 * res = -2^63;
15298 * OV = 1;
15299 * }
15300 * Rd = res;
15301 * ~~~
15302 *
15303 * \param [in] t long type of value stored in t
15304 * \param [in] a unsigned long type of value stored in a
15305 * \param [in] b unsigned long type of value stored in b
15306 * \return value stored in long type
15307 */
__RV_KMSXDA32(long t,unsigned long a,unsigned long b)15308 __STATIC_FORCEINLINE long __RV_KMSXDA32(long t, unsigned long a, unsigned long b)
15309 {
15310 __ASM volatile("kmsxda32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
15311 return t;
15312 }
15313 /* ===== Inline Function End for 4.15.2. KMSXDA32 ===== */
15314
15315 /* ===== Inline Function Start for 4.16. KSLL32 ===== */
15316 /**
15317 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
15318 * \brief KSLL32 (SIMD 32-bit Saturating Shift Left Logical)
15319 * \details
15320 * **Type**: SIMD (RV64 Only)
15321 *
15322 * **Syntax**:\n
15323 * ~~~
15324 * KSLL32 Rd, Rs1, Rs2
15325 * ~~~
15326 *
15327 * **Purpose**:\n
15328 * Do 32-bit elements logical left shift operations with saturation simultaneously. The shift
15329 * amount is a variable from a GPR.
15330 *
15331 * **Description**:\n
15332 * The 32-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled
15333 * with zero and the shift amount is specified by the low-order 5-bits of the value in the Rs2 register.
15334 * Any shifted value greater than 2^31-1 is saturated to 2^31-1. Any shifted value smaller than -2^31 is
15335 * saturated to -2^31. And the saturated results are written to Rd. If any saturation is performed, set OV
15336 * bit to 1.
15337 *
15338 * **Operations**:\n
15339 * ~~~
15340 * sa = Rs2[4:0];
15341 * if (sa != 0) {
15342 * res[(31+sa):0] = Rs1.W[x] << sa;
15343 * if (res > (2^31)-1) {
15344 * res = 0x7fffffff; OV = 1;
15345 * } else if (res < -2^31) {
15346 * res = 0x80000000; OV = 1;
15347 * }
15348 * Rd.W[x] = res[31:0];
15349 * } else {
15350 * Rd = Rs1;
15351 * }
15352 * for RV64: x=1...0
15353 * ~~~
15354 *
15355 * \param [in] a unsigned long type of value stored in a
15356 * \param [in] b unsigned int type of value stored in b
15357 * \return value stored in unsigned long type
15358 */
__RV_KSLL32(unsigned long a,unsigned int b)15359 __STATIC_FORCEINLINE unsigned long __RV_KSLL32(unsigned long a, unsigned int b)
15360 {
15361 register unsigned long result;
15362 __ASM volatile("ksll32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
15363 return result;
15364 }
15365 /* ===== Inline Function End for 4.16. KSLL32 ===== */
15366
15367 /* ===== Inline Function Start for 4.17. KSLLI32 ===== */
15368 /**
15369 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
15370 * \brief KSLLI32 (SIMD 32-bit Saturating Shift Left Logical Immediate)
15371 * \details
15372 * **Type**: SIMD (RV64 Only)
15373 *
15374 * **Syntax**:\n
15375 * ~~~
15376 * KSLLI32 Rd, Rs1, imm5u
15377 * ~~~
15378 *
15379 * **Purpose**:\n
15380 * Do 32-bit elements logical left shift operations with saturation simultaneously. The shift
15381 * amount is an immediate value.
15382 *
15383 * **Description**:\n
15384 * The 32-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled
15385 * with zero and the shift amount is specified by the imm5u constant. Any shifted value greater than
15386 * 2^31-1 is saturated to 2^31-1. Any shifted value smaller than -2^31 is saturated to -2^31. And the saturated
15387 * results are written to Rd. If any saturation is performed, set OV bit to 1.
15388 *
15389 * **Operations**:\n
15390 * ~~~
15391 * sa = imm5u[4:0];
15392 * if (sa != 0) {
15393 * res[(31+sa):0] = Rs1.W[x] << sa;
15394 * if (res > (2^31)-1) {
15395 * res = 0x7fffffff; OV = 1;
15396 * } else if (res < -2^31) {
15397 * res = 0x80000000; OV = 1;
15398 * }
15399 * Rd.W[x] = res[31:0];
15400 * } else {
15401 * Rd = Rs1;
15402 * }
15403 * for RV64: x=1...0
15404 * ~~~
15405 *
15406 * \param [in] a unsigned long type of value stored in a
15407 * \param [in] b unsigned int type of value stored in b
15408 * \return value stored in unsigned long type
15409 */
__RV_KSLLI32(unsigned long a,unsigned int b)15410 __STATIC_FORCEINLINE unsigned long __RV_KSLLI32(unsigned long a, unsigned int b)
15411 {
15412 register unsigned long result;
15413 __ASM volatile("kslli32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
15414 return result;
15415 }
15416 /* ===== Inline Function End for 4.17. KSLLI32 ===== */
15417
15418 /* ===== Inline Function Start for 4.18.1. KSLRA32 ===== */
15419 /**
15420 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
15421 * \brief KSLRA32 (SIMD 32-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
15422 * \details
15423 * **Type**: SIMD (RV64 Only)
15424 *
15425 * **Syntax**:\n
15426 * ~~~
15427 * KSLRA32 Rd, Rs1, Rs2
15428 * KSLRA32.u Rd, Rs1, Rs2
15429 * ~~~
15430 *
15431 * **Purpose**:\n
15432 * Do 32-bit elements logical left (positive) or arithmetic right (negative) shift operation with
15433 * Q31 saturation for the left shift. The `.u` form performs additional rounding up operations for the
15434 * right shift.
15435 *
15436 * **Description**:\n
15437 * The 32-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
 * based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-2^5, 2^5-1]. A positive Rs2[5:0] means
 * logical left shift and a negative Rs2[5:0] means arithmetic right shift. The shift amount is the
 * absolute value of Rs2[5:0]. However, the behavior of `Rs2[5:0]==-2^5 (0x20)` is defined to be
 * equivalent to the behavior of `Rs2[5:0]==-(2^5-1) (0x21)`.
15442 * The left-shifted results are saturated to the 32-bit signed integer range of [-2^31, 2^31-1]. For the `.u`
15443 * form of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
15444 * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
15445 * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:6] will not affect
15446 * this instruction.
15447 *
15448 * **Operations**:\n
15449 * ~~~
15450 * if (Rs2[5:0] < 0) {
15451 * sa = -Rs2[5:0];
15452 * sa = (sa == 32)? 31 : sa;
15453 * if (`.u` form) {
15454 * res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
15455 * Rd.W[x] = res[31:0];
15456 * } else {
15457 * Rd.W[x] = SE32(Rs1.W[x][31:sa]);
15458 * }
15459 * } else {
15460 * sa = Rs2[4:0];
15461 * res[(31+sa):0] = Rs1.W[x] <<(logic) sa;
15462 * if (res > (2^31)-1) {
15463 * res[31:0] = 0x7fffffff; OV = 1;
15464 * } else if (res < -2^31) {
15465 * res[31:0] = 0x80000000; OV = 1;
15466 * }
15467 * Rd.W[x] = res[31:0];
15468 * }
15469 * for RV64: x=1...0
15470 * ~~~
15471 *
15472 * \param [in] a unsigned long type of value stored in a
15473 * \param [in] b int type of value stored in b
15474 * \return value stored in unsigned long type
15475 */
__RV_KSLRA32(unsigned long a,int b)15476 __STATIC_FORCEINLINE unsigned long __RV_KSLRA32(unsigned long a, int b)
15477 {
15478 register unsigned long result;
15479 __ASM volatile("kslra32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
15480 return result;
15481 }
15482 /* ===== Inline Function End for 4.18.1. KSLRA32 ===== */
15483
15484 /* ===== Inline Function Start for 4.18.2. KSLRA32.u ===== */
15485 /**
15486 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
15487 * \brief KSLRA32.u (SIMD 32-bit Shift Left Logical with Saturation or Rounding Shift Right Arithmetic)
15488 * \details
15489 * **Type**: SIMD (RV64 Only)
15490 *
15491 * **Syntax**:\n
15492 * ~~~
15493 * KSLRA32 Rd, Rs1, Rs2
15494 * KSLRA32.u Rd, Rs1, Rs2
15495 * ~~~
15496 *
15497 * **Purpose**:\n
15498 * Do 32-bit elements logical left (positive) or arithmetic right (negative) shift operation with
15499 * Q31 saturation for the left shift. The `.u` form performs additional rounding up operations for the
15500 * right shift.
15501 *
15502 * **Description**:\n
15503 * The 32-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
 * based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-2^5, 2^5-1]. A positive Rs2[5:0] means
 * logical left shift and a negative Rs2[5:0] means arithmetic right shift. The shift amount is the
 * absolute value of Rs2[5:0]. However, the behavior of `Rs2[5:0]==-2^5 (0x20)` is defined to be
 * equivalent to the behavior of `Rs2[5:0]==-(2^5-1) (0x21)`.
15508 * The left-shifted results are saturated to the 32-bit signed integer range of [-2^31, 2^31-1]. For the `.u`
15509 * form of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
15510 * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
15511 * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:6] will not affect
15512 * this instruction.
15513 *
15514 * **Operations**:\n
15515 * ~~~
15516 * if (Rs2[5:0] < 0) {
15517 * sa = -Rs2[5:0];
15518 * sa = (sa == 32)? 31 : sa;
15519 * if (`.u` form) {
15520 * res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
15521 * Rd.W[x] = res[31:0];
15522 * } else {
15523 * Rd.W[x] = SE32(Rs1.W[x][31:sa]);
15524 * }
15525 * } else {
15526 * sa = Rs2[4:0];
15527 * res[(31+sa):0] = Rs1.W[x] <<(logic) sa;
15528 * if (res > (2^31)-1) {
15529 * res[31:0] = 0x7fffffff; OV = 1;
15530 * } else if (res < -2^31) {
15531 * res[31:0] = 0x80000000; OV = 1;
15532 * }
15533 * Rd.W[x] = res[31:0];
15534 * }
15535 * for RV64: x=1...0
15536 * ~~~
15537 *
15538 * \param [in] a unsigned long type of value stored in a
15539 * \param [in] b int type of value stored in b
15540 * \return value stored in unsigned long type
15541 */
__RV_KSLRA32_U(unsigned long a,int b)15542 __STATIC_FORCEINLINE unsigned long __RV_KSLRA32_U(unsigned long a, int b)
15543 {
15544 register unsigned long result;
15545 __ASM volatile("kslra32.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
15546 return result;
15547 }
15548 /* ===== Inline Function End for 4.18.2. KSLRA32.u ===== */
15549
15550 /* ===== Inline Function Start for 4.19. KSTAS32 ===== */
15551 /**
15552 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
15553 * \brief KSTAS32 (SIMD 32-bit Signed Saturating Straight Addition & Subtraction)
15554 * \details
15555 * **Type**: SIMD (RV64 Only)
15556 *
15557 * **Syntax**:\n
15558 * ~~~
15559 * KSTAS32 Rd, Rs1, Rs2
15560 * ~~~
15561 *
15562 * **Purpose**:\n
15563 * Do 32-bit signed integer element saturating addition and 32-bit signed integer element
15564 * saturating subtraction in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit
15565 * elements.
15566 *
15567 * **Description**:\n
15568 * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit
15569 * integer element in [63:32] of Rs2; at the same time, it subtracts the 32-bit integer element in [31:0] of
15570 * Rs2 from the 32-bit integer element in [31:0] of Rs1. If any of the results are beyond the Q31 number
15571 * range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated
15572 * results are written to [63:32] of Rd for addition and [31:0] of Rd for subtraction.
15573 *
15574 * **Operations**:\n
15575 * ~~~
15576 * res[1] = Rs1.W[1] + Rs2.W[1];
15577 * res[0] = Rs1.W[0] - Rs2.W[0];
15578 * if (res[x] > (2^31)-1) {
15579 * res[x] = (2^31)-1;
15580 * OV = 1;
 * } else if (res[x] < -2^31) {
15582 * res[x] = -2^31;
15583 * OV = 1;
15584 * }
15585 * Rd.W[1] = res[1];
15586 * Rd.W[0] = res[0];
15587 * for RV64, x=1...0
15588 * ~~~
15589 *
15590 * \param [in] a unsigned long type of value stored in a
15591 * \param [in] b unsigned long type of value stored in b
15592 * \return value stored in unsigned long type
15593 */
__RV_KSTAS32(unsigned long a,unsigned long b)15594 __STATIC_FORCEINLINE unsigned long __RV_KSTAS32(unsigned long a, unsigned long b)
15595 {
15596 register unsigned long result;
15597 __ASM volatile("kstas32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
15598 return result;
15599 }
15600 /* ===== Inline Function End for 4.19. KSTAS32 ===== */
15601
15602 /* ===== Inline Function Start for 4.20. KSTSA32 ===== */
15603 /**
15604 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
15605 * \brief KSTSA32 (SIMD 32-bit Signed Saturating Straight Subtraction & Addition)
15606 * \details
 * **Type**: SIMD (RV64 Only)
15608 *
15609 * **Syntax**:\n
15610 * ~~~
15611 * KSTSA32 Rd, Rs1, Rs2
15612 * ~~~
15613 *
15614 * **Purpose**:\n
15615 * Do 32-bit signed integer element saturating subtraction and 32-bit signed integer element
15616 * saturating addition in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit
15617 * elements.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer
 * element in [63:32] of Rs1; at the same time, it adds the 32-bit integer element in [31:0] of Rs1 with
 * the 32-bit integer element in [31:0] of Rs2. If any of the results are beyond the Q31 number range
 * (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated
 * results are written to [63:32] of Rd for subtraction and [31:0] of Rd for addition.
15624 *
15625 * **Operations**:\n
15626 * ~~~
15627 * res[1] = Rs1.W[1] - Rs2.W[1];
15628 * res[0] = Rs1.W[0] + Rs2.W[0];
15629 * if (res[x] > (2^31)-1) {
15630 * res[x] = (2^31)-1;
15631 * OV = 1;
 * } else if (res[x] < -2^31) {
15633 * res[x] = -2^31;
15634 * OV = 1;
15635 * }
15636 * Rd.W[1] = res[1];
15637 * Rd.W[0] = res[0];
15638 * for RV64, x=1...0
15639 * ~~~
15640 *
15641 * \param [in] a unsigned long type of value stored in a
15642 * \param [in] b unsigned long type of value stored in b
15643 * \return value stored in unsigned long type
15644 */
__RV_KSTSA32(unsigned long a,unsigned long b)15645 __STATIC_FORCEINLINE unsigned long __RV_KSTSA32(unsigned long a, unsigned long b)
15646 {
15647 register unsigned long result;
15648 __ASM volatile("kstsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
15649 return result;
15650 }
15651 /* ===== Inline Function End for 4.20. KSTSA32 ===== */
15652
15653 /* ===== Inline Function Start for 4.21. KSUB32 ===== */
15654 /**
15655 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
15656 * \brief KSUB32 (SIMD 32-bit Signed Saturating Subtraction)
15657 * \details
15658 * **Type**: SIMD (RV64 Only)
15659 *
15660 * **Syntax**:\n
15661 * ~~~
15662 * KSUB32 Rd, Rs1, Rs2
15663 * ~~~
15664 *
15665 * **Purpose**:\n
15666 * Do 32-bit signed integer elements saturating subtractions simultaneously.
15667 *
15668 * **Description**:\n
15669 * This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit
15670 * signed integer elements in Rs1. If any of the results are beyond the Q31 number range (-2^31 <= Q31 <=
15671 * 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to
15672 * Rd.
15673 *
15674 * **Operations**:\n
15675 * ~~~
15676 * res[x] = Rs1.W[x] - Rs2.W[x];
15677 * if (res[x] > (2^31)-1) {
15678 * res[x] = (2^31)-1;
15679 * OV = 1;
15680 * } else if (res[x] < -2^31) {
15681 * res[x] = -2^31;
15682 * OV = 1;
15683 * }
15684 * Rd.W[x] = res[x];
15685 * for RV64: x=1...0
15686 * ~~~
15687 *
15688 * \param [in] a unsigned long type of value stored in a
15689 * \param [in] b unsigned long type of value stored in b
15690 * \return value stored in unsigned long type
15691 */
__RV_KSUB32(unsigned long a,unsigned long b)15692 __STATIC_FORCEINLINE unsigned long __RV_KSUB32(unsigned long a, unsigned long b)
15693 {
15694 register unsigned long result;
15695 __ASM volatile("ksub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
15696 return result;
15697 }
15698 /* ===== Inline Function End for 4.21. KSUB32 ===== */
15699
15700 /* ===== Inline Function Start for 4.22.1. PKBB32 ===== */
15701 /**
15702 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK
15703 * \brief PKBB32 (Pack Two 32-bit Data from Both Bottom Half)
15704 * \details
15705 * **Type**: DSP (RV64 Only)
15706 *
15707 * **Syntax**:\n
15708 * ~~~
15709 * PKBB32 Rd, Rs1, Rs2
15710 * PKBT32 Rd, Rs1, Rs2
15711 * PKTT32 Rd, Rs1, Rs2
15712 * PKTB32 Rd, Rs1, Rs2
15713 * ~~~
15714 *
15715 * **Purpose**:\n
15716 * Pack 32-bit data from 64-bit chunks in two registers.
15717 * * PKBB32: bottom.bottom
15718 * * PKBT32: bottom.top
15719 * * PKTT32: top.top
15720 * * PKTB32: top.bottom
15721 *
15722 * **Description**:\n
15723 * (PKBB32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
15724 * (PKBT32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
15725 * (PKTT32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
15726 * (PKTB32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
15727 *
15728 * **Operations**:\n
15729 * ~~~
15730 * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*0*_]); // PKBB32
15731 * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*1*_]); // PKBT32
15732 * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*1*_]); // PKTT32
15733 * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*0*_]); // PKTB32
15734 * ~~~
15735 *
15736 * \param [in] a unsigned long type of value stored in a
15737 * \param [in] b unsigned long type of value stored in b
15738 * \return value stored in unsigned long type
15739 */
__RV_PKBB32(unsigned long a,unsigned long b)15740 __STATIC_FORCEINLINE unsigned long __RV_PKBB32(unsigned long a, unsigned long b)
15741 {
15742 register unsigned long result;
15743 __ASM volatile("pkbb32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
15744 return result;
15745 }
15746 /* ===== Inline Function End for 4.22.1. PKBB32 ===== */
15747
15748 /* ===== Inline Function Start for 4.22.2. PKBT32 ===== */
15749 /**
15750 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK
15751 * \brief PKBT32 (Pack Two 32-bit Data from Bottom and Top Half)
15752 * \details
15753 * **Type**: DSP (RV64 Only)
15754 *
15755 * **Syntax**:\n
15756 * ~~~
15757 * PKBB32 Rd, Rs1, Rs2
15758 * PKBT32 Rd, Rs1, Rs2
15759 * PKTT32 Rd, Rs1, Rs2
15760 * PKTB32 Rd, Rs1, Rs2
15761 * ~~~
15762 *
15763 * **Purpose**:\n
15764 * Pack 32-bit data from 64-bit chunks in two registers.
15765 * * PKBB32: bottom.bottom
15766 * * PKBT32: bottom.top
15767 * * PKTT32: top.top
15768 * * PKTB32: top.bottom
15769 *
15770 * **Description**:\n
15771 * (PKBB32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
15772 * (PKBT32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
15773 * (PKTT32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
15774 * (PKTB32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
15775 *
15776 * **Operations**:\n
15777 * ~~~
15778 * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*0*_]); // PKBB32
15779 * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*1*_]); // PKBT32
15780 * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*1*_]); // PKTT32
15781 * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*0*_]); // PKTB32
15782 * ~~~
15783 *
15784 * \param [in] a unsigned long type of value stored in a
15785 * \param [in] b unsigned long type of value stored in b
15786 * \return value stored in unsigned long type
15787 */
__RV_PKBT32(unsigned long a,unsigned long b)15788 __STATIC_FORCEINLINE unsigned long __RV_PKBT32(unsigned long a, unsigned long b)
15789 {
15790 register unsigned long result;
15791 __ASM volatile("pkbt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
15792 return result;
15793 }
15794 /* ===== Inline Function End for 4.22.2. PKBT32 ===== */
15795
15796 /* ===== Inline Function Start for 4.22.3. PKTT32 ===== */
15797 /**
15798 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK
15799 * \brief PKTT32 (Pack Two 32-bit Data from Both Top Half)
15800 * \details
15801 * **Type**: DSP (RV64 Only)
15802 *
15803 * **Syntax**:\n
15804 * ~~~
15805 * PKBB32 Rd, Rs1, Rs2
15806 * PKBT32 Rd, Rs1, Rs2
15807 * PKTT32 Rd, Rs1, Rs2
15808 * PKTB32 Rd, Rs1, Rs2
15809 * ~~~
15810 *
15811 * **Purpose**:\n
15812 * Pack 32-bit data from 64-bit chunks in two registers.
15813 * * PKBB32: bottom.bottom
15814 * * PKBT32: bottom.top
15815 * * PKTT32: top.top
15816 * * PKTB32: top.bottom
15817 *
15818 * **Description**:\n
15819 * (PKBB32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
15820 * (PKBT32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
15821 * (PKTT32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
15822 * (PKTB32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
15823 *
15824 * **Operations**:\n
15825 * ~~~
15826 * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*0*_]); // PKBB32
15827 * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*1*_]); // PKBT32
15828 * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*1*_]); // PKTT32
15829 * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*0*_]); // PKTB32
15830 * ~~~
15831 *
15832 * \param [in] a unsigned long type of value stored in a
15833 * \param [in] b unsigned long type of value stored in b
15834 * \return value stored in unsigned long type
15835 */
__RV_PKTT32(unsigned long a,unsigned long b)15836 __STATIC_FORCEINLINE unsigned long __RV_PKTT32(unsigned long a, unsigned long b)
15837 {
15838 register unsigned long result;
15839 __ASM volatile("pktt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
15840 return result;
15841 }
15842 /* ===== Inline Function End for 4.22.3. PKTT32 ===== */
15843
15844 /* ===== Inline Function Start for 4.22.4. PKTB32 ===== */
15845 /**
15846 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK
15847 * \brief PKTB32 (Pack Two 32-bit Data from Top and Bottom Half)
15848 * \details
15849 * **Type**: DSP (RV64 Only)
15850 *
15851 * **Syntax**:\n
15852 * ~~~
15853 * PKBB32 Rd, Rs1, Rs2
15854 * PKBT32 Rd, Rs1, Rs2
15855 * PKTT32 Rd, Rs1, Rs2
15856 * PKTB32 Rd, Rs1, Rs2
15857 * ~~~
15858 *
15859 * **Purpose**:\n
15860 * Pack 32-bit data from 64-bit chunks in two registers.
15861 * * PKBB32: bottom.bottom
15862 * * PKBT32: bottom.top
15863 * * PKTT32: top.top
15864 * * PKTB32: top.bottom
15865 *
15866 * **Description**:\n
15867 * (PKBB32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
15868 * (PKBT32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
15869 * (PKTT32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
15870 * (PKTB32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
15871 *
15872 * **Operations**:\n
15873 * ~~~
15874 * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*0*_]); // PKBB32
15875 * Rd = CONCAT(Rs1.W[_*0*_], Rs2.W[_*1*_]); // PKBT32
15876 * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*1*_]); // PKTT32
15877 * Rd = CONCAT(Rs1.W[_*1*_], Rs2.W[_*0*_]); // PKTB32
15878 * ~~~
15879 *
15880 * \param [in] a unsigned long type of value stored in a
15881 * \param [in] b unsigned long type of value stored in b
15882 * \return value stored in unsigned long type
15883 */
__RV_PKTB32(unsigned long a,unsigned long b)15884 __STATIC_FORCEINLINE unsigned long __RV_PKTB32(unsigned long a, unsigned long b)
15885 {
15886 register unsigned long result;
15887 __ASM volatile("pktb32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
15888 return result;
15889 }
15890 /* ===== Inline Function End for 4.22.4. PKTB32 ===== */
15891
15892 /* ===== Inline Function Start for 4.23. RADD32 ===== */
15893 /**
15894 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
15895 * \brief RADD32 (SIMD 32-bit Signed Halving Addition)
15896 * \details
15897 * **Type**: SIMD (RV64 Only)
15898 *
15899 * **Syntax**:\n
15900 * ~~~
15901 * RADD32 Rd, Rs1, Rs2
15902 * ~~~
15903 *
15904 * **Purpose**:\n
15905 * Do 32-bit signed integer element additions simultaneously. The results are halved to avoid
15906 * overflow or saturation.
15907 *
15908 * **Description**:\n
15909 * This instruction adds the 32-bit signed integer elements in Rs1 with the 32-bit signed
15910 * integer elements in Rs2. The results are first arithmetically right-shifted by 1 bit and then written to
15911 * Rd.
15912 *
15913 * **Examples**:\n
15914 * ~~~
15915 * * Rs1 = 0x7FFFFFFF, Rs2 = 0x7FFFFFFF Rd = 0x7FFFFFFF
15916 * * Rs1 = 0x80000000, Rs2 = 0x80000000 Rd = 0x80000000
15917 * * Rs1 = 0x40000000, Rs2 = 0x80000000 Rd = 0xE0000000
15918 * ~~~
15919 *
15920 * **Operations**:\n
15921 * ~~~
15922 * Rd.W[x] = (Rs1.W[x] + Rs2.W[x]) s>> 1;
15923 * for RV64: x=1...0
15924 * ~~~
15925 *
15926 * \param [in] a unsigned long type of value stored in a
15927 * \param [in] b unsigned long type of value stored in b
15928 * \return value stored in unsigned long type
15929 */
__RV_RADD32(unsigned long a,unsigned long b)15930 __STATIC_FORCEINLINE unsigned long __RV_RADD32(unsigned long a, unsigned long b)
15931 {
15932 register unsigned long result;
15933 __ASM volatile("radd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
15934 return result;
15935 }
15936 /* ===== Inline Function End for 4.23. RADD32 ===== */
15937
15938 /* ===== Inline Function Start for 4.24. RCRAS32 ===== */
15939 /**
15940 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
15941 * \brief RCRAS32 (SIMD 32-bit Signed Halving Cross Addition & Subtraction)
15942 * \details
15943 * **Type**: SIMD (RV64 Only)
15944 *
15945 * **Syntax**:\n
15946 * ~~~
15947 * RCRAS32 Rd, Rs1, Rs2
15948 * ~~~
15949 *
15950 * **Purpose**:\n
15951 * Do 32-bit signed integer element addition and 32-bit signed integer element subtraction in
15952 * a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The results are halved to
15953 * avoid overflow or saturation.
15954 *
15955 * **Description**:\n
15956 * This instruction adds the 32-bit signed integer element in [63:32] of Rs1 with the 32-bit
15957 * signed integer element in [31:0] of Rs2, and subtracts the 32-bit signed integer element in [63:32] of
15958 * Rs2 from the 32-bit signed integer element in [31:0] of Rs1. The element results are first
15959 * arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd
15960 * for subtraction.
15961 *
15962 * **Examples**:\n
15963 * ~~~
15964 * Please see `RADD32` and `RSUB32` instructions.
15965 * ~~~
15966 *
15967 * **Operations**:\n
15968 * ~~~
15969 * Rd.W[1] = (Rs1.W[1] + Rs2.W[0]) s>> 1;
15970 * Rd.W[0] = (Rs1.W[0] - Rs2.W[1]) s>> 1;
15971 * ~~~
15972 *
15973 * \param [in] a unsigned long type of value stored in a
15974 * \param [in] b unsigned long type of value stored in b
15975 * \return value stored in unsigned long type
15976 */
__RV_RCRAS32(unsigned long a,unsigned long b)15977 __STATIC_FORCEINLINE unsigned long __RV_RCRAS32(unsigned long a, unsigned long b)
15978 {
15979 register unsigned long result;
15980 __ASM volatile("rcras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
15981 return result;
15982 }
15983 /* ===== Inline Function End for 4.24. RCRAS32 ===== */
15984
15985 /* ===== Inline Function Start for 4.25. RCRSA32 ===== */
15986 /**
15987 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
15988 * \brief RCRSA32 (SIMD 32-bit Signed Halving Cross Subtraction & Addition)
15989 * \details
15990 * **Type**: SIMD (RV64 Only)
15991 *
15992 * **Syntax**:\n
15993 * ~~~
15994 * RCRSA32 Rd, Rs1, Rs2
15995 * ~~~
15996 *
15997 * **Purpose**:\n
15998 * Do 32-bit signed integer element subtraction and 32-bit signed integer element addition in
15999 * a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The results are halved to
16000 * avoid overflow or saturation.
16001 *
16002 * **Description**:\n
16003 * This instruction subtracts the 32-bit signed integer element in [31:0] of Rs2 from the
 * 32-bit signed integer element in [63:32] of Rs1, and adds the 32-bit signed integer element in [31:0]
16005 * of Rs1 with the 32-bit signed integer element in [63:32] of Rs2. The two results are first
16006 * arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of
16007 * Rd for addition.
16008 *
16009 * **Examples**:\n
16010 * ~~~
16011 * Please see `RADD32` and `RSUB32` instructions.
16012 * ~~~
16013 *
16014 * **Operations**:\n
16015 * ~~~
16016 * Rd.W[1] = (Rs1.W[1] - Rs2.W[0]) s>> 1;
16017 * Rd.W[0] = (Rs1.W[0] + Rs2.W[1]) s>> 1;
16018 * ~~~
16019 *
16020 * \param [in] a unsigned long type of value stored in a
16021 * \param [in] b unsigned long type of value stored in b
16022 * \return value stored in unsigned long type
16023 */
__RV_RCRSA32(unsigned long a,unsigned long b)16024 __STATIC_FORCEINLINE unsigned long __RV_RCRSA32(unsigned long a, unsigned long b)
16025 {
16026 register unsigned long result;
16027 __ASM volatile("rcrsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16028 return result;
16029 }
16030 /* ===== Inline Function End for 4.25. RCRSA32 ===== */
16031
16032 /* ===== Inline Function Start for 4.26. RSTAS32 ===== */
16033 /**
16034 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
16035 * \brief RSTAS32 (SIMD 32-bit Signed Halving Straight Addition & Subtraction)
16036 * \details
16037 * **Type**: SIMD (RV64 Only)
16038 *
16039 * **Syntax**:\n
16040 * ~~~
16041 * RSTAS32 Rd, Rs1, Rs2
16042 * ~~~
16043 *
16044 * **Purpose**:\n
16045 * Do 32-bit signed integer element addition and 32-bit signed integer element subtraction in
16046 * a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements. The results are
16047 * halved to avoid overflow or saturation.
16048 *
16049 * **Description**:\n
16050 * This instruction adds the 32-bit signed integer element in [63:32] of Rs1 with the 32-bit
16051 * signed integer element in [63:32] of Rs2, and subtracts the 32-bit signed integer element in [31:0] of
16052 * Rs2 from the 32-bit signed integer element in [31:0] of Rs1. The element results are first
16053 * arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd
16054 * for subtraction.
16055 *
16056 * **Examples**:\n
16057 * ~~~
16058 * Please see `RADD32` and `RSUB32` instructions.
16059 * ~~~
16060 *
16061 * **Operations**:\n
16062 * ~~~
16063 * Rd.W[1] = (Rs1.W[1] + Rs2.W[1]) s>> 1;
16064 * Rd.W[0] = (Rs1.W[0] - Rs2.W[0]) s>> 1;
16065 * ~~~
16066 *
16067 * \param [in] a unsigned long type of value stored in a
16068 * \param [in] b unsigned long type of value stored in b
16069 * \return value stored in unsigned long type
16070 */
__RV_RSTAS32(unsigned long a,unsigned long b)16071 __STATIC_FORCEINLINE unsigned long __RV_RSTAS32(unsigned long a, unsigned long b)
16072 {
16073 register unsigned long result;
16074 __ASM volatile("rstas32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16075 return result;
16076 }
16077 /* ===== Inline Function End for 4.26. RSTAS32 ===== */
16078
16079 /* ===== Inline Function Start for 4.27. RSTSA32 ===== */
16080 /**
16081 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
16082 * \brief RSTSA32 (SIMD 32-bit Signed Halving Straight Subtraction & Addition)
16083 * \details
16084 * **Type**: SIMD (RV64 Only)
16085 *
16086 * **Syntax**:\n
16087 * ~~~
16088 * RSTSA32 Rd, Rs1, Rs2
16089 * ~~~
16090 *
16091 * **Purpose**:\n
16092 * Do 32-bit signed integer element subtraction and 32-bit signed integer element addition in
16093 * a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements. The results are
16094 * halved to avoid overflow or saturation.
16095 *
16096 * **Description**:\n
16097 * This instruction subtracts the 32-bit signed integer element in [63:32] of Rs2 from the
 * 32-bit signed integer element in [63:32] of Rs1, and adds the 32-bit signed integer element in [31:0]
16099 * of Rs1 with the 32-bit signed integer element in [31:0] of Rs2. The two results are first arithmetically
16100 * right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of Rd for addition.
16101 *
16102 * **Examples**:\n
16103 * ~~~
16104 * Please see `RADD32` and `RSUB32` instructions.
16105 * ~~~
16106 *
16107 * **Operations**:\n
16108 * ~~~
16109 * Rd.W[1] = (Rs1.W[1] - Rs2.W[1]) s>> 1;
16110 * Rd.W[0] = (Rs1.W[0] + Rs2.W[0]) s>> 1;
16111 * ~~~
16112 *
16113 * \param [in] a unsigned long type of value stored in a
16114 * \param [in] b unsigned long type of value stored in b
16115 * \return value stored in unsigned long type
16116 */
__RV_RSTSA32(unsigned long a,unsigned long b)16117 __STATIC_FORCEINLINE unsigned long __RV_RSTSA32(unsigned long a, unsigned long b)
16118 {
16119 register unsigned long result;
16120 __ASM volatile("rstsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16121 return result;
16122 }
16123 /* ===== Inline Function End for 4.27. RSTSA32 ===== */
16124
16125 /* ===== Inline Function Start for 4.28. RSUB32 ===== */
16126 /**
16127 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
16128 * \brief RSUB32 (SIMD 32-bit Signed Halving Subtraction)
16129 * \details
16130 * **Type**: SIMD (RV64 Only)
16131 *
16132 * **Syntax**:\n
16133 * ~~~
16134 * RSUB32 Rd, Rs1, Rs2
16135 * ~~~
16136 *
16137 * **Purpose**:\n
16138 * Do 32-bit signed integer element subtractions simultaneously. The results are halved to
16139 * avoid overflow or saturation.
16140 *
16141 * **Description**:\n
16142 * This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit
16143 * signed integer elements in Rs1. The results are first arithmetically right-shifted by 1 bit and then
16144 * written to Rd.
16145 *
16146 * **Examples**:\n
16147 * ~~~
 * * Rs1 = 0x7FFFFFFF, Rs2 = 0x80000000 Rd = 0x7FFFFFFF
 * * Rs1 = 0x80000000, Rs2 = 0x7FFFFFFF Rd = 0x80000000
 * * Rs1 = 0x80000000, Rs2 = 0x40000000 Rd = 0xA0000000
16151 * ~~~
16152 *
16153 * **Operations**:\n
16154 * ~~~
16155 * Rd.W[x] = (Rs1.W[x] - Rs2.W[x]) s>> 1;
16156 * for RV64: x=1...0
16157 * ~~~
16158 *
16159 * \param [in] a unsigned long type of value stored in a
16160 * \param [in] b unsigned long type of value stored in b
16161 * \return value stored in unsigned long type
16162 */
__RV_RSUB32(unsigned long a,unsigned long b)16163 __STATIC_FORCEINLINE unsigned long __RV_RSUB32(unsigned long a, unsigned long b)
16164 {
16165 register unsigned long result;
16166 __ASM volatile("rsub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16167 return result;
16168 }
16169 /* ===== Inline Function End for 4.28. RSUB32 ===== */
16170
16171 /* ===== Inline Function Start for 4.29. SLL32 ===== */
16172 /**
16173 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
16174 * \brief SLL32 (SIMD 32-bit Shift Left Logical)
16175 * \details
16176 * **Type**: SIMD (RV64 Only)
16177 *
16178 * **Syntax**:\n
16179 * ~~~
16180 * SLL32 Rd, Rs1, Rs2
16181 * ~~~
16182 *
16183 * **Purpose**:\n
16184 * Do 32-bit elements logical left shift operations simultaneously. The shift amount is a
16185 * variable from a GPR.
16186 *
16187 * **Description**:\n
 * The 32-bit elements in Rs1 are left-shifted logically and the results are written to Rd.
 * The vacated low-order bits are filled with zero and the shift amount is specified by the
 * low-order 5 bits of the value in the Rs2 register.
16191 *
16192 * **Operations**:\n
16193 * ~~~
16194 * sa = Rs2[4:0];
16195 * Rd.W[x] = Rs1.W[x] << sa;
16196 * for RV64: x=1...0
16197 * ~~~
16198 *
16199 * \param [in] a unsigned long type of value stored in a
16200 * \param [in] b unsigned int type of value stored in b
16201 * \return value stored in unsigned long type
16202 */
__RV_SLL32(unsigned long a,unsigned int b)16203 __STATIC_FORCEINLINE unsigned long __RV_SLL32(unsigned long a, unsigned int b)
16204 {
16205 register unsigned long result;
16206 __ASM volatile("sll32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16207 return result;
16208 }
16209 /* ===== Inline Function End for 4.29. SLL32 ===== */
16210
16211 /* ===== Inline Function Start for 4.30. SLLI32 ===== */
16212 /**
16213 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
16214 * \brief SLLI32 (SIMD 32-bit Shift Left Logical Immediate)
16215 * \details
16216 * **Type**: SIMD (RV64 Only)
16217 *
16218 * **Syntax**:\n
16219 * ~~~
16220 * SLLI32 Rd, Rs1, imm5u[4:0]
16221 * ~~~
16222 *
16223 * **Purpose**:\n
16224 * Do 32-bit element logical left shift operations simultaneously. The shift amount is an
16225 * immediate value.
16226 *
16227 * **Description**:\n
 * The 32-bit elements in Rs1 are left-shifted logically. The vacated low-order bits are filled with
 * zero and the shift amount is specified by the imm5u[4:0] constant. The results are written to Rd.
16230 *
16231 * **Operations**:\n
16232 * ~~~
16233 * sa = imm5u[4:0];
16234 * Rd.W[x] = Rs1.W[x] << sa;
16235 * for RV64: x=1...0
16236 * ~~~
16237 *
16238 * \param [in] a unsigned long type of value stored in a
16239 * \param [in] b unsigned int type of value stored in b
16240 * \return value stored in unsigned long type
16241 */
__RV_SLLI32(unsigned long a,unsigned int b)16242 __STATIC_FORCEINLINE unsigned long __RV_SLLI32(unsigned long a, unsigned int b)
16243 {
16244 register unsigned long result;
16245 __ASM volatile("slli32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16246 return result;
16247 }
16248 /* ===== Inline Function End for 4.30. SLLI32 ===== */
16249
16250 /* ===== Inline Function Start for 4.31. SMAX32 ===== */
16251 /**
16252 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
16253 * \brief SMAX32 (SIMD 32-bit Signed Maximum)
16254 * \details
16255 * **Type**: SIMD (RV64 Only)
16256 *
16257 * **Syntax**:\n
16258 * ~~~
16259 * SMAX32 Rd, Rs1, Rs2
16260 * ~~~
16261 *
16262 * **Purpose**:\n
16263 * Do 32-bit signed integer elements finding maximum operations simultaneously.
16264 *
16265 * **Description**:\n
 * This instruction compares the 32-bit signed integer elements in Rs1 with the 32-bit
 * signed integer elements in Rs2 and, for each pair, selects the greater of the two numbers. The
 * selected results are written to Rd.
16269 *
16270 * **Operations**:\n
16271 * ~~~
16272 * Rd.W[x] = (Rs1.W[x] > Rs2.W[x])? Rs1.W[x] : Rs2.W[x];
16273 * for RV64: x=1...0
16274 * ~~~
16275 *
16276 * \param [in] a unsigned long type of value stored in a
16277 * \param [in] b unsigned long type of value stored in b
16278 * \return value stored in unsigned long type
16279 */
__RV_SMAX32(unsigned long a,unsigned long b)16280 __STATIC_FORCEINLINE unsigned long __RV_SMAX32(unsigned long a, unsigned long b)
16281 {
16282 register unsigned long result;
16283 __ASM volatile("smax32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16284 return result;
16285 }
16286 /* ===== Inline Function End for 4.31. SMAX32 ===== */
16287
16288 /* ===== Inline Function Start for 4.32.1. SMBB32 ===== */
16289 /**
16290 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT
16291 * \brief SMBB32 (Signed Multiply Bottom Word & Bottom Word)
16292 * \details
16293 * **Type**: DSP (RV64 Only)
16294 *
16295 * **Syntax**:\n
16296 * ~~~
16297 * SMBB32 Rd, Rs1, Rs2
16298 * SMBT32 Rd, Rs1, Rs2
16299 * SMTT32 Rd, Rs1, Rs2
16300 * ~~~
16301 *
16302 * **Purpose**:\n
16303 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another
16304 * register and write the 64-bit result to a third register.
16305 * * SMBB32: bottom*bottom
16306 * * SMBT32: bottom*top
16307 * * SMTT32: top*top
16308 *
16309 * **Description**:\n
16310 * For the `SMBB32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
16311 * element of Rs2. It is actually an alias of `MULSR64` instruction.
16312 * For the `SMBT32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
16313 * element of Rs2.
16314 * For the `SMTT32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit element
16315 * of Rs2.
16316 * The 64-bit multiplication result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as
16317 * signed integers.
16318 *
16319 * **Operations**:\n
16320 * ~~~
 * res = Rs1.W[0] * Rs2.W[0]; // SMBB32
 * res = Rs1.W[0] * Rs2.W[1]; // SMBT32
 * res = Rs1.W[1] * Rs2.W[1]; // SMTT32
 * Rd = res;
16323 * ~~~
16324 *
16325 * \param [in] a unsigned long type of value stored in a
16326 * \param [in] b unsigned long type of value stored in b
16327 * \return value stored in long type
16328 */
__RV_SMBB32(unsigned long a,unsigned long b)16329 __STATIC_FORCEINLINE long __RV_SMBB32(unsigned long a, unsigned long b)
16330 {
16331 register long result;
16332 __ASM volatile("smbb32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16333 return result;
16334 }
16335 /* ===== Inline Function End for 4.32.1. SMBB32 ===== */
16336
16337 /* ===== Inline Function Start for 4.32.2. SMBT32 ===== */
16338 /**
16339 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT
16340 * \brief SMBT32 (Signed Multiply Bottom Word & Top Word)
16341 * \details
16342 * **Type**: DSP (RV64 Only)
16343 *
16344 * **Syntax**:\n
16345 * ~~~
16346 * SMBB32 Rd, Rs1, Rs2
16347 * SMBT32 Rd, Rs1, Rs2
16348 * SMTT32 Rd, Rs1, Rs2
16349 * ~~~
16350 *
16351 * **Purpose**:\n
16352 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another
16353 * register and write the 64-bit result to a third register.
16354 * * SMBB32: bottom*bottom
16355 * * SMBT32: bottom*top
16356 * * SMTT32: top*top
16357 *
16358 * **Description**:\n
16359 * For the `SMBB32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
16360 * element of Rs2. It is actually an alias of `MULSR64` instruction.
16361 * For the `SMBT32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
16362 * element of Rs2.
16363 * For the `SMTT32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit element
16364 * of Rs2.
16365 * The 64-bit multiplication result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as
16366 * signed integers.
16367 *
16368 * **Operations**:\n
16369 * ~~~
 * res = Rs1.W[0] * Rs2.W[0]; // SMBB32
 * res = Rs1.W[0] * Rs2.W[1]; // SMBT32
 * res = Rs1.W[1] * Rs2.W[1]; // SMTT32
 * Rd = res;
16372 * ~~~
16373 *
16374 * \param [in] a unsigned long type of value stored in a
16375 * \param [in] b unsigned long type of value stored in b
16376 * \return value stored in long type
16377 */
__RV_SMBT32(unsigned long a,unsigned long b)16378 __STATIC_FORCEINLINE long __RV_SMBT32(unsigned long a, unsigned long b)
16379 {
16380 register long result;
16381 __ASM volatile("smbt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16382 return result;
16383 }
16384 /* ===== Inline Function End for 4.32.2. SMBT32 ===== */
16385
16386 /* ===== Inline Function Start for 4.32.3. SMTT32 ===== */
16387 /**
16388 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT
16389 * \brief SMTT32 (Signed Multiply Top Word & Top Word)
16390 * \details
16391 * **Type**: DSP (RV64 Only)
16392 *
16393 * **Syntax**:\n
16394 * ~~~
16395 * SMBB32 Rd, Rs1, Rs2
16396 * SMBT32 Rd, Rs1, Rs2
16397 * SMTT32 Rd, Rs1, Rs2
16398 * ~~~
16399 *
16400 * **Purpose**:\n
16401 * Multiply the signed 32-bit element of a register with the signed 32-bit element of another
16402 * register and write the 64-bit result to a third register.
16403 * * SMBB32: bottom*bottom
16404 * * SMBT32: bottom*top
16405 * * SMTT32: top*top
16406 *
16407 * **Description**:\n
16408 * For the `SMBB32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
16409 * element of Rs2. It is actually an alias of `MULSR64` instruction.
16410 * For the `SMBT32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
16411 * element of Rs2.
16412 * For the `SMTT32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit element
16413 * of Rs2.
16414 * The 64-bit multiplication result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as
16415 * signed integers.
16416 *
16417 * **Operations**:\n
16418 * ~~~
 * res = Rs1.W[0] * Rs2.W[0]; // SMBB32
 * res = Rs1.W[0] * Rs2.W[1]; // SMBT32
 * res = Rs1.W[1] * Rs2.W[1]; // SMTT32
 * Rd = res;
16421 * ~~~
16422 *
16423 * \param [in] a unsigned long type of value stored in a
16424 * \param [in] b unsigned long type of value stored in b
16425 * \return value stored in long type
16426 */
__RV_SMTT32(unsigned long a,unsigned long b)16427 __STATIC_FORCEINLINE long __RV_SMTT32(unsigned long a, unsigned long b)
16428 {
16429 register long result;
16430 __ASM volatile("smtt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16431 return result;
16432 }
16433 /* ===== Inline Function End for 4.32.3. SMTT32 ===== */
16434
16435 /* ===== Inline Function Start for 4.33.1. SMDS32 ===== */
16436 /**
16437 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
16438 * \brief SMDS32 (Signed Multiply Two Words and Subtract)
16439 * \details
16440 * **Type**: DSP (RV64 Only)
16441 *
16442 * **Syntax**:\n
16443 * ~~~
16444 * SMDS32 Rd, Rs1, Rs2
16445 * SMDRS32 Rd, Rs1, Rs2
16446 * SMXDS32 Rd, Rs1, Rs2
16447 * ~~~
16448 *
16449 * **Purpose**:\n
 * Do two signed 32-bit multiplications from the 32-bit elements of two registers; and then
16451 * perform a subtraction operation between the two 64-bit results.
16452 * * SMDS32: top*top - bottom*bottom
16453 * * SMDRS32: bottom*bottom - top*top
16454 * * SMXDS32: top*bottom - bottom*top
16455 *
16456 * **Description**:\n
16457 * For the `SMDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
16458 * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
16459 * Rs1 with the top 32-bit element of Rs2.
16460 * For the `SMDRS32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit
16461 * element of Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
16462 * element of Rs1 with the bottom 32-bit element of Rs2.
16463 * For the `SMXDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
16464 * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
16465 * Rs1 with the bottom 32-bit element of Rs2.
16466 * The subtraction result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
16467 * integers.
16468 *
16469 * **Operations**:\n
16470 * ~~~
 * Rd = (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // SMDS32
 * Rd = (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // SMDRS32
 * Rd = (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // SMXDS32
16474 * ~~~
16475 *
16476 * \param [in] a unsigned long type of value stored in a
16477 * \param [in] b unsigned long type of value stored in b
16478 * \return value stored in long type
16479 */
__RV_SMDS32(unsigned long a,unsigned long b)16480 __STATIC_FORCEINLINE long __RV_SMDS32(unsigned long a, unsigned long b)
16481 {
16482 register long result;
16483 __ASM volatile("smds32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16484 return result;
16485 }
16486 /* ===== Inline Function End for 4.33.1. SMDS32 ===== */
16487
16488 /* ===== Inline Function Start for 4.33.2. SMDRS32 ===== */
16489 /**
16490 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
16491 * \brief SMDRS32 (Signed Multiply Two Words and Reverse Subtract)
16492 * \details
16493 * **Type**: DSP (RV64 Only)
16494 *
16495 * **Syntax**:\n
16496 * ~~~
16497 * SMDS32 Rd, Rs1, Rs2
16498 * SMDRS32 Rd, Rs1, Rs2
16499 * SMXDS32 Rd, Rs1, Rs2
16500 * ~~~
16501 *
16502 * **Purpose**:\n
 * Do two signed 32-bit multiplications from the 32-bit elements of two registers; and then
16504 * perform a subtraction operation between the two 64-bit results.
16505 * * SMDS32: top*top - bottom*bottom
16506 * * SMDRS32: bottom*bottom - top*top
16507 * * SMXDS32: top*bottom - bottom*top
16508 *
16509 * **Description**:\n
16510 * For the `SMDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
16511 * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
16512 * Rs1 with the top 32-bit element of Rs2.
16513 * For the `SMDRS32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit
16514 * element of Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
16515 * element of Rs1 with the bottom 32-bit element of Rs2.
16516 * For the `SMXDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
16517 * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
16518 * Rs1 with the bottom 32-bit element of Rs2.
16519 * The subtraction result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
16520 * integers.
16521 *
16522 * **Operations**:\n
16523 * ~~~
 * Rd = (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // SMDS32
 * Rd = (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // SMDRS32
 * Rd = (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // SMXDS32
16527 * ~~~
16528 *
16529 * \param [in] a unsigned long type of value stored in a
16530 * \param [in] b unsigned long type of value stored in b
16531 * \return value stored in long type
16532 */
__RV_SMDRS32(unsigned long a,unsigned long b)16533 __STATIC_FORCEINLINE long __RV_SMDRS32(unsigned long a, unsigned long b)
16534 {
16535 register long result;
16536 __ASM volatile("smdrs32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16537 return result;
16538 }
16539 /* ===== Inline Function End for 4.33.2. SMDRS32 ===== */
16540
16541 /* ===== Inline Function Start for 4.33.3. SMXDS32 ===== */
16542 /**
16543 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
16544 * \brief SMXDS32 (Signed Crossed Multiply Two Words and Subtract)
16545 * \details
16546 * **Type**: DSP (RV64 Only)
16547 *
16548 * **Syntax**:\n
16549 * ~~~
16550 * SMDS32 Rd, Rs1, Rs2
16551 * SMDRS32 Rd, Rs1, Rs2
16552 * SMXDS32 Rd, Rs1, Rs2
16553 * ~~~
16554 *
16555 * **Purpose**:\n
 * Do two signed 32-bit multiplications from the 32-bit elements of two registers; and then
16557 * perform a subtraction operation between the two 64-bit results.
16558 * * SMDS32: top*top - bottom*bottom
16559 * * SMDRS32: bottom*bottom - top*top
16560 * * SMXDS32: top*bottom - bottom*top
16561 *
16562 * **Description**:\n
16563 * For the `SMDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
16564 * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
16565 * Rs1 with the top 32-bit element of Rs2.
16566 * For the `SMDRS32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit
16567 * element of Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
16568 * element of Rs1 with the bottom 32-bit element of Rs2.
16569 * For the `SMXDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
16570 * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
16571 * Rs1 with the bottom 32-bit element of Rs2.
16572 * The subtraction result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
16573 * integers.
16574 *
16575 * **Operations**:\n
16576 * ~~~
 * Rd = (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // SMDS32
 * Rd = (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // SMDRS32
 * Rd = (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // SMXDS32
16580 * ~~~
16581 *
16582 * \param [in] a unsigned long type of value stored in a
16583 * \param [in] b unsigned long type of value stored in b
16584 * \return value stored in long type
16585 */
__RV_SMXDS32(unsigned long a,unsigned long b)16586 __STATIC_FORCEINLINE long __RV_SMXDS32(unsigned long a, unsigned long b)
16587 {
16588 register long result;
16589 __ASM volatile("smxds32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16590 return result;
16591 }
16592 /* ===== Inline Function End for 4.33.3. SMXDS32 ===== */
16593
16594 /* ===== Inline Function Start for 4.34. SMIN32 ===== */
16595 /**
16596 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
16597 * \brief SMIN32 (SIMD 32-bit Signed Minimum)
16598 * \details
16599 * **Type**: SIMD (RV64 Only)
16600 *
16601 * **Syntax**:\n
16602 * ~~~
16603 * SMIN32 Rd, Rs1, Rs2
16604 * ~~~
16605 *
16606 * **Purpose**:\n
16607 * Do 32-bit signed integer elements finding minimum operations simultaneously.
16608 *
16609 * **Description**:\n
 * This instruction compares the 32-bit signed integer elements in Rs1 with the 32-bit
 * signed integer elements in Rs2 and, for each pair, selects the lesser of the two numbers. The selected
 * results are written to Rd.
16613 *
16614 * **Operations**:\n
16615 * ~~~
16616 * Rd.W[x] = (Rs1.W[x] < Rs2.W[x])? Rs1.W[x] : Rs2.W[x];
16617 * for RV64: x=1...0
16618 * ~~~
16619 *
16620 * \param [in] a unsigned long type of value stored in a
16621 * \param [in] b unsigned long type of value stored in b
16622 * \return value stored in unsigned long type
16623 */
__RV_SMIN32(unsigned long a,unsigned long b)16624 __STATIC_FORCEINLINE unsigned long __RV_SMIN32(unsigned long a, unsigned long b)
16625 {
16626 register unsigned long result;
16627 __ASM volatile("smin32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16628 return result;
16629 }
16630 /* ===== Inline Function End for 4.34. SMIN32 ===== */
16631
16632 /* ===== Inline Function Start for 4.35.1. SRA32 ===== */
16633 /**
16634 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
16635 * \brief SRA32 (SIMD 32-bit Shift Right Arithmetic)
16636 * \details
16637 * **Type**: SIMD (RV64 Only)
16638 *
16639 * **Syntax**:\n
16640 * ~~~
16641 * SRA32 Rd, Rs1, Rs2
16642 * SRA32.u Rd, Rs1, Rs2
16643 * ~~~
16644 *
16645 * **Purpose**:\n
16646 * Do 32-bit element arithmetic right shift operations simultaneously. The shift amount is a
16647 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
16648 * results.
16649 *
16650 * **Description**:\n
 * The 32-bit data elements in Rs1 are right-shifted arithmetically, that is, the vacated
 * high-order bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
16653 * 5-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
16654 * added to the most significant discarded bit of each 32-bit data element to calculate the final results.
16655 * And the results are written to Rd.
16656 *
16657 * **Operations**:\n
16658 * ~~~
 * sa = Rs2[4:0];
 * if (sa > 0) {
 *   if (`.u` form) { // SRA32.u
 *     res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
 *     Rd.W[x] = res[31:0];
 *   } else { // SRA32
 *     Rd.W[x] = SE32(Rs1.W[x][31:sa])
 *   }
 * } else {
 *   Rd = Rs1;
 * }
16670 * for RV64: x=1...0
16671 * ~~~
16672 *
16673 * \param [in] a unsigned long type of value stored in a
16674 * \param [in] b unsigned int type of value stored in b
16675 * \return value stored in unsigned long type
16676 */
__RV_SRA32(unsigned long a,unsigned int b)16677 __STATIC_FORCEINLINE unsigned long __RV_SRA32(unsigned long a, unsigned int b)
16678 {
16679 register unsigned long result;
16680 __ASM volatile("sra32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16681 return result;
16682 }
16683 /* ===== Inline Function End for 4.35.1. SRA32 ===== */
16684
16685 /* ===== Inline Function Start for 4.35.2. SRA32.u ===== */
16686 /**
16687 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
16688 * \brief SRA32.u (SIMD 32-bit Rounding Shift Right Arithmetic)
16689 * \details
16690 * **Type**: SIMD (RV64 Only)
16691 *
16692 * **Syntax**:\n
16693 * ~~~
16694 * SRA32 Rd, Rs1, Rs2
16695 * SRA32.u Rd, Rs1, Rs2
16696 * ~~~
16697 *
16698 * **Purpose**:\n
16699 * Do 32-bit element arithmetic right shift operations simultaneously. The shift amount is a
16700 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
16701 * results.
16702 *
16703 * **Description**:\n
 * The 32-bit data elements in Rs1 are right-shifted arithmetically, that is, the vacated
 * high-order bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
16706 * 5-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
16707 * added to the most significant discarded bit of each 32-bit data element to calculate the final results.
16708 * And the results are written to Rd.
16709 *
16710 * **Operations**:\n
16711 * ~~~
16712 * sa = Rs2[4:0];
16713 * if (sa > 0) {
16714 * if (`.u` form) { // SRA32.u
16715 * res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
16716 * Rd.W[x] = res[31:0];
 *   } else { // SRA32
16718 * Rd.W[x] = SE32(Rs1.W[x][31:sa])
16719 * }
16720 * } else {
16721 * Rd = Rs1;
16722 * }
16723 * for RV64: x=1...0
16724 * ~~~
16725 *
16726 * \param [in] a unsigned long type of value stored in a
16727 * \param [in] b unsigned int type of value stored in b
16728 * \return value stored in unsigned long type
16729 */
__RV_SRA32_U(unsigned long a,unsigned int b)16730 __STATIC_FORCEINLINE unsigned long __RV_SRA32_U(unsigned long a, unsigned int b)
16731 {
16732 register unsigned long result;
16733 __ASM volatile("sra32.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16734 return result;
16735 }
16736 /* ===== Inline Function End for 4.35.2. SRA32.u ===== */
16737
16738 /* ===== Inline Function Start for 4.36.1. SRAI32 ===== */
16739 /**
16740 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
16741 * \brief SRAI32 (SIMD 32-bit Shift Right Arithmetic Immediate)
16742 * \details
16743 * **Type**: DSP (RV64 Only)
16744 *
16745 * **Syntax**:\n
16746 * ~~~
16747 * SRAI32 Rd, Rs1, imm5u
16748 * SRAI32.u Rd, Rs1, imm5u
16749 * ~~~
16750 *
16751 * **Purpose**:\n
16752 * Do 32-bit elements arithmetic right shift operations simultaneously. The shift amount is
16753 * an immediate value. The `.u` form performs additional rounding up operations on the shifted
16754 * results.
16755 *
16756 * **Description**:\n
16757 * The 32-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
16758 * bits are filled with the sign-bit of the 32-bit data elements. The shift amount is specified by the
16759 * imm5u constant. For the rounding operation of the `.u` form, a value of 1 is added to the most
16760 * significant discarded bit of each 32-bit data to calculate the final results. And the results are written
16761 * to Rd.
16762 *
16763 * **Operations**:\n
16764 * ~~~
16765 * sa = imm5u[4:0];
16766 * if (sa > 0) {
16767 * if (`.u` form) { // SRAI32.u
16768 * res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
16769 * Rd.W[x] = res[31:0];
 *   } else { // SRAI32
16771 * Rd.W[x] = SE32(Rs1.W[x][31:sa]);
16772 * }
16773 * } else {
16774 * Rd = Rs1;
16775 * }
16776 * for RV64: x=1...0
16777 * ~~~
16778 *
16779 * \param [in] a unsigned long type of value stored in a
16780 * \param [in] b unsigned int type of value stored in b
16781 * \return value stored in unsigned long type
16782 */
/*
 * Implemented as a macro because SRAI32 takes its shift amount as a 5-bit
 * immediate (imm5u): `b` must be an integer constant expression in 0..31.
 * The "K" constraint (RISC-V 5-bit unsigned immediate) makes the compiler
 * reject anything else, instead of emitting a register operand that the
 * instruction cannot encode. `a` is evaluated exactly once.
 * Call syntax is unchanged: __RV_SRAI32(a, b) yields an unsigned long.
 */
#define __RV_SRAI32(a, b)    \
    ({    \
        unsigned long __rv_res;    \
        unsigned long __rv_a = (unsigned long)(a);    \
        __ASM volatile("srai32 %0, %1, %2" : "=r"(__rv_res) : "r"(__rv_a), "K"(b));    \
        __rv_res;    \
    })
16789 /* ===== Inline Function End for 4.36.1. SRAI32 ===== */
16790
16791 /* ===== Inline Function Start for 4.36.2. SRAI32.u ===== */
16792 /**
16793 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
16794 * \brief SRAI32.u (SIMD 32-bit Rounding Shift Right Arithmetic Immediate)
16795 * \details
16796 * **Type**: DSP (RV64 Only)
16797 *
16798 * **Syntax**:\n
16799 * ~~~
16800 * SRAI32 Rd, Rs1, imm5u
16801 * SRAI32.u Rd, Rs1, imm5u
16802 * ~~~
16803 *
16804 * **Purpose**:\n
16805 * Do 32-bit elements arithmetic right shift operations simultaneously. The shift amount is
16806 * an immediate value. The `.u` form performs additional rounding up operations on the shifted
16807 * results.
16808 *
16809 * **Description**:\n
16810 * The 32-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
16811 * bits are filled with the sign-bit of the 32-bit data elements. The shift amount is specified by the
16812 * imm5u constant. For the rounding operation of the `.u` form, a value of 1 is added to the most
16813 * significant discarded bit of each 32-bit data to calculate the final results. And the results are written
16814 * to Rd.
16815 *
16816 * **Operations**:\n
16817 * ~~~
16818 * sa = imm5u[4:0];
16819 * if (sa > 0) {
16820 * if (`.u` form) { // SRAI32.u
16821 * res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
16822 * Rd.W[x] = res[31:0];
 *   } else { // SRAI32
16824 * Rd.W[x] = SE32(Rs1.W[x][31:sa]);
16825 * }
16826 * } else {
16827 * Rd = Rs1;
16828 * }
16829 * for RV64: x=1...0
16830 * ~~~
16831 *
16832 * \param [in] a unsigned long type of value stored in a
16833 * \param [in] b unsigned int type of value stored in b
16834 * \return value stored in unsigned long type
16835 */
/*
 * Implemented as a macro because SRAI32.u takes its shift amount as a 5-bit
 * immediate (imm5u): `b` must be an integer constant expression in 0..31.
 * The "K" constraint (RISC-V 5-bit unsigned immediate) makes the compiler
 * reject anything else, instead of emitting a register operand that the
 * instruction cannot encode. `a` is evaluated exactly once.
 * Call syntax is unchanged: __RV_SRAI32_U(a, b) yields an unsigned long.
 */
#define __RV_SRAI32_U(a, b)    \
    ({    \
        unsigned long __rv_res;    \
        unsigned long __rv_a = (unsigned long)(a);    \
        __ASM volatile("srai32.u %0, %1, %2" : "=r"(__rv_res) : "r"(__rv_a), "K"(b));    \
        __rv_res;    \
    })
16842 /* ===== Inline Function End for 4.36.2. SRAI32.u ===== */
16843
16844 /* ===== Inline Function Start for 4.37. SRAIW.u ===== */
16845 /**
16846 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_NON_SIMD_32B_SHIFT
16847 * \brief SRAIW.u (Rounding Shift Right Arithmetic Immediate Word)
16848 * \details
16849 * **Type**: DSP (RV64 only)
16850 *
16851 * **Syntax**:\n
16852 * ~~~
16853 * SRAIW.u Rd, Rs1, imm5u
16854 * ~~~
16855 *
16856 * **Purpose**:\n
16857 * Perform a 32-bit arithmetic right shift operation with rounding. The shift amount is an
16858 * immediate value.
16859 *
16860 * **Description**:\n
16861 * This instruction right-shifts the lower 32-bit content of Rs1 arithmetically. The shifted
16862 * out bits are filled with the sign-bit Rs1(31) and the shift amount is specified by the imm5u constant.
16863 * For the rounding operation, a value of 1 is added to the most significant discarded bit of the data to
16864 * calculate the final result. And the result is sign-extended and written to Rd.
16865 *
16866 * **Operations**:\n
16867 * ~~~
16868 * sa = imm5u;
16869 * if (sa != 0) {
16870 * res[31:-1] = SE33(Rs1[31:(sa-1)]) + 1;
16871 * Rd = SE32(res[31:0]);
16872 * } else {
16873 * Rd = SE32(Rs1.W[0]);
16874 * }
16875 * ~~~
16876 *
16877 * \param [in] a int type of value stored in a
16878 * \param [in] b unsigned int type of value stored in b
16879 * \return value stored in long type
16880 */
/*
 * Implemented as a macro because SRAIW.u takes its shift amount as a 5-bit
 * immediate (imm5u): `b` must be an integer constant expression in 0..31.
 * The "K" constraint (RISC-V 5-bit unsigned immediate) makes the compiler
 * reject anything else, instead of emitting a register operand that the
 * instruction cannot encode. `a` is converted to int and evaluated once.
 * Call syntax is unchanged: __RV_SRAIW_U(a, b) yields a long.
 */
#define __RV_SRAIW_U(a, b)    \
    ({    \
        long __rv_res;    \
        int __rv_a = (int)(a);    \
        __ASM volatile("sraiw.u %0, %1, %2" : "=r"(__rv_res) : "r"(__rv_a), "K"(b));    \
        __rv_res;    \
    })
16887 /* ===== Inline Function End for 4.37. SRAIW.u ===== */
16888
16889 /* ===== Inline Function Start for 4.38.1. SRL32 ===== */
16890 /**
16891 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
16892 * \brief SRL32 (SIMD 32-bit Shift Right Logical)
16893 * \details
16894 * **Type**: SIMD (RV64 Only)
16895 *
16896 * **Syntax**:\n
16897 * ~~~
16898 * SRL32 Rd, Rs1, Rs2
16899 * SRL32.u Rd, Rs1, Rs2
16900 * ~~~
16901 *
16902 * **Purpose**:\n
16903 * Do 32-bit element logical right shift operations simultaneously. The shift amount is a
16904 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
16905 * results.
16906 *
16907 * **Description**:\n
16908 * The 32-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
16909 * are filled with zero. The shift amount is specified by the low-order 5-bits of the value in the Rs2
16910 * register. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
16911 * discarded bit of each 32-bit data element to calculate the final results. And the results are written to
16912 * Rd.
16913 *
16914 * **Operations**:\n
16915 * ~~~
16916 * sa = Rs2[4:0];
16917 * if (sa > 0) {
 *   if (`.u` form) { // SRL32.u
 *     res[31:-1] = ZE33(Rs1.W[x][31:sa-1]) + 1;
 *     Rd.W[x] = res[31:0];
 *   } else { // SRL32
16922 * Rd.W[x] = ZE32(Rs1.W[x][31:sa])
16923 * }
16924 * } else {
16925 * Rd = Rs1;
16926 * }
16927 * for RV64: x=1...0
16928 * ~~~
16929 *
16930 * \param [in] a unsigned long type of value stored in a
16931 * \param [in] b unsigned int type of value stored in b
16932 * \return value stored in unsigned long type
16933 */
__RV_SRL32(unsigned long a,unsigned int b)16934 __STATIC_FORCEINLINE unsigned long __RV_SRL32(unsigned long a, unsigned int b)
16935 {
16936 register unsigned long result;
16937 __ASM volatile("srl32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16938 return result;
16939 }
16940 /* ===== Inline Function End for 4.38.1. SRL32 ===== */
16941
16942 /* ===== Inline Function Start for 4.38.2. SRL32.u ===== */
16943 /**
16944 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
16945 * \brief SRL32.u (SIMD 32-bit Rounding Shift Right Logical)
16946 * \details
16947 * **Type**: SIMD (RV64 Only)
16948 *
16949 * **Syntax**:\n
16950 * ~~~
16951 * SRL32 Rd, Rs1, Rs2
16952 * SRL32.u Rd, Rs1, Rs2
16953 * ~~~
16954 *
16955 * **Purpose**:\n
16956 * Do 32-bit element logical right shift operations simultaneously. The shift amount is a
16957 * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
16958 * results.
16959 *
16960 * **Description**:\n
16961 * The 32-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
16962 * are filled with zero. The shift amount is specified by the low-order 5-bits of the value in the Rs2
16963 * register. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
16964 * discarded bit of each 32-bit data element to calculate the final results. And the results are written to
16965 * Rd.
16966 *
16967 * **Operations**:\n
16968 * ~~~
16969 * sa = Rs2[4:0];
16970 * if (sa > 0) {
 *   if (`.u` form) { // SRL32.u
 *     res[31:-1] = ZE33(Rs1.W[x][31:sa-1]) + 1;
 *     Rd.W[x] = res[31:0];
 *   } else { // SRL32
16975 * Rd.W[x] = ZE32(Rs1.W[x][31:sa])
16976 * }
16977 * } else {
16978 * Rd = Rs1;
16979 * }
16980 * for RV64: x=1...0
16981 * ~~~
16982 *
16983 * \param [in] a unsigned long type of value stored in a
16984 * \param [in] b unsigned int type of value stored in b
16985 * \return value stored in unsigned long type
16986 */
__RV_SRL32_U(unsigned long a,unsigned int b)16987 __STATIC_FORCEINLINE unsigned long __RV_SRL32_U(unsigned long a, unsigned int b)
16988 {
16989 register unsigned long result;
16990 __ASM volatile("srl32.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
16991 return result;
16992 }
16993 /* ===== Inline Function End for 4.38.2. SRL32.u ===== */
16994
16995 /* ===== Inline Function Start for 4.39.1. SRLI32 ===== */
16996 /**
16997 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
16998 * \brief SRLI32 (SIMD 32-bit Shift Right Logical Immediate)
16999 * \details
17000 * **Type**: SIMD (RV64 Only)
17001 *
17002 * **Syntax**:\n
17003 * ~~~
17004 * SRLI32 Rd, Rs1, imm5u
17005 * SRLI32.u Rd, Rs1, imm5u
17006 * ~~~
17007 *
17008 * **Purpose**:\n
17009 * Do 32-bit elements logical right shift operations simultaneously. The shift amount is an
17010 * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
17011 *
17012 * **Description**:\n
17013 * The 32-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
17014 * are filled with zero. The shift amount is specified by the imm5u constant. For the rounding
17015 * operation of the `.u` form, a value of 1 is added to the most significant discarded bit of each 32-bit
17016 * data to calculate the final results. And the results are written to Rd.
17017 *
17018 * **Operations**:\n
17019 * ~~~
17020 * sa = imm5u[4:0];
17021 * if (sa > 0) {
17022 * if (`.u` form) { // SRLI32.u
17023 * res[31:-1] = ZE33(Rs1.W[x][31:sa-1]) + 1;
17024 * Rd.W[x] = res[31:0];
 *   } else { // SRLI32
17026 * Rd.W[x] = ZE32(Rs1.W[x][31:sa]);
17027 * }
17028 * } else {
17029 * Rd = Rs1;
17030 * }
17031 * for RV64: x=1...0
17032 * ~~~
17033 *
17034 * \param [in] a unsigned long type of value stored in a
17035 * \param [in] b unsigned int type of value stored in b
17036 * \return value stored in unsigned long type
17037 */
/*
 * Implemented as a macro because SRLI32 takes its shift amount as a 5-bit
 * immediate (imm5u): `b` must be an integer constant expression in 0..31.
 * The "K" constraint (RISC-V 5-bit unsigned immediate) makes the compiler
 * reject anything else, instead of emitting a register operand that the
 * instruction cannot encode. `a` is evaluated exactly once.
 * Call syntax is unchanged: __RV_SRLI32(a, b) yields an unsigned long.
 */
#define __RV_SRLI32(a, b)    \
    ({    \
        unsigned long __rv_res;    \
        unsigned long __rv_a = (unsigned long)(a);    \
        __ASM volatile("srli32 %0, %1, %2" : "=r"(__rv_res) : "r"(__rv_a), "K"(b));    \
        __rv_res;    \
    })
17044 /* ===== Inline Function End for 4.39.1. SRLI32 ===== */
17045
17046 /* ===== Inline Function Start for 4.39.2. SRLI32.u ===== */
17047 /**
17048 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
17049 * \brief SRLI32.u (SIMD 32-bit Rounding Shift Right Logical Immediate)
17050 * \details
17051 * **Type**: SIMD (RV64 Only)
17052 *
17053 * **Syntax**:\n
17054 * ~~~
17055 * SRLI32 Rd, Rs1, imm5u
17056 * SRLI32.u Rd, Rs1, imm5u
17057 * ~~~
17058 *
17059 * **Purpose**:\n
17060 * Do 32-bit elements logical right shift operations simultaneously. The shift amount is an
17061 * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
17062 *
17063 * **Description**:\n
17064 * The 32-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
17065 * are filled with zero. The shift amount is specified by the imm5u constant. For the rounding
17066 * operation of the `.u` form, a value of 1 is added to the most significant discarded bit of each 32-bit
17067 * data to calculate the final results. And the results are written to Rd.
17068 *
17069 * **Operations**:\n
17070 * ~~~
17071 * sa = imm5u[4:0];
17072 * if (sa > 0) {
17073 * if (`.u` form) { // SRLI32.u
17074 * res[31:-1] = ZE33(Rs1.W[x][31:sa-1]) + 1;
17075 * Rd.W[x] = res[31:0];
 *   } else { // SRLI32
17077 * Rd.W[x] = ZE32(Rs1.W[x][31:sa]);
17078 * }
17079 * } else {
17080 * Rd = Rs1;
17081 * }
17082 * for RV64: x=1...0
17083 * ~~~
17084 *
17085 * \param [in] a unsigned long type of value stored in a
17086 * \param [in] b unsigned int type of value stored in b
17087 * \return value stored in unsigned long type
17088 */
/*
 * Implemented as a macro because SRLI32.u takes its shift amount as a 5-bit
 * immediate (imm5u): `b` must be an integer constant expression in 0..31.
 * The "K" constraint (RISC-V 5-bit unsigned immediate) makes the compiler
 * reject anything else, instead of emitting a register operand that the
 * instruction cannot encode. `a` is evaluated exactly once.
 * Call syntax is unchanged: __RV_SRLI32_U(a, b) yields an unsigned long.
 */
#define __RV_SRLI32_U(a, b)    \
    ({    \
        unsigned long __rv_res;    \
        unsigned long __rv_a = (unsigned long)(a);    \
        __ASM volatile("srli32.u %0, %1, %2" : "=r"(__rv_res) : "r"(__rv_a), "K"(b));    \
        __rv_res;    \
    })
17095 /* ===== Inline Function End for 4.39.2. SRLI32.u ===== */
17096
17097 /* ===== Inline Function Start for 4.40. STAS32 ===== */
17098 /**
17099 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
17100 * \brief STAS32 (SIMD 32-bit Straight Addition & Subtraction)
17101 * \details
17102 * **Type**: SIMD (RV64 Only)
17103 *
17104 * **Syntax**:\n
17105 * ~~~
17106 * STAS32 Rd, Rs1, Rs2
17107 * ~~~
17108 *
17109 * **Purpose**:\n
17110 * Do 32-bit integer element addition and 32-bit integer element subtraction in a 64-bit
17111 * chunk simultaneously. Operands are from corresponding 32-bit elements.
17112 *
17113 * **Description**:\n
17114 * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit
17115 * integer element in [63:32] of Rs2, and writes the result to [63:32] of Rd; at the same time, it subtracts
17116 * the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element in [31:0] of Rs1, and
17117 * writes the result to [31:0] of Rd.
17118 *
17119 * **Note**:\n
17120 * This instruction can be used for either signed or unsigned operations.
17121 *
17122 * **Operations**:\n
17123 * ~~~
17124 * Rd.W[1] = Rs1.W[1] + Rs2.W[1];
17125 * Rd.W[0] = Rs1.W[0] - Rs2.W[0];
17126 * ~~~
17127 *
17128 * \param [in] a unsigned long type of value stored in a
17129 * \param [in] b unsigned long type of value stored in b
17130 * \return value stored in unsigned long type
17131 */
__RV_STAS32(unsigned long a,unsigned long b)17132 __STATIC_FORCEINLINE unsigned long __RV_STAS32(unsigned long a, unsigned long b)
17133 {
17134 register unsigned long result;
17135 __ASM volatile("stas32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17136 return result;
17137 }
17138 /* ===== Inline Function End for 4.40. STAS32 ===== */
17139
17140 /* ===== Inline Function Start for 4.41. STSA32 ===== */
17141 /**
17142 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
17143 * \brief STSA32 (SIMD 32-bit Straight Subtraction & Addition)
17144 * \details
17145 * **Type**: SIMD (RV64 Only)
17146 *
17147 * **Syntax**:\n
17148 * ~~~
17149 * STSA32 Rd, Rs1, Rs2
17150 * ~~~
17151 *
17152 * **Purpose**:\n
17153 * Do 32-bit integer element subtraction and 32-bit integer element addition in a 64-bit
17154 * chunk simultaneously. Operands are from corresponding 32-bit elements.
 *
 * **Description**:\n
 * This instruction subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer
 * element in [63:32] of Rs1, and writes the result to [63:32] of Rd; at the same time, it adds the 32-bit
 * integer element in [31:0] of Rs1 with the 32-bit integer element in [31:0] of Rs2, and writes the result
 * to [31:0] of Rd.
17160 *
17161 * **Note**:\n
17162 * This instruction can be used for either signed or unsigned operations.
17163 *
17164 * **Operations**:\n
17165 * ~~~
17166 * Rd.W[1] = Rs1.W[1] - Rs2.W[1];
17167 * Rd.W[0] = Rs1.W[0] + Rs2.W[0];
17168 * ~~~
17169 *
17170 * \param [in] a unsigned long type of value stored in a
17171 * \param [in] b unsigned long type of value stored in b
17172 * \return value stored in unsigned long type
17173 */
__RV_STSA32(unsigned long a,unsigned long b)17174 __STATIC_FORCEINLINE unsigned long __RV_STSA32(unsigned long a, unsigned long b)
17175 {
17176 register unsigned long result;
17177 __ASM volatile("stsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17178 return result;
17179 }
17180 /* ===== Inline Function End for 4.41. STSA32 ===== */
17181
17182 /* ===== Inline Function Start for 4.42. SUB32 ===== */
17183 /**
17184 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
17185 * \brief SUB32 (SIMD 32-bit Subtraction)
17186 * \details
17187 * **Type**: DSP (RV64 Only)
17188 *
17189 * **Syntax**:\n
17190 * ~~~
17191 * SUB32 Rd, Rs1, Rs2
17192 * ~~~
17193 *
17194 * **Purpose**:\n
17195 * Do 32-bit integer element subtractions simultaneously.
17196 *
17197 * **Description**:\n
17198 * This instruction subtracts the 32-bit integer elements in Rs2 from the 32-bit integer
17199 * elements in Rs1, and then writes the results to Rd.
17200 *
17201 * **Note**:\n
17202 * This instruction can be used for either signed or unsigned subtraction.
17203 *
17204 * **Operations**:\n
17205 * ~~~
17206 * Rd.W[x] = Rs1.W[x] - Rs2.W[x];
17207 * for RV64: x=1...0
17208 * ~~~
17209 *
17210 * \param [in] a unsigned long type of value stored in a
17211 * \param [in] b unsigned long type of value stored in b
17212 * \return value stored in unsigned long type
17213 */
__RV_SUB32(unsigned long a,unsigned long b)17214 __STATIC_FORCEINLINE unsigned long __RV_SUB32(unsigned long a, unsigned long b)
17215 {
17216 register unsigned long result;
17217 __ASM volatile("sub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17218 return result;
17219 }
17220 /* ===== Inline Function End for 4.42. SUB32 ===== */
17221
17222 /* ===== Inline Function Start for 4.43. UKADD32 ===== */
17223 /**
17224 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
17225 * \brief UKADD32 (SIMD 32-bit Unsigned Saturating Addition)
17226 * \details
17227 * **Type**: SIMD (RV64 Only)
17228 *
17229 * **Syntax**:\n
17230 * ~~~
17231 * UKADD32 Rd, Rs1, Rs2
17232 * ~~~
17233 *
17234 * **Purpose**:\n
17235 * Do 32-bit unsigned integer element saturating additions simultaneously.
17236 *
17237 * **Description**:\n
17238 * This instruction adds the 32-bit unsigned integer elements in Rs1 with the 32-bit
17239 * unsigned integer elements in Rs2. If any of the results are beyond the 32-bit unsigned number
17240 * range (0 <= RES <= 2^32-1), they are saturated to the range and the OV bit is set to 1. The saturated
17241 * results are written to Rd.
17242 *
17243 * **Operations**:\n
17244 * ~~~
17245 * res[x] = Rs1.W[x] + Rs2.W[x];
17246 * if (res[x] > (2^32)-1) {
17247 * res[x] = (2^32)-1;
17248 * OV = 1;
17249 * }
17250 * Rd.W[x] = res[x];
17251 * for RV64: x=1...0
17252 * ~~~
17253 *
17254 * \param [in] a unsigned long type of value stored in a
17255 * \param [in] b unsigned long type of value stored in b
17256 * \return value stored in unsigned long type
17257 */
__RV_UKADD32(unsigned long a,unsigned long b)17258 __STATIC_FORCEINLINE unsigned long __RV_UKADD32(unsigned long a, unsigned long b)
17259 {
17260 register unsigned long result;
17261 __ASM volatile("ukadd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17262 return result;
17263 }
17264 /* ===== Inline Function End for 4.43. UKADD32 ===== */
17265
17266 /* ===== Inline Function Start for 4.44. UKCRAS32 ===== */
17267 /**
17268 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
17269 * \brief UKCRAS32 (SIMD 32-bit Unsigned Saturating Cross Addition & Subtraction)
17270 * \details
17271 * **Type**: SIMD (RV64 Only)
17272 *
17273 * **Syntax**:\n
17274 * ~~~
17275 * UKCRAS32 Rd, Rs1, Rs2
17276 * ~~~
17277 *
17278 * **Purpose**:\n
17279 * Do one 32-bit unsigned integer element saturating addition and one 32-bit unsigned
17280 * integer element saturating subtraction in a 64-bit chunk simultaneously. Operands are from crossed
17281 * 32-bit elements.
17282 *
17283 * **Description**:\n
17284 * This instruction adds the 32-bit unsigned integer element in [63:32] of Rs1 with the 32-
17285 * bit unsigned integer element in [31:0] of Rs2; at the same time, it subtracts the 32-bit unsigned
17286 * integer element in [63:32] of Rs2 from the 32-bit unsigned integer element in [31:0] Rs1. If any of the
17287 * results are beyond the 32-bit unsigned number range (0 <= RES <= 2^32-1), they are saturated to the
17288 * range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for addition and
17289 * [31:0] of Rd for subtraction.
17290 *
17291 * **Operations**:\n
17292 * ~~~
17293 * res1 = Rs1.W[1] + Rs2.W[0];
17294 * res2 = Rs1.W[0] - Rs2.W[1];
17295 * if (res1 > (2^32)-1) {
17296 * res1 = (2^32)-1;
17297 * OV = 1;
17298 * }
17299 * if (res2 < 0) {
17300 * res2 = 0;
17301 * OV = 1;
17302 * }
17303 * Rd.W[1] = res1;
17304 * Rd.W[0] = res2;
17305 * ~~~
17306 *
17307 * \param [in] a unsigned long type of value stored in a
17308 * \param [in] b unsigned long type of value stored in b
17309 * \return value stored in unsigned long type
17310 */
__RV_UKCRAS32(unsigned long a,unsigned long b)17311 __STATIC_FORCEINLINE unsigned long __RV_UKCRAS32(unsigned long a, unsigned long b)
17312 {
17313 register unsigned long result;
17314 __ASM volatile("ukcras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17315 return result;
17316 }
17317 /* ===== Inline Function End for 4.44. UKCRAS32 ===== */
17318
17319 /* ===== Inline Function Start for 4.45. UKCRSA32 ===== */
17320 /**
17321 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
17322 * \brief UKCRSA32 (SIMD 32-bit Unsigned Saturating Cross Subtraction & Addition)
17323 * \details
17324 * **Type**: SIMD (RV64 Only)
17325 *
17326 * **Syntax**:\n
17327 * ~~~
17328 * UKCRSA32 Rd, Rs1, Rs2
17329 * ~~~
17330 *
17331 * **Purpose**:\n
17332 * Do one 32-bit unsigned integer element saturating subtraction and one 32-bit unsigned
17333 * integer element saturating addition in a 64-bit chunk simultaneously. Operands are from crossed
17334 * 32-bit elements.
17335 *
17336 * **Description**:\n
17337 * This instruction subtracts the 32-bit unsigned integer element in [31:0] of Rs2 from the
17338 * 32-bit unsigned integer element in [63:32] of Rs1; at the same time, it adds the 32-bit unsigned
17339 * integer element in [63:32] of Rs2 with the 32-bit unsigned integer element in [31:0] Rs1. If any of the
17340 * results are beyond the 32-bit unsigned number range (0 <= RES <= 2^32-1), they are saturated to the
17341 * range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for subtraction and
17342 * [31:0] of Rd for addition.
17343 *
17344 * **Operations**:\n
17345 * ~~~
17346 * res1 = Rs1.W[1] - Rs2.W[0];
17347 * res2 = Rs1.W[0] + Rs2.W[1];
 * if (res1 < 0) {
 *   res1 = 0;
 *   OV = 1;
 * }
 * if (res2 > (2^32)-1) {
 *   res2 = (2^32)-1;
 *   OV = 1;
 * }
17355 * Rd.W[1] = res1;
17356 * Rd.W[0] = res2;
17357 * ~~~
17358 *
17359 * \param [in] a unsigned long type of value stored in a
17360 * \param [in] b unsigned long type of value stored in b
17361 * \return value stored in unsigned long type
17362 */
__RV_UKCRSA32(unsigned long a,unsigned long b)17363 __STATIC_FORCEINLINE unsigned long __RV_UKCRSA32(unsigned long a, unsigned long b)
17364 {
17365 register unsigned long result;
17366 __ASM volatile("ukcrsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17367 return result;
17368 }
17369 /* ===== Inline Function End for 4.45. UKCRSA32 ===== */
17370
17371 /* ===== Inline Function Start for 4.46. UKSTAS32 ===== */
17372 /**
17373 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
17374 * \brief UKSTAS32 (SIMD 32-bit Unsigned Saturating Straight Addition & Subtraction)
17375 * \details
17376 * **Type**: SIMD (RV64 Only)
17377 *
17378 * **Syntax**:\n
17379 * ~~~
17380 * UKSTAS32 Rd, Rs1, Rs2
17381 * ~~~
17382 *
17383 * **Purpose**:\n
17384 * Do one 32-bit unsigned integer element saturating addition and one 32-bit unsigned
17385 * integer element saturating subtraction in a 64-bit chunk simultaneously. Operands are from
17386 * corresponding 32-bit elements.
17387 *
17388 * **Description**:\n
17389 * This instruction adds the 32-bit unsigned integer element in [63:32] of Rs1 with the 32-
17390 * bit unsigned integer element in [63:32] of Rs2; at the same time, it subtracts the 32-bit unsigned
17391 * integer element in [31:0] of Rs2 from the 32-bit unsigned integer element in [31:0] Rs1. If any of the
17392 * results are beyond the 32-bit unsigned number range (0 <= RES <= 2^32-1), they are saturated to the
17393 * range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for addition and
17394 * [31:0] of Rd for subtraction.
17395 *
17396 * **Operations**:\n
17397 * ~~~
17398 * res1 = Rs1.W[1] + Rs2.W[1];
17399 * res2 = Rs1.W[0] - Rs2.W[0];
17400 * if (res1 > (2^32)-1) {
17401 * res1 = (2^32)-1;
17402 * OV = 1;
17403 * }
17404 * if (res2 < 0) {
17405 * res2 = 0;
17406 * OV = 1;
17407 * }
17408 * Rd.W[1] = res1;
17409 * Rd.W[0] = res2;
17410 * ~~~
17411 *
17412 * \param [in] a unsigned long type of value stored in a
17413 * \param [in] b unsigned long type of value stored in b
17414 * \return value stored in unsigned long type
17415 */
__RV_UKSTAS32(unsigned long a,unsigned long b)17416 __STATIC_FORCEINLINE unsigned long __RV_UKSTAS32(unsigned long a, unsigned long b)
17417 {
17418 register unsigned long result;
17419 __ASM volatile("ukstas32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17420 return result;
17421 }
17422 /* ===== Inline Function End for 4.46. UKSTAS32 ===== */
17423
17424 /* ===== Inline Function Start for 4.47. UKSTSA32 ===== */
17425 /**
17426 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
17427 * \brief UKSTSA32 (SIMD 32-bit Unsigned Saturating Straight Subtraction & Addition)
17428 * \details
17429 * **Type**: SIMD (RV64 Only)
17430 *
17431 * **Syntax**:\n
17432 * ~~~
17433 * UKSTSA32 Rd, Rs1, Rs2
17434 * ~~~
17435 *
17436 * **Purpose**:\n
17437 * Do one 32-bit unsigned integer element saturating subtraction and one 32-bit unsigned
17438 * integer element saturating addition in a 64-bit chunk simultaneously. Operands are from
17439 * corresponding 32-bit elements.
17440 *
17441 * **Description**:\n
17442 * This instruction subtracts the 32-bit unsigned integer element in [63:32] of Rs2 from
17443 * the 32-bit unsigned integer element in [63:32] of Rs1; at the same time, it adds the 32-bit unsigned
17444 * integer element in [31:0] of Rs2 with the 32-bit unsigned integer element in [31:0] Rs1. If any of the
17445 * results are beyond the 32-bit unsigned number range (0 <= RES <= 2^32-1), they are saturated to the
17446 * range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for subtraction and
17447 * [31:0] of Rd for addition.
17448 *
17449 * **Operations**:\n
17450 * ~~~
17451 * res1 = Rs1.W[1] - Rs2.W[1];
17452 * res2 = Rs1.W[0] + Rs2.W[0];
 * if (res1 < 0) {
 *   res1 = 0;
 *   OV = 1;
 * }
 * if (res2 > (2^32)-1) {
 *   res2 = (2^32)-1;
 *   OV = 1;
 * }
17460 * Rd.W[1] = res1;
17461 * Rd.W[0] = res2;
17462 * ~~~
17463 *
17464 * \param [in] a unsigned long type of value stored in a
17465 * \param [in] b unsigned long type of value stored in b
17466 * \return value stored in unsigned long type
17467 */
__RV_UKSTSA32(unsigned long a,unsigned long b)17468 __STATIC_FORCEINLINE unsigned long __RV_UKSTSA32(unsigned long a, unsigned long b)
17469 {
17470 register unsigned long result;
17471 __ASM volatile("ukstsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17472 return result;
17473 }
17474 /* ===== Inline Function End for 4.47. UKSTSA32 ===== */
17475
17476 /* ===== Inline Function Start for 4.48. UKSUB32 ===== */
17477 /**
17478 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
17479 * \brief UKSUB32 (SIMD 32-bit Unsigned Saturating Subtraction)
17480 * \details
17481 * **Type**: SIMD (RV64 Only)
17482 *
17483 * **Syntax**:\n
17484 * ~~~
17485 * UKSUB32 Rd, Rs1, Rs2
17486 * ~~~
17487 *
17488 * **Purpose**:\n
17489 * Do 32-bit unsigned integer elements saturating subtractions simultaneously.
17490 *
17491 * **Description**:\n
17492 * This instruction subtracts the 32-bit unsigned integer elements in Rs2 from the 32-bit
17493 * unsigned integer elements in Rs1. If any of the results are beyond the 32-bit unsigned number
17494 * range (0 <= RES <= 2^32-1), they are saturated to the range and the OV bit is set to 1. The saturated
17495 * results are written to Rd.
17496 *
17497 * **Operations**:\n
17498 * ~~~
17499 * res[x] = Rs1.W[x] - Rs2.W[x];
17500 * if (res[x] < 0) {
17501 * res[x] = 0;
17502 * OV = 1;
17503 * }
17504 * Rd.W[x] = res[x];
17505 * for RV64: x=1...0
17506 * ~~~
17507 *
17508 * \param [in] a unsigned long type of value stored in a
17509 * \param [in] b unsigned long type of value stored in b
17510 * \return value stored in unsigned long type
17511 */
__RV_UKSUB32(unsigned long a,unsigned long b)17512 __STATIC_FORCEINLINE unsigned long __RV_UKSUB32(unsigned long a, unsigned long b)
17513 {
17514 register unsigned long result;
17515 __ASM volatile("uksub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17516 return result;
17517 }
17518 /* ===== Inline Function End for 4.48. UKSUB32 ===== */
17519
17520 /* ===== Inline Function Start for 4.49. UMAX32 ===== */
17521 /**
17522 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
17523 * \brief UMAX32 (SIMD 32-bit Unsigned Maximum)
17524 * \details
17525 * **Type**: SIMD (RV64 Only)
17526 *
17527 * **Syntax**:\n
17528 * ~~~
17529 * UMAX32 Rd, Rs1, Rs2
17530 * ~~~
17531 *
17532 * **Purpose**:\n
17533 * Do 32-bit unsigned integer elements finding maximum operations simultaneously.
17534 *
17535 * **Description**:\n
17536 * This instruction compares the 32-bit unsigned integer elements in Rs1 with the 32-bit
 * unsigned integer elements in Rs2 and selects the number that is greater than the other one. The
17538 * selected results are written to Rd.
17539 *
17540 * **Operations**:\n
17541 * ~~~
 * Rd.W[x] = (Rs1.W[x] >u Rs2.W[x])? Rs1.W[x] : Rs2.W[x];
17543 * for RV64: x=1...0
17544 * ~~~
17545 *
17546 * \param [in] a unsigned long type of value stored in a
17547 * \param [in] b unsigned long type of value stored in b
17548 * \return value stored in unsigned long type
17549 */
__RV_UMAX32(unsigned long a,unsigned long b)17550 __STATIC_FORCEINLINE unsigned long __RV_UMAX32(unsigned long a, unsigned long b)
17551 {
17552 register unsigned long result;
17553 __ASM volatile("umax32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17554 return result;
17555 }
17556 /* ===== Inline Function End for 4.49. UMAX32 ===== */
17557
17558 /* ===== Inline Function Start for 4.50. UMIN32 ===== */
17559 /**
17560 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
17561 * \brief UMIN32 (SIMD 32-bit Unsigned Minimum)
17562 * \details
17563 * **Type**: SIMD (RV64 Only)
17564 *
17565 * **Syntax**:\n
17566 * ~~~
17567 * UMIN32 Rd, Rs1, Rs2
17568 * ~~~
17569 *
17570 * **Purpose**:\n
17571 * Do 32-bit unsigned integer elements finding minimum operations simultaneously.
17572 *
17573 * **Description**:\n
17574 * This instruction compares the 32-bit unsigned integer elements in Rs1 with the 32-bit
17575 * unsigned integer elements in Rs2 and selects the number that is less than the other one. The
17576 * selected results are written to Rd.
17577 *
17578 * **Operations**:\n
17579 * ~~~
17580 * Rd.W[x] = (Rs1.W[x] <u Rs2.W[x])? Rs1.W[x] : Rs2.W[x];
17581 * for RV64: x=1...0
17582 * ~~~
17583 *
17584 * \param [in] a unsigned long type of value stored in a
17585 * \param [in] b unsigned long type of value stored in b
17586 * \return value stored in unsigned long type
17587 */
__RV_UMIN32(unsigned long a,unsigned long b)17588 __STATIC_FORCEINLINE unsigned long __RV_UMIN32(unsigned long a, unsigned long b)
17589 {
17590 register unsigned long result;
17591 __ASM volatile("umin32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17592 return result;
17593 }
17594 /* ===== Inline Function End for 4.50. UMIN32 ===== */
17595
17596 /* ===== Inline Function Start for 4.51. URADD32 ===== */
17597 /**
17598 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
17599 * \brief URADD32 (SIMD 32-bit Unsigned Halving Addition)
17600 * \details
17601 * **Type**: SIMD (RV64 Only)
17602 *
17603 * **Syntax**:\n
17604 * ~~~
17605 * URADD32 Rd, Rs1, Rs2
17606 * ~~~
17607 *
17608 * **Purpose**:\n
17609 * Do 32-bit unsigned integer element additions simultaneously. The results are halved to
17610 * avoid overflow or saturation.
17611 *
17612 * **Description**:\n
17613 * This instruction adds the 32-bit unsigned integer elements in Rs1 with the 32-bit
17614 * unsigned integer elements in Rs2. The results are first logically right-shifted by 1 bit and then
17615 * written to Rd.
17616 *
17617 * **Examples**:\n
17618 * ~~~
17619 * * Ra = 0x7FFFFFFF, Rb = 0x7FFFFFFF, Rt = 0x7FFFFFFF
17620 * * Ra = 0x80000000, Rb = 0x80000000, Rt = 0x80000000
17621 * * Ra = 0x40000000, Rb = 0x80000000, Rt = 0x60000000
17622 * ~~~
17623 *
17624 * **Operations**:\n
17625 * ~~~
17626 * Rd.W[x] = (Rs1.W[x] + Rs2.W[x]) u>> 1;
17627 * for RV64: x=1...0
17628 * ~~~
17629 *
17630 * \param [in] a unsigned long type of value stored in a
17631 * \param [in] b unsigned long type of value stored in b
17632 * \return value stored in unsigned long type
17633 */
__RV_URADD32(unsigned long a,unsigned long b)17634 __STATIC_FORCEINLINE unsigned long __RV_URADD32(unsigned long a, unsigned long b)
17635 {
17636 register unsigned long result;
17637 __ASM volatile("uradd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17638 return result;
17639 }
17640 /* ===== Inline Function End for 4.51. URADD32 ===== */
17641
17642 /* ===== Inline Function Start for 4.52. URCRAS32 ===== */
17643 /**
17644 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
17645 * \brief URCRAS32 (SIMD 32-bit Unsigned Halving Cross Addition & Subtraction)
17646 * \details
17647 * **Type**: SIMD (RV64 Only)
17648 *
17649 * **Syntax**:\n
17650 * ~~~
17651 * URCRAS32 Rd, Rs1, Rs2
17652 * ~~~
17653 *
17654 * **Purpose**:\n
17655 * Do 32-bit unsigned integer element addition and 32-bit unsigned integer element
17656 * subtraction in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The
17657 * results are halved to avoid overflow or saturation.
17658 *
17659 * **Description**:\n
17660 * This instruction adds the 32-bit unsigned integer element in [63:32] of Rs1 with the 32-
17661 * bit unsigned integer element in [31:0] of Rs2, and subtracts the 32-bit unsigned integer element in
17662 * [63:32] of Rs2 from the 32-bit unsigned integer element in [31:0] of Rs1. The element results are first
17663 * logically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd for
17664 * subtraction.
17665 *
17666 * **Examples**:\n
17667 * ~~~
17668 * Please see `URADD32` and `URSUB32` instructions.
17669 * ~~~
17670 *
17671 * **Operations**:\n
17672 * ~~~
17673 * Rd.W[1] = (Rs1.W[1] + Rs2.W[0]) u>> 1;
17674 * Rd.W[0] = (Rs1.W[0] - Rs2.W[1]) u>> 1;
17675 * ~~~
17676 *
17677 * \param [in] a unsigned long type of value stored in a
17678 * \param [in] b unsigned long type of value stored in b
17679 * \return value stored in unsigned long type
17680 */
__RV_URCRAS32(unsigned long a,unsigned long b)17681 __STATIC_FORCEINLINE unsigned long __RV_URCRAS32(unsigned long a, unsigned long b)
17682 {
17683 register unsigned long result;
17684 __ASM volatile("urcras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17685 return result;
17686 }
17687 /* ===== Inline Function End for 4.52. URCRAS32 ===== */
17688
17689 /* ===== Inline Function Start for 4.53. URCRSA32 ===== */
17690 /**
17691 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
17692 * \brief URCRSA32 (SIMD 32-bit Unsigned Halving Cross Subtraction & Addition)
17693 * \details
17694 * **Type**: SIMD (RV64 Only)
17695 *
17696 * **Syntax**:\n
17697 * ~~~
17698 * URCRSA32 Rd, Rs1, Rs2
17699 * ~~~
17700 *
17701 * **Purpose**:\n
17702 * Do 32-bit unsigned integer element subtraction and 32-bit unsigned integer element
17703 * addition in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The results
17704 * are halved to avoid overflow or saturation.
17705 *
17706 * **Description**:\n
17707 * This instruction subtracts the 32-bit unsigned integer element in [31:0] of Rs2 from the
17708 * 32-bit unsigned integer element in [63:32] of Rs1, and adds the 32-bit unsigned integer element in
17709 * [31:0] of Rs1 with the 32-bit unsigned integer element in [63:32] of Rs2. The two results are first
17710 * logically right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of Rd for
17711 * addition.
17712 *
17713 * **Examples**:\n
17714 * ~~~
17715 * Please see `URADD32` and `URSUB32` instructions.
17716 * ~~~
17717 *
17718 * **Operations**:\n
17719 * ~~~
17720 * Rd.W[1] = (Rs1.W[1] - Rs2.W[0]) u>> 1;
17721 * Rd.W[0] = (Rs1.W[0] + Rs2.W[1]) u>> 1;
17722 * ~~~
17723 *
17724 * \param [in] a unsigned long type of value stored in a
17725 * \param [in] b unsigned long type of value stored in b
17726 * \return value stored in unsigned long type
17727 */
__RV_URCRSA32(unsigned long a,unsigned long b)17728 __STATIC_FORCEINLINE unsigned long __RV_URCRSA32(unsigned long a, unsigned long b)
17729 {
17730 register unsigned long result;
17731 __ASM volatile("urcrsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17732 return result;
17733 }
17734 /* ===== Inline Function End for 4.53. URCRSA32 ===== */
17735
17736 /* ===== Inline Function Start for 4.54. URSTAS32 ===== */
17737 /**
17738 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
17739 * \brief URSTAS32 (SIMD 32-bit Unsigned Halving Straight Addition & Subtraction)
17740 * \details
17741 * **Type**: SIMD (RV64 Only)
17742 *
17743 * **Syntax**:\n
17744 * ~~~
17745 * URSTAS32 Rd, Rs1, Rs2
17746 * ~~~
17747 *
17748 * **Purpose**:\n
17749 * Do 32-bit unsigned integer element addition and 32-bit unsigned integer element
17750 * subtraction in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements.
17751 * The results are halved to avoid overflow or saturation.
17752 *
17753 * **Description**:\n
17754 * This instruction adds the 32-bit unsigned integer element in [63:32] of Rs1 with the 32-
17755 * bit unsigned integer element in [63:32] of Rs2, and subtracts the 32-bit unsigned integer element in
17756 * [31:0] of Rs2 from the 32-bit unsigned integer element in [31:0] of Rs1. The element results are first
17757 * logically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd for
17758 * subtraction.
17759 *
17760 * **Examples**:\n
17761 * ~~~
17762 * Please see `URADD32` and `URSUB32` instructions.
17763 * ~~~
17764 *
17765 * **Operations**:\n
17766 * ~~~
17767 * Rd.W[1] = (Rs1.W[1] + Rs2.W[1]) u>> 1;
17768 * Rd.W[0] = (Rs1.W[0] - Rs2.W[0]) u>> 1;
17769 * ~~~
17770 *
17771 * \param [in] a unsigned long type of value stored in a
17772 * \param [in] b unsigned long type of value stored in b
17773 * \return value stored in unsigned long type
17774 */
__RV_URSTAS32(unsigned long a,unsigned long b)17775 __STATIC_FORCEINLINE unsigned long __RV_URSTAS32(unsigned long a, unsigned long b)
17776 {
17777 register unsigned long result;
17778 __ASM volatile("urstas32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17779 return result;
17780 }
17781 /* ===== Inline Function End for 4.54. URSTAS32 ===== */
17782
17783 /* ===== Inline Function Start for 4.55. URSTSA32 ===== */
17784 /**
17785 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
17786 * \brief URSTSA32 (SIMD 32-bit Unsigned Halving Straight Subtraction & Addition)
17787 * \details
17788 * **Type**: SIMD (RV64 Only)
17789 *
17790 * **Syntax**:\n
17791 * ~~~
17792 * URSTSA32 Rd, Rs1, Rs2
17793 * ~~~
17794 *
17795 * **Purpose**:\n
17796 * Do 32-bit unsigned integer element subtraction and 32-bit unsigned integer element
17797 * addition in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements. The
17798 * results are halved to avoid overflow or saturation.
17799 *
17800 * **Description**:\n
17801 * This instruction subtracts the 32-bit unsigned integer element in [63:32] of Rs2 from
17802 * the 32-bit unsigned integer element in [63:32] of Rs1, and adds the 32-bit unsigned integer element
17803 * in [31:0] of Rs1 with the 32-bit unsigned integer element in [31:0] of Rs2. The two results are first
17804 * logically right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of Rd for
17805 * addition.
17806 *
17807 * **Examples**:\n
17808 * ~~~
17809 * Please see `URADD32` and `URSUB32` instructions.
17810 * ~~~
17811 *
17812 * **Operations**:\n
17813 * ~~~
17814 * Rd.W[1] = (Rs1.W[1] - Rs2.W[1]) u>> 1;
17815 * Rd.W[0] = (Rs1.W[0] + Rs2.W[0]) u>> 1;
17816 * ~~~
17817 *
17818 * \param [in] a unsigned long type of value stored in a
17819 * \param [in] b unsigned long type of value stored in b
17820 * \return value stored in unsigned long type
17821 */
__RV_URSTSA32(unsigned long a,unsigned long b)17822 __STATIC_FORCEINLINE unsigned long __RV_URSTSA32(unsigned long a, unsigned long b)
17823 {
17824 register unsigned long result;
17825 __ASM volatile("urstsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17826 return result;
17827 }
17828 /* ===== Inline Function End for 4.55. URSTSA32 ===== */
17829
17830 /* ===== Inline Function Start for 4.56. URSUB32 ===== */
17831 /**
17832 * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
17833 * \brief URSUB32 (SIMD 32-bit Unsigned Halving Subtraction)
17834 * \details
17835 * **Type**: SIMD (RV64 Only)
17836 *
17837 * **Syntax**:\n
17838 * ~~~
17839 * URSUB32 Rd, Rs1, Rs2
17840 * ~~~
17841 *
17842 * **Purpose**:\n
17843 * Do 32-bit unsigned integer element subtractions simultaneously. The results are halved to
17844 * avoid overflow or saturation.
17845 *
17846 * **Description**:\n
17847 * This instruction subtracts the 32-bit unsigned integer elements in Rs2 from the 32-bit
17848 * unsigned integer elements in Rs1. The results are first logically right-shifted by 1 bit and then
17849 * written to Rd.
17850 *
17851 * **Examples**:\n
17852 * ~~~
17853 * * Ra = 0x7FFFFFFF, Rb = 0x80000000, Rt = 0xFFFFFFFF
17854 * * Ra = 0x80000000, Rb = 0x7FFFFFFF, Rt = 0x00000000
17855 * * Ra = 0x80000000, Rb = 0x40000000, Rt = 0x20000000
17856 * ~~~
17857 *
17858 * **Operations**:\n
17859 * ~~~
17860 * Rd.W[x] = (Rs1.W[x] - Rs2.W[x]) u>> 1;
17861 * for RV64: x=1...0
17862 * ~~~
17863 *
17864 * \param [in] a unsigned long type of value stored in a
17865 * \param [in] b unsigned long type of value stored in b
17866 * \return value stored in unsigned long type
17867 */
__RV_URSUB32(unsigned long a,unsigned long b)17868 __STATIC_FORCEINLINE unsigned long __RV_URSUB32(unsigned long a, unsigned long b)
17869 {
17870 register unsigned long result;
17871 __ASM volatile("ursub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17872 return result;
17873 }
17874 /* ===== Inline Function End for 4.56. URSUB32 ===== */
17875
17876 #endif /* __RISCV_XLEN == 64 */
17877
17878
17879 #if (__RISCV_XLEN == 32) || defined(__ONLY_FOR_DOXYGEN_DOCUMENT_GENERATION__)
17880 /* XXXXX Nuclei Extended DSP Instructions for RV32 XXXXX */
17881 /**
17882 * \defgroup NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM Nuclei Customized DSP Instructions
17883 * \ingroup NMSIS_Core_DSP_Intrinsic
17884 * \brief (RV32 only) Nuclei Customized DSP Instructions
17885 * \details These are Nuclei customized DSP instructions, available only for RV32
17886 */
17887 /* ===== Inline Function Start for A.1. DKHM8 ===== */
17888 /**
17889 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM
17890 * \brief DKHM8 (64-bit SIMD Signed Saturating Q7 Multiply)
17891 * \details
17892 * **Type**: SIMD
17893 *
17894 * **Syntax**:\n
17895 * ~~~
17896 * DKHM8 Rd, Rs1, Rs2
17897 * # Rd, Rs1, Rs2 are all even/odd pair of registers
17898 * ~~~
17899 *
17900 * **Purpose**:\n
17901 * Do Q7xQ7 element multiplications simultaneously. The Q14 results are then reduced to Q7
17902 * numbers again.
17903 *
17904 * **Description**:\n
17905 * For the `DKHM8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1
17906 * with the top 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
17907 * content of 16-bit chunks in Rs1 with the bottom 8-bit Q7 content of 16-bit chunks in Rs2.
17908 *
17909 * The Q14 results are then right-shifted 7-bits and saturated into Q7 values. The Q7 results are then
17910 * written into Rd. When both the two Q7 inputs of a multiplication are 0x80, saturation will happen.
17911 * The result will be saturated to 0x7F and the overflow flag OV will be set.
17912 *
17913 * **Operations**:\n
17914 * ~~~
17915 * op1t = Rs1.B[x+1]; op2t = Rs2.B[x+1]; // top
17916 * op1b = Rs1.B[x]; op2b = Rs2.B[x]; // bottom
17917 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
17918 * if (0x80 != aop | 0x80 != bop) {
17919 * res = (aop s* bop) >> 7;
17920 * } else {
17921 * res= 0x7F;
17922 * OV = 1;
17923 * }
17924 * }
17925 * Rd.H[x/2] = concat(rest, resb);
17926 * for RV32, x=0,2,4,6
17927 * ~~~
17928 *
17929 * \param [in] a unsigned long long type of value stored in a
17930 * \param [in] b unsigned long long type of value stored in b
17931 * \return value stored in unsigned long long type
17932 */
__RV_DKHM8(unsigned long long a,unsigned long long b)17933 __STATIC_FORCEINLINE unsigned long long __RV_DKHM8(unsigned long long a, unsigned long long b)
17934 {
17935 unsigned long long result;
17936 __ASM volatile("dkhm8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17937 return result;
17938 }
17939 /* ===== Inline Function End for A.1. DKHM8 ===== */
17940
17941 /* ===== Inline Function Start for A.2. DKHM16 ===== */
17942 /**
17943 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM
17944 * \brief DKHM16 (64-bit SIMD Signed Saturating Q15 Multiply)
17945 * \details
17946 * **Type**: SIMD
17947 *
17948 * **Syntax**:\n
17949 * ~~~
17950 * DKHM16 Rd, Rs1, Rs2
17951 * # Rd, Rs1, Rs2 are all even/odd pair of registers
17952 * ~~~
17953 *
17954 * **Purpose**:\n
17955 * Do Q15xQ15 element multiplications simultaneously. The Q30 results are then reduced to
17956 * Q15 numbers again.
17957 *
17958 * **Description**:\n
17959 * For the `DKHM16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in
17960 * Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom
17961 * 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit chunks in
17962 * Rs2.
17963 *
17964 * The Q30 results are then right-shifted 15-bits and saturated into Q15 values. The Q15 results are
17965 * then written into Rd. When both the two Q15 inputs of a multiplication are 0x8000, saturation will
17966 * happen. The result will be saturated to 0x7FFF and the overflow flag OV will be set.
17967 *
17968 * **Operations**:\n
17969 * ~~~
17970 * op1t = Rs1.H[x+1]; op2t = Rs2.H[x+1]; // top
17971 * op1b = Rs1.H[x]; op2b = Rs2.H[x]; // bottom
17972 * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
17973 * if (0x8000 != aop | 0x8000 != bop) {
17974 * res = (aop s* bop) >> 15;
17975 * } else {
17976 * res= 0x7FFF;
17977 * OV = 1;
17978 * }
17979 * }
17980 * Rd.W[x/2] = concat(rest, resb);
17981 * for RV32: x=0, 2
17982 * ~~~
17983 *
17984 * \param [in] a unsigned long long type of value stored in a
17985 * \param [in] b unsigned long long type of value stored in b
17986 * \return value stored in unsigned long long type
17987 */
__RV_DKHM16(unsigned long long a,unsigned long long b)17988 __STATIC_FORCEINLINE unsigned long long __RV_DKHM16(unsigned long long a, unsigned long long b)
17989 {
17990 unsigned long long result;
17991 __ASM volatile("dkhm16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
17992 return result;
17993 }
17994 /* ===== Inline Function End for A.2. DKHM16 ===== */
17995
17996 /* ===== Inline Function Start for A.3. DKABS8 ===== */
17997 /**
17998 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM
17999 * \brief DKABS8 (64-bit SIMD 8-bit Saturating Absolute)
18000 * \details
18001 * **Type**: SIMD
18002 *
18003 * **Syntax**:\n
18004 * ~~~
18005 * DKABS8 Rd, Rs1
18006 * # Rd, Rs1 are all even/odd pair of registers
18007 * ~~~
18008 *
18009 * **Purpose**:\n
18010 * Get the absolute value of 8-bit signed integer elements simultaneously.
18011 *
18012 * **Description**:\n
18013 * This instruction calculates the absolute value of 8-bit signed integer elements stored
18014 * in Rs1 and writes the element results to Rd. If the input number is 0x80, this instruction generates
18015 * 0x7f as the output and sets the OV bit to 1.
18016 *
18017 * **Operations**:\n
18018 * ~~~
18019 * src = Rs1.B[x];
18020 * if (src == 0x80) {
18021 * src = 0x7f;
18022 * OV = 1;
18023 * } else if (src[7] == 1) {
18024 * src = -src;
18025 * }
18026 * Rd.B[x] = src;
18027 * for RV32: x=7...0,
18028 * ~~~
18029 *
18030 * \param [in] a unsigned long long type of value stored in a
18031 * \return value stored in unsigned long long type
18032 */
__RV_DKABS8(unsigned long long a)18033 __STATIC_FORCEINLINE unsigned long long __RV_DKABS8(unsigned long long a)
18034 {
18035 unsigned long long result;
18036 __ASM volatile("dkabs8 %0, %1" : "=r"(result) : "r"(a));
18037 return result;
18038 }
18039 /* ===== Inline Function End for A.3. DKABS8 ===== */
18040
18041 /* ===== Inline Function Start for A.4. DKABS16 ===== */
18042 /**
18043 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM
18044 * \brief DKABS16 (64-bit SIMD 16-bit Saturating Absolute)
18045 * \details
18046 * **Type**: SIMD
18047 *
18048 * **Syntax**:\n
18049 * ~~~
18050 * DKABS16 Rd, Rs1
18051 * # Rd, Rs1 are all even/odd pair of registers
18052 * ~~~
18053 *
18054 * **Purpose**:\n
18055 * Get the absolute value of 16-bit signed integer elements simultaneously.
18056 *
18057 * **Description**:\n
18058 * This instruction calculates the absolute value of 16-bit signed integer elements stored
18059 * in Rs1 and writes the element results to Rd. If the input number is 0x8000, this instruction
18060 * generates 0x7fff as the output and sets the OV bit to 1.
18061 *
18062 * **Operations**:\n
18063 * ~~~
18064 * src = Rs1.H[x];
18065 * if (src == 0x8000) {
18066 * src = 0x7fff;
18067 * OV = 1;
18068 * } else if (src[15] == 1) {
18069 * src = -src;
18070 * }
18071 * Rd.H[x] = src;
18072 * for RV32: x=3...0,
18073 * ~~~
18074 *
18075 * \param [in] a unsigned long long type of value stored in a
18076 * \return value stored in unsigned long long type
18077 */
__RV_DKABS16(unsigned long long a)18078 __STATIC_FORCEINLINE unsigned long long __RV_DKABS16(unsigned long long a)
18079 {
18080 unsigned long long result;
18081 __ASM volatile("dkabs16 %0, %1" : "=r"(result) : "r"(a));
18082 return result;
18083 }
18084 /* ===== Inline Function End for A.4. DKABS16 ===== */
18085
18086 /* ===== Inline Function Start for A.5. DKSLRA8 ===== */
18087 /**
18088 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM
18089 * \brief DKSLRA8 (64-bit SIMD 8-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
18090 * \details
18091 * **Type**: SIMD
18092 *
18093 * **Syntax**:\n
18094 * ~~~
18095 * DKSLRA8 Rd, Rs1, Rs2
18096 * # Rd, Rs1 are all even/odd pair of registers
18097 * ~~~
18098 *
18099 * **Purpose**:\n
18100 * Do 8-bit elements logical left (positive) or arithmetic right (negative) shift operation with
18101 * Q7 saturation for the left shift.
18102 *
18103 * **Description**:\n
18104 * The 8-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
18105 * based on the value of Rs2[3:0]. Rs2[3:0] is in the signed range of [-2^3, 2^3-1]. A positive Rs2[3:0] means
18106 * logical left shift and a negative Rs2[3:0] means arithmetic right shift. The shift amount is the
18107 * absolute value of Rs2[3:0]. However, the behavior of `Rs2[3:0]==-2^3 (0x8)` is defined to be
18108 * equivalent to the behavior of `Rs2[3:0]==-(2^3-1) (0x9)`.
18109 * The left-shifted results are saturated to the 8-bit signed integer range of [-2^7, 2^7-1].
18110 * If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:4] will not affect
18111 * this instruction.
18112 *
18113 * **Operations**:\n
18114 * ~~~
18115 * if (Rs2[3:0] < 0) {
18116 * sa = -Rs2[3:0];
18117 * sa = (sa == 8)? 7 : sa;
18118 * Rd.B[x] = SE8(Rs1.B[x][7:sa]);
18119 * } else {
18120 * sa = Rs2[2:0];
18121 * res[(7+sa):0] = Rs1.B[x] <<(logic) sa;
18122 * if (res > (2^7)-1) {
18123 * res[7:0] = 0x7f; OV = 1;
18124 * } else if (res < -2^7) {
18125 * res[7:0] = 0x80; OV = 1;
18126 * }
18127 * Rd.B[x] = res[7:0];
18128 * }
18129 * for RV32: x=7...0,
18130 * ~~~
18131 *
18132 * \param [in] a unsigned long long type of value stored in a
18133 * \param [in] b int type of value stored in b
18134 * \return value stored in unsigned long long type
18135 */
__RV_DKSLRA8(unsigned long long a,int b)18136 __STATIC_FORCEINLINE unsigned long long __RV_DKSLRA8(unsigned long long a, int b)
18137 {
18138 unsigned long long result;
18139 __ASM volatile("dkslra8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
18140 return result;
18141 }
18142 /* ===== Inline Function End for A.5. DKSLRA8 ===== */
18143
18144 /* ===== Inline Function Start for A.6. DKSLRA16 ===== */
18145 /**
18146 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM
18147 * \brief DKSLRA16 (64-bit SIMD 16-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
18148 * \details
18149 * **Type**: SIMD
18150 *
18151 * **Syntax**:\n
18152 * ~~~
18153 * DKSLRA16 Rd, Rs1, Rs2
18154 * # Rd, Rs1 are all even/odd pair of registers
18155 * ~~~
18156 *
18157 * **Purpose**:\n
18158 * Do 16-bit elements logical left (positive) or arithmetic right (negative) shift operation with
18159 * Q15 saturation for the left shift.
18160 *
18161 * **Description**:\n
18162 * The 16-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
18163 * based on the value of Rs2[4:0]. Rs2[4:0] is in the signed range of [-2^4, 2^4-1]. A positive Rs2[4:0] means
18164 * logical left shift and a negative Rs2[4:0] means arithmetic right shift. The shift amount is the
18165 * absolute value of Rs2[4:0]. However, the behavior of `Rs2[4:0]==-2^4 (0x10)` is defined to be
18166 * equivalent to the behavior of `Rs2[4:0]==-(2^4-1) (0x11)`.
18167 * The left-shifted results are saturated to the 16-bit signed integer range of [-2^15, 2^15-1].
18168 * After the shift, saturation, or rounding, the final results are written to
18169 * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:5] will not affect
18170 * this instruction.
18171 *
18172 * **Operations**:\n
18173 * ~~~
18174 * if (Rs2[4:0] < 0) {
18175 * sa = -Rs2[4:0];
18176 * sa = (sa == 16)? 15 : sa;
18177 * Rd.H[x] = SE16(Rs1.H[x][15:sa]);
18178 * } else {
18179 * sa = Rs2[3:0];
18180 * res[(15+sa):0] = Rs1.H[x] <<(logic) sa;
18181 * if (res > (2^15)-1) {
18182 * res[15:0] = 0x7fff; OV = 1;
18183 * } else if (res < -2^15) {
18184 * res[15:0] = 0x8000; OV = 1;
18185 * }
18186 * Rd.H[x] = res[15:0];
18187 * }
18188 * for RV32: x=3...0,
18189 * ~~~
18190 *
18191 * \param [in] a unsigned long long type of value stored in a
18192 * \param [in] b int type of value stored in b
18193 * \return value stored in unsigned long long type
18194 */
__RV_DKSLRA16(unsigned long long a,int b)18195 __STATIC_FORCEINLINE unsigned long long __RV_DKSLRA16(unsigned long long a, int b)
18196 {
18197 unsigned long long result;
18198 __ASM volatile("dkslra16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
18199 return result;
18200 }
18201 /* ===== Inline Function End for A.6. DKSLRA16 ===== */
18202
18203 /* ===== Inline Function Start for A.7. DKADD8 ===== */
18204 /**
18205 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM
18206 * \brief DKADD8 (64-bit SIMD 8-bit Signed Saturating Addition)
18207 * \details
18208 * **Type**: SIMD
18209 *
18210 * **Syntax**:\n
18211 * ~~~
18212 * DKADD8 Rd, Rs1, Rs2
18213 * # Rd, Rs1, Rs2 are all even/odd pair of registers
18214 * ~~~
18215 *
18216 * **Purpose**:\n
18217 * Do 8-bit signed integer element saturating additions simultaneously.
18218 *
18219 * **Description**:\n
18220 * This instruction adds the 8-bit signed integer elements in Rs1 with the 8-bit signed
18221 * integer elements in Rs2. If any of the results are beyond the Q7 number range (-2^7 <= Q7 <= 2^7-1), they
18222 * are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
18223 *
18224 * **Operations**:\n
18225 * ~~~
18226 * res[x] = Rs1.B[x] + Rs2.B[x];
18227 * if (res[x] > 127) {
18228 * res[x] = 127;
18229 * OV = 1;
18230 * } else if (res[x] < -128) {
18231 * res[x] = -128;
18232 * OV = 1;
18233 * }
18234 * Rd.B[x] = res[x];
18235 * for RV32: x=7...0,
18236 * ~~~
18237 *
18238 * \param [in] a unsigned long long type of value stored in a
18239 * \param [in] b unsigned long long type of value stored in b
18240 * \return value stored in unsigned long long type
18241 */
__RV_DKADD8(unsigned long long a,unsigned long long b)18242 __STATIC_FORCEINLINE unsigned long long __RV_DKADD8(unsigned long long a, unsigned long long b)
18243 {
18244 unsigned long long result;
18245 __ASM volatile("dkadd8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
18246 return result;
18247 }
18248 /* ===== Inline Function End for A.7. DKADD8 ===== */
18249
18250 /* ===== Inline Function Start for A.8. DKADD16 ===== */
18251 /**
18252 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM
18253 * \brief DKADD16 (64-bit SIMD 16-bit Signed Saturating Addition)
18254 * \details
18255 * **Type**: SIMD
18256 *
18257 * **Syntax**:\n
18258 * ~~~
18259 * DKADD16 Rd, Rs1, Rs2
18260 * # Rd, Rs1, Rs2 are all even/odd pair of registers
18261 * ~~~
18262 *
18263 * **Purpose**:\n
18264 * Do 16-bit signed integer element saturating additions simultaneously.
18265 *
18266 * **Description**:\n
18267 * This instruction adds the 16-bit signed integer elements in Rs1 with the 16-bit signed
18268 * integer elements in Rs2. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1),
18269 * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
18270 *
18271 * **Operations**:\n
18272 * ~~~
18273 * res[x] = Rs1.H[x] + Rs2.H[x];
18274 * if (res[x] > 32767) {
18275 * res[x] = 32767;
18276 * OV = 1;
18277 * } else if (res[x] < -32768) {
18278 * res[x] = -32768;
18279 * OV = 1;
18280 * }
18281 * Rd.H[x] = res[x];
18282 * for RV32: x=3...0,
18283 * ~~~
18284 *
18285 * \param [in] a unsigned long long type of value stored in a
18286 * \param [in] b unsigned long long type of value stored in b
18287 * \return value stored in unsigned long long type
18288 */
__RV_DKADD16(unsigned long long a,unsigned long long b)18289 __STATIC_FORCEINLINE unsigned long long __RV_DKADD16(unsigned long long a, unsigned long long b)
18290 {
18291 unsigned long long result;
18292 __ASM volatile("dkadd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
18293 return result;
18294 }
18295 /* ===== Inline Function End for A.8. DKADD16 ===== */
18296
18297 /* ===== Inline Function Start for A.9. DKSUB8 ===== */
18298 /**
18299 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM
18300 * \brief DKSUB8 (64-bit SIMD 8-bit Signed Saturating Subtraction)
18301 * \details
18302 * **Type**: SIMD
18303 *
18304 * **Syntax**:\n
18305 * ~~~
18306 * DKSUB8 Rd, Rs1, Rs2
18307 * # Rd, Rs1, Rs2 are all even/odd pair of registers
18308 * ~~~
18309 *
18310 * **Purpose**:\n
18311 * Do 8-bit signed elements saturating subtractions simultaneously.
18312 *
18313 * **Description**:\n
18314 * This instruction subtracts the 8-bit signed integer elements in Rs2 from the 8-bit
18315 * signed integer elements in Rs1. If any of the results are beyond the Q7 number range (-2^7 <= Q7 <= 2^7-1),
18316 * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
18317 *
18318 * **Operations**:\n
18319 * ~~~
18320 * res[x] = Rs1.B[x] - Rs2.B[x];
18321 * if (res[x] > (2^7)-1) {
18322 * res[x] = (2^7)-1;
18323 * OV = 1;
18324 * } else if (res[x] < -2^7) {
18325 * res[x] = -2^7;
18326 * OV = 1;
18327 * }
18328 * Rd.B[x] = res[x];
18329 * for RV32: x=7...0,
18330 * ~~~
18331 *
18332 * \param [in] a unsigned long long type of value stored in a
18333 * \param [in] b unsigned long long type of value stored in b
18334 * \return value stored in unsigned long long type
18335 */
__RV_DKSUB8(unsigned long long a,unsigned long long b)18336 __STATIC_FORCEINLINE unsigned long long __RV_DKSUB8(unsigned long long a, unsigned long long b)
18337 {
18338 unsigned long long result;
18339 __ASM volatile("dksub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
18340 return result;
18341 }
18342 /* ===== Inline Function End for A.9. DKSUB8 ===== */
18343
18344 /* ===== Inline Function Start for A.10. DKSUB16 ===== */
18345 /**
18346 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM
18347 * \brief DKSUB16 (64-bit SIMD 16-bit Signed Saturating Subtraction)
18348 * \details
18349 * **Type**: SIMD
18350 *
18351 * **Syntax**:\n
18352 * ~~~
18353 * DKSUB16 Rd, Rs1, Rs2
18354 * # Rd, Rs1, Rs2 are all even/odd pair of registers
18355 * ~~~
18356 *
18357 * **Purpose**:\n
18358 * Do 16-bit signed integer elements saturating subtractions simultaneously.
18359 *
18360 * **Description**:\n
18361 * This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit
18362 * signed integer elements in Rs1. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <=
18363 * 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to
18364 * Rd.
18365 *
18366 * **Operations**:\n
18367 * ~~~
18368 * res[x] = Rs1.H[x] - Rs2.H[x];
18369 * if (res[x] > (2^15)-1) {
18370 * res[x] = (2^15)-1;
18371 * OV = 1;
18372 * } else if (res[x] < -2^15) {
18373 * res[x] = -2^15;
18374 * OV = 1;
18375 * }
18376 * Rd.H[x] = res[x];
18377 * for RV32: x=3...0,
18378 * ~~~
18379 *
18380 * \param [in] a unsigned long long type of value stored in a
18381 * \param [in] b unsigned long long type of value stored in b
18382 * \return value stored in unsigned long long type
18383 */
__RV_DKSUB16(unsigned long long a,unsigned long long b)18384 __STATIC_FORCEINLINE unsigned long long __RV_DKSUB16(unsigned long long a, unsigned long long b)
18385 {
18386 unsigned long long result;
18387 __ASM volatile("dksub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
18388 return result;
18389 }
18390 /* ===== Inline Function End for A.10. DKSUB16 ===== */
18391
18392 /* ===== Inline Function Start for A.11.1. EXPD80 ===== */
18393 /**
18394 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM
18395 * \brief EXPD80 (Expand and Copy Byte 0 to 32bit)
18396 * \details
18397 * **Type**: DSP
18398 *
18399 * **Syntax**:\n
18400 * ~~~
18401 * EXPD80 Rd, Rs1
18402 * ~~~
18403 *
18404 * **Purpose**:\n
18405 * Copy 8-bit data from 32-bit chunks into 4 bytes in a register.
18406 *
18407 * **Description**:\n
 * Moves Rs1.B[0][7:0] to Rd.B[0][7:0], Rd.B[1][7:0], Rd.B[2][7:0], Rd.B[3][7:0]
18409 *
18410 * **Operations**:\n
18411 * ~~~
18412 * Rd.W[x][31:0] = CONCAT(Rs1.B[0][7:0], Rs1.B[0][7:0], Rs1.B[0][7:0], Rs1.B[0][7:0]);
18413 * for RV32: x=0
18414 * ~~~
18415 *
18416 * \param [in] a unsigned long type of value stored in a
18417 * \return value stored in unsigned long type
18418 */
__RV_EXPD80(unsigned long a)18419 __STATIC_FORCEINLINE unsigned long __RV_EXPD80(unsigned long a)
18420 {
18421 unsigned long result;
18422 __ASM volatile("expd80 %0, %1" : "=r"(result) : "r"(a));
18423 return result;
18424 }
/* ===== Inline Function End for A.11.1. EXPD80 ===== */
18426
18427 /* ===== Inline Function Start for A.11.2. EXPD81 ===== */
18428 /**
18429 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM
18430 * \brief EXPD81 (Expand and Copy Byte 1 to 32bit)
18431 * \details
18432 * **Type**: DSP
18433 *
18434 * **Syntax**:\n
18435 * ~~~
18436 * EXPD81 Rd, Rs1
18437 * ~~~
18438 *
18439 * **Purpose**:\n
18440 * Copy 8-bit data from 32-bit chunks into 4 bytes in a register.
18441 *
18442 * **Description**:\n
 * Moves Rs1.B[1][7:0] to Rd.B[0][7:0], Rd.B[1][7:0], Rd.B[2][7:0], Rd.B[3][7:0]
18444 *
18445 * **Operations**:\n
18446 * ~~~
18447 * Rd.W[x][31:0] = CONCAT(Rs1.B[1][7:0], Rs1.B[1][7:0], Rs1.B[1][7:0], Rs1.B[1][7:0]);
18448 * for RV32: x=0
18449 * ~~~
18450 *
18451 * \param [in] a unsigned long type of value stored in a
18452 * \return value stored in unsigned long type
18453 */
__RV_EXPD81(unsigned long a)18454 __STATIC_FORCEINLINE unsigned long __RV_EXPD81(unsigned long a)
18455 {
18456 unsigned long result;
18457 __ASM volatile("expd81 %0, %1" : "=r"(result) : "r"(a));
18458 return result;
18459 }
/* ===== Inline Function End for A.11.2. EXPD81 ===== */
18461
18462 /* ===== Inline Function Start for A.11.3. EXPD82 ===== */
18463 /**
18464 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM
18465 * \brief EXPD82 (Expand and Copy Byte 2 to 32bit)
18466 * \details
18467 * **Type**: DSP
18468 *
18469 * **Syntax**:\n
18470 * ~~~
18471 * EXPD82 Rd, Rs1
18472 * ~~~
18473 *
18474 * **Purpose**:\n
18475 * Copy 8-bit data from 32-bit chunks into 4 bytes in a register.
18476 *
18477 * **Description**:\n
 * Moves Rs1.B[2][7:0] to Rd.B[0][7:0], Rd.B[1][7:0], Rd.B[2][7:0], Rd.B[3][7:0]
18479 *
18480 * **Operations**:\n
18481 * ~~~
18482 * Rd.W[x][31:0] = CONCAT(Rs1.B[2][7:0], Rs1.B[2][7:0], Rs1.B[2][7:0], Rs1.B[2][7:0]);
18483 * for RV32: x=0
18484 * ~~~
18485 *
18486 * \param [in] a unsigned long type of value stored in a
18487 * \return value stored in unsigned long type
18488 */
__RV_EXPD82(unsigned long a)18489 __STATIC_FORCEINLINE unsigned long __RV_EXPD82(unsigned long a)
18490 {
18491 unsigned long result;
18492 __ASM volatile("expd82 %0, %1" : "=r"(result) : "r"(a));
18493 return result;
18494 }
/* ===== Inline Function End for A.11.3. EXPD82 ===== */
18496
18497 /* ===== Inline Function Start for A.11.4. EXPD83 ===== */
18498 /**
18499 * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_CUSTOM
18500 * \brief EXPD83 (Expand and Copy Byte 3 to 32bit)
18501 * \details
18502 * **Type**: DSP
18503 *
18504 * **Syntax**:\n
18505 * ~~~
18506 * EXPD83 Rd, Rs1
18507 * ~~~
18508 *
18509 * **Purpose**:\n
18510 * Copy 8-bit data from 32-bit chunks into 4 bytes in a register.
18511 *
18512 * **Description**:\n
 * Moves Rs1.B[3][7:0] to Rd.B[0][7:0], Rd.B[1][7:0], Rd.B[2][7:0], Rd.B[3][7:0]
18514 *
18515 * **Operations**:\n
18516 * ~~~
18517 * Rd.W[x][31:0] = CONCAT(Rs1.B[3][7:0], Rs1.B[3][7:0], Rs1.B[3][7:0], Rs1.B[3][7:0]);
18518 * for RV32: x=0
18519 * ~~~
18520 *
18521 * \param [in] a unsigned long type of value stored in a
18522 * \return value stored in unsigned long type
18523 */
__RV_EXPD83(unsigned long a)18524 __STATIC_FORCEINLINE unsigned long __RV_EXPD83(unsigned long a)
18525 {
18526 unsigned long result;
18527 __ASM volatile("expd83 %0, %1" : "=r"(result) : "r"(a));
18528 return result;
18529 }
/* ===== Inline Function End for A.11.4. EXPD83 ===== */
18531 #endif /* __RISCV_XLEN == 32 */
18532
18533 #if defined(__RISCV_FEATURE_DSP) && (__RISCV_FEATURE_DSP == 1)
/* XXXXX ARM Compatible SIMD API XXXXX */
/*
 * ARM-style SIMD intrinsic names mapped one-to-one onto the __RV_* DSP
 * intrinsics. Every macro forwards its arguments fully parenthesized.
 */
/** \brief Q setting quad 8-bit saturating addition. */
#define __QADD8(x, y)               __RV_KADD8((x), (y))
/** \brief Q setting quad 8-bit saturating subtract. */
#define __QSUB8(x, y)               __RV_KSUB8((x), (y))
/** \brief Q setting dual 16-bit saturating addition. */
#define __QADD16(x, y)              __RV_KADD16((x), (y))
/** \brief Dual 16-bit signed addition with halved results. */
#define __SHADD16(x, y)             __RV_RADD16((x), (y))
/** \brief Q setting dual 16-bit saturating subtract. */
#define __QSUB16(x, y)              __RV_KSUB16((x), (y))
/** \brief Dual 16-bit signed subtraction with halved results. */
#define __SHSUB16(x, y)             __RV_RSUB16((x), (y))
/** \brief Q setting dual 16-bit add and subtract with exchange. */
#define __QASX(x, y)                __RV_KCRAS16((x), (y))
/** \brief Dual 16-bit signed addition and subtraction with halved results. */
#define __SHASX(x, y)               __RV_RCRAS16((x), (y))
/** \brief Q setting dual 16-bit subtract and add with exchange. */
#define __QSAX(x, y)                __RV_KCRSA16((x), (y))
/** \brief Dual 16-bit signed subtraction and addition with halved results. */
#define __SHSAX(x, y)               __RV_RCRSA16((x), (y))
/**
 * \brief Dual 16-bit signed multiply with exchange returning difference.
 * \note The operands are forwarded to __RV_SMXDS in swapped order (y first, then x).
 */
#define __SMUSDX(x, y)              __RV_SMXDS((y), (x))
18557 /** \brief Q setting sum of dual 16-bit signed multiply with exchange. */
__SMUADX(int32_t op1,int32_t op2)18558 __STATIC_FORCEINLINE int32_t __SMUADX (int32_t op1, int32_t op2)
18559 {
18560 return (int32_t)__RV_KMXDA(op1, op2);
18561 }
/** \brief Q setting saturating add; forwards both arguments to __RV_KADDW. */
#define __QADD(a, b)                __RV_KADDW((a), (b))
/** \brief Q setting saturating subtract; forwards both arguments to __RV_KSUBW. */
#define __QSUB(a, b)                __RV_KSUBW((a), (b))
18566 /** \brief Q setting dual 16-bit signed multiply with single 32-bit accumulator. */
__SMLAD(int32_t op1,int32_t op2,int32_t op3)18567 __STATIC_FORCEINLINE int32_t __SMLAD(int32_t op1, int32_t op2, int32_t op3)
18568 {
18569 return (int32_t)__RV_KMADA(op3, op1, op2);
18570 }
18571 /** \brief Q setting pre-exchanged dual 16-bit signed multiply with single 32-bit accumulator. */
__SMLADX(int32_t op1,int32_t op2,int32_t op3)18572 __STATIC_FORCEINLINE int32_t __SMLADX(int32_t op1, int32_t op2, int32_t op3)
18573 {
18574 return (int32_t)__RV_KMAXDA(op3, op1, op2);
18575 }
18576 /** \brief Q setting dual 16-bit signed multiply with exchange subtract with 32-bit accumulate. */
__SMLSDX(int32_t op1,int32_t op2,int32_t op3)18577 __STATIC_FORCEINLINE int32_t __SMLSDX(int32_t op1, int32_t op2, int32_t op3)
18578 {
18579 return (op3 - (int32_t)__RV_SMXDS(op1, op2));
18580 }
18581 /** \brief Dual 16-bit signed multiply with single 64-bit accumulator. */
__SMLALD(int32_t op1,int32_t op2,int64_t acc)18582 __STATIC_FORCEINLINE int64_t __SMLALD(int32_t op1, int32_t op2, int64_t acc)
18583 {
18584 return (int64_t)__RV_SMALDA(acc, op1, op2);
18585 }
18586 /** \brief Dual 16-bit signed multiply with exchange with single 64-bit accumulator. */
__SMLALDX(int32_t op1,int32_t op2,int64_t acc)18587 __STATIC_FORCEINLINE int64_t __SMLALDX(int32_t op1, int32_t op2, int64_t acc)
18588 {
18589 return (int64_t)__RV_SMALXDA(acc, op1, op2);
18590 }
18591 /** \brief Q setting sum of dual 16-bit signed multiply. */
__SMUAD(int32_t op1,int32_t op2)18592 __STATIC_FORCEINLINE int32_t __SMUAD(int32_t op1, int32_t op2)
18593 {
18594 return (int32_t)__RV_KMDA(op1, op2);
18595 }
18596 /** \brief Dual 16-bit signed multiply returning difference. */
__SMUSD(int32_t op1,int32_t op2)18597 __STATIC_FORCEINLINE int32_t __SMUSD(int32_t op1, int32_t op2)
18598 {
18599 return (int32_t)__RV_SMDRS(op1, op2);
18600 }
/** \brief Dual extract 8-bits and sign extend each to 16-bits; forwards to __RV_SUNPKD820. */
#define __SXTB16(val)               __RV_SUNPKD820((val))
18603 /** \brief Dual extracted 8-bit to 16-bit signed addition. TODO Need test */
__SXTAB16(uint32_t op1,uint32_t op2)18604 __STATIC_FORCEINLINE int32_t __SXTAB16(uint32_t op1, uint32_t op2)
18605 {
18606 return __RV_ADD16(op1, __RV_SUNPKD830(op2));
18607 }
18608 /** \brief 32-bit signed multiply with 32-bit truncated accumulator. */
__SMMLA(int32_t op1,int32_t op2,int32_t op3)18609 __STATIC_FORCEINLINE int32_t __SMMLA(int32_t op1, int32_t op2, int32_t op3)
18610 {
18611 int32_t mul;
18612 mul = (int32_t)__RV_SMMUL(op1, op2);
18613 return (op3 + mul);
18614 }
/*
 * Prefix-less aliases for __RV_* intrinsics, sorted by alias name.
 * Object-like macros: they expand to the intrinsic's name only, so the
 * argument list is supplied at the point of use.
 */

/* Aliases that keep the intrinsic's own mnemonic. */
#define __ADD64                     __RV_ADD64
#define __DKABS16                   __RV_DKABS16
#define __DKABS8                    __RV_DKABS8
#define __DKADD16                   __RV_DKADD16
#define __DKHM16                    __RV_DKHM16
#define __DKHM8                     __RV_DKHM8
#define __DKSLRA16                  __RV_DKSLRA16
#define __DKSLRA8                   __RV_DKSLRA8
#define __DKSUB16                   __RV_DKSUB16
#define __EXPD80                    __RV_EXPD80
#define __KABSW                     __RV_KABSW
#define __KADD16                    __RV_KADD16
#define __MULSR64                   __RV_MULSR64
#define __PKBB16                    __RV_PKBB16
#define __PKTT16                    __RV_PKTT16
#define __SMALBB                    __RV_SMALBB
#define __SMALDA                    __RV_SMALDA
#define __SMAQA                     __RV_SMAQA
#define __SMAX16                    __RV_SMAX16
#define __SMAX8                     __RV_SMAX8
#define __SMBB16                    __RV_SMBB16
#define __SMBT16                    __RV_SMBT16
#define __SMSLDA                    __RV_SMSLDA
#define __SMTT16                    __RV_SMTT16
#define __SUB64                     __RV_SUB64

/* Aliases whose name differs from the intrinsic's mnemonic. */
#define __DQADD8                    __RV_DKADD8
#define __DQSUB8                    __RV_DKSUB8
#define __SADD16                    __RV_ADD16
18643
18644 #endif /* (__RISCV_FEATURE_DSP == 1) */
18645
18646 #endif /* defined(__DSP_PRESENT) && (__DSP_PRESENT == 1) */
18647
/**
 * \brief Halfword packing, bottom then top.
 * \details Plain C expression, defined outside the __DSP_PRESENT guard.
 *          Result bits[15:0] are bits[15:0] of val1; result bits[31:16] are
 *          bits[31:16] of val2 after it has been left-shifted by val3.
 *          val1 and val2 are converted to uint32_t first.
 */
#define __PKHBT(val1, val2, val3) \
    ( (((uint32_t)(val1)) & 0x0000FFFFUL) | \
      ((((uint32_t)(val2)) << (val3)) & 0xFFFF0000UL) )

/**
 * \brief Halfword packing, top then bottom.
 * \details Plain C expression, defined outside the __DSP_PRESENT guard.
 *          Result bits[31:16] are bits[31:16] of val1; result bits[15:0] are
 *          bits[15:0] of val2 after it has been right-shifted by val3.
 *          val1 and val2 are converted to uint32_t first, so the shift is a
 *          logical one.
 */
#define __PKHTB(val1, val2, val3) \
    ( (((uint32_t)(val1)) & 0xFFFF0000UL) | \
      ((((uint32_t)(val2)) >> (val3)) & 0x0000FFFFUL) )
18654
18655 #ifdef __cplusplus
18656 }
18657 #endif
18658
18659 #endif /* __CORE_FEATURE_DSP__ */
18660