//===-- generic/lib/misc/shuffle2.cl ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <clc/clc.h>

// Generates the shuffle2() overloads. Every component of the result is chosen
// by the matching component of `mask` from the 2 * N components formed by the
// N components of `x` followed by the N components of `y`.
//
// All macros defined here are implementation details and are #undef'd at the
// end of the file.

// Switch-case lists that return component 0 .. WIDTH-1 of VEC. Each wider list
// extends the next narrower one.
#define _CLC_ELEMENT_CASES2(VEC) \
  case 0: return VEC.s0; \
  case 1: return VEC.s1;

#define _CLC_ELEMENT_CASES4(VEC) \
  _CLC_ELEMENT_CASES2(VEC) \
  case 2: return VEC.s2; \
  case 3: return VEC.s3;

#define _CLC_ELEMENT_CASES8(VEC) \
  _CLC_ELEMENT_CASES4(VEC) \
  case 4: return VEC.s4; \
  case 5: return VEC.s5; \
  case 6: return VEC.s6; \
  case 7: return VEC.s7;

#define _CLC_ELEMENT_CASES16(VEC) \
  _CLC_ELEMENT_CASES8(VEC) \
  case 8: return VEC.s8; \
  case 9: return VEC.s9; \
  case 10: return VEC.sA; \
  case 11: return VEC.sB; \
  case 12: return VEC.sC; \
  case 13: return VEC.sD; \
  case 14: return VEC.sE; \
  case 15: return VEC.sF;

// Defines __clc_get_el_<ELTTYPE><WIDTH>_<IDXTYPE>(x, y, idx), which returns
// the scalar selected by idx: indices below WIDTH read from x, the remaining
// ones read component (idx - WIDTH) of y. An index matching no case yields 0.
#define _CLC_GET_ELEMENT_DEFINE(ELTTYPE, WIDTH, IDXTYPE) \
  __attribute__((always_inline)) \
  ELTTYPE __clc_get_el_##ELTTYPE##WIDTH##_##IDXTYPE(ELTTYPE##WIDTH x, ELTTYPE##WIDTH y, IDXTYPE idx) { \
    if (idx < WIDTH) { \
      switch (idx) { \
        _CLC_ELEMENT_CASES##WIDTH(x) \
        default: return 0; \
      } \
    } \
    switch (idx - WIDTH) { \
      _CLC_ELEMENT_CASES##WIDTH(y) \
      default: return 0; \
    } \
  }

// Assigns one component (LANE is a component suffix digit: 0-9, A-F) of the
// local `result` from the element of x/y selected by the same component of
// `mask`. Expects `result`, `x`, `y` and `mask` to be in scope.
#define _CLC_SHUFFLE_SET_LANE(ELTTYPE, INWIDTH, MASKTYPE, LANE) \
  result.s##LANE = __clc_get_el_##ELTTYPE##INWIDTH##_##MASKTYPE(x, y, mask.s##LANE);

// Assign the first 2 / 4 / 8 / 16 components of `result`; each wider variant
// extends the next narrower one.
#define _CLC_SHUFFLE_SET_2_ELEMENTS(ELTTYPE, INWIDTH, MASKTYPE) \
  _CLC_SHUFFLE_SET_LANE(ELTTYPE, INWIDTH, MASKTYPE, 0) \
  _CLC_SHUFFLE_SET_LANE(ELTTYPE, INWIDTH, MASKTYPE, 1)

#define _CLC_SHUFFLE_SET_4_ELEMENTS(ELTTYPE, INWIDTH, MASKTYPE) \
  _CLC_SHUFFLE_SET_2_ELEMENTS(ELTTYPE, INWIDTH, MASKTYPE) \
  _CLC_SHUFFLE_SET_LANE(ELTTYPE, INWIDTH, MASKTYPE, 2) \
  _CLC_SHUFFLE_SET_LANE(ELTTYPE, INWIDTH, MASKTYPE, 3)

#define _CLC_SHUFFLE_SET_8_ELEMENTS(ELTTYPE, INWIDTH, MASKTYPE) \
  _CLC_SHUFFLE_SET_4_ELEMENTS(ELTTYPE, INWIDTH, MASKTYPE) \
  _CLC_SHUFFLE_SET_LANE(ELTTYPE, INWIDTH, MASKTYPE, 4) \
  _CLC_SHUFFLE_SET_LANE(ELTTYPE, INWIDTH, MASKTYPE, 5) \
  _CLC_SHUFFLE_SET_LANE(ELTTYPE, INWIDTH, MASKTYPE, 6) \
  _CLC_SHUFFLE_SET_LANE(ELTTYPE, INWIDTH, MASKTYPE, 7)

#define _CLC_SHUFFLE_SET_16_ELEMENTS(ELTTYPE, INWIDTH, MASKTYPE) \
  _CLC_SHUFFLE_SET_8_ELEMENTS(ELTTYPE, INWIDTH, MASKTYPE) \
  _CLC_SHUFFLE_SET_LANE(ELTTYPE, INWIDTH, MASKTYPE, 8) \
  _CLC_SHUFFLE_SET_LANE(ELTTYPE, INWIDTH, MASKTYPE, 9) \
  _CLC_SHUFFLE_SET_LANE(ELTTYPE, INWIDTH, MASKTYPE, A) \
  _CLC_SHUFFLE_SET_LANE(ELTTYPE, INWIDTH, MASKTYPE, B) \
  _CLC_SHUFFLE_SET_LANE(ELTTYPE, INWIDTH, MASKTYPE, C) \
  _CLC_SHUFFLE_SET_LANE(ELTTYPE, INWIDTH, MASKTYPE, D) \
  _CLC_SHUFFLE_SET_LANE(ELTTYPE, INWIDTH, MASKTYPE, E) \
  _CLC_SHUFFLE_SET_LANE(ELTTYPE, INWIDTH, MASKTYPE, F)

// Defines one shuffle2() overload taking two INWIDTH-wide inputs and an
// OUTWIDTH-wide mask, returning an OUTWIDTH-wide vector. Each mask component
// is first ANDed with (2 * INWIDTH - 1); for the power-of-two widths
// instantiated below this keeps every index inside [0, 2 * INWIDTH).
#define _CLC_SHUFFLE_DEFINE(ELTTYPE, INWIDTH, MASKTYPE, OUTWIDTH) \
  _CLC_DEF _CLC_OVERLOAD ELTTYPE##OUTWIDTH shuffle2(ELTTYPE##INWIDTH x, ELTTYPE##INWIDTH y, MASKTYPE##OUTWIDTH mask) { \
    ELTTYPE##OUTWIDTH result; \
    mask &= (MASKTYPE##OUTWIDTH)(INWIDTH * 2 - 1); \
    _CLC_SHUFFLE_SET_##OUTWIDTH##_ELEMENTS(ELTTYPE, INWIDTH, MASKTYPE) \
    return result; \
  }

// For one input width: the element getter plus the overloads for every result
// width (2, 4, 8, 16).
#define _CLC_VECTOR_SHUFFLE_MASKSIZE(INTYPE, ARGSIZE, MASKTYPE) \
  _CLC_GET_ELEMENT_DEFINE(INTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_DEFINE(INTYPE, ARGSIZE, MASKTYPE, 2) \
  _CLC_SHUFFLE_DEFINE(INTYPE, ARGSIZE, MASKTYPE, 4) \
  _CLC_SHUFFLE_DEFINE(INTYPE, ARGSIZE, MASKTYPE, 8) \
  _CLC_SHUFFLE_DEFINE(INTYPE, ARGSIZE, MASKTYPE, 16)

// For one element type / mask type pair: everything for every input width
// (2, 4, 8, 16).
#define _CLC_VECTOR_SHUFFLE_INSIZE(TYPE, MASKTYPE) \
  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 2, MASKTYPE) \
  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 4, MASKTYPE) \
  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 8, MASKTYPE) \
  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 16, MASKTYPE)

// Instantiations: each data type together with the unsigned mask type used by
// its overloads.
_CLC_VECTOR_SHUFFLE_INSIZE(char, uchar)
_CLC_VECTOR_SHUFFLE_INSIZE(short, ushort)
_CLC_VECTOR_SHUFFLE_INSIZE(int, uint)
_CLC_VECTOR_SHUFFLE_INSIZE(long, ulong)
_CLC_VECTOR_SHUFFLE_INSIZE(uchar, uchar)
_CLC_VECTOR_SHUFFLE_INSIZE(ushort, ushort)
_CLC_VECTOR_SHUFFLE_INSIZE(uint, uint)
_CLC_VECTOR_SHUFFLE_INSIZE(ulong, ulong)
_CLC_VECTOR_SHUFFLE_INSIZE(float, uint)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
_CLC_VECTOR_SHUFFLE_INSIZE(double, ulong)
#endif
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
_CLC_VECTOR_SHUFFLE_INSIZE(half, ushort)
#endif

#undef _CLC_ELEMENT_CASES2
#undef _CLC_ELEMENT_CASES4
#undef _CLC_ELEMENT_CASES8
#undef _CLC_ELEMENT_CASES16
#undef _CLC_GET_ELEMENT_DEFINE
#undef _CLC_SHUFFLE_SET_LANE
#undef _CLC_SHUFFLE_SET_2_ELEMENTS
#undef _CLC_SHUFFLE_SET_4_ELEMENTS
#undef _CLC_SHUFFLE_SET_8_ELEMENTS
#undef _CLC_SHUFFLE_SET_16_ELEMENTS
#undef _CLC_SHUFFLE_DEFINE
#undef _CLC_VECTOR_SHUFFLE_MASKSIZE
#undef _CLC_VECTOR_SHUFFLE_INSIZE