/external/XNNPACK/src/f32-prelu/gen/ |
D | wasmsimd-minmax-4x16.c | 55 const v128_t vzero = wasm_f32x4_splat(0.0f); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16() local 87 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16() 88 vi0x0123 = wasm_i32x4_min(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16() 89 v128_t vacc0x4567 = wasm_i32x4_max(vi0x4567, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16() 90 vi0x4567 = wasm_i32x4_min(vi0x4567, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16() 91 v128_t vacc0x89AB = wasm_i32x4_max(vi0x89AB, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16() 92 vi0x89AB = wasm_i32x4_min(vi0x89AB, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16() 93 v128_t vacc0xCDEF = wasm_i32x4_max(vi0xCDEF, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16() 94 vi0xCDEF = wasm_i32x4_min(vi0xCDEF, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16() 95 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x16() [all …]
|
D | wasm-2x4.c | 41 const float vzero = 0.0f; in xnn_f32_prelu_ukernel__wasm_2x4() local 62 float vacc0x0 = __builtin_wasm_max_f32(vi0x0, vzero); in xnn_f32_prelu_ukernel__wasm_2x4() 63 vi0x0 = __builtin_wasm_min_f32(vi0x0, vzero); in xnn_f32_prelu_ukernel__wasm_2x4() 64 float vacc0x1 = __builtin_wasm_max_f32(vi0x1, vzero); in xnn_f32_prelu_ukernel__wasm_2x4() 65 vi0x1 = __builtin_wasm_min_f32(vi0x1, vzero); in xnn_f32_prelu_ukernel__wasm_2x4() 66 float vacc0x2 = __builtin_wasm_max_f32(vi0x2, vzero); in xnn_f32_prelu_ukernel__wasm_2x4() 67 vi0x2 = __builtin_wasm_min_f32(vi0x2, vzero); in xnn_f32_prelu_ukernel__wasm_2x4() 68 float vacc0x3 = __builtin_wasm_max_f32(vi0x3, vzero); in xnn_f32_prelu_ukernel__wasm_2x4() 69 vi0x3 = __builtin_wasm_min_f32(vi0x3, vzero); in xnn_f32_prelu_ukernel__wasm_2x4() 70 float vacc1x0 = __builtin_wasm_max_f32(vi1x0, vzero); in xnn_f32_prelu_ukernel__wasm_2x4() [all …]
|
D | wasmsimd-minmax-4x8.c | 55 const v128_t vzero = wasm_f32x4_splat(0.0f); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8() local 77 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8() 78 vi0x0123 = wasm_i32x4_min(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8() 79 v128_t vacc0x4567 = wasm_i32x4_max(vi0x4567, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8() 80 vi0x4567 = wasm_i32x4_min(vi0x4567, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8() 81 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8() 82 vi1x0123 = wasm_i32x4_min(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8() 83 v128_t vacc1x4567 = wasm_i32x4_max(vi1x4567, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8() 84 vi1x4567 = wasm_i32x4_min(vi1x4567, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8() 85 v128_t vacc2x0123 = wasm_i32x4_max(vi2x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x8() [all …]
|
D | wasmsimd-minmax-2x16.c | 43 const v128_t vzero = wasm_f32x4_splat(0.0f); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16() local 65 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16() 66 vi0x0123 = wasm_i32x4_min(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16() 67 v128_t vacc0x4567 = wasm_i32x4_max(vi0x4567, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16() 68 vi0x4567 = wasm_i32x4_min(vi0x4567, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16() 69 v128_t vacc0x89AB = wasm_i32x4_max(vi0x89AB, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16() 70 vi0x89AB = wasm_i32x4_min(vi0x89AB, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16() 71 v128_t vacc0xCDEF = wasm_i32x4_max(vi0xCDEF, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16() 72 vi0xCDEF = wasm_i32x4_min(vi0xCDEF, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16() 73 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x16() [all …]
|
D | wasmsimd-minmax-2x8.c | 43 const v128_t vzero = wasm_f32x4_splat(0.0f); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8() local 59 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8() 60 vi0x0123 = wasm_i32x4_min(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8() 61 v128_t vacc0x4567 = wasm_i32x4_max(vi0x4567, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8() 62 vi0x4567 = wasm_i32x4_min(vi0x4567, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8() 63 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8() 64 vi1x0123 = wasm_i32x4_min(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8() 65 v128_t vacc1x4567 = wasm_i32x4_max(vi1x4567, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8() 66 vi1x4567 = wasm_i32x4_min(vi1x4567, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8() 89 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x8() [all …]
|
D | wasmsimd-minmax-1x16.c | 37 const v128_t vzero = wasm_f32x4_splat(0.0f); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16() local 54 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16() 55 vi0x0123 = wasm_i32x4_min(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16() 56 v128_t vacc0x4567 = wasm_i32x4_max(vi0x4567, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16() 57 vi0x4567 = wasm_i32x4_min(vi0x4567, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16() 58 v128_t vacc0x89AB = wasm_i32x4_max(vi0x89AB, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16() 59 vi0x89AB = wasm_i32x4_min(vi0x89AB, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16() 60 v128_t vacc0xCDEF = wasm_i32x4_max(vi0xCDEF, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16() 61 vi0xCDEF = wasm_i32x4_min(vi0xCDEF, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16() 81 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x16() [all …]
|
D | wasmsimd-minmax-4x4.c | 55 const v128_t vzero = wasm_f32x4_splat(0.0f); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4() local 72 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4() 73 vi0x0123 = wasm_i32x4_min(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4() 74 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4() 75 vi1x0123 = wasm_i32x4_min(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4() 76 v128_t vacc2x0123 = wasm_i32x4_max(vi2x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4() 77 vi2x0123 = wasm_i32x4_min(vi2x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4() 78 v128_t vacc3x0123 = wasm_i32x4_max(vi3x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4() 79 vi3x0123 = wasm_i32x4_min(vi3x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4() 108 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_4x4() [all …]
|
D | wasmsimd-bitselect-4x16.c | 55 const v128_t vzero = wasm_i32x4_splat(0); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16() local 88 const v128_t vmask0x0123 = wasm_i32x4_lt(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16() 90 const v128_t vmask0x4567 = wasm_i32x4_lt(vi0x4567, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16() 92 const v128_t vmask0x89AB = wasm_i32x4_lt(vi0x89AB, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16() 94 const v128_t vmask0xCDEF = wasm_i32x4_lt(vi0xCDEF, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16() 96 const v128_t vmask1x0123 = wasm_i32x4_lt(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16() 98 const v128_t vmask1x4567 = wasm_i32x4_lt(vi1x4567, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16() 100 const v128_t vmask1x89AB = wasm_i32x4_lt(vi1x89AB, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16() 102 const v128_t vmask1xCDEF = wasm_i32x4_lt(vi1xCDEF, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16() 104 const v128_t vmask2x0123 = wasm_i32x4_lt(vi2x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x16() [all …]
|
D | wasmsimd-bitselect-4x8.c | 55 const v128_t vzero = wasm_i32x4_splat(0); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8() local 78 const v128_t vmask0x0123 = wasm_i32x4_lt(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8() 80 const v128_t vmask0x4567 = wasm_i32x4_lt(vi0x4567, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8() 82 const v128_t vmask1x0123 = wasm_i32x4_lt(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8() 84 const v128_t vmask1x4567 = wasm_i32x4_lt(vi1x4567, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8() 86 const v128_t vmask2x0123 = wasm_i32x4_lt(vi2x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8() 88 const v128_t vmask2x4567 = wasm_i32x4_lt(vi2x4567, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8() 90 const v128_t vmask3x0123 = wasm_i32x4_lt(vi3x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8() 92 const v128_t vmask3x4567 = wasm_i32x4_lt(vi3x4567, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8() 130 const v128_t vmask0x0123 = wasm_i32x4_lt(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_4x8() [all …]
|
D | wasmsimd-minmax-1x8.c | 37 const v128_t vzero = wasm_f32x4_splat(0.0f); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8() local 50 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8() 51 vi0x0123 = wasm_i32x4_min(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8() 52 v128_t vacc0x4567 = wasm_i32x4_max(vi0x4567, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8() 53 vi0x4567 = wasm_i32x4_min(vi0x4567, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8() 69 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8() 70 vi0x0123 = wasm_i32x4_min(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8() 84 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8() 85 vi0x0123 = wasm_i32x4_min(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_1x8()
|
D | wasmsimd-bitselect-2x16.c | 43 const v128_t vzero = wasm_i32x4_splat(0); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16() local 66 const v128_t vmask0x0123 = wasm_i32x4_lt(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16() 68 const v128_t vmask0x4567 = wasm_i32x4_lt(vi0x4567, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16() 70 const v128_t vmask0x89AB = wasm_i32x4_lt(vi0x89AB, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16() 72 const v128_t vmask0xCDEF = wasm_i32x4_lt(vi0xCDEF, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16() 74 const v128_t vmask1x0123 = wasm_i32x4_lt(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16() 76 const v128_t vmask1x4567 = wasm_i32x4_lt(vi1x4567, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16() 78 const v128_t vmask1x89AB = wasm_i32x4_lt(vi1x89AB, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16() 80 const v128_t vmask1xCDEF = wasm_i32x4_lt(vi1xCDEF, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16() 112 const v128_t vmask0x0123 = wasm_i32x4_lt(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_bitselect_2x16() [all …]
|
D | wasmsimd-minmax-2x4.c | 43 const v128_t vzero = wasm_f32x4_splat(0.0f); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4() local 56 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4() 57 vi0x0123 = wasm_i32x4_min(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4() 58 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4() 59 vi1x0123 = wasm_i32x4_min(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4() 78 v128_t vacc0x0123 = wasm_i32x4_max(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4() 79 vi0x0123 = wasm_i32x4_min(vi0x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4() 80 v128_t vacc1x0123 = wasm_i32x4_max(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4() 81 vi1x0123 = wasm_i32x4_min(vi1x0123, vzero); in xnn_f32_prelu_ukernel__wasmsimd_minmax_2x4()
|
/external/XNNPACK/src/f32-vlrelu/gen/ |
D | vlrelu-wasm-x4.c | 26 const float vzero = 0.0f; in xnn_f32_vlrelu_ukernel__wasm_x4() local 35 const float vnegx0 = __builtin_wasm_min_f32(vx0, vzero); in xnn_f32_vlrelu_ukernel__wasm_x4() 36 const float vnegx1 = __builtin_wasm_min_f32(vx1, vzero); in xnn_f32_vlrelu_ukernel__wasm_x4() 37 const float vnegx2 = __builtin_wasm_min_f32(vx2, vzero); in xnn_f32_vlrelu_ukernel__wasm_x4() 38 const float vnegx3 = __builtin_wasm_min_f32(vx3, vzero); in xnn_f32_vlrelu_ukernel__wasm_x4() 41 const float vposx0 = __builtin_wasm_max_f32(vx0, vzero); in xnn_f32_vlrelu_ukernel__wasm_x4() 43 const float vposx1 = __builtin_wasm_max_f32(vx1, vzero); in xnn_f32_vlrelu_ukernel__wasm_x4() 45 const float vposx2 = __builtin_wasm_max_f32(vx2, vzero); in xnn_f32_vlrelu_ukernel__wasm_x4() 47 const float vposx3 = __builtin_wasm_max_f32(vx3, vzero); in xnn_f32_vlrelu_ukernel__wasm_x4() 63 const float vnegx = __builtin_wasm_min_f32(vx, vzero); in xnn_f32_vlrelu_ukernel__wasm_x4() [all …]
|
D | vlrelu-wasmsimd-minmax-x8.c | 28 const v128_t vzero = wasm_f32x4_splat(0.0f); in xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x8() local 34 v128_t vacc0123 = wasm_i32x4_max(vx0123, vzero); in xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x8() 35 vx0123 = wasm_i32x4_min(vx0123, vzero); in xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x8() 36 v128_t vacc4567 = wasm_i32x4_max(vx4567, vzero); in xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x8() 37 vx4567 = wasm_i32x4_min(vx4567, vzero); in xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x8() 49 v128_t vacc = wasm_i32x4_max(vx, vzero); in xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x8() 50 vx = wasm_i32x4_min(vx, vzero); in xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x8() 57 v128_t vacc = wasm_i32x4_max(vx, vzero); in xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x8() 58 vx = wasm_i32x4_min(vx, vzero); in xnn_f32_vlrelu_ukernel__wasmsimd_minmax_x8()
|
D | vlrelu-wasm-x2.c | 26 const float vzero = 0.0f; in xnn_f32_vlrelu_ukernel__wasm_x2() local 33 const float vnegx0 = __builtin_wasm_min_f32(vx0, vzero); in xnn_f32_vlrelu_ukernel__wasm_x2() 34 const float vnegx1 = __builtin_wasm_min_f32(vx1, vzero); in xnn_f32_vlrelu_ukernel__wasm_x2() 37 const float vposx0 = __builtin_wasm_max_f32(vx0, vzero); in xnn_f32_vlrelu_ukernel__wasm_x2() 39 const float vposx1 = __builtin_wasm_max_f32(vx1, vzero); in xnn_f32_vlrelu_ukernel__wasm_x2() 50 const float vnegx = __builtin_wasm_min_f32(vx, vzero); in xnn_f32_vlrelu_ukernel__wasm_x2() 52 const float vposx = __builtin_wasm_max_f32(vx, vzero); in xnn_f32_vlrelu_ukernel__wasm_x2()
|
/external/XNNPACK/src/f32-relu/gen/ |
D | wasm-x8.c | 28 const float vzero = 0.0f; in xnn_f32_relu_ukernel__wasm_x8() local 41 vacc0 = __builtin_wasm_max_f32(vacc0, vzero); in xnn_f32_relu_ukernel__wasm_x8() 42 vacc1 = __builtin_wasm_max_f32(vacc1, vzero); in xnn_f32_relu_ukernel__wasm_x8() 43 vacc2 = __builtin_wasm_max_f32(vacc2, vzero); in xnn_f32_relu_ukernel__wasm_x8() 44 vacc3 = __builtin_wasm_max_f32(vacc3, vzero); in xnn_f32_relu_ukernel__wasm_x8() 45 vacc4 = __builtin_wasm_max_f32(vacc4, vzero); in xnn_f32_relu_ukernel__wasm_x8() 46 vacc5 = __builtin_wasm_max_f32(vacc5, vzero); in xnn_f32_relu_ukernel__wasm_x8() 47 vacc6 = __builtin_wasm_max_f32(vacc6, vzero); in xnn_f32_relu_ukernel__wasm_x8() 48 vacc7 = __builtin_wasm_max_f32(vacc7, vzero); in xnn_f32_relu_ukernel__wasm_x8() 63 vacc = __builtin_wasm_max_f32(vacc, vzero); in xnn_f32_relu_ukernel__wasm_x8()
|
D | wasmsimd-x16.c | 29 const v128_t vzero = wasm_f32x4_splat(0.0f); in xnn_f32_relu_ukernel__wasmsimd_x16() local 38 vacc0123 = wasm_i32x4_max(vacc0123, vzero); in xnn_f32_relu_ukernel__wasmsimd_x16() 39 vacc4567 = wasm_i32x4_max(vacc4567, vzero); in xnn_f32_relu_ukernel__wasmsimd_x16() 40 vacc89AB = wasm_i32x4_max(vacc89AB, vzero); in xnn_f32_relu_ukernel__wasmsimd_x16() 41 vaccCDEF = wasm_i32x4_max(vaccCDEF, vzero); in xnn_f32_relu_ukernel__wasmsimd_x16() 53 vacc = wasm_i32x4_max(vacc, vzero); in xnn_f32_relu_ukernel__wasmsimd_x16() 61 vacc = wasm_i32x4_max(vacc, vzero); in xnn_f32_relu_ukernel__wasmsimd_x16()
|
D | wasm-x4.c | 28 const float vzero = 0.0f; in xnn_f32_relu_ukernel__wasm_x4() local 37 vacc0 = __builtin_wasm_max_f32(vacc0, vzero); in xnn_f32_relu_ukernel__wasm_x4() 38 vacc1 = __builtin_wasm_max_f32(vacc1, vzero); in xnn_f32_relu_ukernel__wasm_x4() 39 vacc2 = __builtin_wasm_max_f32(vacc2, vzero); in xnn_f32_relu_ukernel__wasm_x4() 40 vacc3 = __builtin_wasm_max_f32(vacc3, vzero); in xnn_f32_relu_ukernel__wasm_x4() 51 vacc = __builtin_wasm_max_f32(vacc, vzero); in xnn_f32_relu_ukernel__wasm_x4()
|
/external/XNNPACK/src/qu8-gavgpool/ |
D | 7p7x-minmax-sse2-c8.c | 40 const __m128i vzero = _mm_setzero_si128(); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__sse2_c8() local 52 const __m128i vxi0 = _mm_unpacklo_epi8(vi0, vzero); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__sse2_c8() 53 const __m128i vxi1 = _mm_unpacklo_epi8(vi1, vzero); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__sse2_c8() 54 const __m128i vxi2 = _mm_unpacklo_epi8(vi2, vzero); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__sse2_c8() 55 const __m128i vxi3 = _mm_unpacklo_epi8(vi3, vzero); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__sse2_c8() 56 const __m128i vxi4 = _mm_unpacklo_epi8(vi4, vzero); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__sse2_c8() 57 const __m128i vxi5 = _mm_unpacklo_epi8(vi5, vzero); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__sse2_c8() 58 const __m128i vxi6 = _mm_unpacklo_epi8(vi6, vzero); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__sse2_c8() 68 const __m128i vacc_lo = _mm_add_epi32(vbias, _mm_unpacklo_epi16(vsum, vzero)); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__sse2_c8() 69 const __m128i vacc_hi = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vsum, vzero)); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__sse2_c8() [all …]
|
D | 7x-minmax-sse2-c8.c | 56 const __m128i vzero = _mm_setzero_si128(); in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8() local 70 const __m128i vxi0 = _mm_unpacklo_epi8(vi0, vzero); in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8() 71 const __m128i vxi1 = _mm_unpacklo_epi8(vi1, vzero); in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8() 72 const __m128i vxi2 = _mm_unpacklo_epi8(vi2, vzero); in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8() 73 const __m128i vxi3 = _mm_unpacklo_epi8(vi3, vzero); in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8() 74 const __m128i vxi4 = _mm_unpacklo_epi8(vi4, vzero); in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8() 75 const __m128i vxi5 = _mm_unpacklo_epi8(vi5, vzero); in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8() 76 const __m128i vxi6 = _mm_unpacklo_epi8(vi6, vzero); in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8() 86 __m128i vacc_lo = _mm_add_epi32(vbias, _mm_unpacklo_epi16(vsum, vzero)); in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8() 87 __m128i vacc_hi = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vsum, vzero)); in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8() [all …]
|
/external/libaom/libaom/aom_dsp/x86/ |
D | sum_squares_avx2.c | 111 __m256i vzero = _mm256_setzero_si256(); in aom_var_2d_u8_avx2() local 112 __m256i v_acc_sum = vzero; in aom_var_2d_u8_avx2() 113 __m256i v_acc_sqs = vzero; in aom_var_2d_u8_avx2() 127 __m256i vsrc0 = _mm256_unpacklo_epi8(vsrc[k], vzero); in aom_var_2d_u8_avx2() 128 __m256i vsrc1 = _mm256_unpackhi_epi8(vsrc[k], vzero); in aom_var_2d_u8_avx2() 141 v_acc_sum = vzero; in aom_var_2d_u8_avx2() 142 v_acc_sqs = vzero; in aom_var_2d_u8_avx2() 148 __m256i vsrc0 = _mm256_unpacklo_epi8(vsrc, vzero); in aom_var_2d_u8_avx2() 149 __m256i vsrc1 = _mm256_unpackhi_epi8(vsrc, vzero); in aom_var_2d_u8_avx2() 164 v_acc_sum = vzero; in aom_var_2d_u8_avx2() [all …]
|
D | sum_squares_sse2.c | 229 __m128i vzero = _mm_setzero_si128(); in aom_var_2d_u8_sse2() local 230 __m128i v_acc_sum = vzero; in aom_var_2d_u8_sse2() 231 __m128i v_acc_sqs = vzero; in aom_var_2d_u8_sse2() 245 __m128i vsrc0 = _mm_unpacklo_epi8(vsrc[k], vzero); in aom_var_2d_u8_sse2() 246 __m128i vsrc1 = _mm_unpackhi_epi8(vsrc[k], vzero); in aom_var_2d_u8_sse2() 259 v_acc_sum = vzero; in aom_var_2d_u8_sse2() 260 v_acc_sqs = vzero; in aom_var_2d_u8_sse2() 266 __m128i vsrc0 = _mm_unpacklo_epi8(vsrc, vzero); in aom_var_2d_u8_sse2() 267 __m128i vsrc1 = _mm_unpackhi_epi8(vsrc, vzero); in aom_var_2d_u8_sse2() 282 v_acc_sum = vzero; in aom_var_2d_u8_sse2() [all …]
|
/external/XNNPACK/src/qu8-avgpool/ |
D | 9p8x-minmax-sse2-c8.c | 34 const __m128i vzero = _mm_setzero_si128(); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() local 99 const __m128i vxi0 = _mm_unpacklo_epi8(vi0, vzero); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() 100 const __m128i vxi1 = _mm_unpacklo_epi8(vi1, vzero); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() 101 const __m128i vxi2 = _mm_unpacklo_epi8(vi2, vzero); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() 102 const __m128i vxi3 = _mm_unpacklo_epi8(vi3, vzero); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() 103 const __m128i vxi4 = _mm_unpacklo_epi8(vi4, vzero); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() 104 const __m128i vxi5 = _mm_unpacklo_epi8(vi5, vzero); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() 105 const __m128i vxi6 = _mm_unpacklo_epi8(vi6, vzero); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() 106 const __m128i vxi7 = _mm_unpacklo_epi8(vi7, vzero); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() 107 const __m128i vxi8 = _mm_unpacklo_epi8(vi8, vzero); in xnn_qu8_avgpool_minmax_ukernel_9p8x__sse2_c8() [all …]
|
D | 9x-minmax-sse2-c8.c | 34 const __m128i vzero = _mm_setzero_si128(); in xnn_qu8_avgpool_minmax_ukernel_9x__sse2_c8() local 123 const __m128i vxi0 = _mm_unpacklo_epi8(vi0, vzero); in xnn_qu8_avgpool_minmax_ukernel_9x__sse2_c8() 124 const __m128i vxi1 = _mm_unpacklo_epi8(vi1, vzero); in xnn_qu8_avgpool_minmax_ukernel_9x__sse2_c8() 125 const __m128i vxi2 = _mm_unpacklo_epi8(vi2, vzero); in xnn_qu8_avgpool_minmax_ukernel_9x__sse2_c8() 126 const __m128i vxi3 = _mm_unpacklo_epi8(vi3, vzero); in xnn_qu8_avgpool_minmax_ukernel_9x__sse2_c8() 127 const __m128i vxi4 = _mm_unpacklo_epi8(vi4, vzero); in xnn_qu8_avgpool_minmax_ukernel_9x__sse2_c8() 128 const __m128i vxi5 = _mm_unpacklo_epi8(vi5, vzero); in xnn_qu8_avgpool_minmax_ukernel_9x__sse2_c8() 129 const __m128i vxi6 = _mm_unpacklo_epi8(vi6, vzero); in xnn_qu8_avgpool_minmax_ukernel_9x__sse2_c8() 130 const __m128i vxi7 = _mm_unpacklo_epi8(vi7, vzero); in xnn_qu8_avgpool_minmax_ukernel_9x__sse2_c8() 131 const __m128i vxi8 = _mm_unpacklo_epi8(vi8, vzero); in xnn_qu8_avgpool_minmax_ukernel_9x__sse2_c8() [all …]
|
/external/XNNPACK/src/qu8-dwconv/ |
D | up8x9-minmax-sse2.c | 27 const __m128i vzero = _mm_setzero_si128(); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() local 76 const __m128i vxi0 = _mm_unpacklo_epi8(vi0, vzero); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 78 const __m128i vxk0 = _mm_sub_epi16(_mm_unpacklo_epi8(vk0, vzero), vkernel_zero_point); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 85 const __m128i vxi1 = _mm_unpacklo_epi8(vi1, vzero); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 87 const __m128i vxk1 = _mm_sub_epi16(_mm_unpacklo_epi8(vk1, vzero), vkernel_zero_point); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 94 const __m128i vxi2 = _mm_unpacklo_epi8(vi2, vzero); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 96 const __m128i vxk2 = _mm_sub_epi16(_mm_unpacklo_epi8(vk2, vzero), vkernel_zero_point); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 103 const __m128i vxi3 = _mm_unpacklo_epi8(vi3, vzero); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 105 const __m128i vxk3 = _mm_sub_epi16(_mm_unpacklo_epi8(vk3, vzero), vkernel_zero_point); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 112 const __m128i vxi4 = _mm_unpacklo_epi8(vi4, vzero); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() [all …]
|