/external/XNNPACK/src/f16-ibilinear-chw/gen/ |
D | neonfp16arith-p4.c | 71 const float16x4x2_t vtl_t = vuzp_f16(vget_low_f16(vtltr), vget_high_f16(vtltr)); in xnn_f16_ibilinear_chw_ukernel__neonfp16arith_p4() local 72 const float16x4_t vtl = vtl_t.val[0]; in xnn_f16_ibilinear_chw_ukernel__neonfp16arith_p4() 73 const float16x4_t vtr = vtl_t.val[1]; in xnn_f16_ibilinear_chw_ukernel__neonfp16arith_p4() 114 const float16x4x2_t vtl_t = vuzp_f16(vtltr, vtltr); in xnn_f16_ibilinear_chw_ukernel__neonfp16arith_p4() local 115 const float16x4_t vtl = vtl_t.val[0]; in xnn_f16_ibilinear_chw_ukernel__neonfp16arith_p4() 116 const float16x4_t vtr = vtl_t.val[1]; in xnn_f16_ibilinear_chw_ukernel__neonfp16arith_p4() 165 const float16x4x2_t vtl_t = vuzp_f16(vtltr, vtltr); in xnn_f16_ibilinear_chw_ukernel__neonfp16arith_p4() local 166 const float16x4_t vtl = vtl_t.val[0]; in xnn_f16_ibilinear_chw_ukernel__neonfp16arith_p4() 167 const float16x4_t vtr = vtl_t.val[1]; in xnn_f16_ibilinear_chw_ukernel__neonfp16arith_p4()
|
D | neonfp16arith-p8.c | 138 const float16x4x2_t vtl_t = vuzp_f16(vget_low_f16(vtltr), vget_high_f16(vtltr)); in xnn_f16_ibilinear_chw_ukernel__neonfp16arith_p8() local 139 const float16x4_t vtl = vtl_t.val[0]; in xnn_f16_ibilinear_chw_ukernel__neonfp16arith_p8() 140 const float16x4_t vtr = vtl_t.val[1]; in xnn_f16_ibilinear_chw_ukernel__neonfp16arith_p8() 181 const float16x4x2_t vtl_t = vuzp_f16(vtltr, vtltr); in xnn_f16_ibilinear_chw_ukernel__neonfp16arith_p8() local 182 const float16x4_t vtl = vtl_t.val[0]; in xnn_f16_ibilinear_chw_ukernel__neonfp16arith_p8() 183 const float16x4_t vtr = vtl_t.val[1]; in xnn_f16_ibilinear_chw_ukernel__neonfp16arith_p8() 232 const float16x4x2_t vtl_t = vuzp_f16(vtltr, vtltr); in xnn_f16_ibilinear_chw_ukernel__neonfp16arith_p8() local 233 const float16x4_t vtl = vtl_t.val[0]; in xnn_f16_ibilinear_chw_ukernel__neonfp16arith_p8() 234 const float16x4_t vtr = vtl_t.val[1]; in xnn_f16_ibilinear_chw_ukernel__neonfp16arith_p8()
|
D | neonfp16arith-p16.c | 191 const float16x4x2_t vtl_t = vuzp_f16(vget_low_f16(vtltr), vget_high_f16(vtltr)); in xnn_f16_ibilinear_chw_ukernel__neonfp16arith_p16() local 192 const float16x4_t vtl = vtl_t.val[0]; in xnn_f16_ibilinear_chw_ukernel__neonfp16arith_p16() 193 const float16x4_t vtr = vtl_t.val[1]; in xnn_f16_ibilinear_chw_ukernel__neonfp16arith_p16() 234 const float16x4x2_t vtl_t = vuzp_f16(vtltr, vtltr); in xnn_f16_ibilinear_chw_ukernel__neonfp16arith_p16() local 235 const float16x4_t vtl = vtl_t.val[0]; in xnn_f16_ibilinear_chw_ukernel__neonfp16arith_p16() 236 const float16x4_t vtr = vtl_t.val[1]; in xnn_f16_ibilinear_chw_ukernel__neonfp16arith_p16() 285 const float16x4x2_t vtl_t = vuzp_f16(vtltr, vtltr); in xnn_f16_ibilinear_chw_ukernel__neonfp16arith_p16() local 286 const float16x4_t vtl = vtl_t.val[0]; in xnn_f16_ibilinear_chw_ukernel__neonfp16arith_p16() 287 const float16x4_t vtr = vtl_t.val[1]; in xnn_f16_ibilinear_chw_ukernel__neonfp16arith_p16()
|
/external/XNNPACK/src/f16-ibilinear-chw/ |
D | neonfp16arith.c.in | 66 … const float16x8x2_t vtl_t${ABC[P:P+8]} = vuzpq_f16(vtltr${ABC[P:P+4]}, vtltr${ABC[P+4:P+8]}); 67 const float16x8_t vtl${ABC[P:P+8]} = vtl_t${ABC[P:P+8]}.val[0]; 68 const float16x8_t vtr${ABC[P:P+8]} = vtl_t${ABC[P:P+8]}.val[1]; 108 const float16x4x2_t vtl_t = vuzp_f16(vget_low_f16(vtltr), vget_high_f16(vtltr)); variable 109 const float16x4_t vtl = vtl_t.val[0]; 110 const float16x4_t vtr = vtl_t.val[1]; 149 const float16x4x2_t vtl_t = vuzp_f16(vtltr, vtltr); variable 150 const float16x4_t vtl = vtl_t.val[0]; 151 const float16x4_t vtr = vtl_t.val[1]; 200 const float16x4x2_t vtl_t = vuzp_f16(vtltr, vtltr); variable [all …]
|
/external/XNNPACK/src/f32-ibilinear-chw/gen/ |
D | neonfma-p4.c | 73 const float32x4x2_t vtl_t = vuzpq_f32(vtltr01, vtltr23); in xnn_f32_ibilinear_chw_ukernel__neonfma_p4() local 74 const float32x4_t vtl = vtl_t.val[0]; in xnn_f32_ibilinear_chw_ukernel__neonfma_p4() 75 const float32x4_t vtr = vtl_t.val[1]; in xnn_f32_ibilinear_chw_ukernel__neonfma_p4() 113 const float32x2x2_t vtl_t = vuzp_f32(vtltr0, vtltr1); in xnn_f32_ibilinear_chw_ukernel__neonfma_p4() local 114 const float32x2_t vtl = vtl_t.val[0]; in xnn_f32_ibilinear_chw_ukernel__neonfma_p4() 115 const float32x2_t vtr = vtl_t.val[1]; in xnn_f32_ibilinear_chw_ukernel__neonfma_p4()
|
D | neon-p4.c | 73 const float32x4x2_t vtl_t = vuzpq_f32(vtltr01, vtltr23); in xnn_f32_ibilinear_chw_ukernel__neon_p4() local 74 const float32x4_t vtl = vtl_t.val[0]; in xnn_f32_ibilinear_chw_ukernel__neon_p4() 75 const float32x4_t vtr = vtl_t.val[1]; in xnn_f32_ibilinear_chw_ukernel__neon_p4() 113 const float32x2x2_t vtl_t = vuzp_f32(vtltr0, vtltr1); in xnn_f32_ibilinear_chw_ukernel__neon_p4() local 114 const float32x2_t vtl = vtl_t.val[0]; in xnn_f32_ibilinear_chw_ukernel__neon_p4() 115 const float32x2_t vtr = vtl_t.val[1]; in xnn_f32_ibilinear_chw_ukernel__neon_p4()
|
D | neonfma-p8.c | 161 const float32x4x2_t vtl_t = vuzpq_f32(vtltr01, vtltr23); in xnn_f32_ibilinear_chw_ukernel__neonfma_p8() local 162 const float32x4_t vtl = vtl_t.val[0]; in xnn_f32_ibilinear_chw_ukernel__neonfma_p8() 163 const float32x4_t vtr = vtl_t.val[1]; in xnn_f32_ibilinear_chw_ukernel__neonfma_p8() 201 const float32x2x2_t vtl_t = vuzp_f32(vtltr0, vtltr1); in xnn_f32_ibilinear_chw_ukernel__neonfma_p8() local 202 const float32x2_t vtl = vtl_t.val[0]; in xnn_f32_ibilinear_chw_ukernel__neonfma_p8() 203 const float32x2_t vtr = vtl_t.val[1]; in xnn_f32_ibilinear_chw_ukernel__neonfma_p8()
|
D | neon-p8.c | 161 const float32x4x2_t vtl_t = vuzpq_f32(vtltr01, vtltr23); in xnn_f32_ibilinear_chw_ukernel__neon_p8() local 162 const float32x4_t vtl = vtl_t.val[0]; in xnn_f32_ibilinear_chw_ukernel__neon_p8() 163 const float32x4_t vtr = vtl_t.val[1]; in xnn_f32_ibilinear_chw_ukernel__neon_p8() 201 const float32x2x2_t vtl_t = vuzp_f32(vtltr0, vtltr1); in xnn_f32_ibilinear_chw_ukernel__neon_p8() local 202 const float32x2_t vtl = vtl_t.val[0]; in xnn_f32_ibilinear_chw_ukernel__neon_p8() 203 const float32x2_t vtr = vtl_t.val[1]; in xnn_f32_ibilinear_chw_ukernel__neon_p8()
|
D | neonfma-p16.c | 233 const float32x4x2_t vtl_t = vuzpq_f32(vtltr01, vtltr23); in xnn_f32_ibilinear_chw_ukernel__neonfma_p16() local 234 const float32x4_t vtl = vtl_t.val[0]; in xnn_f32_ibilinear_chw_ukernel__neonfma_p16() 235 const float32x4_t vtr = vtl_t.val[1]; in xnn_f32_ibilinear_chw_ukernel__neonfma_p16() 273 const float32x2x2_t vtl_t = vuzp_f32(vtltr0, vtltr1); in xnn_f32_ibilinear_chw_ukernel__neonfma_p16() local 274 const float32x2_t vtl = vtl_t.val[0]; in xnn_f32_ibilinear_chw_ukernel__neonfma_p16() 275 const float32x2_t vtr = vtl_t.val[1]; in xnn_f32_ibilinear_chw_ukernel__neonfma_p16()
|
D | neon-p16.c | 233 const float32x4x2_t vtl_t = vuzpq_f32(vtltr01, vtltr23); in xnn_f32_ibilinear_chw_ukernel__neon_p16() local 234 const float32x4_t vtl = vtl_t.val[0]; in xnn_f32_ibilinear_chw_ukernel__neon_p16() 235 const float32x4_t vtr = vtl_t.val[1]; in xnn_f32_ibilinear_chw_ukernel__neon_p16() 273 const float32x2x2_t vtl_t = vuzp_f32(vtltr0, vtltr1); in xnn_f32_ibilinear_chw_ukernel__neon_p16() local 274 const float32x2_t vtl = vtl_t.val[0]; in xnn_f32_ibilinear_chw_ukernel__neon_p16() 275 const float32x2_t vtr = vtl_t.val[1]; in xnn_f32_ibilinear_chw_ukernel__neon_p16()
|
/external/XNNPACK/src/f32-ibilinear-chw/ |
D | neon.c.in | 67 … const float32x4x2_t vtl_t${ABC[P:P+4]} = vuzpq_f32(vtltr${ABC[P:P+2]}, vtltr${ABC[P+2:P+4]}); 68 const float32x4_t vtl${ABC[P:P+4]} = vtl_t${ABC[P:P+4]}.val[0]; 69 const float32x4_t vtr${ABC[P:P+4]} = vtl_t${ABC[P:P+4]}.val[1]; 113 const float32x4x2_t vtl_t = vuzpq_f32(vtltr01, vtltr23); 114 const float32x4_t vtl = vtl_t.val[0]; 115 const float32x4_t vtr = vtl_t.val[1]; 151 const float32x2x2_t vtl_t = vuzp_f32(vtltr0, vtltr1); 152 const float32x2_t vtl = vtl_t.val[0]; 153 const float32x2_t vtr = vtl_t.val[1];
|