/external/XNNPACK/src/f16-dwconv2d-chw/gen/ |
D | 3x3p1-minmax-neonfp16arith-1x8-acc2.c | 61 float16x8_t vi2x89ABCDEF = vld1q_f16(i2); i2 += 8; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc2() local 76 vo0p0 = vfmaq_lane_f16(vo0p0, vi2x89ABCDEF, vw89, 0); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc2() 81 const float16x8_t vi2x789ABCDE = vextq_f16(vi2x01234567, vi2x89ABCDEF, 7); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc2() 91 vi2x01234567 = vi2x89ABCDEF; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc2() 96 const float16x8_t vi2x9ABCDEFG = vextq_f16(vi2x89ABCDEF, vi2xGHIJKLMN, 1); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc2() 106 vi2x89ABCDEF = vi2xGHIJKLMN; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc2() 124 vi2x89ABCDEF = vreinterpretq_f16_u16(vandq_u16(vmask, vreinterpretq_u16_f16(vi2x89ABCDEF))); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc2() 131 vo0p0 = vfmaq_lane_f16(vo0p0, vi2x89ABCDEF, vw89, 0); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc2() 136 const float16x8_t vi2x789ABCDE = vextq_f16(vi2x01234567, vi2x89ABCDEF, 7); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc2() 148 const float16x8_t vi2x9ABCDEFG = vextq_f16(vi2x89ABCDEF, vzero, 1); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc2()
|
D | 3x3p1-minmax-neonfp16arith-1x8-acc3.c | 61 float16x8_t vi2x89ABCDEF = vld1q_f16(i2); i2 += 8; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc3() local 76 float16x8_t vo0p2 = vmulq_lane_f16(vi2x89ABCDEF, vw89, 0); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc3() 81 const float16x8_t vi2x789ABCDE = vextq_f16(vi2x01234567, vi2x89ABCDEF, 7); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc3() 91 vi2x01234567 = vi2x89ABCDEF; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc3() 96 const float16x8_t vi2x9ABCDEFG = vextq_f16(vi2x89ABCDEF, vi2xGHIJKLMN, 1); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc3() 106 vi2x89ABCDEF = vi2xGHIJKLMN; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc3() 125 vi2x89ABCDEF = vreinterpretq_f16_u16(vandq_u16(vmask, vreinterpretq_u16_f16(vi2x89ABCDEF))); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc3() 132 float16x8_t vo0p2 = vmulq_lane_f16(vi2x89ABCDEF, vw89, 0); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc3() 137 const float16x8_t vi2x789ABCDE = vextq_f16(vi2x01234567, vi2x89ABCDEF, 7); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc3() 149 const float16x8_t vi2x9ABCDEFG = vextq_f16(vi2x89ABCDEF, vzero, 1); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc3()
|
D | 3x3p1-minmax-neonfp16arith-1x8.c | 61 float16x8_t vi2x89ABCDEF = vld1q_f16(i2); i2 += 8; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8() local 76 vo0p0 = vfmaq_lane_f16(vo0p0, vi2x89ABCDEF, vw89, 0); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8() 81 const float16x8_t vi2x789ABCDE = vextq_f16(vi2x01234567, vi2x89ABCDEF, 7); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8() 91 vi2x01234567 = vi2x89ABCDEF; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8() 96 const float16x8_t vi2x9ABCDEFG = vextq_f16(vi2x89ABCDEF, vi2xGHIJKLMN, 1); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8() 106 vi2x89ABCDEF = vi2xGHIJKLMN; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8() 123 vi2x89ABCDEF = vreinterpretq_f16_u16(vandq_u16(vmask, vreinterpretq_u16_f16(vi2x89ABCDEF))); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8() 130 vo0p0 = vfmaq_lane_f16(vo0p0, vi2x89ABCDEF, vw89, 0); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8() 135 const float16x8_t vi2x789ABCDE = vextq_f16(vi2x01234567, vi2x89ABCDEF, 7); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8() 147 const float16x8_t vi2x9ABCDEFG = vextq_f16(vi2x89ABCDEF, vzero, 1); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8()
|
D | 3x3p1-minmax-neonfp16arith-1x8-acc4.c | 61 float16x8_t vi2x89ABCDEF = vld1q_f16(i2); i2 += 8; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc4() local 76 float16x8_t vo0p2 = vmulq_lane_f16(vi2x89ABCDEF, vw89, 0); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc4() 81 const float16x8_t vi2x789ABCDE = vextq_f16(vi2x01234567, vi2x89ABCDEF, 7); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc4() 91 vi2x01234567 = vi2x89ABCDEF; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc4() 96 const float16x8_t vi2x9ABCDEFG = vextq_f16(vi2x89ABCDEF, vi2xGHIJKLMN, 1); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc4() 106 vi2x89ABCDEF = vi2xGHIJKLMN; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc4() 126 vi2x89ABCDEF = vreinterpretq_f16_u16(vandq_u16(vmask, vreinterpretq_u16_f16(vi2x89ABCDEF))); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc4() 133 float16x8_t vo0p2 = vmulq_lane_f16(vi2x89ABCDEF, vw89, 0); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc4() 138 const float16x8_t vi2x789ABCDE = vextq_f16(vi2x01234567, vi2x89ABCDEF, 7); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc4() 150 const float16x8_t vi2x9ABCDEFG = vextq_f16(vi2x89ABCDEF, vzero, 1); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_1x8_acc4()
|
D | 3x3p1-minmax-neonfp16arith-2x8.c | 68 float16x8_t vi2x89ABCDEF = vld1q_f16(i2); i2 += 8; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8() local 86 vo1p0 = vfmaq_lane_f16(vo1p0, vi2x89ABCDEF, vget_high_f16(vw01234567), 1); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8() 88 vo0p0 = vfmaq_lane_f16(vo0p0, vi2x89ABCDEF, vw89, 0); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8() 94 const float16x8_t vi2x789ABCDE = vextq_f16(vi2x01234567, vi2x89ABCDEF, 7); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8() 108 vi2x01234567 = vi2x89ABCDEF; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8() 114 const float16x8_t vi2x9ABCDEFG = vextq_f16(vi2x89ABCDEF, vi2xGHIJKLMN, 1); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8() 128 vi2x89ABCDEF = vi2xGHIJKLMN; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8() 150 vi2x89ABCDEF = vreinterpretq_f16_u16(vandq_u16(vmask, vreinterpretq_u16_f16(vi2x89ABCDEF))); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8() 158 vo1p0 = vfmaq_lane_f16(vo1p0, vi2x89ABCDEF, vget_high_f16(vw01234567), 1); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8() 160 vo0p0 = vfmaq_lane_f16(vo0p0, vi2x89ABCDEF, vw89, 0); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8() [all …]
|
D | 3x3p1-minmax-neonfp16arith-2x8-acc2.c | 68 float16x8_t vi2x89ABCDEF = vld1q_f16(i2); i2 += 8; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8_acc2() local 86 float16x8_t vo1p1 = vmulq_lane_f16(vi2x89ABCDEF, vget_high_f16(vw01234567), 1); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8_acc2() 88 vo0p0 = vfmaq_lane_f16(vo0p0, vi2x89ABCDEF, vw89, 0); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8_acc2() 94 const float16x8_t vi2x789ABCDE = vextq_f16(vi2x01234567, vi2x89ABCDEF, 7); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8_acc2() 108 vi2x01234567 = vi2x89ABCDEF; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8_acc2() 114 const float16x8_t vi2x9ABCDEFG = vextq_f16(vi2x89ABCDEF, vi2xGHIJKLMN, 1); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8_acc2() 128 vi2x89ABCDEF = vi2xGHIJKLMN; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8_acc2() 152 vi2x89ABCDEF = vreinterpretq_f16_u16(vandq_u16(vmask, vreinterpretq_u16_f16(vi2x89ABCDEF))); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8_acc2() 160 float16x8_t vo1p1 = vmulq_lane_f16(vi2x89ABCDEF, vget_high_f16(vw01234567), 1); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8_acc2() 162 vo0p0 = vfmaq_lane_f16(vo0p0, vi2x89ABCDEF, vw89, 0); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_2x8_acc2() [all …]
|
D | 3x3p1-minmax-neonfp16arith-3x8.c | 75 float16x8_t vi2x89ABCDEF = vld1q_f16(i2); i2 += 8; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_3x8() local 94 vo2p0 = vfmaq_lane_f16(vo2p0, vi2x89ABCDEF, vget_low_f16(vw01234567), 2); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_3x8() 97 vo1p0 = vfmaq_lane_f16(vo1p0, vi2x89ABCDEF, vget_high_f16(vw01234567), 1); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_3x8() 100 vo0p0 = vfmaq_lane_f16(vo0p0, vi2x89ABCDEF, vw89, 0); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_3x8() 107 const float16x8_t vi2x789ABCDE = vextq_f16(vi2x01234567, vi2x89ABCDEF, 7); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_3x8() 125 vi2x01234567 = vi2x89ABCDEF; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_3x8() 132 const float16x8_t vi2x9ABCDEFG = vextq_f16(vi2x89ABCDEF, vi2xGHIJKLMN, 1); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_3x8() 150 vi2x89ABCDEF = vi2xGHIJKLMN; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_3x8() 177 vi2x89ABCDEF = vreinterpretq_f16_u16(vandq_u16(vmask, vreinterpretq_u16_f16(vi2x89ABCDEF))); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_3x8() 184 vo2p0 = vfmaq_lane_f16(vo2p0, vi2x89ABCDEF, vget_low_f16(vw01234567), 2); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_3x8() [all …]
|
D | 3x3p1-minmax-neonfp16arith-4x8.c | 82 float16x8_t vi2x89ABCDEF = vld1q_f16(i2); i2 += 8; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_4x8() local 104 vo2p0 = vfmaq_lane_f16(vo2p0, vi2x89ABCDEF, vget_low_f16(vw01234567), 2); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_4x8() 108 vo1p0 = vfmaq_lane_f16(vo1p0, vi2x89ABCDEF, vget_high_f16(vw01234567), 1); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_4x8() 112 vo0p0 = vfmaq_lane_f16(vo0p0, vi2x89ABCDEF, vw89, 0); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_4x8() 120 const float16x8_t vi2x789ABCDE = vextq_f16(vi2x01234567, vi2x89ABCDEF, 7); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_4x8() 142 vi2x01234567 = vi2x89ABCDEF; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_4x8() 150 const float16x8_t vi2x9ABCDEFG = vextq_f16(vi2x89ABCDEF, vi2xGHIJKLMN, 1); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_4x8() 172 vi2x89ABCDEF = vi2xGHIJKLMN; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_4x8() 204 vi2x89ABCDEF = vreinterpretq_f16_u16(vandq_u16(vmask, vreinterpretq_u16_f16(vi2x89ABCDEF))); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_4x8() 212 vo2p0 = vfmaq_lane_f16(vo2p0, vi2x89ABCDEF, vget_low_f16(vw01234567), 2); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_4x8() [all …]
|
D | 3x3p1-minmax-neonfp16arith-5x8.c | 89 float16x8_t vi2x89ABCDEF = vld1q_f16(i2); i2 += 8; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_5x8() local 114 vo2p0 = vfmaq_lane_f16(vo2p0, vi2x89ABCDEF, vget_low_f16(vw01234567), 2); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_5x8() 119 vo1p0 = vfmaq_lane_f16(vo1p0, vi2x89ABCDEF, vget_high_f16(vw01234567), 1); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_5x8() 124 vo0p0 = vfmaq_lane_f16(vo0p0, vi2x89ABCDEF, vw89, 0); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_5x8() 133 const float16x8_t vi2x789ABCDE = vextq_f16(vi2x01234567, vi2x89ABCDEF, 7); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_5x8() 159 vi2x01234567 = vi2x89ABCDEF; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_5x8() 168 const float16x8_t vi2x9ABCDEFG = vextq_f16(vi2x89ABCDEF, vi2xGHIJKLMN, 1); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_5x8() 194 vi2x89ABCDEF = vi2xGHIJKLMN; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_5x8() 231 vi2x89ABCDEF = vreinterpretq_f16_u16(vandq_u16(vmask, vreinterpretq_u16_f16(vi2x89ABCDEF))); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_5x8() 240 vo2p0 = vfmaq_lane_f16(vo2p0, vi2x89ABCDEF, vget_low_f16(vw01234567), 2); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_5x8() [all …]
|
D | 3x3p1-minmax-neonfp16arith-6x8.c | 96 float16x8_t vi2x89ABCDEF = vld1q_f16(i2); i2 += 8; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_6x8() local 124 vo2p0 = vfmaq_lane_f16(vo2p0, vi2x89ABCDEF, vget_low_f16(vw01234567), 2); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_6x8() 130 vo1p0 = vfmaq_lane_f16(vo1p0, vi2x89ABCDEF, vget_high_f16(vw01234567), 1); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_6x8() 136 vo0p0 = vfmaq_lane_f16(vo0p0, vi2x89ABCDEF, vw89, 0); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_6x8() 146 const float16x8_t vi2x789ABCDE = vextq_f16(vi2x01234567, vi2x89ABCDEF, 7); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_6x8() 176 vi2x01234567 = vi2x89ABCDEF; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_6x8() 186 const float16x8_t vi2x9ABCDEFG = vextq_f16(vi2x89ABCDEF, vi2xGHIJKLMN, 1); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_6x8() 216 vi2x89ABCDEF = vi2xGHIJKLMN; in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_6x8() 258 vi2x89ABCDEF = vreinterpretq_f16_u16(vandq_u16(vmask, vreinterpretq_u16_f16(vi2x89ABCDEF))); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_6x8() 268 vo2p0 = vfmaq_lane_f16(vo2p0, vi2x89ABCDEF, vget_low_f16(vw01234567), 2); in xnn_f16_dwconv2d_chw_ukernel_3x3p1__neonfp16arith_6x8() [all …]
|
/external/XNNPACK/src/qs8-gavgpool/gen/ |
D | 7p7x-minmax-fp32-sse2-c16.c | 59 const __m128i vi2x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i2 + 8)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16() local 66 const __m128i vxi2x89ABCDEF = _mm_srai_epi16(_mm_unpacklo_epi8(vi2x89ABCDEF, vi2x89ABCDEF), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16() 147 const __m128i vi2x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i2 + 8)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16() local 154 … const __m128i vxi2x89ABCDEF = _mm_srai_epi16(_mm_unpacklo_epi8(vi2x89ABCDEF, vi2x89ABCDEF), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16() 255 const __m128i vi2x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i2 + 8)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16() local 262 const __m128i vxi2x89ABCDEF = _mm_srai_epi16(_mm_unpacklo_epi8(vi2x89ABCDEF, vi2x89ABCDEF), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16()
|
D | 7p7x-minmax-rndnu-neon-c16.c | 51 const int8x8_t vi2x89ABCDEF = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c16() local 57 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi2x89ABCDEF); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c16() 103 const int8x8_t vi2x89ABCDEF = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c16() local 109 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi2x89ABCDEF); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c16() 181 const int8x8_t vi2x89ABCDEF = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c16() local 187 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi2x89ABCDEF); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c16()
|
D | 7p7x-minmax-fp32-neon-c16.c | 51 const int8x8_t vi2x89ABCDEF = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c16() local 57 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi2x89ABCDEF); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c16() 103 const int8x8_t vi2x89ABCDEF = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c16() local 109 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi2x89ABCDEF); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c16() 180 const int8x8_t vi2x89ABCDEF = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c16() local 186 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi2x89ABCDEF); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c16()
|
D | 7p7x-minmax-fp32-neonv8-c16.c | 52 const int8x8_t vi2x89ABCDEF = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c16() local 58 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi2x89ABCDEF); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c16() 104 const int8x8_t vi2x89ABCDEF = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c16() local 110 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi2x89ABCDEF); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c16() 180 const int8x8_t vi2x89ABCDEF = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c16() local 186 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi2x89ABCDEF); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c16()
|
D | 7p7x-minmax-fp32-sse2-c24.c | 62 const __m128i vi2x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i2 + 8)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c24() local 71 const __m128i vxi2x89ABCDEF = _mm_srai_epi16(_mm_unpacklo_epi8(vi2x89ABCDEF, vi2x89ABCDEF), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c24() 231 const __m128i vi2x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i2 + 8)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c24() local 240 … const __m128i vxi2x89ABCDEF = _mm_srai_epi16(_mm_unpacklo_epi8(vi2x89ABCDEF, vi2x89ABCDEF), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c24() 420 const __m128i vi2x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i2 + 8)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c24() local 429 const __m128i vxi2x89ABCDEF = _mm_srai_epi16(_mm_unpacklo_epi8(vi2x89ABCDEF, vi2x89ABCDEF), 8); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c24()
|
D | 7p7x-minmax-rndnu-neon-c24.c | 53 const int8x8_t vi2x89ABCDEF = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() local 61 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi2x89ABCDEF); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() 148 const int8x8_t vi2x89ABCDEF = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() local 156 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi2x89ABCDEF); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() 273 const int8x8_t vi2x89ABCDEF = vld1_s8(i2); i2 += 8; in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() local 281 vsum89ABCDEF = vaddw_s8(vsum89ABCDEF, vi2x89ABCDEF); in xnn_qs8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24()
|
/external/XNNPACK/src/f16-gavgpool/gen/ |
D | 7p7x-minmax-neonfp16arith-c16.c | 50 const float16x8_t vi2x89ABCDEF = vld1q_f16(i2); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16() local 56 vacc89ABCDEF = vaddq_f16(vacc89ABCDEF, vi2x89ABCDEF); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16() 122 const float16x8_t vi2x89ABCDEF = vld1q_f16(i2); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16() local 127 vacc89ABCDEF = vaddq_f16(vacc89ABCDEF, vi2x89ABCDEF); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16() 214 const float16x8_t vi2x89ABCDEF = vld1q_f16(i2); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16() local 219 vacc89ABCDEF = vaddq_f16(vacc89ABCDEF, vi2x89ABCDEF); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c16()
|
D | 7p7x-minmax-neonfp16arith-c24.c | 52 const float16x8_t vi2x89ABCDEF = vld1q_f16(i2); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24() local 60 vacc89ABCDEF = vaddq_f16(vacc89ABCDEF, vi2x89ABCDEF); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24() 140 const float16x8_t vi2x89ABCDEF = vld1q_f16(i2); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24() local 147 vacc89ABCDEF = vaddq_f16(vacc89ABCDEF, vi2x89ABCDEF); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24() 248 const float16x8_t vi2x89ABCDEF = vld1q_f16(i2); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24() local 255 vacc89ABCDEF = vaddq_f16(vacc89ABCDEF, vi2x89ABCDEF); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c24()
|
D | 7p7x-minmax-f16c-c16.c | 51 const __m256 vi2x89ABCDEF = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i2)); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16() local 57 …vacc89ABCDEF = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc89ABCDEF), vi2x89ABCDEF), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16() 123 const __m256 vi2x89ABCDEF = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i2)); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16() local 128 …vacc89ABCDEF = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc89ABCDEF), vi2x89ABCDEF), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16() 217 const __m256 vi2x89ABCDEF = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) i2)); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16() local 222 …vacc89ABCDEF = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vacc89ABCDEF), vi2x89ABCDEF), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c16()
|
D | 7p7x-minmax-neonfp16arith-c32.c | 54 const float16x8_t vi2x89ABCDEF = vld1q_f16(i2); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() local 64 vacc89ABCDEF = vaddq_f16(vacc89ABCDEF, vi2x89ABCDEF); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() 158 const float16x8_t vi2x89ABCDEF = vld1q_f16(i2); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() local 167 vacc89ABCDEF = vaddq_f16(vacc89ABCDEF, vi2x89ABCDEF); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() 282 const float16x8_t vi2x89ABCDEF = vld1q_f16(i2); i2 += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() local 291 vacc89ABCDEF = vaddq_f16(vacc89ABCDEF, vi2x89ABCDEF); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32()
|
/external/XNNPACK/src/qu8-gavgpool/gen/ |
D | 7p7x-minmax-fp32-neon-c16.c | 51 const uint8x8_t vi2x89ABCDEF = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c16() local 57 vsum89ABCDEF = vaddw_u8(vsum89ABCDEF, vi2x89ABCDEF); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c16() 103 const uint8x8_t vi2x89ABCDEF = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c16() local 109 vsum89ABCDEF = vaddw_u8(vsum89ABCDEF, vi2x89ABCDEF); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c16() 180 const uint8x8_t vi2x89ABCDEF = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c16() local 186 vsum89ABCDEF = vaddw_u8(vsum89ABCDEF, vi2x89ABCDEF); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neon_c16()
|
D | 7p7x-minmax-rndnu-neon-c16.c | 51 const uint8x8_t vi2x89ABCDEF = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c16() local 57 vsum89ABCDEF = vaddw_u8(vsum89ABCDEF, vi2x89ABCDEF); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c16() 103 const uint8x8_t vi2x89ABCDEF = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c16() local 109 vsum89ABCDEF = vaddw_u8(vsum89ABCDEF, vi2x89ABCDEF); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c16() 181 const uint8x8_t vi2x89ABCDEF = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c16() local 187 vsum89ABCDEF = vaddw_u8(vsum89ABCDEF, vi2x89ABCDEF); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c16()
|
D | 7p7x-minmax-fp32-neonv8-c16.c | 52 const uint8x8_t vi2x89ABCDEF = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c16() local 58 vsum89ABCDEF = vaddw_u8(vsum89ABCDEF, vi2x89ABCDEF); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c16() 104 const uint8x8_t vi2x89ABCDEF = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c16() local 110 vsum89ABCDEF = vaddw_u8(vsum89ABCDEF, vi2x89ABCDEF); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c16() 180 const uint8x8_t vi2x89ABCDEF = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c16() local 186 vsum89ABCDEF = vaddw_u8(vsum89ABCDEF, vi2x89ABCDEF); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__neonv8_c16()
|
D | 7p7x-minmax-fp32-sse2-c16.c | 60 const __m128i vi2x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i2 + 8)); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16() local 67 const __m128i vxi2x89ABCDEF = _mm_unpacklo_epi8(vi2x89ABCDEF, vzero); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16() 146 const __m128i vi2x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i2 + 8)); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16() local 153 const __m128i vxi2x89ABCDEF = _mm_unpacklo_epi8(vi2x89ABCDEF, vzero); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16() 252 const __m128i vi2x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i2 + 8)); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16() local 259 const __m128i vxi2x89ABCDEF = _mm_unpacklo_epi8(vi2x89ABCDEF, vzero); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__sse2_c16()
|
D | 7p7x-minmax-rndnu-neon-c24.c | 53 const uint8x8_t vi2x89ABCDEF = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() local 61 vsum89ABCDEF = vaddw_u8(vsum89ABCDEF, vi2x89ABCDEF); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() 148 const uint8x8_t vi2x89ABCDEF = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() local 156 vsum89ABCDEF = vaddw_u8(vsum89ABCDEF, vi2x89ABCDEF); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() 273 const uint8x8_t vi2x89ABCDEF = vld1_u8(i2); i2 += 8; in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24() local 281 vsum89ABCDEF = vaddw_u8(vsum89ABCDEF, vi2x89ABCDEF); in xnn_qu8_gavgpool_minmax_rndnu_ukernel_7p7x__neon_c24()
|