Home
last modified time | relevance | path

Searched refs:vget_high_s16 (Results 1 – 25 of 183) sorted by relevance

12345678

/external/XNNPACK/src/qs8-gemm/gen/
D4x16-minmax-neon-mlal-lane.c93 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa0), 0); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane()
95 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa1), 0); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane()
97 vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa2), 0); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane()
99 vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa3), 0); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane()
104 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane()
106 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane()
108 vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane()
110 vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane()
115 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa0), 1); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane()
117 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa1), 1); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane()
[all …]
D3x16-minmax-neon-mlal-lane.c81 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa0), 0); in xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane()
83 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa1), 0); in xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane()
85 vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa2), 0); in xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane()
90 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane()
92 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane()
94 vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane()
99 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa0), 1); in xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane()
101 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa1), 1); in xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane()
103 vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa2), 1); in xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane()
108 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane()
[all …]
D4x8-minmax-neon-mlal-lane.c85 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa0), 0); in xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane()
87 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa1), 0); in xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane()
89 vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa2), 0); in xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane()
91 vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa3), 0); in xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane()
96 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa0), 1); in xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane()
98 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa1), 1); in xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane()
100 vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa2), 1); in xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane()
102 vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa3), 1); in xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane()
107 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa0), 2); in xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane()
109 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa1), 2); in xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane()
[all …]
D2x16-minmax-neon-mlal-lane.c69 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa0), 0); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane()
71 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa1), 0); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane()
76 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane()
78 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane()
83 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa0), 1); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane()
85 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa1), 1); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane()
90 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane()
92 vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane()
97 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa0), 2); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane()
99 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa1), 2); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane()
[all …]
D3x8-minmax-neon-mlal-lane.c75 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa0), 0); in xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane()
77 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa1), 0); in xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane()
79 vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa2), 0); in xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane()
84 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa0), 1); in xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane()
86 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa1), 1); in xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane()
88 vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa2), 1); in xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane()
93 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa0), 2); in xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane()
95 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa1), 2); in xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane()
97 vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa2), 2); in xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane()
102 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c3), vget_low_s16(vxa0), 3); in xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane()
[all …]
D2x8-minmax-neon-mlal-lane.c65 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa0), 0); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane()
67 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa1), 0); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane()
72 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa0), 1); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane()
74 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa1), 1); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane()
79 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa0), 2); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane()
81 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa1), 2); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane()
86 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c3), vget_low_s16(vxa0), 3); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane()
88 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c3), vget_low_s16(vxa1), 3); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane()
93 vacc0x0123 = vmlal_lane_s16(vacc0x0123, vget_low_s16(vxb01234567c4), vget_high_s16(vxa0), 0); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane()
94 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c4), vget_high_s16(vxa0), 0); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane()
[all …]
D1x16-minmax-neon-mlal-lane.c57 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa0), 0); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane()
62 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane()
67 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa0), 1); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane()
72 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane()
77 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa0), 2); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane()
82 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc2), vget_low_s16(vxa0), 2); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane()
87 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c3), vget_low_s16(vxa0), 3); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane()
92 vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc3), vget_low_s16(vxa0), 3); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane()
97 vacc0x0123 = vmlal_lane_s16(vacc0x0123, vget_low_s16(vxb01234567c4), vget_high_s16(vxa0), 0); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane()
98 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c4), vget_high_s16(vxa0), 0); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane()
[all …]
D4x16-minmax-neon-mull-addw-dup.c89 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup()
92 vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup()
95 vacc2x4567 = vaddw_s16(vacc2x4567, vget_high_s16(vprod2x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup()
98 vacc3x4567 = vaddw_s16(vacc3x4567, vget_high_s16(vprod3x01234567c0)); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup()
103 vacc0xCDEF = vaddw_s16(vacc0xCDEF, vget_high_s16(vprod0x89ABCDEFc0)); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup()
106 vacc1xCDEF = vaddw_s16(vacc1xCDEF, vget_high_s16(vprod1x89ABCDEFc0)); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup()
109 vacc2xCDEF = vaddw_s16(vacc2xCDEF, vget_high_s16(vprod2x89ABCDEFc0)); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup()
112 vacc3xCDEF = vaddw_s16(vacc3xCDEF, vget_high_s16(vprod3x89ABCDEFc0)); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup()
117 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1)); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup()
120 vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c1)); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup()
[all …]
/external/XNNPACK/src/qu8-gemm/
D8x8-minmax-neon.c122 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa0), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
124 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa1), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
126 vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa2), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
128 vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa3), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
130 vacc4x4567 = vmlal_lane_s16(vacc4x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa4), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
132 vacc5x4567 = vmlal_lane_s16(vacc5x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa5), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
134 vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa6), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
136 vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa7), 0); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
142 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa0), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
144 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa1), 1); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
[all …]
D4x8-minmax-neon.c82 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa0), 0); in xnn_qu8_gemm_minmax_ukernel_4x8__neon()
84 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa1), 0); in xnn_qu8_gemm_minmax_ukernel_4x8__neon()
86 vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa2), 0); in xnn_qu8_gemm_minmax_ukernel_4x8__neon()
88 vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa3), 0); in xnn_qu8_gemm_minmax_ukernel_4x8__neon()
94 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa0), 1); in xnn_qu8_gemm_minmax_ukernel_4x8__neon()
96 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa1), 1); in xnn_qu8_gemm_minmax_ukernel_4x8__neon()
98 vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa2), 1); in xnn_qu8_gemm_minmax_ukernel_4x8__neon()
100 vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa3), 1); in xnn_qu8_gemm_minmax_ukernel_4x8__neon()
106 vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa0), 2); in xnn_qu8_gemm_minmax_ukernel_4x8__neon()
108 vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa1), 2); in xnn_qu8_gemm_minmax_ukernel_4x8__neon()
[all …]
/external/XNNPACK/src/qu8-igemm/
D8x8-minmax-neon.c148 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa0), 0); in xnn_qu8_igemm_minmax_ukernel_8x8__neon()
150 … vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa1), 0); in xnn_qu8_igemm_minmax_ukernel_8x8__neon()
152 … vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa2), 0); in xnn_qu8_igemm_minmax_ukernel_8x8__neon()
154 … vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa3), 0); in xnn_qu8_igemm_minmax_ukernel_8x8__neon()
156 … vacc4x4567 = vmlal_lane_s16(vacc4x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa4), 0); in xnn_qu8_igemm_minmax_ukernel_8x8__neon()
158 … vacc5x4567 = vmlal_lane_s16(vacc5x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa5), 0); in xnn_qu8_igemm_minmax_ukernel_8x8__neon()
160 … vacc6x4567 = vmlal_lane_s16(vacc6x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa6), 0); in xnn_qu8_igemm_minmax_ukernel_8x8__neon()
162 … vacc7x4567 = vmlal_lane_s16(vacc7x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa7), 0); in xnn_qu8_igemm_minmax_ukernel_8x8__neon()
170 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa0), 1); in xnn_qu8_igemm_minmax_ukernel_8x8__neon()
172 … vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa1), 1); in xnn_qu8_igemm_minmax_ukernel_8x8__neon()
[all …]
D4x8-minmax-neon.c100 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa0), 0); in xnn_qu8_igemm_minmax_ukernel_4x8__neon()
102 … vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa1), 0); in xnn_qu8_igemm_minmax_ukernel_4x8__neon()
104 … vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa2), 0); in xnn_qu8_igemm_minmax_ukernel_4x8__neon()
106 … vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa3), 0); in xnn_qu8_igemm_minmax_ukernel_4x8__neon()
114 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa0), 1); in xnn_qu8_igemm_minmax_ukernel_4x8__neon()
116 … vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa1), 1); in xnn_qu8_igemm_minmax_ukernel_4x8__neon()
118 … vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa2), 1); in xnn_qu8_igemm_minmax_ukernel_4x8__neon()
120 … vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa3), 1); in xnn_qu8_igemm_minmax_ukernel_4x8__neon()
128 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa0), 2); in xnn_qu8_igemm_minmax_ukernel_4x8__neon()
130 … vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567), vget_low_s16(vxa1), 2); in xnn_qu8_igemm_minmax_ukernel_4x8__neon()
[all …]
/external/XNNPACK/src/qs8-igemm/gen/
D4x16-minmax-neon-mlal-lane.c110 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa0), 0); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane()
112 … vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa1), 0); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane()
114 … vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa2), 0); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane()
116 … vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa3), 0); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane()
121 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane()
123 … vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane()
125 … vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane()
127 … vacc3xCDEF = vmlal_lane_s16(vacc3xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa3), 0); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane()
132 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa0), 1); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane()
134 … vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa1), 1); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane()
[all …]
D3x16-minmax-neon-mlal-lane.c96 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa0), 0); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane()
98 … vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa1), 0); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane()
100 … vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa2), 0); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane()
105 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane()
107 … vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane()
109 … vacc2xCDEF = vmlal_lane_s16(vacc2xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa2), 0); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane()
114 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa0), 1); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane()
116 … vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa1), 1); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane()
118 … vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa2), 1); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane()
123 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane()
[all …]
D4x8-minmax-neon-mlal-lane.c102 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa0), 0); in xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane()
104 … vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa1), 0); in xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane()
106 … vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa2), 0); in xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane()
108 … vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa3), 0); in xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane()
113 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa0), 1); in xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane()
115 … vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa1), 1); in xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane()
117 … vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa2), 1); in xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane()
119 … vacc3x4567 = vmlal_lane_s16(vacc3x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa3), 1); in xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane()
124 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa0), 2); in xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane()
126 … vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa1), 2); in xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane()
[all …]
D2x16-minmax-neon-mlal-lane.c82 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa0), 0); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane()
84 … vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa1), 0); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane()
89 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane()
91 … vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa1), 0); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane()
96 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa0), 1); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane()
98 … vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa1), 1); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane()
103 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane()
105 … vacc1xCDEF = vmlal_lane_s16(vacc1xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa1), 1); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane()
110 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa0), 2); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane()
112 … vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa1), 2); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane()
[all …]
D3x8-minmax-neon-mlal-lane.c90 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa0), 0); in xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane()
92 … vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa1), 0); in xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane()
94 … vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa2), 0); in xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane()
99 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa0), 1); in xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane()
101 … vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa1), 1); in xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane()
103 … vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa2), 1); in xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane()
108 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa0), 2); in xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane()
110 … vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa1), 2); in xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane()
112 … vacc2x4567 = vmlal_lane_s16(vacc2x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa2), 2); in xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane()
117 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c3), vget_low_s16(vxa0), 3); in xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane()
[all …]
D2x8-minmax-neon-mlal-lane.c78 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa0), 0); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane()
80 … vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa1), 0); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane()
85 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa0), 1); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane()
87 … vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa1), 1); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane()
92 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa0), 2); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane()
94 … vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa1), 2); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane()
99 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c3), vget_low_s16(vxa0), 3); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane()
101 … vacc1x4567 = vmlal_lane_s16(vacc1x4567, vget_high_s16(vxb01234567c3), vget_low_s16(vxa1), 3); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane()
106 … vacc0x0123 = vmlal_lane_s16(vacc0x0123, vget_low_s16(vxb01234567c4), vget_high_s16(vxa0), 0); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane()
107 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c4), vget_high_s16(vxa0), 0); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane()
[all …]
D1x16-minmax-neon-mlal-lane.c68 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c0), vget_low_s16(vxa0), 0); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane()
73 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc0), vget_low_s16(vxa0), 0); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane()
78 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c1), vget_low_s16(vxa0), 1); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane()
83 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc1), vget_low_s16(vxa0), 1); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane()
88 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c2), vget_low_s16(vxa0), 2); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane()
93 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc2), vget_low_s16(vxa0), 2); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane()
98 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c3), vget_low_s16(vxa0), 3); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane()
103 … vacc0xCDEF = vmlal_lane_s16(vacc0xCDEF, vget_high_s16(vxb89ABCDEFc3), vget_low_s16(vxa0), 3); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane()
108 … vacc0x0123 = vmlal_lane_s16(vacc0x0123, vget_low_s16(vxb01234567c4), vget_high_s16(vxa0), 0); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane()
109 … vacc0x4567 = vmlal_lane_s16(vacc0x4567, vget_high_s16(vxb01234567c4), vget_high_s16(vxa0), 0); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane()
[all …]
D4x16-minmax-neon-mull-addw-dup.c106 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
109 vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
112 vacc2x4567 = vaddw_s16(vacc2x4567, vget_high_s16(vprod2x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
115 vacc3x4567 = vaddw_s16(vacc3x4567, vget_high_s16(vprod3x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
120 vacc0xCDEF = vaddw_s16(vacc0xCDEF, vget_high_s16(vprod0x89ABCDEFc0)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
123 vacc1xCDEF = vaddw_s16(vacc1xCDEF, vget_high_s16(vprod1x89ABCDEFc0)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
126 vacc2xCDEF = vaddw_s16(vacc2xCDEF, vget_high_s16(vprod2x89ABCDEFc0)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
129 vacc3xCDEF = vaddw_s16(vacc3xCDEF, vget_high_s16(vprod3x89ABCDEFc0)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
134 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
137 vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c1)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
[all …]
D3x16-minmax-neon-mull-addw-dup.c93 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup()
96 vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup()
99 vacc2x4567 = vaddw_s16(vacc2x4567, vget_high_s16(vprod2x01234567c0)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup()
104 vacc0xCDEF = vaddw_s16(vacc0xCDEF, vget_high_s16(vprod0x89ABCDEFc0)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup()
107 vacc1xCDEF = vaddw_s16(vacc1xCDEF, vget_high_s16(vprod1x89ABCDEFc0)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup()
110 vacc2xCDEF = vaddw_s16(vacc2xCDEF, vget_high_s16(vprod2x89ABCDEFc0)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup()
115 vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup()
118 vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c1)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup()
121 vacc2x4567 = vaddw_s16(vacc2x4567, vget_high_s16(vprod2x01234567c1)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup()
126 vacc0xCDEF = vaddw_s16(vacc0xCDEF, vget_high_s16(vprod0x89ABCDEFc1)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup()
[all …]
/external/XNNPACK/src/qs8-dwconv/gen/
Dup32x9-minmax-neon-mul16.c109 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
111 vaccCDEF = vmlal_s16(vaccCDEF, vget_high_s16(vi0x89ABCDEF), vget_high_s16(vk0x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
113 vaccKLMN = vmlal_s16(vaccKLMN, vget_high_s16(vi0xGHIJKLMN), vget_high_s16(vk0xGHIJKLMN)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
115 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi0xOPQRSTUV), vget_high_s16(vk0xOPQRSTUV)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
127 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
129 vaccCDEF = vmlal_s16(vaccCDEF, vget_high_s16(vi1x89ABCDEF), vget_high_s16(vk1x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
131 vaccKLMN = vmlal_s16(vaccKLMN, vget_high_s16(vi1xGHIJKLMN), vget_high_s16(vk1xGHIJKLMN)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
133 vaccSTUV = vmlal_s16(vaccSTUV, vget_high_s16(vi1xOPQRSTUV), vget_high_s16(vk1xOPQRSTUV)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
145 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi2x01234567), vget_high_s16(vk2x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
147 vaccCDEF = vmlal_s16(vaccCDEF, vget_high_s16(vi2x89ABCDEF), vget_high_s16(vk2x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
[all …]
Dup24x9-minmax-neon-mul16.c105 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16()
107 vaccCDEF = vmlal_s16(vaccCDEF, vget_high_s16(vi0x89ABCDEF), vget_high_s16(vk0x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16()
109 vaccKLMN = vmlal_s16(vaccKLMN, vget_high_s16(vi0xGHIJKLMN), vget_high_s16(vk0xGHIJKLMN)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16()
119 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16()
121 vaccCDEF = vmlal_s16(vaccCDEF, vget_high_s16(vi1x89ABCDEF), vget_high_s16(vk1x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16()
123 vaccKLMN = vmlal_s16(vaccKLMN, vget_high_s16(vi1xGHIJKLMN), vget_high_s16(vk1xGHIJKLMN)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16()
133 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi2x01234567), vget_high_s16(vk2x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16()
135 vaccCDEF = vmlal_s16(vaccCDEF, vget_high_s16(vi2x89ABCDEF), vget_high_s16(vk2x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16()
137 vaccKLMN = vmlal_s16(vaccKLMN, vget_high_s16(vi2xGHIJKLMN), vget_high_s16(vk2xGHIJKLMN)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16()
147 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi3x01234567), vget_high_s16(vk3x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16()
[all …]
Dup16x9-minmax-neon-mul16.c101 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16()
103 vaccCDEF = vmlal_s16(vaccCDEF, vget_high_s16(vi0x89ABCDEF), vget_high_s16(vk0x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16()
111 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16()
113 vaccCDEF = vmlal_s16(vaccCDEF, vget_high_s16(vi1x89ABCDEF), vget_high_s16(vk1x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16()
121 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi2x01234567), vget_high_s16(vk2x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16()
123 vaccCDEF = vmlal_s16(vaccCDEF, vget_high_s16(vi2x89ABCDEF), vget_high_s16(vk2x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16()
131 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi3x01234567), vget_high_s16(vk3x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16()
133 vaccCDEF = vmlal_s16(vaccCDEF, vget_high_s16(vi3x89ABCDEF), vget_high_s16(vk3x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16()
141 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16()
143 vaccCDEF = vmlal_s16(vaccCDEF, vget_high_s16(vi4x89ABCDEF), vget_high_s16(vk4x89ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16()
[all …]
/external/libhevc/common/arm/
Dihevc_resi_trans_neon_32x32.c137 vget_high_s16(diff_16[2][0]), vget_low_s16(diff_16[2][0])); in ihevc_resi_trans_32x32_neon()
141 vget_high_s16(diff_16[3][0]), vget_low_s16(diff_16[3][0])); in ihevc_resi_trans_32x32_neon()
162 vget_high_s16(diff_16[2][1]), vget_low_s16(diff_16[2][1])); in ihevc_resi_trans_32x32_neon()
166 vget_high_s16(diff_16[3][1]), vget_low_s16(diff_16[3][1])); in ihevc_resi_trans_32x32_neon()
239 e0_1 = vcombine_s16(vget_high_s16(e0_1), vget_low_s16(e0_1)); in ihevc_resi_trans_32x32_neon()
244 e1_1 = vcombine_s16(vget_high_s16(e1_1), vget_low_s16(e1_1)); in ihevc_resi_trans_32x32_neon()
256 vrev64_s16(vget_high_s16(ee0)), vrev64_s16(vget_high_s16(ee1))); in ihevc_resi_trans_32x32_neon()
266 vreinterpret_s32_s16(vget_high_s16(eee))); in ihevc_resi_trans_32x32_neon()
274 vtrn_s16(vget_low_s16(eeee), vget_high_s16(eeee)); in ihevc_resi_trans_32x32_neon()
288 a[0].val[0], vget_high_s16(g_ai2_ihevc_trans_32_01_8), eeee_01); in ihevc_resi_trans_32x32_neon()
[all …]

12345678