
Searched refs:vdup_lane_s8 (Results 1 – 22 of 22) sorted by relevance
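Nearly all of the hits below come from XNNPACK's QS8 GEMM/IGEMM "mull-addw-dup" microkernels, where vdup_lane_s8 broadcasts one activation byte before a widening multiply. For orientation, a minimal self-contained sketch of one such accumulation step follows; the variable names are illustrative and not taken verbatim from any listed file.

#include <arm_neon.h>

/* One "mull-addw-dup" step, matching the pattern visible in the hits below:
 * splat lane 0 of the activation vector (the "dup"), widen-multiply against
 * eight int8 weights (the "mull"), then widen-accumulate the int16 products
 * into two int32x4_t partial sums (the "addw"). */
static inline void qs8_mull_addw_dup_step(int8x8_t va0, int8x8_t vb01234567c0,
                                          int32x4_t *vacc0123, int32x4_t *vacc4567) {
  const int16x8_t vprod = vmull_s8(vb01234567c0, vdup_lane_s8(va0, 0));
  *vacc0123 = vaddw_s16(*vacc0123, vget_low_s16(vprod));
  *vacc4567 = vaddw_s16(*vacc4567, vget_high_s16(vprod));
}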

/external/XNNPACK/src/qs8-igemm/gen/
4x16-minmax-neon-mull-addw-dup.c
104 const int16x8_t vprod0x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va0, 0)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
107 const int16x8_t vprod1x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va1, 0)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
110 const int16x8_t vprod2x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va2, 0)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
113 const int16x8_t vprod3x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va3, 0)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
118 const int16x8_t vprod0x89ABCDEFc0 = vmull_s8(vb89ABCDEFc0, vdup_lane_s8(va0, 0)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
121 const int16x8_t vprod1x89ABCDEFc0 = vmull_s8(vb89ABCDEFc0, vdup_lane_s8(va1, 0)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
124 const int16x8_t vprod2x89ABCDEFc0 = vmull_s8(vb89ABCDEFc0, vdup_lane_s8(va2, 0)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
127 const int16x8_t vprod3x89ABCDEFc0 = vmull_s8(vb89ABCDEFc0, vdup_lane_s8(va3, 0)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
132 const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
135 const int16x8_t vprod1x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va1, 1)); in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup()
[all …]
3x16-minmax-neon-mull-addw-dup.c
91 const int16x8_t vprod0x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va0, 0)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup()
94 const int16x8_t vprod1x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va1, 0)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup()
97 const int16x8_t vprod2x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va2, 0)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup()
102 const int16x8_t vprod0x89ABCDEFc0 = vmull_s8(vb89ABCDEFc0, vdup_lane_s8(va0, 0)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup()
105 const int16x8_t vprod1x89ABCDEFc0 = vmull_s8(vb89ABCDEFc0, vdup_lane_s8(va1, 0)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup()
108 const int16x8_t vprod2x89ABCDEFc0 = vmull_s8(vb89ABCDEFc0, vdup_lane_s8(va2, 0)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup()
113 const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup()
116 const int16x8_t vprod1x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va1, 1)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup()
119 const int16x8_t vprod2x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va2, 1)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup()
124 const int16x8_t vprod0x89ABCDEFc1 = vmull_s8(vb89ABCDEFc1, vdup_lane_s8(va0, 1)); in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup()
[all …]
2x16-minmax-neon-mull-addw-dup.c
78 const int16x8_t vprod0x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va0, 0)); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup()
81 const int16x8_t vprod1x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va1, 0)); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup()
86 const int16x8_t vprod0x89ABCDEFc0 = vmull_s8(vb89ABCDEFc0, vdup_lane_s8(va0, 0)); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup()
89 const int16x8_t vprod1x89ABCDEFc0 = vmull_s8(vb89ABCDEFc0, vdup_lane_s8(va1, 0)); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup()
94 const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1)); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup()
97 const int16x8_t vprod1x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va1, 1)); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup()
102 const int16x8_t vprod0x89ABCDEFc1 = vmull_s8(vb89ABCDEFc1, vdup_lane_s8(va0, 1)); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup()
105 const int16x8_t vprod1x89ABCDEFc1 = vmull_s8(vb89ABCDEFc1, vdup_lane_s8(va1, 1)); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup()
110 const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2)); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup()
113 const int16x8_t vprod1x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va1, 2)); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup()
[all …]
4x8-minmax-neon-mull-addw-dup.c
96 const int16x8_t vprod0x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va0, 0)); in xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup()
99 const int16x8_t vprod1x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va1, 0)); in xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup()
102 const int16x8_t vprod2x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va2, 0)); in xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup()
105 const int16x8_t vprod3x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va3, 0)); in xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup()
110 const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1)); in xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup()
113 const int16x8_t vprod1x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va1, 1)); in xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup()
116 const int16x8_t vprod2x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va2, 1)); in xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup()
119 const int16x8_t vprod3x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va3, 1)); in xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup()
124 const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2)); in xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup()
127 const int16x8_t vprod1x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va1, 2)); in xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup()
[all …]
3x8-minmax-neon-mull-addw-dup.c
85 const int16x8_t vprod0x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va0, 0)); in xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup()
88 const int16x8_t vprod1x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va1, 0)); in xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup()
91 const int16x8_t vprod2x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va2, 0)); in xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup()
96 const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1)); in xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup()
99 const int16x8_t vprod1x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va1, 1)); in xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup()
102 const int16x8_t vprod2x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va2, 1)); in xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup()
107 const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2)); in xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup()
110 const int16x8_t vprod1x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va1, 2)); in xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup()
113 const int16x8_t vprod2x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va2, 2)); in xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup()
118 const int16x8_t vprod0x01234567c3 = vmull_s8(vb01234567c3, vdup_lane_s8(va0, 3)); in xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup()
[all …]
2x8-minmax-neon-mull-addw-dup.c
74 const int16x8_t vprod0x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va0, 0)); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup()
77 const int16x8_t vprod1x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va1, 0)); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup()
82 const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1)); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup()
85 const int16x8_t vprod1x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va1, 1)); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup()
90 const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2)); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup()
93 const int16x8_t vprod1x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va1, 2)); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup()
98 const int16x8_t vprod0x01234567c3 = vmull_s8(vb01234567c3, vdup_lane_s8(va0, 3)); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup()
101 const int16x8_t vprod1x01234567c3 = vmull_s8(vb01234567c3, vdup_lane_s8(va1, 3)); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup()
106 const int16x8_t vprod0x01234567c4 = vmull_s8(vb01234567c4, vdup_lane_s8(va0, 4)); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup()
109 const int16x8_t vprod1x01234567c4 = vmull_s8(vb01234567c4, vdup_lane_s8(va1, 4)); in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup()
[all …]
1x16-minmax-neon-mull-addw-dup.c
65 const int16x8_t vprod0x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va0, 0)); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup()
70 const int16x8_t vprod0x89ABCDEFc0 = vmull_s8(vb89ABCDEFc0, vdup_lane_s8(va0, 0)); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup()
75 const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1)); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup()
80 const int16x8_t vprod0x89ABCDEFc1 = vmull_s8(vb89ABCDEFc1, vdup_lane_s8(va0, 1)); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup()
85 const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2)); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup()
90 const int16x8_t vprod0x89ABCDEFc2 = vmull_s8(vb89ABCDEFc2, vdup_lane_s8(va0, 2)); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup()
95 const int16x8_t vprod0x01234567c3 = vmull_s8(vb01234567c3, vdup_lane_s8(va0, 3)); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup()
100 const int16x8_t vprod0x89ABCDEFc3 = vmull_s8(vb89ABCDEFc3, vdup_lane_s8(va0, 3)); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup()
105 const int16x8_t vprod0x01234567c4 = vmull_s8(vb01234567c4, vdup_lane_s8(va0, 4)); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup()
110 const int16x8_t vprod0x89ABCDEFc4 = vmull_s8(vb89ABCDEFc4, vdup_lane_s8(va0, 4)); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup()
[all …]
1x8-minmax-neon-mull-addw-dup.c
63 const int16x8_t vprod0x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va0, 0)); in xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup()
68 const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1)); in xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup()
73 const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2)); in xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup()
78 const int16x8_t vprod0x01234567c3 = vmull_s8(vb01234567c3, vdup_lane_s8(va0, 3)); in xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup()
83 const int16x8_t vprod0x01234567c4 = vmull_s8(vb01234567c4, vdup_lane_s8(va0, 4)); in xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup()
88 const int16x8_t vprod0x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va0, 5)); in xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup()
93 const int16x8_t vprod0x01234567c6 = vmull_s8(vb01234567c6, vdup_lane_s8(va0, 6)); in xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup()
98 const int16x8_t vprod0x01234567c7 = vmull_s8(vb01234567c7, vdup_lane_s8(va0, 7)); in xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup()
109 const int16x8_t vprod0x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va0, 0)); in xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup()
116 const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1)); in xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup()
[all …]
/external/XNNPACK/src/qs8-gemm/gen/
4x16-minmax-neon-mull-addw-dup.c
87 const int16x8_t vprod0x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va0, 0)); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup()
90 const int16x8_t vprod1x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va1, 0)); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup()
93 const int16x8_t vprod2x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va2, 0)); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup()
96 const int16x8_t vprod3x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va3, 0)); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup()
101 const int16x8_t vprod0x89ABCDEFc0 = vmull_s8(vb89ABCDEFc0, vdup_lane_s8(va0, 0)); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup()
104 const int16x8_t vprod1x89ABCDEFc0 = vmull_s8(vb89ABCDEFc0, vdup_lane_s8(va1, 0)); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup()
107 const int16x8_t vprod2x89ABCDEFc0 = vmull_s8(vb89ABCDEFc0, vdup_lane_s8(va2, 0)); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup()
110 const int16x8_t vprod3x89ABCDEFc0 = vmull_s8(vb89ABCDEFc0, vdup_lane_s8(va3, 0)); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup()
115 const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1)); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup()
118 const int16x8_t vprod1x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va1, 1)); in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup()
[all …]
3x16-minmax-neon-mull-addw-dup.c
76 const int16x8_t vprod0x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va0, 0)); in xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup()
79 const int16x8_t vprod1x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va1, 0)); in xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup()
82 const int16x8_t vprod2x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va2, 0)); in xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup()
87 const int16x8_t vprod0x89ABCDEFc0 = vmull_s8(vb89ABCDEFc0, vdup_lane_s8(va0, 0)); in xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup()
90 const int16x8_t vprod1x89ABCDEFc0 = vmull_s8(vb89ABCDEFc0, vdup_lane_s8(va1, 0)); in xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup()
93 const int16x8_t vprod2x89ABCDEFc0 = vmull_s8(vb89ABCDEFc0, vdup_lane_s8(va2, 0)); in xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup()
98 const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1)); in xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup()
101 const int16x8_t vprod1x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va1, 1)); in xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup()
104 const int16x8_t vprod2x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va2, 1)); in xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup()
109 const int16x8_t vprod0x89ABCDEFc1 = vmull_s8(vb89ABCDEFc1, vdup_lane_s8(va0, 1)); in xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup()
[all …]
2x16-minmax-neon-mull-addw-dup.c
65 const int16x8_t vprod0x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va0, 0)); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup()
68 const int16x8_t vprod1x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va1, 0)); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup()
73 const int16x8_t vprod0x89ABCDEFc0 = vmull_s8(vb89ABCDEFc0, vdup_lane_s8(va0, 0)); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup()
76 const int16x8_t vprod1x89ABCDEFc0 = vmull_s8(vb89ABCDEFc0, vdup_lane_s8(va1, 0)); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup()
81 const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1)); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup()
84 const int16x8_t vprod1x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va1, 1)); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup()
89 const int16x8_t vprod0x89ABCDEFc1 = vmull_s8(vb89ABCDEFc1, vdup_lane_s8(va0, 1)); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup()
92 const int16x8_t vprod1x89ABCDEFc1 = vmull_s8(vb89ABCDEFc1, vdup_lane_s8(va1, 1)); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup()
97 const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2)); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup()
100 const int16x8_t vprod1x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va1, 2)); in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup()
[all …]
4x8-minmax-neon-mull-addw-dup.c
79 const int16x8_t vprod0x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va0, 0)); in xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup()
82 const int16x8_t vprod1x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va1, 0)); in xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup()
85 const int16x8_t vprod2x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va2, 0)); in xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup()
88 const int16x8_t vprod3x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va3, 0)); in xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup()
93 const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1)); in xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup()
96 const int16x8_t vprod1x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va1, 1)); in xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup()
99 const int16x8_t vprod2x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va2, 1)); in xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup()
102 const int16x8_t vprod3x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va3, 1)); in xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup()
107 const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2)); in xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup()
110 const int16x8_t vprod1x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va1, 2)); in xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup()
[all …]
3x8-minmax-neon-mull-addw-dup.c
70 const int16x8_t vprod0x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va0, 0)); in xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup()
73 const int16x8_t vprod1x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va1, 0)); in xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup()
76 const int16x8_t vprod2x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va2, 0)); in xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup()
81 const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1)); in xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup()
84 const int16x8_t vprod1x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va1, 1)); in xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup()
87 const int16x8_t vprod2x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va2, 1)); in xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup()
92 const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2)); in xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup()
95 const int16x8_t vprod1x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va1, 2)); in xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup()
98 const int16x8_t vprod2x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va2, 2)); in xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup()
103 const int16x8_t vprod0x01234567c3 = vmull_s8(vb01234567c3, vdup_lane_s8(va0, 3)); in xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup()
[all …]
2x8-minmax-neon-mull-addw-dup.c
61 const int16x8_t vprod0x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va0, 0)); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup()
64 const int16x8_t vprod1x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va1, 0)); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup()
69 const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1)); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup()
72 const int16x8_t vprod1x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va1, 1)); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup()
77 const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2)); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup()
80 const int16x8_t vprod1x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va1, 2)); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup()
85 const int16x8_t vprod0x01234567c3 = vmull_s8(vb01234567c3, vdup_lane_s8(va0, 3)); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup()
88 const int16x8_t vprod1x01234567c3 = vmull_s8(vb01234567c3, vdup_lane_s8(va1, 3)); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup()
93 const int16x8_t vprod0x01234567c4 = vmull_s8(vb01234567c4, vdup_lane_s8(va0, 4)); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup()
96 const int16x8_t vprod1x01234567c4 = vmull_s8(vb01234567c4, vdup_lane_s8(va1, 4)); in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup()
[all …]
1x16-minmax-neon-mull-addw-dup.c
54 const int16x8_t vprod0x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va0, 0)); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup()
59 const int16x8_t vprod0x89ABCDEFc0 = vmull_s8(vb89ABCDEFc0, vdup_lane_s8(va0, 0)); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup()
64 const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1)); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup()
69 const int16x8_t vprod0x89ABCDEFc1 = vmull_s8(vb89ABCDEFc1, vdup_lane_s8(va0, 1)); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup()
74 const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2)); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup()
79 const int16x8_t vprod0x89ABCDEFc2 = vmull_s8(vb89ABCDEFc2, vdup_lane_s8(va0, 2)); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup()
84 const int16x8_t vprod0x01234567c3 = vmull_s8(vb01234567c3, vdup_lane_s8(va0, 3)); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup()
89 const int16x8_t vprod0x89ABCDEFc3 = vmull_s8(vb89ABCDEFc3, vdup_lane_s8(va0, 3)); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup()
94 const int16x8_t vprod0x01234567c4 = vmull_s8(vb01234567c4, vdup_lane_s8(va0, 4)); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup()
99 const int16x8_t vprod0x89ABCDEFc4 = vmull_s8(vb89ABCDEFc4, vdup_lane_s8(va0, 4)); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup()
[all …]
1x8-minmax-neon-mull-addw-dup.c
52 const int16x8_t vprod0x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va0, 0)); in xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup()
57 const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1)); in xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup()
62 const int16x8_t vprod0x01234567c2 = vmull_s8(vb01234567c2, vdup_lane_s8(va0, 2)); in xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup()
67 const int16x8_t vprod0x01234567c3 = vmull_s8(vb01234567c3, vdup_lane_s8(va0, 3)); in xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup()
72 const int16x8_t vprod0x01234567c4 = vmull_s8(vb01234567c4, vdup_lane_s8(va0, 4)); in xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup()
77 const int16x8_t vprod0x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va0, 5)); in xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup()
82 const int16x8_t vprod0x01234567c6 = vmull_s8(vb01234567c6, vdup_lane_s8(va0, 6)); in xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup()
87 const int16x8_t vprod0x01234567c7 = vmull_s8(vb01234567c7, vdup_lane_s8(va0, 7)); in xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup()
98 const int16x8_t vprod0x01234567c0 = vmull_s8(vb01234567c0, vdup_lane_s8(va0, 0)); in xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup()
105 const int16x8_t vprod0x01234567c1 = vmull_s8(vb01234567c1, vdup_lane_s8(va0, 1)); in xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup()
[all …]
/external/libvpx/libvpx/vp8/common/arm/neon/
sixtappredict_neon.c
407 d0s8 = vdup_lane_s8(dtmps8, 0); in vp8_sixtap_predict8x4_neon()
408 d1s8 = vdup_lane_s8(dtmps8, 1); in vp8_sixtap_predict8x4_neon()
409 d2s8 = vdup_lane_s8(dtmps8, 2); in vp8_sixtap_predict8x4_neon()
410 d3s8 = vdup_lane_s8(dtmps8, 3); in vp8_sixtap_predict8x4_neon()
411 d4s8 = vdup_lane_s8(dtmps8, 4); in vp8_sixtap_predict8x4_neon()
412 d5s8 = vdup_lane_s8(dtmps8, 5); in vp8_sixtap_predict8x4_neon()
501 d0s8 = vdup_lane_s8(dtmps8, 0); in vp8_sixtap_predict8x4_neon()
502 d1s8 = vdup_lane_s8(dtmps8, 1); in vp8_sixtap_predict8x4_neon()
503 d2s8 = vdup_lane_s8(dtmps8, 2); in vp8_sixtap_predict8x4_neon()
504 d3s8 = vdup_lane_s8(dtmps8, 3); in vp8_sixtap_predict8x4_neon()
[all …]
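In the libvpx hits above, vdup_lane_s8 splats each of the six 8-bit subpel filter taps into its own vector before the filtering loop. A hedged sketch of that preparation step; the coefficient source array and function name here are hypothetical stand-ins for the VP8 filter table:

#include <arm_neon.h>

/* Load all six taps into one int8x8_t, then duplicate each lane into its own
 * vector, mirroring the d0s8..d5s8 assignments shown above. */
static void splat_sixtap_coeffs(const int8_t filter_taps[8] /* hypothetical */, int8x8_t taps[6]) {
  const int8x8_t dtmps8 = vld1_s8(filter_taps);
  taps[0] = vdup_lane_s8(dtmps8, 0);
  taps[1] = vdup_lane_s8(dtmps8, 1);
  taps[2] = vdup_lane_s8(dtmps8, 2);
  taps[3] = vdup_lane_s8(dtmps8, 3);
  taps[4] = vdup_lane_s8(dtmps8, 4);
  taps[5] = vdup_lane_s8(dtmps8, 5);
}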
/external/XNNPACK/src/qs8-gemm/
neon-mull-addw-dup.c.in
76 …nt16x8_t vprod${M}x${ABC[N:N+8]}c${K} = vmull_s8(vb${ABC[N:N+8]}c${K}, vdup_lane_s8(va${M}, ${K}));
91 … const int16x8_t vprod${M}x${ABC[N:N+8]}c0 = vmull_s8(vb${ABC[N:N+8]}c0, vdup_lane_s8(va${M}, 0));
101 … const int16x8_t vprod${M}x${ABC[N:N+8]}c1 = vmull_s8(vb${ABC[N:N+8]}c1, vdup_lane_s8(va${M}, 1));
111 … const int16x8_t vprod${M}x${ABC[N:N+8]}c2 = vmull_s8(vb${ABC[N:N+8]}c2, vdup_lane_s8(va${M}, 2));
121 … const int16x8_t vprod${M}x${ABC[N:N+8]}c3 = vmull_s8(vb${ABC[N:N+8]}c3, vdup_lane_s8(va${M}, 3));
131 … const int16x8_t vprod${M}x${ABC[N:N+8]}c4 = vmull_s8(vb${ABC[N:N+8]}c4, vdup_lane_s8(va${M}, 4));
141 … const int16x8_t vprod${M}x${ABC[N:N+8]}c5 = vmull_s8(vb${ABC[N:N+8]}c5, vdup_lane_s8(va${M}, 5));
151 … const int16x8_t vprod${M}x${ABC[N:N+8]}c6 = vmull_s8(vb${ABC[N:N+8]}c6, vdup_lane_s8(va${M}, 6));
/external/XNNPACK/src/qs8-igemm/
neon-mull-addw-dup.c.in
84 …nt16x8_t vprod${M}x${ABC[N:N+8]}c${K} = vmull_s8(vb${ABC[N:N+8]}c${K}, vdup_lane_s8(va${M}, ${K}));
99 … const int16x8_t vprod${M}x${ABC[N:N+8]}c0 = vmull_s8(vb${ABC[N:N+8]}c0, vdup_lane_s8(va${M}, 0));
109 … const int16x8_t vprod${M}x${ABC[N:N+8]}c1 = vmull_s8(vb${ABC[N:N+8]}c1, vdup_lane_s8(va${M}, 1));
119 … const int16x8_t vprod${M}x${ABC[N:N+8]}c2 = vmull_s8(vb${ABC[N:N+8]}c2, vdup_lane_s8(va${M}, 2));
129 … const int16x8_t vprod${M}x${ABC[N:N+8]}c3 = vmull_s8(vb${ABC[N:N+8]}c3, vdup_lane_s8(va${M}, 3));
139 … const int16x8_t vprod${M}x${ABC[N:N+8]}c4 = vmull_s8(vb${ABC[N:N+8]}c4, vdup_lane_s8(va${M}, 4));
149 … const int16x8_t vprod${M}x${ABC[N:N+8]}c5 = vmull_s8(vb${ABC[N:N+8]}c5, vdup_lane_s8(va${M}, 5));
159 … const int16x8_t vprod${M}x${ABC[N:N+8]}c6 = vmull_s8(vb${ABC[N:N+8]}c6, vdup_lane_s8(va${M}, 6));
/external/llvm-project/clang/test/CodeGen/
arm_neon_intrinsics.c
2443 return vdup_lane_s8(a, 7); in test_vdup_lane_s8()
/external/clang/test/CodeGen/
arm_neon_intrinsics.c
2644 return vdup_lane_s8(a, 7); in test_vdup_lane_s8()
/external/neon_2_sse/
NEON_2_SSE.h
1712 _NEON2SSESTORAGE int8x8_t vdup_lane_s8(int8x8_t vec, __constrange(0,7) int lane); // VDUP.8 d0,d0[0]
12419 _NEON2SSESTORAGE int8x8_t vdup_lane_s8(int8x8_t vec, __constrange(0,7) int lane); // VDUP.8 d0,d0[…
12420 #define vdup_lane_s8 vdup_lane_u8 macro
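NEON_2_SSE.h defines the signed lane-duplicate in terms of the unsigned one, which is sound because duplication only copies the bit pattern of the selected lane. As a closing reference, a standalone usage check of the searched intrinsic, independent of the files above; it assumes a compiler targeting NEON, or the NEON_2_SSE.h shim on x86:

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
  const int8_t data[8] = {-3, 7, 11, -128, 0, 1, 2, 5};
  const int8x8_t v = vld1_s8(data);
  const int8x8_t dup7 = vdup_lane_s8(v, 7);  /* every lane becomes data[7] == 5 */

  int8_t out[8];
  vst1_s8(out, dup7);
  for (int i = 0; i < 8; ++i) {
    printf("%d ", out[i]);  /* prints "5 " eight times */
  }
  printf("\n");
  return 0;
}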