/external/XNNPACK/src/qs8-igemm/gen/ |
D | 1x16c4-minmax-neondot.c | 68 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot() 69 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot() 70 const int8x16_t vb0123x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot() 71 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot() 72 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot() 73 const int8x16_t vb4567x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot() 74 const int8x16_t vb4567x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot() 75 const int8x16_t vb4567xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot() 95 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot() 96 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot() [all …]
|
D | 1x16c16-minmax-neon-mlal-padal.c | 75 const int8x16_t va0 = vld1q_s8(a0); a0 += 16; in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() 77 const int8x16_t vb0 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() 78 const int8x16_t vb1 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() 79 const int8x16_t vb2 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() 80 const int8x16_t vb3 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() 81 const int8x16_t vb4 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() 82 const int8x16_t vb5 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() 83 const int8x16_t vb6 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() 84 const int8x16_t vb7 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() 85 const int8x16_t vb8 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() [all …]
|
D | 1x8c4-minmax-neondot.c | 66 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot() 67 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot() 68 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot() 69 const int8x16_t vb4567x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot() 85 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot() 86 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot()
|
D | 1x8c16-minmax-neon-mlal-padal.c | 67 const int8x16_t va0 = vld1q_s8(a0); a0 += 16; in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal() 69 const int8x16_t vb0 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal() 70 const int8x16_t vb1 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal() 71 const int8x16_t vb2 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal() 72 const int8x16_t vb3 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal() 73 const int8x16_t vb4 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal() 74 const int8x16_t vb5 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal() 75 const int8x16_t vb6 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal() 76 const int8x16_t vb7 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 99 const int8x16_t va0 = vld1q_s8(a0); a0 += 16; in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 100 const int8x16_t va1 = vld1q_s8(a1); a1 += 16; in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 102 const int8x16_t vb0 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 103 const int8x16_t vb1 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 104 const int8x16_t vb2 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 105 const int8x16_t vb3 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 106 const int8x16_t vb4 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 107 const int8x16_t vb5 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 108 const int8x16_t vb6 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() 109 const int8x16_t vb7 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() [all …]
|
D | 4x16c4-minmax-neondot.c | 107 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 108 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 109 const int8x16_t vb0123x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 110 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 111 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 112 const int8x16_t vb4567x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 113 const int8x16_t vb4567x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 114 const int8x16_t vb4567xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 161 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 162 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() [all …]
|
D | 2x8c16-minmax-neon-mlal-padal.c | 83 const int8x16_t va0 = vld1q_s8(a0); a0 += 16; in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() 84 const int8x16_t va1 = vld1q_s8(a1); a1 += 16; in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() 86 const int8x16_t vb0 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() 87 const int8x16_t vb1 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() 88 const int8x16_t vb2 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() 89 const int8x16_t vb3 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() 90 const int8x16_t vb4 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() 91 const int8x16_t vb5 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() 92 const int8x16_t vb6 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() 93 const int8x16_t vb7 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 123 const int8x16_t va0 = vld1q_s8(a0); a0 += 16; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 124 const int8x16_t va1 = vld1q_s8(a1); a1 += 16; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 125 const int8x16_t va2 = vld1q_s8(a2); a2 += 16; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 127 const int8x16_t vb0 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 128 const int8x16_t vb1 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 129 const int8x16_t vb2 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 130 const int8x16_t vb3 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 131 const int8x16_t vb4 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 132 const int8x16_t vb5 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 133 const int8x16_t vb6 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() [all …]
|
D | 3x8c16-minmax-neon-mlal-padal.c | 99 const int8x16_t va0 = vld1q_s8(a0); a0 += 16; in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() 100 const int8x16_t va1 = vld1q_s8(a1); a1 += 16; in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() 101 const int8x16_t va2 = vld1q_s8(a2); a2 += 16; in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() 103 const int8x16_t vb0 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() 104 const int8x16_t vb1 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() 105 const int8x16_t vb2 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() 106 const int8x16_t vb3 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() 107 const int8x16_t vb4 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() 108 const int8x16_t vb5 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() 109 const int8x16_t vb6 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() [all …]
|
D | 4x8c4-minmax-neondot.c | 99 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot() 100 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot() 101 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot() 102 const int8x16_t vb4567x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot() 133 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot() 134 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()
|
D | 6x16c4-minmax-neondot.c | 133 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 134 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 135 const int8x16_t vb0123x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 136 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 137 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 138 const int8x16_t vb4567x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 139 const int8x16_t vb4567x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 140 const int8x16_t vb4567xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 205 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 206 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() [all …]
|
D | 4x8c16-minmax-neon-mlal-padal.c | 115 const int8x16_t va0 = vld1q_s8(a0); a0 += 16; in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() 116 const int8x16_t va1 = vld1q_s8(a1); a1 += 16; in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() 117 const int8x16_t va2 = vld1q_s8(a2); a2 += 16; in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() 118 const int8x16_t va3 = vld1q_s8(a3); a3 += 16; in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() 120 const int8x16_t vb0 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() 121 const int8x16_t vb1 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() 122 const int8x16_t vb2 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() 123 const int8x16_t vb3 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() 124 const int8x16_t vb4 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() 125 const int8x16_t vb5 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal() [all …]
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 1x16c4-minmax-neondot.c | 59 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot() 60 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot() 61 const int8x16_t vb0123x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot() 62 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot() 63 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot() 64 const int8x16_t vb4567x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot() 65 const int8x16_t vb4567x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot() 66 const int8x16_t vb4567xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot() 86 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot() 87 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot() [all …]
|
D | 1x16c16-minmax-neon-mlal-padal.c | 64 const int8x16_t va0 = vld1q_s8(a0); a0 += 16; in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() 66 const int8x16_t vb0 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() 67 const int8x16_t vb1 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() 68 const int8x16_t vb2 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() 69 const int8x16_t vb3 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() 70 const int8x16_t vb4 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() 71 const int8x16_t vb5 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() 72 const int8x16_t vb6 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() 73 const int8x16_t vb7 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() 74 const int8x16_t vb8 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() [all …]
|
D | 1x8c4-minmax-neondot.c | 57 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot() 58 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot() 59 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot() 60 const int8x16_t vb4567x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot() 76 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot() 77 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot()
|
D | 1x8c16-minmax-neon-mlal-padal.c | 56 const int8x16_t va0 = vld1q_s8(a0); a0 += 16; in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal() 58 const int8x16_t vb0 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal() 59 const int8x16_t vb1 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal() 60 const int8x16_t vb2 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal() 61 const int8x16_t vb3 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal() 62 const int8x16_t vb4 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal() 63 const int8x16_t vb5 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal() 64 const int8x16_t vb6 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal() 65 const int8x16_t vb7 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
|
D | 2x16c16-minmax-neon-mlal-padal.c | 86 const int8x16_t va0 = vld1q_s8(a0); a0 += 16; in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 87 const int8x16_t va1 = vld1q_s8(a1); a1 += 16; in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 89 const int8x16_t vb0 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 90 const int8x16_t vb1 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 91 const int8x16_t vb2 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 92 const int8x16_t vb3 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 93 const int8x16_t vb4 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 94 const int8x16_t vb5 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 95 const int8x16_t vb6 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() 96 const int8x16_t vb7 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() [all …]
|
D | 4x16c4-minmax-neondot.c | 92 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 93 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 94 const int8x16_t vb0123x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 95 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 96 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 97 const int8x16_t vb4567x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 98 const int8x16_t vb4567x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 99 const int8x16_t vb4567xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 146 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 147 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() [all …]
|
D | 2x8c16-minmax-neon-mlal-padal.c | 70 const int8x16_t va0 = vld1q_s8(a0); a0 += 16; in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() 71 const int8x16_t va1 = vld1q_s8(a1); a1 += 16; in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() 73 const int8x16_t vb0 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() 74 const int8x16_t vb1 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() 75 const int8x16_t vb2 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() 76 const int8x16_t vb3 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() 77 const int8x16_t vb4 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() 78 const int8x16_t vb5 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() 79 const int8x16_t vb6 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() 80 const int8x16_t vb7 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 108 const int8x16_t va0 = vld1q_s8(a0); a0 += 16; in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 109 const int8x16_t va1 = vld1q_s8(a1); a1 += 16; in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 110 const int8x16_t va2 = vld1q_s8(a2); a2 += 16; in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 112 const int8x16_t vb0 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 113 const int8x16_t vb1 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 114 const int8x16_t vb2 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 115 const int8x16_t vb3 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 116 const int8x16_t vb4 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 117 const int8x16_t vb5 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 118 const int8x16_t vb6 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() [all …]
|
D | 3x8c16-minmax-neon-mlal-padal.c | 84 const int8x16_t va0 = vld1q_s8(a0); a0 += 16; in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() 85 const int8x16_t va1 = vld1q_s8(a1); a1 += 16; in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() 86 const int8x16_t va2 = vld1q_s8(a2); a2 += 16; in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() 88 const int8x16_t vb0 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() 89 const int8x16_t vb1 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() 90 const int8x16_t vb2 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() 91 const int8x16_t vb3 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() 92 const int8x16_t vb4 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() 93 const int8x16_t vb5 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() 94 const int8x16_t vb6 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() [all …]
|
D | 4x8c4-minmax-neondot.c | 84 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot() 85 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot() 86 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot() 87 const int8x16_t vb4567x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot() 118 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot() 119 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()
|
D | 6x16c4-minmax-neondot.c | 114 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 115 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 116 const int8x16_t vb0123x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 117 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 118 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 119 const int8x16_t vb4567x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 120 const int8x16_t vb4567x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 121 const int8x16_t vb4567xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 186 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 187 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() [all …]
|
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/ |
D | depthwiseconv_uint8_transitional.h | 3323 filter_reg_0_a = vld1q_s8(filter_workspace); 3325 filter_reg_0_b = vld1q_s8(filter_workspace); 3327 filter_reg_1_a = vld1q_s8(filter_workspace); 3329 filter_reg_1_b = vld1q_s8(filter_workspace); 3331 filter_reg_2_a = vld1q_s8(filter_workspace); 3333 filter_reg_2_b = vld1q_s8(filter_workspace); 3355 int8x16_t left_bank_0_reg = vld1q_s8(next_input_data); 3357 vld1q_s8(next_input_data + workspace_height_stride); 3359 vld1q_s8(next_input_data + 2 * workspace_height_stride); 3361 vld1q_s8(next_input_data + 3 * workspace_height_stride); [all …]
|
/external/libjpeg-turbo/simd/arm/aarch64/ |
D | jchuff-neon.c | 99 vld1q_s8((int8_t *)(block + 0 * DCTSIZE)), in jsimd_huff_encode_one_block_neon() 100 vld1q_s8((int8_t *)(block + 1 * DCTSIZE)), in jsimd_huff_encode_one_block_neon() 101 vld1q_s8((int8_t *)(block + 2 * DCTSIZE)), in jsimd_huff_encode_one_block_neon() 102 vld1q_s8((int8_t *)(block + 3 * DCTSIZE)) in jsimd_huff_encode_one_block_neon() 105 vld1q_s8((int8_t *)(block + 4 * DCTSIZE)), in jsimd_huff_encode_one_block_neon() 106 vld1q_s8((int8_t *)(block + 5 * DCTSIZE)), in jsimd_huff_encode_one_block_neon() 107 vld1q_s8((int8_t *)(block + 6 * DCTSIZE)), in jsimd_huff_encode_one_block_neon() 108 vld1q_s8((int8_t *)(block + 7 * DCTSIZE)) in jsimd_huff_encode_one_block_neon()
|