/external/XNNPACK/src/f32-sigmoid/gen/ |
D | neonfma-rr1-lut64-p2-div-x24.c | 73 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24() 74 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24() 77 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24() 78 const uint64_t vidx67 = vgetq_lane_u64(vidx4567, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24() 81 const uint64_t vidx89 = vgetq_lane_u64(vidx89AB, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24() 82 const uint64_t vidxAB = vgetq_lane_u64(vidx89AB, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24() 85 const uint64_t vidxCD = vgetq_lane_u64(vidxCDEF, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24() 86 const uint64_t vidxEF = vgetq_lane_u64(vidxCDEF, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24() 89 const uint64_t vidxGH = vgetq_lane_u64(vidxGHIJ, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24() 90 const uint64_t vidxIJ = vgetq_lane_u64(vidxGHIJ, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24() [all …]
|
D | neonfma-rr1-lut2048-p1-div-x24.c | 72 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24() 73 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24() 76 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24() 77 const uint64_t vidx67 = vgetq_lane_u64(vidx4567, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24() 80 const uint64_t vidx89 = vgetq_lane_u64(vidx89AB, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24() 81 const uint64_t vidxAB = vgetq_lane_u64(vidx89AB, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24() 84 const uint64_t vidxCD = vgetq_lane_u64(vidxCDEF, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24() 85 const uint64_t vidxEF = vgetq_lane_u64(vidxCDEF, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24() 88 const uint64_t vidxGH = vgetq_lane_u64(vidxGHIJ, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24() 89 const uint64_t vidxIJ = vgetq_lane_u64(vidxGHIJ, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24() [all …]
|
D | neonfma-rr1-lut2048-p1-div-x20.c | 67 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20() 68 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20() 71 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20() 72 const uint64_t vidx67 = vgetq_lane_u64(vidx4567, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20() 75 const uint64_t vidx89 = vgetq_lane_u64(vidx89AB, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20() 76 const uint64_t vidxAB = vgetq_lane_u64(vidx89AB, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20() 79 const uint64_t vidxCD = vgetq_lane_u64(vidxCDEF, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20() 80 const uint64_t vidxEF = vgetq_lane_u64(vidxCDEF, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20() 83 const uint64_t vidxGH = vgetq_lane_u64(vidxGHIJ, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20() 84 const uint64_t vidxIJ = vgetq_lane_u64(vidxGHIJ, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20() [all …]
|
D | neonfma-rr1-lut64-p2-div-x20.c | 68 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20() 69 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20() 72 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20() 73 const uint64_t vidx67 = vgetq_lane_u64(vidx4567, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20() 76 const uint64_t vidx89 = vgetq_lane_u64(vidx89AB, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20() 77 const uint64_t vidxAB = vgetq_lane_u64(vidx89AB, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20() 80 const uint64_t vidxCD = vgetq_lane_u64(vidxCDEF, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20() 81 const uint64_t vidxEF = vgetq_lane_u64(vidxCDEF, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20() 84 const uint64_t vidxGH = vgetq_lane_u64(vidxGHIJ, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20() 85 const uint64_t vidxIJ = vgetq_lane_u64(vidxGHIJ, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x20() [all …]
|
D | neonfma-rr1-lut64-p2-div-x16.c | 63 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16() 64 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16() 67 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16() 68 const uint64_t vidx67 = vgetq_lane_u64(vidx4567, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16() 71 const uint64_t vidx89 = vgetq_lane_u64(vidx89AB, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16() 72 const uint64_t vidxAB = vgetq_lane_u64(vidx89AB, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16() 75 const uint64_t vidxCD = vgetq_lane_u64(vidxCDEF, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16() 76 const uint64_t vidxEF = vgetq_lane_u64(vidxCDEF, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16() 162 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16() 163 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x16() [all …]
|
D | neonfma-rr1-lut2048-p1-div-x16.c | 62 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16() 63 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16() 66 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16() 67 const uint64_t vidx67 = vgetq_lane_u64(vidx4567, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16() 70 const uint64_t vidx89 = vgetq_lane_u64(vidx89AB, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16() 71 const uint64_t vidxAB = vgetq_lane_u64(vidx89AB, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16() 74 const uint64_t vidxCD = vgetq_lane_u64(vidxCDEF, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16() 75 const uint64_t vidxEF = vgetq_lane_u64(vidxCDEF, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16() 156 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16() 157 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16() [all …]
|
D | neonfma-rr1-lut2048-p1-nr2recps-x20.c | 67 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20() 68 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20() 71 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20() 72 const uint64_t vidx67 = vgetq_lane_u64(vidx4567, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20() 75 const uint64_t vidx89 = vgetq_lane_u64(vidx89AB, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20() 76 const uint64_t vidxAB = vgetq_lane_u64(vidx89AB, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20() 79 const uint64_t vidxCD = vgetq_lane_u64(vidxCDEF, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20() 80 const uint64_t vidxEF = vgetq_lane_u64(vidxCDEF, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20() 83 const uint64_t vidxGH = vgetq_lane_u64(vidxGHIJ, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20() 84 const uint64_t vidxIJ = vgetq_lane_u64(vidxGHIJ, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20() [all …]
|
D | neonfma-rr1-lut64-p2-nr2fma-x24.c | 73 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() 74 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() 77 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() 78 const uint64_t vidx67 = vgetq_lane_u64(vidx4567, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() 81 const uint64_t vidx89 = vgetq_lane_u64(vidx89AB, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() 82 const uint64_t vidxAB = vgetq_lane_u64(vidx89AB, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() 85 const uint64_t vidxCD = vgetq_lane_u64(vidxCDEF, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() 86 const uint64_t vidxEF = vgetq_lane_u64(vidxCDEF, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() 89 const uint64_t vidxGH = vgetq_lane_u64(vidxGHIJ, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() 90 const uint64_t vidxIJ = vgetq_lane_u64(vidxGHIJ, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() [all …]
|
D | neonfma-rr1-lut64-p2-nr2recps-x24.c | 73 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() 74 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() 77 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() 78 const uint64_t vidx67 = vgetq_lane_u64(vidx4567, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() 81 const uint64_t vidx89 = vgetq_lane_u64(vidx89AB, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() 82 const uint64_t vidxAB = vgetq_lane_u64(vidx89AB, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() 85 const uint64_t vidxCD = vgetq_lane_u64(vidxCDEF, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() 86 const uint64_t vidxEF = vgetq_lane_u64(vidxCDEF, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() 89 const uint64_t vidxGH = vgetq_lane_u64(vidxGHIJ, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() 90 const uint64_t vidxIJ = vgetq_lane_u64(vidxGHIJ, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() [all …]
|
D | neonfma-rr1-lut2048-p1-nr1recps1fma-x24.c | 72 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24() 73 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24() 76 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24() 77 const uint64_t vidx67 = vgetq_lane_u64(vidx4567, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24() 80 const uint64_t vidx89 = vgetq_lane_u64(vidx89AB, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24() 81 const uint64_t vidxAB = vgetq_lane_u64(vidx89AB, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24() 84 const uint64_t vidxCD = vgetq_lane_u64(vidxCDEF, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24() 85 const uint64_t vidxEF = vgetq_lane_u64(vidxCDEF, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24() 88 const uint64_t vidxGH = vgetq_lane_u64(vidxGHIJ, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24() 89 const uint64_t vidxIJ = vgetq_lane_u64(vidxGHIJ, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24() [all …]
|
D | neonfma-rr1-lut64-p2-nr1recps1fma-x24.c | 73 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() 74 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() 77 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() 78 const uint64_t vidx67 = vgetq_lane_u64(vidx4567, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() 81 const uint64_t vidx89 = vgetq_lane_u64(vidx89AB, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() 82 const uint64_t vidxAB = vgetq_lane_u64(vidx89AB, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() 85 const uint64_t vidxCD = vgetq_lane_u64(vidxCDEF, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() 86 const uint64_t vidxEF = vgetq_lane_u64(vidxCDEF, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() 89 const uint64_t vidxGH = vgetq_lane_u64(vidxGHIJ, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() 90 const uint64_t vidxIJ = vgetq_lane_u64(vidxGHIJ, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() [all …]
|
D | neon-rr2-lut2048-p1-nr2recps-x24.c | 73 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24() 74 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24() 77 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24() 78 const uint64_t vidx67 = vgetq_lane_u64(vidx4567, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24() 81 const uint64_t vidx89 = vgetq_lane_u64(vidx89AB, 0); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24() 82 const uint64_t vidxAB = vgetq_lane_u64(vidx89AB, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24() 85 const uint64_t vidxCD = vgetq_lane_u64(vidxCDEF, 0); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24() 86 const uint64_t vidxEF = vgetq_lane_u64(vidxCDEF, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24() 89 const uint64_t vidxGH = vgetq_lane_u64(vidxGHIJ, 0); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24() 90 const uint64_t vidxIJ = vgetq_lane_u64(vidxGHIJ, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24() [all …]
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | neonfma-lut64-p2-x20.c | 89 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20() 90 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20() 92 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20() 93 const uint64_t vidx67 = vgetq_lane_u64(vidx4567, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20() 95 const uint64_t vidx89 = vgetq_lane_u64(vidx89AB, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20() 96 const uint64_t vidxAB = vgetq_lane_u64(vidx89AB, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20() 98 const uint64_t vidxCD = vgetq_lane_u64(vidxCDEF, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20() 99 const uint64_t vidxEF = vgetq_lane_u64(vidxCDEF, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20() 101 const uint64_t vidxGH = vgetq_lane_u64(vidxGHIJ, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20() 102 const uint64_t vidxIJ = vgetq_lane_u64(vidxGHIJ, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20() [all …]
|
D | neonfma-lut64-p2-x20-acc5.c | 93 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5() 94 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5() 96 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5() 97 const uint64_t vidx67 = vgetq_lane_u64(vidx4567, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5() 99 const uint64_t vidx89 = vgetq_lane_u64(vidx89AB, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5() 100 const uint64_t vidxAB = vgetq_lane_u64(vidx89AB, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5() 102 const uint64_t vidxCD = vgetq_lane_u64(vidxCDEF, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5() 103 const uint64_t vidxEF = vgetq_lane_u64(vidxCDEF, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5() 105 const uint64_t vidxGH = vgetq_lane_u64(vidxGHIJ, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5() 106 const uint64_t vidxIJ = vgetq_lane_u64(vidxGHIJ, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5() [all …]
|
D | neonfma-lut64-p2-x20-acc2.c | 90 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2() 91 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2() 93 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2() 94 const uint64_t vidx67 = vgetq_lane_u64(vidx4567, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2() 96 const uint64_t vidx89 = vgetq_lane_u64(vidx89AB, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2() 97 const uint64_t vidxAB = vgetq_lane_u64(vidx89AB, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2() 99 const uint64_t vidxCD = vgetq_lane_u64(vidxCDEF, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2() 100 const uint64_t vidxEF = vgetq_lane_u64(vidxCDEF, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2() 102 const uint64_t vidxGH = vgetq_lane_u64(vidxGHIJ, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2() 103 const uint64_t vidxIJ = vgetq_lane_u64(vidxGHIJ, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2() [all …]
|
D | neon-lut64-p2-x20.c | 90 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20() 91 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20() 93 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20() 94 const uint64_t vidx67 = vgetq_lane_u64(vidx4567, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20() 96 const uint64_t vidx89 = vgetq_lane_u64(vidx89AB, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20() 97 const uint64_t vidxAB = vgetq_lane_u64(vidx89AB, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20() 99 const uint64_t vidxCD = vgetq_lane_u64(vidxCDEF, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20() 100 const uint64_t vidxEF = vgetq_lane_u64(vidxCDEF, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20() 102 const uint64_t vidxGH = vgetq_lane_u64(vidxGHIJ, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20() 103 const uint64_t vidxIJ = vgetq_lane_u64(vidxGHIJ, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20() [all …]
|
D | neon-lut64-p2-x20-acc5.c | 94 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5() 95 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5() 97 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5() 98 const uint64_t vidx67 = vgetq_lane_u64(vidx4567, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5() 100 const uint64_t vidx89 = vgetq_lane_u64(vidx89AB, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5() 101 const uint64_t vidxAB = vgetq_lane_u64(vidx89AB, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5() 103 const uint64_t vidxCD = vgetq_lane_u64(vidxCDEF, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5() 104 const uint64_t vidxEF = vgetq_lane_u64(vidxCDEF, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5() 106 const uint64_t vidxGH = vgetq_lane_u64(vidxGHIJ, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5() 107 const uint64_t vidxIJ = vgetq_lane_u64(vidxGHIJ, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5() [all …]
|
D | neon-lut64-p2-x20-acc2.c | 91 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2() 92 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2() 94 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2() 95 const uint64_t vidx67 = vgetq_lane_u64(vidx4567, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2() 97 const uint64_t vidx89 = vgetq_lane_u64(vidx89AB, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2() 98 const uint64_t vidxAB = vgetq_lane_u64(vidx89AB, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2() 100 const uint64_t vidxCD = vgetq_lane_u64(vidxCDEF, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2() 101 const uint64_t vidxEF = vgetq_lane_u64(vidxCDEF, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2() 103 const uint64_t vidxGH = vgetq_lane_u64(vidxGHIJ, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2() 104 const uint64_t vidxIJ = vgetq_lane_u64(vidxGHIJ, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2() [all …]
|
D | neonfma-lut64-p2-x16-acc4.c | 88 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4() 89 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4() 91 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4() 92 const uint64_t vidx67 = vgetq_lane_u64(vidx4567, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4() 94 const uint64_t vidx89 = vgetq_lane_u64(vidx89AB, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4() 95 const uint64_t vidxAB = vgetq_lane_u64(vidx89AB, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4() 97 const uint64_t vidxCD = vgetq_lane_u64(vidxCDEF, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4() 98 const uint64_t vidxEF = vgetq_lane_u64(vidxCDEF, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4() 222 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4() 223 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4() [all …]
|
D | neon-lut64-p2-x16-acc4.c | 89 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4() 90 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4() 92 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4() 93 const uint64_t vidx67 = vgetq_lane_u64(vidx4567, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4() 95 const uint64_t vidx89 = vgetq_lane_u64(vidx89AB, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4() 96 const uint64_t vidxAB = vgetq_lane_u64(vidx89AB, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4() 98 const uint64_t vidxCD = vgetq_lane_u64(vidxCDEF, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4() 99 const uint64_t vidxEF = vgetq_lane_u64(vidxCDEF, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4() 223 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4() 224 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4() [all …]
|
D | neon-lut64-p2-x16.c | 86 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16() 87 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16() 89 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16() 90 const uint64_t vidx67 = vgetq_lane_u64(vidx4567, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16() 92 const uint64_t vidx89 = vgetq_lane_u64(vidx89AB, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16() 93 const uint64_t vidxAB = vgetq_lane_u64(vidx89AB, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16() 95 const uint64_t vidxCD = vgetq_lane_u64(vidxCDEF, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16() 96 const uint64_t vidxEF = vgetq_lane_u64(vidxCDEF, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16() 216 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16() 217 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16() [all …]
|
D | neon-lut64-p2-x16-acc2.c | 87 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2() 88 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2() 90 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2() 91 const uint64_t vidx67 = vgetq_lane_u64(vidx4567, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2() 93 const uint64_t vidx89 = vgetq_lane_u64(vidx89AB, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2() 94 const uint64_t vidxAB = vgetq_lane_u64(vidx89AB, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2() 96 const uint64_t vidxCD = vgetq_lane_u64(vidxCDEF, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2() 97 const uint64_t vidxEF = vgetq_lane_u64(vidxCDEF, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2() 219 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2() 220 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2() [all …]
|
D | neonfma-lut64-p2-x16-acc2.c | 86 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2() 87 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2() 89 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2() 90 const uint64_t vidx67 = vgetq_lane_u64(vidx4567, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2() 92 const uint64_t vidx89 = vgetq_lane_u64(vidx89AB, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2() 93 const uint64_t vidxAB = vgetq_lane_u64(vidx89AB, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2() 95 const uint64_t vidxCD = vgetq_lane_u64(vidxCDEF, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2() 96 const uint64_t vidxEF = vgetq_lane_u64(vidxCDEF, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2() 218 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2() 219 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2() [all …]
|
D | neonfma-lut64-p2-x16.c | 85 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16() 86 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16() 88 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16() 89 const uint64_t vidx67 = vgetq_lane_u64(vidx4567, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16() 91 const uint64_t vidx89 = vgetq_lane_u64(vidx89AB, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16() 92 const uint64_t vidxAB = vgetq_lane_u64(vidx89AB, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16() 94 const uint64_t vidxCD = vgetq_lane_u64(vidxCDEF, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16() 95 const uint64_t vidxEF = vgetq_lane_u64(vidxCDEF, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16() 215 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16() 216 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16() [all …]
|
/external/XNNPACK/src/f32-velu/gen/ |
D | velu-neonfma-rr1-lut16-p3-x24.c | 79 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24() 80 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24() 86 const uint64_t vidx45 = vgetq_lane_u64(vidx4567, 0); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24() 87 const uint64_t vidx67 = vgetq_lane_u64(vidx4567, 1); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24() 93 const uint64_t vidx89 = vgetq_lane_u64(vidx89AB, 0); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24() 94 const uint64_t vidxAB = vgetq_lane_u64(vidx89AB, 1); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24() 100 const uint64_t vidxCD = vgetq_lane_u64(vidxCDEF, 0); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24() 101 const uint64_t vidxEF = vgetq_lane_u64(vidxCDEF, 1); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24() 107 const uint64_t vidxGH = vgetq_lane_u64(vidxGHIJ, 0); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24() 108 const uint64_t vidxIJ = vgetq_lane_u64(vidxGHIJ, 1); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24() [all …]
|