/external/XNNPACK/src/f32-spmm/gen/ |
D | 12x1-minmax-neonfma.c | 34 size_t output_decrement = output_stride * nc - 12 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_12x1__neonfma() local 71 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_12x1__neonfma() 76 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_12x1__neonfma() 105 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_12x1__neonfma() 108 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_12x1__neonfma() 131 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_12x1__neonfma() 134 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_12x1__neonfma() 157 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_12x1__neonfma() 160 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_12x1__neonfma() 183 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_12x1__neonfma()
|
D | 12x1-minmax-neon.c | 34 size_t output_decrement = output_stride * nc - 12 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_12x1__neon() local 71 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_12x1__neon() 76 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_12x1__neon() 105 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_12x1__neon() 108 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_12x1__neon() 131 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_12x1__neon() 134 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_12x1__neon() 157 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_12x1__neon() 160 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_12x1__neon() 183 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_12x1__neon()
|
D | 8x1-minmax-wasmsimd-arm.c | 34 size_t output_decrement = output_stride * nc - 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm() local 63 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm() 68 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm() 92 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm() 95 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm() 119 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm() 122 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm() 146 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm()
|
D | 16x1-minmax-neonfma.c | 34 size_t output_decrement = output_stride * nc - 16 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma() local 77 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma() 82 output_decrement += 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma() 111 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma() 114 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma() 137 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma() 140 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma() 163 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma() 166 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma() 189 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma()
|
D | 16x1-minmax-neon.c | 34 size_t output_decrement = output_stride * nc - 16 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neon() local 77 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neon() 82 output_decrement += 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neon() 111 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neon() 114 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neon() 137 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neon() 140 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neon() 163 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neon() 166 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neon() 189 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neon()
|
D | 16x1-minmax-sse.c | 34 size_t output_decrement = output_stride * nc - 16 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__sse() local 75 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__sse() 80 output_decrement += 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__sse() 109 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__sse() 112 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__sse() 135 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__sse() 138 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__sse() 163 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__sse() 166 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__sse() 189 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__sse()
|
D | 16x1-minmax-wasmsimd-arm.c | 34 size_t output_decrement = output_stride * nc - 16 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm() local 75 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm() 80 output_decrement += 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm() 110 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm() 113 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm() 137 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm() 140 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm() 164 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm() 167 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm() 191 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm()
|
D | 8x1-minmax-wasmsimd-x86.c | 34 size_t output_decrement = output_stride * nc - 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86() local 63 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86() 68 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86() 92 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86() 95 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86() 119 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86() 122 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86() 146 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86()
|
D | 8x1-minmax-neonfma.c | 34 size_t output_decrement = output_stride * nc - 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma() local 65 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma() 70 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma() 93 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma() 96 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma() 119 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma() 122 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma() 145 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma()
|
D | 8x1-minmax-neon.c | 34 size_t output_decrement = output_stride * nc - 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__neon() local 65 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__neon() 70 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__neon() 93 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__neon() 96 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__neon() 119 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__neon() 122 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__neon() 145 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__neon()
|
D | 8x1-minmax-wasmsimd-arm-pipelined.c | 34 size_t output_decrement = output_stride * nc - 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined() local 71 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined() 76 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined() 100 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined() 103 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined() 127 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined() 130 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined() 154 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_arm_pipelined()
|
D | 8x1-minmax-neonfma-pipelined.c | 34 size_t output_decrement = output_stride * nc - 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined() local 70 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined() 75 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined() 100 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined() 103 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined() 128 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined() 131 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined() 156 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__neonfma_pipelined()
|
D | 8x1-minmax-sse.c | 34 size_t output_decrement = output_stride * nc - 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__sse() local 63 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__sse() 68 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__sse() 91 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__sse() 94 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__sse() 119 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__sse() 122 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__sse() 145 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__sse()
|
D | 8x1-minmax-neon-pipelined.c | 34 size_t output_decrement = output_stride * nc - 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined() local 70 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined() 75 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined() 100 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined() 103 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined() 128 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined() 131 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined() 156 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__neon_pipelined()
|
D | 16x1-minmax-neon-pipelined.c | 34 size_t output_decrement = output_stride * nc - 16 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined() local 84 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined() 89 output_decrement += 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined() 120 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined() 123 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined() 148 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined() 151 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined() 176 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined() 179 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined() 204 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neon_pipelined()
|
D | 16x1-minmax-neonfma-pipelined.c | 34 size_t output_decrement = output_stride * nc - 16 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined() local 84 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined() 89 output_decrement += 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined() 120 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined() 123 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined() 148 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined() 151 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined() 176 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined() 179 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined() 204 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__neonfma_pipelined()
|
D | 16x1-minmax-wasmsimd-arm-pipelined.c | 34 size_t output_decrement = output_stride * nc - 16 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined() local 85 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined() 90 output_decrement += 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined() 120 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined() 123 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined() 147 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined() 150 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined() 174 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined() 177 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined() 201 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_16x1__wasmsimd_arm_pipelined()
|
D | 32x1-minmax-neon.c | 34 size_t output_decrement = output_stride * nc - 32 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_32x1__neon() local 102 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_32x1__neon() 107 output_decrement += 16 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_32x1__neon() 148 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_32x1__neon() 151 output_decrement += 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_32x1__neon() 180 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_32x1__neon() 183 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_32x1__neon() 206 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_32x1__neon() 209 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_32x1__neon() 232 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_32x1__neon() [all …]
|
D | 32x1-minmax-neonfma.c | 34 size_t output_decrement = output_stride * nc - 32 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma() local 102 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma() 107 output_decrement += 16 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma() 148 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma() 151 output_decrement += 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma() 180 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma() 183 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma() 206 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma() 209 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma() 232 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma() [all …]
|
D | 8x1-minmax-wasmsimd-x86-pipelined.c | 34 size_t output_decrement = output_stride * nc - 8 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined() local 71 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined() 76 output_decrement += 4 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined() 100 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined() 103 output_decrement += 2 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined() 127 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined() 130 output_decrement += 1 * sizeof(float); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined() 154 output = (float*restrict) ((uintptr_t) output - output_decrement); in xnn_f32_spmm_minmax_ukernel_8x1__wasmsimd_x86_pipelined()
|
/external/XNNPACK/src/f16-spmm/gen/ |
D | 32x1-minmax-neonfp16arith.c | 39 size_t output_decrement = output_stride * nc - 32 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith() local 84 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith() 89 output_decrement += 16 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith() 118 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith() 121 output_decrement += 8 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith() 144 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith() 147 output_decrement += 4 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith() 170 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith() 173 output_decrement += 2 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith() 196 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith() [all …]
|
D | 24x1-minmax-neonfp16arith.c | 39 size_t output_decrement = output_stride * nc - 24 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith() local 77 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith() 82 output_decrement += 8 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith() 111 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith() 114 output_decrement += 8 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith() 137 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith() 140 output_decrement += 4 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith() 163 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith() 166 output_decrement += 2 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith() 189 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith() [all …]
|
D | 16x1-minmax-neonfp16arith.c | 39 size_t output_decrement = output_stride * nc - 16 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith() local 70 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith() 75 output_decrement += 8 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith() 98 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith() 101 output_decrement += 4 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith() 124 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith() 127 output_decrement += 2 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith() 150 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith() 153 output_decrement += 1 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith() 176 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith()
|
D | 8x1-minmax-neonfp16arith.c | 39 size_t output_decrement = output_stride * nc - 8 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith() local 63 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith() 68 output_decrement += 4 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith() 91 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith() 94 output_decrement += 2 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith() 117 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith() 120 output_decrement += 1 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith() 143 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith()
|
D | 24x1-minmax-neonfp16arith-x2.c | 39 size_t output_decrement = output_stride * nc - 24 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2() local 107 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2() 112 output_decrement += 8 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2() 141 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2() 144 output_decrement += 8 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2() 167 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2() 170 output_decrement += 4 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2() 193 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2() 196 output_decrement += 2 * sizeof(__fp16); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2() 219 o = (__fp16*restrict) ((uintptr_t) o - output_decrement); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2() [all …]
|