/external/XNNPACK/src/f32-conv-hwc/gen/ |
D | 3x3s2p1c3x4-neonfma-2x2.c | 31 size_t output_width_stride, in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() argument 42 const size_t output_channel_decrement = output_width * output_width_stride - 4 * sizeof(float); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 356 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 358 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 361 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 363 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 372 vst1_f32((float*) ((uintptr_t) o1_tmp + output_width_stride), vo1x1c01); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 374 vst1_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c01); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 386 vst1_lane_f32((float*) ((uintptr_t) o1_tmp + output_width_stride), vo1x1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() 387 vst1_lane_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2() [all …]
|
D | 3x3s2p1c3x4-neon-2x2.c | 31 size_t output_width_stride, in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() argument 42 const size_t output_channel_decrement = output_width * output_width_stride - 4 * sizeof(float); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 356 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 358 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 361 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 363 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 372 vst1_f32((float*) ((uintptr_t) o1_tmp + output_width_stride), vo1x1c01); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 374 vst1_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c01); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 386 vst1_lane_f32((float*) ((uintptr_t) o1_tmp + output_width_stride), vo1x1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() 387 vst1_lane_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() [all …]
|
D | 3x3s2p0p1c3x4-neon-2x2.c | 31 size_t output_width_stride, in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() argument 42 const size_t output_channel_decrement = output_width * output_width_stride - 4 * sizeof(float); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 356 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 358 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 361 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 363 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 372 vst1_f32((float*) ((uintptr_t) o1_tmp + output_width_stride), vo1x1c01); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 374 vst1_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c01); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 386 vst1_lane_f32((float*) ((uintptr_t) o1_tmp + output_width_stride), vo1x1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() 387 vst1_lane_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2() [all …]
|
D | 3x3s2p0p1c3x4-neonfma-2x2.c | 31 size_t output_width_stride, in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() argument 42 const size_t output_channel_decrement = output_width * output_width_stride - 4 * sizeof(float); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 356 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 358 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 361 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 363 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 372 vst1_f32((float*) ((uintptr_t) o1_tmp + output_width_stride), vo1x1c01); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 374 vst1_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c01); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 386 vst1_lane_f32((float*) ((uintptr_t) o1_tmp + output_width_stride), vo1x1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() 387 vst1_lane_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c01, 0); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2() [all …]
|
D | 3x3s2p0p1c3x4-neon-2x1.c | 31 size_t output_width_stride, in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() argument 42 const size_t output_channel_decrement = output_width * output_width_stride - 4 * sizeof(float); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 260 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 262 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 279 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 280 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 396 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 398 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 414 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1() 415 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1()
|
D | 3x3s2p0p1c3x4-neonfma-2x1.c | 31 size_t output_width_stride, in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() argument 42 const size_t output_channel_decrement = output_width * output_width_stride - 4 * sizeof(float); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 260 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 262 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 279 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 280 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 394 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 396 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 412 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1() 413 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1()
|
D | 3x3s2p1c3x4-neonfma-2x1.c | 31 size_t output_width_stride, in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() argument 42 const size_t output_channel_decrement = output_width * output_width_stride - 4 * sizeof(float); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 260 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 262 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 279 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 280 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 394 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 396 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 412 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1() 413 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1()
|
D | 3x3s2p1c3x4-neon-2x1.c | 31 size_t output_width_stride, in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() argument 42 const size_t output_channel_decrement = output_width * output_width_stride - 4 * sizeof(float); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 260 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 262 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 279 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 280 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 396 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 398 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 414 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1() 415 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1()
|
D | 3x3s2p1c3x8-neonfma-2x2.c | 31 size_t output_width_stride, in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() argument 42 const size_t output_channel_decrement = output_width * output_width_stride - 8 * sizeof(float); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 504 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 507 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 511 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 514 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 519 vst1q_f32((float*) ((uintptr_t) o1_tmp + output_width_stride), vo1x1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 521 vst1q_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 534 vst1_f32((float*) ((uintptr_t) o1_tmp + output_width_stride), vo1x1c01); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() 536 vst1_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c01); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2() [all …]
|
D | 3x3s2p1c3x8-neon-2x2.c | 31 size_t output_width_stride, in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() argument 42 const size_t output_channel_decrement = output_width * output_width_stride - 8 * sizeof(float); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 504 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 507 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 511 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 514 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 519 vst1q_f32((float*) ((uintptr_t) o1_tmp + output_width_stride), vo1x1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 521 vst1q_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 534 vst1_f32((float*) ((uintptr_t) o1_tmp + output_width_stride), vo1x1c01); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() 536 vst1_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c01); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2() [all …]
|
D | 3x3s2p0p1c3x8-neonfma-2x2.c | 31 size_t output_width_stride, in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() argument 42 const size_t output_channel_decrement = output_width * output_width_stride - 8 * sizeof(float); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 504 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 507 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 511 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 514 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 519 vst1q_f32((float*) ((uintptr_t) o1_tmp + output_width_stride), vo1x1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 521 vst1q_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 534 vst1_f32((float*) ((uintptr_t) o1_tmp + output_width_stride), vo1x1c01); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() 536 vst1_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c01); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2() [all …]
|
D | 3x3s2p0p1c3x8-neon-2x2.c | 31 size_t output_width_stride, in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() argument 42 const size_t output_channel_decrement = output_width * output_width_stride - 8 * sizeof(float); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 504 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 507 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 511 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 514 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 519 vst1q_f32((float*) ((uintptr_t) o1_tmp + output_width_stride), vo1x1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 521 vst1q_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c0123); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 534 vst1_f32((float*) ((uintptr_t) o1_tmp + output_width_stride), vo1x1c01); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() 536 vst1_f32((float*) ((uintptr_t) o0_tmp + output_width_stride), vo0x1c01); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2() [all …]
|
D | 3x3s2p0p1c3x8-neonfma-2x1.c | 31 size_t output_width_stride, in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() argument 42 const size_t output_channel_decrement = output_width * output_width_stride - 8 * sizeof(float); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 348 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 351 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 374 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 375 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 550 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 553 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 575 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1() 576 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1()
|
D | 3x3s2p1c3x8-neon-2x1.c | 31 size_t output_width_stride, in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() argument 42 const size_t output_channel_decrement = output_width * output_width_stride - 8 * sizeof(float); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 348 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 351 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 374 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 375 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 552 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 555 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 577 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() 578 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
|
D | 3x3s2p0p1c3x8-neon-2x1.c | 31 size_t output_width_stride, in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() argument 42 const size_t output_channel_decrement = output_width * output_width_stride - 8 * sizeof(float); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 348 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 351 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 374 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 375 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 552 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 555 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 577 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1() 578 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1()
|
D | 3x3s2p1c3x8-neonfma-2x1.c | 31 size_t output_width_stride, in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() argument 42 const size_t output_channel_decrement = output_width * output_width_stride - 8 * sizeof(float); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 348 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 351 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 374 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 375 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 550 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 553 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 575 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() 576 o1 = (float*) ((uintptr_t) o1 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
|
/external/XNNPACK/src/f32-conv-hwc/ |
D | 3x3s2p1c3x4-scalar-1x1.c | 24 size_t output_width_stride, in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1() argument 35 const size_t output_channel_decrement = output_width * output_width_stride - 4 * sizeof(float); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1() 411 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1() 423 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1() 650 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1() 662 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1()
|
D | 3x3s2p0p1c3x4-scalar-1x1.c | 24 size_t output_width_stride, in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1() argument 35 const size_t output_channel_decrement = output_width * output_width_stride - 4 * sizeof(float); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1() 411 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1() 423 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1() 650 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1() 662 o0 = (float*) ((uintptr_t) o0 + output_width_stride); in xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1()
|
D | 3x3s2p0p1c3-neon-x2.c.in | 31 size_t output_width_stride, 42 …const size_t output_channel_decrement = output_width * output_width_stride - ${CHANNEL_TILE} * siz… 437 o${Y} = (float*) ((uintptr_t) o${Y} + output_width_stride); 443 o${Y} = (float*) ((uintptr_t) o${Y} + output_width_stride); 458 … vst1q_f32((float*) ((uintptr_t) o${Y}_tmp + output_width_stride), vo${Y}x1c${ABC[C:C+4]}); 466 … vst1_f32((float*) ((uintptr_t) o${Y}_tmp + output_width_stride), vo${Y}x1c${ABC[0:2]}); 477 … vst1_lane_f32((float*) ((uintptr_t) o${Y}_tmp + output_width_stride), vo${Y}x1c${ABC[0:2]}, 0); 481 o${Y} = (float*) ((uintptr_t) o${Y} + output_width_stride * 2); 710 o${Y} = (float*) ((uintptr_t) o${Y} + output_width_stride); 735 o${Y} = (float*) ((uintptr_t) o${Y} + output_width_stride); [all …]
|
D | 3x3s2p1c3-neon-x2.c.in | 31 size_t output_width_stride, 42 …const size_t output_channel_decrement = output_width * output_width_stride - ${CHANNEL_TILE} * siz… 437 o${Y} = (float*) ((uintptr_t) o${Y} + output_width_stride); 443 o${Y} = (float*) ((uintptr_t) o${Y} + output_width_stride); 458 … vst1q_f32((float*) ((uintptr_t) o${Y}_tmp + output_width_stride), vo${Y}x1c${ABC[C:C+4]}); 466 … vst1_f32((float*) ((uintptr_t) o${Y}_tmp + output_width_stride), vo${Y}x1c${ABC[0:2]}); 477 … vst1_lane_f32((float*) ((uintptr_t) o${Y}_tmp + output_width_stride), vo${Y}x1c${ABC[0:2]}, 0); 481 o${Y} = (float*) ((uintptr_t) o${Y} + output_width_stride * 2); 710 o${Y} = (float*) ((uintptr_t) o${Y} + output_width_stride); 735 o${Y} = (float*) ((uintptr_t) o${Y} + output_width_stride); [all …]
|
/external/XNNPACK/src/operators/ |
D | argmax-pooling-nhwc.c | 253 const size_t output_width_stride = argmax_pooling_op->output_pixel_stride * sizeof(float); in xnn_setup_argmax_pooling2d_nhwc_f32() local 254 const size_t output_height_stride = output_width * output_width_stride; in xnn_setup_argmax_pooling2d_nhwc_f32() 274 .output_increment = output_width_stride - channels * sizeof(float), in xnn_setup_argmax_pooling2d_nhwc_f32()
|
D | max-pooling-nhwc.c | 294 …const size_t output_width_stride = max_pooling_op->output_pixel_stride << log2_output_element_size; in setup_max_pooling2d_nhwc() local 295 const size_t output_height_stride = output_width * output_width_stride; in setup_max_pooling2d_nhwc() 310 .output_increment = output_width_stride - (channels << log2_output_element_size), in setup_max_pooling2d_nhwc()
|
D | average-pooling-nhwc.c | 542 …const size_t output_width_stride = average_pooling_op->output_pixel_stride << log2_output_element_… in setup_average_pooling2d() local 543 const size_t output_height_stride = output_width * output_width_stride; in setup_average_pooling2d() 592 .output_increment = output_width_stride - (channels << log2_output_element_size), in setup_average_pooling2d() 619 .output_increment = output_width_stride - (channels << log2_output_element_size), in setup_average_pooling2d()
|
D | depth-to-space-nhwc.c | 142 .output_width_stride = depth_to_space_op->output_pixel_stride * sizeof(float), in xnn_setup_depth_to_space_nhwc_x32()
|
/external/XNNPACK/src/xnnpack/ |
D | conv.h | 35 size_t output_width_stride, \
|