Lines Matching refs:nr_block_start
30 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_q8_gemm_goi_w() local
31 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_q8_gemm_goi_w()
35 *((int32_t*) packed_w) = b[nr_block_start + nr_block_offset] + boff; in xnn_pack_q8_gemm_goi_w()
51 …const uint8_t kv = k[(nr_block_start + nr_block_offset) * kc + (kr_block_start + kr_block_offset)]; in xnn_pack_q8_gemm_goi_w()
81 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_q8_gemm_io_w() local
82 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_q8_gemm_io_w()
86 *((int32_t*) packed_w) = b[nr_block_start + nr_block_offset] + boff; in xnn_pack_q8_gemm_io_w()
102 …const uint8_t kv = k[(kr_block_start + kr_block_offset) * nc + (nr_block_start + nr_block_offset)]; in xnn_pack_q8_gemm_io_w()
130 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_q8_conv_goki_w() local
131 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_q8_conv_goki_w()
135 *((int32_t*) packed_w) = b[nr_block_start + nr_block_offset] + boff; in xnn_pack_q8_conv_goki_w()
153 … k[((nr_block_start + nr_block_offset) * ks + ki) * kc + (kr_block_start + kr_block_offset)]; in xnn_pack_q8_conv_goki_w()
186 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_q8_conv_kgo_w() local
187 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_q8_conv_kgo_w()
191 *((int32_t*) packed_w) = b[nr_block_start + nr_block_offset] + boff; in xnn_pack_q8_conv_kgo_w()
205 k[ki * g * nc + (nr_block_start + nr_block_offset)]; in xnn_pack_q8_conv_kgo_w()
244 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_q8_deconv_goki_w() local
245 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_q8_deconv_goki_w()
249 *((int32_t*) packed_w) = b[nr_block_start + nr_block_offset] + boff; in xnn_pack_q8_deconv_goki_w()
268 …k[(((nr_block_start + nr_block_offset) * kh + ky) * kw + kx) * kc + (kr_block_start + kr_block_off… in xnn_pack_q8_deconv_goki_w()
385 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_f16_gemm_goi_w() local
386 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_f16_gemm_goi_w()
389 packed_w[nr_block_offset] = b[nr_block_start + nr_block_offset]; in xnn_pack_f16_gemm_goi_w()
398 k[(nr_block_start + nr_block_offset) * kc + (kr_block_start + kr_block_offset)]; in xnn_pack_f16_gemm_goi_w()
421 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_f16_gemm_io_w() local
422 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_f16_gemm_io_w()
425 packed_w[nr_block_offset] = b[nr_block_start + nr_block_offset]; in xnn_pack_f16_gemm_io_w()
434 k[(kr_block_start + kr_block_offset) * nc + (nr_block_start + nr_block_offset)]; in xnn_pack_f16_gemm_io_w()
458 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_f32_gemm_goi_w() local
459 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_f32_gemm_goi_w()
462 packed_w[nr_block_offset] = b[nr_block_start + nr_block_offset]; in xnn_pack_f32_gemm_goi_w()
471 …k[(nr_block_start + nr_block_offset) * kc + round_down_po2(kr_block_start, skr) + ((kr_block_start… in xnn_pack_f32_gemm_goi_w()
482 k[(nr_block_start + nr_block_offset) * kc + (kr_block_start + kr_block_offset)]; in xnn_pack_f32_gemm_goi_w()
509 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_f32_gemm_io_w() local
510 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_f32_gemm_io_w()
513 packed_w[nr_block_offset] = b[nr_block_start + nr_block_offset]; in xnn_pack_f32_gemm_io_w()
522 …_start + nr_block_offset * kr) & sr_mask) + kr_block_offset) * nc + (nr_block_start + nr_block_off… in xnn_pack_f32_gemm_io_w()
533 k[(kr_block_start + kr_block_offset) * nc + (nr_block_start + nr_block_offset)]; in xnn_pack_f32_gemm_io_w()
556 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_f32_gemminc_goi_w() local
557 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_f32_gemminc_goi_w()
563 …k[(nr_block_start + nr_block_offset) * kc + round_down_po2(kr_block_start, skr) + ((kr_block_start… in xnn_pack_f32_gemminc_goi_w()
574 k[(nr_block_start + nr_block_offset) * kc + (kr_block_start + kr_block_offset)]; in xnn_pack_f32_gemminc_goi_w()
601 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_f32_conv_goki_w() local
602 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_f32_conv_goki_w()
605 packed_w[nr_block_offset] = b[nr_block_start + nr_block_offset]; in xnn_pack_f32_conv_goki_w()
615 …k[((nr_block_start + nr_block_offset) * ks + ki) * kc + round_down_po2(kr_block_start, skr) + ((kr… in xnn_pack_f32_conv_goki_w()
626 … k[((nr_block_start + nr_block_offset) * ks + ki) * kc + (kr_block_start + kr_block_offset)]; in xnn_pack_f32_conv_goki_w()
652 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_f32_conv_kgo_w() local
653 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_f32_conv_kgo_w()
656 packed_w[nr_block_offset] = b[nr_block_start + nr_block_offset]; in xnn_pack_f32_conv_kgo_w()
663 k[ki * g * nc + (nr_block_start + nr_block_offset)]; in xnn_pack_f32_conv_kgo_w()
686 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_f32_dconv_oki_w() local
687 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_f32_dconv_oki_w()
703 …*packed_w++ = k[(((nr_block_start + min(nr_block_offset, nr_block_size - 1)) * kh + ky) * kw + kx)… in xnn_pack_f32_dconv_oki_w()
739 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_f32_deconv_goki_w() local
740 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_f32_deconv_goki_w()
743 packed_w[nr_block_offset] = b[nr_block_start + nr_block_offset]; in xnn_pack_f32_deconv_goki_w()
753 …k[(((nr_block_start + nr_block_offset) * kh + ky) * kw + kx) * kc + round_down_po2(kr_block_start,… in xnn_pack_f32_deconv_goki_w()
764 …k[(((nr_block_start + nr_block_offset) * kh + ky) * kw + kx) * kc + (kr_block_start + kr_block_off… in xnn_pack_f32_deconv_goki_w()