Lines Matching refs:nc
18 size_t nc, in xnn_pack_f32_gemm_goi_w() argument
32 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_f32_gemm_goi_w()
33 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_f32_gemm_goi_w()
63 k += nc * kc; in xnn_pack_f32_gemm_goi_w()
65 b += nc; in xnn_pack_f32_gemm_goi_w()
72 size_t nc, in xnn_pack_f16_gemm_goi_w() argument
86 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_f16_gemm_goi_w()
87 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_f16_gemm_goi_w()
117 k += nc * kc; in xnn_pack_f16_gemm_goi_w()
119 b += nc; in xnn_pack_f16_gemm_goi_w()
126 size_t nc, in xnn_pack_qu8_gemm_goi_w() argument
140 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_qu8_gemm_goi_w()
141 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_qu8_gemm_goi_w()
172 k += nc * kc; in xnn_pack_qu8_gemm_goi_w()
174 b += nc; in xnn_pack_qu8_gemm_goi_w()
181 size_t nc, in xnn_pack_qs8_gemm_goi_w() argument
194 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_qs8_gemm_goi_w()
195 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_qs8_gemm_goi_w()
226 k += nc * kc; in xnn_pack_qs8_gemm_goi_w()
228 b += nc; in xnn_pack_qs8_gemm_goi_w()
235 size_t nc, in xnn_pack_qs8_gemm_xw_goi_w() argument
248 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_qs8_gemm_xw_goi_w()
249 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_qs8_gemm_xw_goi_w()
280 k += nc * kc; in xnn_pack_qs8_gemm_xw_goi_w()
282 b += nc; in xnn_pack_qs8_gemm_xw_goi_w()
288 size_t nc, in xnn_pack_f32_gemm_io_w() argument
301 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_f32_gemm_io_w()
302 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_f32_gemm_io_w()
314 …+ ((kr_block_start + nr_block_offset * kr) & sr_mask) + kr_block_offset) * nc + (nr_block_start + … in xnn_pack_f32_gemm_io_w()
325 k[(kr_block_start + kr_block_offset) * nc + (nr_block_start + nr_block_offset)]; in xnn_pack_f32_gemm_io_w()
335 size_t nc, in xnn_pack_f16_gemm_io_w() argument
348 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_f16_gemm_io_w()
349 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_f16_gemm_io_w()
361 …+ ((kr_block_start + nr_block_offset * kr) & sr_mask) + kr_block_offset) * nc + (nr_block_start + … in xnn_pack_f16_gemm_io_w()
372 k[(kr_block_start + kr_block_offset) * nc + (nr_block_start + nr_block_offset)]; in xnn_pack_f16_gemm_io_w()
382 size_t nc, in xnn_pack_qu8_gemm_io_w() argument
395 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_qu8_gemm_io_w()
396 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_qu8_gemm_io_w()
416 …const uint8_t kv = k[(kr_block_start + kr_block_offset) * nc + (nr_block_start + nr_block_offset)]; in xnn_pack_qu8_gemm_io_w()
431 size_t nc, in xnn_pack_f32_conv_goki_w() argument
446 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_f32_conv_goki_w()
447 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_f32_conv_goki_w()
479 k += ks * kc * nc; in xnn_pack_f32_conv_goki_w()
481 b += nc; in xnn_pack_f32_conv_goki_w()
488 size_t nc, in xnn_pack_f16_conv_goki_w() argument
503 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_f16_conv_goki_w()
504 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_f16_conv_goki_w()
536 k += ks * kc * nc; in xnn_pack_f16_conv_goki_w()
538 b += nc; in xnn_pack_f16_conv_goki_w()
545 size_t nc, in xnn_pack_qu8_conv_goki_w() argument
560 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_qu8_conv_goki_w()
561 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_qu8_conv_goki_w()
595 k += ks * kc * nc; in xnn_pack_qu8_conv_goki_w()
597 b += nc; in xnn_pack_qu8_conv_goki_w()
604 size_t nc, in xnn_pack_qs8_conv_goki_w() argument
618 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_qs8_conv_goki_w()
619 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_qs8_conv_goki_w()
653 k += ks * kc * nc; in xnn_pack_qs8_conv_goki_w()
655 b += nc; in xnn_pack_qs8_conv_goki_w()
662 size_t nc, in xnn_pack_f32_conv_kgo_w() argument
672 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_f32_conv_kgo_w()
673 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_f32_conv_kgo_w()
683 k[ki * g * nc + (nr_block_start + nr_block_offset)]; in xnn_pack_f32_conv_kgo_w()
689 k += nc; in xnn_pack_f32_conv_kgo_w()
691 b += nc; in xnn_pack_f32_conv_kgo_w()
698 size_t nc, in xnn_pack_f16_conv_kgo_w() argument
708 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_f16_conv_kgo_w()
709 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_f16_conv_kgo_w()
719 k[ki * g * nc + (nr_block_start + nr_block_offset)]; in xnn_pack_f16_conv_kgo_w()
725 k += nc; in xnn_pack_f16_conv_kgo_w()
727 b += nc; in xnn_pack_f16_conv_kgo_w()
734 size_t nc, in xnn_pack_qu8_conv_kgo_w() argument
746 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_qu8_conv_kgo_w()
747 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_qu8_conv_kgo_w()
765 k[ki * g * nc + (nr_block_start + nr_block_offset)]; in xnn_pack_qu8_conv_kgo_w()
773 k += nc; in xnn_pack_qu8_conv_kgo_w()
775 b += nc; in xnn_pack_qu8_conv_kgo_w()
782 size_t nc, in xnn_pack_qs8_conv_kgo_w() argument
793 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_qs8_conv_kgo_w()
794 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_qs8_conv_kgo_w()
812 k[ki * g * nc + (nr_block_start + nr_block_offset)]; in xnn_pack_qs8_conv_kgo_w()
820 k += nc; in xnn_pack_qs8_conv_kgo_w()
822 b += nc; in xnn_pack_qs8_conv_kgo_w()
829 size_t nc, in xnn_pack_f32_deconv_goki_w() argument
853 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_f32_deconv_goki_w()
854 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_f32_deconv_goki_w()
889 k += kh * kw * kc * nc; in xnn_pack_f32_deconv_goki_w()
891 b += nc; in xnn_pack_f32_deconv_goki_w()
898 size_t nc, in xnn_pack_f16_deconv_goki_w() argument
922 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_f16_deconv_goki_w()
923 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_f16_deconv_goki_w()
958 k += kh * kw * kc * nc; in xnn_pack_f16_deconv_goki_w()
960 b += nc; in xnn_pack_f16_deconv_goki_w()
967 size_t nc, in xnn_pack_qu8_deconv_goki_w() argument
992 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_qu8_deconv_goki_w()
993 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_qu8_deconv_goki_w()
1031 k += kh * kw * kc * nc; in xnn_pack_qu8_deconv_goki_w()
1033 b += nc; in xnn_pack_qu8_deconv_goki_w()
1346 size_t nc, in xnn_pack_f32_gemminc_goi_w() argument
1359 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_f32_gemminc_goi_w()
1360 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_f32_gemminc_goi_w()
1384 k += nc * kc; in xnn_pack_f32_gemminc_goi_w()
1390 size_t nc, in xnn_pack_f16_gemminc_goi_w() argument
1403 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_f16_gemminc_goi_w()
1404 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_f16_gemminc_goi_w()
1428 k += nc * kc; in xnn_pack_f16_gemminc_goi_w()
1433 size_t nc, in xnn_pack_f32_dconv_oki_w() argument
1443 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_f32_dconv_oki_w()
1444 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_f32_dconv_oki_w()
1472 size_t nc, in xnn_pack_f16_dconv_oki_w() argument
1482 for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) { in xnn_pack_f16_dconv_oki_w()
1483 const size_t nr_block_size = min(nc - nr_block_start, nr); in xnn_pack_f16_dconv_oki_w()