• Home
  • Raw
  • Download

Lines Matching refs:kc

19   size_t kc,  in xnn_pack_f32_gemm_goi_w()  argument
29 const size_t skc = round_down_po2(kc, skr); in xnn_pack_f32_gemm_goi_w()
45 …k[(nr_block_start + nr_block_offset) * kc + round_down_po2(kr_block_start, skr) + ((kr_block_start… in xnn_pack_f32_gemm_goi_w()
51 for (size_t kr_block_start = skc; kr_block_start < kc; kr_block_start += kr) { in xnn_pack_f32_gemm_goi_w()
52 const size_t kr_block_size = min(kc - kr_block_start, kr); in xnn_pack_f32_gemm_goi_w()
56 k[(nr_block_start + nr_block_offset) * kc + (kr_block_start + kr_block_offset)]; in xnn_pack_f32_gemm_goi_w()
63 k += nc * kc; in xnn_pack_f32_gemm_goi_w()
73 size_t kc, in xnn_pack_f16_gemm_goi_w() argument
83 const size_t skc = round_down_po2(kc, skr); in xnn_pack_f16_gemm_goi_w()
99 …k[(nr_block_start + nr_block_offset) * kc + round_down_po2(kr_block_start, skr) + ((kr_block_start… in xnn_pack_f16_gemm_goi_w()
105 for (size_t kr_block_start = skc; kr_block_start < kc; kr_block_start += kr) { in xnn_pack_f16_gemm_goi_w()
106 const size_t kr_block_size = min(kc - kr_block_start, kr); in xnn_pack_f16_gemm_goi_w()
110 k[(nr_block_start + nr_block_offset) * kc + (kr_block_start + kr_block_offset)]; in xnn_pack_f16_gemm_goi_w()
117 k += nc * kc; in xnn_pack_f16_gemm_goi_w()
127 size_t kc, in xnn_pack_qu8_gemm_goi_w() argument
138 const int32_t boff = (int32_t) kc * izp * (int32_t) params->kernel_zero_point; in xnn_pack_qu8_gemm_goi_w()
156 for (size_t kr_block_start = 0; kr_block_start < kc; kr_block_start += kr) { in xnn_pack_qu8_gemm_goi_w()
157 const size_t kr_block_size = min(kc - kr_block_start, kr); in xnn_pack_qu8_gemm_goi_w()
161 …const uint8_t kv = k[(nr_block_start + nr_block_offset) * kc + (kr_block_start + kr_block_offset)]; in xnn_pack_qu8_gemm_goi_w()
172 k += nc * kc; in xnn_pack_qu8_gemm_goi_w()
182 size_t kc, in xnn_pack_qs8_gemm_goi_w() argument
210 for (size_t kr_block_start = 0; kr_block_start < kc; kr_block_start += kr) { in xnn_pack_qs8_gemm_goi_w()
211 const size_t kr_block_size = min(kc - kr_block_start, kr); in xnn_pack_qs8_gemm_goi_w()
215 … const int8_t kv = k[(nr_block_start + nr_block_offset) * kc + (kr_block_start + kr_block_offset)]; in xnn_pack_qs8_gemm_goi_w()
226 k += nc * kc; in xnn_pack_qs8_gemm_goi_w()
236 size_t kc, in xnn_pack_qs8_gemm_xw_goi_w() argument
264 for (size_t kr_block_start = 0; kr_block_start < kc; kr_block_start += kr) { in xnn_pack_qs8_gemm_xw_goi_w()
265 const size_t kr_block_size = min(kc - kr_block_start, kr); in xnn_pack_qs8_gemm_xw_goi_w()
269 … const int8_t kv = k[(nr_block_start + nr_block_offset) * kc + (kr_block_start + kr_block_offset)]; in xnn_pack_qs8_gemm_xw_goi_w()
280 k += nc * kc; in xnn_pack_qs8_gemm_xw_goi_w()
289 size_t kc, in xnn_pack_f32_gemm_io_w() argument
299 const size_t skc = round_down_po2(kc, skr); in xnn_pack_f32_gemm_io_w()
320 for (size_t kr_block_start = skc; kr_block_start < kc; kr_block_start += kr) { in xnn_pack_f32_gemm_io_w()
321 const size_t kr_block_size = min(kc - kr_block_start, kr); in xnn_pack_f32_gemm_io_w()
336 size_t kc, in xnn_pack_f16_gemm_io_w() argument
346 const size_t skc = round_down_po2(kc, skr); in xnn_pack_f16_gemm_io_w()
367 for (size_t kr_block_start = skc; kr_block_start < kc; kr_block_start += kr) { in xnn_pack_f16_gemm_io_w()
368 const size_t kr_block_size = min(kc - kr_block_start, kr); in xnn_pack_f16_gemm_io_w()
383 size_t kc, in xnn_pack_qu8_gemm_io_w() argument
394 const int32_t boff = (int32_t) kc * izp * (int32_t) params->kernel_zero_point; in xnn_pack_qu8_gemm_io_w()
411 for (size_t kr_block_start = 0; kr_block_start < kc; kr_block_start += kr) { in xnn_pack_qu8_gemm_io_w()
412 const size_t kr_block_size = min(kc - kr_block_start, kr); in xnn_pack_qu8_gemm_io_w()
433 size_t kc, in xnn_pack_f32_conv_goki_w() argument
443 const size_t skc = round_down_po2(kc, skr); in xnn_pack_f32_conv_goki_w()
460 …k[((nr_block_start + nr_block_offset) * ks + ki) * kc + round_down_po2(kr_block_start, skr) + ((kr… in xnn_pack_f32_conv_goki_w()
466 for (size_t kr_block_start = skc; kr_block_start < kc; kr_block_start += kr) { in xnn_pack_f32_conv_goki_w()
467 const size_t kr_block_size = min(kc - kr_block_start, kr); in xnn_pack_f32_conv_goki_w()
471 … k[((nr_block_start + nr_block_offset) * ks + ki) * kc + (kr_block_start + kr_block_offset)]; in xnn_pack_f32_conv_goki_w()
479 k += ks * kc * nc; in xnn_pack_f32_conv_goki_w()
490 size_t kc, in xnn_pack_f16_conv_goki_w() argument
500 const size_t skc = round_down_po2(kc, skr); in xnn_pack_f16_conv_goki_w()
517 …k[((nr_block_start + nr_block_offset) * ks + ki) * kc + round_down_po2(kr_block_start, skr) + ((kr… in xnn_pack_f16_conv_goki_w()
523 for (size_t kr_block_start = skc; kr_block_start < kc; kr_block_start += kr) { in xnn_pack_f16_conv_goki_w()
524 const size_t kr_block_size = min(kc - kr_block_start, kr); in xnn_pack_f16_conv_goki_w()
528 … k[((nr_block_start + nr_block_offset) * ks + ki) * kc + (kr_block_start + kr_block_offset)]; in xnn_pack_f16_conv_goki_w()
536 k += ks * kc * nc; in xnn_pack_f16_conv_goki_w()
547 size_t kc, in xnn_pack_qu8_conv_goki_w() argument
558 const int32_t boff = (int32_t) ks * (int32_t) kc * izp * (int32_t) params->kernel_zero_point; in xnn_pack_qu8_conv_goki_w()
577 for (size_t kr_block_start = 0; kr_block_start < kc; kr_block_start += kr) { in xnn_pack_qu8_conv_goki_w()
578 const size_t kr_block_size = min(kc - kr_block_start, kr); in xnn_pack_qu8_conv_goki_w()
583 … k[((nr_block_start + nr_block_offset) * ks + ki) * kc + (kr_block_start + kr_block_offset)]; in xnn_pack_qu8_conv_goki_w()
595 k += ks * kc * nc; in xnn_pack_qu8_conv_goki_w()
606 size_t kc, in xnn_pack_qs8_conv_goki_w() argument
635 for (size_t kr_block_start = 0; kr_block_start < kc; kr_block_start += kr) { in xnn_pack_qs8_conv_goki_w()
636 const size_t kr_block_size = min(kc - kr_block_start, kr); in xnn_pack_qs8_conv_goki_w()
641 … k[((nr_block_start + nr_block_offset) * ks + ki) * kc + (kr_block_start + kr_block_offset)]; in xnn_pack_qs8_conv_goki_w()
653 k += ks * kc * nc; in xnn_pack_qs8_conv_goki_w()
832 size_t kc, in xnn_pack_f32_deconv_goki_w() argument
845 const size_t skc = round_down_po2(kc, skr); in xnn_pack_f32_deconv_goki_w()
867 …k[(((nr_block_start + nr_block_offset) * kh + ky) * kw + kx) * kc + round_down_po2(kr_block_start,… in xnn_pack_f32_deconv_goki_w()
873 for (size_t kr_block_start = skc; kr_block_start < kc; kr_block_start += kr) { in xnn_pack_f32_deconv_goki_w()
874 const size_t kr_block_size = min(kc - kr_block_start, kr); in xnn_pack_f32_deconv_goki_w()
878 …k[(((nr_block_start + nr_block_offset) * kh + ky) * kw + kx) * kc + (kr_block_start + kr_block_off… in xnn_pack_f32_deconv_goki_w()
889 k += kh * kw * kc * nc; in xnn_pack_f32_deconv_goki_w()
901 size_t kc, in xnn_pack_f16_deconv_goki_w() argument
914 const size_t skc = round_down_po2(kc, skr); in xnn_pack_f16_deconv_goki_w()
936 …k[(((nr_block_start + nr_block_offset) * kh + ky) * kw + kx) * kc + round_down_po2(kr_block_start,… in xnn_pack_f16_deconv_goki_w()
942 for (size_t kr_block_start = skc; kr_block_start < kc; kr_block_start += kr) { in xnn_pack_f16_deconv_goki_w()
943 const size_t kr_block_size = min(kc - kr_block_start, kr); in xnn_pack_f16_deconv_goki_w()
947 …k[(((nr_block_start + nr_block_offset) * kh + ky) * kw + kx) * kc + (kr_block_start + kr_block_off… in xnn_pack_f16_deconv_goki_w()
958 k += kh * kw * kc * nc; in xnn_pack_f16_deconv_goki_w()
970 size_t kc, in xnn_pack_qu8_deconv_goki_w() argument
991 …) divide_round_up(kh - oy, sh) * (int32_t) divide_round_up(kw - ox, sw) * (int32_t) kc * izp * kzp; in xnn_pack_qu8_deconv_goki_w()
1010 for (size_t kr_block_start = 0; kr_block_start < kc; kr_block_start += kr) { in xnn_pack_qu8_deconv_goki_w()
1011 const size_t kr_block_size = min(kc - kr_block_start, kr); in xnn_pack_qu8_deconv_goki_w()
1016 …k[(((nr_block_start + nr_block_offset) * kh + ky) * kw + kx) * kc + (kr_block_start + kr_block_off… in xnn_pack_qu8_deconv_goki_w()
1031 k += kh * kw * kc * nc; in xnn_pack_qu8_deconv_goki_w()
1347 size_t kc, in xnn_pack_f32_gemminc_goi_w() argument
1356 const size_t skc = round_down_po2(kc, skr); in xnn_pack_f32_gemminc_goi_w()
1366 …k[(nr_block_start + nr_block_offset) * kc + round_down_po2(kr_block_start, skr) + ((kr_block_start… in xnn_pack_f32_gemminc_goi_w()
1372 for (size_t kr_block_start = skc; kr_block_start < kc; kr_block_start += kr) { in xnn_pack_f32_gemminc_goi_w()
1373 const size_t kr_block_size = min(kc - kr_block_start, kr); in xnn_pack_f32_gemminc_goi_w()
1377 k[(nr_block_start + nr_block_offset) * kc + (kr_block_start + kr_block_offset)]; in xnn_pack_f32_gemminc_goi_w()
1384 k += nc * kc; in xnn_pack_f32_gemminc_goi_w()
1391 size_t kc, in xnn_pack_f16_gemminc_goi_w() argument
1400 const size_t skc = round_down_po2(kc, skr); in xnn_pack_f16_gemminc_goi_w()
1410 …k[(nr_block_start + nr_block_offset) * kc + round_down_po2(kr_block_start, skr) + ((kr_block_start… in xnn_pack_f16_gemminc_goi_w()
1416 for (size_t kr_block_start = skc; kr_block_start < kc; kr_block_start += kr) { in xnn_pack_f16_gemminc_goi_w()
1417 const size_t kr_block_size = min(kc - kr_block_start, kr); in xnn_pack_f16_gemminc_goi_w()
1421 k[(nr_block_start + nr_block_offset) * kc + (kr_block_start + kr_block_offset)]; in xnn_pack_f16_gemminc_goi_w()
1428 k += nc * kc; in xnn_pack_f16_gemminc_goi_w()
1434 size_t kc, in xnn_pack_f32_dconv_oki_w() argument
1457 for (size_t c = 0; c < kc; c++) { in xnn_pack_f32_dconv_oki_w()
1460 …+ = k[(((nr_block_start + min(nr_block_offset, nr_block_size - 1)) * kh + ky) * kw + kx) * kc + c]; in xnn_pack_f32_dconv_oki_w()
1473 size_t kc, in xnn_pack_f16_dconv_oki_w() argument
1496 for (size_t c = 0; c < kc; c++) { in xnn_pack_f16_dconv_oki_w()
1499 …+ = k[(((nr_block_start + min(nr_block_offset, nr_block_size - 1)) * kh + ky) * kw + kx) * kc + c]; in xnn_pack_f16_dconv_oki_w()