Lines Matching +full:- +full:oc
3 // This source code is licensed under the BSD-style license found in the
23 #include <xnnpack/microparams-init.h>
64 …led to create %s operator with %" PRIu32 "x%" PRIu32 " kernel: kernel dimensions must be non-zero", in xnn_create_convolution2d_nchw_f32()
71 …ate %s operator with %" PRIu32 "x%" PRIu32 " subsampling: subsampling dimensions must be non-zero", in xnn_create_convolution2d_nchw_f32()
78 …to create %s operator with %" PRIu32 "x%" PRIu32 " dilation: dilation dimensions must be non-zero", in xnn_create_convolution2d_nchw_f32()
85 "failed to create %s operator with %" PRIu32 " groups: number of groups must be non-zero", in xnn_create_convolution2d_nchw_f32()
92 …iled to create %s operator with %zu input channels per group: number of channels must be non-zero", in xnn_create_convolution2d_nchw_f32()
99 …led to create %s operator with %zu output channels per group: number of channels must be non-zero", in xnn_create_convolution2d_nchw_f32()
126 "failed to create %s operator with NaN output lower bound: lower bound must be non-NaN", in xnn_create_convolution2d_nchw_f32()
133 "failed to create %s operator with NaN output upper bound: upper bound must be non-NaN", in xnn_create_convolution2d_nchw_f32()
159 // + 3x3 stride-2 with 3 input channels and NHWC input layout in xnn_create_convolution2d_nchw_f32()
160 // + 3x3 stride-2 depthwise convolution with horizontal padding 1 & no vertical padding in xnn_create_convolution2d_nchw_f32()
161 // + 3x3 stride-1 depthwise convolution with horizontal padding 1 & no vertical padding in xnn_create_convolution2d_nchw_f32()
162 // + 5x5 stride-2 depthwise convolution with horizontal padding 2 & no vertical padding in xnn_create_convolution2d_nchw_f32()
163 // + 5x5 stride-1 depthwise convolution with horizontal padding 2 & no vertical padding in xnn_create_convolution2d_nchw_f32()
223 convolution_op->weights_cache = caches->weights_cache; in xnn_create_convolution2d_nchw_f32()
236 for (size_t oc = 0; oc < round_down_po2(group_output_channels, 4); oc += 4) { in xnn_create_convolution2d_nchw_f32() local
238 const size_t row0_nonzero = (size_t) (kernel[oc * group_input_channels + ic] != 0.0f); in xnn_create_convolution2d_nchw_f32()
239 … const size_t row1_nonzero = (size_t) (kernel[(oc + 1) * group_input_channels + ic] != 0.0f); in xnn_create_convolution2d_nchw_f32()
240 … const size_t row2_nonzero = (size_t) (kernel[(oc + 2) * group_input_channels + ic] != 0.0f); in xnn_create_convolution2d_nchw_f32()
241 … const size_t row3_nonzero = (size_t) (kernel[(oc + 3) * group_input_channels + ic] != 0.0f); in xnn_create_convolution2d_nchw_f32()
248 …for (size_t oc = round_down_po2(group_output_channels, 4); oc < round_down_po2(group_output_channe… in xnn_create_convolution2d_nchw_f32() local
250 const size_t row0_nonzero = (size_t) (kernel[oc * group_input_channels + ic] != 0.0f); in xnn_create_convolution2d_nchw_f32()
251 … const size_t row1_nonzero = (size_t) (kernel[(oc + 1) * group_input_channels + ic] != 0.0f); in xnn_create_convolution2d_nchw_f32()
257 for (size_t oc = round_down_po2(group_output_channels, 2); oc < group_output_channels; oc++) { in xnn_create_convolution2d_nchw_f32() local
259 num_nonzeroes += (size_t) (kernel[oc * group_input_channels + ic] != 0.0f); in xnn_create_convolution2d_nchw_f32()
268 // 4-channel blocks have 90%+ non-zeroes in xnn_create_convolution2d_nchw_f32()
273 // Non-zeroes which don't fit into whole 4-channel blocks, processed one-by-one in xnn_create_convolution2d_nchw_f32()
274 const size_t num_remaining_nonzeroes = num_nonzeroes - num_block4_nonzeroes; in xnn_create_convolution2d_nchw_f32()
278 // 2-channel blocks have 90%+ non-zeroes in xnn_create_convolution2d_nchw_f32()
283 // Non-zeroes which don't fit into whole 2-channel blocks, processed one-by-one in xnn_create_convolution2d_nchw_f32()
284 const size_t num_remaining_nonzeroes = num_nonzeroes - num_block2_nonzeroes; in xnn_create_convolution2d_nchw_f32()
290 …// 1. An array of float values storing non-zero kernel elements, and all (group_output_channels) b… in xnn_create_convolution2d_nchw_f32()
291 // All elements within non-zero block are assumed to be non-zero. in xnn_create_convolution2d_nchw_f32()
294 …// 3. An array of uint32_t values storing the number of non-zero kernel elements per each output c… in xnn_create_convolution2d_nchw_f32()
296 // corresponding to successive non-zero blocks. in xnn_create_convolution2d_nchw_f32()
300 convolution_op->packed_weights.pointer = xnn_allocate_simd_memory(packed_weights_size); in xnn_create_convolution2d_nchw_f32()
301 if (convolution_op->packed_weights.pointer == NULL) { in xnn_create_convolution2d_nchw_f32()
307 convolution_op->num_nonzero_values = num_nonzero_values; in xnn_create_convolution2d_nchw_f32()
308 convolution_op->num_nonzero_blocks = num_nonzero_blocks; in xnn_create_convolution2d_nchw_f32()
309 convolution_op->num_output_channel_blocks = num_output_channel_blocks; in xnn_create_convolution2d_nchw_f32()
311 float* nonzero_values = convolution_op->packed_weights.pointer; in xnn_create_convolution2d_nchw_f32()
343 … const int64_t diff = (int64_t) ((uint64_t) ic - (uint64_t) last_ic) * (int64_t) sizeof(float); in xnn_create_convolution2d_nchw_f32()
358 …for (size_t oc = round_down_po2(group_output_channels, output_channels_block_size); oc < group_out… in xnn_create_convolution2d_nchw_f32() local
360 *nonzero_values++ = bias[oc]; in xnn_create_convolution2d_nchw_f32()
365 const float weight = kernel[oc * group_input_channels + ic]; in xnn_create_convolution2d_nchw_f32()
371 … const int64_t diff = (int64_t) ((uint64_t) ic - (uint64_t) last_ic) * (int64_t) sizeof(float); in xnn_create_convolution2d_nchw_f32()
386 // If there are any non-zero elements, we have to return to the initial input channel. in xnn_create_convolution2d_nchw_f32()
388 …const int64_t diff = (int64_t) ((uint64_t) first_ic - (uint64_t) last_ic) * (int64_t) sizeof(float… in xnn_create_convolution2d_nchw_f32()
396 convolution_op->first_input_channel = first_ic; in xnn_create_convolution2d_nchw_f32()
398 convolution_op->ukernel.spmm = (struct xnn_ukernel_spmm) { in xnn_create_convolution2d_nchw_f32()
399 .function = spmm_parameters->ukernel, in xnn_create_convolution2d_nchw_f32()
400 .mr = spmm_parameters->mr, in xnn_create_convolution2d_nchw_f32()
431 convolution_op->packed_weights.offset = xnn_get_or_insert_weights_cache( in xnn_create_convolution2d_nchw_f32()
432 convolution_op->weights_cache, weights_ptr, aligned_total_weights_size); in xnn_create_convolution2d_nchw_f32()
435 convolution_op->ukernel.conv2d = (struct xnn_ukernel_conv2d) { in xnn_create_convolution2d_nchw_f32()
471 convolution_op->packed_weights.offset = xnn_get_or_insert_weights_cache( in xnn_create_convolution2d_nchw_f32()
472 convolution_op->weights_cache, weights_ptr, aligned_total_weights_size); in xnn_create_convolution2d_nchw_f32()
475 convolution_op->ukernel.dwconv2d = (struct xnn_ukernel_dwconv2d) { in xnn_create_convolution2d_nchw_f32()
476 .chw_function = dwconv2d_parameters->ukernel, in xnn_create_convolution2d_nchw_f32()
477 .output_width_tile = dwconv2d_parameters->output_width_tile, in xnn_create_convolution2d_nchw_f32()
486 convolution_op->padding_top = input_padding_top; in xnn_create_convolution2d_nchw_f32()
487 convolution_op->padding_right = input_padding_right; in xnn_create_convolution2d_nchw_f32()
488 convolution_op->padding_bottom = input_padding_bottom; in xnn_create_convolution2d_nchw_f32()
489 convolution_op->padding_left = input_padding_left; in xnn_create_convolution2d_nchw_f32()
491 convolution_op->kernel_height = kernel_height; in xnn_create_convolution2d_nchw_f32()
492 convolution_op->kernel_width = kernel_width; in xnn_create_convolution2d_nchw_f32()
493 convolution_op->stride_height = subsampling_height; in xnn_create_convolution2d_nchw_f32()
494 convolution_op->stride_width = subsampling_width; in xnn_create_convolution2d_nchw_f32()
495 convolution_op->dilation_height = dilation_height; in xnn_create_convolution2d_nchw_f32()
496 convolution_op->dilation_width = dilation_width; in xnn_create_convolution2d_nchw_f32()
497 convolution_op->groups = groups; in xnn_create_convolution2d_nchw_f32()
498 convolution_op->group_input_channels = group_input_channels; in xnn_create_convolution2d_nchw_f32()
499 convolution_op->group_output_channels = group_output_channels; in xnn_create_convolution2d_nchw_f32()
500 convolution_op->input_pixel_stride = input_channel_stride; in xnn_create_convolution2d_nchw_f32()
501 convolution_op->output_pixel_stride = output_channel_stride; in xnn_create_convolution2d_nchw_f32()
504 xnn_init_f32_chw_params(&convolution_op->params.f32_chw, 0, output_min, output_max); in xnn_create_convolution2d_nchw_f32()
506 xnn_init_f32_minmax_params(&convolution_op->params.f32_minmax, output_min, output_max); in xnn_create_convolution2d_nchw_f32()
509 convolution_op->type = xnn_operator_type_convolution_nchw_f32; in xnn_create_convolution2d_nchw_f32()
510 convolution_op->ukernel.type = ukernel_type; in xnn_create_convolution2d_nchw_f32()
511 convolution_op->flags = flags; in xnn_create_convolution2d_nchw_f32()
513 convolution_op->state = xnn_run_state_invalid; in xnn_create_convolution2d_nchw_f32()
538 convolution_op->state = xnn_run_state_invalid; in setup_convolution2d_nchw()
548 "failed to setup %s operator with %zux%zu input: input dimensions must be non-zero", in setup_convolution2d_nchw()
554 convolution_op->state = xnn_run_state_skip; in setup_convolution2d_nchw()
558 …if (convolution_op->weights_cache != NULL && !xnn_weights_cache_is_finalized(convolution_op->weigh… in setup_convolution2d_nchw()
564 convolution_op->batch_size = batch_size; in setup_convolution2d_nchw()
565 convolution_op->input_height = input_height; in setup_convolution2d_nchw()
566 convolution_op->input_width = input_width; in setup_convolution2d_nchw()
567 convolution_op->input = input; in setup_convolution2d_nchw()
568 convolution_op->output = output; in setup_convolution2d_nchw()
571 convolution_op->padding_top + input_height + convolution_op->padding_bottom, in setup_convolution2d_nchw()
572 convolution_op->kernel_height, in setup_convolution2d_nchw()
573 convolution_op->dilation_height, in setup_convolution2d_nchw()
574 convolution_op->stride_height); in setup_convolution2d_nchw()
576 convolution_op->padding_left + input_width + convolution_op->padding_right, in setup_convolution2d_nchw()
577 convolution_op->kernel_width, in setup_convolution2d_nchw()
578 convolution_op->dilation_width, in setup_convolution2d_nchw()
579 convolution_op->stride_width); in setup_convolution2d_nchw()
581 …const size_t input_batch_stride = (input_height * input_width * convolution_op->input_pixel_stride… in setup_convolution2d_nchw()
582 …const size_t output_batch_stride = (output_height * output_width * convolution_op->output_pixel_st… in setup_convolution2d_nchw()
583 switch (convolution_op->ukernel.type) { in setup_convolution2d_nchw()
586 const size_t num_nonzero_values = convolution_op->num_nonzero_values; in setup_convolution2d_nchw()
587 const size_t num_nonzero_blocks = convolution_op->num_nonzero_blocks; in setup_convolution2d_nchw()
588 const size_t num_output_channel_blocks = convolution_op->num_output_channel_blocks; in setup_convolution2d_nchw()
590 convolution_op->num_nonzero_values = num_nonzero_values; in setup_convolution2d_nchw()
591 convolution_op->num_nonzero_blocks = num_nonzero_blocks; in setup_convolution2d_nchw()
592 convolution_op->num_output_channel_blocks = num_output_channel_blocks; in setup_convolution2d_nchw()
595 …int32_t* input_increments = (int32_t*) (nonzero_values + num_nonzero_values + convolution_op->grou… in setup_convolution2d_nchw()
612 convolution_op->context.spmm = (struct spmm_context) { in setup_convolution2d_nchw()
613 .n = convolution_op->group_output_channels, in setup_convolution2d_nchw()
615 ….input = (const void*) ((uintptr_t) input + (convolution_op->first_input_channel * input_size * si… in setup_convolution2d_nchw()
622 .ukernel = convolution_op->ukernel.spmm.function, in setup_convolution2d_nchw()
624 … memcpy(&convolution_op->context.spmm.params, params, sizeof(convolution_op->context.spmm.params)); in setup_convolution2d_nchw()
626 const size_t mr = convolution_op->ukernel.spmm.mr; in setup_convolution2d_nchw()
639 convolution_op->compute.type = xnn_parallelization_type_2d_tile_1d; in setup_convolution2d_nchw()
640 convolution_op->compute.task_2d_tile_1d = (pthreadpool_task_2d_tile_1d_t) xnn_compute_spmm; in setup_convolution2d_nchw()
641 convolution_op->compute.range[0] = batch_size; in setup_convolution2d_nchw()
642 convolution_op->compute.range[1] = input_size * sizeof(float); in setup_convolution2d_nchw()
643 convolution_op->compute.tile[0] = mc * sizeof(float); in setup_convolution2d_nchw()
644 convolution_op->state = xnn_run_state_ready; in setup_convolution2d_nchw()
650 …const size_t zero_size = (input_width * convolution_op->group_input_channels << log2_input_element… in setup_convolution2d_nchw()
651 void* zero_buffer = xnn_reallocate_memory(convolution_op->zero_buffer, zero_size); in setup_convolution2d_nchw()
659 convolution_op->zero_buffer = zero_buffer; in setup_convolution2d_nchw()
661 convolution_op->context.conv2d = (struct conv2d_context) { in setup_convolution2d_nchw()
670 .input_padding_top = convolution_op->padding_top, in setup_convolution2d_nchw()
671 .output_channels = convolution_op->group_output_channels, in setup_convolution2d_nchw()
674 .hwc2chw_ukernel = convolution_op->ukernel.conv2d.hwc2chw_function, in setup_convolution2d_nchw()
676 …memcpy(&convolution_op->context.conv2d.params, params, sizeof(convolution_op->context.conv2d.param… in setup_convolution2d_nchw()
678 const size_t output_height_tile = convolution_op->ukernel.conv2d.output_height_tile; in setup_convolution2d_nchw()
692 convolution_op->compute.type = xnn_parallelization_type_2d_tile_1d; in setup_convolution2d_nchw()
693 …convolution_op->compute.task_2d_tile_1d = (pthreadpool_task_2d_tile_1d_t) xnn_compute_conv2d_hwc2c… in setup_convolution2d_nchw()
694 convolution_op->compute.range[0] = batch_size; in setup_convolution2d_nchw()
695 convolution_op->compute.range[1] = output_height; in setup_convolution2d_nchw()
696 convolution_op->compute.tile[0] = output_height_slice; in setup_convolution2d_nchw()
697 convolution_op->state = xnn_run_state_ready; in setup_convolution2d_nchw()
704 void* zero_buffer = xnn_reallocate_memory(convolution_op->zero_buffer, zero_size); in setup_convolution2d_nchw()
712 convolution_op->zero_buffer = zero_buffer; in setup_convolution2d_nchw()
715 convolution_op->context.dwconv2d = (struct dwconv2d_context) { in setup_convolution2d_nchw()
720 .input_padding_top = convolution_op->padding_top, in setup_convolution2d_nchw()
725 … (convolution_op->kernel_height * convolution_op->kernel_width << log2_filter_element_size), in setup_convolution2d_nchw()
729 .chw_ukernel = convolution_op->ukernel.dwconv2d.chw_function, in setup_convolution2d_nchw()
731 …memcpy(&convolution_op->context.dwconv2d.params, chw_params, sizeof(convolution_op->context.dwconv… in setup_convolution2d_nchw()
733 convolution_op->compute.type = xnn_parallelization_type_2d; in setup_convolution2d_nchw()
734 convolution_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_dwconv2d_chw; in setup_convolution2d_nchw()
735 convolution_op->compute.range[0] = batch_size; in setup_convolution2d_nchw()
736 convolution_op->compute.range[1] = convolution_op->groups; in setup_convolution2d_nchw()
737 convolution_op->state = xnn_run_state_ready; in setup_convolution2d_nchw()
755 if (convolution_op->type != xnn_operator_type_convolution_nchw_f32) { in xnn_setup_convolution2d_nchw_f32()
758 xnn_operator_type_to_string(convolution_op->type)); in xnn_setup_convolution2d_nchw_f32()
770 &convolution_op->params.f32_minmax, in xnn_setup_convolution2d_nchw_f32()
771 &convolution_op->params.f32_chw, in xnn_setup_convolution2d_nchw_f32()