# Copyright 2024 Arm Limited and/or its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import unittest

from typing import Tuple

import pytest

import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.ops.test_conv1d import Conv1d
from executorch.backends.arm.test.ops.test_conv2d import Conv2d
from executorch.backends.arm.test.tester.arm_tester import ArmTester
from executorch.exir.backend.backend_details import CompileSpec
from parameterized import parameterized

"""
The configuration when
  groups == in_channels and
  out_channels = K * in_channels
  where K is a positive integer
is termed in literature as depthwise convolution.
"""

# ---------------------------------------------------------------------------
# Single-convolution fixtures. In every fixture below, `groups` equals
# `in_channels` and `out_channels` is an integer multiple of `in_channels`.
# ---------------------------------------------------------------------------

# NOTE(review): the other fixture names lead with the kernel size, but this
# one is named "3_..." while using kernel_size=7 -- confirm the intended name.
dw_conv1d_3_1x3x14_gp3_st1 = Conv1d(
    in_channels=3,
    out_channels=3,
    kernel_size=7,
    stride=1,
    groups=3,
    length=14,
    batches=1,
    padding=3,
)

dw_conv1d_2_1x6x4_gp6_st1 = Conv1d(
    in_channels=6,
    out_channels=12,
    kernel_size=2,
    stride=1,
    groups=6,
    padding=0,
    length=4,
    batches=1,
)

dw_conv2d_2x2_1x6x4x4_gp6_st1 = Conv2d(
    in_channels=6,
    out_channels=12,
    kernel_size=(2, 2),
    stride=(1, 1),
    groups=6,
    padding=0,
    width=4,
    height=4,
    batches=1,
)

dw_conv1d_3_1x3x256_gp3_st1 = Conv1d(
    in_channels=3,
    out_channels=3,
    kernel_size=3,
    stride=1,
    groups=3,
    padding=0,
    length=256,
    batches=1,
)

dw_conv2d_3x3_1x3x256x256_gp3_st1 = Conv2d(
    in_channels=3,
    out_channels=3,
    kernel_size=(3, 3),
    stride=(1, 1),
    groups=3,
    padding=0,
    width=256,
    height=256,
    batches=1,
)

dw_conv2d_3x3_1x4x256x256_gp4_st1 = Conv2d(
    in_channels=4,
    out_channels=8,
    kernel_size=(3, 3),
    stride=(1, 1),
    groups=4,
    padding=0,
    width=256,
    height=256,
    batches=1,
)

dw_conv2d_3x3_2x8x198x198_gp8_st3 = Conv2d(
    in_channels=8,
    out_channels=16,
    kernel_size=(3, 3),
    stride=3,
    groups=8,
    padding=0,
    width=198,
    height=198,
    batches=2,
)

dw_conv2d_3x3_1x4x256x256_gp4_nobias = Conv2d(
    in_channels=4,
    out_channels=8,
    kernel_size=(3, 3),
    stride=1,
    groups=4,
    bias=False,
    width=256,
    height=256,
    batches=1,
)

# ---------------------------------------------------------------------------
# Fixtures built with nbr_conv=2; the per-convolution settings are given as
# two-element lists.
# ---------------------------------------------------------------------------

two_dw_conv1d = Conv1d(
    nbr_conv=2,
    length=64,
    in_channels=[4, 8],
    out_channels=[8, 24],
    kernel_size=[3, 3],
    stride=[1, 1],
    padding=[0, 0],
    groups=[4, 8],
    bias=[True, True],
    batches=1,
)

two_dw_conv2d = Conv2d(
    nbr_conv=2,
    width=64,
    height=64,
    in_channels=[4, 8],
    out_channels=[8, 24],
    kernel_size=[(3, 3), (3, 3)],
    stride=[1, 1],
    padding=[0, 0],
    groups=[4, 8],
    bias=[True, True],
    batches=2,
)

# Each entry pairs a readable test name with its model so that a failing
# parameterized test is easy to identify in the output.
testsuite_conv2d = [
    ("2x2_1x6x4x4_gp6_st1", dw_conv2d_2x2_1x6x4x4_gp6_st1),
    ("3x3_1x3x256x256_gp3_st1", dw_conv2d_3x3_1x3x256x256_gp3_st1),
    ("3x3_1x4x256x256_gp4_st1", dw_conv2d_3x3_1x4x256x256_gp4_st1),
    ("3x3_2x8x198x198_gp8_st3", dw_conv2d_3x3_2x8x198x198_gp8_st3),
    ("3x3_1x4x256x256_gp4_nobias", dw_conv2d_3x3_1x4x256x256_gp4_nobias),
    ("two_dw_conv2d", two_dw_conv2d),
]

testsuite_conv1d = [
    ("2_1x6x4_gp6_st1", dw_conv1d_2_1x6x4_gp6_st1),
    ("3_1x3x256_gp3_st1", dw_conv1d_3_1x3x256_gp3_st1),
    ("two_dw_conv1d", two_dw_conv1d),
    ("3_1x3x14_gp3_st1", dw_conv1d_3_1x3x14_gp3_st1),
]


class TestDepthwiseConv(unittest.TestCase):
    """Tests Conv1D and Conv2D where groups == in_channels and
    out_channels = K * in_channels. This special case is what enables
    depthwise convolution."""

    def _test_dw_conv_tosa_MI_pipeline(
        self, module: torch.nn.Module, test_data: Tuple[torch.Tensor]
    ):
        """Lower `module` with the "TOSA-0.80.0+MI" compile spec and compare outputs.

        The module is exported, converted to edge and partitioned. The
        result is then checked for the absence of the edge convolution op
        and for a delegate-call count of 1, before being converted to
        executorch and run on `test_data`.
        """
        tosa_spec = common.get_tosa_compile_spec(
            "TOSA-0.80.0+MI", permute_memory_to_nhwc=True
        )
        tester = ArmTester(module, example_inputs=test_data, compile_spec=tosa_spec)
        partitioned = tester.export().to_edge().partition()
        checked = partitioned.check_not(
            ["executorch_exir_dialects_edge__ops_aten_convolution_default"]
        ).check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
        checked.to_executorch().run_method_and_compare_outputs(inputs=test_data)

    def _test_dw_conv_tosa_BI_pipeline(
        self, module: torch.nn.Module, test_data: Tuple[torch.Tensor]
    ):
        """Quantize and lower `module` with the "TOSA-0.80.0+BI" compile spec.

        Same stages as the MI pipeline, with a `quantize()` step before
        export and the output comparison run with `qtol=1`.
        """
        tosa_spec = common.get_tosa_compile_spec(
            "TOSA-0.80.0+BI", permute_memory_to_nhwc=True
        )
        tester = ArmTester(module, example_inputs=test_data, compile_spec=tosa_spec)
        partitioned = tester.quantize().export().to_edge().partition()
        checked = partitioned.check_not(
            ["executorch_exir_dialects_edge__ops_aten_convolution_default"]
        ).check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
        checked.to_executorch().run_method_and_compare_outputs(
            inputs=test_data, qtol=1
        )

    def _test_dw_conv_ethos_BI_pipeline(
        self,
        module: torch.nn.Module,
        compile_spec: CompileSpec,
        test_data: Tuple[torch.Tensor],
    ):
        """Quantize and lower `module` with the caller-supplied `compile_spec`.

        Runs the quantize/export/edge/partition stages and the same two
        graph checks as the TOSA pipelines, then converts to executorch.
        No output comparison is performed here.
        """
        tester = ArmTester(
            module,
            example_inputs=test_data,
            compile_spec=compile_spec,
        )
        partitioned = tester.quantize().export().to_edge().partition()
        checked = partitioned.check_not(
            ["executorch_exir_dialects_edge__ops_aten_convolution_default"]
        ).check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
        checked.to_executorch()

    # Comments (rather than docstrings) are used on the test methods below so
    # that the descriptions reported by the test runner are left unchanged.

    # All 1D and 2D fixtures through the TOSA MI pipeline.
    @parameterized.expand(testsuite_conv1d + testsuite_conv2d)
    def test_dw_conv_tosa_MI(self, test_name: str, model: torch.nn.Module):
        inputs = model.get_inputs()
        self._test_dw_conv_tosa_MI_pipeline(model, inputs)

    # All 1D and 2D fixtures through the TOSA BI pipeline.
    # TODO: Investigate flakiness (MLTORCH-307)
    @parameterized.expand(testsuite_conv1d + testsuite_conv2d)
    @pytest.mark.flaky(reruns=3)
    def test_dw_conv_tosa_BI(self, test_name: str, model: torch.nn.Module):
        inputs = model.get_inputs()
        self._test_dw_conv_tosa_BI_pipeline(model, inputs)

    # 2D fixtures lowered with the U55 compile spec.
    @parameterized.expand(testsuite_conv2d, skip_on_empty=True)
    def test_dw_conv2d_u55_BI(
        self, test_name: str, model: torch.nn.Module, set_quantize_io: bool = False
    ):
        u55_spec = common.get_u55_compile_spec(
            permute_memory_to_nhwc=True, quantize_io=set_quantize_io
        )
        self._test_dw_conv_ethos_BI_pipeline(
            module=model,
            compile_spec=u55_spec,
            test_data=model.get_inputs(),
        )

    # Expected to fail as conv1d needs transpose which is not supported
    # on u55.
    @parameterized.expand(testsuite_conv1d, skip_on_empty=True)
    @unittest.expectedFailure
    def test_dw_conv1d_u55_BI(
        self, test_name: str, model: torch.nn.Module, set_quantize_io: bool = False
    ):
        u55_spec = common.get_u55_compile_spec(
            permute_memory_to_nhwc=True, quantize_io=set_quantize_io
        )
        self._test_dw_conv_ethos_BI_pipeline(
            module=model,
            compile_spec=u55_spec,
            test_data=model.get_inputs(),
        )

    # All 1D and 2D fixtures lowered with the U85 compile spec.
    @parameterized.expand(testsuite_conv1d + testsuite_conv2d)
    def test_dw_conv_u85_BI(
        self, test_name: str, model: torch.nn.Module, set_quantize_io: bool = False
    ):
        u85_spec = common.get_u85_compile_spec(
            permute_memory_to_nhwc=True, quantize_io=set_quantize_io
        )
        self._test_dw_conv_ethos_BI_pipeline(
            module=model,
            compile_spec=u85_spec,
            test_data=model.get_inputs(),
        )