From 48ef088b1f2d594b4d756c19574ead05492d3f84 Mon Sep 17 00:00:00 2001
From: zhangyanhui <zhangyanhui17@huawei.com>
Date: Mon, 8 Jul 2024 18:27:42 +0800
Subject: [PATCH] bugfix for depthwise when kernel size is longer than input_w

---
 .../device/cpu/kernel/nnacl/assembly/arm32/ConvDwFp32Row.S | 2 +-
 .../device/cpu/kernel/nnacl/assembly/arm64/ConvDwFp32Row.S | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/assembly/arm32/ConvDwFp32Row.S b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/assembly/arm32/ConvDwFp32Row.S
index 8d1d819d..19750920 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/assembly/arm32/ConvDwFp32Row.S
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/assembly/arm32/ConvDwFp32Row.S
@@ -35,7 +35,7 @@ asm_function ConvDwFp32Row
     mov r6, #4
     mul r5, r5, r6
     cmp r3, #0
-    beq End
+    ble End
 
     LoopNumPixel:
         mov r6, r1 // input_ptr
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/assembly/arm64/ConvDwFp32Row.S b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/assembly/arm64/ConvDwFp32Row.S
index 417aa4ce..e50e2ee5 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/assembly/arm64/ConvDwFp32Row.S
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/assembly/arm64/ConvDwFp32Row.S
@@ -30,7 +30,7 @@ asm_function ConvDwFp32Row
 // x19 ~ x29 should be also preserved
 // whereas our coding style do not permit such amount of parameters
 cmp x3, #0
-beq End
+ble End
 
 mov x9, x0
 mov x12, #4
-- 
2.25.1
