1 /* 2 * Copyright (c) 2021-2022 Arm Limited. 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in all 14 * copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 25 #include "src/core/NEON/kernels/arm_gemm/utils.hpp" 26 27 #include <cstdint> 28 29 #pragma once 30 31 #if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) 32 33 namespace arm_conv { 34 namespace depthwise { 35 36 void sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst_indirect_impl(const float *const *const, float *const *const, const void *, unsigned int, const float, const float); 37 void sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst_direct_impl(const unsigned int, const unsigned int, const float *, int64_t, int64_t, float *, int64_t, int64_t, const void *, unsigned int, const float, const float); 38 39 class sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst : public DepthwiseDepthfirstStrategy<float, float, float, float> 40 { 41 private: 42 using Parent = DepthwiseDepthfirstStrategy<float, float, float, float>; 43 Parent::IndirectKernelType m_indirect_kernel = sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst_indirect_impl; 44 Parent::DirectKernelType m_direct_kernel = sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst_direct_impl; 45 46 public: 47 using return_type = float; 48 constexpr static auto vl_type = arm_gemm::VLType::SVE; 49 50 constexpr static unsigned int kernel_rows = 3; 51 constexpr static unsigned int kernel_cols = 3; 52 53 constexpr static unsigned int stride_rows = 1; 54 constexpr static unsigned int stride_cols = 1; 55 56 constexpr static unsigned int output_rows = 4; 57 constexpr static unsigned int output_cols = 4; 58 sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst(const CPUInfo *)59 sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst(const CPUInfo *) 60 : DepthwiseDepthfirstStrategy<float, float, float, float>(4, 3, 1) {} 61 get_vl_type(void) const62 arm_gemm::VLType get_vl_type(void) const override { return vl_type; } 63 get_indirect_kernel() const64 Parent::IndirectKernelType get_indirect_kernel() const override { return m_indirect_kernel; } get_direct_kernel() const65 Parent::DirectKernelType get_direct_kernel() const override { return m_direct_kernel; } 66 }; 67 68 } // namespace depthwise 69 } // namespace arm_conv 70 71 #endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) 72