| /external/javassist/src/test/test3/ |
| D | Switch.java | 13 int j = 4; in foo() 14 j = 4; in foo() 15 j = 4; in foo() 16 j = 4; in foo() 17 j = 4; in foo() 18 j = 4; in foo() 19 j = 4; in foo() 20 j = 4; in foo() 21 j = 4; in foo() 22 j = 4; in foo() [all …]
|
| /external/hyphenation-patterns/el/ |
| D | hyph-el.pat.txt | 125 4β. 126 4γ. 127 4γκ. 128 4δ. 129 4ζ. 130 4θ. 131 4κ. 132 4λ. 133 4μ. 134 4μπ. [all …]
|
| /external/rust/crates/unicode-ident/tests/trie/ |
| D | trie.rs | 28 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 29 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 4, 32, 33, 34, 4, 4, 4, 4, 4, 30 35, 36, 37, 38, 39, 40, 41, 42, 4, 4, 4, 4, 4, 4, 4, 4, 43, 44, 45, 46, 31 47, 4, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 4, 61, 4, 62, 32 63, 64, 65, 66, 4, 4, 4, 4, 4, 4, 4, 4, 67, 68, 69, 70, 71, 72, 73, 74, 35 78, 78, 78, 78, 78, 78, 78, 78, 4, 4, 4, 79, 80, 81, 82, 83, 78, 78, 78, 36 78, 78, 78, 78, 78, 84, 42, 85, 86, 87, 4, 88, 89, 78, 78, 78, 78, 78, 78, 37 78, 78, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 38 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 39 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, [all …]
|
| /external/libxaac/decoder/armv8/ |
| D | ixheaacd_post_twiddle.s | 57 MOV x6, #4 58 dup v10.4h, w4 65 dup v10.4h, w4 72 LDR w7, [x1], #4 73 LDR w8, [x1], #4 107 SUB x7, x7, #4 109 STR w11, [x7], #-4 111 STR w9, [x0], #4 119 ASR w3, w3, #4 139 LD4 {v0.4h, v1.4h, v2.4h, v3.4h}, [x5], x8 [all …]
|
| D | ixheaacd_pre_twiddle.s | 59 LSL x7, x4, #4 61 SUB x7, x7, #4 77 LDR w8, [x3], #4 78 LDR w9, [x0], #4 93 LDR w10, [x1], #-4 121 STR w9, [x2], #4 122 STR w11, [x2], #4 126 MOV X6, #4 154 dup v14.4s, w5 163 rev64 v10.4h, v8.4h [all …]
|
| /external/gemmlowp/meta/ |
| D | transform_kernels_arm_64.h | 41 "dup v4.4s, %w[input_range_min]\n" in Transform() 42 "dup v5.4s, %w[output_range_min]\n" in Transform() 43 "dup v6.4s, %w[input_range_offset]\n" in Transform() 44 "dup v7.4s, %w[input_range_scale]\n" in Transform() 45 "dup v8.4s, %w[one_over_output_range_scale]\n" in Transform() 46 "fsub v4.4s, v4.4s, v5.4s\n" in Transform() 52 "ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [%x[input]], #64\n" in Transform() 54 "scvtf v0.4s, v0.4s\n" in Transform() 55 "scvtf v1.4s, v1.4s\n" in Transform() 56 "scvtf v2.4s, v2.4s\n" in Transform() [all …]
|
| /external/linux-kselftest/tools/testing/selftests/powerpc/lib/ |
| D | reg.S | 17 ld 18, 4*8(3) 39 std 18, 4*8(3) 58 lfs 0, 0*4(3) 59 lfs 1, 1*4(3) 60 lfs 2, 2*4(3) 61 lfs 3, 3*4(3) 62 lfs 4, 4*4(3) 63 lfs 5, 5*4(3) 64 lfs 6, 6*4(3) 65 lfs 7, 7*4(3) [all …]
|
| /external/cronet/net/base/registry_controlled_domains/ |
| D | effective_tld_names_unittest4.gperf | 4 …______________________________________________________________________________________________a1, 4 6 …______________________________________________________________________________________________a3, 4 8 …______________________________________________________________________________________________a5, 4 10 …______________________________________________________________________________________________a7, 4 12 …______________________________________________________________________________________________a9, 4 14 …______________________________________________________________________________________________b1, 4 16 …______________________________________________________________________________________________b3, 4 18 …______________________________________________________________________________________________b5, 4 20 …______________________________________________________________________________________________b7, 4 22 …______________________________________________________________________________________________b9, 4 [all …]
|
| /external/ComputeLibrary/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/ |
| D | generic.cpp | 50 "ld1r { v14.4s }, [x19]\n" in a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_impl() 52 "ld1r { v13.4s }, [x19]\n" in a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_impl() 58 "ld1r { v10.4s }, [x19]\n" in a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_impl() 60 "ld1r { v9.4s }, [x19]\n" in a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_impl() 62 "ld1r { v8.4s }, [x19]\n" in a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_impl() 64 "ld1r { v7.4s }, [x19]\n" in a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_impl() 68 "movi v16.4s, #0x0\n" in a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_impl() 115 "4:" // Output channel loop: Kernel loop in a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_impl() 116 "smlal v6.4s, v17.4h, v3.h[0]\n" in a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_impl() 119 "smlal v5.4s, v17.4h, v3.h[1]\n" in a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_impl() [all …]
|
| /external/ComputeLibrary/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/ |
| D | generic.cpp | 50 "ld1r { v14.4s }, [x19]\n" in a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_impl() 52 "ld1r { v13.4s }, [x19]\n" in a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_impl() 58 "ld1r { v10.4s }, [x19]\n" in a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_impl() 60 "ld1r { v9.4s }, [x19]\n" in a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_impl() 62 "ld1r { v8.4s }, [x19]\n" in a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_impl() 64 "ld1r { v7.4s }, [x19]\n" in a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_impl() 68 "movi v16.4s, #0x0\n" in a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_impl() 115 "4:" // Output channel loop: Kernel loop in a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_impl() 116 "smlal v6.4s, v17.4h, v3.h[0]\n" in a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_impl() 119 "smlal v5.4s, v17.4h, v3.h[1]\n" in a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_impl() [all …]
|
| /external/ComputeLibrary/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/ |
| D | generic.cpp | 50 "ld1r { v14.4s }, [x19]\n" in a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_impl() 52 "ld1r { v13.4s }, [x19]\n" in a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_impl() 58 "ld1r { v10.4s }, [x19]\n" in a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_impl() 60 "ld1r { v9.4s }, [x19]\n" in a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_impl() 62 "ld1r { v8.4s }, [x19]\n" in a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_impl() 64 "ld1r { v7.4s }, [x19]\n" in a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_impl() 68 "movi v16.4s, #0x0\n" in a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_impl() 115 "4:" // Output channel loop: Kernel loop in a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_impl() 116 "smlal v6.4s, v17.4h, v3.h[0]\n" in a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_impl() 119 "smlal v5.4s, v17.4h, v3.h[1]\n" in a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst_impl() [all …]
|
| /external/ComputeLibrary/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/ |
| D | generic_direct.cpp | 111 "ld1r { v15.4s }, [x23]\n" in a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst_direct_impl() 113 "ld1r { v14.4s }, [x21]\n" in a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst_direct_impl() 136 "cbz x19, 4f\n" in a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst_direct_impl() 139 "cmp x21, x19, LSL #4\n" in a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst_direct_impl() 150 "ld1 { v10.4s }, [x8]\n" in a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst_direct_impl() 155 "mov v31.16b, v13.16b\n fmla v31.4s, v8.4s, v9.4s\n" in a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst_direct_impl() 157 "mov v30.16b, v13.16b\n fmla v30.4s, v7.4s, v9.4s\n" in a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst_direct_impl() 159 "mov v29.16b, v13.16b\n fmla v29.4s, v6.4s, v9.4s\n" in a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst_direct_impl() 161 "mov v27.16b, v13.16b\n fmla v27.4s, v5.4s, v9.4s\n" in a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst_direct_impl() 162 "cmp x21, x19, LSL #4\n" in a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst_direct_impl() [all …]
|
| D | generic_indirect.cpp | 61 inptrs[4] = input_ptrs[30]; in a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst_indirect_impl() 65 inptrs[8] = input_ptrs[4]; in a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst_indirect_impl() 106 "ld1r { v15.4s }, [x20]\n" in a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst_indirect_impl() 107 "ld1r { v14.4s }, [x19]\n" in a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst_indirect_impl() 115 "cmp x13, x11, LSL #4\n" in a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst_indirect_impl() 133 "mov v31.16b, v13.16b\n fmla v31.4s, v8.4s, v9.4s\n" in a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst_indirect_impl() 136 "mov v30.16b, v13.16b\n fmla v30.4s, v7.4s, v9.4s\n" in a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst_indirect_impl() 138 "mov v29.16b, v13.16b\n fmla v29.4s, v6.4s, v9.4s\n" in a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst_indirect_impl() 140 "mov v27.16b, v13.16b\n fmla v27.4s, v5.4s, v9.4s\n" in a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst_indirect_impl() 142 "mov v26.16b, v13.16b\n fmla v26.4s, v4.4s, v9.4s\n" in a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst_indirect_impl() [all …]
|
| /external/ComputeLibrary/src/core/NEON/kernels/arm_gemm/transforms/ |
| D | a64_transpose_interleave_12_2x4_fp32bf16.hpp | 35 if (height % 4) { in a64_transpose_interleave_12_2x4_fp32bf16() 39 size_t out_stride = 12 * roundup<size_t>(height, 4) * sizeof(bfloat16); in a64_transpose_interleave_12_2x4_fp32bf16() 65 "zip1 v29.4s, v12.4s, v11.4s\n" in a64_transpose_interleave_12_2x4_fp32bf16() 67 "zip2 v0.4s, v12.4s, v11.4s\n" in a64_transpose_interleave_12_2x4_fp32bf16() 70 "zip1 v23.4s, v5.4s, v17.4s\n" in a64_transpose_interleave_12_2x4_fp32bf16() 72 "zip2 v18.4s, v5.4s, v17.4s\n" in a64_transpose_interleave_12_2x4_fp32bf16() 75 "zip1 v21.4s, v25.4s, v31.4s\n" in a64_transpose_interleave_12_2x4_fp32bf16() 77 "zip2 v10.4s, v25.4s, v31.4s\n" in a64_transpose_interleave_12_2x4_fp32bf16() 80 "zip1 v13.4s, v16.4s, v1.4s\n" in a64_transpose_interleave_12_2x4_fp32bf16() 82 "zip2 v24.4s, v16.4s, v1.4s\n" in a64_transpose_interleave_12_2x4_fp32bf16() [all …]
|
| D | a64_transpose_interleave_24_2x4_fp32bf16.hpp | 35 if (height % 4) { in a64_transpose_interleave_24_2x4_fp32bf16() 39 size_t out_stride = 24 * roundup<size_t>(height, 4) * sizeof(bfloat16); in a64_transpose_interleave_24_2x4_fp32bf16() 65 "zip1 v28.4s, v3.4s, v26.4s\n" in a64_transpose_interleave_24_2x4_fp32bf16() 67 "zip2 v12.4s, v3.4s, v26.4s\n" in a64_transpose_interleave_24_2x4_fp32bf16() 70 "zip1 v11.4s, v1.4s, v17.4s\n" in a64_transpose_interleave_24_2x4_fp32bf16() 72 "zip2 v23.4s, v1.4s, v17.4s\n" in a64_transpose_interleave_24_2x4_fp32bf16() 75 "zip1 v10.4s, v31.4s, v1.4s\n" in a64_transpose_interleave_24_2x4_fp32bf16() 77 "zip2 v17.4s, v31.4s, v1.4s\n" in a64_transpose_interleave_24_2x4_fp32bf16() 80 "zip1 v5.4s, v14.4s, v1.4s\n" in a64_transpose_interleave_24_2x4_fp32bf16() 82 "zip2 v8.4s, v14.4s, v1.4s\n" in a64_transpose_interleave_24_2x4_fp32bf16() [all …]
|
| /external/ComputeLibrary/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/ |
| D | generic.cpp | 71 "ld1r { v21.4s }, [%x[clamps]]\n" in a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst_impl() 72 "ld1r { v20.4s }, [x11]\n" in a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst_impl() 91 "fmla v12.4s, v31.4s, v0.s[0]\n" in a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst_impl() 93 "fmla v13.4s, v31.4s, v0.s[1]\n" in a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst_impl() 95 "fmla v14.4s, v31.4s, v0.s[2]\n" in a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst_impl() 96 "fmla v15.4s, v31.4s, v0.s[3]\n" in a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst_impl() 97 "fmla v16.4s, v31.4s, v2.s[0]\n" in a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst_impl() 98 "fmla v17.4s, v31.4s, v2.s[1]\n" in a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst_impl() 99 "fmla v18.4s, v31.4s, v2.s[2]\n" in a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst_impl() 100 "fmla v19.4s, v31.4s, v2.s[3]\n" in a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst_impl() [all …]
|
| /external/ComputeLibrary/src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_6x4/ |
| D | generic.cpp | 37 const long loops_count = iceildiv(N, (int)4) - 1; in a64_smallK_hybrid_fp32_mla_6x4() 40 float nullbias[4]; in a64_smallK_hybrid_fp32_mla_6x4() 42 memset(nullbias, 0, (4 * sizeof(float))); in a64_smallK_hybrid_fp32_mla_6x4() 67 const uint64_t biasinc = bias ? 4*sizeof(float) : 0; in a64_smallK_hybrid_fp32_mla_6x4() 158 "fmla v26.4s, v18.4s, v0.s[0]\n" in a64_smallK_hybrid_fp32_mla_6x4() 159 "fmla v27.4s, v18.4s, v3.s[0]\n" in a64_smallK_hybrid_fp32_mla_6x4() 160 "fmla v28.4s, v18.4s, v6.s[0]\n" in a64_smallK_hybrid_fp32_mla_6x4() 161 "fmla v29.4s, v18.4s, v9.s[0]\n" in a64_smallK_hybrid_fp32_mla_6x4() 162 "fmla v30.4s, v18.4s, v12.s[0]\n" in a64_smallK_hybrid_fp32_mla_6x4() 163 "fmla v31.4s, v18.4s, v15.s[0]\n" in a64_smallK_hybrid_fp32_mla_6x4() [all …]
|
| /external/libhevc/common/arm64/ |
| D | ihevc_itrans_recon_32x32.s | 125 //d5[3]= 38 d7[3]=4 128 .align 4 169 ld1 {v0.4h, v1.4h, v2.4h, v3.4h},[x14],#32 170 ld1 {v4.4h, v5.4h, v6.4h, v7.4h},[x14],#32 195 mov x20,#4 208 ld1 {v10.4h},[x0],x6 209 ld1 {v8.4h},[x0],x6 210 ld1 {v11.4h},[x0],x6 211 ld1 {v9.4h},[x0],x6 213 smull v24.4s, v8.4h, v0.h[1] //// y1 * cos1(part of b0) [all …]
|
| /external/XNNPACK/src/qs8-igemm/ |
| D | 4x16c4-aarch64-neondot-ld128.S.in | 57 CMP x0, 4 // if mr < 4 82 # Load next 4 A pointers 101 B.LO 4f 112 SDOT v16.4s, v4.16b, v0.4b[0] 113 SDOT v17.4s, v4.16b, v1.4b[0] 115 SDOT v18.4s, v4.16b, v2.4b[0] 116 SDOT v19.4s, v4.16b, v3.4b[0] 117 SDOT v20.4s, v5.16b, v0.4b[0] 118 SDOT v21.4s, v5.16b, v1.4b[0] 119 SDOT v22.4s, v5.16b, v2.4b[0] [all …]
|
| D | 4x16c4-aarch64-neondot-cortex-a55.S.in | 14 $if DATATYPE == "qu8": REWIND_DECREMENT += 4 62 CMP x0, 4 // if mr < 4 87 # Load next 4 A pointers 120 # Main loop - 16 bytes of A in 4 groups. 121 # 4 row of 4 vectors wide = 16 sdot instructions for 4 channels 122 # 4 LD64 for A 123 # 4 LD128 for W. = 2 LD64 + INS. 124 # for each 4 sdot, 1 LD64 for A, 2 LD64 for W + INS. 129 SDOT v16.4s, v8.16b, v0.4b[0] 131 SDOT v17.4s, v8.16b, v1.4b[0] [all …]
|
| /external/XNNPACK/src/qs8-gemm/ |
| D | 4x16c4-aarch64-neondot-ld64.S.in | 59 CMP x0, 4 // if mr < 4 95 SDOT v16.4s, v4.16b, v0.4b[0] 96 SDOT v17.4s, v4.16b, v1.4b[0] 98 SDOT v18.4s, v4.16b, v2.4b[0] 99 SDOT v19.4s, v4.16b, v3.4b[0] 100 SDOT v20.4s, v5.16b, v0.4b[0] 101 SDOT v21.4s, v5.16b, v1.4b[0] 102 SDOT v22.4s, v5.16b, v2.4b[0] 103 SDOT v23.4s, v5.16b, v3.4b[0] 104 SDOT v24.4s, v6.16b, v0.4b[0] [all …]
|
| D | 4x16c4-aarch64-neondot-ld128.S.in | 45 # params structure is 4 bytes 85 CMP x0, 4 // if mr < 4 121 SDOT v16.4s, v4.16b, v0.4b[0] 122 SDOT v17.4s, v4.16b, v1.4b[0] 124 SDOT v18.4s, v4.16b, v2.4b[0] 125 SDOT v19.4s, v4.16b, v3.4b[0] 126 SDOT v20.4s, v5.16b, v0.4b[0] 127 SDOT v21.4s, v5.16b, v1.4b[0] 128 SDOT v22.4s, v5.16b, v2.4b[0] 129 SDOT v23.4s, v5.16b, v3.4b[0] [all …]
|
| /external/XNNPACK/src/qu8-gemm/ |
| D | 4x16c4-aarch64-neondot-ld128.S.in | 26 # uint8_t kernel_zero_point[4]; 37 # uint8_t kernel_zero_point[4]; 82 CMP x0, 4 // if mr < 4 86 LD1R {v8.4s}, [x11], 4 // kernel_zero_point 96 MOVI v12.4s, 0 97 MOVI v13.4s, 0 98 MOVI v14.4s, 0 99 MOVI v15.4s, 0 129 UDOT v12.4s, v8.16b, v0.16b // update zero point 130 UDOT v13.4s, v8.16b, v1.16b [all …]
|
| /external/XNNPACK/src/qu8-igemm/ |
| D | 4x16c4-aarch64-neondot-ld128.S.in | 56 CMP x0, 4 // if mr < 4 60 LD1R {v8.4s}, [x11], 4 // kernel_zero_point 67 MOVI v12.4s, 0 68 MOVI v13.4s, 0 69 MOVI v14.4s, 0 70 MOVI v15.4s, 0 90 # Load next 4 A pointers 109 B.LO 4f 121 UDOT v12.4s, v8.16b, v0.16b // update zero point 122 UDOT v13.4s, v8.16b, v1.16b [all …]
|
| /external/ComputeLibrary/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/ |
| D | generic_direct.cpp | 111 "ld1r { v18.4s }, [x24]\n" in a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst_direct_impl() 113 "ld1r { v17.4s }, [x21]\n" in a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst_direct_impl() 132 "cbz x19, 4f\n" in a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst_direct_impl() 135 "cmp x21, x19, LSL #4\n" in a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst_direct_impl() 146 "ld1 { v10.4s }, [x15]\n" in a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst_direct_impl() 148 "ld1 { v12.4s }, [x9]\n" in a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst_direct_impl() 152 "mov v31.16b, v16.16b\n fmla v31.4s, v8.4s, v9.4s\n" in a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst_direct_impl() 154 "mov v30.16b, v16.16b\n fmla v30.4s, v7.4s, v9.4s\n" in a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst_direct_impl() 156 "mov v29.16b, v16.16b\n fmla v29.4s, v6.4s, v9.4s\n" in a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst_direct_impl() 158 "mov v28.16b, v16.16b\n fmla v28.4s, v5.4s, v9.4s\n" in a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst_direct_impl() [all …]
|