1 /* 2 * Copyright (C) 2018 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package other; 18 19 /** 20 * Tests for dot product idiom vectorization: char and short case. 21 */ 22 public class TestCharShort { 23 24 public static final int ARRAY_SIZE = 1024; 25 26 /// CHECK-START: int other.TestCharShort.testDotProdSimple(short[], short[]) loop_optimization (before) 27 /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none 28 /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none 29 /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none 30 /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none 31 /// CHECK-DAG: <<Get1:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 32 /// CHECK-DAG: <<Get2:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 33 /// CHECK-DAG: <<Mul:i\d+>> Mul [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none 34 /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none 35 /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none 36 37 /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdSimple(short[], short[]) loop_optimization (after) 38 /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none 39 /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none 40 /// CHECK-IF: hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true' 41 // 42 // 16-bit DotProd is not supported for SVE. 43 /// CHECK-NOT: VecDotProd 44 // 45 /// CHECK-ELSE: 46 // 47 /// CHECK-DAG: <<Const8:i\d+>> IntConstant 8 loop:none 48 /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none 49 /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none 50 /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none 51 /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 52 /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 53 /// CHECK-DAG: VecDotProd [<<Phi2>>,<<Load1>>,<<Load2>>] type:Int16 loop:<<Loop>> outer_loop:none 54 /// CHECK-DAG: Add [<<Phi1>>,<<Const8>>] loop:<<Loop>> outer_loop:none 55 // 56 /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none 57 /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none 58 // 59 /// CHECK-FI: testDotProdSimple(short[] a, short[] b)60 public static final int testDotProdSimple(short[] a, short[] b) { 61 int s = 1; 62 for (int i = 0; i < b.length; i++) { 63 int temp = a[i] * b[i]; 64 s += temp; 65 } 66 return s - 1; 67 } 68 69 /// CHECK-START: int other.TestCharShort.testDotProdComplex(short[], short[]) loop_optimization (before) 70 /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none 71 /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none 72 /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none 73 /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none 74 /// CHECK-DAG: <<Get1:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 75 /// CHECK-DAG: <<AddC1:i\d+>> Add [<<Get1>>,<<Const1>>] loop:<<Loop>> outer_loop:none 76 /// CHECK-DAG: <<TypeC1:s\d+>> TypeConversion [<<AddC1>>] loop:<<Loop>> outer_loop:none 77 /// CHECK-DAG: <<Get2:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 78 /// CHECK-DAG: <<AddC2:i\d+>> Add [<<Get2>>,<<Const1>>] loop:<<Loop>> outer_loop:none 79 /// CHECK-DAG: <<TypeC2:s\d+>> TypeConversion [<<AddC2>>] loop:<<Loop>> outer_loop:none 80 /// CHECK-DAG: <<Mul:i\d+>> Mul [<<TypeC1>>,<<TypeC2>>] loop:<<Loop>> outer_loop:none 81 /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none 82 /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none 83 84 /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdComplex(short[], short[]) loop_optimization (after) 85 /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none 86 /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none 87 /// CHECK-IF: hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true' 88 // 89 // 16-bit DotProd is not supported for SVE. 90 /// CHECK-NOT: VecDotProd 91 // 92 /// CHECK-ELSE: 93 // 94 /// CHECK-DAG: <<Const8:i\d+>> IntConstant 8 loop:none 95 /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const1>>] loop:none 96 /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none 97 /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none 98 /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none 99 /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 100 /// CHECK-DAG: <<VAdd1:d\d+>> VecAdd [<<Load1>>,<<Repl>>] loop:<<Loop>> outer_loop:none 101 /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 102 /// CHECK-DAG: <<VAdd2:d\d+>> VecAdd [<<Load2>>,<<Repl>>] loop:<<Loop>> outer_loop:none 103 /// CHECK-DAG: VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Int16 loop:<<Loop>> outer_loop:none 104 /// CHECK-DAG: Add [<<Phi1>>,<<Const8>>] loop:<<Loop>> outer_loop:none 105 // 106 /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none 107 /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none 108 // 109 /// CHECK-FI: testDotProdComplex(short[] a, short[] b)110 public static final int testDotProdComplex(short[] a, short[] b) { 111 int s = 1; 112 for (int i = 0; i < b.length; i++) { 113 int temp = ((short)(a[i] + 1)) * ((short)(b[i] + 1)); 114 s += temp; 115 } 116 return s - 1; 117 } 118 119 /// CHECK-START: int other.TestCharShort.testDotProdSimpleUnsigned(char[], char[]) loop_optimization (before) 120 /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none 121 /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none 122 /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none 123 /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none 124 /// CHECK-DAG: <<Get1:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 125 /// CHECK-DAG: <<Get2:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 126 /// CHECK-DAG: <<Mul:i\d+>> Mul [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none 127 /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none 128 /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none 129 130 /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdSimpleUnsigned(char[], char[]) loop_optimization (after) 131 /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none 132 /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none 133 /// CHECK-IF: hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true' 134 // 135 // 16-bit DotProd is not supported for SVE. 136 /// CHECK-NOT: VecDotProd 137 // 138 /// CHECK-ELSE: 139 // 140 /// CHECK-DAG: <<Const8:i\d+>> IntConstant 8 loop:none 141 /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none 142 /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none 143 /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none 144 /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 145 /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 146 /// CHECK-DAG: VecDotProd [<<Phi2>>,<<Load1>>,<<Load2>>] type:Uint16 loop:<<Loop>> outer_loop:none 147 /// CHECK-DAG: Add [<<Phi1>>,<<Const8>>] loop:<<Loop>> outer_loop:none 148 // 149 /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none 150 /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none 151 // 152 /// CHECK-FI: testDotProdSimpleUnsigned(char[] a, char[] b)153 public static final int testDotProdSimpleUnsigned(char[] a, char[] b) { 154 int s = 1; 155 for (int i = 0; i < b.length; i++) { 156 int temp = a[i] * b[i]; 157 s += temp; 158 } 159 return s - 1; 160 } 161 162 /// CHECK-START: int other.TestCharShort.testDotProdComplexUnsigned(char[], char[]) loop_optimization (before) 163 /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none 164 /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none 165 /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none 166 /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none 167 /// CHECK-DAG: <<Get1:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 168 /// CHECK-DAG: <<AddC:i\d+>> Add [<<Get1>>,<<Const1>>] loop:<<Loop>> outer_loop:none 169 /// CHECK-DAG: <<TypeC1:c\d+>> TypeConversion [<<AddC>>] loop:<<Loop>> outer_loop:none 170 /// CHECK-DAG: <<Get2:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 171 /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>] loop:<<Loop>> outer_loop:none 172 /// CHECK-DAG: <<TypeC2:c\d+>> TypeConversion [<<AddGets>>] loop:<<Loop>> outer_loop:none 173 /// CHECK-DAG: <<Mul:i\d+>> Mul [<<TypeC1>>,<<TypeC2>>] loop:<<Loop>> outer_loop:none 174 /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none 175 /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none 176 177 /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdComplexUnsigned(char[], char[]) loop_optimization (after) 178 /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none 179 /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none 180 /// CHECK-IF: hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true' 181 // 182 // 16-bit DotProd is not supported for SVE. 183 /// CHECK-NOT: VecDotProd 184 // 185 /// CHECK-ELSE: 186 // 187 /// CHECK-DAG: <<Const8:i\d+>> IntConstant 8 loop:none 188 /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const1>>] loop:none 189 /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none 190 /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none 191 /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none 192 /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 193 /// CHECK-DAG: <<VAdd1:d\d+>> VecAdd [<<Load1>>,<<Repl>>] loop:<<Loop>> outer_loop:none 194 /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 195 /// CHECK-DAG: <<VAdd2:d\d+>> VecAdd [<<Load2>>,<<Repl>>] loop:<<Loop>> outer_loop:none 196 /// CHECK-DAG: VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Uint16 loop:<<Loop>> outer_loop:none 197 /// CHECK-DAG: Add [<<Phi1>>,<<Const8>>] loop:<<Loop>> outer_loop:none 198 // 199 /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none 200 /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none 201 // 202 /// CHECK-FI: testDotProdComplexUnsigned(char[] a, char[] b)203 public static final int testDotProdComplexUnsigned(char[] a, char[] b) { 204 int s = 1; 205 for (int i = 0; i < b.length; i++) { 206 int temp = ((char)(a[i] + 1)) * ((char)(b[i] + 1)); 207 s += temp; 208 } 209 return s - 1; 210 } 211 212 /// CHECK-START: int other.TestCharShort.testDotProdComplexUnsignedCastToSigned(char[], char[]) loop_optimization (before) 213 /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none 214 /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none 215 /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none 216 /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none 217 /// CHECK-DAG: <<Get1:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 218 /// CHECK-DAG: <<AddC:i\d+>> Add [<<Get1>>,<<Const1>>] loop:<<Loop>> outer_loop:none 219 /// CHECK-DAG: <<TypeC1:s\d+>> TypeConversion [<<AddC>>] loop:<<Loop>> outer_loop:none 220 /// CHECK-DAG: <<Get2:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 221 /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>] loop:<<Loop>> outer_loop:none 222 /// CHECK-DAG: <<TypeC2:s\d+>> TypeConversion [<<AddGets>>] loop:<<Loop>> outer_loop:none 223 /// CHECK-DAG: <<Mul:i\d+>> Mul [<<TypeC1>>,<<TypeC2>>] loop:<<Loop>> outer_loop:none 224 /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none 225 /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none 226 227 /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdComplexUnsignedCastToSigned(char[], char[]) loop_optimization (after) 228 /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none 229 /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none 230 /// CHECK-IF: hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true' 231 // 232 // 16-bit DotProd is not supported for SVE. 233 /// CHECK-NOT: VecDotProd 234 // 235 /// CHECK-ELSE: 236 // 237 /// CHECK-DAG: <<Const8:i\d+>> IntConstant 8 loop:none 238 /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const1>>] loop:none 239 /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none 240 /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none 241 /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none 242 /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 243 /// CHECK-DAG: <<VAdd1:d\d+>> VecAdd [<<Load1>>,<<Repl>>] loop:<<Loop>> outer_loop:none 244 /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 245 /// CHECK-DAG: <<VAdd2:d\d+>> VecAdd [<<Load2>>,<<Repl>>] loop:<<Loop>> outer_loop:none 246 /// CHECK-DAG: VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Int16 loop:<<Loop>> outer_loop:none 247 /// CHECK-DAG: Add [<<Phi1>>,<<Const8>>] loop:<<Loop>> outer_loop:none 248 // 249 /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none 250 /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none 251 // 252 /// CHECK-FI: testDotProdComplexUnsignedCastToSigned(char[] a, char[] b)253 public static final int testDotProdComplexUnsignedCastToSigned(char[] a, char[] b) { 254 int s = 1; 255 for (int i = 0; i < b.length; i++) { 256 int temp = ((short)(a[i] + 1)) * ((short)(b[i] + 1)); 257 s += temp; 258 } 259 return s - 1; 260 } 261 262 /// CHECK-START: int other.TestCharShort.testDotProdComplexSignedCastToUnsigned(short[], short[]) loop_optimization (before) 263 /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none 264 /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none 265 /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none 266 /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none 267 /// CHECK-DAG: <<Get1:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 268 /// CHECK-DAG: <<AddC:i\d+>> Add [<<Get1>>,<<Const1>>] loop:<<Loop>> outer_loop:none 269 /// CHECK-DAG: <<TypeC1:c\d+>> TypeConversion [<<AddC>>] loop:<<Loop>> outer_loop:none 270 /// CHECK-DAG: <<Get2:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 271 /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>] loop:<<Loop>> outer_loop:none 272 /// CHECK-DAG: <<TypeC2:c\d+>> TypeConversion [<<AddGets>>] loop:<<Loop>> outer_loop:none 273 /// CHECK-DAG: <<Mul:i\d+>> Mul [<<TypeC1>>,<<TypeC2>>] loop:<<Loop>> outer_loop:none 274 /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none 275 /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none 276 277 /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdComplexSignedCastToUnsigned(short[], short[]) loop_optimization (after) 278 /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none 279 /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none 280 /// CHECK-IF: hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true' 281 // 282 // 16-bit DotProd is not supported for SVE. 283 /// CHECK-NOT: VecDotProd 284 // 285 /// CHECK-ELSE: 286 // 287 /// CHECK-DAG: <<Const8:i\d+>> IntConstant 8 loop:none 288 /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const1>>] loop:none 289 /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none 290 /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none 291 /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none 292 /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 293 /// CHECK-DAG: <<VAdd1:d\d+>> VecAdd [<<Load1>>,<<Repl>>] loop:<<Loop>> outer_loop:none 294 /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 295 /// CHECK-DAG: <<VAdd2:d\d+>> VecAdd [<<Load2>>,<<Repl>>] loop:<<Loop>> outer_loop:none 296 /// CHECK-DAG: VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Uint16 loop:<<Loop>> outer_loop:none 297 /// CHECK-DAG: Add [<<Phi1>>,<<Const8>>] loop:<<Loop>> outer_loop:none 298 // 299 /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none 300 /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none 301 // 302 /// CHECK-FI: testDotProdComplexSignedCastToUnsigned(short[] a, short[] b)303 public static final int testDotProdComplexSignedCastToUnsigned(short[] a, short[] b) { 304 int s = 1; 305 for (int i = 0; i < b.length; i++) { 306 int temp = ((char)(a[i] + 1)) * ((char)(b[i] + 1)); 307 s += temp; 308 } 309 return s - 1; 310 } 311 312 /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdSignedToInt(short[], short[]) loop_optimization (after) 313 /// CHECK-IF: hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true' 314 // 315 // 16-bit DotProd is not supported for SVE. 316 /// CHECK-NOT: VecDotProd 317 // 318 /// CHECK-ELSE: 319 // 320 /// CHECK-DAG: VecDotProd type:Int16 321 // 322 /// CHECK-FI: testDotProdSignedToInt(short[] a, short[] b)323 public static final int testDotProdSignedToInt(short[] a, short[] b) { 324 int s = 1; 325 for (int i = 0; i < b.length; i++) { 326 int temp = ((int)(a[i])) * ((int)(b[i])); 327 s += temp; 328 } 329 return s - 1; 330 } 331 332 /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdParamSigned(int, short[]) loop_optimization (after) 333 /// CHECK-IF: hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true' 334 // 335 // 16-bit DotProd is not supported for SVE. 336 /// CHECK-NOT: VecDotProd 337 // 338 /// CHECK-ELSE: 339 // 340 /// CHECK-DAG: VecDotProd type:Int16 341 // 342 /// CHECK-FI: testDotProdParamSigned(int x, short[] b)343 public static final int testDotProdParamSigned(int x, short[] b) { 344 int s = 1; 345 for (int i = 0; i < b.length; i++) { 346 int temp = (short)(x) * b[i]; 347 s += temp; 348 } 349 return s - 1; 350 } 351 352 /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdParamUnsigned(int, char[]) loop_optimization (after) 353 /// CHECK-IF: hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true' 354 // 355 // 16-bit DotProd is not supported for SVE. 356 /// CHECK-NOT: VecDotProd 357 // 358 /// CHECK-ELSE: 359 // 360 /// CHECK-DAG: VecDotProd type:Uint16 361 // 362 /// CHECK-FI: testDotProdParamUnsigned(int x, char[] b)363 public static final int testDotProdParamUnsigned(int x, char[] b) { 364 int s = 1; 365 for (int i = 0; i < b.length; i++) { 366 int temp = (char)(x) * b[i]; 367 s += temp; 368 } 369 return s - 1; 370 } 371 372 /// CHECK-START: int other.TestCharShort.testDotProdIntParam(int, short[]) loop_optimization (after) 373 /// CHECK-NOT: VecDotProd testDotProdIntParam(int x, short[] b)374 public static final int testDotProdIntParam(int x, short[] b) { 375 int s = 1; 376 for (int i = 0; i < b.length; i++) { 377 int temp = b[i] * (x); 378 s += temp; 379 } 380 return s - 1; 381 } 382 383 /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdSignedToChar(short[], short[]) loop_optimization (after) 384 /// CHECK-IF: hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true' 385 // 386 // 16-bit DotProd is not supported for SVE. 387 /// CHECK-NOT: VecDotProd 388 // 389 /// CHECK-ELSE: 390 // 391 /// CHECK-DAG: VecDotProd type:Uint16 392 // 393 /// CHECK-FI: testDotProdSignedToChar(short[] a, short[] b)394 public static final int testDotProdSignedToChar(short[] a, short[] b) { 395 int s = 1; 396 for (int i = 0; i < b.length; i++) { 397 int temp = ((char)(a[i])) * ((char)(b[i])); 398 s += temp; 399 } 400 return s - 1; 401 } 402 403 // Cases when result of Mul is type-converted are not supported. 404 405 /// CHECK-START: int other.TestCharShort.testDotProdSimpleMulCastToSigned(short[], short[]) loop_optimization (after) 406 /// CHECK-NOT: VecDotProd type:Uint16 testDotProdSimpleMulCastToSigned(short[] a, short[] b)407 public static final int testDotProdSimpleMulCastToSigned(short[] a, short[] b) { 408 int s = 1; 409 for (int i = 0; i < b.length; i++) { 410 short temp = (short)(a[i] * b[i]); 411 s += temp; 412 } 413 return s - 1; 414 } 415 416 /// CHECK-START: int other.TestCharShort.testDotProdSimpleMulCastToUnsigned(short[], short[]) loop_optimization (after) 417 /// CHECK-NOT: VecDotProd testDotProdSimpleMulCastToUnsigned(short[] a, short[] b)418 public static final int testDotProdSimpleMulCastToUnsigned(short[] a, short[] b) { 419 int s = 1; 420 for (int i = 0; i < b.length; i++) { 421 char temp = (char)(a[i] * b[i]); 422 s += temp; 423 } 424 return s - 1; 425 } 426 427 /// CHECK-START: int other.TestCharShort.testDotProdSimpleUnsignedMulCastToSigned(char[], char[]) loop_optimization (after) 428 /// CHECK-NOT: VecDotProd testDotProdSimpleUnsignedMulCastToSigned(char[] a, char[] b)429 public static final int testDotProdSimpleUnsignedMulCastToSigned(char[] a, char[] b) { 430 int s = 1; 431 for (int i = 0; i < b.length; i++) { 432 short temp = (short)(a[i] * b[i]); 433 s += temp; 434 } 435 return s - 1; 436 } 437 438 /// CHECK-START: int other.TestCharShort.testDotProdSimpleUnsignedMulCastToUnsigned(char[], char[]) loop_optimization (after) 439 /// CHECK-NOT: VecDotProd testDotProdSimpleUnsignedMulCastToUnsigned(char[] a, char[] b)440 public static final int testDotProdSimpleUnsignedMulCastToUnsigned(char[] a, char[] b) { 441 int s = 1; 442 for (int i = 0; i < b.length; i++) { 443 char temp = (char)(a[i] * b[i]); 444 s += temp; 445 } 446 return s - 1; 447 } 448 449 /// CHECK-START: int other.TestCharShort.testDotProdSimpleCastToShort(short[], short[]) loop_optimization (after) 450 /// CHECK-NOT: VecDotProd testDotProdSimpleCastToShort(short[] a, short[] b)451 public static final int testDotProdSimpleCastToShort(short[] a, short[] b) { 452 int s = 1; 453 for (int i = 0; i < b.length; i++) { 454 short temp = (short)(a[i] * b[i]); 455 s += temp; 456 } 457 return s - 1; 458 } 459 460 /// CHECK-START: int other.TestCharShort.testDotProdSimpleCastToChar(short[], short[]) loop_optimization (after) 461 /// CHECK-NOT: VecDotProd testDotProdSimpleCastToChar(short[] a, short[] b)462 public static final int testDotProdSimpleCastToChar(short[] a, short[] b) { 463 int s = 1; 464 for (int i = 0; i < b.length; i++) { 465 char temp = (char)(a[i] * b[i]); 466 s += temp; 467 } 468 return s - 1; 469 } 470 471 /// CHECK-START: int other.TestCharShort.testDotProdSimpleUnsignedCastToShort(char[], char[]) loop_optimization (after) 472 /// CHECK-NOT: VecDotProd testDotProdSimpleUnsignedCastToShort(char[] a, char[] b)473 public static final int testDotProdSimpleUnsignedCastToShort(char[] a, char[] b) { 474 int s = 1; 475 for (int i = 0; i < b.length; i++) { 476 short temp = (short)(a[i] * b[i]); 477 s += temp; 478 } 479 return s - 1; 480 } 481 482 /// CHECK-START: int other.TestCharShort.testDotProdSimpleUnsignedCastToChar(char[], char[]) loop_optimization (after) 483 /// CHECK-NOT: VecDotProd testDotProdSimpleUnsignedCastToChar(char[] a, char[] b)484 public static final int testDotProdSimpleUnsignedCastToChar(char[] a, char[] b) { 485 int s = 1; 486 for (int i = 0; i < b.length; i++) { 487 char temp = (char)(a[i] * b[i]); 488 s += temp; 489 } 490 return s - 1; 491 } 492 493 /// CHECK-START: int other.TestCharShort.testDotProdSimpleUnsignedCastToLong(char[], char[]) loop_optimization (after) 494 /// CHECK-NOT: VecDotProd testDotProdSimpleUnsignedCastToLong(char[] a, char[] b)495 public static final int testDotProdSimpleUnsignedCastToLong(char[] a, char[] b) { 496 int s = 1; 497 for (int i = 0; i < b.length; i++) { 498 long temp = (long)(a[i] * b[i]); 499 s += temp; 500 } 501 return s - 1; 502 } 503 504 // Narrowing conversions. 505 506 /// CHECK-START: int other.TestCharShort.testDotProdSignedNarrowerSigned(short[], short[]) loop_optimization (after) 507 /// CHECK-NOT: VecDotProd testDotProdSignedNarrowerSigned(short[] a, short[] b)508 public static final int testDotProdSignedNarrowerSigned(short[] a, short[] b) { 509 int s = 1; 510 for (int i = 0; i < b.length; i++) { 511 int temp = ((byte)(a[i])) * ((byte)(b[i])); 512 s += temp; 513 } 514 return s - 1; 515 } 516 517 /// CHECK-START: int other.TestCharShort.testDotProdSignedNarrowerUnsigned(short[], short[]) loop_optimization (after) 518 /// CHECK-NOT: VecDotProd testDotProdSignedNarrowerUnsigned(short[] a, short[] b)519 public static final int testDotProdSignedNarrowerUnsigned(short[] a, short[] b) { 520 int s = 1; 521 for (int i = 0; i < b.length; i++) { 522 int temp = (a[i] & 0xff) * (b[i] & 0xff); 523 s += temp; 524 } 525 return s - 1; 526 } 527 528 /// CHECK-START: int other.TestCharShort.testDotProdUnsignedNarrowerSigned(char[], char[]) loop_optimization (after) 529 /// CHECK-NOT: VecDotProd testDotProdUnsignedNarrowerSigned(char[] a, char[] b)530 public static final int testDotProdUnsignedNarrowerSigned(char[] a, char[] b) { 531 int s = 1; 532 for (int i = 0; i < b.length; i++) { 533 int temp = ((byte)(a[i])) * ((byte)(b[i])); 534 s += temp; 535 } 536 return s - 1; 537 } 538 539 /// CHECK-START: int other.TestCharShort.testDotProdUnsignedNarrowerUnsigned(char[], char[]) loop_optimization (after) 540 /// CHECK-NOT: VecDotProd testDotProdUnsignedNarrowerUnsigned(char[] a, char[] b)541 public static final int testDotProdUnsignedNarrowerUnsigned(char[] a, char[] b) { 542 int s = 1; 543 for (int i = 0; i < b.length; i++) { 544 int temp = (a[i] & 0xff) * (b[i] & 0xff); 545 s += temp; 546 } 547 return s - 1; 548 } 549 550 /// CHECK-START: int other.TestCharShort.testDotProdUnsignedSigned(char[], short[]) loop_optimization (after) 551 /// CHECK-NOT: VecDotProd testDotProdUnsignedSigned(char[] a, short[] b)552 public static final int testDotProdUnsignedSigned(char[] a, short[] b) { 553 int s = 1; 554 for (int i = 0; i < b.length; i++) { 555 int temp = a[i] * b[i]; 556 s += temp; 557 } 558 return s - 1; 559 } 560 expectEquals(int expected, int result)561 private static void expectEquals(int expected, int result) { 562 if (expected != result) { 563 throw new Error("Expected: " + expected + ", found: " + result); 564 } 565 } 566 testDotProd(short[] s1, short[] s2, char[] c1, char[] c2, int[] results)567 private static void testDotProd(short[] s1, short[] s2, char[] c1, char[] c2, int[] results) { 568 expectEquals(results[0], testDotProdSimple(s1, s2)); 569 expectEquals(results[1], testDotProdComplex(s1, s2)); 570 expectEquals(results[2], testDotProdSimpleUnsigned(c1, c2)); 571 expectEquals(results[3], testDotProdComplexUnsigned(c1, c2)); 572 expectEquals(results[4], testDotProdComplexUnsignedCastToSigned(c1, c2)); 573 expectEquals(results[5], testDotProdComplexSignedCastToUnsigned(s1, s2)); 574 expectEquals(results[6], testDotProdSignedToInt(s1, s2)); 575 expectEquals(results[7], testDotProdParamSigned(-32768, s2)); 576 expectEquals(results[8], testDotProdParamUnsigned(-32768, c2)); 577 expectEquals(results[9], testDotProdIntParam(-32768, s2)); 578 expectEquals(results[10], testDotProdSignedToChar(s1, s2)); 579 expectEquals(results[11], testDotProdSimpleMulCastToSigned(s1, s2)); 580 expectEquals(results[12], testDotProdSimpleMulCastToUnsigned(s1, s2)); 581 expectEquals(results[13], testDotProdSimpleUnsignedMulCastToSigned(c1, c2)); 582 expectEquals(results[14], testDotProdSimpleUnsignedMulCastToUnsigned(c1, c2)); 583 expectEquals(results[15], testDotProdSimpleCastToShort(s1, s2)); 584 expectEquals(results[16], testDotProdSimpleCastToChar(s1, s2)); 585 expectEquals(results[17], testDotProdSimpleUnsignedCastToShort(c1, c2)); 586 expectEquals(results[18], testDotProdSimpleUnsignedCastToChar(c1, c2)); 587 expectEquals(results[19], testDotProdSimpleUnsignedCastToLong(c1, c2)); 588 expectEquals(results[20], testDotProdSignedNarrowerSigned(s1, s2)); 589 expectEquals(results[21], testDotProdSignedNarrowerUnsigned(s1, s2)); 590 expectEquals(results[22], testDotProdUnsignedNarrowerSigned(c1, c2)); 591 expectEquals(results[23], testDotProdUnsignedNarrowerUnsigned(c1, c2)); 592 expectEquals(results[24], testDotProdUnsignedSigned(c1, s2)); 593 } 594 run()595 public static void run() { 596 final short MAX_S = Short.MAX_VALUE; 597 final short MIN_S = Short.MAX_VALUE; 598 599 short[] s1_1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S }; 600 short[] s2_1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S }; 601 char[] c1_1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S }; 602 char[] c2_1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S }; 603 int[] results_1 = { 2147352578, -2147483634, 2147352578, -2147483634, -2147483634, -2147483634, 604 2147352578, -2147418112, 2147418112, -2147418112, 2147352578, 605 2, 2, 2, 2, 2, 2, 2, 2, 2147352578, 2, 130050, 2, 130050, 2147352578 }; 606 testDotProd(s1_1, s2_1, c1_1, c2_1, results_1); 607 608 short[] s1_2 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S, MAX_S, MAX_S }; 609 short[] s2_2 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S, MAX_S, MAX_S }; 610 char[] c1_2 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S, MAX_S, MAX_S }; 611 char[] c2_2 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S, MAX_S, MAX_S }; 612 int[] results_2 = { -262140, 12, -262140, 12, 12, 12, -262140, 131072, -131072, 131072, 613 -262140, 4, 4, 4, 4, 4, 4, 4, 4, -262140, 4, 260100, 4, 260100, -262140 }; 614 testDotProd(s1_2, s2_2, c1_2, c2_2, results_2); 615 616 short[] s1_3 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S }; 617 short[] s2_3 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S }; 618 char[] c1_3 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S }; 619 char[] c2_3 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S }; 620 int[] results_3 = { 2147352578, -2147483634, 2147352578, -2147483634, -2147483634, 621 -2147483634, 2147352578, -2147418112, 2147418112, -2147418112, 622 2147352578, 2, 2, 2, 2, 2, 2, 2, 2, 2147352578, 2, 130050, 2, 623 130050, 2147352578}; 624 testDotProd(s1_3, s2_3, c1_3, c2_3, results_3); 625 626 627 short[] s1_4 = { MIN_S, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S }; 628 short[] s2_4 = { MIN_S, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S }; 629 char[] c1_4 = { MIN_S, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S }; 630 char[] c2_4 = { MIN_S, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S }; 631 int[] results_4 = { -1073938429, -1073741811, -1073938429, -1073741811, -1073741811, 632 -1073741811, -1073938429, 1073840128, -1073840128, 1073840128, 633 -1073938429, 3, 3, 3, 3, 3, 3, 3, 3, -1073938429, 3, 195075, 3, 634 195075, -1073938429 }; 635 testDotProd(s1_4, s2_4, c1_4, c2_4, results_4); 636 } 637 main(String[] args)638 public static void main(String[] args) { 639 run(); 640 } 641 } 642