/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Tests for halving-add idiomatic vectorization.
 *
 * Alternative version expressed with logical shift right
 * in the higher precision (has no impact on idiom).
 *
 * Each kernel below is preceded by checker directives that describe the
 * compiler graph expected before and after loop optimization. The kernel
 * bodies are deliberately written with {@code >>>} on the int-promoted sum;
 * {@link #main()} recomputes the expected bytes with {@code >>} instead.
 */
public class HaddAltByte {

  // Number of distinct values per operand; (byte) i for i in [0, N) covers every byte.
  private static final int N = 256;
  // Extra trailing elements appended after the N * N cross product (see main()).
  private static final int EXTRA = 15;
  // Total array length: full cross product plus the extra tail.
  private static final int M = N * N + EXTRA;

  // Shared operand and output buffers used by main().
  static byte[] sB1 = new byte[M];
  static byte[] sB2 = new byte[M];
  static byte[] sBo = new byte[M];

  /// CHECK-START: void HaddAltByte.halving_add_signed(byte[], byte[], byte[]) loop_optimization (before)
  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get1:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Get2:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Add:i\d+>>  Add [<<Get1>>,<<Get2>>]             loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
  //
  /// CHECK-START-ARM: void HaddAltByte.halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int8 rounded:false loop:<<Loop>> outer_loop:none
  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
  //
  /// CHECK-START-ARM64: void HaddAltByte.halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
  /// CHECK-IF:     hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true'
  //
  //      HalvingAdd idiom is not supported for SVE.
  ///     CHECK-NOT: VecHalvingAdd
  //
  /// CHECK-ELSE:
  //
  ///     CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
  ///     CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
  ///     CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int8 rounded:false loop:<<Loop>> outer_loop:none
  ///     CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
  //
  /// CHECK-FI:
  //
  // Signed halving add: for every index below the shortest of the three array
  // lengths, stores the sign-extended sum of b1[i] and b2[i], logically shifted
  // right by one and narrowed back to byte, into bo[i].
  private static void halving_add_signed(byte[] b1, byte[] b2, byte[] bo) {
    int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
    for (int i = 0; i < min_length; i++) {
      bo[i] = (byte) ((b1[i] + b2[i]) >>> 1);
    }
  }

  /// CHECK-START: void HaddAltByte.halving_add_unsigned(byte[], byte[], byte[]) instruction_simplifier (before)
  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255                     loop:none
  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get1:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Get2:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<I255>>]             loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<I255>>]             loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Add:i\d+>>  Add [<<And1>>,<<And2>>]             loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:               ArraySet [{{l\d+}},{{i\d+}},<<Cnv>>] loop:<<Loop>>      outer_loop:none
  //
  /// CHECK-START: void HaddAltByte.halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (before)
  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get1:a\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Get2:a\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Add:i\d+>>  Add [<<Get1>>,<<Get2>>]             loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
  //
  /// CHECK-START-ARM: void HaddAltByte.halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint8 rounded:false loop:<<Loop>> outer_loop:none
  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
  //
  /// CHECK-START-ARM64: void HaddAltByte.halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
  /// CHECK-IF:     hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true'
  //
  //      HalvingAdd idiom is not supported for SVE.
  ///     CHECK-NOT: VecHalvingAdd
  //
  /// CHECK-ELSE:
  //
  ///     CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
  ///     CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
  ///     CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint8 rounded:false loop:<<Loop>> outer_loop:none
  ///     CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
  //
  /// CHECK-FI:
  //
  // Unsigned halving add: same as the signed kernel, except each operand is
  // masked with 0xff first so that it is treated as a value in [0, 255].
  private static void halving_add_unsigned(byte[] b1, byte[] b2, byte[] bo) {
    int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
    for (int i = 0; i < min_length; i++) {
      bo[i] = (byte) (((b1[i] & 0xff) + (b2[i] & 0xff)) >>> 1);
    }
  }

  /// CHECK-START: void HaddAltByte.rounding_halving_add_signed(byte[], byte[], byte[]) loop_optimization (before)
  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get1:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Get2:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Add1:i\d+>> Add [<<Get1>>,<<Get2>>]             loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Add2:i\d+>> Add [<<Add1>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add2>>,<<I1>>]              loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
  //
  /// CHECK-START-ARM: void HaddAltByte.rounding_halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int8 rounded:true loop:<<Loop>> outer_loop:none
  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
  //
  /// CHECK-START-ARM64: void HaddAltByte.rounding_halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
  /// CHECK-IF:     hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true'
  //
  //      HalvingAdd idiom is not supported for SVE.
  ///     CHECK-NOT: VecHalvingAdd
  //
  /// CHECK-ELSE:
  //
  ///     CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
  ///     CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
  ///     CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int8 rounded:true loop:<<Loop>> outer_loop:none
  ///     CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
  //
  /// CHECK-FI:
  //
  // Signed rounding halving add: adds 1 to the sign-extended sum before the
  // logical shift right by one, then narrows the result to byte.
  private static void rounding_halving_add_signed(byte[] b1, byte[] b2, byte[] bo) {
    int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
    for (int i = 0; i < min_length; i++) {
      bo[i] = (byte) ((b1[i] + b2[i] + 1) >>> 1);
    }
  }

  /// CHECK-START: void HaddAltByte.rounding_halving_add_unsigned(byte[], byte[], byte[]) instruction_simplifier (before)
  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255                     loop:none
  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get1:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Get2:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<I255>>]             loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<I255>>]             loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Add1:i\d+>> Add [<<And1>>,<<And2>>]             loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Add2:i\d+>> Add [<<Add1>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add2>>,<<I1>>]              loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:               ArraySet [{{l\d+}},{{i\d+}},<<Cnv>>] loop:<<Loop>>      outer_loop:none
  //
  /// CHECK-START: void HaddAltByte.rounding_halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (before)
  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get1:a\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Get2:a\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Add1:i\d+>> Add [<<Get1>>,<<Get2>>]             loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Add2:i\d+>> Add [<<Add1>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add2>>,<<I1>>]              loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
  //
  /// CHECK-START-ARM: void HaddAltByte.rounding_halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>]  packed_type:Uint8 rounded:true loop:<<Loop>> outer_loop:none
  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
  //
  /// CHECK-START-ARM64: void HaddAltByte.rounding_halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
  /// CHECK-IF:     hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true'
  //
  //      HalvingAdd idiom is not supported for SVE.
  ///     CHECK-NOT: VecHalvingAdd
  //
  /// CHECK-ELSE:
  //
  ///     CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
  ///     CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
  ///     CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>]  packed_type:Uint8 rounded:true loop:<<Loop>> outer_loop:none
  ///     CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
  //
  /// CHECK-FI:
  //
  // Unsigned rounding halving add: masks both operands with 0xff, adds 1 to
  // their sum, shifts logically right by one, and narrows the result to byte.
  private static void rounding_halving_add_unsigned(byte[] b1, byte[] b2, byte[] bo) {
    int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
    for (int i = 0; i < min_length; i++) {
      bo[i] = (byte) (((b1[i] & 0xff) + (b2[i] & 0xff) + 1) >>> 1);
    }
  }

  /// CHECK-START: void HaddAltByte.halving_add_signed_constant(byte[], byte[]) loop_optimization (before)
  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
  /// CHECK-DAG: <<I127:i\d+>> IntConstant 127                     loop:none
  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get:b\d+>>  ArrayGet                            loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Add:i\d+>>  Add [<<Get>>,<<I127>>]              loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
  //
  /// CHECK-START-ARM: void HaddAltByte.halving_add_signed_constant(byte[], byte[]) loop_optimization (after)
  /// CHECK-DAG: <<I127:i\d+>> IntConstant 127                       loop:none
  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I127>>]         loop:none
  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Int8 rounded:false loop:<<Loop>> outer_loop:none
  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
  //
  /// CHECK-START-ARM64: void HaddAltByte.halving_add_signed_constant(byte[], byte[]) loop_optimization (after)
  /// CHECK-IF:     hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true'
  //
  //      HalvingAdd idiom is not supported for SVE.
  ///     CHECK-NOT: VecHalvingAdd
  //
  /// CHECK-ELSE:
  //
  ///     CHECK-DAG: <<I127:i\d+>> IntConstant 127                       loop:none
  ///     CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I127>>]         loop:none
  ///     CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
  ///     CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Int8 rounded:false loop:<<Loop>> outer_loop:none
  ///     CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
  //
  /// CHECK-FI:
  //
  // Signed halving add against a constant: the second operand is the fixed
  // value 0x7f (127) instead of an array element.
  private static void halving_add_signed_constant(byte[] b1, byte[] bo) {
    int min_length = Math.min(bo.length, b1.length);
    for (int i = 0; i < min_length; i++) {
      bo[i] = (byte) ((b1[i] + 0x7f) >>> 1);
    }
  }

  /// CHECK-START: void HaddAltByte.halving_add_unsigned_constant(byte[], byte[]) instruction_simplifier (before)
  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255                     loop:none
  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get:b\d+>>  ArrayGet                            loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<And:i\d+>>  And [<<Get>>,<<I255>>]              loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Add:i\d+>>  Add [<<And>>,<<I255>>]              loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:               ArraySet [{{l\d+}},{{i\d+}},<<Cnv>>] loop:<<Loop>>      outer_loop:none
  //
  /// CHECK-START: void HaddAltByte.halving_add_unsigned_constant(byte[], byte[]) loop_optimization (before)
  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255                     loop:none
  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get:a\d+>>  ArrayGet                            loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Add:i\d+>>  Add [<<Get>>,<<I255>>]              loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Add>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
  //
  /// CHECK-START-ARM: void HaddAltByte.halving_add_unsigned_constant(byte[], byte[]) loop_optimization (after)
  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255                       loop:none
  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I255>>]         loop:none
  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint8 rounded:false loop:<<Loop>> outer_loop:none
  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
  //
  /// CHECK-START-ARM64: void HaddAltByte.halving_add_unsigned_constant(byte[], byte[]) loop_optimization (after)
  /// CHECK-IF:     hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true'
  //
  //      HalvingAdd idiom is not supported for SVE.
  ///     CHECK-NOT: VecHalvingAdd
  //
  /// CHECK-ELSE:
  //
  ///     CHECK-DAG: <<I255:i\d+>> IntConstant 255                       loop:none
  ///     CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I255>>]         loop:none
  ///     CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
  ///     CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint8 rounded:false loop:<<Loop>> outer_loop:none
  ///     CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
  //
  /// CHECK-FI:
  //
  // Unsigned halving add against a constant: b1[i] is masked with 0xff and
  // the second operand is the fixed value 0xff (255).
  private static void halving_add_unsigned_constant(byte[] b1, byte[] bo) {
    int min_length = Math.min(bo.length, b1.length);
    for (int i = 0; i < min_length; i++) {
      bo[i] = (byte) (((b1[i] & 0xff) + 0xff) >>> 1);
    }
  }

  /**
   * Test driver: fills the shared operand arrays, runs every kernel, and
   * compares each output element against a scalar reference. Throws an
   * {@link Error} on the first mismatch; prints "HaddAltByte passed" otherwise.
   */
  public static void main() {
    // Initialize cross-values to test all cases, and also
    // set up some extra values to exercise the cleanup loop.
    int k = 0;
    for (int i = 0; i < N; i++) {
      for (int j = 0; j < N; j++) {
        sB1[k] = (byte) i;
        sB2[k] = (byte) j;
        k++;
      }
    }
    for (int i = 0; i < EXTRA; i++) {
      sB1[k] = (byte) i;
      sB2[k] = 100;
      k++;
    }
    // Sanity check: every slot of the M-element arrays has been written.
    expectEquals(k, M);

    // Test halving add idioms. Note that the expected result is computed
    // with the arithmetic >> to demonstrate the computed narrower result
    // does not depend on the wider >> or >>>.
    halving_add_signed(sB1, sB2, sBo);
    for (int i = 0; i < M; i++) {
      byte e = (byte) ((sB1[i] + sB2[i]) >> 1);
      expectEquals(e, sBo[i]);
    }
    halving_add_unsigned(sB1, sB2, sBo);
    for (int i = 0; i < M; i++) {
      byte e = (byte) (((sB1[i] & 0xff) + (sB2[i] & 0xff)) >> 1);
      expectEquals(e, sBo[i]);
    }
    rounding_halving_add_signed(sB1, sB2, sBo);
    for (int i = 0; i < M; i++) {
      byte e = (byte) ((sB1[i] + sB2[i] + 1) >> 1);
      expectEquals(e, sBo[i]);
    }
    rounding_halving_add_unsigned(sB1, sB2, sBo);
    for (int i = 0; i < M; i++) {
      byte e = (byte) (((sB1[i] & 0xff) + (sB2[i] & 0xff) + 1) >> 1);
      expectEquals(e, sBo[i]);
    }
    halving_add_signed_constant(sB1, sBo);
    for (int i = 0; i < M; i++) {
      byte e = (byte) ((sB1[i] + 0x7f) >> 1);
      expectEquals(e, sBo[i]);
    }
    halving_add_unsigned_constant(sB1, sBo);
    for (int i = 0; i < M; i++) {
      byte e = (byte) (((sB1[i] & 0xff) + 0xff) >> 1);
      expectEquals(e, sBo[i]);
    }

    System.out.println("HaddAltByte passed");
  }

  /** Throws an {@link Error} naming both values when {@code expected} differs from {@code result}. */
  private static void expectEquals(int expected, int result) {
    if (expected != result) {
      throw new Error("Expected: " + expected + ", found: " + result);
    }
  }
}