; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s

; Exercises how optional IR flags travel from scalar instructions to the
; vector instruction that replaces them (PR20802). The rule under test:
; a flag shows up on the vector instruction only when *every* scalar
; instruction in the bundle carries that flag.
;
; Each function below follows the same shape: four consecutive elements are
; loaded from %x, one scalar operation is applied per lane, and the results
; are stored back to the same four slots.

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; All four scalar shifts are 'exact', so the vector shift is expected to be
; 'exact' as well.
; CHECK-LABEL: @exact(
; CHECK: lshr exact <4 x i32>
define void @exact(i32* %x) {
  %p0 = getelementptr inbounds i32, i32* %x, i64 0
  %p1 = getelementptr inbounds i32, i32* %x, i64 1
  %p2 = getelementptr inbounds i32, i32* %x, i64 2
  %p3 = getelementptr inbounds i32, i32* %x, i64 3

  %v0 = load i32, i32* %p0, align 4
  %v1 = load i32, i32* %p1, align 4
  %v2 = load i32, i32* %p2, align 4
  %v3 = load i32, i32* %p3, align 4

  %r0 = lshr exact i32 %v0, 1
  %r1 = lshr exact i32 %v1, 1
  %r2 = lshr exact i32 %v2, 1
  %r3 = lshr exact i32 %v3, 1

  store i32 %r0, i32* %p0, align 4
  store i32 %r1, i32* %p1, align 4
  store i32 %r2, i32* %p2, align 4
  store i32 %r3, i32* %p3, align 4

  ret void
}

; The second lane's shift lacks 'exact', so the vector shift is expected to
; carry no 'exact' flag.
; CHECK-LABEL: @not_exact(
; CHECK: lshr <4 x i32>
define void @not_exact(i32* %x) {
  %p0 = getelementptr inbounds i32, i32* %x, i64 0
  %p1 = getelementptr inbounds i32, i32* %x, i64 1
  %p2 = getelementptr inbounds i32, i32* %x, i64 2
  %p3 = getelementptr inbounds i32, i32* %x, i64 3

  %v0 = load i32, i32* %p0, align 4
  %v1 = load i32, i32* %p1, align 4
  %v2 = load i32, i32* %p2, align 4
  %v3 = load i32, i32* %p3, align 4

  %r0 = lshr exact i32 %v0, 1
  ; This is the one lane without the flag.
  %r1 = lshr i32 %v1, 1
  %r2 = lshr exact i32 %v2, 1
  %r3 = lshr exact i32 %v3, 1

  store i32 %r0, i32* %p0, align 4
  store i32 %r1, i32* %p1, align 4
  store i32 %r2, i32* %p2, align 4
  store i32 %r3, i32* %p3, align 4

  ret void
}

; All four scalar adds are 'nsw', so the vector add is expected to be 'nsw'.
; CHECK-LABEL: @nsw(
; CHECK: add nsw <4 x i32>
define void @nsw(i32* %x) {
  %idx1 = getelementptr inbounds i32, i32* %x, i64 0
  %idx2 = getelementptr inbounds i32, i32* %x, i64 1
  %idx3 = getelementptr inbounds i32, i32* %x, i64 2
  %idx4 = getelementptr inbounds i32, i32* %x, i64 3

  %load1 = load i32, i32* %idx1, align 4
  %load2 = load i32, i32* %idx2, align 4
  %load3 = load i32, i32* %idx3, align 4
  %load4 = load i32, i32* %idx4, align 4

  %op1 = add nsw i32 %load1, 1
  %op2 = add nsw i32 %load2, 1
  %op3 = add nsw i32 %load3, 1
  %op4 = add nsw i32 %load4, 1

  store i32 %op1, i32* %idx1, align 4
  store i32 %op2, i32* %idx2, align 4
  store i32 %op3, i32* %idx3, align 4
  store i32 %op4, i32* %idx4, align 4

  ret void
}

; The last lane's add lacks 'nsw', so the vector add is expected to carry no
; 'nsw' flag.
; CHECK-LABEL: @not_nsw(
; CHECK: add <4 x i32>
define void @not_nsw(i32* %x) {
  %idx1 = getelementptr inbounds i32, i32* %x, i64 0
  %idx2 = getelementptr inbounds i32, i32* %x, i64 1
  %idx3 = getelementptr inbounds i32, i32* %x, i64 2
  %idx4 = getelementptr inbounds i32, i32* %x, i64 3

  %load1 = load i32, i32* %idx1, align 4
  %load2 = load i32, i32* %idx2, align 4
  %load3 = load i32, i32* %idx3, align 4
  %load4 = load i32, i32* %idx4, align 4

  %op1 = add nsw i32 %load1, 1
  %op2 = add nsw i32 %load2, 1
  %op3 = add nsw i32 %load3, 1
  ; This is the one lane without the flag.
  %op4 = add i32 %load4, 1

  store i32 %op1, i32* %idx1, align 4
  store i32 %op2, i32* %idx2, align 4
  store i32 %op3, i32* %idx3, align 4
  store i32 %op4, i32* %idx4, align 4

  ret void
}

; All four scalar adds are 'nuw', so the vector add is expected to be 'nuw'.
; CHECK-LABEL: @nuw(
; CHECK: add nuw <4 x i32>
define void @nuw(i32* %x) {
  %idx1 = getelementptr inbounds i32, i32* %x, i64 0
  %idx2 = getelementptr inbounds i32, i32* %x, i64 1
  %idx3 = getelementptr inbounds i32, i32* %x, i64 2
  %idx4 = getelementptr inbounds i32, i32* %x, i64 3

  %load1 = load i32, i32* %idx1, align 4
  %load2 = load i32, i32* %idx2, align 4
  %load3 = load i32, i32* %idx3, align 4
  %load4 = load i32, i32* %idx4, align 4

  %op1 = add nuw i32 %load1, 1
  %op2 = add nuw i32 %load2, 1
  %op3 = add nuw i32 %load3, 1
  %op4 = add nuw i32 %load4, 1

  store i32 %op1, i32* %idx1, align 4
  store i32 %op2, i32* %idx2, align 4
  store i32 %op3, i32* %idx3, align 4
  store i32 %op4, i32* %idx4, align 4

  ret void
}

; Only the first and last lanes are 'nuw'; the two middle lanes are not, so
; the vector add is expected to carry no 'nuw' flag.
; CHECK-LABEL: @not_nuw(
; CHECK: add <4 x i32>
define void @not_nuw(i32* %x) {
  %idx1 = getelementptr inbounds i32, i32* %x, i64 0
  %idx2 = getelementptr inbounds i32, i32* %x, i64 1
  %idx3 = getelementptr inbounds i32, i32* %x, i64 2
  %idx4 = getelementptr inbounds i32, i32* %x, i64 3

  %load1 = load i32, i32* %idx1, align 4
  %load2 = load i32, i32* %idx2, align 4
  %load3 = load i32, i32* %idx3, align 4
  %load4 = load i32, i32* %idx4, align 4

  %op1 = add nuw i32 %load1, 1
  %op2 = add i32 %load2, 1
  %op3 = add i32 %load3, 1
  %op4 = add nuw i32 %load4, 1

  store i32 %op1, i32* %idx1, align 4
  store i32 %op2, i32* %idx2, align 4
  store i32 %op3, i32* %idx3, align 4
  store i32 %op4, i32* %idx4, align 4

  ret void
}

; Each lane has a different set of fast-math flags, but 'nnan' is present on
; all of them, so the vector fadd is expected to keep exactly 'nnan'.
; CHECK-LABEL: @nnan(
; CHECK: fadd nnan <4 x float>
define void @nnan(float* %x) {
  %idx1 = getelementptr inbounds float, float* %x, i64 0
  %idx2 = getelementptr inbounds float, float* %x, i64 1
  %idx3 = getelementptr inbounds float, float* %x, i64 2
  %idx4 = getelementptr inbounds float, float* %x, i64 3

  %load1 = load float, float* %idx1, align 4
  %load2 = load float, float* %idx2, align 4
  %load3 = load float, float* %idx3, align 4
  %load4 = load float, float* %idx4, align 4

  %op1 = fadd fast nnan float %load1, 1.0
  %op2 = fadd nnan ninf float %load2, 1.0
  %op3 = fadd nsz nnan float %load3, 1.0
  %op4 = fadd arcp nnan float %load4, 1.0

  store float %op1, float* %idx1, align 4
  store float %op2, float* %idx2, align 4
  store float %op3, float* %idx3, align 4
  store float %op4, float* %idx4, align 4

  ret void
}

; Every lane has exactly one fast-math flag and no two lanes share one, so
; the vector fadd is expected to carry no fast-math flags at all.
; CHECK-LABEL: @not_nnan(
; CHECK: fadd <4 x float>
define void @not_nnan(float* %x) {
  %idx1 = getelementptr inbounds float, float* %x, i64 0
  %idx2 = getelementptr inbounds float, float* %x, i64 1
  %idx3 = getelementptr inbounds float, float* %x, i64 2
  %idx4 = getelementptr inbounds float, float* %x, i64 3

  %load1 = load float, float* %idx1, align 4
  %load2 = load float, float* %idx2, align 4
  %load3 = load float, float* %idx3, align 4
  %load4 = load float, float* %idx4, align 4

  %op1 = fadd nnan float %load1, 1.0
  %op2 = fadd ninf float %load2, 1.0
  %op3 = fadd nsz float %load3, 1.0
  %op4 = fadd arcp float %load4, 1.0

  store float %op1, float* %idx1, align 4
  store float %op2, float* %idx2, align 4
  store float %op3, float* %idx3, align 4
  store float %op4, float* %idx4, align 4

  ret void
}

; Every lane is 'fast' (some also spell out individual flags next to it), so
; the vector fadd is expected to be printed as 'fast'.
; CHECK-LABEL: @only_fast(
; CHECK: fadd fast <4 x float>
define void @only_fast(float* %x) {
  %idx1 = getelementptr inbounds float, float* %x, i64 0
  %idx2 = getelementptr inbounds float, float* %x, i64 1
  %idx3 = getelementptr inbounds float, float* %x, i64 2
  %idx4 = getelementptr inbounds float, float* %x, i64 3

  %load1 = load float, float* %idx1, align 4
  %load2 = load float, float* %idx2, align 4
  %load3 = load float, float* %idx3, align 4
  %load4 = load float, float* %idx4, align 4

  %op1 = fadd fast nnan float %load1, 1.0
  %op2 = fadd fast nnan ninf float %load2, 1.0
  %op3 = fadd fast nsz nnan float %load3, 1.0
  %op4 = fadd arcp nnan fast float %load4, 1.0

  store float %op1, float* %idx1, align 4
  store float %op2, float* %idx2, align 4
  store float %op3, float* %idx3, align 4
  store float %op4, float* %idx4, align 4

  ret void
}

; Three lanes are 'fast' and the last one is only 'arcp'. The expectation is
; that the vector fadd keeps just 'arcp' (per the LangRef, 'fast' includes
; 'arcp', so 'arcp' is the only flag common to all four lanes).
; CHECK-LABEL: @only_arcp(
; CHECK: fadd arcp <4 x float>
define void @only_arcp(float* %x) {
  %idx1 = getelementptr inbounds float, float* %x, i64 0
  %idx2 = getelementptr inbounds float, float* %x, i64 1
  %idx3 = getelementptr inbounds float, float* %x, i64 2
  %idx4 = getelementptr inbounds float, float* %x, i64 3

  %load1 = load float, float* %idx1, align 4
  %load2 = load float, float* %idx2, align 4
  %load3 = load float, float* %idx3, align 4
  %load4 = load float, float* %idx4, align 4

  %op1 = fadd fast float %load1, 1.0
  %op2 = fadd fast float %load2, 1.0
  %op3 = fadd fast float %load3, 1.0
  %op4 = fadd arcp float %load4, 1.0

  store float %op1, float* %idx1, align 4
  store float %op2, float* %idx2, align 4
  store float %op3, float* %idx3, align 4
  store float %op4, float* %idx4, align 4

  ret void
}

; Lanes alternate add/sub. The expectations name one vector add and one
; vector sub. Here every scalar add and every scalar sub is 'nsw', so both
; vector instructions are expected to be 'nsw'.
; CHECK-LABEL: @addsub_all_nsw(
; CHECK: add nsw <4 x i32>
; CHECK: sub nsw <4 x i32>
define void @addsub_all_nsw(i32* %x) {
  %idx1 = getelementptr inbounds i32, i32* %x, i64 0
  %idx2 = getelementptr inbounds i32, i32* %x, i64 1
  %idx3 = getelementptr inbounds i32, i32* %x, i64 2
  %idx4 = getelementptr inbounds i32, i32* %x, i64 3

  %load1 = load i32, i32* %idx1, align 4
  %load2 = load i32, i32* %idx2, align 4
  %load3 = load i32, i32* %idx3, align 4
  %load4 = load i32, i32* %idx4, align 4

  %op1 = add nsw i32 %load1, 1
  %op2 = sub nsw i32 %load2, 1
  %op3 = add nsw i32 %load3, 1
  %op4 = sub nsw i32 %load4, 1

  store i32 %op1, i32* %idx1, align 4
  store i32 %op2, i32* %idx2, align 4
  store i32 %op3, i32* %idx3, align 4
  store i32 %op4, i32* %idx4, align 4

  ret void
}

; Lanes alternate add/sub. Both scalar adds are 'nsw' but only one of the two
; scalar subs is, so the vector add is expected to be 'nsw' while the vector
; sub is expected to carry no flag.
; CHECK-LABEL: @addsub_some_nsw(
; CHECK: add nsw <4 x i32>
; CHECK: sub <4 x i32>
define void @addsub_some_nsw(i32* %x) {
  %idx1 = getelementptr inbounds i32, i32* %x, i64 0
  %idx2 = getelementptr inbounds i32, i32* %x, i64 1
  %idx3 = getelementptr inbounds i32, i32* %x, i64 2
  %idx4 = getelementptr inbounds i32, i32* %x, i64 3

  %load1 = load i32, i32* %idx1, align 4
  %load2 = load i32, i32* %idx2, align 4
  %load3 = load i32, i32* %idx3, align 4
  %load4 = load i32, i32* %idx4, align 4

  %op1 = add nsw i32 %load1, 1
  %op2 = sub nsw i32 %load2, 1
  %op3 = add nsw i32 %load3, 1
  ; This is the one sub without the flag.
  %op4 = sub i32 %load4, 1

  store i32 %op1, i32* %idx1, align 4
  store i32 %op2, i32* %idx2, align 4
  store i32 %op3, i32* %idx3, align 4
  store i32 %op4, i32* %idx4, align 4

  ret void
}

; Lanes alternate add/sub. One scalar add and one scalar sub lack 'nsw', so
; neither the vector add nor the vector sub is expected to carry the flag.
; CHECK-LABEL: @addsub_no_nsw(
; CHECK: add <4 x i32>
; CHECK: sub <4 x i32>
define void @addsub_no_nsw(i32* %x) {
  %idx1 = getelementptr inbounds i32, i32* %x, i64 0
  %idx2 = getelementptr inbounds i32, i32* %x, i64 1
  %idx3 = getelementptr inbounds i32, i32* %x, i64 2
  %idx4 = getelementptr inbounds i32, i32* %x, i64 3

  %load1 = load i32, i32* %idx1, align 4
  %load2 = load i32, i32* %idx2, align 4
  %load3 = load i32, i32* %idx3, align 4
  %load4 = load i32, i32* %idx4, align 4

  %op1 = add i32 %load1, 1
  %op2 = sub nsw i32 %load2, 1
  %op3 = add nsw i32 %load3, 1
  %op4 = sub i32 %load4, 1

  store i32 %op1, i32* %idx1, align 4
  store i32 %op2, i32* %idx2, align 4
  store i32 %op3, i32* %idx3, align 4
  store i32 %op4, i32* %idx4, align 4

  ret void
}