; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core2 < %s | FileCheck --check-prefix=SSE2-CODEGEN %s
; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s

; Vector logical-right-shift (lshr) tests for a core2 target. The opt
; invocation verifies the cost reported for each lshr; the llc invocation
; verifies that the expected packed-shift mnemonic shows up in the assembly.
; The check directives for each function are listed directly above it.

; <2 x i16>, per-lane shift amount taken from the second argument.
; SSE2: shift2i16
; SSE2: cost of 4 {{.*}} lshr
; SSE2-CODEGEN: shift2i16
; SSE2-CODEGEN: psrlq
%shifttype = type <2 x i16>
define %shifttype @shift2i16(%shifttype %a, %shifttype %b) {
entry:
  %shr = lshr %shifttype %a, %b
  ret %shifttype %shr
}

; <4 x i16>, per-lane shift amount taken from the second argument.
; SSE2: shift4i16
; SSE2: cost of 16 {{.*}} lshr
; SSE2-CODEGEN: shift4i16
; SSE2-CODEGEN: psrld
%shifttype4i16 = type <4 x i16>
define %shifttype4i16 @shift4i16(%shifttype4i16 %a, %shifttype4i16 %b) {
entry:
  %shr = lshr %shifttype4i16 %a, %b
  ret %shifttype4i16 %shr
}

; <8 x i16>, per-lane shift amount taken from the second argument.
; SSE2: shift8i16
; SSE2: cost of 32 {{.*}} lshr
; SSE2-CODEGEN: shift8i16
; SSE2-CODEGEN: psrlw
%shifttype8i16 = type <8 x i16>
define %shifttype8i16 @shift8i16(%shifttype8i16 %a, %shifttype8i16 %b) {
entry:
  %shr = lshr %shifttype8i16 %a, %b
  ret %shifttype8i16 %shr
}

; <16 x i16>, per-lane shift amount taken from the second argument.
; SSE2: shift16i16
; SSE2: cost of 64 {{.*}} lshr
; SSE2-CODEGEN: shift16i16
; SSE2-CODEGEN: psrlw
%shifttype16i16 = type <16 x i16>
define %shifttype16i16 @shift16i16(%shifttype16i16 %a, %shifttype16i16 %b) {
entry:
  %shr = lshr %shifttype16i16 %a, %b
  ret %shifttype16i16 %shr
}

; <32 x i16>, per-lane shift amount taken from the second argument.
; SSE2: shift32i16
; SSE2: cost of 128 {{.*}} lshr
; SSE2-CODEGEN: shift32i16
; SSE2-CODEGEN: psrlw
%shifttype32i16 = type <32 x i16>
define %shifttype32i16 @shift32i16(%shifttype32i16 %a, %shifttype32i16 %b) {
entry:
  %shr = lshr %shifttype32i16 %a, %b
  ret %shifttype32i16 %shr
}

; <2 x i32>, per-lane shift amount taken from the second argument.
; SSE2: shift2i32
; SSE2: cost of 4 {{.*}} lshr
; SSE2-CODEGEN: shift2i32
; SSE2-CODEGEN: psrlq
%shifttype2i32 = type <2 x i32>
define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) {
entry:
  %shr = lshr %shifttype2i32 %a, %b
  ret %shifttype2i32 %shr
}

; <4 x i32>, per-lane shift amount taken from the second argument.
; SSE2: shift4i32
; SSE2: cost of 16 {{.*}} lshr
; SSE2-CODEGEN: shift4i32
; SSE2-CODEGEN: psrld
%shifttype4i32 = type <4 x i32>
define %shifttype4i32 @shift4i32(%shifttype4i32 %a, %shifttype4i32 %b) {
entry:
  %shr = lshr %shifttype4i32 %a, %b
  ret %shifttype4i32 %shr
}

; <8 x i32>, per-lane shift amount taken from the second argument.
; SSE2: shift8i32
; SSE2: cost of 32 {{.*}} lshr
; SSE2-CODEGEN: shift8i32
; SSE2-CODEGEN: psrld
%shifttype8i32 = type <8 x i32>
define %shifttype8i32 @shift8i32(%shifttype8i32 %a, %shifttype8i32 %b) {
entry:
  %shr = lshr %shifttype8i32 %a, %b
  ret %shifttype8i32 %shr
}

; <16 x i32>, per-lane shift amount taken from the second argument.
; SSE2: shift16i32
; SSE2: cost of 64 {{.*}} lshr
; SSE2-CODEGEN: shift16i32
; SSE2-CODEGEN: psrld
%shifttype16i32 = type <16 x i32>
define %shifttype16i32 @shift16i32(%shifttype16i32 %a, %shifttype16i32 %b) {
entry:
  %shr = lshr %shifttype16i32 %a, %b
  ret %shifttype16i32 %shr
}

; <32 x i32>, per-lane shift amount taken from the second argument.
; SSE2: shift32i32
; SSE2: cost of 128 {{.*}} lshr
; SSE2-CODEGEN: shift32i32
; SSE2-CODEGEN: psrld
%shifttype32i32 = type <32 x i32>
define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) {
entry:
  %shr = lshr %shifttype32i32 %a, %b
  ret %shifttype32i32 %shr
}

; <2 x i64>, per-lane shift amount taken from the second argument.
; SSE2: shift2i64
; SSE2: cost of 4 {{.*}} lshr
; SSE2-CODEGEN: shift2i64
; SSE2-CODEGEN: psrlq
%shifttype2i64 = type <2 x i64>
define %shifttype2i64 @shift2i64(%shifttype2i64 %a, %shifttype2i64 %b) {
entry:
  %shr = lshr %shifttype2i64 %a, %b
  ret %shifttype2i64 %shr
}

; <4 x i64>, per-lane shift amount taken from the second argument.
; SSE2: shift4i64
; SSE2: cost of 8 {{.*}} lshr
; SSE2-CODEGEN: shift4i64
; SSE2-CODEGEN: psrlq
%shifttype4i64 = type <4 x i64>
define %shifttype4i64 @shift4i64(%shifttype4i64 %a, %shifttype4i64 %b) {
entry:
  %shr = lshr %shifttype4i64 %a, %b
  ret %shifttype4i64 %shr
}

; <8 x i64>, per-lane shift amount taken from the second argument.
; SSE2: shift8i64
; SSE2: cost of 16 {{.*}} lshr
; SSE2-CODEGEN: shift8i64
; SSE2-CODEGEN: psrlq
%shifttype8i64 = type <8 x i64>
define %shifttype8i64 @shift8i64(%shifttype8i64 %a, %shifttype8i64 %b) {
entry:
  %shr = lshr %shifttype8i64 %a, %b
  ret %shifttype8i64 %shr
}

; <16 x i64>, per-lane shift amount taken from the second argument.
; SSE2: shift16i64
; SSE2: cost of 32 {{.*}} lshr
; SSE2-CODEGEN: shift16i64
; SSE2-CODEGEN: psrlq
%shifttype16i64 = type <16 x i64>
define %shifttype16i64 @shift16i64(%shifttype16i64 %a, %shifttype16i64 %b) {
entry:
  %shr = lshr %shifttype16i64 %a, %b
  ret %shifttype16i64 %shr
}

; <32 x i64>, per-lane shift amount taken from the second argument.
; SSE2: shift32i64
; SSE2: cost of 64 {{.*}} lshr
; SSE2-CODEGEN: shift32i64
; SSE2-CODEGEN: psrlq
%shifttype32i64 = type <32 x i64>
define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
entry:
  %shr = lshr %shifttype32i64 %a, %b
  ret %shifttype32i64 %shr
}

; <2 x i8>, per-lane shift amount taken from the second argument.
; SSE2: shift2i8
; SSE2: cost of 4 {{.*}} lshr
; SSE2-CODEGEN: shift2i8
; SSE2-CODEGEN: psrlq
%shifttype2i8 = type <2 x i8>
define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) {
entry:
  %shr = lshr %shifttype2i8 %a, %b
  ret %shifttype2i8 %shr
}

; <4 x i8>, per-lane shift amount taken from the second argument.
; SSE2: shift4i8
; SSE2: cost of 16 {{.*}} lshr
; SSE2-CODEGEN: shift4i8
; SSE2-CODEGEN: psrld
%shifttype4i8 = type <4 x i8>
define %shifttype4i8 @shift4i8(%shifttype4i8 %a, %shifttype4i8 %b) {
entry:
  %shr = lshr %shifttype4i8 %a, %b
  ret %shifttype4i8 %shr
}

; <8 x i8>, per-lane shift amount taken from the second argument.
; SSE2: shift8i8
; SSE2: cost of 32 {{.*}} lshr
; SSE2-CODEGEN: shift8i8
; SSE2-CODEGEN: psrlw
%shifttype8i8 = type <8 x i8>
define %shifttype8i8 @shift8i8(%shifttype8i8 %a, %shifttype8i8 %b) {
entry:
  %shr = lshr %shifttype8i8 %a, %b
  ret %shifttype8i8 %shr
}

; <16 x i8>, per-lane shift amount taken from the second argument.
; SSE2: shift16i8
; SSE2: cost of 26 {{.*}} lshr
; SSE2-CODEGEN: shift16i8
; SSE2-CODEGEN: psrlw
%shifttype16i8 = type <16 x i8>
define %shifttype16i8 @shift16i8(%shifttype16i8 %a, %shifttype16i8 %b) {
entry:
  %shr = lshr %shifttype16i8 %a, %b
  ret %shifttype16i8 %shr
}

; <32 x i8>, per-lane shift amount taken from the second argument.
; SSE2: shift32i8
; SSE2: cost of 52 {{.*}} lshr
; SSE2-CODEGEN: shift32i8
; SSE2-CODEGEN: psrlw
%shifttype32i8 = type <32 x i8>
define %shifttype32i8 @shift32i8(%shifttype32i8 %a, %shifttype32i8 %b) {
entry:
  %shr = lshr %shifttype32i8 %a, %b
  ret %shifttype32i8 %shr
}

; Test shift by a constant vector.
; Every function below shifts %a by a vector whose lanes are all the literal 3;
; the %b parameter is declared but not used by the shift.

; <2 x i16> shifted by a constant vector of 3s.
; SSE2: shift2i16const
; SSE2: cost of 1 {{.*}} lshr
; SSE2-CODEGEN: shift2i16const
; SSE2-CODEGEN: psrlq $3
%shifttypec = type <2 x i16>
define %shifttypec @shift2i16const(%shifttypec %a, %shifttypec %b) {
entry:
  %shr = lshr %shifttypec %a, <i16 3, i16 3>
  ret %shifttypec %shr
}

; <4 x i16> shifted by a constant vector of 3s.
; SSE2: shift4i16const
; SSE2: cost of 1 {{.*}} lshr
; SSE2-CODEGEN: shift4i16const
; SSE2-CODEGEN: psrld $3
%shifttypec4i16 = type <4 x i16>
define %shifttypec4i16 @shift4i16const(%shifttypec4i16 %a, %shifttypec4i16 %b) {
entry:
  %shr = lshr %shifttypec4i16 %a, <i16 3, i16 3, i16 3, i16 3>
  ret %shifttypec4i16 %shr
}

; <8 x i16> shifted by a constant vector of 3s.
; SSE2: shift8i16const
; SSE2: cost of 1 {{.*}} lshr
; SSE2-CODEGEN: shift8i16const
; SSE2-CODEGEN: psrlw $3
%shifttypec8i16 = type <8 x i16>
define %shifttypec8i16 @shift8i16const(%shifttypec8i16 %a, %shifttypec8i16 %b) {
entry:
  %shr = lshr %shifttypec8i16 %a,
      <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret %shifttypec8i16 %shr
}

; <16 x i16> shifted by a constant vector of 3s.
; SSE2: shift16i16const
; SSE2: cost of 2 {{.*}} lshr
; SSE2-CODEGEN: shift16i16const
; SSE2-CODEGEN: psrlw $3
%shifttypec16i16 = type <16 x i16>
define %shifttypec16i16 @shift16i16const(%shifttypec16i16 %a, %shifttypec16i16 %b) {
entry:
  %shr = lshr %shifttypec16i16 %a,
      <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3,
       i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret %shifttypec16i16 %shr
}

; <32 x i16> shifted by a constant vector of 3s.
; SSE2: shift32i16const
; SSE2: cost of 4 {{.*}} lshr
; SSE2-CODEGEN: shift32i16const
; SSE2-CODEGEN: psrlw $3
%shifttypec32i16 = type <32 x i16>
define %shifttypec32i16 @shift32i16const(%shifttypec32i16 %a, %shifttypec32i16 %b) {
entry:
  %shr = lshr %shifttypec32i16 %a,
      <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3,
       i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3,
       i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3,
       i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret %shifttypec32i16 %shr
}

; <2 x i32> shifted by a constant vector of 3s.
; SSE2: shift2i32c
; SSE2: cost of 1 {{.*}} lshr
; SSE2-CODEGEN: shift2i32c
; SSE2-CODEGEN: psrlq $3
%shifttypec2i32 = type <2 x i32>
define %shifttypec2i32 @shift2i32c(%shifttypec2i32 %a, %shifttypec2i32 %b) {
entry:
  %shr = lshr %shifttypec2i32 %a, <i32 3, i32 3>
  ret %shifttypec2i32 %shr
}

; <4 x i32> shifted by a constant vector of 3s.
; SSE2: shift4i32c
; SSE2: cost of 1 {{.*}} lshr
; SSE2-CODEGEN: shift4i32c
; SSE2-CODEGEN: psrld $3
%shifttypec4i32 = type <4 x i32>
define %shifttypec4i32 @shift4i32c(%shifttypec4i32 %a, %shifttypec4i32 %b) {
entry:
  %shr = lshr %shifttypec4i32 %a, <i32 3, i32 3, i32 3, i32 3>
  ret %shifttypec4i32 %shr
}

; <8 x i32> shifted by a constant vector of 3s.
; SSE2: shift8i32c
; SSE2: cost of 2 {{.*}} lshr
; SSE2-CODEGEN: shift8i32c
; SSE2-CODEGEN: psrld $3
%shifttypec8i32 = type <8 x i32>
define %shifttypec8i32 @shift8i32c(%shifttypec8i32 %a, %shifttypec8i32 %b) {
entry:
  %shr = lshr %shifttypec8i32 %a,
      <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret %shifttypec8i32 %shr
}

; <16 x i32> shifted by a constant vector of 3s.
; SSE2: shift16i32c
; SSE2: cost of 4 {{.*}} lshr
; SSE2-CODEGEN: shift16i32c
; SSE2-CODEGEN: psrld $3
%shifttypec16i32 = type <16 x i32>
define %shifttypec16i32 @shift16i32c(%shifttypec16i32 %a, %shifttypec16i32 %b) {
entry:
  %shr = lshr %shifttypec16i32 %a,
      <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3,
       i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret %shifttypec16i32 %shr
}

; <32 x i32> shifted by a constant vector of 3s.
; SSE2: shift32i32c
; SSE2: cost of 8 {{.*}} lshr
; SSE2-CODEGEN: shift32i32c
; SSE2-CODEGEN: psrld $3
%shifttypec32i32 = type <32 x i32>
define %shifttypec32i32 @shift32i32c(%shifttypec32i32 %a, %shifttypec32i32 %b) {
entry:
  %shr = lshr %shifttypec32i32 %a,
      <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3,
       i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3,
       i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3,
       i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret %shifttypec32i32 %shr
}

; <2 x i64> shifted by a constant vector of 3s.
; SSE2: shift2i64c
; SSE2: cost of 1 {{.*}} lshr
; SSE2-CODEGEN: shift2i64c
; SSE2-CODEGEN: psrlq $3
%shifttypec2i64 = type <2 x i64>
define %shifttypec2i64 @shift2i64c(%shifttypec2i64 %a, %shifttypec2i64 %b) {
entry:
  %shr = lshr %shifttypec2i64 %a, <i64 3, i64 3>
  ret %shifttypec2i64 %shr
}

; <4 x i64> shifted by a constant vector of 3s.
; SSE2: shift4i64c
; SSE2: cost of 2 {{.*}} lshr
; SSE2-CODEGEN: shift4i64c
; SSE2-CODEGEN: psrlq $3
%shifttypec4i64 = type <4 x i64>
define %shifttypec4i64 @shift4i64c(%shifttypec4i64 %a, %shifttypec4i64 %b) {
entry:
  %shr = lshr %shifttypec4i64 %a, <i64 3, i64 3, i64 3, i64 3>
  ret %shifttypec4i64 %shr
}

; <8 x i64> shifted by a constant vector of 3s.
; SSE2: shift8i64c
; SSE2: cost of 4 {{.*}} lshr
; SSE2-CODEGEN: shift8i64c
; SSE2-CODEGEN: psrlq $3
%shifttypec8i64 = type <8 x i64>
define %shifttypec8i64 @shift8i64c(%shifttypec8i64 %a, %shifttypec8i64 %b) {
entry:
  %shr = lshr %shifttypec8i64 %a,
      <i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3>
  ret %shifttypec8i64 %shr
}

; <16 x i64> shifted by a constant vector of 3s.
; SSE2: shift16i64c
; SSE2: cost of 8 {{.*}} lshr
; SSE2-CODEGEN: shift16i64c
; SSE2-CODEGEN: psrlq $3
%shifttypec16i64 = type <16 x i64>
define %shifttypec16i64 @shift16i64c(%shifttypec16i64 %a, %shifttypec16i64 %b) {
entry:
  %shr = lshr %shifttypec16i64 %a,
      <i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3,
       i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3>
  ret %shifttypec16i64 %shr
}

; <32 x i64> shifted by a constant vector of 3s.
; SSE2: shift32i64c
; SSE2: cost of 16 {{.*}} lshr
; SSE2-CODEGEN: shift32i64c
; SSE2-CODEGEN: psrlq $3
%shifttypec32i64 = type <32 x i64>
define %shifttypec32i64 @shift32i64c(%shifttypec32i64 %a, %shifttypec32i64 %b) {
entry:
  %shr = lshr %shifttypec32i64 %a,
      <i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3,
       i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3,
       i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3,
       i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3>
  ret %shifttypec32i64 %shr
}

; <2 x i8> shifted by a constant vector of 3s.
; SSE2: shift2i8c
; SSE2: cost of 1 {{.*}} lshr
; SSE2-CODEGEN: shift2i8c
; SSE2-CODEGEN: psrlq $3
%shifttypec2i8 = type <2 x i8>
define %shifttypec2i8 @shift2i8c(%shifttypec2i8 %a, %shifttypec2i8 %b) {
entry:
  %shr = lshr %shifttypec2i8 %a, <i8 3, i8 3>
  ret %shifttypec2i8 %shr
}

; <4 x i8> shifted by a constant vector of 3s.
; SSE2: shift4i8c
; SSE2: cost of 1 {{.*}} lshr
; SSE2-CODEGEN: shift4i8c
; SSE2-CODEGEN: psrld $3
%shifttypec4i8 = type <4 x i8>
define %shifttypec4i8 @shift4i8c(%shifttypec4i8 %a, %shifttypec4i8 %b) {
entry:
  %shr = lshr %shifttypec4i8 %a, <i8 3, i8 3, i8 3, i8 3>
  ret %shifttypec4i8 %shr
}

; <8 x i8> shifted by a constant vector of 3s.
; SSE2: shift8i8c
; SSE2: cost of 1 {{.*}} lshr
; SSE2-CODEGEN: shift8i8c
; SSE2-CODEGEN: psrlw $3
%shifttypec8i8 = type <8 x i8>
define %shifttypec8i8 @shift8i8c(%shifttypec8i8 %a, %shifttypec8i8 %b) {
entry:
  %shr = lshr %shifttypec8i8 %a,
      <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret %shifttypec8i8 %shr
}

; <16 x i8> shifted by a constant vector of 3s.
; SSE2: shift16i8c
; SSE2: cost of 1 {{.*}} lshr
; SSE2-CODEGEN: shift16i8c
; SSE2-CODEGEN: psrlw $3
%shifttypec16i8 = type <16 x i8>
define %shifttypec16i8 @shift16i8c(%shifttypec16i8 %a, %shifttypec16i8 %b) {
entry:
  %shr = lshr %shifttypec16i8 %a,
      <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
       i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret %shifttypec16i8 %shr
}

; <32 x i8> shifted by a constant vector of 3s.
; SSE2: shift32i8c
; SSE2: cost of 2 {{.*}} lshr
; SSE2-CODEGEN: shift32i8c
; SSE2-CODEGEN: psrlw $3
%shifttypec32i8 = type <32 x i8>
define %shifttypec32i8 @shift32i8c(%shifttypec32i8 %a, %shifttypec32i8 %b) {
entry:
  %shr = lshr %shifttypec32i8 %a,
      <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
       i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
       i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
       i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret %shifttypec32i8 %shr
}