; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck -check-prefix=SI %s
; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -amdgpu-codegenprepare %s | FileCheck -check-prefix=VI %s

; AMDGPUCodeGenPrepare widens sub-32-bit integer ops on VI+ (tonga): operands
; are zext'ed (or sext'ed for signed ops) to i32, the op is done in i32 with
; wrap flags inferred, and the result is trunc'ed back. On SI the narrow ops
; are left untouched.

define amdgpu_kernel void @add_i3(i3 %a, i3 %b) {
; SI-LABEL: @add_i3(
; SI-NEXT: [[R:%.*]] = add i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @add_i3(
; VI-NEXT: [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = add i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @add_nsw_i3(i3 %a, i3 %b) {
; SI-LABEL: @add_nsw_i3(
; SI-NEXT: [[R:%.*]] = add nsw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @add_nsw_i3(
; VI-NEXT: [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = add nsw i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @add_nuw_i3(i3 %a, i3 %b) {
; SI-LABEL: @add_nuw_i3(
; SI-NEXT: [[R:%.*]] = add nuw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @add_nuw_i3(
; VI-NEXT: [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = add nuw i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @add_nuw_nsw_i3(i3 %a, i3 %b) {
; SI-LABEL: @add_nuw_nsw_i3(
; SI-NEXT: [[R:%.*]] = add nuw nsw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @add_nuw_nsw_i3(
; VI-NEXT: [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = add nuw nsw i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @sub_i3(i3 %a, i3 %b) {
; SI-LABEL: @sub_i3(
; SI-NEXT: [[R:%.*]] = sub i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @sub_i3(
; VI-NEXT: [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = sub nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = sub i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @sub_nsw_i3(i3 %a, i3 %b) {
; SI-LABEL: @sub_nsw_i3(
; SI-NEXT: [[R:%.*]] = sub nsw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @sub_nsw_i3(
; VI-NEXT: [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = sub nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = sub nsw i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @sub_nuw_i3(i3 %a, i3 %b) {
; SI-LABEL: @sub_nuw_i3(
; SI-NEXT: [[R:%.*]] = sub nuw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @sub_nuw_i3(
; VI-NEXT: [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = sub nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = sub nuw i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @sub_nuw_nsw_i3(i3 %a, i3 %b) {
; SI-LABEL: @sub_nuw_nsw_i3(
; SI-NEXT: [[R:%.*]] = sub nuw nsw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @sub_nuw_nsw_i3(
; VI-NEXT: [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = sub nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = sub nuw nsw i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @mul_i3(i3 %a, i3 %b) {
; SI-LABEL: @mul_i3(
; SI-NEXT: [[R:%.*]] = mul i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @mul_i3(
; VI-NEXT: [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = mul i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @mul_nsw_i3(i3 %a, i3 %b) {
; SI-LABEL: @mul_nsw_i3(
; SI-NEXT: [[R:%.*]] = mul nsw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @mul_nsw_i3(
; VI-NEXT: [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = mul nsw i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @mul_nuw_i3(i3 %a, i3 %b) {
; SI-LABEL: @mul_nuw_i3(
; SI-NEXT: [[R:%.*]] = mul nuw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @mul_nuw_i3(
; VI-NEXT: [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = mul nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = mul nuw i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @mul_nuw_nsw_i3(i3 %a, i3 %b) {
; SI-LABEL: @mul_nuw_nsw_i3(
; SI-NEXT: [[R:%.*]] = mul nuw nsw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @mul_nuw_nsw_i3(
; VI-NEXT: [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = mul nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = mul nuw nsw i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @shl_i3(i3 %a, i3 %b) {
; SI-LABEL: @shl_i3(
; SI-NEXT: [[R:%.*]] = shl i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @shl_i3(
; VI-NEXT: [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = shl i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @shl_nsw_i3(i3 %a, i3 %b) {
; SI-LABEL: @shl_nsw_i3(
; SI-NEXT: [[R:%.*]] = shl nsw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @shl_nsw_i3(
; VI-NEXT: [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = shl nsw i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @shl_nuw_i3(i3 %a, i3 %b) {
; SI-LABEL: @shl_nuw_i3(
; SI-NEXT: [[R:%.*]] = shl nuw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @shl_nuw_i3(
; VI-NEXT: [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = shl nuw i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @shl_nuw_nsw_i3(i3 %a, i3 %b) {
; SI-LABEL: @shl_nuw_nsw_i3(
; SI-NEXT: [[R:%.*]] = shl nuw nsw i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @shl_nuw_nsw_i3(
; VI-NEXT: [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = shl nuw nsw i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @lshr_i3(i3 %a, i3 %b) {
; SI-LABEL: @lshr_i3(
; SI-NEXT: [[R:%.*]] = lshr i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @lshr_i3(
; VI-NEXT: [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = lshr i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @lshr_exact_i3(i3 %a, i3 %b) {
; SI-LABEL: @lshr_exact_i3(
; SI-NEXT: [[R:%.*]] = lshr exact i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @lshr_exact_i3(
; VI-NEXT: [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = lshr exact i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = lshr exact i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @ashr_i3(i3 %a, i3 %b) {
; SI-LABEL: @ashr_i3(
; SI-NEXT: [[R:%.*]] = ashr i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @ashr_i3(
; VI-NEXT: [[TMP1:%.*]] = sext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = sext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = ashr i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = ashr i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @ashr_exact_i3(i3 %a, i3 %b) {
; SI-LABEL: @ashr_exact_i3(
; SI-NEXT: [[R:%.*]] = ashr exact i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @ashr_exact_i3(
; VI-NEXT: [[TMP1:%.*]] = sext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = sext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = ashr exact i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = ashr exact i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @and_i3(i3 %a, i3 %b) {
; SI-LABEL: @and_i3(
; SI-NEXT: [[R:%.*]] = and i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @and_i3(
; VI-NEXT: [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = and i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = and i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @or_i3(i3 %a, i3 %b) {
; SI-LABEL: @or_i3(
; SI-NEXT: [[R:%.*]] = or i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @or_i3(
; VI-NEXT: [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = or i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = or i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @xor_i3(i3 %a, i3 %b) {
; SI-LABEL: @xor_i3(
; SI-NEXT: [[R:%.*]] = xor i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i3 [[R]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @xor_i3(
; VI-NEXT: [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = xor i3 %a, %b
  store volatile i3 %r, i3 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @select_eq_i3(i3 %a, i3 %b) {
; SI-LABEL: @select_eq_i3(
; SI-NEXT: [[CMP:%.*]] = icmp eq i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT: store volatile i3 [[SEL]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @select_eq_i3(
; VI-NEXT: [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = zext i3 [[A]] to i32
; VI-NEXT: [[TMP5:%.*]] = zext i3 [[B]] to i32
; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT: store volatile i3 [[TMP7]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %cmp = icmp eq i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, i3 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @select_ne_i3(i3 %a, i3 %b) {
; SI-LABEL: @select_ne_i3(
; SI-NEXT: [[CMP:%.*]] = icmp ne i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT: store volatile i3 [[SEL]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @select_ne_i3(
; VI-NEXT: [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = zext i3 [[A]] to i32
; VI-NEXT: [[TMP5:%.*]] = zext i3 [[B]] to i32
; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT: store volatile i3 [[TMP7]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %cmp = icmp ne i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, i3 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @select_ugt_i3(i3 %a, i3 %b) {
; SI-LABEL: @select_ugt_i3(
; SI-NEXT: [[CMP:%.*]] = icmp ugt i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT: store volatile i3 [[SEL]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @select_ugt_i3(
; VI-NEXT: [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = icmp ugt i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = zext i3 [[A]] to i32
; VI-NEXT: [[TMP5:%.*]] = zext i3 [[B]] to i32
; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT: store volatile i3 [[TMP7]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %cmp = icmp ugt i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, i3 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @select_uge_i3(i3 %a, i3 %b) {
; SI-LABEL: @select_uge_i3(
; SI-NEXT: [[CMP:%.*]] = icmp uge i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT: store volatile i3 [[SEL]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @select_uge_i3(
; VI-NEXT: [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = icmp uge i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = zext i3 [[A]] to i32
; VI-NEXT: [[TMP5:%.*]] = zext i3 [[B]] to i32
; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT: store volatile i3 [[TMP7]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %cmp = icmp uge i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, i3 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @select_ult_i3(i3 %a, i3 %b) {
; SI-LABEL: @select_ult_i3(
; SI-NEXT: [[CMP:%.*]] = icmp ult i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT: store volatile i3 [[SEL]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @select_ult_i3(
; VI-NEXT: [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = zext i3 [[A]] to i32
; VI-NEXT: [[TMP5:%.*]] = zext i3 [[B]] to i32
; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT: store volatile i3 [[TMP7]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %cmp = icmp ult i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, i3 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @select_ule_i3(i3 %a, i3 %b) {
; SI-LABEL: @select_ule_i3(
; SI-NEXT: [[CMP:%.*]] = icmp ule i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT: store volatile i3 [[SEL]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @select_ule_i3(
; VI-NEXT: [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = icmp ule i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = zext i3 [[A]] to i32
; VI-NEXT: [[TMP5:%.*]] = zext i3 [[B]] to i32
; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT: store volatile i3 [[TMP7]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %cmp = icmp ule i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, i3 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @select_sgt_i3(i3 %a, i3 %b) {
; SI-LABEL: @select_sgt_i3(
; SI-NEXT: [[CMP:%.*]] = icmp sgt i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT: store volatile i3 [[SEL]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @select_sgt_i3(
; VI-NEXT: [[TMP1:%.*]] = sext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = sext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = sext i3 [[A]] to i32
; VI-NEXT: [[TMP5:%.*]] = sext i3 [[B]] to i32
; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT: store volatile i3 [[TMP7]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %cmp = icmp sgt i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, i3 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @select_sge_i3(i3 %a, i3 %b) {
; SI-LABEL: @select_sge_i3(
; SI-NEXT: [[CMP:%.*]] = icmp sge i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT: store volatile i3 [[SEL]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @select_sge_i3(
; VI-NEXT: [[TMP1:%.*]] = sext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = sext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = icmp sge i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = sext i3 [[A]] to i32
; VI-NEXT: [[TMP5:%.*]] = sext i3 [[B]] to i32
; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT: store volatile i3 [[TMP7]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %cmp = icmp sge i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, i3 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @select_slt_i3(i3 %a, i3 %b) {
; SI-LABEL: @select_slt_i3(
; SI-NEXT: [[CMP:%.*]] = icmp slt i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT: store volatile i3 [[SEL]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @select_slt_i3(
; VI-NEXT: [[TMP1:%.*]] = sext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = sext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = sext i3 [[A]] to i32
; VI-NEXT: [[TMP5:%.*]] = sext i3 [[B]] to i32
; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT: store volatile i3 [[TMP7]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %cmp = icmp slt i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, i3 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @select_sle_i3(i3 %a, i3 %b) {
; SI-LABEL: @select_sle_i3(
; SI-NEXT: [[CMP:%.*]] = icmp sle i3 [[A:%.*]], [[B:%.*]]
; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i3 [[A]], i3 [[B]]
; SI-NEXT: store volatile i3 [[SEL]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @select_sle_i3(
; VI-NEXT: [[TMP1:%.*]] = sext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = sext i3 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = sext i3 [[A]] to i32
; VI-NEXT: [[TMP5:%.*]] = sext i3 [[B]] to i32
; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]]
; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i3
; VI-NEXT: store volatile i3 [[TMP7]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %cmp = icmp sle i3 %a, %b
  %sel = select i1 %cmp, i3 %a, i3 %b
  store volatile i3 %sel, i3 addrspace(1)* undef
  ret void
}

declare i3 @llvm.bitreverse.i3(i3)
define amdgpu_kernel void @bitreverse_i3(i3 %a) {
; SI-LABEL: @bitreverse_i3(
; SI-NEXT: [[BREV:%.*]] = call i3 @llvm.bitreverse.i3(i3 [[A:%.*]])
; SI-NEXT: store volatile i3 [[BREV]], i3 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @bitreverse_i3(
; VI-NEXT: [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[TMP1]])
; VI-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP2]], 29
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
; VI-NEXT: store volatile i3 [[TMP4]], i3 addrspace(1)* undef
; VI-NEXT: ret void
;
  %brev = call i3 @llvm.bitreverse.i3(i3 %a)
  store volatile i3 %brev, i3 addrspace(1)* undef
  ret void
}

; The same widening applies to i16 sources; constant-operand cases fold away
; entirely on the tonga run.

define amdgpu_kernel void @add_i16(i16 %a, i16 %b) {
; SI-LABEL: @add_i16(
; SI-NEXT: [[R:%.*]] = add i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @add_i16(
; VI-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = add i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @constant_add_i16() {
; SI-LABEL: @constant_add_i16(
; SI-NEXT: [[R:%.*]] = add i16 1, 2
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @constant_add_i16(
; VI-NEXT: store volatile i16 3, i16 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = add i16 1, 2
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @constant_add_nsw_i16() {
; SI-LABEL: @constant_add_nsw_i16(
; SI-NEXT: [[R:%.*]] = add nsw i16 1, 2
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @constant_add_nsw_i16(
; VI-NEXT: store volatile i16 3, i16 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = add nsw i16 1, 2
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @constant_add_nuw_i16() {
; SI-LABEL: @constant_add_nuw_i16(
; SI-NEXT: [[R:%.*]] = add nsw i16 1, 2
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @constant_add_nuw_i16(
; VI-NEXT: store volatile i16 3, i16 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = add nsw i16 1, 2
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @add_nsw_i16(i16 %a, i16 %b) {
; SI-LABEL: @add_nsw_i16(
; SI-NEXT: [[R:%.*]] = add nsw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @add_nsw_i16(
; VI-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = add nsw i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @add_nuw_i16(i16 %a, i16 %b) {
; SI-LABEL: @add_nuw_i16(
; SI-NEXT: [[R:%.*]] = add nuw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @add_nuw_i16(
; VI-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = add nuw i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @add_nuw_nsw_i16(i16 %a, i16 %b) {
; SI-LABEL: @add_nuw_nsw_i16(
; SI-NEXT: [[R:%.*]] = add nuw nsw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @add_nuw_nsw_i16(
; VI-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = add nuw nsw i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @sub_i16(i16 %a, i16 %b) {
; SI-LABEL: @sub_i16(
; SI-NEXT: [[R:%.*]] = sub i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @sub_i16(
; VI-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = sub nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = sub i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @sub_nsw_i16(i16 %a, i16 %b) {
; SI-LABEL: @sub_nsw_i16(
; SI-NEXT: [[R:%.*]] = sub nsw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @sub_nsw_i16(
; VI-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = sub nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = sub nsw i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @sub_nuw_i16(i16 %a, i16 %b) {
; SI-LABEL: @sub_nuw_i16(
; SI-NEXT: [[R:%.*]] = sub nuw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @sub_nuw_i16(
; VI-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = sub nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = sub nuw i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @sub_nuw_nsw_i16(i16 %a, i16 %b) {
; SI-LABEL: @sub_nuw_nsw_i16(
; SI-NEXT: [[R:%.*]] = sub nuw nsw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @sub_nuw_nsw_i16(
; VI-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = sub nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = sub nuw nsw i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @mul_i16(i16 %a, i16 %b) {
; SI-LABEL: @mul_i16(
; SI-NEXT: [[R:%.*]] = mul i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @mul_i16(
; VI-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = mul i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @mul_nsw_i16(i16 %a, i16 %b) {
; SI-LABEL: @mul_nsw_i16(
; SI-NEXT: [[R:%.*]] = mul nsw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @mul_nsw_i16(
; VI-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = mul nsw i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @mul_nuw_i16(i16 %a, i16 %b) {
; SI-LABEL: @mul_nuw_i16(
; SI-NEXT: [[R:%.*]] = mul nuw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @mul_nuw_i16(
; VI-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = mul nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = mul nuw i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @mul_nuw_nsw_i16(i16 %a, i16 %b) {
; SI-LABEL: @mul_nuw_nsw_i16(
; SI-NEXT: [[R:%.*]] = mul nuw nsw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @mul_nuw_nsw_i16(
; VI-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = mul nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = mul nuw nsw i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @shl_i16(i16 %a, i16 %b) {
; SI-LABEL: @shl_i16(
; SI-NEXT: [[R:%.*]] = shl i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @shl_i16(
; VI-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = shl i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @shl_nsw_i16(i16 %a, i16 %b) {
; SI-LABEL: @shl_nsw_i16(
; SI-NEXT: [[R:%.*]] = shl nsw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @shl_nsw_i16(
; VI-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = shl nsw i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @shl_nuw_i16(i16 %a, i16 %b) {
; SI-LABEL: @shl_nuw_i16(
; SI-NEXT: [[R:%.*]] = shl nuw i16 [[A:%.*]], [[B:%.*]]
; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef
; SI-NEXT: ret void
;
; VI-LABEL: @shl_nuw_i16(
; VI-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]]
; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef
; VI-NEXT: ret void
;
  %r = shl nuw i16 %a, %b
  store volatile i16 %r, i16 addrspace(1)* undef
  ret void
}

define 
amdgpu_kernel void @shl_nuw_nsw_i16(i16 %a, i16 %b) { 1033; SI-LABEL: @shl_nuw_nsw_i16( 1034; SI-NEXT: [[R:%.*]] = shl nuw nsw i16 [[A:%.*]], [[B:%.*]] 1035; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef 1036; SI-NEXT: ret void 1037; 1038; VI-LABEL: @shl_nuw_nsw_i16( 1039; VI-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32 1040; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32 1041; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], [[TMP2]] 1042; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16 1043; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef 1044; VI-NEXT: ret void 1045; 1046 %r = shl nuw nsw i16 %a, %b 1047 store volatile i16 %r, i16 addrspace(1)* undef 1048 ret void 1049} 1050 1051define amdgpu_kernel void @lshr_i16(i16 %a, i16 %b) { 1052; SI-LABEL: @lshr_i16( 1053; SI-NEXT: [[R:%.*]] = lshr i16 [[A:%.*]], [[B:%.*]] 1054; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef 1055; SI-NEXT: ret void 1056; 1057; VI-LABEL: @lshr_i16( 1058; VI-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32 1059; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32 1060; VI-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP1]], [[TMP2]] 1061; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16 1062; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef 1063; VI-NEXT: ret void 1064; 1065 %r = lshr i16 %a, %b 1066 store volatile i16 %r, i16 addrspace(1)* undef 1067 ret void 1068} 1069 1070define amdgpu_kernel void @lshr_exact_i16(i16 %a, i16 %b) { 1071; SI-LABEL: @lshr_exact_i16( 1072; SI-NEXT: [[R:%.*]] = lshr exact i16 [[A:%.*]], [[B:%.*]] 1073; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef 1074; SI-NEXT: ret void 1075; 1076; VI-LABEL: @lshr_exact_i16( 1077; VI-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32 1078; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32 1079; VI-NEXT: [[TMP3:%.*]] = lshr exact i32 [[TMP1]], [[TMP2]] 1080; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16 1081; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* 
undef 1082; VI-NEXT: ret void 1083; 1084 %r = lshr exact i16 %a, %b 1085 store volatile i16 %r, i16 addrspace(1)* undef 1086 ret void 1087} 1088 1089define amdgpu_kernel void @ashr_i16(i16 %a, i16 %b) { 1090; SI-LABEL: @ashr_i16( 1091; SI-NEXT: [[R:%.*]] = ashr i16 [[A:%.*]], [[B:%.*]] 1092; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef 1093; SI-NEXT: ret void 1094; 1095; VI-LABEL: @ashr_i16( 1096; VI-NEXT: [[TMP1:%.*]] = sext i16 [[A:%.*]] to i32 1097; VI-NEXT: [[TMP2:%.*]] = sext i16 [[B:%.*]] to i32 1098; VI-NEXT: [[TMP3:%.*]] = ashr i32 [[TMP1]], [[TMP2]] 1099; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16 1100; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef 1101; VI-NEXT: ret void 1102; 1103 %r = ashr i16 %a, %b 1104 store volatile i16 %r, i16 addrspace(1)* undef 1105 ret void 1106} 1107 1108define amdgpu_kernel void @ashr_exact_i16(i16 %a, i16 %b) { 1109; SI-LABEL: @ashr_exact_i16( 1110; SI-NEXT: [[R:%.*]] = ashr exact i16 [[A:%.*]], [[B:%.*]] 1111; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef 1112; SI-NEXT: ret void 1113; 1114; VI-LABEL: @ashr_exact_i16( 1115; VI-NEXT: [[TMP1:%.*]] = sext i16 [[A:%.*]] to i32 1116; VI-NEXT: [[TMP2:%.*]] = sext i16 [[B:%.*]] to i32 1117; VI-NEXT: [[TMP3:%.*]] = ashr exact i32 [[TMP1]], [[TMP2]] 1118; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16 1119; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef 1120; VI-NEXT: ret void 1121; 1122 %r = ashr exact i16 %a, %b 1123 store volatile i16 %r, i16 addrspace(1)* undef 1124 ret void 1125} 1126 1127define amdgpu_kernel void @constant_lshr_exact_i16(i16 %a, i16 %b) { 1128; SI-LABEL: @constant_lshr_exact_i16( 1129; SI-NEXT: [[R:%.*]] = lshr exact i16 4, 1 1130; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef 1131; SI-NEXT: ret void 1132; 1133; VI-LABEL: @constant_lshr_exact_i16( 1134; VI-NEXT: store volatile i16 2, i16 addrspace(1)* undef 1135; VI-NEXT: ret void 1136; 1137 %r = lshr exact i16 4, 1 1138 store 
volatile i16 %r, i16 addrspace(1)* undef 1139 ret void 1140} 1141 1142define amdgpu_kernel void @and_i16(i16 %a, i16 %b) { 1143; SI-LABEL: @and_i16( 1144; SI-NEXT: [[R:%.*]] = and i16 [[A:%.*]], [[B:%.*]] 1145; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef 1146; SI-NEXT: ret void 1147; 1148; VI-LABEL: @and_i16( 1149; VI-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32 1150; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32 1151; VI-NEXT: [[TMP3:%.*]] = and i32 [[TMP1]], [[TMP2]] 1152; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16 1153; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef 1154; VI-NEXT: ret void 1155; 1156 %r = and i16 %a, %b 1157 store volatile i16 %r, i16 addrspace(1)* undef 1158 ret void 1159} 1160 1161define amdgpu_kernel void @or_i16(i16 %a, i16 %b) { 1162; SI-LABEL: @or_i16( 1163; SI-NEXT: [[R:%.*]] = or i16 [[A:%.*]], [[B:%.*]] 1164; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef 1165; SI-NEXT: ret void 1166; 1167; VI-LABEL: @or_i16( 1168; VI-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32 1169; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32 1170; VI-NEXT: [[TMP3:%.*]] = or i32 [[TMP1]], [[TMP2]] 1171; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16 1172; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef 1173; VI-NEXT: ret void 1174; 1175 %r = or i16 %a, %b 1176 store volatile i16 %r, i16 addrspace(1)* undef 1177 ret void 1178} 1179 1180define amdgpu_kernel void @xor_i16(i16 %a, i16 %b) { 1181; SI-LABEL: @xor_i16( 1182; SI-NEXT: [[R:%.*]] = xor i16 [[A:%.*]], [[B:%.*]] 1183; SI-NEXT: store volatile i16 [[R]], i16 addrspace(1)* undef 1184; SI-NEXT: ret void 1185; 1186; VI-LABEL: @xor_i16( 1187; VI-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32 1188; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32 1189; VI-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] 1190; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16 1191; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef 1192; VI-NEXT: 
ret void 1193; 1194 %r = xor i16 %a, %b 1195 store volatile i16 %r, i16 addrspace(1)* undef 1196 ret void 1197} 1198 1199define amdgpu_kernel void @select_eq_i16(i16 %a, i16 %b) { 1200; SI-LABEL: @select_eq_i16( 1201; SI-NEXT: [[CMP:%.*]] = icmp eq i16 [[A:%.*]], [[B:%.*]] 1202; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]] 1203; SI-NEXT: store volatile i16 [[SEL]], i16 addrspace(1)* undef 1204; SI-NEXT: ret void 1205; 1206; VI-LABEL: @select_eq_i16( 1207; VI-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32 1208; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32 1209; VI-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP1]], [[TMP2]] 1210; VI-NEXT: [[TMP4:%.*]] = zext i16 [[A]] to i32 1211; VI-NEXT: [[TMP5:%.*]] = zext i16 [[B]] to i32 1212; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]] 1213; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16 1214; VI-NEXT: store volatile i16 [[TMP7]], i16 addrspace(1)* undef 1215; VI-NEXT: ret void 1216; 1217 %cmp = icmp eq i16 %a, %b 1218 %sel = select i1 %cmp, i16 %a, i16 %b 1219 store volatile i16 %sel, i16 addrspace(1)* undef 1220 ret void 1221} 1222 1223define amdgpu_kernel void @select_ne_i16(i16 %a, i16 %b) { 1224; SI-LABEL: @select_ne_i16( 1225; SI-NEXT: [[CMP:%.*]] = icmp ne i16 [[A:%.*]], [[B:%.*]] 1226; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]] 1227; SI-NEXT: store volatile i16 [[SEL]], i16 addrspace(1)* undef 1228; SI-NEXT: ret void 1229; 1230; VI-LABEL: @select_ne_i16( 1231; VI-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32 1232; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32 1233; VI-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], [[TMP2]] 1234; VI-NEXT: [[TMP4:%.*]] = zext i16 [[A]] to i32 1235; VI-NEXT: [[TMP5:%.*]] = zext i16 [[B]] to i32 1236; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]] 1237; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16 1238; VI-NEXT: store volatile i16 [[TMP7]], i16 addrspace(1)* undef 1239; VI-NEXT: ret void 1240; 
1241 %cmp = icmp ne i16 %a, %b 1242 %sel = select i1 %cmp, i16 %a, i16 %b 1243 store volatile i16 %sel, i16 addrspace(1)* undef 1244 ret void 1245} 1246 1247define amdgpu_kernel void @select_ugt_i16(i16 %a, i16 %b) { 1248; SI-LABEL: @select_ugt_i16( 1249; SI-NEXT: [[CMP:%.*]] = icmp ugt i16 [[A:%.*]], [[B:%.*]] 1250; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]] 1251; SI-NEXT: store volatile i16 [[SEL]], i16 addrspace(1)* undef 1252; SI-NEXT: ret void 1253; 1254; VI-LABEL: @select_ugt_i16( 1255; VI-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32 1256; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32 1257; VI-NEXT: [[TMP3:%.*]] = icmp ugt i32 [[TMP1]], [[TMP2]] 1258; VI-NEXT: [[TMP4:%.*]] = zext i16 [[A]] to i32 1259; VI-NEXT: [[TMP5:%.*]] = zext i16 [[B]] to i32 1260; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]] 1261; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16 1262; VI-NEXT: store volatile i16 [[TMP7]], i16 addrspace(1)* undef 1263; VI-NEXT: ret void 1264; 1265 %cmp = icmp ugt i16 %a, %b 1266 %sel = select i1 %cmp, i16 %a, i16 %b 1267 store volatile i16 %sel, i16 addrspace(1)* undef 1268 ret void 1269} 1270 1271define amdgpu_kernel void @select_uge_i16(i16 %a, i16 %b) { 1272; SI-LABEL: @select_uge_i16( 1273; SI-NEXT: [[CMP:%.*]] = icmp uge i16 [[A:%.*]], [[B:%.*]] 1274; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]] 1275; SI-NEXT: store volatile i16 [[SEL]], i16 addrspace(1)* undef 1276; SI-NEXT: ret void 1277; 1278; VI-LABEL: @select_uge_i16( 1279; VI-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32 1280; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32 1281; VI-NEXT: [[TMP3:%.*]] = icmp uge i32 [[TMP1]], [[TMP2]] 1282; VI-NEXT: [[TMP4:%.*]] = zext i16 [[A]] to i32 1283; VI-NEXT: [[TMP5:%.*]] = zext i16 [[B]] to i32 1284; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]] 1285; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16 1286; VI-NEXT: store volatile i16 [[TMP7]], i16 
addrspace(1)* undef 1287; VI-NEXT: ret void 1288; 1289 %cmp = icmp uge i16 %a, %b 1290 %sel = select i1 %cmp, i16 %a, i16 %b 1291 store volatile i16 %sel, i16 addrspace(1)* undef 1292 ret void 1293} 1294 1295define amdgpu_kernel void @select_ult_i16(i16 %a, i16 %b) { 1296; SI-LABEL: @select_ult_i16( 1297; SI-NEXT: [[CMP:%.*]] = icmp ult i16 [[A:%.*]], [[B:%.*]] 1298; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]] 1299; SI-NEXT: store volatile i16 [[SEL]], i16 addrspace(1)* undef 1300; SI-NEXT: ret void 1301; 1302; VI-LABEL: @select_ult_i16( 1303; VI-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32 1304; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32 1305; VI-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP1]], [[TMP2]] 1306; VI-NEXT: [[TMP4:%.*]] = zext i16 [[A]] to i32 1307; VI-NEXT: [[TMP5:%.*]] = zext i16 [[B]] to i32 1308; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]] 1309; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16 1310; VI-NEXT: store volatile i16 [[TMP7]], i16 addrspace(1)* undef 1311; VI-NEXT: ret void 1312; 1313 %cmp = icmp ult i16 %a, %b 1314 %sel = select i1 %cmp, i16 %a, i16 %b 1315 store volatile i16 %sel, i16 addrspace(1)* undef 1316 ret void 1317} 1318 1319define amdgpu_kernel void @select_ule_i16(i16 %a, i16 %b) { 1320; SI-LABEL: @select_ule_i16( 1321; SI-NEXT: [[CMP:%.*]] = icmp ule i16 [[A:%.*]], [[B:%.*]] 1322; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]] 1323; SI-NEXT: store volatile i16 [[SEL]], i16 addrspace(1)* undef 1324; SI-NEXT: ret void 1325; 1326; VI-LABEL: @select_ule_i16( 1327; VI-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32 1328; VI-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32 1329; VI-NEXT: [[TMP3:%.*]] = icmp ule i32 [[TMP1]], [[TMP2]] 1330; VI-NEXT: [[TMP4:%.*]] = zext i16 [[A]] to i32 1331; VI-NEXT: [[TMP5:%.*]] = zext i16 [[B]] to i32 1332; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]] 1333; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16 
1334; VI-NEXT: store volatile i16 [[TMP7]], i16 addrspace(1)* undef 1335; VI-NEXT: ret void 1336; 1337 %cmp = icmp ule i16 %a, %b 1338 %sel = select i1 %cmp, i16 %a, i16 %b 1339 store volatile i16 %sel, i16 addrspace(1)* undef 1340 ret void 1341} 1342 1343define amdgpu_kernel void @select_sgt_i16(i16 %a, i16 %b) { 1344; SI-LABEL: @select_sgt_i16( 1345; SI-NEXT: [[CMP:%.*]] = icmp sgt i16 [[A:%.*]], [[B:%.*]] 1346; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]] 1347; SI-NEXT: store volatile i16 [[SEL]], i16 addrspace(1)* undef 1348; SI-NEXT: ret void 1349; 1350; VI-LABEL: @select_sgt_i16( 1351; VI-NEXT: [[TMP1:%.*]] = sext i16 [[A:%.*]] to i32 1352; VI-NEXT: [[TMP2:%.*]] = sext i16 [[B:%.*]] to i32 1353; VI-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] 1354; VI-NEXT: [[TMP4:%.*]] = sext i16 [[A]] to i32 1355; VI-NEXT: [[TMP5:%.*]] = sext i16 [[B]] to i32 1356; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]] 1357; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16 1358; VI-NEXT: store volatile i16 [[TMP7]], i16 addrspace(1)* undef 1359; VI-NEXT: ret void 1360; 1361 %cmp = icmp sgt i16 %a, %b 1362 %sel = select i1 %cmp, i16 %a, i16 %b 1363 store volatile i16 %sel, i16 addrspace(1)* undef 1364 ret void 1365} 1366 1367define amdgpu_kernel void @select_sge_i16(i16 %a, i16 %b) { 1368; SI-LABEL: @select_sge_i16( 1369; SI-NEXT: [[CMP:%.*]] = icmp sge i16 [[A:%.*]], [[B:%.*]] 1370; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]] 1371; SI-NEXT: store volatile i16 [[SEL]], i16 addrspace(1)* undef 1372; SI-NEXT: ret void 1373; 1374; VI-LABEL: @select_sge_i16( 1375; VI-NEXT: [[TMP1:%.*]] = sext i16 [[A:%.*]] to i32 1376; VI-NEXT: [[TMP2:%.*]] = sext i16 [[B:%.*]] to i32 1377; VI-NEXT: [[TMP3:%.*]] = icmp sge i32 [[TMP1]], [[TMP2]] 1378; VI-NEXT: [[TMP4:%.*]] = sext i16 [[A]] to i32 1379; VI-NEXT: [[TMP5:%.*]] = sext i16 [[B]] to i32 1380; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]] 1381; 
VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16 1382; VI-NEXT: store volatile i16 [[TMP7]], i16 addrspace(1)* undef 1383; VI-NEXT: ret void 1384; 1385 %cmp = icmp sge i16 %a, %b 1386 %sel = select i1 %cmp, i16 %a, i16 %b 1387 store volatile i16 %sel, i16 addrspace(1)* undef 1388 ret void 1389} 1390 1391define amdgpu_kernel void @select_slt_i16(i16 %a, i16 %b) { 1392; SI-LABEL: @select_slt_i16( 1393; SI-NEXT: [[CMP:%.*]] = icmp slt i16 [[A:%.*]], [[B:%.*]] 1394; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]] 1395; SI-NEXT: store volatile i16 [[SEL]], i16 addrspace(1)* undef 1396; SI-NEXT: ret void 1397; 1398; VI-LABEL: @select_slt_i16( 1399; VI-NEXT: [[TMP1:%.*]] = sext i16 [[A:%.*]] to i32 1400; VI-NEXT: [[TMP2:%.*]] = sext i16 [[B:%.*]] to i32 1401; VI-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP1]], [[TMP2]] 1402; VI-NEXT: [[TMP4:%.*]] = sext i16 [[A]] to i32 1403; VI-NEXT: [[TMP5:%.*]] = sext i16 [[B]] to i32 1404; VI-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]] 1405; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16 1406; VI-NEXT: store volatile i16 [[TMP7]], i16 addrspace(1)* undef 1407; VI-NEXT: ret void 1408; 1409 %cmp = icmp slt i16 %a, %b 1410 %sel = select i1 %cmp, i16 %a, i16 %b 1411 store volatile i16 %sel, i16 addrspace(1)* undef 1412 ret void 1413} 1414 1415define amdgpu_kernel void @select_sle_i16(i16 %a, i16 %b) { 1416; SI-LABEL: @select_sle_i16( 1417; SI-NEXT: [[CMP:%.*]] = icmp sle i16 [[A:%.*]], [[B:%.*]] 1418; SI-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i16 [[A]], i16 [[B]] 1419; SI-NEXT: store volatile i16 [[SEL]], i16 addrspace(1)* undef 1420; SI-NEXT: ret void 1421; 1422; VI-LABEL: @select_sle_i16( 1423; VI-NEXT: [[TMP1:%.*]] = sext i16 [[A:%.*]] to i32 1424; VI-NEXT: [[TMP2:%.*]] = sext i16 [[B:%.*]] to i32 1425; VI-NEXT: [[TMP3:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] 1426; VI-NEXT: [[TMP4:%.*]] = sext i16 [[A]] to i32 1427; VI-NEXT: [[TMP5:%.*]] = sext i16 [[B]] to i32 1428; VI-NEXT: [[TMP6:%.*]] = select 
i1 [[TMP3]], i32 [[TMP4]], i32 [[TMP5]] 1429; VI-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16 1430; VI-NEXT: store volatile i16 [[TMP7]], i16 addrspace(1)* undef 1431; VI-NEXT: ret void 1432; 1433 %cmp = icmp sle i16 %a, %b 1434 %sel = select i1 %cmp, i16 %a, i16 %b 1435 store volatile i16 %sel, i16 addrspace(1)* undef 1436 ret void 1437} 1438 1439declare i16 @llvm.bitreverse.i16(i16) 1440 1441define amdgpu_kernel void @bitreverse_i16(i16 %a) { 1442; SI-LABEL: @bitreverse_i16( 1443; SI-NEXT: [[BREV:%.*]] = call i16 @llvm.bitreverse.i16(i16 [[A:%.*]]) 1444; SI-NEXT: store volatile i16 [[BREV]], i16 addrspace(1)* undef 1445; SI-NEXT: ret void 1446; 1447; VI-LABEL: @bitreverse_i16( 1448; VI-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32 1449; VI-NEXT: [[TMP2:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[TMP1]]) 1450; VI-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP2]], 16 1451; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16 1452; VI-NEXT: store volatile i16 [[TMP4]], i16 addrspace(1)* undef 1453; VI-NEXT: ret void 1454; 1455 %brev = call i16 @llvm.bitreverse.i16(i16 %a) 1456 store volatile i16 %brev, i16 addrspace(1)* undef 1457 ret void 1458} 1459 1460define amdgpu_kernel void @add_3xi15(<3 x i15> %a, <3 x i15> %b) { 1461; SI-LABEL: @add_3xi15( 1462; SI-NEXT: [[R:%.*]] = add <3 x i15> [[A:%.*]], [[B:%.*]] 1463; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef 1464; SI-NEXT: ret void 1465; 1466; VI-LABEL: @add_3xi15( 1467; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32> 1468; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32> 1469; VI-NEXT: [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]] 1470; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15> 1471; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef 1472; VI-NEXT: ret void 1473; 1474 %r = add <3 x i15> %a, %b 1475 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1476 ret void 1477} 1478 1479define amdgpu_kernel void 
@add_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) { 1480; SI-LABEL: @add_nsw_3xi15( 1481; SI-NEXT: [[R:%.*]] = add nsw <3 x i15> [[A:%.*]], [[B:%.*]] 1482; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef 1483; SI-NEXT: ret void 1484; 1485; VI-LABEL: @add_nsw_3xi15( 1486; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32> 1487; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32> 1488; VI-NEXT: [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]] 1489; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15> 1490; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef 1491; VI-NEXT: ret void 1492; 1493 %r = add nsw <3 x i15> %a, %b 1494 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1495 ret void 1496} 1497 1498define amdgpu_kernel void @add_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) { 1499; SI-LABEL: @add_nuw_3xi15( 1500; SI-NEXT: [[R:%.*]] = add nuw <3 x i15> [[A:%.*]], [[B:%.*]] 1501; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef 1502; SI-NEXT: ret void 1503; 1504; VI-LABEL: @add_nuw_3xi15( 1505; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32> 1506; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32> 1507; VI-NEXT: [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]] 1508; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15> 1509; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef 1510; VI-NEXT: ret void 1511; 1512 %r = add nuw <3 x i15> %a, %b 1513 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1514 ret void 1515} 1516 1517define amdgpu_kernel void @add_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) { 1518; SI-LABEL: @add_nuw_nsw_3xi15( 1519; SI-NEXT: [[R:%.*]] = add nuw nsw <3 x i15> [[A:%.*]], [[B:%.*]] 1520; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef 1521; SI-NEXT: ret void 1522; 1523; VI-LABEL: @add_nuw_nsw_3xi15( 1524; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to 
<3 x i32> 1525; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32> 1526; VI-NEXT: [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]] 1527; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15> 1528; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef 1529; VI-NEXT: ret void 1530; 1531 %r = add nuw nsw <3 x i15> %a, %b 1532 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1533 ret void 1534} 1535 1536define amdgpu_kernel void @sub_3xi15(<3 x i15> %a, <3 x i15> %b) { 1537; SI-LABEL: @sub_3xi15( 1538; SI-NEXT: [[R:%.*]] = sub <3 x i15> [[A:%.*]], [[B:%.*]] 1539; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef 1540; SI-NEXT: ret void 1541; 1542; VI-LABEL: @sub_3xi15( 1543; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32> 1544; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32> 1545; VI-NEXT: [[TMP3:%.*]] = sub nsw <3 x i32> [[TMP1]], [[TMP2]] 1546; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15> 1547; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef 1548; VI-NEXT: ret void 1549; 1550 %r = sub <3 x i15> %a, %b 1551 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1552 ret void 1553} 1554 1555define amdgpu_kernel void @sub_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) { 1556; SI-LABEL: @sub_nsw_3xi15( 1557; SI-NEXT: [[R:%.*]] = sub nsw <3 x i15> [[A:%.*]], [[B:%.*]] 1558; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef 1559; SI-NEXT: ret void 1560; 1561; VI-LABEL: @sub_nsw_3xi15( 1562; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32> 1563; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32> 1564; VI-NEXT: [[TMP3:%.*]] = sub nsw <3 x i32> [[TMP1]], [[TMP2]] 1565; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15> 1566; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef 1567; VI-NEXT: ret void 1568; 1569 %r = sub nsw <3 x i15> %a, %b 1570 store volatile 
<3 x i15> %r, <3 x i15> addrspace(1)* undef 1571 ret void 1572} 1573 1574define amdgpu_kernel void @sub_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) { 1575; SI-LABEL: @sub_nuw_3xi15( 1576; SI-NEXT: [[R:%.*]] = sub nuw <3 x i15> [[A:%.*]], [[B:%.*]] 1577; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef 1578; SI-NEXT: ret void 1579; 1580; VI-LABEL: @sub_nuw_3xi15( 1581; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32> 1582; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32> 1583; VI-NEXT: [[TMP3:%.*]] = sub nuw nsw <3 x i32> [[TMP1]], [[TMP2]] 1584; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15> 1585; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef 1586; VI-NEXT: ret void 1587; 1588 %r = sub nuw <3 x i15> %a, %b 1589 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1590 ret void 1591} 1592 1593define amdgpu_kernel void @sub_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) { 1594; SI-LABEL: @sub_nuw_nsw_3xi15( 1595; SI-NEXT: [[R:%.*]] = sub nuw nsw <3 x i15> [[A:%.*]], [[B:%.*]] 1596; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef 1597; SI-NEXT: ret void 1598; 1599; VI-LABEL: @sub_nuw_nsw_3xi15( 1600; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32> 1601; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32> 1602; VI-NEXT: [[TMP3:%.*]] = sub nuw nsw <3 x i32> [[TMP1]], [[TMP2]] 1603; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15> 1604; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef 1605; VI-NEXT: ret void 1606; 1607 %r = sub nuw nsw <3 x i15> %a, %b 1608 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1609 ret void 1610} 1611 1612define amdgpu_kernel void @mul_3xi15(<3 x i15> %a, <3 x i15> %b) { 1613; SI-LABEL: @mul_3xi15( 1614; SI-NEXT: [[R:%.*]] = mul <3 x i15> [[A:%.*]], [[B:%.*]] 1615; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef 1616; SI-NEXT: ret void 1617; 
1618; VI-LABEL: @mul_3xi15( 1619; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32> 1620; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32> 1621; VI-NEXT: [[TMP3:%.*]] = mul nuw <3 x i32> [[TMP1]], [[TMP2]] 1622; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15> 1623; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef 1624; VI-NEXT: ret void 1625; 1626 %r = mul <3 x i15> %a, %b 1627 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1628 ret void 1629} 1630 1631define amdgpu_kernel void @mul_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) { 1632; SI-LABEL: @mul_nsw_3xi15( 1633; SI-NEXT: [[R:%.*]] = mul nsw <3 x i15> [[A:%.*]], [[B:%.*]] 1634; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef 1635; SI-NEXT: ret void 1636; 1637; VI-LABEL: @mul_nsw_3xi15( 1638; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32> 1639; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32> 1640; VI-NEXT: [[TMP3:%.*]] = mul nuw <3 x i32> [[TMP1]], [[TMP2]] 1641; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15> 1642; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef 1643; VI-NEXT: ret void 1644; 1645 %r = mul nsw <3 x i15> %a, %b 1646 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1647 ret void 1648} 1649 1650define amdgpu_kernel void @mul_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) { 1651; SI-LABEL: @mul_nuw_3xi15( 1652; SI-NEXT: [[R:%.*]] = mul nuw <3 x i15> [[A:%.*]], [[B:%.*]] 1653; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef 1654; SI-NEXT: ret void 1655; 1656; VI-LABEL: @mul_nuw_3xi15( 1657; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32> 1658; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32> 1659; VI-NEXT: [[TMP3:%.*]] = mul nuw nsw <3 x i32> [[TMP1]], [[TMP2]] 1660; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15> 1661; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> 
addrspace(1)* undef 1662; VI-NEXT: ret void 1663; 1664 %r = mul nuw <3 x i15> %a, %b 1665 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1666 ret void 1667} 1668 1669define amdgpu_kernel void @mul_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) { 1670; SI-LABEL: @mul_nuw_nsw_3xi15( 1671; SI-NEXT: [[R:%.*]] = mul nuw nsw <3 x i15> [[A:%.*]], [[B:%.*]] 1672; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef 1673; SI-NEXT: ret void 1674; 1675; VI-LABEL: @mul_nuw_nsw_3xi15( 1676; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32> 1677; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32> 1678; VI-NEXT: [[TMP3:%.*]] = mul nuw nsw <3 x i32> [[TMP1]], [[TMP2]] 1679; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15> 1680; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef 1681; VI-NEXT: ret void 1682; 1683 %r = mul nuw nsw <3 x i15> %a, %b 1684 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1685 ret void 1686} 1687 1688define amdgpu_kernel void @shl_3xi15(<3 x i15> %a, <3 x i15> %b) { 1689; SI-LABEL: @shl_3xi15( 1690; SI-NEXT: [[R:%.*]] = shl <3 x i15> [[A:%.*]], [[B:%.*]] 1691; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef 1692; SI-NEXT: ret void 1693; 1694; VI-LABEL: @shl_3xi15( 1695; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32> 1696; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32> 1697; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]] 1698; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15> 1699; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef 1700; VI-NEXT: ret void 1701; 1702 %r = shl <3 x i15> %a, %b 1703 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1704 ret void 1705} 1706 1707define amdgpu_kernel void @shl_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) { 1708; SI-LABEL: @shl_nsw_3xi15( 1709; SI-NEXT: [[R:%.*]] = shl nsw <3 x i15> [[A:%.*]], [[B:%.*]] 1710; 
SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef 1711; SI-NEXT: ret void 1712; 1713; VI-LABEL: @shl_nsw_3xi15( 1714; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32> 1715; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32> 1716; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]] 1717; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15> 1718; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef 1719; VI-NEXT: ret void 1720; 1721 %r = shl nsw <3 x i15> %a, %b 1722 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1723 ret void 1724} 1725 1726define amdgpu_kernel void @shl_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) { 1727; SI-LABEL: @shl_nuw_3xi15( 1728; SI-NEXT: [[R:%.*]] = shl nuw <3 x i15> [[A:%.*]], [[B:%.*]] 1729; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef 1730; SI-NEXT: ret void 1731; 1732; VI-LABEL: @shl_nuw_3xi15( 1733; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32> 1734; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32> 1735; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]] 1736; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15> 1737; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef 1738; VI-NEXT: ret void 1739; 1740 %r = shl nuw <3 x i15> %a, %b 1741 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1742 ret void 1743} 1744 1745define amdgpu_kernel void @shl_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) { 1746; SI-LABEL: @shl_nuw_nsw_3xi15( 1747; SI-NEXT: [[R:%.*]] = shl nuw nsw <3 x i15> [[A:%.*]], [[B:%.*]] 1748; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef 1749; SI-NEXT: ret void 1750; 1751; VI-LABEL: @shl_nuw_nsw_3xi15( 1752; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32> 1753; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32> 1754; VI-NEXT: [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]] 
1755; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15> 1756; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef 1757; VI-NEXT: ret void 1758; 1759 %r = shl nuw nsw <3 x i15> %a, %b 1760 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1761 ret void 1762} 1763 1764define amdgpu_kernel void @lshr_3xi15(<3 x i15> %a, <3 x i15> %b) { 1765; SI-LABEL: @lshr_3xi15( 1766; SI-NEXT: [[R:%.*]] = lshr <3 x i15> [[A:%.*]], [[B:%.*]] 1767; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef 1768; SI-NEXT: ret void 1769; 1770; VI-LABEL: @lshr_3xi15( 1771; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32> 1772; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32> 1773; VI-NEXT: [[TMP3:%.*]] = lshr <3 x i32> [[TMP1]], [[TMP2]] 1774; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15> 1775; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef 1776; VI-NEXT: ret void 1777; 1778 %r = lshr <3 x i15> %a, %b 1779 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1780 ret void 1781} 1782 1783define amdgpu_kernel void @lshr_exact_3xi15(<3 x i15> %a, <3 x i15> %b) { 1784; SI-LABEL: @lshr_exact_3xi15( 1785; SI-NEXT: [[R:%.*]] = lshr exact <3 x i15> [[A:%.*]], [[B:%.*]] 1786; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef 1787; SI-NEXT: ret void 1788; 1789; VI-LABEL: @lshr_exact_3xi15( 1790; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32> 1791; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32> 1792; VI-NEXT: [[TMP3:%.*]] = lshr exact <3 x i32> [[TMP1]], [[TMP2]] 1793; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15> 1794; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef 1795; VI-NEXT: ret void 1796; 1797 %r = lshr exact <3 x i15> %a, %b 1798 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1799 ret void 1800} 1801 1802define amdgpu_kernel void @ashr_3xi15(<3 x i15> %a, <3 
x i15> %b) { 1803; SI-LABEL: @ashr_3xi15( 1804; SI-NEXT: [[R:%.*]] = ashr <3 x i15> [[A:%.*]], [[B:%.*]] 1805; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef 1806; SI-NEXT: ret void 1807; 1808; VI-LABEL: @ashr_3xi15( 1809; VI-NEXT: [[TMP1:%.*]] = sext <3 x i15> [[A:%.*]] to <3 x i32> 1810; VI-NEXT: [[TMP2:%.*]] = sext <3 x i15> [[B:%.*]] to <3 x i32> 1811; VI-NEXT: [[TMP3:%.*]] = ashr <3 x i32> [[TMP1]], [[TMP2]] 1812; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15> 1813; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef 1814; VI-NEXT: ret void 1815; 1816 %r = ashr <3 x i15> %a, %b 1817 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1818 ret void 1819} 1820 1821define amdgpu_kernel void @ashr_exact_3xi15(<3 x i15> %a, <3 x i15> %b) { 1822; SI-LABEL: @ashr_exact_3xi15( 1823; SI-NEXT: [[R:%.*]] = ashr exact <3 x i15> [[A:%.*]], [[B:%.*]] 1824; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef 1825; SI-NEXT: ret void 1826; 1827; VI-LABEL: @ashr_exact_3xi15( 1828; VI-NEXT: [[TMP1:%.*]] = sext <3 x i15> [[A:%.*]] to <3 x i32> 1829; VI-NEXT: [[TMP2:%.*]] = sext <3 x i15> [[B:%.*]] to <3 x i32> 1830; VI-NEXT: [[TMP3:%.*]] = ashr exact <3 x i32> [[TMP1]], [[TMP2]] 1831; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15> 1832; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef 1833; VI-NEXT: ret void 1834; 1835 %r = ashr exact <3 x i15> %a, %b 1836 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1837 ret void 1838} 1839 1840define amdgpu_kernel void @and_3xi15(<3 x i15> %a, <3 x i15> %b) { 1841; SI-LABEL: @and_3xi15( 1842; SI-NEXT: [[R:%.*]] = and <3 x i15> [[A:%.*]], [[B:%.*]] 1843; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef 1844; SI-NEXT: ret void 1845; 1846; VI-LABEL: @and_3xi15( 1847; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32> 1848; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 
x i32> 1849; VI-NEXT: [[TMP3:%.*]] = and <3 x i32> [[TMP1]], [[TMP2]] 1850; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15> 1851; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef 1852; VI-NEXT: ret void 1853; 1854 %r = and <3 x i15> %a, %b 1855 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1856 ret void 1857} 1858 1859define amdgpu_kernel void @or_3xi15(<3 x i15> %a, <3 x i15> %b) { 1860; SI-LABEL: @or_3xi15( 1861; SI-NEXT: [[R:%.*]] = or <3 x i15> [[A:%.*]], [[B:%.*]] 1862; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef 1863; SI-NEXT: ret void 1864; 1865; VI-LABEL: @or_3xi15( 1866; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32> 1867; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32> 1868; VI-NEXT: [[TMP3:%.*]] = or <3 x i32> [[TMP1]], [[TMP2]] 1869; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15> 1870; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef 1871; VI-NEXT: ret void 1872; 1873 %r = or <3 x i15> %a, %b 1874 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1875 ret void 1876} 1877 1878define amdgpu_kernel void @xor_3xi15(<3 x i15> %a, <3 x i15> %b) { 1879; SI-LABEL: @xor_3xi15( 1880; SI-NEXT: [[R:%.*]] = xor <3 x i15> [[A:%.*]], [[B:%.*]] 1881; SI-NEXT: store volatile <3 x i15> [[R]], <3 x i15> addrspace(1)* undef 1882; SI-NEXT: ret void 1883; 1884; VI-LABEL: @xor_3xi15( 1885; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32> 1886; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32> 1887; VI-NEXT: [[TMP3:%.*]] = xor <3 x i32> [[TMP1]], [[TMP2]] 1888; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15> 1889; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef 1890; VI-NEXT: ret void 1891; 1892 %r = xor <3 x i15> %a, %b 1893 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1894 ret void 1895} 1896 1897define amdgpu_kernel void @select_eq_3xi15(<3 x 
i15> %a, <3 x i15> %b) { 1898; SI-LABEL: @select_eq_3xi15( 1899; SI-NEXT: [[CMP:%.*]] = icmp eq <3 x i15> [[A:%.*]], [[B:%.*]] 1900; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]] 1901; SI-NEXT: store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef 1902; SI-NEXT: ret void 1903; 1904; VI-LABEL: @select_eq_3xi15( 1905; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32> 1906; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32> 1907; VI-NEXT: [[TMP3:%.*]] = icmp eq <3 x i32> [[TMP1]], [[TMP2]] 1908; VI-NEXT: [[TMP4:%.*]] = zext <3 x i15> [[A]] to <3 x i32> 1909; VI-NEXT: [[TMP5:%.*]] = zext <3 x i15> [[B]] to <3 x i32> 1910; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]] 1911; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15> 1912; VI-NEXT: store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef 1913; VI-NEXT: ret void 1914; 1915 %cmp = icmp eq <3 x i15> %a, %b 1916 %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b 1917 store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef 1918 ret void 1919} 1920 1921define amdgpu_kernel void @select_ne_3xi15(<3 x i15> %a, <3 x i15> %b) { 1922; SI-LABEL: @select_ne_3xi15( 1923; SI-NEXT: [[CMP:%.*]] = icmp ne <3 x i15> [[A:%.*]], [[B:%.*]] 1924; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]] 1925; SI-NEXT: store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef 1926; SI-NEXT: ret void 1927; 1928; VI-LABEL: @select_ne_3xi15( 1929; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32> 1930; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32> 1931; VI-NEXT: [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP1]], [[TMP2]] 1932; VI-NEXT: [[TMP4:%.*]] = zext <3 x i15> [[A]] to <3 x i32> 1933; VI-NEXT: [[TMP5:%.*]] = zext <3 x i15> [[B]] to <3 x i32> 1934; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]] 1935; VI-NEXT: 
[[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15> 1936; VI-NEXT: store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef 1937; VI-NEXT: ret void 1938; 1939 %cmp = icmp ne <3 x i15> %a, %b 1940 %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b 1941 store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef 1942 ret void 1943} 1944 1945define amdgpu_kernel void @select_ugt_3xi15(<3 x i15> %a, <3 x i15> %b) { 1946; SI-LABEL: @select_ugt_3xi15( 1947; SI-NEXT: [[CMP:%.*]] = icmp ugt <3 x i15> [[A:%.*]], [[B:%.*]] 1948; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]] 1949; SI-NEXT: store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef 1950; SI-NEXT: ret void 1951; 1952; VI-LABEL: @select_ugt_3xi15( 1953; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32> 1954; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32> 1955; VI-NEXT: [[TMP3:%.*]] = icmp ugt <3 x i32> [[TMP1]], [[TMP2]] 1956; VI-NEXT: [[TMP4:%.*]] = zext <3 x i15> [[A]] to <3 x i32> 1957; VI-NEXT: [[TMP5:%.*]] = zext <3 x i15> [[B]] to <3 x i32> 1958; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]] 1959; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15> 1960; VI-NEXT: store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef 1961; VI-NEXT: ret void 1962; 1963 %cmp = icmp ugt <3 x i15> %a, %b 1964 %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b 1965 store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef 1966 ret void 1967} 1968 1969define amdgpu_kernel void @select_uge_3xi15(<3 x i15> %a, <3 x i15> %b) { 1970; SI-LABEL: @select_uge_3xi15( 1971; SI-NEXT: [[CMP:%.*]] = icmp uge <3 x i15> [[A:%.*]], [[B:%.*]] 1972; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]] 1973; SI-NEXT: store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef 1974; SI-NEXT: ret void 1975; 1976; VI-LABEL: @select_uge_3xi15( 1977; VI-NEXT: 
[[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32> 1978; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32> 1979; VI-NEXT: [[TMP3:%.*]] = icmp uge <3 x i32> [[TMP1]], [[TMP2]] 1980; VI-NEXT: [[TMP4:%.*]] = zext <3 x i15> [[A]] to <3 x i32> 1981; VI-NEXT: [[TMP5:%.*]] = zext <3 x i15> [[B]] to <3 x i32> 1982; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]] 1983; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15> 1984; VI-NEXT: store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef 1985; VI-NEXT: ret void 1986; 1987 %cmp = icmp uge <3 x i15> %a, %b 1988 %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b 1989 store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef 1990 ret void 1991} 1992 1993define amdgpu_kernel void @select_ult_3xi15(<3 x i15> %a, <3 x i15> %b) { 1994; SI-LABEL: @select_ult_3xi15( 1995; SI-NEXT: [[CMP:%.*]] = icmp ult <3 x i15> [[A:%.*]], [[B:%.*]] 1996; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]] 1997; SI-NEXT: store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef 1998; SI-NEXT: ret void 1999; 2000; VI-LABEL: @select_ult_3xi15( 2001; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32> 2002; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32> 2003; VI-NEXT: [[TMP3:%.*]] = icmp ult <3 x i32> [[TMP1]], [[TMP2]] 2004; VI-NEXT: [[TMP4:%.*]] = zext <3 x i15> [[A]] to <3 x i32> 2005; VI-NEXT: [[TMP5:%.*]] = zext <3 x i15> [[B]] to <3 x i32> 2006; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]] 2007; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15> 2008; VI-NEXT: store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef 2009; VI-NEXT: ret void 2010; 2011 %cmp = icmp ult <3 x i15> %a, %b 2012 %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b 2013 store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef 2014 ret void 2015} 2016 2017define 
amdgpu_kernel void @select_ule_3xi15(<3 x i15> %a, <3 x i15> %b) { 2018; SI-LABEL: @select_ule_3xi15( 2019; SI-NEXT: [[CMP:%.*]] = icmp ule <3 x i15> [[A:%.*]], [[B:%.*]] 2020; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]] 2021; SI-NEXT: store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef 2022; SI-NEXT: ret void 2023; 2024; VI-LABEL: @select_ule_3xi15( 2025; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32> 2026; VI-NEXT: [[TMP2:%.*]] = zext <3 x i15> [[B:%.*]] to <3 x i32> 2027; VI-NEXT: [[TMP3:%.*]] = icmp ule <3 x i32> [[TMP1]], [[TMP2]] 2028; VI-NEXT: [[TMP4:%.*]] = zext <3 x i15> [[A]] to <3 x i32> 2029; VI-NEXT: [[TMP5:%.*]] = zext <3 x i15> [[B]] to <3 x i32> 2030; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]] 2031; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15> 2032; VI-NEXT: store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef 2033; VI-NEXT: ret void 2034; 2035 %cmp = icmp ule <3 x i15> %a, %b 2036 %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b 2037 store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef 2038 ret void 2039} 2040 2041define amdgpu_kernel void @select_sgt_3xi15(<3 x i15> %a, <3 x i15> %b) { 2042; SI-LABEL: @select_sgt_3xi15( 2043; SI-NEXT: [[CMP:%.*]] = icmp sgt <3 x i15> [[A:%.*]], [[B:%.*]] 2044; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]] 2045; SI-NEXT: store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef 2046; SI-NEXT: ret void 2047; 2048; VI-LABEL: @select_sgt_3xi15( 2049; VI-NEXT: [[TMP1:%.*]] = sext <3 x i15> [[A:%.*]] to <3 x i32> 2050; VI-NEXT: [[TMP2:%.*]] = sext <3 x i15> [[B:%.*]] to <3 x i32> 2051; VI-NEXT: [[TMP3:%.*]] = icmp sgt <3 x i32> [[TMP1]], [[TMP2]] 2052; VI-NEXT: [[TMP4:%.*]] = sext <3 x i15> [[A]] to <3 x i32> 2053; VI-NEXT: [[TMP5:%.*]] = sext <3 x i15> [[B]] to <3 x i32> 2054; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], 
<3 x i32> [[TMP4]], <3 x i32> [[TMP5]] 2055; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15> 2056; VI-NEXT: store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef 2057; VI-NEXT: ret void 2058; 2059 %cmp = icmp sgt <3 x i15> %a, %b 2060 %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b 2061 store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef 2062 ret void 2063} 2064 2065define amdgpu_kernel void @select_sge_3xi15(<3 x i15> %a, <3 x i15> %b) { 2066; SI-LABEL: @select_sge_3xi15( 2067; SI-NEXT: [[CMP:%.*]] = icmp sge <3 x i15> [[A:%.*]], [[B:%.*]] 2068; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]] 2069; SI-NEXT: store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef 2070; SI-NEXT: ret void 2071; 2072; VI-LABEL: @select_sge_3xi15( 2073; VI-NEXT: [[TMP1:%.*]] = sext <3 x i15> [[A:%.*]] to <3 x i32> 2074; VI-NEXT: [[TMP2:%.*]] = sext <3 x i15> [[B:%.*]] to <3 x i32> 2075; VI-NEXT: [[TMP3:%.*]] = icmp sge <3 x i32> [[TMP1]], [[TMP2]] 2076; VI-NEXT: [[TMP4:%.*]] = sext <3 x i15> [[A]] to <3 x i32> 2077; VI-NEXT: [[TMP5:%.*]] = sext <3 x i15> [[B]] to <3 x i32> 2078; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]] 2079; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15> 2080; VI-NEXT: store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef 2081; VI-NEXT: ret void 2082; 2083 %cmp = icmp sge <3 x i15> %a, %b 2084 %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b 2085 store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef 2086 ret void 2087} 2088 2089define amdgpu_kernel void @select_slt_3xi15(<3 x i15> %a, <3 x i15> %b) { 2090; SI-LABEL: @select_slt_3xi15( 2091; SI-NEXT: [[CMP:%.*]] = icmp slt <3 x i15> [[A:%.*]], [[B:%.*]] 2092; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]] 2093; SI-NEXT: store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef 2094; SI-NEXT: ret void 2095; 2096; 
VI-LABEL: @select_slt_3xi15( 2097; VI-NEXT: [[TMP1:%.*]] = sext <3 x i15> [[A:%.*]] to <3 x i32> 2098; VI-NEXT: [[TMP2:%.*]] = sext <3 x i15> [[B:%.*]] to <3 x i32> 2099; VI-NEXT: [[TMP3:%.*]] = icmp slt <3 x i32> [[TMP1]], [[TMP2]] 2100; VI-NEXT: [[TMP4:%.*]] = sext <3 x i15> [[A]] to <3 x i32> 2101; VI-NEXT: [[TMP5:%.*]] = sext <3 x i15> [[B]] to <3 x i32> 2102; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]] 2103; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15> 2104; VI-NEXT: store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef 2105; VI-NEXT: ret void 2106; 2107 %cmp = icmp slt <3 x i15> %a, %b 2108 %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b 2109 store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef 2110 ret void 2111} 2112 2113define amdgpu_kernel void @select_sle_3xi15(<3 x i15> %a, <3 x i15> %b) { 2114; SI-LABEL: @select_sle_3xi15( 2115; SI-NEXT: [[CMP:%.*]] = icmp sle <3 x i15> [[A:%.*]], [[B:%.*]] 2116; SI-NEXT: [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i15> [[A]], <3 x i15> [[B]] 2117; SI-NEXT: store volatile <3 x i15> [[SEL]], <3 x i15> addrspace(1)* undef 2118; SI-NEXT: ret void 2119; 2120; VI-LABEL: @select_sle_3xi15( 2121; VI-NEXT: [[TMP1:%.*]] = sext <3 x i15> [[A:%.*]] to <3 x i32> 2122; VI-NEXT: [[TMP2:%.*]] = sext <3 x i15> [[B:%.*]] to <3 x i32> 2123; VI-NEXT: [[TMP3:%.*]] = icmp sle <3 x i32> [[TMP1]], [[TMP2]] 2124; VI-NEXT: [[TMP4:%.*]] = sext <3 x i15> [[A]] to <3 x i32> 2125; VI-NEXT: [[TMP5:%.*]] = sext <3 x i15> [[B]] to <3 x i32> 2126; VI-NEXT: [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]] 2127; VI-NEXT: [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i15> 2128; VI-NEXT: store volatile <3 x i15> [[TMP7]], <3 x i15> addrspace(1)* undef 2129; VI-NEXT: ret void 2130; 2131 %cmp = icmp sle <3 x i15> %a, %b 2132 %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b 2133 store volatile <3 x i15> %sel, <3 x i15> 
addrspace(1)* undef 2134 ret void 2135} 2136 2137declare <3 x i15> @llvm.bitreverse.v3i15(<3 x i15>) 2138define amdgpu_kernel void @bitreverse_3xi15(<3 x i15> %a) { 2139; SI-LABEL: @bitreverse_3xi15( 2140; SI-NEXT: [[BREV:%.*]] = call <3 x i15> @llvm.bitreverse.v3i15(<3 x i15> [[A:%.*]]) 2141; SI-NEXT: store volatile <3 x i15> [[BREV]], <3 x i15> addrspace(1)* undef 2142; SI-NEXT: ret void 2143; 2144; VI-LABEL: @bitreverse_3xi15( 2145; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32> 2146; VI-NEXT: [[TMP2:%.*]] = call <3 x i32> @llvm.bitreverse.v3i32(<3 x i32> [[TMP1]]) 2147; VI-NEXT: [[TMP3:%.*]] = lshr <3 x i32> [[TMP2]], <i32 17, i32 17, i32 17> 2148; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15> 2149; VI-NEXT: store volatile <3 x i15> [[TMP4]], <3 x i15> addrspace(1)* undef 2150; VI-NEXT: ret void 2151; 2152 %brev = call <3 x i15> @llvm.bitreverse.v3i15(<3 x i15> %a) 2153 store volatile <3 x i15> %brev, <3 x i15> addrspace(1)* undef 2154 ret void 2155} 2156 2157define amdgpu_kernel void @add_3xi16(<3 x i16> %a, <3 x i16> %b) { 2158; SI-LABEL: @add_3xi16( 2159; SI-NEXT: [[R:%.*]] = add <3 x i16> [[A:%.*]], [[B:%.*]] 2160; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef 2161; SI-NEXT: ret void 2162; 2163; VI-LABEL: @add_3xi16( 2164; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32> 2165; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32> 2166; VI-NEXT: [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]] 2167; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16> 2168; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef 2169; VI-NEXT: ret void 2170; 2171 %r = add <3 x i16> %a, %b 2172 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 2173 ret void 2174} 2175 2176define amdgpu_kernel void @add_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { 2177; SI-LABEL: @add_nsw_3xi16( 2178; SI-NEXT: [[R:%.*]] = add nsw <3 x i16> [[A:%.*]], [[B:%.*]] 2179; SI-NEXT: store 
volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef 2180; SI-NEXT: ret void 2181; 2182; VI-LABEL: @add_nsw_3xi16( 2183; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32> 2184; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32> 2185; VI-NEXT: [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]] 2186; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16> 2187; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef 2188; VI-NEXT: ret void 2189; 2190 %r = add nsw <3 x i16> %a, %b 2191 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 2192 ret void 2193} 2194 2195define amdgpu_kernel void @add_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) { 2196; SI-LABEL: @add_nuw_3xi16( 2197; SI-NEXT: [[R:%.*]] = add nuw <3 x i16> [[A:%.*]], [[B:%.*]] 2198; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef 2199; SI-NEXT: ret void 2200; 2201; VI-LABEL: @add_nuw_3xi16( 2202; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32> 2203; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32> 2204; VI-NEXT: [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]] 2205; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16> 2206; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef 2207; VI-NEXT: ret void 2208; 2209 %r = add nuw <3 x i16> %a, %b 2210 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 2211 ret void 2212} 2213 2214define amdgpu_kernel void @add_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { 2215; SI-LABEL: @add_nuw_nsw_3xi16( 2216; SI-NEXT: [[R:%.*]] = add nuw nsw <3 x i16> [[A:%.*]], [[B:%.*]] 2217; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef 2218; SI-NEXT: ret void 2219; 2220; VI-LABEL: @add_nuw_nsw_3xi16( 2221; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32> 2222; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32> 2223; VI-NEXT: [[TMP3:%.*]] = add nuw nsw <3 x i32> [[TMP1]], [[TMP2]] 2224; VI-NEXT: 
[[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16> 2225; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef 2226; VI-NEXT: ret void 2227; 2228 %r = add nuw nsw <3 x i16> %a, %b 2229 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 2230 ret void 2231} 2232 2233define amdgpu_kernel void @sub_3xi16(<3 x i16> %a, <3 x i16> %b) { 2234; SI-LABEL: @sub_3xi16( 2235; SI-NEXT: [[R:%.*]] = sub <3 x i16> [[A:%.*]], [[B:%.*]] 2236; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef 2237; SI-NEXT: ret void 2238; 2239; VI-LABEL: @sub_3xi16( 2240; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32> 2241; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32> 2242; VI-NEXT: [[TMP3:%.*]] = sub nsw <3 x i32> [[TMP1]], [[TMP2]] 2243; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16> 2244; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef 2245; VI-NEXT: ret void 2246; 2247 %r = sub <3 x i16> %a, %b 2248 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 2249 ret void 2250} 2251 2252define amdgpu_kernel void @sub_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { 2253; SI-LABEL: @sub_nsw_3xi16( 2254; SI-NEXT: [[R:%.*]] = sub nsw <3 x i16> [[A:%.*]], [[B:%.*]] 2255; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef 2256; SI-NEXT: ret void 2257; 2258; VI-LABEL: @sub_nsw_3xi16( 2259; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32> 2260; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32> 2261; VI-NEXT: [[TMP3:%.*]] = sub nsw <3 x i32> [[TMP1]], [[TMP2]] 2262; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16> 2263; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef 2264; VI-NEXT: ret void 2265; 2266 %r = sub nsw <3 x i16> %a, %b 2267 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 2268 ret void 2269} 2270 2271define amdgpu_kernel void @sub_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) { 2272; SI-LABEL: 
@sub_nuw_3xi16( 2273; SI-NEXT: [[R:%.*]] = sub nuw <3 x i16> [[A:%.*]], [[B:%.*]] 2274; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef 2275; SI-NEXT: ret void 2276; 2277; VI-LABEL: @sub_nuw_3xi16( 2278; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32> 2279; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32> 2280; VI-NEXT: [[TMP3:%.*]] = sub nuw nsw <3 x i32> [[TMP1]], [[TMP2]] 2281; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16> 2282; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef 2283; VI-NEXT: ret void 2284; 2285 %r = sub nuw <3 x i16> %a, %b 2286 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 2287 ret void 2288} 2289 2290define amdgpu_kernel void @sub_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { 2291; SI-LABEL: @sub_nuw_nsw_3xi16( 2292; SI-NEXT: [[R:%.*]] = sub nuw nsw <3 x i16> [[A:%.*]], [[B:%.*]] 2293; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef 2294; SI-NEXT: ret void 2295; 2296; VI-LABEL: @sub_nuw_nsw_3xi16( 2297; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32> 2298; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32> 2299; VI-NEXT: [[TMP3:%.*]] = sub nuw nsw <3 x i32> [[TMP1]], [[TMP2]] 2300; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16> 2301; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef 2302; VI-NEXT: ret void 2303; 2304 %r = sub nuw nsw <3 x i16> %a, %b 2305 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 2306 ret void 2307} 2308 2309define amdgpu_kernel void @mul_3xi16(<3 x i16> %a, <3 x i16> %b) { 2310; SI-LABEL: @mul_3xi16( 2311; SI-NEXT: [[R:%.*]] = mul <3 x i16> [[A:%.*]], [[B:%.*]] 2312; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef 2313; SI-NEXT: ret void 2314; 2315; VI-LABEL: @mul_3xi16( 2316; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32> 2317; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x 
i32> 2318; VI-NEXT: [[TMP3:%.*]] = mul nuw <3 x i32> [[TMP1]], [[TMP2]] 2319; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16> 2320; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef 2321; VI-NEXT: ret void 2322; 2323 %r = mul <3 x i16> %a, %b 2324 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 2325 ret void 2326} 2327 2328define amdgpu_kernel void @mul_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { 2329; SI-LABEL: @mul_nsw_3xi16( 2330; SI-NEXT: [[R:%.*]] = mul nsw <3 x i16> [[A:%.*]], [[B:%.*]] 2331; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef 2332; SI-NEXT: ret void 2333; 2334; VI-LABEL: @mul_nsw_3xi16( 2335; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32> 2336; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32> 2337; VI-NEXT: [[TMP3:%.*]] = mul nuw <3 x i32> [[TMP1]], [[TMP2]] 2338; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16> 2339; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef 2340; VI-NEXT: ret void 2341; 2342 %r = mul nsw <3 x i16> %a, %b 2343 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 2344 ret void 2345} 2346 2347define amdgpu_kernel void @mul_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) { 2348; SI-LABEL: @mul_nuw_3xi16( 2349; SI-NEXT: [[R:%.*]] = mul nuw <3 x i16> [[A:%.*]], [[B:%.*]] 2350; SI-NEXT: store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef 2351; SI-NEXT: ret void 2352; 2353; VI-LABEL: @mul_nuw_3xi16( 2354; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32> 2355; VI-NEXT: [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32> 2356; VI-NEXT: [[TMP3:%.*]] = mul nuw nsw <3 x i32> [[TMP1]], [[TMP2]] 2357; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16> 2358; VI-NEXT: store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef 2359; VI-NEXT: ret void 2360; 2361 %r = mul nuw <3 x i16> %a, %b 2362 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 2363 ret void 
}

define amdgpu_kernel void @mul_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @mul_nuw_nsw_3xi16(
; SI-NEXT:    [[R:%.*]] = mul nuw nsw <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
; SI-NEXT:    ret void
;
; VI-LABEL: @mul_nuw_nsw_3xi16(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = mul nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
; VI-NEXT:    ret void
;
  %r = mul nuw nsw <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @shl_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @shl_3xi16(
; SI-NEXT:    [[R:%.*]] = shl <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
; SI-NEXT:    ret void
;
; VI-LABEL: @shl_3xi16(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
; VI-NEXT:    ret void
;
  %r = shl <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @shl_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @shl_nsw_3xi16(
; SI-NEXT:    [[R:%.*]] = shl nsw <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
; SI-NEXT:    ret void
;
; VI-LABEL: @shl_nsw_3xi16(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
; VI-NEXT:    ret void
;
  %r = shl nsw <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @shl_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @shl_nuw_3xi16(
; SI-NEXT:    [[R:%.*]] = shl nuw <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
; SI-NEXT:    ret void
;
; VI-LABEL: @shl_nuw_3xi16(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
; VI-NEXT:    ret void
;
  %r = shl nuw <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @shl_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @shl_nuw_nsw_3xi16(
; SI-NEXT:    [[R:%.*]] = shl nuw nsw <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
; SI-NEXT:    ret void
;
; VI-LABEL: @shl_nuw_nsw_3xi16(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = shl nuw nsw <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
; VI-NEXT:    ret void
;
  %r = shl nuw nsw <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @lshr_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @lshr_3xi16(
; SI-NEXT:    [[R:%.*]] = lshr <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
; SI-NEXT:    ret void
;
; VI-LABEL: @lshr_3xi16(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = lshr <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
; VI-NEXT:    ret void
;
  %r = lshr <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @lshr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @lshr_exact_3xi16(
; SI-NEXT:    [[R:%.*]] = lshr exact <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
; SI-NEXT:    ret void
;
; VI-LABEL: @lshr_exact_3xi16(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = lshr exact <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
; VI-NEXT:    ret void
;
  %r = lshr exact <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @ashr_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @ashr_3xi16(
; SI-NEXT:    [[R:%.*]] = ashr <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
; SI-NEXT:    ret void
;
; VI-LABEL: @ashr_3xi16(
; VI-NEXT:    [[TMP1:%.*]] = sext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = sext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = ashr <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
; VI-NEXT:    ret void
;
  %r = ashr <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @ashr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @ashr_exact_3xi16(
; SI-NEXT:    [[R:%.*]] = ashr exact <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
; SI-NEXT:    ret void
;
; VI-LABEL: @ashr_exact_3xi16(
; VI-NEXT:    [[TMP1:%.*]] = sext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = sext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = ashr exact <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
; VI-NEXT:    ret void
;
  %r = ashr exact <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @and_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @and_3xi16(
; SI-NEXT:    [[R:%.*]] = and <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
; SI-NEXT:    ret void
;
; VI-LABEL: @and_3xi16(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = and <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
; VI-NEXT:    ret void
;
  %r = and <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @or_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @or_3xi16(
; SI-NEXT:    [[R:%.*]] = or <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
; SI-NEXT:    ret void
;
; VI-LABEL: @or_3xi16(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = or <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
; VI-NEXT:    ret void
;
  %r = or <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @xor_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @xor_3xi16(
; SI-NEXT:    [[R:%.*]] = xor <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    store volatile <3 x i16> [[R]], <3 x i16> addrspace(1)* undef
; SI-NEXT:    ret void
;
; VI-LABEL: @xor_3xi16(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = xor <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
; VI-NEXT:    ret void
;
  %r = xor <3 x i16> %a, %b
  store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @select_eq_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @select_eq_3xi16(
; SI-NEXT:    [[CMP:%.*]] = icmp eq <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
; SI-NEXT:    store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef
; SI-NEXT:    ret void
;
; VI-LABEL: @select_eq_3xi16(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = icmp eq <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = zext <3 x i16> [[A]] to <3 x i32>
; VI-NEXT:    [[TMP5:%.*]] = zext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT:    [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
; VI-NEXT:    store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef
; VI-NEXT:    ret void
;
  %cmp = icmp eq <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @select_ne_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @select_ne_3xi16(
; SI-NEXT:    [[CMP:%.*]] = icmp ne <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
; SI-NEXT:    store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef
; SI-NEXT:    ret void
;
; VI-LABEL: @select_ne_3xi16(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = zext <3 x i16> [[A]] to <3 x i32>
; VI-NEXT:    [[TMP5:%.*]] = zext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT:    [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
; VI-NEXT:    store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef
; VI-NEXT:    ret void
;
  %cmp = icmp ne <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @select_ugt_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @select_ugt_3xi16(
; SI-NEXT:    [[CMP:%.*]] = icmp ugt <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
; SI-NEXT:    store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef
; SI-NEXT:    ret void
;
; VI-LABEL: @select_ugt_3xi16(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = icmp ugt <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = zext <3 x i16> [[A]] to <3 x i32>
; VI-NEXT:    [[TMP5:%.*]] = zext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT:    [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
; VI-NEXT:    store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef
; VI-NEXT:    ret void
;
  %cmp = icmp ugt <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @select_uge_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @select_uge_3xi16(
; SI-NEXT:    [[CMP:%.*]] = icmp uge <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
; SI-NEXT:    store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef
; SI-NEXT:    ret void
;
; VI-LABEL: @select_uge_3xi16(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = icmp uge <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = zext <3 x i16> [[A]] to <3 x i32>
; VI-NEXT:    [[TMP5:%.*]] = zext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT:    [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
; VI-NEXT:    store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef
; VI-NEXT:    ret void
;
  %cmp = icmp uge <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @select_ult_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @select_ult_3xi16(
; SI-NEXT:    [[CMP:%.*]] = icmp ult <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
; SI-NEXT:    store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef
; SI-NEXT:    ret void
;
; VI-LABEL: @select_ult_3xi16(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = icmp ult <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = zext <3 x i16> [[A]] to <3 x i32>
; VI-NEXT:    [[TMP5:%.*]] = zext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT:    [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
; VI-NEXT:    store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef
; VI-NEXT:    ret void
;
  %cmp = icmp ult <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @select_ule_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @select_ule_3xi16(
; SI-NEXT:    [[CMP:%.*]] = icmp ule <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
; SI-NEXT:    store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef
; SI-NEXT:    ret void
;
; VI-LABEL: @select_ule_3xi16(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = zext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = icmp ule <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = zext <3 x i16> [[A]] to <3 x i32>
; VI-NEXT:    [[TMP5:%.*]] = zext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT:    [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
; VI-NEXT:    store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef
; VI-NEXT:    ret void
;
  %cmp = icmp ule <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @select_sgt_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @select_sgt_3xi16(
; SI-NEXT:    [[CMP:%.*]] = icmp sgt <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
; SI-NEXT:    store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef
; SI-NEXT:    ret void
;
; VI-LABEL: @select_sgt_3xi16(
; VI-NEXT:    [[TMP1:%.*]] = sext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = sext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = icmp sgt <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = sext <3 x i16> [[A]] to <3 x i32>
; VI-NEXT:    [[TMP5:%.*]] = sext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT:    [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
; VI-NEXT:    store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef
; VI-NEXT:    ret void
;
  %cmp = icmp sgt <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @select_sge_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @select_sge_3xi16(
; SI-NEXT:    [[CMP:%.*]] = icmp sge <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
; SI-NEXT:    store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef
; SI-NEXT:    ret void
;
; VI-LABEL: @select_sge_3xi16(
; VI-NEXT:    [[TMP1:%.*]] = sext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = sext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = icmp sge <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = sext <3 x i16> [[A]] to <3 x i32>
; VI-NEXT:    [[TMP5:%.*]] = sext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT:    [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
; VI-NEXT:    store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef
; VI-NEXT:    ret void
;
  %cmp = icmp sge <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @select_slt_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @select_slt_3xi16(
; SI-NEXT:    [[CMP:%.*]] = icmp slt <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
; SI-NEXT:    store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef
; SI-NEXT:    ret void
;
; VI-LABEL: @select_slt_3xi16(
; VI-NEXT:    [[TMP1:%.*]] = sext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = sext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = icmp slt <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = sext <3 x i16> [[A]] to <3 x i32>
; VI-NEXT:    [[TMP5:%.*]] = sext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT:    [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
; VI-NEXT:    store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef
; VI-NEXT:    ret void
;
  %cmp = icmp slt <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @select_sle_3xi16(<3 x i16> %a, <3 x i16> %b) {
; SI-LABEL: @select_sle_3xi16(
; SI-NEXT:    [[CMP:%.*]] = icmp sle <3 x i16> [[A:%.*]], [[B:%.*]]
; SI-NEXT:    [[SEL:%.*]] = select <3 x i1> [[CMP]], <3 x i16> [[A]], <3 x i16> [[B]]
; SI-NEXT:    store volatile <3 x i16> [[SEL]], <3 x i16> addrspace(1)* undef
; SI-NEXT:    ret void
;
; VI-LABEL: @select_sle_3xi16(
; VI-NEXT:    [[TMP1:%.*]] = sext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = sext <3 x i16> [[B:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP3:%.*]] = icmp sle <3 x i32> [[TMP1]], [[TMP2]]
; VI-NEXT:    [[TMP4:%.*]] = sext <3 x i16> [[A]] to <3 x i32>
; VI-NEXT:    [[TMP5:%.*]] = sext <3 x i16> [[B]] to <3 x i32>
; VI-NEXT:    [[TMP6:%.*]] = select <3 x i1> [[TMP3]], <3 x i32> [[TMP4]], <3 x i32> [[TMP5]]
; VI-NEXT:    [[TMP7:%.*]] = trunc <3 x i32> [[TMP6]] to <3 x i16>
; VI-NEXT:    store volatile <3 x i16> [[TMP7]], <3 x i16> addrspace(1)* undef
; VI-NEXT:    ret void
;
  %cmp = icmp sle <3 x i16> %a, %b
  %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
  store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef
  ret void
}

declare <3 x i16> @llvm.bitreverse.v3i16(<3 x i16>)

define amdgpu_kernel void @bitreverse_3xi16(<3 x i16> %a) {
; SI-LABEL: @bitreverse_3xi16(
; SI-NEXT:    [[BREV:%.*]] = call <3 x i16> @llvm.bitreverse.v3i16(<3 x i16> [[A:%.*]])
; SI-NEXT:    store volatile <3 x i16> [[BREV]], <3 x i16> addrspace(1)* undef
; SI-NEXT:    ret void
;
; VI-LABEL: @bitreverse_3xi16(
; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
; VI-NEXT:    [[TMP2:%.*]] = call <3 x i32> @llvm.bitreverse.v3i32(<3 x i32> [[TMP1]])
; VI-NEXT:    [[TMP3:%.*]] = lshr <3 x i32> [[TMP2]], <i32 16, i32 16, i32 16>
; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
; VI-NEXT:    store volatile <3 x i16> [[TMP4]], <3 x i16> addrspace(1)* undef
; VI-NEXT:    ret void
;
  %brev = call <3 x i16> @llvm.bitreverse.v3i16(<3 x i16> %a)
  store volatile <3 x i16> %brev, <3 x i16> addrspace(1)* undef
  ret void
}