1; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck -check-prefix=GCN -check-prefix=SI %s 2; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -amdgpu-codegenprepare %s | FileCheck -check-prefix=GCN -check-prefix=VI %s 3 4; GCN-LABEL: @add_i3( 5; SI: %r = add i3 %a, %b 6; SI-NEXT: store volatile i3 %r 7; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 8; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 9; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]] 10; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 11; VI-NEXT: store volatile i3 %[[R_3]] 12define amdgpu_kernel void @add_i3(i3 %a, i3 %b) { 13 %r = add i3 %a, %b 14 store volatile i3 %r, i3 addrspace(1)* undef 15 ret void 16} 17 18; GCN-LABEL: @add_nsw_i3( 19; SI: %r = add nsw i3 %a, %b 20; SI-NEXT: store volatile i3 %r 21; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 22; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 23; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]] 24; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 25; VI-NEXT: store volatile i3 %[[R_3]] 26define amdgpu_kernel void @add_nsw_i3(i3 %a, i3 %b) { 27 %r = add nsw i3 %a, %b 28 store volatile i3 %r, i3 addrspace(1)* undef 29 ret void 30} 31 32; GCN-LABEL: @add_nuw_i3( 33; SI: %r = add nuw i3 %a, %b 34; SI-NEXT: store volatile i3 %r 35; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 36; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 37; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]] 38; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 39; VI-NEXT: store volatile i3 %[[R_3]] 40define amdgpu_kernel void @add_nuw_i3(i3 %a, i3 %b) { 41 %r = add nuw i3 %a, %b 42 store volatile i3 %r, i3 addrspace(1)* undef 43 ret void 44} 45 46; GCN-LABEL: @add_nuw_nsw_i3( 47; SI: %r = add nuw nsw i3 %a, %b 48; SI-NEXT: store volatile i3 %r 49; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 50; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 51; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]] 52; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 53; VI-NEXT: store volatile i3 %[[R_3]] 54define amdgpu_kernel void @add_nuw_nsw_i3(i3 %a, i3 %b) { 55 %r = add nuw nsw i3 %a, %b 56 store volatile i3 %r, i3 addrspace(1)* undef 57 ret void 58} 59 60; GCN-LABEL: @sub_i3( 61; SI: %r = sub i3 %a, %b 62; SI-NEXT: store volatile i3 %r 63; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 64; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 65; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw i32 %[[A_32]], %[[B_32]] 66; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 67; VI-NEXT: store volatile i3 %[[R_3]] 68define amdgpu_kernel void @sub_i3(i3 %a, i3 %b) { 69 %r = sub i3 %a, %b 70 store volatile i3 %r, i3 addrspace(1)* undef 71 ret void 72} 73 74; GCN-LABEL: @sub_nsw_i3( 75; SI: %r = sub nsw i3 %a, %b 76; SI-NEXT: store volatile i3 %r 77; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 78; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 79; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw i32 %[[A_32]], %[[B_32]] 80; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 81; VI-NEXT: store volatile i3 %[[R_3]] 82define amdgpu_kernel void @sub_nsw_i3(i3 %a, i3 %b) { 83 %r = sub nsw i3 %a, %b 84 store volatile i3 %r, i3 addrspace(1)* undef 85 ret void 86} 87 88; GCN-LABEL: @sub_nuw_i3( 89; SI: %r = sub nuw i3 %a, %b 90; SI-NEXT: store volatile i3 %r 91; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 92; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 93; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw i32 %[[A_32]], %[[B_32]] 94; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 95; VI-NEXT: store volatile i3 %[[R_3]] 96define amdgpu_kernel void @sub_nuw_i3(i3 %a, i3 %b) { 97 %r = sub nuw i3 %a, %b 98 store volatile i3 %r, i3 addrspace(1)* undef 99 ret void 100} 101 102; GCN-LABEL: @sub_nuw_nsw_i3( 103; SI: %r = sub nuw nsw i3 %a, %b 104; SI-NEXT: store volatile i3 %r 105; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 106; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 107; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw i32 %[[A_32]], %[[B_32]] 108; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 109; VI-NEXT: store volatile i3 %[[R_3]] 110define amdgpu_kernel void @sub_nuw_nsw_i3(i3 %a, i3 %b) { 111 %r = sub nuw nsw i3 %a, %b 112 store volatile i3 %r, i3 addrspace(1)* undef 113 ret void 114} 115 116; GCN-LABEL: @mul_i3( 117; SI: %r = mul i3 %a, %b 118; SI-NEXT: store volatile i3 %r 119; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 120; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 121; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw i32 %[[A_32]], %[[B_32]] 122; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 123; VI-NEXT: store volatile i3 %[[R_3]] 124define amdgpu_kernel void @mul_i3(i3 %a, i3 %b) { 125 %r = mul i3 %a, %b 126 store volatile i3 %r, i3 addrspace(1)* undef 127 ret void 128} 129 130; GCN-LABEL: @mul_nsw_i3( 131; SI: %r = mul nsw i3 %a, %b 132; SI-NEXT: store volatile i3 %r 133; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 134; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 135; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw i32 %[[A_32]], %[[B_32]] 136; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 137; VI-NEXT: store volatile i3 %[[R_3]] 138define amdgpu_kernel void @mul_nsw_i3(i3 %a, i3 %b) { 139 %r = mul nsw i3 %a, %b 140 store volatile i3 %r, i3 addrspace(1)* undef 141 ret void 142} 143 144; GCN-LABEL: @mul_nuw_i3( 145; SI: %r = mul nuw i3 %a, %b 146; SI-NEXT: store volatile i3 %r 147; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 148; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 149; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw i32 %[[A_32]], %[[B_32]] 150; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 151; VI-NEXT: store volatile i3 %[[R_3]] 152define amdgpu_kernel void @mul_nuw_i3(i3 %a, i3 %b) { 153 %r = mul nuw i3 %a, %b 154 store volatile i3 %r, i3 addrspace(1)* undef 155 ret void 156} 157 158; GCN-LABEL: @mul_nuw_nsw_i3( 159; SI: %r = mul nuw nsw i3 %a, %b 160; SI-NEXT: store volatile i3 %r 161; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 162; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 163; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw i32 %[[A_32]], %[[B_32]] 164; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 165; VI-NEXT: store volatile i3 %[[R_3]] 166define amdgpu_kernel void @mul_nuw_nsw_i3(i3 %a, i3 %b) { 167 %r = mul nuw nsw i3 %a, %b 168 store volatile i3 %r, i3 addrspace(1)* undef 169 ret void 170} 171 172; GCN-LABEL: @shl_i3( 173; SI: %r = shl i3 %a, %b 174; SI-NEXT: store volatile i3 %r 175; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 176; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 177; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]] 178; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 179; VI-NEXT: store volatile i3 %[[R_3]] 180define amdgpu_kernel void @shl_i3(i3 %a, i3 %b) { 181 %r = shl i3 %a, %b 182 store volatile i3 %r, i3 addrspace(1)* undef 183 ret void 184} 185 186; GCN-LABEL: @shl_nsw_i3( 187; SI: %r = shl nsw i3 %a, %b 188; SI-NEXT: store volatile i3 %r 189; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 190; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 191; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]] 192; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 193; VI-NEXT: store volatile i3 %[[R_3]] 194define amdgpu_kernel void @shl_nsw_i3(i3 %a, i3 %b) { 195 %r = shl nsw i3 %a, %b 196 store volatile i3 %r, i3 addrspace(1)* undef 197 ret void 198} 199 200; GCN-LABEL: @shl_nuw_i3( 201; SI: %r = shl nuw i3 %a, %b 202; SI-NEXT: store volatile i3 %r 203; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 204; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 205; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]] 206; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 207; VI-NEXT: store volatile i3 %[[R_3]] 208define amdgpu_kernel void @shl_nuw_i3(i3 %a, i3 %b) { 209 %r = shl nuw i3 %a, %b 210 store volatile i3 %r, i3 addrspace(1)* undef 211 ret void 212} 213 214; GCN-LABEL: @shl_nuw_nsw_i3( 215; SI: %r = shl nuw nsw i3 %a, %b 216; SI-NEXT: store volatile i3 %r 217; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 218; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 219; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]] 220; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 221; VI-NEXT: store volatile i3 %[[R_3]] 222define amdgpu_kernel void @shl_nuw_nsw_i3(i3 %a, i3 %b) { 223 %r = shl nuw nsw i3 %a, %b 224 store volatile i3 %r, i3 addrspace(1)* undef 225 ret void 226} 227 228; GCN-LABEL: @lshr_i3( 229; SI: %r = lshr i3 %a, %b 230; SI-NEXT: store volatile i3 %r 231; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 232; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 233; VI-NEXT: %[[R_32:[0-9]+]] = lshr i32 %[[A_32]], %[[B_32]] 234; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 235; VI-NEXT: store volatile i3 %[[R_3]] 236define amdgpu_kernel void @lshr_i3(i3 %a, i3 %b) { 237 %r = lshr i3 %a, %b 238 store volatile i3 %r, i3 addrspace(1)* undef 239 ret void 240} 241 242; GCN-LABEL: @lshr_exact_i3( 243; SI: %r = lshr exact i3 %a, %b 244; SI-NEXT: store volatile i3 %r 245; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 246; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 247; VI-NEXT: %[[R_32:[0-9]+]] = lshr exact i32 %[[A_32]], %[[B_32]] 248; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 249; VI-NEXT: store volatile i3 %[[R_3]] 250define amdgpu_kernel void @lshr_exact_i3(i3 %a, i3 %b) { 251 %r = lshr exact i3 %a, %b 252 store volatile i3 %r, i3 addrspace(1)* undef 253 ret void 254} 255 256; GCN-LABEL: @ashr_i3( 257; SI: %r = ashr i3 %a, %b 258; SI-NEXT: store volatile i3 %r 259; VI: %[[A_32:[0-9]+]] = sext i3 %a to i32 260; VI-NEXT: %[[B_32:[0-9]+]] = sext i3 %b to i32 261; VI-NEXT: %[[R_32:[0-9]+]] = ashr i32 %[[A_32]], %[[B_32]] 262; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 263; VI-NEXT: store volatile i3 %[[R_3]] 264define amdgpu_kernel void @ashr_i3(i3 %a, i3 %b) { 265 %r = ashr i3 %a, %b 266 store volatile i3 %r, i3 addrspace(1)* undef 267 ret void 268} 269 270; GCN-LABEL: @ashr_exact_i3( 271; SI: %r = ashr exact i3 %a, %b 272; SI-NEXT: store volatile i3 %r 273; VI: %[[A_32:[0-9]+]] = sext i3 %a to i32 274; VI-NEXT: %[[B_32:[0-9]+]] = sext i3 %b to i32 275; VI-NEXT: %[[R_32:[0-9]+]] = ashr exact i32 %[[A_32]], %[[B_32]] 276; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 277; VI-NEXT: store volatile i3 %[[R_3]] 278define amdgpu_kernel void @ashr_exact_i3(i3 %a, i3 %b) { 279 %r = ashr exact i3 %a, %b 280 store volatile i3 %r, i3 addrspace(1)* undef 281 ret void 282} 283 284; GCN-LABEL: @and_i3( 285; SI: %r = and i3 %a, %b 286; SI-NEXT: store volatile i3 %r 287; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 288; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 289; VI-NEXT: %[[R_32:[0-9]+]] = and i32 %[[A_32]], %[[B_32]] 290; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 291; VI-NEXT: store volatile i3 %[[R_3]] 292define amdgpu_kernel void @and_i3(i3 %a, i3 %b) { 293 %r = and i3 %a, %b 294 store volatile i3 %r, i3 addrspace(1)* undef 295 ret void 296} 297 298; GCN-LABEL: @or_i3( 299; SI: %r = or i3 %a, %b 300; SI-NEXT: store volatile i3 %r 301; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 302; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 303; VI-NEXT: %[[R_32:[0-9]+]] = or i32 %[[A_32]], %[[B_32]] 304; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 305; VI-NEXT: store volatile i3 %[[R_3]] 306define amdgpu_kernel void @or_i3(i3 %a, i3 %b) { 307 %r = or i3 %a, %b 308 store volatile i3 %r, i3 addrspace(1)* undef 309 ret void 310} 311 312; GCN-LABEL: @xor_i3( 313; SI: %r = xor i3 %a, %b 314; SI-NEXT: store volatile i3 %r 315; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 316; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 317; VI-NEXT: %[[R_32:[0-9]+]] = xor i32 %[[A_32]], %[[B_32]] 318; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 319; VI-NEXT: store volatile i3 %[[R_3]] 320define amdgpu_kernel void @xor_i3(i3 %a, i3 %b) { 321 %r = xor i3 %a, %b 322 store volatile i3 %r, i3 addrspace(1)* undef 323 ret void 324} 325 326; GCN-LABEL: @select_eq_i3( 327; SI: %cmp = icmp eq i3 %a, %b 328; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b 329; SI-NEXT: store volatile i3 %sel 330; VI: %[[A_32_0:[0-9]+]] = zext i3 %a to i32 331; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i3 %b to i32 332; VI-NEXT: %[[CMP:[0-9]+]] = icmp eq i32 %[[A_32_0]], %[[B_32_0]] 333; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i3 %a to i32 334; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i3 %b to i32 335; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] 336; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3 337; VI-NEXT: store volatile i3 %[[SEL_3]] 338define amdgpu_kernel void @select_eq_i3(i3 %a, i3 %b) { 339 %cmp = icmp eq i3 %a, %b 340 %sel = select i1 %cmp, i3 %a, i3 %b 341 store volatile i3 %sel, i3 addrspace(1)* undef 342 ret void 343} 344 345; GCN-LABEL: @select_ne_i3( 346; SI: %cmp = icmp ne i3 %a, %b 347; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b 348; SI-NEXT: store volatile i3 %sel 349; VI: %[[A_32_0:[0-9]+]] = zext i3 %a to i32 350; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i3 %b to i32 351; VI-NEXT: %[[CMP:[0-9]+]] = icmp ne i32 %[[A_32_0]], %[[B_32_0]] 352; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i3 %a to i32 353; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i3 %b to i32 354; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] 355; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3 356; VI-NEXT: store volatile i3 %[[SEL_3]] 357define amdgpu_kernel void @select_ne_i3(i3 %a, i3 %b) { 358 %cmp = icmp ne i3 %a, %b 359 %sel = select i1 %cmp, i3 %a, i3 %b 360 store volatile i3 %sel, i3 addrspace(1)* undef 361 ret void 362} 363 364; GCN-LABEL: @select_ugt_i3( 365; SI: %cmp = icmp ugt i3 %a, %b 366; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b 367; SI-NEXT: store volatile i3 %sel 368; VI: %[[A_32_0:[0-9]+]] = zext i3 %a to i32 369; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i3 %b to i32 370; VI-NEXT: %[[CMP:[0-9]+]] = icmp ugt i32 %[[A_32_0]], %[[B_32_0]] 371; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i3 %a to i32 372; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i3 %b to i32 373; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] 374; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3 375; VI-NEXT: store volatile i3 %[[SEL_3]] 376define amdgpu_kernel void @select_ugt_i3(i3 %a, i3 %b) { 377 %cmp = icmp ugt i3 %a, %b 378 %sel = select i1 %cmp, i3 %a, i3 %b 379 store volatile i3 %sel, i3 addrspace(1)* undef 380 ret void 381} 382 383; GCN-LABEL: @select_uge_i3( 384; SI: %cmp = icmp uge i3 %a, %b 385; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b 386; SI-NEXT: store volatile i3 %sel 387; VI: %[[A_32_0:[0-9]+]] = zext i3 %a to i32 388; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i3 %b to i32 389; VI-NEXT: %[[CMP:[0-9]+]] = icmp uge i32 %[[A_32_0]], %[[B_32_0]] 390; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i3 %a to i32 391; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i3 %b to i32 392; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] 393; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3 394; VI-NEXT: store volatile i3 %[[SEL_3]] 395define amdgpu_kernel void @select_uge_i3(i3 %a, i3 %b) { 396 %cmp = icmp uge i3 %a, %b 397 %sel = select i1 %cmp, i3 %a, i3 %b 398 store volatile i3 %sel, i3 addrspace(1)* undef 399 ret void 400} 401 402; GCN-LABEL: @select_ult_i3( 403; SI: %cmp = icmp ult i3 %a, %b 404; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b 405; SI-NEXT: store volatile i3 %sel 406; VI: %[[A_32_0:[0-9]+]] = zext i3 %a to i32 407; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i3 %b to i32 408; VI-NEXT: %[[CMP:[0-9]+]] = icmp ult i32 %[[A_32_0]], %[[B_32_0]] 409; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i3 %a to i32 410; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i3 %b to i32 411; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] 412; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3 413; VI-NEXT: store volatile i3 %[[SEL_3]] 414define amdgpu_kernel void @select_ult_i3(i3 %a, i3 %b) { 415 %cmp = icmp ult i3 %a, %b 416 %sel = select i1 %cmp, i3 %a, i3 %b 417 store volatile i3 %sel, i3 addrspace(1)* undef 418 ret void 419} 420 421; GCN-LABEL: @select_ule_i3( 422; SI: %cmp = icmp ule i3 %a, %b 423; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b 424; SI-NEXT: store volatile i3 %sel 425; VI: %[[A_32_0:[0-9]+]] = zext i3 %a to i32 426; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i3 %b to i32 427; VI-NEXT: %[[CMP:[0-9]+]] = icmp ule i32 %[[A_32_0]], %[[B_32_0]] 428; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i3 %a to i32 429; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i3 %b to i32 430; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] 431; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3 432; VI-NEXT: store volatile i3 %[[SEL_3]] 433define amdgpu_kernel void @select_ule_i3(i3 %a, i3 %b) { 434 %cmp = icmp ule i3 %a, %b 435 %sel = select i1 %cmp, i3 %a, i3 %b 436 store volatile i3 %sel, i3 addrspace(1)* undef 437 ret void 438} 439 440; GCN-LABEL: @select_sgt_i3( 441; SI: %cmp = icmp sgt i3 %a, %b 442; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b 443; SI-NEXT: store volatile i3 %sel 444; VI: %[[A_32_0:[0-9]+]] = sext i3 %a to i32 445; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i3 %b to i32 446; VI-NEXT: %[[CMP:[0-9]+]] = icmp sgt i32 %[[A_32_0]], %[[B_32_0]] 447; VI-NEXT: %[[A_32_1:[0-9]+]] = sext i3 %a to i32 448; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i3 %b to i32 449; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] 450; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3 451; VI-NEXT: store volatile i3 %[[SEL_3]] 452define amdgpu_kernel void @select_sgt_i3(i3 %a, i3 %b) { 453 %cmp = icmp sgt i3 %a, %b 454 %sel = select i1 %cmp, i3 %a, i3 %b 455 store volatile i3 %sel, i3 addrspace(1)* undef 456 ret void 457} 458 459; GCN-LABEL: @select_sge_i3( 460; SI: %cmp = icmp sge i3 %a, %b 461; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b 462; SI-NEXT: store volatile i3 %sel 463; VI: %[[A_32_0:[0-9]+]] = sext i3 %a to i32 464; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i3 %b to i32 465; VI-NEXT: %[[CMP:[0-9]+]] = icmp sge i32 %[[A_32_0]], %[[B_32_0]] 466; VI-NEXT: %[[A_32_1:[0-9]+]] = sext i3 %a to i32 467; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i3 %b to i32 468; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] 469; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3 470; VI-NEXT: store volatile i3 %[[SEL_3]] 471define amdgpu_kernel void @select_sge_i3(i3 %a, i3 %b) { 472 %cmp = icmp sge i3 %a, %b 473 %sel = select i1 %cmp, i3 %a, i3 %b 474 store volatile i3 %sel, i3 addrspace(1)* undef 475 ret void 476} 477 478; GCN-LABEL: @select_slt_i3( 479; SI: %cmp = icmp slt i3 %a, %b 480; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b 481; SI-NEXT: store volatile i3 %sel 482; VI: %[[A_32_0:[0-9]+]] = sext i3 %a to i32 483; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i3 %b to i32 484; VI-NEXT: %[[CMP:[0-9]+]] = icmp slt i32 %[[A_32_0]], %[[B_32_0]] 485; VI-NEXT: %[[A_32_1:[0-9]+]] = sext i3 %a to i32 486; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i3 %b to i32 487; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] 488; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3 489; VI-NEXT: store volatile i3 %[[SEL_3]] 490define amdgpu_kernel void @select_slt_i3(i3 %a, i3 %b) { 491 %cmp = icmp slt i3 %a, %b 492 %sel = select i1 %cmp, i3 %a, i3 %b 493 store volatile i3 %sel, i3 addrspace(1)* undef 494 ret void 495} 496 497; GCN-LABEL: @select_sle_i3( 498; SI: %cmp = icmp sle i3 %a, %b 499; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b 500; SI-NEXT: store volatile i3 %sel 501; VI: %[[A_32_0:[0-9]+]] = sext i3 %a to i32 502; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i3 %b to i32 503; VI-NEXT: %[[CMP:[0-9]+]] = icmp sle i32 %[[A_32_0]], %[[B_32_0]] 504; VI-NEXT: %[[A_32_1:[0-9]+]] = sext i3 %a to i32 505; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i3 %b to i32 506; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] 507; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3 508; VI-NEXT: store volatile i3 %[[SEL_3]] 509define amdgpu_kernel void @select_sle_i3(i3 %a, i3 %b) { 510 %cmp = icmp sle i3 %a, %b 511 %sel = select i1 %cmp, i3 %a, i3 %b 512 store volatile i3 %sel, i3 addrspace(1)* undef 513 ret void 514} 515 516declare i3 @llvm.bitreverse.i3(i3) 517; GCN-LABEL: @bitreverse_i3( 518; SI: %brev = call i3 @llvm.bitreverse.i3(i3 %a) 519; SI-NEXT: store volatile i3 %brev 520; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 521; VI-NEXT: %[[R_32:[0-9]+]] = call i32 @llvm.bitreverse.i32(i32 %[[A_32]]) 522; VI-NEXT: %[[S_32:[0-9]+]] = lshr i32 %[[R_32]], 29 523; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[S_32]] to i3 524; VI-NEXT: store volatile i3 %[[R_3]] 525define amdgpu_kernel void @bitreverse_i3(i3 %a) { 526 %brev = call i3 @llvm.bitreverse.i3(i3 %a) 527 store volatile i3 %brev, i3 addrspace(1)* undef 528 ret void 529} 530 531; GCN-LABEL: @add_i16( 532; SI: %r = add i16 %a, %b 533; SI-NEXT: store volatile i16 %r 534; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 535; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 536; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]] 537; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 538; VI-NEXT: store volatile i16 %[[R_16]] 539define amdgpu_kernel void @add_i16(i16 %a, i16 %b) { 540 %r = add i16 %a, %b 541 store volatile i16 %r, i16 addrspace(1)* undef 542 ret void 543} 544 545; GCN-LABEL: @constant_add_i16( 546; VI: store volatile i16 3 547define amdgpu_kernel void @constant_add_i16() { 548 %r = add i16 1, 2 549 store volatile i16 %r, i16 addrspace(1)* undef 550 ret void 551} 552 553; GCN-LABEL: @constant_add_nsw_i16( 554; VI: store volatile i16 3 555define amdgpu_kernel void @constant_add_nsw_i16() { 556 %r = add nsw i16 1, 2 557 store volatile i16 %r, i16 addrspace(1)* undef 558 ret void 559} 560 561; GCN-LABEL: @constant_add_nuw_i16( 562; VI: store volatile i16 3 563define amdgpu_kernel void @constant_add_nuw_i16() { 564 %r = add nsw i16 1, 2 565 store volatile i16 %r, i16 addrspace(1)* undef 566 ret void 567} 568 569; GCN-LABEL: @add_nsw_i16( 570; SI: %r = add nsw i16 %a, %b 571; SI-NEXT: store volatile i16 %r 572; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 573; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 574; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]] 575; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 576; VI-NEXT: store volatile i16 %[[R_16]] 577define amdgpu_kernel void @add_nsw_i16(i16 %a, i16 %b) { 578 %r = add nsw i16 %a, %b 579 store volatile i16 %r, i16 addrspace(1)* undef 580 ret void 581} 582 583; GCN-LABEL: @add_nuw_i16( 584; SI: %r = add nuw i16 %a, %b 585; SI-NEXT: store volatile i16 %r 586; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 587; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 588; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]] 589; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 590; VI-NEXT: store volatile i16 %[[R_16]] 591define amdgpu_kernel void @add_nuw_i16(i16 %a, i16 %b) { 592 %r = add nuw i16 %a, %b 593 store volatile i16 %r, i16 addrspace(1)* undef 594 ret void 595} 596 597; GCN-LABEL: @add_nuw_nsw_i16( 598; SI: %r = add nuw nsw i16 %a, %b 599; SI-NEXT: store volatile i16 %r 600; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 601; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 602; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]] 603; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 604; VI-NEXT: store volatile i16 %[[R_16]] 605define amdgpu_kernel void @add_nuw_nsw_i16(i16 %a, i16 %b) { 606 %r = add nuw nsw i16 %a, %b 607 store volatile i16 %r, i16 addrspace(1)* undef 608 ret void 609} 610 611; GCN-LABEL: @sub_i16( 612; SI: %r = sub i16 %a, %b 613; SI-NEXT: store volatile i16 %r 614; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 615; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 616; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw i32 %[[A_32]], %[[B_32]] 617; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 618; VI-NEXT: store volatile i16 %[[R_16]] 619define amdgpu_kernel void @sub_i16(i16 %a, i16 %b) { 620 %r = sub i16 %a, %b 621 store volatile i16 %r, i16 addrspace(1)* undef 622 ret void 623} 624 625; GCN-LABEL: @sub_nsw_i16( 626; SI: %r = sub nsw i16 %a, %b 627; SI-NEXT: store volatile i16 %r 628; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 629; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 630; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw i32 %[[A_32]], %[[B_32]] 631; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 632; VI-NEXT: store volatile i16 %[[R_16]] 633define amdgpu_kernel void @sub_nsw_i16(i16 %a, i16 %b) { 634 %r = sub nsw i16 %a, %b 635 store volatile i16 %r, i16 addrspace(1)* undef 636 ret void 637} 638 639; GCN-LABEL: @sub_nuw_i16( 640; SI: %r = sub nuw i16 %a, %b 641; SI-NEXT: store volatile i16 %r 642; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 643; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 644; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw i32 %[[A_32]], %[[B_32]] 645; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 646; VI-NEXT: store volatile i16 %[[R_16]] 647define amdgpu_kernel void @sub_nuw_i16(i16 %a, i16 %b) { 648 %r = sub nuw i16 %a, %b 649 store volatile i16 %r, i16 addrspace(1)* undef 650 ret void 651} 652 653; GCN-LABEL: @sub_nuw_nsw_i16( 654; SI: %r = sub nuw nsw i16 %a, %b 655; SI-NEXT: store volatile i16 %r 656; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 657; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 658; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw i32 %[[A_32]], %[[B_32]] 659; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 660; VI-NEXT: store volatile i16 %[[R_16]] 661define amdgpu_kernel void @sub_nuw_nsw_i16(i16 %a, i16 %b) { 662 %r = sub nuw nsw i16 %a, %b 663 store volatile i16 %r, i16 addrspace(1)* undef 664 ret void 665} 666 667; GCN-LABEL: @mul_i16( 668; SI: %r = mul i16 %a, %b 669; SI-NEXT: store volatile i16 %r 670; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 671; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 672; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw i32 %[[A_32]], %[[B_32]] 673; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 674; VI-NEXT: store volatile i16 %[[R_16]] 675define amdgpu_kernel void @mul_i16(i16 %a, i16 %b) { 676 %r = mul i16 %a, %b 677 store volatile i16 %r, i16 addrspace(1)* undef 678 ret void 679} 680 681; GCN-LABEL: @mul_nsw_i16( 682; SI: %r = mul nsw i16 %a, %b 683; SI-NEXT: store volatile i16 %r 684; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 685; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 686; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw i32 %[[A_32]], %[[B_32]] 687; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 688; VI-NEXT: store volatile i16 %[[R_16]] 689define amdgpu_kernel void @mul_nsw_i16(i16 %a, i16 %b) { 690 %r = mul nsw i16 %a, %b 691 store volatile i16 %r, i16 addrspace(1)* undef 692 ret void 693} 694 695; GCN-LABEL: @mul_nuw_i16( 696; SI: %r = mul nuw i16 %a, %b 697; SI-NEXT: store volatile i16 %r 698; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 699; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 700; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw i32 %[[A_32]], %[[B_32]] 701; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 702; VI-NEXT: store volatile i16 %[[R_16]] 703define amdgpu_kernel void @mul_nuw_i16(i16 %a, i16 %b) { 704 %r = mul nuw i16 %a, %b 705 store volatile i16 %r, i16 addrspace(1)* undef 706 ret void 707} 708 709; GCN-LABEL: @mul_nuw_nsw_i16( 710; SI: %r = mul nuw nsw i16 %a, %b 711; SI-NEXT: store volatile i16 %r 712; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 713; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 714; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw i32 %[[A_32]], %[[B_32]] 715; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 716; VI-NEXT: store volatile i16 %[[R_16]] 717define amdgpu_kernel void @mul_nuw_nsw_i16(i16 %a, i16 %b) { 718 %r = mul nuw nsw i16 %a, %b 719 store volatile i16 %r, i16 addrspace(1)* undef 720 ret void 721} 722 723; GCN-LABEL: @shl_i16( 724; SI: %r = shl i16 %a, %b 725; SI-NEXT: store volatile i16 %r 726; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 727; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 728; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]] 729; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 730; VI-NEXT: store volatile i16 %[[R_16]] 731define amdgpu_kernel void @shl_i16(i16 %a, i16 %b) { 732 %r = shl i16 %a, %b 733 store volatile i16 %r, i16 addrspace(1)* undef 734 ret void 735} 736 737; GCN-LABEL: @shl_nsw_i16( 738; SI: %r = shl nsw i16 %a, %b 739; SI-NEXT: store volatile i16 %r 740; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 741; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 742; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]] 743; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 744; VI-NEXT: store volatile i16 %[[R_16]] 745define amdgpu_kernel void @shl_nsw_i16(i16 %a, i16 %b) { 746 %r = shl nsw i16 %a, %b 747 store volatile i16 %r, i16 addrspace(1)* undef 748 ret void 749} 750 751; GCN-LABEL: @shl_nuw_i16( 752; SI: %r = shl nuw i16 %a, %b 753; SI-NEXT: store volatile i16 %r 754; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 755; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 756; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]] 757; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 758; VI-NEXT: store volatile i16 %[[R_16]] 759define amdgpu_kernel void @shl_nuw_i16(i16 %a, i16 %b) { 760 %r = shl nuw i16 %a, %b 761 store volatile i16 %r, i16 addrspace(1)* undef 762 ret void 763} 764 765; GCN-LABEL: @shl_nuw_nsw_i16( 766; SI: %r = shl nuw nsw i16 %a, %b 767; SI-NEXT: store volatile i16 %r 768; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 769; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 770; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]] 771; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 772; VI-NEXT: store volatile i16 %[[R_16]] 773define amdgpu_kernel void @shl_nuw_nsw_i16(i16 %a, i16 %b) { 774 %r = shl nuw nsw i16 %a, %b 775 store volatile i16 %r, i16 addrspace(1)* undef 776 ret void 777} 778 779; GCN-LABEL: @lshr_i16( 780; SI: %r = lshr i16 %a, %b 781; SI-NEXT: store volatile i16 %r 782; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 783; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 784; VI-NEXT: %[[R_32:[0-9]+]] = lshr i32 %[[A_32]], %[[B_32]] 785; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 786; VI-NEXT: store volatile i16 %[[R_16]] 787define amdgpu_kernel void @lshr_i16(i16 %a, i16 %b) { 788 %r = lshr i16 %a, %b 789 store volatile i16 %r, i16 addrspace(1)* undef 790 ret void 791} 792 793; GCN-LABEL: @lshr_exact_i16( 794; SI: %r = lshr exact i16 %a, %b 795; SI-NEXT: store volatile i16 %r 796; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 797; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 798; VI-NEXT: %[[R_32:[0-9]+]] = lshr exact i32 %[[A_32]], %[[B_32]] 799; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 800; VI-NEXT: store volatile i16 %[[R_16]] 801define amdgpu_kernel void @lshr_exact_i16(i16 %a, i16 %b) { 802 %r = lshr exact i16 %a, %b 803 store volatile i16 %r, i16 addrspace(1)* undef 804 ret void 805} 806 807; GCN-LABEL: @ashr_i16( 808; SI: %r = ashr i16 %a, %b 809; SI-NEXT: store volatile i16 %r 810; VI: %[[A_32:[0-9]+]] = sext i16 %a to i32 811; VI-NEXT: %[[B_32:[0-9]+]] = sext i16 %b to i32 812; VI-NEXT: %[[R_32:[0-9]+]] = ashr i32 %[[A_32]], %[[B_32]] 813; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 814; VI-NEXT: store volatile i16 %[[R_16]] 815define amdgpu_kernel void @ashr_i16(i16 %a, i16 %b) { 816 %r = ashr i16 %a, %b 817 store volatile i16 %r, i16 addrspace(1)* undef 818 ret void 819} 820 821; GCN-LABEL: @ashr_exact_i16( 822; SI: %r = ashr exact i16 %a, %b 823; SI-NEXT: store volatile i16 %r 824; VI: %[[A_32:[0-9]+]] = sext i16 %a to i32 825; VI-NEXT: %[[B_32:[0-9]+]] = sext i16 %b to i32 826; VI-NEXT: %[[R_32:[0-9]+]] = ashr exact i32 %[[A_32]], %[[B_32]] 827; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 828; VI-NEXT: store volatile i16 %[[R_16]] 829define amdgpu_kernel void @ashr_exact_i16(i16 %a, i16 %b) { 830 %r = ashr exact i16 %a, %b 831 store volatile i16 %r, i16 addrspace(1)* undef 832 ret void 833} 834 835; GCN-LABEL: @constant_lshr_exact_i16( 836; VI: store volatile i16 2 837define amdgpu_kernel void @constant_lshr_exact_i16(i16 %a, i16 %b) { 838 %r = lshr exact i16 4, 1 839 store volatile i16 %r, i16 addrspace(1)* undef 840 ret void 841} 842 843; GCN-LABEL: @and_i16( 844; SI: %r = and i16 %a, %b 845; SI-NEXT: store volatile i16 %r 846; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 847; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 848; VI-NEXT: %[[R_32:[0-9]+]] = and i32 %[[A_32]], %[[B_32]] 849; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 850; VI-NEXT: store volatile i16 %[[R_16]] 851define amdgpu_kernel void @and_i16(i16 %a, i16 %b) { 852 %r = and i16 %a, %b 853 store volatile i16 %r, i16 addrspace(1)* undef 854 ret void 855} 856 857; GCN-LABEL: @or_i16( 858; SI: %r = or i16 %a, %b 859; SI-NEXT: store volatile i16 %r 860; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 861; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 862; VI-NEXT: %[[R_32:[0-9]+]] = or i32 %[[A_32]], %[[B_32]] 863; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 864; VI-NEXT: store volatile i16 %[[R_16]] 865define amdgpu_kernel void @or_i16(i16 %a, i16 %b) { 866 %r = or i16 %a, %b 867 store volatile i16 %r, i16 addrspace(1)* undef 868 ret void 869} 870 871; GCN-LABEL: @xor_i16( 872; SI: %r = xor i16 %a, %b 873; SI-NEXT: store volatile i16 %r 874; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 875; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 876; VI-NEXT: %[[R_32:[0-9]+]] = xor i32 %[[A_32]], %[[B_32]] 877; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 878; VI-NEXT: store volatile i16 %[[R_16]] 879define amdgpu_kernel void @xor_i16(i16 %a, i16 %b) { 880 %r = xor i16 %a, %b 881 store volatile i16 %r, i16 addrspace(1)* undef 882 ret void 883} 884 885; GCN-LABEL: @select_eq_i16( 886; SI: %cmp = icmp eq i16 %a, %b 887; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b 888; SI-NEXT: store volatile i16 %sel 889; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32 890; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32 891; VI-NEXT: %[[CMP:[0-9]+]] = icmp eq i32 %[[A_32_0]], %[[B_32_0]] 892; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i16 %a to i32 893; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32 894; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] 895; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 896; VI-NEXT: store volatile i16 %[[SEL_16]] 897define amdgpu_kernel void @select_eq_i16(i16 %a, i16 %b) { 898 %cmp = icmp eq i16 %a, %b 899 %sel = select i1 %cmp, i16 %a, i16 %b 900 store volatile i16 %sel, i16 addrspace(1)* undef 901 ret void 902} 903 904; GCN-LABEL: @select_ne_i16( 905; SI: %cmp = icmp ne i16 %a, %b 906; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b 907; SI-NEXT: store volatile i16 %sel 908; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32 909; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32 910; VI-NEXT: %[[CMP:[0-9]+]] = icmp ne i32 %[[A_32_0]], %[[B_32_0]] 911; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i16 %a to i32 912; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32 913; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] 914; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 915; VI-NEXT: store volatile i16 %[[SEL_16]] 916define amdgpu_kernel void @select_ne_i16(i16 %a, i16 %b) { 917 %cmp = icmp ne i16 %a, %b 918 %sel = select i1 %cmp, i16 %a, i16 %b 919 store volatile i16 %sel, i16 addrspace(1)* undef 920 ret void 921} 922 923; GCN-LABEL: @select_ugt_i16( 924; SI: %cmp = icmp ugt i16 %a, %b 925; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b 926; SI-NEXT: store volatile i16 %sel 927; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32 928; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32 929; VI-NEXT: %[[CMP:[0-9]+]] = icmp ugt i32 %[[A_32_0]], %[[B_32_0]] 930; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i16 %a to i32 931; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32 932; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] 933; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 934; VI-NEXT: store volatile i16 %[[SEL_16]] 935define amdgpu_kernel void @select_ugt_i16(i16 %a, i16 %b) { 936 %cmp = icmp ugt i16 %a, %b 937 %sel = select i1 %cmp, i16 %a, i16 %b 938 store volatile i16 %sel, i16 addrspace(1)* undef 939 ret void 940} 941 942; GCN-LABEL: @select_uge_i16( 943; SI: %cmp = icmp uge i16 %a, %b 944; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b 945; SI-NEXT: store volatile i16 %sel 946; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32 947; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32 948; VI-NEXT: %[[CMP:[0-9]+]] = icmp uge i32 %[[A_32_0]], %[[B_32_0]] 949; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i16 %a to i32 950; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32 951; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] 952; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 953; VI-NEXT: store volatile i16 %[[SEL_16]] 954define amdgpu_kernel void @select_uge_i16(i16 %a, i16 %b) { 955 %cmp = icmp uge i16 %a, %b 956 %sel = select i1 %cmp, i16 %a, i16 %b 957 store volatile i16 %sel, i16 addrspace(1)* undef 958 ret void 959} 960 961; GCN-LABEL: @select_ult_i16( 962; SI: %cmp = icmp ult i16 %a, %b 963; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b 964; SI-NEXT: store volatile i16 %sel 965; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32 966; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32 967; VI-NEXT: %[[CMP:[0-9]+]] = icmp ult i32 %[[A_32_0]], %[[B_32_0]] 968; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i16 %a to i32 969; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32 970; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] 971; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 972; VI-NEXT: store volatile i16 %[[SEL_16]] 973define amdgpu_kernel void @select_ult_i16(i16 %a, i16 %b) { 974 %cmp = icmp ult i16 %a, %b 975 %sel = select i1 %cmp, i16 %a, i16 %b 976 store volatile i16 %sel, i16 addrspace(1)* undef 977 ret void 978} 979 980; GCN-LABEL: @select_ule_i16( 981; SI: %cmp = icmp ule i16 %a, %b 982; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b 983; SI-NEXT: store volatile i16 %sel 984; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32 985; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32 986; VI-NEXT: %[[CMP:[0-9]+]] = icmp ule i32 %[[A_32_0]], %[[B_32_0]] 987; VI-NEXT: %[[A_32_1:[0-9]+]] = zext i16 %a to i32 988; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32 989; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] 990; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 991; VI-NEXT: store volatile i16 %[[SEL_16]] 992define amdgpu_kernel void @select_ule_i16(i16 %a, i16 %b) { 993 %cmp = icmp ule i16 %a, %b 994 %sel = select i1 %cmp, i16 %a, i16 %b 995 store volatile i16 %sel, i16 addrspace(1)* undef 996 ret void 997} 998 999; GCN-LABEL: @select_sgt_i16( 1000; SI: %cmp = icmp sgt i16 %a, %b 1001; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b 1002; SI-NEXT: store volatile i16 %sel 1003; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32 1004; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i16 %b to i32 1005; VI-NEXT: %[[CMP:[0-9]+]] = icmp sgt i32 %[[A_32_0]], %[[B_32_0]] 1006; VI-NEXT: %[[A_32_1:[0-9]+]] = sext i16 %a to i32 1007; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i16 %b to i32 1008; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] 1009; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 1010; VI-NEXT: store volatile i16 %[[SEL_16]] 1011define amdgpu_kernel void @select_sgt_i16(i16 %a, i16 %b) { 1012 %cmp = icmp sgt i16 %a, %b 1013 %sel = select i1 %cmp, i16 %a, i16 %b 1014 store volatile i16 %sel, i16 addrspace(1)* undef 1015 ret void 1016} 1017 1018; GCN-LABEL: @select_sge_i16( 1019; SI: %cmp = icmp sge i16 %a, %b 1020; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b 1021; SI-NEXT: store volatile i16 %sel 1022; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32 1023; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i16 %b to i32 1024; VI-NEXT: %[[CMP:[0-9]+]] = icmp sge i32 %[[A_32_0]], %[[B_32_0]] 1025; VI-NEXT: %[[A_32_1:[0-9]+]] = sext i16 %a to i32 1026; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i16 %b to i32 1027; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] 1028; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 1029; VI-NEXT: store volatile i16 %[[SEL_16]] 1030define amdgpu_kernel void @select_sge_i16(i16 %a, i16 %b) { 1031 %cmp = icmp sge i16 %a, %b 1032 %sel = select i1 %cmp, i16 %a, i16 %b 1033 store volatile i16 %sel, i16 addrspace(1)* undef 1034 ret void 1035} 1036 1037; GCN-LABEL: @select_slt_i16( 1038; SI: %cmp = icmp slt i16 %a, %b 1039; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b 1040; SI-NEXT: store volatile i16 %sel 1041; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32 1042; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i16 %b to i32 1043; VI-NEXT: %[[CMP:[0-9]+]] = icmp slt i32 %[[A_32_0]], %[[B_32_0]] 1044; VI-NEXT: %[[A_32_1:[0-9]+]] = sext i16 %a to i32 1045; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i16 %b to i32 1046; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] 1047; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 1048; VI-NEXT: store volatile i16 %[[SEL_16]] 1049define amdgpu_kernel void @select_slt_i16(i16 %a, i16 %b) { 1050 %cmp = icmp slt i16 %a, %b 1051 %sel = select i1 %cmp, i16 %a, i16 %b 1052 store volatile i16 %sel, i16 addrspace(1)* undef 1053 ret void 1054} 1055 1056; GCN-LABEL: @select_sle_i16( 1057; SI: %cmp = icmp sle i16 %a, %b 1058; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b 1059; SI-NEXT: store volatile i16 %sel 1060; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32 1061; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i16 %b to i32 1062; VI-NEXT: %[[CMP:[0-9]+]] = icmp sle i32 %[[A_32_0]], %[[B_32_0]] 1063; VI-NEXT: %[[A_32_1:[0-9]+]] = sext i16 %a to i32 1064; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i16 %b to i32 1065; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] 1066; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 1067; VI-NEXT: store volatile i16 %[[SEL_16]] 1068define amdgpu_kernel void @select_sle_i16(i16 %a, i16 %b) { 1069 %cmp = icmp sle i16 %a, %b 1070 %sel = select i1 %cmp, i16 %a, i16 %b 1071 store volatile i16 %sel, i16 addrspace(1)* undef 1072 ret void 1073} 1074 1075declare i16 @llvm.bitreverse.i16(i16) 1076 1077; GCN-LABEL: @bitreverse_i16( 1078; SI: %brev = call i16 @llvm.bitreverse.i16(i16 %a) 1079; SI-NEXT: store volatile i16 %brev 1080; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 1081; VI-NEXT: %[[R_32:[0-9]+]] = call i32 @llvm.bitreverse.i32(i32 %[[A_32]]) 1082; VI-NEXT: %[[S_32:[0-9]+]] = lshr i32 %[[R_32]], 16 1083; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[S_32]] to i16 1084; VI-NEXT: store volatile i16 %[[R_16]] 1085define amdgpu_kernel void @bitreverse_i16(i16 %a) { 1086 %brev = call i16 @llvm.bitreverse.i16(i16 %a) 1087 store volatile i16 %brev, i16 addrspace(1)* undef 1088 ret void 1089} 1090 1091; GCN-LABEL: @add_3xi15( 1092; SI: %r = add <3 x i15> %a, %b 1093; SI-NEXT: store volatile <3 x i15> %r 1094; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1095; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1096; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]] 1097; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> 1098; VI-NEXT: store volatile <3 x i15> %[[R_15]] 1099define amdgpu_kernel void @add_3xi15(<3 x i15> %a, <3 x i15> %b) { 1100 %r = add <3 x i15> %a, %b 1101 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1102 ret void 1103} 1104 1105; GCN-LABEL: @add_nsw_3xi15( 1106; SI: %r = add nsw <3 x i15> %a, %b 1107; SI-NEXT: store volatile <3 x i15> %r 1108; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1109; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1110; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]] 1111; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> 1112; VI-NEXT: store volatile <3 x i15> %[[R_15]] 1113define amdgpu_kernel void @add_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) { 1114 %r = add nsw <3 x i15> %a, %b 1115 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1116 ret void 1117} 1118 1119; GCN-LABEL: @add_nuw_3xi15( 1120; SI: %r = add nuw <3 x i15> %a, %b 1121; SI-NEXT: store volatile <3 x i15> %r 1122; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1123; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1124; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]] 1125; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> 1126; VI-NEXT: store volatile <3 x i15> %[[R_15]] 1127define amdgpu_kernel void @add_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) { 1128 %r = add nuw <3 x i15> %a, %b 1129 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1130 ret void 1131} 1132 1133; GCN-LABEL: @add_nuw_nsw_3xi15( 1134; SI: %r = add nuw nsw <3 x i15> %a, %b 1135; SI-NEXT: store volatile <3 x i15> %r 1136; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1137; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1138; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]] 1139; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> 1140; VI-NEXT: store volatile <3 x i15> %[[R_15]] 1141define amdgpu_kernel void @add_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) { 1142 %r = add nuw nsw <3 x i15> %a, %b 1143 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1144 ret void 1145} 1146 1147; GCN-LABEL: @sub_3xi15( 1148; SI: %r = sub <3 x i15> %a, %b 1149; SI-NEXT: store volatile <3 x i15> %r 1150; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1151; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1152; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw <3 x i32> %[[A_32]], %[[B_32]] 1153; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> 1154; VI-NEXT: store volatile <3 x i15> %[[R_15]] 1155define amdgpu_kernel void @sub_3xi15(<3 x i15> %a, <3 x i15> %b) { 1156 %r = sub <3 x i15> %a, %b 1157 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1158 ret void 1159} 1160 1161; GCN-LABEL: @sub_nsw_3xi15( 1162; SI: %r = sub nsw <3 x i15> %a, %b 1163; SI-NEXT: store volatile <3 x i15> %r 1164; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1165; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1166; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw <3 x i32> %[[A_32]], %[[B_32]] 1167; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> 1168; VI-NEXT: store volatile <3 x i15> %[[R_15]] 1169define amdgpu_kernel void @sub_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) { 1170 %r = sub nsw <3 x i15> %a, %b 1171 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1172 ret void 1173} 1174 1175; GCN-LABEL: @sub_nuw_3xi15( 1176; SI: %r = sub nuw <3 x i15> %a, %b 1177; SI-NEXT: store volatile <3 x i15> %r 1178; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1179; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1180; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw <3 x i32> %[[A_32]], %[[B_32]] 1181; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> 1182; VI-NEXT: store volatile <3 x i15> %[[R_15]] 1183define amdgpu_kernel void @sub_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) { 1184 %r = sub nuw <3 x i15> %a, %b 1185 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1186 ret void 1187} 1188 1189; GCN-LABEL: @sub_nuw_nsw_3xi15( 1190; SI: %r = sub nuw nsw <3 x i15> %a, %b 1191; SI-NEXT: store volatile <3 x i15> %r 1192; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1193; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1194; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw <3 x i32> %[[A_32]], %[[B_32]] 1195; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> 1196; VI-NEXT: store volatile <3 x i15> %[[R_15]] 1197define amdgpu_kernel void @sub_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) { 1198 %r = sub nuw nsw <3 x i15> %a, %b 1199 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1200 ret void 1201} 1202 1203; GCN-LABEL: @mul_3xi15( 1204; SI: %r = mul <3 x i15> %a, %b 1205; SI-NEXT: store volatile <3 x i15> %r 1206; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1207; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1208; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw <3 x i32> %[[A_32]], %[[B_32]] 1209; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> 1210; VI-NEXT: store volatile <3 x i15> %[[R_15]] 1211define amdgpu_kernel void @mul_3xi15(<3 x i15> %a, <3 x i15> %b) { 1212 %r = mul <3 x i15> %a, %b 1213 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1214 ret void 1215} 1216 1217; GCN-LABEL: @mul_nsw_3xi15( 1218; SI: %r = mul nsw <3 x i15> %a, %b 1219; SI-NEXT: store volatile <3 x i15> %r 1220; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1221; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1222; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw <3 x i32> %[[A_32]], %[[B_32]] 1223; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> 1224; VI-NEXT: store volatile <3 x i15> %[[R_15]] 1225define amdgpu_kernel void @mul_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) { 1226 %r = mul nsw <3 x i15> %a, %b 1227 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1228 ret void 1229} 1230 1231; GCN-LABEL: @mul_nuw_3xi15( 1232; SI: %r = mul nuw <3 x i15> %a, %b 1233; SI-NEXT: store volatile <3 x i15> %r 1234; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1235; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1236; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw <3 x i32> %[[A_32]], %[[B_32]] 1237; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> 1238; VI-NEXT: store volatile <3 x i15> %[[R_15]] 1239define amdgpu_kernel void @mul_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) { 1240 %r = mul nuw <3 x i15> %a, %b 1241 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1242 ret void 1243} 1244 1245; GCN-LABEL: @mul_nuw_nsw_3xi15( 1246; SI: %r = mul nuw nsw <3 x i15> %a, %b 1247; SI-NEXT: store volatile <3 x i15> %r 1248; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1249; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1250; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw <3 x i32> %[[A_32]], %[[B_32]] 1251; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> 1252; VI-NEXT: store volatile <3 x i15> %[[R_15]] 1253define amdgpu_kernel void @mul_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) { 1254 %r = mul nuw nsw <3 x i15> %a, %b 1255 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1256 ret void 1257} 1258 1259; GCN-LABEL: @shl_3xi15( 1260; SI: %r = shl <3 x i15> %a, %b 1261; SI-NEXT: store volatile <3 x i15> %r 1262; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1263; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1264; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]] 1265; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> 1266; VI-NEXT: store volatile <3 x i15> %[[R_15]] 1267define amdgpu_kernel void @shl_3xi15(<3 x i15> %a, <3 x i15> %b) { 1268 %r = shl <3 x i15> %a, %b 1269 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1270 ret void 1271} 1272 1273; GCN-LABEL: @shl_nsw_3xi15( 1274; SI: %r = shl nsw <3 x i15> %a, %b 1275; SI-NEXT: store volatile <3 x i15> %r 1276; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1277; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1278; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]] 1279; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> 1280; VI-NEXT: store volatile <3 x i15> %[[R_15]] 1281define amdgpu_kernel void @shl_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) { 1282 %r = shl nsw <3 x i15> %a, %b 1283 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1284 ret void 1285} 1286 1287; GCN-LABEL: @shl_nuw_3xi15( 1288; SI: %r = shl nuw <3 x i15> %a, %b 1289; SI-NEXT: store volatile <3 x i15> %r 1290; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1291; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1292; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]] 1293; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> 1294; VI-NEXT: store volatile <3 x i15> %[[R_15]] 1295define amdgpu_kernel void @shl_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) { 1296 %r = shl nuw <3 x i15> %a, %b 1297 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1298 ret void 1299} 1300 1301; GCN-LABEL: @shl_nuw_nsw_3xi15( 1302; SI: %r = shl nuw nsw <3 x i15> %a, %b 1303; SI-NEXT: store volatile <3 x i15> %r 1304; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1305; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1306; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]] 1307; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> 1308; VI-NEXT: store volatile <3 x i15> %[[R_15]] 1309define amdgpu_kernel void @shl_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) { 1310 %r = shl nuw nsw <3 x i15> %a, %b 1311 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1312 ret void 1313} 1314 1315; GCN-LABEL: @lshr_3xi15( 1316; SI: %r = lshr <3 x i15> %a, %b 1317; SI-NEXT: store volatile <3 x i15> %r 1318; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1319; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1320; VI-NEXT: %[[R_32:[0-9]+]] = lshr <3 x i32> %[[A_32]], %[[B_32]] 1321; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> 1322; VI-NEXT: store volatile <3 x i15> %[[R_15]] 1323define amdgpu_kernel void @lshr_3xi15(<3 x i15> %a, <3 x i15> %b) { 1324 %r = lshr <3 x i15> %a, %b 1325 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1326 ret void 1327} 1328 1329; GCN-LABEL: @lshr_exact_3xi15( 1330; SI: %r = lshr exact <3 x i15> %a, %b 1331; SI-NEXT: store volatile <3 x i15> %r 1332; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1333; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1334; VI-NEXT: %[[R_32:[0-9]+]] = lshr exact <3 x i32> %[[A_32]], %[[B_32]] 1335; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> 1336; VI-NEXT: store volatile <3 x i15> %[[R_15]] 1337define amdgpu_kernel void @lshr_exact_3xi15(<3 x i15> %a, <3 x i15> %b) { 1338 %r = lshr exact <3 x i15> %a, %b 1339 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1340 ret void 1341} 1342 1343; GCN-LABEL: @ashr_3xi15( 1344; SI: %r = ashr <3 x i15> %a, %b 1345; SI-NEXT: store volatile <3 x i15> %r 1346; VI: %[[A_32:[0-9]+]] = sext <3 x i15> %a to <3 x i32> 1347; VI-NEXT: %[[B_32:[0-9]+]] = sext <3 x i15> %b to <3 x i32> 1348; VI-NEXT: %[[R_32:[0-9]+]] = ashr <3 x i32> %[[A_32]], %[[B_32]] 1349; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> 1350; VI-NEXT: store volatile <3 x i15> %[[R_15]] 1351define amdgpu_kernel void @ashr_3xi15(<3 x i15> %a, <3 x i15> %b) { 1352 %r = ashr <3 x i15> %a, %b 1353 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1354 ret void 1355} 1356 1357; GCN-LABEL: @ashr_exact_3xi15( 1358; SI: %r = ashr exact <3 x i15> %a, %b 1359; SI-NEXT: store volatile <3 x i15> %r 1360; VI: %[[A_32:[0-9]+]] = sext <3 x i15> %a to <3 x i32> 1361; VI-NEXT: %[[B_32:[0-9]+]] = sext <3 x i15> %b to <3 x i32> 1362; VI-NEXT: %[[R_32:[0-9]+]] = ashr exact <3 x i32> %[[A_32]], %[[B_32]] 1363; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> 1364; VI-NEXT: store volatile <3 x i15> %[[R_15]] 1365define amdgpu_kernel void @ashr_exact_3xi15(<3 x i15> %a, <3 x i15> %b) { 1366 %r = ashr exact <3 x i15> %a, %b 1367 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1368 ret void 1369} 1370 1371; GCN-LABEL: @and_3xi15( 1372; SI: %r = and <3 x i15> %a, %b 1373; SI-NEXT: store volatile <3 x i15> %r 1374; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1375; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1376; VI-NEXT: %[[R_32:[0-9]+]] = and <3 x i32> %[[A_32]], %[[B_32]] 1377; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> 1378; VI-NEXT: store volatile <3 x i15> %[[R_15]] 1379define amdgpu_kernel void @and_3xi15(<3 x i15> %a, <3 x i15> %b) { 1380 %r = and <3 x i15> %a, %b 1381 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1382 ret void 1383} 1384 1385; GCN-LABEL: @or_3xi15( 1386; SI: %r = or <3 x i15> %a, %b 1387; SI-NEXT: store volatile <3 x i15> %r 1388; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1389; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1390; VI-NEXT: %[[R_32:[0-9]+]] = or <3 x i32> %[[A_32]], %[[B_32]] 1391; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> 1392; VI-NEXT: store volatile <3 x i15> %[[R_15]] 1393define amdgpu_kernel void @or_3xi15(<3 x i15> %a, <3 x i15> %b) { 1394 %r = or <3 x i15> %a, %b 1395 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1396 ret void 1397} 1398 1399; GCN-LABEL: @xor_3xi15( 1400; SI: %r = xor <3 x i15> %a, %b 1401; SI-NEXT: store volatile <3 x i15> %r 1402; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1403; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1404; VI-NEXT: %[[R_32:[0-9]+]] = xor <3 x i32> %[[A_32]], %[[B_32]] 1405; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> 1406; VI-NEXT: store volatile <3 x i15> %[[R_15]] 1407define amdgpu_kernel void @xor_3xi15(<3 x i15> %a, <3 x i15> %b) { 1408 %r = xor <3 x i15> %a, %b 1409 store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef 1410 ret void 1411} 1412 1413; GCN-LABEL: @select_eq_3xi15( 1414; SI: %cmp = icmp eq <3 x i15> %a, %b 1415; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b 1416; SI-NEXT: store volatile <3 x i15> %sel 1417; VI: %[[A_32_0:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1418; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1419; VI-NEXT: %[[CMP:[0-9]+]] = icmp eq <3 x i32> %[[A_32_0]], %[[B_32_0]] 1420; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1421; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1422; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] 1423; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15> 1424; VI-NEXT: store volatile <3 x i15> %[[SEL_15]] 1425define amdgpu_kernel void @select_eq_3xi15(<3 x i15> %a, <3 x i15> %b) { 1426 %cmp = icmp eq <3 x i15> %a, %b 1427 %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b 1428 store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef 1429 ret void 1430} 1431 1432; GCN-LABEL: @select_ne_3xi15( 1433; SI: %cmp = icmp ne <3 x i15> %a, %b 1434; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b 1435; SI-NEXT: store volatile <3 x i15> %sel 1436; VI: %[[A_32_0:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1437; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1438; VI-NEXT: %[[CMP:[0-9]+]] = icmp ne <3 x i32> %[[A_32_0]], %[[B_32_0]] 1439; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1440; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1441; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] 1442; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15> 1443; VI-NEXT: store volatile <3 x i15> %[[SEL_15]] 1444define amdgpu_kernel void @select_ne_3xi15(<3 x i15> %a, <3 x i15> %b) { 1445 %cmp = icmp ne <3 x i15> %a, %b 1446 %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b 1447 store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef 1448 ret void 1449} 1450 1451; GCN-LABEL: @select_ugt_3xi15( 1452; SI: %cmp = icmp ugt <3 x i15> %a, %b 1453; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b 1454; SI-NEXT: store volatile <3 x i15> %sel 1455; VI: %[[A_32_0:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1456; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1457; VI-NEXT: %[[CMP:[0-9]+]] = icmp ugt <3 x i32> %[[A_32_0]], %[[B_32_0]] 1458; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1459; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1460; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] 1461; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15> 1462; VI-NEXT: store volatile <3 x i15> %[[SEL_15]] 1463define amdgpu_kernel void @select_ugt_3xi15(<3 x i15> %a, <3 x i15> %b) { 1464 %cmp = icmp ugt <3 x i15> %a, %b 1465 %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b 1466 store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef 1467 ret void 1468} 1469 1470; GCN-LABEL: @select_uge_3xi15( 1471; SI: %cmp = icmp uge <3 x i15> %a, %b 1472; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b 1473; SI-NEXT: store volatile <3 x i15> %sel 1474; VI: %[[A_32_0:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1475; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1476; VI-NEXT: %[[CMP:[0-9]+]] = icmp uge <3 x i32> %[[A_32_0]], %[[B_32_0]] 1477; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1478; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1479; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] 1480; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15> 1481; VI-NEXT: store volatile <3 x i15> %[[SEL_15]] 1482define amdgpu_kernel void @select_uge_3xi15(<3 x i15> %a, <3 x i15> %b) { 1483 %cmp = icmp uge <3 x i15> %a, %b 1484 %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b 1485 store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef 1486 ret void 1487} 1488 1489; GCN-LABEL: @select_ult_3xi15( 1490; SI: %cmp = icmp ult <3 x i15> %a, %b 1491; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b 1492; SI-NEXT: store volatile <3 x i15> %sel 1493; VI: %[[A_32_0:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1494; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1495; VI-NEXT: %[[CMP:[0-9]+]] = icmp ult <3 x i32> %[[A_32_0]], %[[B_32_0]] 1496; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1497; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1498; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] 1499; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15> 1500; VI-NEXT: store volatile <3 x i15> %[[SEL_15]] 1501define amdgpu_kernel void @select_ult_3xi15(<3 x i15> %a, <3 x i15> %b) { 1502 %cmp = icmp ult <3 x i15> %a, %b 1503 %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b 1504 store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef 1505 ret void 1506} 1507 1508; GCN-LABEL: @select_ule_3xi15( 1509; SI: %cmp = icmp ule <3 x i15> %a, %b 1510; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b 1511; SI-NEXT: store volatile <3 x i15> %sel 1512; VI: %[[A_32_0:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1513; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1514; VI-NEXT: %[[CMP:[0-9]+]] = icmp ule <3 x i32> %[[A_32_0]], %[[B_32_0]] 1515; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1516; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i15> %b to <3 x i32> 1517; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] 1518; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15> 1519; VI-NEXT: store volatile <3 x i15> %[[SEL_15]] 1520define amdgpu_kernel void @select_ule_3xi15(<3 x i15> %a, <3 x i15> %b) { 1521 %cmp = icmp ule <3 x i15> %a, %b 1522 %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b 1523 store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef 1524 ret void 1525} 1526 1527; GCN-LABEL: @select_sgt_3xi15( 1528; SI: %cmp = icmp sgt <3 x i15> %a, %b 1529; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b 1530; SI-NEXT: store volatile <3 x i15> %sel 1531; VI: %[[A_32_0:[0-9]+]] = sext <3 x i15> %a to <3 x i32> 1532; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i15> %b to <3 x i32> 1533; VI-NEXT: %[[CMP:[0-9]+]] = icmp sgt <3 x i32> %[[A_32_0]], %[[B_32_0]] 1534; VI-NEXT: %[[A_32_1:[0-9]+]] = sext <3 x i15> %a to <3 x i32> 1535; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i15> %b to <3 x i32> 1536; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] 1537; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15> 1538; VI-NEXT: store volatile <3 x i15> %[[SEL_15]] 1539define amdgpu_kernel void @select_sgt_3xi15(<3 x i15> %a, <3 x i15> %b) { 1540 %cmp = icmp sgt <3 x i15> %a, %b 1541 %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b 1542 store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef 1543 ret void 1544} 1545 1546; GCN-LABEL: @select_sge_3xi15( 1547; SI: %cmp = icmp sge <3 x i15> %a, %b 1548; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b 1549; SI-NEXT: store volatile <3 x i15> %sel 1550; VI: %[[A_32_0:[0-9]+]] = sext <3 x i15> %a to <3 x i32> 1551; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i15> %b to <3 x i32> 1552; VI-NEXT: %[[CMP:[0-9]+]] = icmp sge <3 x i32> %[[A_32_0]], %[[B_32_0]] 1553; VI-NEXT: %[[A_32_1:[0-9]+]] = sext <3 x i15> %a to <3 x i32> 1554; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i15> %b to <3 x i32> 1555; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] 1556; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15> 1557; VI-NEXT: store volatile <3 x i15> %[[SEL_15]] 1558define amdgpu_kernel void @select_sge_3xi15(<3 x i15> %a, <3 x i15> %b) { 1559 %cmp = icmp sge <3 x i15> %a, %b 1560 %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b 1561 store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef 1562 ret void 1563} 1564 1565; GCN-LABEL: @select_slt_3xi15( 1566; SI: %cmp = icmp slt <3 x i15> %a, %b 1567; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b 1568; SI-NEXT: store volatile <3 x i15> %sel 1569; VI: %[[A_32_0:[0-9]+]] = sext <3 x i15> %a to <3 x i32> 1570; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i15> %b to <3 x i32> 1571; VI-NEXT: %[[CMP:[0-9]+]] = icmp slt <3 x i32> %[[A_32_0]], %[[B_32_0]] 1572; VI-NEXT: %[[A_32_1:[0-9]+]] = sext <3 x i15> %a to <3 x i32> 1573; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i15> %b to <3 x i32> 1574; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] 1575; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15> 1576; VI-NEXT: store volatile <3 x i15> %[[SEL_15]] 1577define amdgpu_kernel void @select_slt_3xi15(<3 x i15> %a, <3 x i15> %b) { 1578 %cmp = icmp slt <3 x i15> %a, %b 1579 %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b 1580 store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef 1581 ret void 1582} 1583 1584; GCN-LABEL: @select_sle_3xi15( 1585; SI: %cmp = icmp sle <3 x i15> %a, %b 1586; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b 1587; SI-NEXT: store volatile <3 x i15> %sel 1588; VI: %[[A_32_0:[0-9]+]] = sext <3 x i15> %a to <3 x i32> 1589; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i15> %b to <3 x i32> 1590; VI-NEXT: %[[CMP:[0-9]+]] = icmp sle <3 x i32> %[[A_32_0]], %[[B_32_0]] 1591; VI-NEXT: %[[A_32_1:[0-9]+]] = sext <3 x i15> %a to <3 x i32> 1592; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i15> %b to <3 x i32> 1593; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] 1594; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15> 1595; VI-NEXT: store volatile <3 x i15> %[[SEL_15]] 1596define amdgpu_kernel void @select_sle_3xi15(<3 x i15> %a, <3 x i15> %b) { 1597 %cmp = icmp sle <3 x i15> %a, %b 1598 %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b 1599 store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef 1600 ret void 1601} 1602 1603declare <3 x i15> @llvm.bitreverse.v3i15(<3 x i15>) 1604; GCN-LABEL: @bitreverse_3xi15( 1605; SI: %brev = call <3 x i15> @llvm.bitreverse.v3i15(<3 x i15> %a) 1606; SI-NEXT: store volatile <3 x i15> %brev 1607; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> 1608; VI-NEXT: %[[R_32:[0-9]+]] = call <3 x i32> @llvm.bitreverse.v3i32(<3 x i32> %[[A_32]]) 1609; VI-NEXT: %[[S_32:[0-9]+]] = lshr <3 x i32> %[[R_32]], <i32 17, i32 17, i32 17> 1610; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[S_32]] to <3 x i15> 1611; VI-NEXT: store volatile <3 x i15> %[[R_15]] 1612define amdgpu_kernel void @bitreverse_3xi15(<3 x i15> %a) { 1613 %brev = call <3 x i15> @llvm.bitreverse.v3i15(<3 x i15> %a) 1614 store volatile <3 x i15> %brev, <3 x i15> addrspace(1)* undef 1615 ret void 1616} 1617 1618; GCN-LABEL: @add_3xi16( 1619; SI: %r = add <3 x i16> %a, %b 1620; SI-NEXT: store volatile <3 x i16> %r 1621; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 1622; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 1623; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]] 1624; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> 1625; VI-NEXT: store volatile <3 x i16> %[[R_16]] 1626define amdgpu_kernel void @add_3xi16(<3 x i16> %a, <3 x i16> %b) { 1627 %r = add <3 x i16> %a, %b 1628 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 1629 ret void 1630} 1631 1632; GCN-LABEL: @add_nsw_3xi16( 1633; SI: %r = add nsw <3 x i16> %a, %b 1634; SI-NEXT: store volatile <3 x i16> %r 1635; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 1636; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 1637; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]] 1638; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> 1639; VI-NEXT: store volatile <3 x i16> %[[R_16]] 1640define amdgpu_kernel void @add_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { 1641 %r = add nsw <3 x i16> %a, %b 1642 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 1643 ret void 1644} 1645 1646; GCN-LABEL: @add_nuw_3xi16( 1647; SI: %r = add nuw <3 x i16> %a, %b 1648; SI-NEXT: store volatile <3 x i16> %r 1649; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 1650; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 1651; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]] 1652; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> 1653; VI-NEXT: store volatile <3 x i16> %[[R_16]] 1654define amdgpu_kernel void @add_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) { 1655 %r = add nuw <3 x i16> %a, %b 1656 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 1657 ret void 1658} 1659 1660; GCN-LABEL: @add_nuw_nsw_3xi16( 1661; SI: %r = add nuw nsw <3 x i16> %a, %b 1662; SI-NEXT: store volatile <3 x i16> %r 1663; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 1664; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 1665; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]] 1666; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> 1667; VI-NEXT: store volatile <3 x i16> %[[R_16]] 1668define amdgpu_kernel void @add_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { 1669 %r = add nuw nsw <3 x i16> %a, %b 1670 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 1671 ret void 1672} 1673 1674; GCN-LABEL: @sub_3xi16( 1675; SI: %r = sub <3 x i16> %a, %b 1676; SI-NEXT: store volatile <3 x i16> %r 1677; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 1678; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 1679; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw <3 x i32> %[[A_32]], %[[B_32]] 1680; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> 1681; VI-NEXT: store volatile <3 x i16> %[[R_16]] 1682define amdgpu_kernel void @sub_3xi16(<3 x i16> %a, <3 x i16> %b) { 1683 %r = sub <3 x i16> %a, %b 1684 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 1685 ret void 1686} 1687 1688; GCN-LABEL: @sub_nsw_3xi16( 1689; SI: %r = sub nsw <3 x i16> %a, %b 1690; SI-NEXT: store volatile <3 x i16> %r 1691; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 1692; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 1693; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw <3 x i32> %[[A_32]], %[[B_32]] 1694; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> 1695; VI-NEXT: store volatile <3 x i16> %[[R_16]] 1696define amdgpu_kernel void @sub_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { 1697 %r = sub nsw <3 x i16> %a, %b 1698 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 1699 ret void 1700} 1701 1702; GCN-LABEL: @sub_nuw_3xi16( 1703; SI: %r = sub nuw <3 x i16> %a, %b 1704; SI-NEXT: store volatile <3 x i16> %r 1705; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 1706; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 1707; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw <3 x i32> %[[A_32]], %[[B_32]] 1708; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> 1709; VI-NEXT: store volatile <3 x i16> %[[R_16]] 1710define amdgpu_kernel void @sub_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) { 1711 %r = sub nuw <3 x i16> %a, %b 1712 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 1713 ret void 1714} 1715 1716; GCN-LABEL: @sub_nuw_nsw_3xi16( 1717; SI: %r = sub nuw nsw <3 x i16> %a, %b 1718; SI-NEXT: store volatile <3 x i16> %r 1719; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 1720; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 1721; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw <3 x i32> %[[A_32]], %[[B_32]] 1722; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> 1723; VI-NEXT: store volatile <3 x i16> %[[R_16]] 1724define amdgpu_kernel void @sub_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { 1725 %r = sub nuw nsw <3 x i16> %a, %b 1726 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 1727 ret void 1728} 1729 1730; GCN-LABEL: @mul_3xi16( 1731; SI: %r = mul <3 x i16> %a, %b 1732; SI-NEXT: store volatile <3 x i16> %r 1733; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 1734; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 1735; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw <3 x i32> %[[A_32]], %[[B_32]] 1736; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> 1737; VI-NEXT: store volatile <3 x i16> %[[R_16]] 1738define amdgpu_kernel void @mul_3xi16(<3 x i16> %a, <3 x i16> %b) { 1739 %r = mul <3 x i16> %a, %b 1740 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 1741 ret void 1742} 1743 1744; GCN-LABEL: @mul_nsw_3xi16( 1745; SI: %r = mul nsw <3 x i16> %a, %b 1746; SI-NEXT: store volatile <3 x i16> %r 1747; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 1748; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 1749; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw <3 x i32> %[[A_32]], %[[B_32]] 1750; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> 1751; VI-NEXT: store volatile <3 x i16> %[[R_16]] 1752define amdgpu_kernel void @mul_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { 1753 %r = mul nsw <3 x i16> %a, %b 1754 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 1755 ret void 1756} 1757 1758; GCN-LABEL: @mul_nuw_3xi16( 1759; SI: %r = mul nuw <3 x i16> %a, %b 1760; SI-NEXT: store volatile <3 x i16> %r 1761; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 1762; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 1763; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw <3 x i32> %[[A_32]], %[[B_32]] 1764; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> 1765; VI-NEXT: store volatile <3 x i16> %[[R_16]] 1766define amdgpu_kernel void @mul_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) { 1767 %r = mul nuw <3 x i16> %a, %b 1768 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 1769 ret void 1770} 1771 1772; GCN-LABEL: @mul_nuw_nsw_3xi16( 1773; SI: %r = mul nuw nsw <3 x i16> %a, %b 1774; SI-NEXT: store volatile <3 x i16> %r 1775; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 1776; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 1777; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw <3 x i32> %[[A_32]], %[[B_32]] 1778; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> 1779; VI-NEXT: store volatile <3 x i16> %[[R_16]] 1780define amdgpu_kernel void @mul_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { 1781 %r = mul nuw nsw <3 x i16> %a, %b 1782 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 1783 ret void 1784} 1785 1786; GCN-LABEL: @shl_3xi16( 1787; SI: %r = shl <3 x i16> %a, %b 1788; SI-NEXT: store volatile <3 x i16> %r 1789; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 1790; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 1791; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]] 1792; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> 1793; VI-NEXT: store volatile <3 x i16> %[[R_16]] 1794define amdgpu_kernel void @shl_3xi16(<3 x i16> %a, <3 x i16> %b) { 1795 %r = shl <3 x i16> %a, %b 1796 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 1797 ret void 1798} 1799 1800; GCN-LABEL: @shl_nsw_3xi16( 1801; SI: %r = shl nsw <3 x i16> %a, %b 1802; SI-NEXT: store volatile <3 x i16> %r 1803; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 1804; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 1805; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]] 1806; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> 1807; VI-NEXT: store volatile <3 x i16> %[[R_16]] 1808define amdgpu_kernel void @shl_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { 1809 %r = shl nsw <3 x i16> %a, %b 1810 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 1811 ret void 1812} 1813 1814; GCN-LABEL: @shl_nuw_3xi16( 1815; SI: %r = shl nuw <3 x i16> %a, %b 1816; SI-NEXT: store volatile <3 x i16> %r 1817; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 1818; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 1819; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]] 1820; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> 1821; VI-NEXT: store volatile <3 x i16> %[[R_16]] 1822define amdgpu_kernel void @shl_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) { 1823 %r = shl nuw <3 x i16> %a, %b 1824 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 1825 ret void 1826} 1827 1828; GCN-LABEL: @shl_nuw_nsw_3xi16( 1829; SI: %r = shl nuw nsw <3 x i16> %a, %b 1830; SI-NEXT: store volatile <3 x i16> %r 1831; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 1832; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 1833; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]] 1834; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> 1835; VI-NEXT: store volatile <3 x i16> %[[R_16]] 1836define amdgpu_kernel void @shl_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { 1837 %r = shl nuw nsw <3 x i16> %a, %b 1838 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 1839 ret void 1840} 1841 1842; GCN-LABEL: @lshr_3xi16( 1843; SI: %r = lshr <3 x i16> %a, %b 1844; SI-NEXT: store volatile <3 x i16> %r 1845; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 1846; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 1847; VI-NEXT: %[[R_32:[0-9]+]] = lshr <3 x i32> %[[A_32]], %[[B_32]] 1848; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> 1849; VI-NEXT: store volatile <3 x i16> %[[R_16]] 1850define amdgpu_kernel void @lshr_3xi16(<3 x i16> %a, <3 x i16> %b) { 1851 %r = lshr <3 x i16> %a, %b 1852 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 1853 ret void 1854} 1855 1856; GCN-LABEL: @lshr_exact_3xi16( 1857; SI: %r = lshr exact <3 x i16> %a, %b 1858; SI-NEXT: store volatile <3 x i16> %r 1859; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 1860; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 1861; VI-NEXT: %[[R_32:[0-9]+]] = lshr exact <3 x i32> %[[A_32]], %[[B_32]] 1862; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> 1863; VI-NEXT: store volatile <3 x i16> %[[R_16]] 1864define amdgpu_kernel void @lshr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) { 1865 %r = lshr exact <3 x i16> %a, %b 1866 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 1867 ret void 1868} 1869 1870; GCN-LABEL: @ashr_3xi16( 1871; SI: %r = ashr <3 x i16> %a, %b 1872; SI-NEXT: store volatile <3 x i16> %r 1873; VI: %[[A_32:[0-9]+]] = sext <3 x i16> %a to <3 x i32> 1874; VI-NEXT: %[[B_32:[0-9]+]] = sext <3 x i16> %b to <3 x i32> 1875; VI-NEXT: %[[R_32:[0-9]+]] = ashr <3 x i32> %[[A_32]], %[[B_32]] 1876; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> 1877; VI-NEXT: store volatile <3 x i16> %[[R_16]] 1878define amdgpu_kernel void @ashr_3xi16(<3 x i16> %a, <3 x i16> %b) { 1879 %r = ashr <3 x i16> %a, %b 1880 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 1881 ret void 1882} 1883 1884; GCN-LABEL: @ashr_exact_3xi16( 1885; SI: %r = ashr exact <3 x i16> %a, %b 1886; SI-NEXT: store volatile <3 x i16> %r 1887; VI: %[[A_32:[0-9]+]] = sext <3 x i16> %a to <3 x i32> 1888; VI-NEXT: %[[B_32:[0-9]+]] = sext <3 x i16> %b to <3 x i32> 1889; VI-NEXT: %[[R_32:[0-9]+]] = ashr exact <3 x i32> %[[A_32]], %[[B_32]] 1890; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> 1891; VI-NEXT: store volatile <3 x i16> %[[R_16]] 1892define amdgpu_kernel void @ashr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) { 1893 %r = ashr exact <3 x i16> %a, %b 1894 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 1895 ret void 1896} 1897 1898; GCN-LABEL: @and_3xi16( 1899; SI: %r = and <3 x i16> %a, %b 1900; SI-NEXT: store volatile <3 x i16> %r 1901; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 1902; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 1903; VI-NEXT: %[[R_32:[0-9]+]] = and <3 x i32> %[[A_32]], %[[B_32]] 1904; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> 1905; VI-NEXT: store volatile <3 x i16> %[[R_16]] 1906define amdgpu_kernel void @and_3xi16(<3 x i16> %a, <3 x i16> %b) { 1907 %r = and <3 x i16> %a, %b 1908 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 1909 ret void 1910} 1911 1912; GCN-LABEL: @or_3xi16( 1913; SI: %r = or <3 x i16> %a, %b 1914; SI-NEXT: store volatile <3 x i16> %r 1915; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 1916; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 1917; VI-NEXT: %[[R_32:[0-9]+]] = or <3 x i32> %[[A_32]], %[[B_32]] 1918; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> 1919; VI-NEXT: store volatile <3 x i16> %[[R_16]] 1920define amdgpu_kernel void @or_3xi16(<3 x i16> %a, <3 x i16> %b) { 1921 %r = or <3 x i16> %a, %b 1922 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 1923 ret void 1924} 1925 1926; GCN-LABEL: @xor_3xi16( 1927; SI: %r = xor <3 x i16> %a, %b 1928; SI-NEXT: store volatile <3 x i16> %r 1929; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 1930; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 1931; VI-NEXT: %[[R_32:[0-9]+]] = xor <3 x i32> %[[A_32]], %[[B_32]] 1932; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> 1933; VI-NEXT: store volatile <3 x i16> %[[R_16]] 1934define amdgpu_kernel void @xor_3xi16(<3 x i16> %a, <3 x i16> %b) { 1935 %r = xor <3 x i16> %a, %b 1936 store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef 1937 ret void 1938} 1939 1940; GCN-LABEL: @select_eq_3xi16( 1941; SI: %cmp = icmp eq <3 x i16> %a, %b 1942; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b 1943; SI-NEXT: store volatile <3 x i16> %sel 1944; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 1945; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 1946; VI-NEXT: %[[CMP:[0-9]+]] = icmp eq <3 x i32> %[[A_32_0]], %[[B_32_0]] 1947; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 1948; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 1949; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] 1950; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> 1951; VI-NEXT: store volatile <3 x i16> %[[SEL_16]] 1952define amdgpu_kernel void @select_eq_3xi16(<3 x i16> %a, <3 x i16> %b) { 1953 %cmp = icmp eq <3 x i16> %a, %b 1954 %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b 1955 store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef 1956 ret void 1957} 1958 1959; GCN-LABEL: @select_ne_3xi16( 1960; SI: %cmp = icmp ne <3 x i16> %a, %b 1961; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b 1962; SI-NEXT: store volatile <3 x i16> %sel 1963; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 1964; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 1965; VI-NEXT: %[[CMP:[0-9]+]] = icmp ne <3 x i32> %[[A_32_0]], %[[B_32_0]] 1966; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 1967; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 1968; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] 1969; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> 1970; VI-NEXT: store volatile <3 x i16> %[[SEL_16]] 1971define amdgpu_kernel void @select_ne_3xi16(<3 x i16> %a, <3 x i16> %b) { 1972 %cmp = icmp ne <3 x i16> %a, %b 1973 %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b 1974 store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef 1975 ret void 1976} 1977 1978; GCN-LABEL: @select_ugt_3xi16( 1979; SI: %cmp = icmp ugt <3 x i16> %a, %b 1980; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b 1981; SI-NEXT: store volatile <3 x i16> %sel 1982; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 1983; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 1984; VI-NEXT: %[[CMP:[0-9]+]] = icmp ugt <3 x i32> %[[A_32_0]], %[[B_32_0]] 1985; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 1986; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 1987; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] 1988; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> 1989; VI-NEXT: store volatile <3 x i16> %[[SEL_16]] 1990define amdgpu_kernel void @select_ugt_3xi16(<3 x i16> %a, <3 x i16> %b) { 1991 %cmp = icmp ugt <3 x i16> %a, %b 1992 %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b 1993 store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef 1994 ret void 1995} 1996 1997; GCN-LABEL: @select_uge_3xi16( 1998; SI: %cmp = icmp uge <3 x i16> %a, %b 1999; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b 2000; SI-NEXT: store volatile <3 x i16> %sel 2001; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 2002; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 2003; VI-NEXT: %[[CMP:[0-9]+]] = icmp uge <3 x i32> %[[A_32_0]], %[[B_32_0]] 2004; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 2005; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 2006; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] 2007; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> 2008; VI-NEXT: store volatile <3 x i16> %[[SEL_16]] 2009define amdgpu_kernel void @select_uge_3xi16(<3 x i16> %a, <3 x i16> %b) { 2010 %cmp = icmp uge <3 x i16> %a, %b 2011 %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b 2012 store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef 2013 ret void 2014} 2015 2016; GCN-LABEL: @select_ult_3xi16( 2017; SI: %cmp = icmp ult <3 x i16> %a, %b 2018; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b 2019; SI-NEXT: store volatile <3 x i16> %sel 2020; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 2021; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 2022; VI-NEXT: %[[CMP:[0-9]+]] = icmp ult <3 x i32> %[[A_32_0]], %[[B_32_0]] 2023; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 2024; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 2025; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] 2026; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> 2027; VI-NEXT: store volatile <3 x i16> %[[SEL_16]] 2028define amdgpu_kernel void @select_ult_3xi16(<3 x i16> %a, <3 x i16> %b) { 2029 %cmp = icmp ult <3 x i16> %a, %b 2030 %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b 2031 store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef 2032 ret void 2033} 2034 2035; GCN-LABEL: @select_ule_3xi16( 2036; SI: %cmp = icmp ule <3 x i16> %a, %b 2037; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b 2038; SI-NEXT: store volatile <3 x i16> %sel 2039; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 2040; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 2041; VI-NEXT: %[[CMP:[0-9]+]] = icmp ule <3 x i32> %[[A_32_0]], %[[B_32_0]] 2042; VI-NEXT: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 2043; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32> 2044; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] 2045; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> 2046; VI-NEXT: store volatile <3 x i16> %[[SEL_16]] 2047define amdgpu_kernel void @select_ule_3xi16(<3 x i16> %a, <3 x i16> %b) { 2048 %cmp = icmp ule <3 x i16> %a, %b 2049 %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b 2050 store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef 2051 ret void 2052} 2053 2054; GCN-LABEL: @select_sgt_3xi16( 2055; SI: %cmp = icmp sgt <3 x i16> %a, %b 2056; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b 2057; SI-NEXT: store volatile <3 x i16> %sel 2058; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32> 2059; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32> 2060; VI-NEXT: %[[CMP:[0-9]+]] = icmp sgt <3 x i32> %[[A_32_0]], %[[B_32_0]] 2061; VI-NEXT: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32> 2062; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32> 2063; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] 2064; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> 2065; VI-NEXT: store volatile <3 x i16> %[[SEL_16]] 2066define amdgpu_kernel void @select_sgt_3xi16(<3 x i16> %a, <3 x i16> %b) { 2067 %cmp = icmp sgt <3 x i16> %a, %b 2068 %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b 2069 store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef 2070 ret void 2071} 2072 2073; GCN-LABEL: @select_sge_3xi16( 2074; SI: %cmp = icmp sge <3 x i16> %a, %b 2075; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b 2076; SI-NEXT: store volatile <3 x i16> %sel 2077; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32> 2078; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32> 2079; VI-NEXT: %[[CMP:[0-9]+]] = icmp sge <3 x i32> %[[A_32_0]], %[[B_32_0]] 2080; VI-NEXT: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32> 2081; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32> 2082; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] 2083; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> 2084; VI-NEXT: store volatile <3 x i16> %[[SEL_16]] 2085define amdgpu_kernel void @select_sge_3xi16(<3 x i16> %a, <3 x i16> %b) { 2086 %cmp = icmp sge <3 x i16> %a, %b 2087 %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b 2088 store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef 2089 ret void 2090} 2091 2092; GCN-LABEL: @select_slt_3xi16( 2093; SI: %cmp = icmp slt <3 x i16> %a, %b 2094; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b 2095; SI-NEXT: store volatile <3 x i16> %sel 2096; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32> 2097; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32> 2098; VI-NEXT: %[[CMP:[0-9]+]] = icmp slt <3 x i32> %[[A_32_0]], %[[B_32_0]] 2099; VI-NEXT: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32> 2100; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32> 2101; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] 2102; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> 2103; VI-NEXT: store volatile <3 x i16> %[[SEL_16]] 2104define amdgpu_kernel void @select_slt_3xi16(<3 x i16> %a, <3 x i16> %b) { 2105 %cmp = icmp slt <3 x i16> %a, %b 2106 %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b 2107 store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef 2108 ret void 2109} 2110 2111; GCN-LABEL: @select_sle_3xi16( 2112; SI: %cmp = icmp sle <3 x i16> %a, %b 2113; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b 2114; SI-NEXT: store volatile <3 x i16> %sel 2115; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32> 2116; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32> 2117; VI-NEXT: %[[CMP:[0-9]+]] = icmp sle <3 x i32> %[[A_32_0]], %[[B_32_0]] 2118; VI-NEXT: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32> 2119; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32> 2120; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] 2121; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> 2122; VI-NEXT: store volatile <3 x i16> %[[SEL_16]] 2123define amdgpu_kernel void @select_sle_3xi16(<3 x i16> %a, <3 x i16> %b) { 2124 %cmp = icmp sle <3 x i16> %a, %b 2125 %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b 2126 store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef 2127 ret void 2128} 2129 2130declare <3 x i16> @llvm.bitreverse.v3i16(<3 x i16>) 2131 2132; GCN-LABEL: @bitreverse_3xi16( 2133; SI: %brev = call <3 x i16> @llvm.bitreverse.v3i16(<3 x i16> %a) 2134; SI-NEXT: store volatile <3 x i16> %brev 2135; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> 2136; VI-NEXT: %[[R_32:[0-9]+]] = call <3 x i32> @llvm.bitreverse.v3i32(<3 x i32> %[[A_32]]) 2137; VI-NEXT: %[[S_32:[0-9]+]] = lshr <3 x i32> %[[R_32]], <i32 16, i32 16, i32 16> 2138; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[S_32]] to <3 x i16> 2139; VI-NEXT: store volatile <3 x i16> %[[R_16]] 2140define amdgpu_kernel void @bitreverse_3xi16(<3 x i16> %a) { 2141 %brev = call <3 x i16> @llvm.bitreverse.v3i16(<3 x i16> %a) 2142 store volatile <3 x i16> %brev, <3 x i16> addrspace(1)* undef 2143 ret void 2144} 2145