; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX2

; Verify the cost of vector arithmetic shift right instructions.
;
; Each function below contains a single 'ashr' named %shift; the per-target
; check lines pin the cost reported for that instruction. The check prefixes
; map to the invocations above as follows:
;   SSE2    - corei7 with +sse2,-sse4.1
;   SSE41   - corei7
;   AVX     - corei7-avx
;   AVX2    - core-avx2
;   XOPAVX  - bdver2 (also matched by the shared XOP prefix)
;   XOPAVX2 - bdver4 (also matched by the shared XOP prefix)

;
; Variable Shifts
;
; The shift amount is the second vector argument, used as-is.
;

define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v2i64':
; SSE2: Found an estimated cost of 12 for instruction:   %shift
; SSE41: Found an estimated cost of 12 for instruction:   %shift
; AVX: Found an estimated cost of 12 for instruction:   %shift
; AVX2: Found an estimated cost of 4 for instruction:   %shift
; XOP: Found an estimated cost of 2 for instruction:   %shift
  %shift = ashr <2 x i64> %a, %b
  ret <2 x i64> %shift
}

define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i64':
; SSE2: Found an estimated cost of 24 for instruction:   %shift
; SSE41: Found an estimated cost of 24 for instruction:   %shift
; AVX: Found an estimated cost of 24 for instruction:   %shift
; AVX2: Found an estimated cost of 4 for instruction:   %shift
; XOP: Found an estimated cost of 4 for instruction:   %shift
  %shift = ashr <4 x i64> %a, %b
  ret <4 x i64> %shift
}

define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction:   %shift
; SSE41: Found an estimated cost of 16 for instruction:   %shift
; AVX: Found an estimated cost of 16 for instruction:   %shift
; AVX2: Found an estimated cost of 1 for instruction:   %shift
; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
  %shift = ashr <4 x i32> %a, %b
  ret <4 x i32> %shift
}

define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32':
; SSE2: Found an estimated cost of 32 for instruction:   %shift
; SSE41: Found an estimated cost of 32 for instruction:   %shift
; AVX: Found an estimated cost of 32 for instruction:   %shift
; AVX2: Found an estimated cost of 1 for instruction:   %shift
; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
  %shift = ashr <8 x i32> %a, %b
  ret <8 x i32> %shift
}

define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction:   %shift
; SSE41: Found an estimated cost of 32 for instruction:   %shift
; AVX: Found an estimated cost of 32 for instruction:   %shift
; AVX2: Found an estimated cost of 32 for instruction:   %shift
; XOP: Found an estimated cost of 2 for instruction:   %shift
  %shift = ashr <8 x i16> %a, %b
  ret <8 x i16> %shift
}

define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction:   %shift
; SSE41: Found an estimated cost of 64 for instruction:   %shift
; AVX: Found an estimated cost of 64 for instruction:   %shift
; AVX2: Found an estimated cost of 10 for instruction:   %shift
; XOP: Found an estimated cost of 4 for instruction:   %shift
  %shift = ashr <16 x i16> %a, %b
  ret <16 x i16> %shift
}

define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8':
; SSE2: Found an estimated cost of 54 for instruction:   %shift
; SSE41: Found an estimated cost of 54 for instruction:   %shift
; AVX: Found an estimated cost of 54 for instruction:   %shift
; AVX2: Found an estimated cost of 54 for instruction:   %shift
; XOP: Found an estimated cost of 2 for instruction:   %shift
  %shift = ashr <16 x i8> %a, %b
  ret <16 x i8> %shift
}

define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
; SSE2: Found an estimated cost of 108 for instruction:   %shift
; SSE41: Found an estimated cost of 108 for instruction:   %shift
; AVX: Found an estimated cost of 108 for instruction:   %shift
; AVX2: Found an estimated cost of 24 for instruction:   %shift
; XOP: Found an estimated cost of 4 for instruction:   %shift
  %shift = ashr <32 x i8> %a, %b
  ret <32 x i8> %shift
}

;
; Uniform Variable Shifts
;
; The shift amount is element 0 of %b broadcast to every lane (shufflevector
; with an all-zero mask). Only the cost of %shift is checked, not %splat.
;

define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v2i64':
; SSE2: Found an estimated cost of 12 for instruction:   %shift
; SSE41: Found an estimated cost of 12 for instruction:   %shift
; AVX: Found an estimated cost of 12 for instruction:   %shift
; AVX2: Found an estimated cost of 4 for instruction:   %shift
; XOP: Found an estimated cost of 2 for instruction:   %shift
  %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
  %shift = ashr <2 x i64> %a, %splat
  ret <2 x i64> %shift
}

define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i64':
; SSE2: Found an estimated cost of 24 for instruction:   %shift
; SSE41: Found an estimated cost of 24 for instruction:   %shift
; AVX: Found an estimated cost of 24 for instruction:   %shift
; AVX2: Found an estimated cost of 4 for instruction:   %shift
; XOP: Found an estimated cost of 4 for instruction:   %shift
  %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
  %shift = ashr <4 x i64> %a, %splat
  ret <4 x i64> %shift
}

define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction:   %shift
; SSE41: Found an estimated cost of 16 for instruction:   %shift
; AVX: Found an estimated cost of 16 for instruction:   %shift
; AVX2: Found an estimated cost of 1 for instruction:   %shift
; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
  %splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
  %shift = ashr <4 x i32> %a, %splat
  ret <4 x i32> %shift
}

define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
; SSE2: Found an estimated cost of 32 for instruction:   %shift
; SSE41: Found an estimated cost of 32 for instruction:   %shift
; AVX: Found an estimated cost of 32 for instruction:   %shift
; AVX2: Found an estimated cost of 1 for instruction:   %shift
; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
  %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
  %shift = ashr <8 x i32> %a, %splat
  ret <8 x i32> %shift
}

define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction:   %shift
; SSE41: Found an estimated cost of 32 for instruction:   %shift
; AVX: Found an estimated cost of 32 for instruction:   %shift
; AVX2: Found an estimated cost of 32 for instruction:   %shift
; XOP: Found an estimated cost of 2 for instruction:   %shift
  %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
  %shift = ashr <8 x i16> %a, %splat
  ret <8 x i16> %shift
}

define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction:   %shift
; SSE41: Found an estimated cost of 64 for instruction:   %shift
; AVX: Found an estimated cost of 64 for instruction:   %shift
; AVX2: Found an estimated cost of 10 for instruction:   %shift
; XOP: Found an estimated cost of 4 for instruction:   %shift
  %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
  %shift = ashr <16 x i16> %a, %splat
  ret <16 x i16> %shift
}

define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
; SSE2: Found an estimated cost of 54 for instruction:   %shift
; SSE41: Found an estimated cost of 54 for instruction:   %shift
; AVX: Found an estimated cost of 54 for instruction:   %shift
; AVX2: Found an estimated cost of 54 for instruction:   %shift
; XOP: Found an estimated cost of 2 for instruction:   %shift
  %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
  %shift = ashr <16 x i8> %a, %splat
  ret <16 x i8> %shift
}

define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
; SSE2: Found an estimated cost of 108 for instruction:   %shift
; SSE41: Found an estimated cost of 108 for instruction:   %shift
; AVX: Found an estimated cost of 108 for instruction:   %shift
; AVX2: Found an estimated cost of 24 for instruction:   %shift
; XOP: Found an estimated cost of 4 for instruction:   %shift
  %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
  %shift = ashr <32 x i8> %a, %splat
  ret <32 x i8> %shift
}

;
; Constant Shifts
;
; The shift amount is a constant vector whose lanes differ.
;

define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v2i64':
; SSE2: Found an estimated cost of 12 for instruction:   %shift
; SSE41: Found an estimated cost of 12 for instruction:   %shift
; AVX: Found an estimated cost of 12 for instruction:   %shift
; AVX2: Found an estimated cost of 4 for instruction:   %shift
; XOP: Found an estimated cost of 2 for instruction:   %shift
  %shift = ashr <2 x i64> %a, <i64 1, i64 7>
  ret <2 x i64> %shift
}

define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i64':
; SSE2: Found an estimated cost of 24 for instruction:   %shift
; SSE41: Found an estimated cost of 24 for instruction:   %shift
; AVX: Found an estimated cost of 24 for instruction:   %shift
; AVX2: Found an estimated cost of 4 for instruction:   %shift
; XOP: Found an estimated cost of 4 for instruction:   %shift
  %shift = ashr <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
  ret <4 x i64> %shift
}

define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction:   %shift
; SSE41: Found an estimated cost of 16 for instruction:   %shift
; AVX: Found an estimated cost of 16 for instruction:   %shift
; AVX2: Found an estimated cost of 1 for instruction:   %shift
; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
  %shift = ashr <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
  ret <4 x i32> %shift
}

define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32':
; SSE2: Found an estimated cost of 32 for instruction:   %shift
; SSE41: Found an estimated cost of 32 for instruction:   %shift
; AVX: Found an estimated cost of 32 for instruction:   %shift
; AVX2: Found an estimated cost of 1 for instruction:   %shift
; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
  %shift = ashr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i32> %shift
}

define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction:   %shift
; SSE41: Found an estimated cost of 32 for instruction:   %shift
; AVX: Found an estimated cost of 32 for instruction:   %shift
; AVX2: Found an estimated cost of 32 for instruction:   %shift
; XOP: Found an estimated cost of 2 for instruction:   %shift
  %shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
  ret <8 x i16> %shift
}

define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction:   %shift
; SSE41: Found an estimated cost of 64 for instruction:   %shift
; AVX: Found an estimated cost of 64 for instruction:   %shift
; AVX2: Found an estimated cost of 10 for instruction:   %shift
; XOP: Found an estimated cost of 4 for instruction:   %shift
  %shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
  ret <16 x i16> %shift
}

define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8':
; SSE2: Found an estimated cost of 54 for instruction:   %shift
; SSE41: Found an estimated cost of 54 for instruction:   %shift
; AVX: Found an estimated cost of 54 for instruction:   %shift
; AVX2: Found an estimated cost of 54 for instruction:   %shift
; XOP: Found an estimated cost of 2 for instruction:   %shift
  %shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <16 x i8> %shift
}

define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
; SSE2: Found an estimated cost of 108 for instruction:   %shift
; SSE41: Found an estimated cost of 108 for instruction:   %shift
; AVX: Found an estimated cost of 108 for instruction:   %shift
; AVX2: Found an estimated cost of 24 for instruction:   %shift
; XOP: Found an estimated cost of 4 for instruction:   %shift
  %shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <32 x i8> %shift
}

;
; Uniform Constant Shifts
;
; The shift amount is a constant vector with the same value in every lane.
; NOTE(review): for the v16i16 and v32i8 cases the expected AVX2 cost is
; higher than the expected AVX cost; the values are kept exactly as recorded -
; confirm against the cost model before changing them.
;

define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v2i64':
; SSE2: Found an estimated cost of 4 for instruction:   %shift
; SSE41: Found an estimated cost of 4 for instruction:   %shift
; AVX: Found an estimated cost of 4 for instruction:   %shift
; AVX2: Found an estimated cost of 4 for instruction:   %shift
; XOP: Found an estimated cost of 2 for instruction:   %shift
  %shift = ashr <2 x i64> %a, <i64 7, i64 7>
  ret <2 x i64> %shift
}

define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i64':
; SSE2: Found an estimated cost of 8 for instruction:   %shift
; SSE41: Found an estimated cost of 8 for instruction:   %shift
; AVX: Found an estimated cost of 8 for instruction:   %shift
; AVX2: Found an estimated cost of 4 for instruction:   %shift
; XOP: Found an estimated cost of 4 for instruction:   %shift
  %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
  ret <4 x i64> %shift
}

define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i32':
; SSE2: Found an estimated cost of 1 for instruction:   %shift
; SSE41: Found an estimated cost of 1 for instruction:   %shift
; AVX: Found an estimated cost of 1 for instruction:   %shift
; AVX2: Found an estimated cost of 1 for instruction:   %shift
; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
  %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %shift
}

define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i32':
; SSE2: Found an estimated cost of 2 for instruction:   %shift
; SSE41: Found an estimated cost of 2 for instruction:   %shift
; AVX: Found an estimated cost of 2 for instruction:   %shift
; AVX2: Found an estimated cost of 1 for instruction:   %shift
; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
  %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i32> %shift
}

define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i16':
; SSE2: Found an estimated cost of 1 for instruction:   %shift
; SSE41: Found an estimated cost of 1 for instruction:   %shift
; AVX: Found an estimated cost of 1 for instruction:   %shift
; AVX2: Found an estimated cost of 1 for instruction:   %shift
; XOP: Found an estimated cost of 2 for instruction:   %shift
  %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %shift
}

define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i16':
; SSE2: Found an estimated cost of 2 for instruction:   %shift
; SSE41: Found an estimated cost of 2 for instruction:   %shift
; AVX: Found an estimated cost of 2 for instruction:   %shift
; AVX2: Found an estimated cost of 10 for instruction:   %shift
; XOP: Found an estimated cost of 4 for instruction:   %shift
  %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <16 x i16> %shift
}

define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8':
; SSE2: Found an estimated cost of 4 for instruction:   %shift
; SSE41: Found an estimated cost of 4 for instruction:   %shift
; AVX: Found an estimated cost of 4 for instruction:   %shift
; AVX2: Found an estimated cost of 4 for instruction:   %shift
; XOP: Found an estimated cost of 2 for instruction:   %shift
  %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %shift
}

define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8':
; SSE2: Found an estimated cost of 8 for instruction:   %shift
; SSE41: Found an estimated cost of 8 for instruction:   %shift
; AVX: Found an estimated cost of 8 for instruction:   %shift
; AVX2: Found an estimated cost of 24 for instruction:   %shift
; XOP: Found an estimated cost of 4 for instruction:   %shift
  %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %shift
}