;RUN: llc -march=hexagon -mcpu=hexagonv66 -mhvx -filetype=obj < %s -o - | llvm-objdump --mv66 --mhvx -d - | FileCheck --check-prefix=CHECK-V66 %s
;RUN: llc -march=hexagon -mcpu=hexagonv67 -mhvx -filetype=obj < %s -o - | llvm-objdump --mv67 --mhvx -d - | FileCheck --check-prefix=CHECK-V67 %s

; Should not attempt to use v<even>:<odd> 'reverse' vector regpairs
; on old or new arches (should not crash).

; NOTE(review): this file was restored from a copy whose line breaks and some
; tokens had been lost. The following were reconstructed and should be
; confirmed against the upstream test before relying on exact codegen:
;   * the numbering of unnamed values (%0..%75), derived from definition order
;     and cross-checked against every use;
;   * getelementptr offsets (64 bytes per i8 step, 1 element per vector step),
;     inferred from the 64-byte vector type and the "-64" loop decrement;
;   * !tbaa attachments on memory accesses other than the last loop load;
;   * the "false" values of the two fp-math function attributes.

; CHECK-V66: vcombine
; CHECK-V67: vcombine
declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>)
declare <16 x i32> @llvm.hexagon.V6.vd0()
declare <32 x i32> @llvm.hexagon.V6.vmpybus(<16 x i32>, i32)
declare <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32>, <32 x i32>, i32)
declare <16 x i32> @llvm.hexagon.V6.hi(<32 x i32>)
declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>, i32 )
declare <16 x i32> @llvm.hexagon.V6.lo(<32 x i32>)
declare <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32>, <16 x i32>, i32 )
declare <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32>, <16 x i32>)
declare <16 x i32> @llvm.hexagon.V6.vmpyihb.acc(<16 x i32>, <16 x i32>, i32)
declare <16 x i32> @llvm.hexagon.V6.vasrhubrndsat(<16 x i32>, <16 x i32>, i32)

declare <32 x i32> @llvm.hexagon.V6.vaddubh(<16 x i32>, <16 x i32>)
declare <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32>, <16 x i32>, i32)
declare <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32>, <16 x i32>, i32)
declare <16 x i32> @llvm.hexagon.V6.vshuffob(<16 x i32>, <16 x i32>)


; Reads seven rows around %src (offsets -3..+3 times %stride), combines them
; with HVX intrinsics and stores one result vector to %dst per loop iteration.
; The loop runs while the remaining width is greater than 64.
define void @Gaussian7x7u8PerRow(i8* %src, i32 %stride, i32 %width, i8* %dst) #0 {
entry:
  ; Row pointer at %src - 3 * %stride.
  %mul = mul i32 %stride, 3
  %idx.neg = sub i32 0, %mul
  %add.ptr = getelementptr i8, i8* %src, i32 %idx.neg
  %0 = bitcast i8* %add.ptr to <16 x i32>*
  ; Row pointer at %src - 2 * %stride.
  %mul1 = shl i32 %stride, 1
  %idx.neg2 = sub i32 0, %mul1
  %add.ptr3 = getelementptr i8, i8* %src, i32 %idx.neg2
  %1 = bitcast i8* %add.ptr3 to <16 x i32>*
  ; Row pointer at %src - %stride.
  %idx.neg5 = sub i32 0, %stride
  %add.ptr6 = getelementptr i8, i8* %src, i32 %idx.neg5
  %2 = bitcast i8* %add.ptr6 to <16 x i32>*
  ; Centre row, then the rows at +1, +2 and +3 times %stride.
  %3 = bitcast i8* %src to <16 x i32>*
  %add.ptr10 = getelementptr i8, i8* %src, i32 %stride
  %4 = bitcast i8* %add.ptr10 to <16 x i32>*
  %add.ptr12 = getelementptr i8, i8* %src, i32 %mul1
  %5 = bitcast i8* %add.ptr12 to <16 x i32>*
  %add.ptr14 = getelementptr i8, i8* %src, i32 %mul
  %6 = bitcast i8* %add.ptr14 to <16 x i32>*
  %7 = bitcast i8* %dst to <16 x i32>*
  ; First vector of each of the seven rows.
  %8 = load <16 x i32>, <16 x i32>* %0, !tbaa !8
  %9 = load <16 x i32>, <16 x i32>* %1, !tbaa !8
  %10 = load <16 x i32>, <16 x i32>* %2, !tbaa !8
  %11 = load <16 x i32>, <16 x i32>* %3, !tbaa !8
  %12 = load <16 x i32>, <16 x i32>* %4, !tbaa !8
  %13 = load <16 x i32>, <16 x i32>* %5, !tbaa !8
  %14 = load <16 x i32>, <16 x i32>* %6, !tbaa !8
  ; %16 is a vector pair built from two copies of the vd0 result; it seeds
  ; %dXV0 on loop entry.
  %15 = call <16 x i32> @llvm.hexagon.V6.vd0()
  %16 = call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %15, <16 x i32> %15)
  ; Column accumulation over the first vectors; %22 seeds %dXV1 on loop entry.
  %17 = call <32 x i32> @llvm.hexagon.V6.vaddubh(<16 x i32> %14, <16 x i32> %8)
  %18 = call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %13, <16 x i32> %9)
  %19 = call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %17, <32 x i32> %18, i32 101058054)
  %20 = call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %12, <16 x i32> %10)
  %21 = call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %19, <32 x i32> %20, i32 252645135)
  %22 = call <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32> %21, <16 x i32> %11, i32 336860180)
  %cmp155 = icmp sgt i32 %width, 64
  br i1 %cmp155, label %for.body.preheader, label %for.end

for.body.preheader:
  ; Advance every row pointer past the vector already consumed in entry.
  %incdec.ptr20 = getelementptr i8, i8* %add.ptr14, i32 64
  %23 = bitcast i8* %incdec.ptr20 to <16 x i32>*
  %incdec.ptr19 = getelementptr i8, i8* %add.ptr12, i32 64
  %24 = bitcast i8* %incdec.ptr19 to <16 x i32>*
  %incdec.ptr18 = getelementptr i8, i8* %add.ptr10, i32 64
  %25 = bitcast i8* %incdec.ptr18 to <16 x i32>*
  %incdec.ptr17 = getelementptr i8, i8* %src, i32 64
  %26 = bitcast i8* %incdec.ptr17 to <16 x i32>*
  %incdec.ptr16 = getelementptr i8, i8* %add.ptr6, i32 64
  %27 = bitcast i8* %incdec.ptr16 to <16 x i32>*
  %incdec.ptr15 = getelementptr i8, i8* %add.ptr3, i32 64
  %28 = bitcast i8* %incdec.ptr15 to <16 x i32>*
  %incdec.ptr = getelementptr i8, i8* %add.ptr, i32 64
  %29 = bitcast i8* %incdec.ptr to <16 x i32>*
  br label %for.body

for.body:
  ; Output pointer and the seven input row pointers.
  %optr.0166 = phi <16 x i32>* [ %incdec.ptr28, %for.body ], [ %7, %for.body.preheader ]
  %iptr6.0165 = phi <16 x i32>* [ %incdec.ptr27, %for.body ], [ %23, %for.body.preheader ]
  %iptr5.0164 = phi <16 x i32>* [ %incdec.ptr26, %for.body ], [ %24, %for.body.preheader ]
  %iptr4.0163 = phi <16 x i32>* [ %incdec.ptr25, %for.body ], [ %25, %for.body.preheader ]
  %iptr3.0162 = phi <16 x i32>* [ %incdec.ptr24, %for.body ], [ %26, %for.body.preheader ]
  %iptr2.0161 = phi <16 x i32>* [ %incdec.ptr23, %for.body ], [ %27, %for.body.preheader ]
  %iptr1.0160 = phi <16 x i32>* [ %incdec.ptr22, %for.body ], [ %28, %for.body.preheader ]
  %iptr0.0159 = phi <16 x i32>* [ %incdec.ptr21, %for.body ], [ %29, %for.body.preheader ]
  ; %dXV1 carries the previous iteration's column sums (%49); %dXV0 carries
  ; the value %dXV1 held one iteration earlier.
  %dXV1.0158 = phi <32 x i32> [ %49, %for.body ], [ %22, %for.body.preheader ]
  %dXV0.0157 = phi <32 x i32> [ %dXV1.0158, %for.body ], [ %16, %for.body.preheader ]
  %i.0156 = phi i32 [ %sub, %for.body ], [ %width, %for.body.preheader ]
  ; Load the next vector from each row and bump each row pointer.
  %incdec.ptr21 = getelementptr <16 x i32>, <16 x i32>* %iptr0.0159, i32 1
  %30 = load <16 x i32>, <16 x i32>* %iptr0.0159, !tbaa !8
  %incdec.ptr22 = getelementptr <16 x i32>, <16 x i32>* %iptr1.0160, i32 1
  %31 = load <16 x i32>, <16 x i32>* %iptr1.0160, !tbaa !8
  %incdec.ptr23 = getelementptr <16 x i32>, <16 x i32>* %iptr2.0161, i32 1
  %32 = load <16 x i32>, <16 x i32>* %iptr2.0161, !tbaa !8
  %incdec.ptr24 = getelementptr <16 x i32>, <16 x i32>* %iptr3.0162, i32 1
  %33 = load <16 x i32>, <16 x i32>* %iptr3.0162, !tbaa !8
  %incdec.ptr25 = getelementptr <16 x i32>, <16 x i32>* %iptr4.0163, i32 1
  %34 = load <16 x i32>, <16 x i32>* %iptr4.0163, !tbaa !8
  %incdec.ptr26 = getelementptr <16 x i32>, <16 x i32>* %iptr5.0164, i32 1
  %35 = load <16 x i32>, <16 x i32>* %iptr5.0164, !tbaa !8
  %incdec.ptr27 = getelementptr <16 x i32>, <16 x i32>* %iptr6.0165, i32 1
  %36 = load <16 x i32>, <16 x i32>* %iptr6.0165, !tbaa !8
  ; Split the two carried vector pairs and form left-aligned neighbours.
  %37 = call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %dXV1.0158)
  %38 = call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %dXV0.0157)
  %39 = call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %37, <16 x i32> %38, i32 2)
  %40 = call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %dXV1.0158)
  %41 = call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %dXV0.0157)
  %42 = call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %40, <16 x i32> %41, i32 2)
  %43 = call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %37, <16 x i32> %38, i32 4)
  ; Column accumulation for the newly loaded vectors (same sequence as entry).
  %44 = call <32 x i32> @llvm.hexagon.V6.vaddubh(<16 x i32> %36, <16 x i32> %30)
  %45 = call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %35, <16 x i32> %31)
  %46 = call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %44, <32 x i32> %45, i32 101058054)
  %47 = call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %34, <16 x i32> %32)
  %48 = call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %46, <32 x i32> %47, i32 252645135)
  %49 = call <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32> %48, <16 x i32> %33, i32 336860180)
  ; Right-aligned neighbours taken from the new column sums.
  %50 = call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %49)
  %51 = call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %50, <16 x i32> %40, i32 2)
  %52 = call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %49)
  %53 = call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %52, <16 x i32> %37, i32 2)
  %54 = call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %50, <16 x i32> %40, i32 4)
  ; Row pass: two accumulator chains (%57 -> %64 and %60 -> %68).
  %55 = call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %37, <16 x i32> %39)
  %56 = call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %55, <16 x i32> %40)
  %57 = call <32 x i32> @llvm.hexagon.V6.vmpahb(<32 x i32> %56, i32 252972820)
  %58 = call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %51, <16 x i32> %40)
  %59 = call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %58, <16 x i32> %37)
  %60 = call <32 x i32> @llvm.hexagon.V6.vmpahb(<32 x i32> %59, i32 252972820)
  %61 = call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %53, <16 x i32> %43)
  %62 = call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %51, <16 x i32> %42)
  %63 = call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %61, <16 x i32> %62)
  %64 = call <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32> %57, <32 x i32> %63, i32 17170694)
  %65 = call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %54, <16 x i32> %42)
  %66 = call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %53, <16 x i32> %39)
  %67 = call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %65, <16 x i32> %66)
  %68 = call <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32> %60, <32 x i32> %67, i32 17170694)
  ; Shift each accumulator pair by 12, then shuffle the two halves into the
  ; output vector.
  %69 = call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %64)
  %70 = call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %64)
  %71 = call <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32> %69, <16 x i32> %70, i32 12)
  %72 = call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %68)
  %73 = call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %68)
  %74 = call <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32> %72, <16 x i32> %73, i32 12)
  %75 = call <16 x i32> @llvm.hexagon.V6.vshuffeb(<16 x i32> %74, <16 x i32> %71)
  %incdec.ptr28 = getelementptr <16 x i32>, <16 x i32>* %optr.0166, i32 1
  store <16 x i32> %75, <16 x i32>* %optr.0166, !tbaa !8
  %sub = add i32 %i.0156, -64
  %cmp = icmp sgt i32 %sub, 64
  br i1 %cmp, label %for.body, label %for.end

for.end:
  ret void
}
declare <32 x i32> @llvm.hexagon.V6.vmpahb(<32 x i32>, i32)
declare <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32>, <32 x i32>, i32)
declare <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32>, <16 x i32>, i32)
declare <16 x i32> @llvm.hexagon.V6.vshuffeb(<16 x i32>, <16 x i32>)

attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "target-cpu"="hexagonv65" "target-features"="+hvx-length64b,+hvxv65,+v65,-long-calls" "unsafe-fp-math"="false" }
!8 = !{!9, !9, i64 0}
!9 = !{!"omnipotent char", !10}
!10 = !{}
!14 = !{}
!19 = !{}
!24 = !{}