; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ALL -check-prefix=O32 %s
; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ALL -check-prefix=O32 %s
; RUN: llc -march=mips64 -target-abi=n32 -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ALL -check-prefix=N32 %s
; RUN: llc -march=mips64el -target-abi=n32 -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ALL -check-prefix=N32 %s
; RUN: llc -march=mips64 -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ALL -check-prefix=N64 %s
; RUN: llc -march=mips64el -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ALL -check-prefix=N64 %s

; Code generation checks for basic MSA floating-point vector operations:
; constant vectors, splats of a scalar loaded at run time, element extraction
; and element insertion. Each function is compiled for big- and little-endian
; targets under three check-prefix groups (O32, N32, N64); checks that are
; common to all of them use the shared prefix.

; Globals used as sources/destinations so that nothing is folded away.
@v4f32 = global <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>
@v2f64 = global <2 x double> <double 0.0, double 0.0>
@i32 = global i32 0
@f32 = global float 0.0
@f64 = global double 0.0

; Constant <4 x float> vectors. The stores are volatile so every constant is
; materialised and stored separately.
define void @const_v4f32() nounwind {
  ; ALL-LABEL: const_v4f32:

  ; All-zero vector: expected to be built with an immediate load.
  store volatile <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, <4 x float>*@v4f32
  ; ALL: ldi.b  [[R1:\$w[0-9]+]], 0

  ; Splat of 1.0f (bit pattern 0x3F800000): the upper half 0x3F80 (16256) is
  ; built in a GPR with lui and then replicated with fill.w.
  store volatile <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, <4 x float>*@v4f32
  ; ALL: lui     [[R1:\$[0-9]+]], 16256
  ; ALL: fill.w  [[R2:\$w[0-9]+]], [[R1]]

  ; Not a splat: expected to be loaded from memory through an address formed
  ; with the ABI-specific relocation.
  store volatile <4 x float> <float 1.0, float 1.0, float 1.0, float 31.0>, <4 x float>*@v4f32
  ; O32: addiu  [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
  ; N32: addiu  [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
  ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
  ; ALL: ld.w  [[R1:\$w[0-9]+]], 0([[G_PTR]])

  ; Splat of 65537.0f (bit pattern 0x47800080): needs lui 0x4780 (18304)
  ; followed by ori 0x80 (128) before the fill.w.
  store volatile <4 x float> <float 65537.0, float 65537.0, float 65537.0, float 65537.0>, <4 x float>*@v4f32
  ; ALL: lui     [[R1:\$[0-9]+]], 18304
  ; ALL: ori     [[R2:\$[0-9]+]], [[R1]], 128
  ; ALL: fill.w  [[R3:\$w[0-9]+]], [[R2]]

  ; Repeating two-element pattern: expected to be loaded from memory.
  store volatile <4 x float> <float 1.0, float 2.0, float 1.0, float 2.0>, <4 x float>*@v4f32
  ; O32: addiu  [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
  ; N32: addiu  [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
  ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
  ; ALL: ld.w  [[R1:\$w[0-9]+]], 0([[G_PTR]])

  ; Four distinct elements: expected to be loaded from memory.
  store volatile <4 x float> <float 3.0, float 4.0, float 5.0, float 6.0>, <4 x float>*@v4f32
  ; O32: addiu  [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
  ; N32: addiu  [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
  ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
  ; ALL: ld.w  [[R1:\$w[0-9]+]], 0([[G_PTR]])

  ret void
}

; Constant <2 x double> vectors. Only the all-zero vector is expected to be
; built with an immediate; every other constant below, splat or not, is
; expected to be loaded from memory with ld.d.
define void @const_v2f64() nounwind {
  ; ALL-LABEL: const_v2f64:

  store volatile <2 x double> <double 0.0, double 0.0>, <2 x double>*@v2f64
  ; ALL: ldi.b  [[R1:\$w[0-9]+]], 0

  store volatile <2 x double> <double 72340172838076673.0, double 72340172838076673.0>, <2 x double>*@v2f64
  ; O32: addiu  [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
  ; N32: addiu  [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
  ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
  ; ALL: ld.d  [[R1:\$w[0-9]+]], 0([[G_PTR]])

  store volatile <2 x double> <double 281479271743489.0, double 281479271743489.0>, <2 x double>*@v2f64
  ; O32: addiu  [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
  ; N32: addiu  [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
  ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
  ; ALL: ld.d  [[R1:\$w[0-9]+]], 0([[G_PTR]])

  store volatile <2 x double> <double 4294967297.0, double 4294967297.0>, <2 x double>*@v2f64
  ; O32: addiu  [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
  ; N32: addiu  [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
  ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
  ; ALL: ld.d  [[R1:\$w[0-9]+]], 0([[G_PTR]])

  store volatile <2 x double> <double 1.0, double 1.0>, <2 x double>*@v2f64
  ; O32: addiu  [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
  ; N32: addiu  [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
  ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
  ; ALL: ld.d  [[R1:\$w[0-9]+]], 0([[G_PTR]])

  store volatile <2 x double> <double 1.0, double 31.0>, <2 x double>*@v2f64
  ; O32: addiu  [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
  ; N32: addiu  [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
  ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
  ; ALL: ld.d  [[R1:\$w[0-9]+]], 0([[G_PTR]])

  store volatile <2 x double> <double 3.0, double 4.0>, <2 x double>*@v2f64
  ; O32: addiu  [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
  ; N32: addiu  [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
  ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
  ; ALL: ld.d  [[R1:\$w[0-9]+]], 0([[G_PTR]])

  ret void
}

; Splat of a float loaded at run time into all four lanes. The scalar is
; loaded into $fN and the splat is taken from the same-numbered $wN.
define void @nonconst_v4f32() nounwind {
  ; ALL-LABEL: nonconst_v4f32:

  %1 = load float , float *@f32
  %2 = insertelement <4 x float> undef, float %1, i32 0
  %3 = insertelement <4 x float> %2, float %1, i32 1
  %4 = insertelement <4 x float> %3, float %1, i32 2
  %5 = insertelement <4 x float> %4, float %1, i32 3
  store volatile <4 x float> %5, <4 x float>*@v4f32
  ; ALL: lwc1 $f[[R1:[0-9]+]], 0(
  ; ALL: splati.w [[R2:\$w[0-9]+]], $w[[R1]]

  ret void
}

; Splat of a double loaded at run time into both lanes. The scalar is loaded
; into $fN and the splat is taken from the same-numbered $wN.
define void @nonconst_v2f64() nounwind {
  ; ALL-LABEL: nonconst_v2f64:

  %1 = load double , double *@f64
  %2 = insertelement <2 x double> undef, double %1, i32 0
  %3 = insertelement <2 x double> %2, double %1, i32 1
  store volatile <2 x double> %3, <2 x double>*@v2f64
  ; ALL: ldc1 $f[[R1:[0-9]+]], 0(
  ; ALL: splati.d [[R2:\$w[0-9]+]], $w[[R1]]

  ret void
}

; Extract element 1 of a <4 x float> computed by an fadd.
define float @extract_v4f32() nounwind {
  ; ALL-LABEL: extract_v4f32:

  %1 = load <4 x float>, <4 x float>* @v4f32
  ; ALL-DAG: ld.w [[R1:\$w[0-9]+]],

  %2 = fadd <4 x float> %1, %1
  ; ALL-DAG: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]

  %3 = extractelement <4 x float> %2, i32 1
  ; Element 1 can be obtained by splatting it across the vector and extracting
  ; $w0:sub_lo
  ; The splat must read the fadd result (%2), not the loaded vector (%1).
  ; ALL-DAG: splati.w $w0, [[R2]][1]

  ret float %3
}

; Extract element 0 of a <4 x float> computed by an fadd: no extra move is
; expected because the result already lives in the low part of $w0.
define float @extract_v4f32_elt0() nounwind {
  ; ALL-LABEL: extract_v4f32_elt0:

  %1 = load <4 x float>, <4 x float>* @v4f32
  ; ALL-DAG: ld.w [[R1:\$w[0-9]+]],

  %2 = fadd <4 x float> %1, %1
  ; ALL-DAG: fadd.w $w0, [[R1]], [[R1]]

  %3 = extractelement <4 x float> %2, i32 0
  ; Element 0 can be obtained by extracting $w0:sub_lo ($f0)
  ; ALL-NOT: copy_u.w
  ; ALL-NOT: mtc1

  ret float %3
}

; Extract element 2 of a <4 x float> computed by an fadd.
define float @extract_v4f32_elt2() nounwind {
  ; ALL-LABEL: extract_v4f32_elt2:

  %1 = load <4 x float>, <4 x float>* @v4f32
  ; ALL-DAG: ld.w [[R1:\$w[0-9]+]],

  %2 = fadd <4 x float> %1, %1
  ; ALL-DAG: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]

  %3 = extractelement <4 x float> %2, i32 2
  ; Element 2 can be obtained by splatting it across the vector and extracting
  ; $w0:sub_lo
  ; The splat must read the fadd result (%2), not the loaded vector (%1).
  ; ALL-DAG: splati.w $w0, [[R2]][2]

  ret float %3
}

; Extract a <4 x float> element selected by an index loaded at run time.
define float @extract_v4f32_vidx() nounwind {
  ; ALL-LABEL: extract_v4f32_vidx:

  %1 = load <4 x float>, <4 x float>* @v4f32
  ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v4f32)(
  ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v4f32)(
  ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v4f32)(
  ; ALL-DAG: ld.w [[R1:\$w[0-9]+]], 0([[PTR_V]])

  %2 = fadd <4 x float> %1, %1
  ; ALL-DAG: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]

  %3 = load i32, i32* @i32
  ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
  ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
  ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
  ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])

  %4 = extractelement <4 x float> %2, i32 %3
  ; The splat must read the fadd result (%2), not the loaded vector (%1).
  ; ALL-DAG: splat.w $w0, [[R2]]{{\[}}[[IDX]]]

  ret float %4
}

; Extract element 1 of a <2 x double> computed by an fadd, without going
; through the GPRs.
define double @extract_v2f64() nounwind {
  ; ALL-LABEL: extract_v2f64:

  %1 = load <2 x double>, <2 x double>* @v2f64
  ; ALL-DAG: ld.d [[R1:\$w[0-9]+]],

  %2 = fadd <2 x double> %1, %1
  ; ALL-DAG: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]

  %3 = extractelement <2 x double> %2, i32 1
  ; Element 1 can be obtained by splatting it across the vector and extracting
  ; $w0:sub_64
  ; The splat must read the fadd result (%2), not the loaded vector (%1).
  ; ALL-DAG: splati.d $w0, [[R2]][1]
  ; ALL-NOT: copy_u.w
  ; ALL-NOT: mtc1
  ; ALL-NOT: mthc1
  ; ALL-NOT: sll
  ; ALL-NOT: sra

  ret double %3
}

; Extract element 0 of a <2 x double> computed by an fadd: no extra move is
; expected because the result already lives in the low part of $w0.
define double @extract_v2f64_elt0() nounwind {
  ; ALL-LABEL: extract_v2f64_elt0:

  %1 = load <2 x double>, <2 x double>* @v2f64
  ; ALL-DAG: ld.d [[R1:\$w[0-9]+]],

  %2 = fadd <2 x double> %1, %1
  ; ALL-DAG: fadd.d $w0, [[R1]], [[R1]]

  %3 = extractelement <2 x double> %2, i32 0
  ; Element 0 can be obtained by extracting $w0:sub_64 ($f0)
  ; ALL-NOT: copy_u.w
  ; ALL-NOT: mtc1
  ; ALL-NOT: mthc1
  ; ALL-NOT: sll
  ; ALL-NOT: sra

  ret double %3
}

; Extract a <2 x double> element selected by an index loaded at run time.
define double @extract_v2f64_vidx() nounwind {
  ; ALL-LABEL: extract_v2f64_vidx:

  %1 = load <2 x double>, <2 x double>* @v2f64
  ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v2f64)(
  ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v2f64)(
  ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v2f64)(
  ; ALL-DAG: ld.d [[R1:\$w[0-9]+]], 0([[PTR_V]])

  %2 = fadd <2 x double> %1, %1
  ; ALL-DAG: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]

  %3 = load i32, i32* @i32
  ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
  ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
  ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
  ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])

  %4 = extractelement <2 x double> %2, i32 %3
  ; The splat must read the fadd result (%2), not the loaded vector (%1).
  ; ALL-DAG: splat.d $w0, [[R2]]{{\[}}[[IDX]]]

  ret double %4
}

; Insert a float argument into element 1 of a <4 x float> and store it back.
define void @insert_v4f32(float %a) nounwind {
  ; ALL-LABEL: insert_v4f32:

  %1 = load <4 x float>, <4 x float>* @v4f32
  ; ALL-DAG: ld.w [[R1:\$w[0-9]+]],

  %2 = insertelement <4 x float> %1, float %a, i32 1
  ; float argument passed in $f12
  ; ALL-DAG: insve.w [[R1]][1], $w12[0]

  store <4 x float> %2, <4 x float>* @v4f32
  ; ALL-DAG: st.w [[R1]]

  ret void
}

; Insert a double argument into element 1 of a <2 x double> and store it back.
define void @insert_v2f64(double %a) nounwind {
  ; ALL-LABEL: insert_v2f64:

  %1 = load <2 x double>, <2 x double>* @v2f64
  ; ALL-DAG: ld.d [[R1:\$w[0-9]+]],

  %2 = insertelement <2 x double> %1, double %a, i32 1
  ; double argument passed in $f12
  ; ALL-DAG: insve.d [[R1]][1], $w12[0]

  store <2 x double> %2, <2 x double>* @v2f64
  ; ALL-DAG: st.d [[R1]]

  ret void
}

; Insert a float argument into a <4 x float> at an index loaded at run time.
; Expected sequence: scale the index to a byte offset (shift left by 2), slide
; the vector by that amount with sld.b, insert into element 0, then slide
; by the negated offset.
define void @insert_v4f32_vidx(float %a) nounwind {
  ; ALL-LABEL: insert_v4f32_vidx:

  %1 = load <4 x float>, <4 x float>* @v4f32
  ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v4f32)(
  ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v4f32)(
  ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v4f32)(
  ; ALL-DAG: ld.w [[R1:\$w[0-9]+]], 0([[PTR_V]])

  %2 = load i32, i32* @i32
  ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
  ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
  ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
  ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])

  %3 = insertelement <4 x float> %1, float %a, i32 %2
  ; float argument passed in $f12
  ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 2
  ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
  ; ALL-DAG: insve.w [[R1]][0], $w12[0]
  ; ALL-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
  ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]

  store <4 x float> %3, <4 x float>* @v4f32
  ; ALL-DAG: st.w [[R1]]

  ret void
}

; Insert a double argument into a <2 x double> at an index loaded at run time.
; Same sequence as the float case, with the index scaled by 8 (shift left
; by 3) and insve.d used for the insertion.
define void @insert_v2f64_vidx(double %a) nounwind {
  ; ALL-LABEL: insert_v2f64_vidx:

  %1 = load <2 x double>, <2 x double>* @v2f64
  ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v2f64)(
  ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v2f64)(
  ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v2f64)(
  ; ALL-DAG: ld.d [[R1:\$w[0-9]+]], 0([[PTR_V]])

  %2 = load i32, i32* @i32
  ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
  ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
  ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
  ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])

  %3 = insertelement <2 x double> %1, double %a, i32 %2
  ; double argument passed in $f12
  ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 3
  ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
  ; ALL-DAG: insve.d [[R1]][0], $w12[0]
  ; ALL-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
  ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]

  store <2 x double> %3, <2 x double>* @v2f64
  ; ALL-DAG: st.d [[R1]]

  ret void
}