1 //===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This tablegen backend is responsible for emitting arm_neon.h, which includes
11 // a declaration and definition of each function specified by the ARM NEON
12 // compiler interface. See ARM document DUI0348B.
13 //
14 // Each NEON instruction is implemented in terms of 1 or more functions which
15 // are suffixed with the element type of the input vectors. Functions may be
16 // implemented in terms of generic vector operations such as +, *, -, etc. or
17 // by calling a __builtin_-prefixed function which will be handled by clang's
18 // CodeGen library.
19 //
20 // Additional validation code can be generated by this file when runHeader() is
21 // called, rather than the normal run() entry point. A complete set of tests
22 // for Neon intrinsics can be generated by calling the runTests() entry point.
23 //
24 //===----------------------------------------------------------------------===//
25
26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/SmallString.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/StringExtras.h"
30 #include "llvm/ADT/StringMap.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/TableGen/Error.h"
33 #include "llvm/TableGen/Record.h"
34 #include "llvm/TableGen/TableGenBackend.h"
35 #include <string>
36 using namespace llvm;
37
// OpKind - One enumerator per OP_* record in arm_neon.td. OpNone means the
// intrinsic maps directly onto a __builtin call; OpUnavailable marks an
// intrinsic that must not be made available; all other kinds are emitted as
// inline C expressions in arm_neon.h.
enum OpKind {
  OpNone,
  OpUnavailable,
  // Widening/narrowing arithmetic.
  OpAdd,
  OpAddl,
  OpAddw,
  OpSub,
  OpSubl,
  OpSubw,
  // Multiply and multiply-accumulate, including _n (scalar) variants.
  OpMul,
  OpMla,
  OpMlal,
  OpMls,
  OpMlsl,
  OpMulN,
  OpMlaN,
  OpMlsN,
  OpMlalN,
  OpMlslN,
  // _lane variants (one operand indexed by a lane immediate).
  OpMulLane,
  OpMullLane,
  OpMlaLane,
  OpMlsLane,
  OpMlalLane,
  OpMlslLane,
  OpQDMullLane,
  OpQDMlalLane,
  OpQDMlslLane,
  OpQDMulhLane,
  OpQRDMulhLane,
  // Comparisons, expressed via generic vector operators.
  OpEq,
  OpGe,
  OpLe,
  OpGt,
  OpLt,
  // Unary and bitwise logical operations.
  OpNeg,
  OpNot,
  OpAnd,
  OpOr,
  OpXor,
  OpAndNot,
  OpOrNot,
  // Vector rearrangement and conversion helpers.
  OpCast,
  OpConcat,
  OpDup,
  OpDupLane,
  OpHi,
  OpLo,
  OpSelect,
  OpRev16,
  OpRev32,
  OpRev64,
  OpReinterpret,
  // Absolute-difference family.
  OpAbdl,
  OpAba,
  OpAbal,
  OpDiv
};
96
// ClassKind - Classifies an intrinsic by the kind of type suffix its mangled
// name takes (see MangleName / InstructionTypeCode below).
enum ClassKind {
  ClassNone,
  ClassI,     // generic integer instruction, e.g., "i8" suffix
  ClassS,     // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
  ClassW,     // width-specific instruction, e.g., "8" suffix
  ClassB,     // bitcast arguments with enum argument to specify type
  ClassL,     // Logical instructions which are op instructions
              // but we need to not emit any suffix for in our
              // tests.
  ClassNoTest // Instructions which we do not test since they are
              // not TRUE instructions.
};
109
/// NeonTypeFlags - Flags to identify the types for overloaded Neon
/// builtins.  These must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
namespace {
class NeonTypeFlags {
  // Bit layout of Flags: low 4 bits hold the EltType, then one bit each for
  // unsigned and quad (128-bit) modifiers.
  enum {
    EltTypeMask = 0xf,
    UnsignedFlag = 0x10,
    QuadFlag = 0x20
  };
  uint32_t Flags;

public:
  /// EltType - The element type of a Neon vector; values occupy EltTypeMask.
  enum EltType {
    Int8,
    Int16,
    Int32,
    Int64,
    Poly8,
    Poly16,
    Float16,
    Float32,
    Float64
  };

  // NOTE(review): intentionally non-explicit so a raw flag word converts
  // implicitly, matching how the flags are decoded on the clang side.
  NeonTypeFlags(unsigned F) : Flags(F) {}
  NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad) : Flags(ET) {
    if (IsUnsigned)
      Flags |= UnsignedFlag;
    if (IsQuad)
      Flags |= QuadFlag;
  }

  /// getFlags - Return the encoded flag word for emission into builtins.
  uint32_t getFlags() const { return Flags; }
};
} // end anonymous namespace
146
147 namespace {
148 class NeonEmitter {
149 RecordKeeper &Records;
150 StringMap<OpKind> OpMap;
151 DenseMap<Record*, ClassKind> ClassMap;
152
153 public:
NeonEmitter(RecordKeeper & R)154 NeonEmitter(RecordKeeper &R) : Records(R) {
155 OpMap["OP_NONE"] = OpNone;
156 OpMap["OP_UNAVAILABLE"] = OpUnavailable;
157 OpMap["OP_ADD"] = OpAdd;
158 OpMap["OP_ADDL"] = OpAddl;
159 OpMap["OP_ADDW"] = OpAddw;
160 OpMap["OP_SUB"] = OpSub;
161 OpMap["OP_SUBL"] = OpSubl;
162 OpMap["OP_SUBW"] = OpSubw;
163 OpMap["OP_MUL"] = OpMul;
164 OpMap["OP_MLA"] = OpMla;
165 OpMap["OP_MLAL"] = OpMlal;
166 OpMap["OP_MLS"] = OpMls;
167 OpMap["OP_MLSL"] = OpMlsl;
168 OpMap["OP_MUL_N"] = OpMulN;
169 OpMap["OP_MLA_N"] = OpMlaN;
170 OpMap["OP_MLS_N"] = OpMlsN;
171 OpMap["OP_MLAL_N"] = OpMlalN;
172 OpMap["OP_MLSL_N"] = OpMlslN;
173 OpMap["OP_MUL_LN"]= OpMulLane;
174 OpMap["OP_MULL_LN"] = OpMullLane;
175 OpMap["OP_MLA_LN"]= OpMlaLane;
176 OpMap["OP_MLS_LN"]= OpMlsLane;
177 OpMap["OP_MLAL_LN"] = OpMlalLane;
178 OpMap["OP_MLSL_LN"] = OpMlslLane;
179 OpMap["OP_QDMULL_LN"] = OpQDMullLane;
180 OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
181 OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
182 OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
183 OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
184 OpMap["OP_EQ"] = OpEq;
185 OpMap["OP_GE"] = OpGe;
186 OpMap["OP_LE"] = OpLe;
187 OpMap["OP_GT"] = OpGt;
188 OpMap["OP_LT"] = OpLt;
189 OpMap["OP_NEG"] = OpNeg;
190 OpMap["OP_NOT"] = OpNot;
191 OpMap["OP_AND"] = OpAnd;
192 OpMap["OP_OR"] = OpOr;
193 OpMap["OP_XOR"] = OpXor;
194 OpMap["OP_ANDN"] = OpAndNot;
195 OpMap["OP_ORN"] = OpOrNot;
196 OpMap["OP_CAST"] = OpCast;
197 OpMap["OP_CONC"] = OpConcat;
198 OpMap["OP_HI"] = OpHi;
199 OpMap["OP_LO"] = OpLo;
200 OpMap["OP_DUP"] = OpDup;
201 OpMap["OP_DUP_LN"] = OpDupLane;
202 OpMap["OP_SEL"] = OpSelect;
203 OpMap["OP_REV16"] = OpRev16;
204 OpMap["OP_REV32"] = OpRev32;
205 OpMap["OP_REV64"] = OpRev64;
206 OpMap["OP_REINT"] = OpReinterpret;
207 OpMap["OP_ABDL"] = OpAbdl;
208 OpMap["OP_ABA"] = OpAba;
209 OpMap["OP_ABAL"] = OpAbal;
210 OpMap["OP_DIV"] = OpDiv;
211
212 Record *SI = R.getClass("SInst");
213 Record *II = R.getClass("IInst");
214 Record *WI = R.getClass("WInst");
215 Record *SOpI = R.getClass("SOpInst");
216 Record *IOpI = R.getClass("IOpInst");
217 Record *WOpI = R.getClass("WOpInst");
218 Record *LOpI = R.getClass("LOpInst");
219 Record *NoTestOpI = R.getClass("NoTestOpInst");
220
221 ClassMap[SI] = ClassS;
222 ClassMap[II] = ClassI;
223 ClassMap[WI] = ClassW;
224 ClassMap[SOpI] = ClassS;
225 ClassMap[IOpI] = ClassI;
226 ClassMap[WOpI] = ClassW;
227 ClassMap[LOpI] = ClassL;
228 ClassMap[NoTestOpI] = ClassNoTest;
229 }
230
231 // run - Emit arm_neon.h.inc
232 void run(raw_ostream &o);
233
234 // runHeader - Emit all the __builtin prototypes used in arm_neon.h
235 void runHeader(raw_ostream &o);
236
237 // runTests - Emit tests for all the Neon intrinsics.
238 void runTests(raw_ostream &o);
239
240 private:
241 void emitIntrinsic(raw_ostream &OS, Record *R,
242 StringMap<ClassKind> &EmittedMap);
243 void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap,
244 bool isA64GenBuiltinDef);
245 void genOverloadTypeCheckCode(raw_ostream &OS,
246 StringMap<ClassKind> &A64IntrinsicMap,
247 bool isA64TypeCheck);
248 void genIntrinsicRangeCheckCode(raw_ostream &OS,
249 StringMap<ClassKind> &A64IntrinsicMap,
250 bool isA64RangeCheck);
251 void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
252 bool isA64TestGen);
253 };
254 } // end anonymous namespace
255
256 /// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
257 /// which each StringRef representing a single type declared in the string.
258 /// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
259 /// 2xfloat and 4xfloat respectively.
ParseTypes(Record * r,std::string & s,SmallVectorImpl<StringRef> & TV)260 static void ParseTypes(Record *r, std::string &s,
261 SmallVectorImpl<StringRef> &TV) {
262 const char *data = s.data();
263 int len = 0;
264
265 for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
266 if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U')
267 continue;
268
269 switch (data[len]) {
270 case 'c':
271 case 's':
272 case 'i':
273 case 'l':
274 case 'h':
275 case 'f':
276 case 'd':
277 break;
278 default:
279 PrintFatalError(r->getLoc(),
280 "Unexpected letter: " + std::string(data + len, 1));
281 }
282 TV.push_back(StringRef(data, len + 1));
283 data += len + 1;
284 len = -1;
285 }
286 }
287
288 /// Widen - Convert a type code into the next wider type. char -> short,
289 /// short -> int, etc.
Widen(const char t)290 static char Widen(const char t) {
291 switch (t) {
292 case 'c':
293 return 's';
294 case 's':
295 return 'i';
296 case 'i':
297 return 'l';
298 case 'h':
299 return 'f';
300 default:
301 PrintFatalError("unhandled type in widen!");
302 }
303 }
304
305 /// Narrow - Convert a type code into the next smaller type. short -> char,
306 /// float -> half float, etc.
Narrow(const char t)307 static char Narrow(const char t) {
308 switch (t) {
309 case 's':
310 return 'c';
311 case 'i':
312 return 's';
313 case 'l':
314 return 'i';
315 case 'f':
316 return 'h';
317 default:
318 PrintFatalError("unhandled type in narrow!");
319 }
320 }
321
322 /// For a particular StringRef, return the base type code, and whether it has
323 /// the quad-vector, polynomial, or unsigned modifiers set.
ClassifyType(StringRef ty,bool & quad,bool & poly,bool & usgn)324 static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
325 unsigned off = 0;
326
327 // remember quad.
328 if (ty[off] == 'Q') {
329 quad = true;
330 ++off;
331 }
332
333 // remember poly.
334 if (ty[off] == 'P') {
335 poly = true;
336 ++off;
337 }
338
339 // remember unsigned.
340 if (ty[off] == 'U') {
341 usgn = true;
342 ++off;
343 }
344
345 // base type to get the type string for.
346 return ty[off];
347 }
348
349 /// ModType - Transform a type code and its modifiers based on a mod code. The
350 /// mod code definitions may be found at the top of arm_neon.td.
ModType(const char mod,char type,bool & quad,bool & poly,bool & usgn,bool & scal,bool & cnst,bool & pntr)351 static char ModType(const char mod, char type, bool &quad, bool &poly,
352 bool &usgn, bool &scal, bool &cnst, bool &pntr) {
353 switch (mod) {
354 case 't':
355 if (poly) {
356 poly = false;
357 usgn = true;
358 }
359 break;
360 case 'u':
361 usgn = true;
362 poly = false;
363 if (type == 'f')
364 type = 'i';
365 if (type == 'd')
366 type = 'l';
367 break;
368 case 'x':
369 usgn = false;
370 poly = false;
371 if (type == 'f')
372 type = 'i';
373 break;
374 case 'f':
375 if (type == 'h')
376 quad = true;
377 type = 'f';
378 usgn = false;
379 break;
380 case 'g':
381 quad = false;
382 break;
383 case 'w':
384 type = Widen(type);
385 quad = true;
386 break;
387 case 'n':
388 type = Widen(type);
389 break;
390 case 'i':
391 type = 'i';
392 scal = true;
393 break;
394 case 'l':
395 type = 'l';
396 scal = true;
397 usgn = true;
398 break;
399 case 's':
400 case 'a':
401 scal = true;
402 break;
403 case 'k':
404 quad = true;
405 break;
406 case 'c':
407 cnst = true;
408 case 'p':
409 pntr = true;
410 scal = true;
411 break;
412 case 'h':
413 type = Narrow(type);
414 if (type == 'h')
415 quad = false;
416 break;
417 case 'e':
418 type = Narrow(type);
419 usgn = true;
420 break;
421 default:
422 break;
423 }
424 return type;
425 }
426
427 /// TypeString - for a modifier and type, generate the name of the typedef for
428 /// that type. QUc -> uint8x8_t.
TypeString(const char mod,StringRef typestr)429 static std::string TypeString(const char mod, StringRef typestr) {
430 bool quad = false;
431 bool poly = false;
432 bool usgn = false;
433 bool scal = false;
434 bool cnst = false;
435 bool pntr = false;
436
437 if (mod == 'v')
438 return "void";
439 if (mod == 'i')
440 return "int";
441
442 // base type to get the type string for.
443 char type = ClassifyType(typestr, quad, poly, usgn);
444
445 // Based on the modifying character, change the type and width if necessary.
446 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
447
448 SmallString<128> s;
449
450 if (usgn)
451 s.push_back('u');
452
453 switch (type) {
454 case 'c':
455 s += poly ? "poly8" : "int8";
456 if (scal)
457 break;
458 s += quad ? "x16" : "x8";
459 break;
460 case 's':
461 s += poly ? "poly16" : "int16";
462 if (scal)
463 break;
464 s += quad ? "x8" : "x4";
465 break;
466 case 'i':
467 s += "int32";
468 if (scal)
469 break;
470 s += quad ? "x4" : "x2";
471 break;
472 case 'l':
473 s += "int64";
474 if (scal)
475 break;
476 s += quad ? "x2" : "x1";
477 break;
478 case 'h':
479 s += "float16";
480 if (scal)
481 break;
482 s += quad ? "x8" : "x4";
483 break;
484 case 'f':
485 s += "float32";
486 if (scal)
487 break;
488 s += quad ? "x4" : "x2";
489 break;
490 case 'd':
491 s += "float64";
492 if (scal)
493 break;
494 s += quad ? "x2" : "x1";
495 break;
496
497 default:
498 PrintFatalError("unhandled type!");
499 }
500
501 if (mod == '2')
502 s += "x2";
503 if (mod == '3')
504 s += "x3";
505 if (mod == '4')
506 s += "x4";
507
508 // Append _t, finishing the type string typedef type.
509 s += "_t";
510
511 if (cnst)
512 s += " const";
513
514 if (pntr)
515 s += " *";
516
517 return s.str();
518 }
519
520 /// BuiltinTypeString - for a modifier and type, generate the clang
521 /// BuiltinsARM.def prototype code for the function. See the top of clang's
522 /// Builtins.def for a description of the type strings.
BuiltinTypeString(const char mod,StringRef typestr,ClassKind ck,bool ret)523 static std::string BuiltinTypeString(const char mod, StringRef typestr,
524 ClassKind ck, bool ret) {
525 bool quad = false;
526 bool poly = false;
527 bool usgn = false;
528 bool scal = false;
529 bool cnst = false;
530 bool pntr = false;
531
532 if (mod == 'v')
533 return "v"; // void
534 if (mod == 'i')
535 return "i"; // int
536
537 // base type to get the type string for.
538 char type = ClassifyType(typestr, quad, poly, usgn);
539
540 // Based on the modifying character, change the type and width if necessary.
541 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
542
543 // All pointers are void* pointers. Change type to 'v' now.
544 if (pntr) {
545 usgn = false;
546 poly = false;
547 type = 'v';
548 }
549 // Treat half-float ('h') types as unsigned short ('s') types.
550 if (type == 'h') {
551 type = 's';
552 usgn = true;
553 }
554 usgn = usgn | poly | ((ck == ClassI || ck == ClassW) && scal && type != 'f');
555
556 if (scal) {
557 SmallString<128> s;
558
559 if (usgn)
560 s.push_back('U');
561 else if (type == 'c')
562 s.push_back('S'); // make chars explicitly signed
563
564 if (type == 'l') // 64-bit long
565 s += "LLi";
566 else
567 s.push_back(type);
568
569 if (cnst)
570 s.push_back('C');
571 if (pntr)
572 s.push_back('*');
573 return s.str();
574 }
575
576 // Since the return value must be one type, return a vector type of the
577 // appropriate width which we will bitcast. An exception is made for
578 // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
579 // fashion, storing them to a pointer arg.
580 if (ret) {
581 if (mod >= '2' && mod <= '4')
582 return "vv*"; // void result with void* first argument
583 if (mod == 'f' || (ck != ClassB && type == 'f'))
584 return quad ? "V4f" : "V2f";
585 if (ck != ClassB && type == 's')
586 return quad ? "V8s" : "V4s";
587 if (ck != ClassB && type == 'i')
588 return quad ? "V4i" : "V2i";
589 if (ck != ClassB && type == 'l')
590 return quad ? "V2LLi" : "V1LLi";
591
592 return quad ? "V16Sc" : "V8Sc";
593 }
594
595 // Non-return array types are passed as individual vectors.
596 if (mod == '2')
597 return quad ? "V16ScV16Sc" : "V8ScV8Sc";
598 if (mod == '3')
599 return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
600 if (mod == '4')
601 return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";
602
603 if (mod == 'f' || (ck != ClassB && type == 'f'))
604 return quad ? "V4f" : "V2f";
605 if (ck != ClassB && type == 's')
606 return quad ? "V8s" : "V4s";
607 if (ck != ClassB && type == 'i')
608 return quad ? "V4i" : "V2i";
609 if (ck != ClassB && type == 'l')
610 return quad ? "V2LLi" : "V1LLi";
611
612 return quad ? "V16Sc" : "V8Sc";
613 }
614
615 /// InstructionTypeCode - Computes the ARM argument character code and
616 /// quad status for a specific type string and ClassKind.
InstructionTypeCode(const StringRef & typeStr,const ClassKind ck,bool & quad,std::string & typeCode)617 static void InstructionTypeCode(const StringRef &typeStr,
618 const ClassKind ck,
619 bool &quad,
620 std::string &typeCode) {
621 bool poly = false;
622 bool usgn = false;
623 char type = ClassifyType(typeStr, quad, poly, usgn);
624
625 switch (type) {
626 case 'c':
627 switch (ck) {
628 case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break;
629 case ClassI: typeCode = "i8"; break;
630 case ClassW: typeCode = "8"; break;
631 default: break;
632 }
633 break;
634 case 's':
635 switch (ck) {
636 case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break;
637 case ClassI: typeCode = "i16"; break;
638 case ClassW: typeCode = "16"; break;
639 default: break;
640 }
641 break;
642 case 'i':
643 switch (ck) {
644 case ClassS: typeCode = usgn ? "u32" : "s32"; break;
645 case ClassI: typeCode = "i32"; break;
646 case ClassW: typeCode = "32"; break;
647 default: break;
648 }
649 break;
650 case 'l':
651 switch (ck) {
652 case ClassS: typeCode = usgn ? "u64" : "s64"; break;
653 case ClassI: typeCode = "i64"; break;
654 case ClassW: typeCode = "64"; break;
655 default: break;
656 }
657 break;
658 case 'h':
659 switch (ck) {
660 case ClassS:
661 case ClassI: typeCode = "f16"; break;
662 case ClassW: typeCode = "16"; break;
663 default: break;
664 }
665 break;
666 case 'f':
667 switch (ck) {
668 case ClassS:
669 case ClassI: typeCode = "f32"; break;
670 case ClassW: typeCode = "32"; break;
671 default: break;
672 }
673 break;
674 case 'd':
675 switch (ck) {
676 case ClassS:
677 case ClassI:
678 typeCode += "f64";
679 break;
680 case ClassW:
681 PrintFatalError("unhandled type!");
682 default:
683 break;
684 }
685 break;
686 default:
687 PrintFatalError("unhandled type!");
688 }
689 }
690
691 /// MangleName - Append a type or width suffix to a base neon function name,
692 /// and insert a 'q' in the appropriate location if the operation works on
693 /// 128b rather than 64b. E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
MangleName(const std::string & name,StringRef typestr,ClassKind ck)694 static std::string MangleName(const std::string &name, StringRef typestr,
695 ClassKind ck) {
696 if (name == "vcvt_f32_f16")
697 return name;
698
699 bool quad = false;
700 std::string typeCode = "";
701
702 InstructionTypeCode(typestr, ck, quad, typeCode);
703
704 std::string s = name;
705
706 if (typeCode.size() > 0) {
707 s += "_" + typeCode;
708 }
709
710 if (ck == ClassB)
711 s += "_v";
712
713 // Insert a 'q' before the first '_' character so that it ends up before
714 // _lane or _n on vector-scalar operations.
715 if (quad) {
716 size_t pos = s.find('_');
717 s = s.insert(pos, "q");
718 }
719
720 return s;
721 }
722
PreprocessInstruction(const StringRef & Name,const std::string & InstName,std::string & Prefix,bool & HasNPostfix,bool & HasLanePostfix,bool & HasDupPostfix,bool & IsSpecialVCvt,size_t & TBNumber)723 static void PreprocessInstruction(const StringRef &Name,
724 const std::string &InstName,
725 std::string &Prefix,
726 bool &HasNPostfix,
727 bool &HasLanePostfix,
728 bool &HasDupPostfix,
729 bool &IsSpecialVCvt,
730 size_t &TBNumber) {
731 // All of our instruction name fields from arm_neon.td are of the form
732 // <instructionname>_...
733 // Thus we grab our instruction name via computation of said Prefix.
734 const size_t PrefixEnd = Name.find_first_of('_');
735 // If InstName is passed in, we use that instead of our name Prefix.
736 Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName;
737
738 const StringRef Postfix = Name.slice(PrefixEnd, Name.size());
739
740 HasNPostfix = Postfix.count("_n");
741 HasLanePostfix = Postfix.count("_lane");
742 HasDupPostfix = Postfix.count("_dup");
743 IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt");
744
745 if (InstName.compare("vtbl") == 0 ||
746 InstName.compare("vtbx") == 0) {
747 // If we have a vtblN/vtbxN instruction, use the instruction's ASCII
748 // encoding to get its true value.
749 TBNumber = Name[Name.size()-1] - 48;
750 }
751 }
752
753 /// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have
754 /// extracted, generate a FileCheck pattern for a Load Or Store
755 static void
GenerateRegisterCheckPatternForLoadStores(const StringRef & NameRef,const std::string & OutTypeCode,const bool & IsQuad,const bool & HasDupPostfix,const bool & HasLanePostfix,const size_t Count,std::string & RegisterSuffix)756 GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef,
757 const std::string& OutTypeCode,
758 const bool &IsQuad,
759 const bool &HasDupPostfix,
760 const bool &HasLanePostfix,
761 const size_t Count,
762 std::string &RegisterSuffix) {
763 const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1");
764 // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang
765 // will output a series of v{ld,st}1s, so we have to handle it specially.
766 if ((Count == 3 || Count == 4) && IsQuad) {
767 RegisterSuffix += "{";
768 for (size_t i = 0; i < Count; i++) {
769 RegisterSuffix += "d{{[0-9]+}}";
770 if (HasDupPostfix) {
771 RegisterSuffix += "[]";
772 }
773 if (HasLanePostfix) {
774 RegisterSuffix += "[{{[0-9]+}}]";
775 }
776 if (i < Count-1) {
777 RegisterSuffix += ", ";
778 }
779 }
780 RegisterSuffix += "}";
781 } else {
782
783 // Handle normal loads and stores.
784 RegisterSuffix += "{";
785 for (size_t i = 0; i < Count; i++) {
786 RegisterSuffix += "d{{[0-9]+}}";
787 if (HasDupPostfix) {
788 RegisterSuffix += "[]";
789 }
790 if (HasLanePostfix) {
791 RegisterSuffix += "[{{[0-9]+}}]";
792 }
793 if (IsQuad && !HasLanePostfix) {
794 RegisterSuffix += ", d{{[0-9]+}}";
795 if (HasDupPostfix) {
796 RegisterSuffix += "[]";
797 }
798 }
799 if (i < Count-1) {
800 RegisterSuffix += ", ";
801 }
802 }
803 RegisterSuffix += "}, [r{{[0-9]+}}";
804
805 // We only include the alignment hint if we have a vld1.*64 or
806 // a dup/lane instruction.
807 if (IsLDSTOne) {
808 if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") {
809 RegisterSuffix += ":" + OutTypeCode;
810 }
811 }
812
813 RegisterSuffix += "]";
814 }
815 }
816
HasNPostfixAndScalarArgs(const StringRef & NameRef,const bool & HasNPostfix)817 static bool HasNPostfixAndScalarArgs(const StringRef &NameRef,
818 const bool &HasNPostfix) {
819 return (NameRef.count("vmla") ||
820 NameRef.count("vmlal") ||
821 NameRef.count("vmlsl") ||
822 NameRef.count("vmull") ||
823 NameRef.count("vqdmlal") ||
824 NameRef.count("vqdmlsl") ||
825 NameRef.count("vqdmulh") ||
826 NameRef.count("vqdmull") ||
827 NameRef.count("vqrdmulh")) && HasNPostfix;
828 }
829
IsFiveOperandLaneAccumulator(const StringRef & NameRef,const bool & HasLanePostfix)830 static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef,
831 const bool &HasLanePostfix) {
832 return (NameRef.count("vmla") ||
833 NameRef.count("vmls") ||
834 NameRef.count("vmlal") ||
835 NameRef.count("vmlsl") ||
836 (NameRef.count("vmul") && NameRef.size() == 3)||
837 NameRef.count("vqdmlal") ||
838 NameRef.count("vqdmlsl") ||
839 NameRef.count("vqdmulh") ||
840 NameRef.count("vqrdmulh")) && HasLanePostfix;
841 }
842
IsSpecialLaneMultiply(const StringRef & NameRef,const bool & HasLanePostfix,const bool & IsQuad)843 static bool IsSpecialLaneMultiply(const StringRef &NameRef,
844 const bool &HasLanePostfix,
845 const bool &IsQuad) {
846 const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh"))
847 && IsQuad;
848 const bool IsVMull = NameRef.count("mull") && !IsQuad;
849 return (IsVMulOrMulh || IsVMull) && HasLanePostfix;
850 }
851
NormalizeProtoForRegisterPatternCreation(const std::string & Name,const std::string & Proto,const bool & HasNPostfix,const bool & IsQuad,const bool & HasLanePostfix,const bool & HasDupPostfix,std::string & NormedProto)852 static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
853 const std::string &Proto,
854 const bool &HasNPostfix,
855 const bool &IsQuad,
856 const bool &HasLanePostfix,
857 const bool &HasDupPostfix,
858 std::string &NormedProto) {
859 // Handle generic case.
860 const StringRef NameRef(Name);
861 for (size_t i = 0, end = Proto.size(); i < end; i++) {
862 switch (Proto[i]) {
863 case 'u':
864 case 'f':
865 case 'd':
866 case 's':
867 case 'x':
868 case 't':
869 case 'n':
870 NormedProto += IsQuad? 'q' : 'd';
871 break;
872 case 'w':
873 case 'k':
874 NormedProto += 'q';
875 break;
876 case 'g':
877 case 'h':
878 case 'e':
879 NormedProto += 'd';
880 break;
881 case 'i':
882 NormedProto += HasLanePostfix? 'a' : 'i';
883 break;
884 case 'a':
885 if (HasLanePostfix) {
886 NormedProto += 'a';
887 } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) {
888 NormedProto += IsQuad? 'q' : 'd';
889 } else {
890 NormedProto += 'i';
891 }
892 break;
893 }
894 }
895
896 // Handle Special Cases.
897 const bool IsNotVExt = !NameRef.count("vext");
898 const bool IsVPADAL = NameRef.count("vpadal");
899 const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef,
900 HasLanePostfix);
901 const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix,
902 IsQuad);
903
904 if (IsSpecialLaneMul) {
905 // If
906 NormedProto[2] = NormedProto[3];
907 NormedProto.erase(3);
908 } else if (NormedProto.size() == 4 &&
909 NormedProto[0] == NormedProto[1] &&
910 IsNotVExt) {
911 // If NormedProto.size() == 4 and the first two proto characters are the
912 // same, ignore the first.
913 NormedProto = NormedProto.substr(1, 3);
914 } else if (Is5OpLaneAccum) {
915 // If we have a 5 op lane accumulator operation, we take characters 1,2,4
916 std::string tmp = NormedProto.substr(1,2);
917 tmp += NormedProto[4];
918 NormedProto = tmp;
919 } else if (IsVPADAL) {
920 // If we have VPADAL, ignore the first character.
921 NormedProto = NormedProto.substr(0, 2);
922 } else if (NameRef.count("vdup") && NormedProto.size() > 2) {
923 // If our instruction is a dup instruction, keep only the first and
924 // last characters.
925 std::string tmp = "";
926 tmp += NormedProto[0];
927 tmp += NormedProto[NormedProto.size()-1];
928 NormedProto = tmp;
929 }
930 }
931
932 /// GenerateRegisterCheckPatterns - Given a bunch of data we have
933 /// extracted, generate a FileCheck pattern to check that an
934 /// instruction's arguments are correct.
GenerateRegisterCheckPattern(const std::string & Name,const std::string & Proto,const std::string & OutTypeCode,const bool & HasNPostfix,const bool & IsQuad,const bool & HasLanePostfix,const bool & HasDupPostfix,const size_t & TBNumber,std::string & RegisterSuffix)935 static void GenerateRegisterCheckPattern(const std::string &Name,
936 const std::string &Proto,
937 const std::string &OutTypeCode,
938 const bool &HasNPostfix,
939 const bool &IsQuad,
940 const bool &HasLanePostfix,
941 const bool &HasDupPostfix,
942 const size_t &TBNumber,
943 std::string &RegisterSuffix) {
944
945 RegisterSuffix = "";
946
947 const StringRef NameRef(Name);
948 const StringRef ProtoRef(Proto);
949
950 if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) {
951 return;
952 }
953
954 const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst");
955 const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx");
956
957 if (IsLoadStore) {
958 // Grab N value from v{ld,st}N using its ascii representation.
959 const size_t Count = NameRef[3] - 48;
960
961 GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad,
962 HasDupPostfix, HasLanePostfix,
963 Count, RegisterSuffix);
964 } else if (IsTBXOrTBL) {
965 RegisterSuffix += "d{{[0-9]+}}, {";
966 for (size_t i = 0; i < TBNumber-1; i++) {
967 RegisterSuffix += "d{{[0-9]+}}, ";
968 }
969 RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}";
970 } else {
971 // Handle a normal instruction.
972 if (NameRef.count("vget") || NameRef.count("vset"))
973 return;
974
975 // We first normalize our proto, since we only need to emit 4
976 // different types of checks, yet have more than 4 proto types
977 // that map onto those 4 patterns.
978 std::string NormalizedProto("");
979 NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad,
980 HasLanePostfix, HasDupPostfix,
981 NormalizedProto);
982
983 for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) {
984 const char &c = NormalizedProto[i];
985 switch (c) {
986 case 'q':
987 RegisterSuffix += "q{{[0-9]+}}, ";
988 break;
989
990 case 'd':
991 RegisterSuffix += "d{{[0-9]+}}, ";
992 break;
993
994 case 'i':
995 RegisterSuffix += "#{{[0-9]+}}, ";
996 break;
997
998 case 'a':
999 RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], ";
1000 break;
1001 }
1002 }
1003
1004 // Remove extra ", ".
1005 RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2);
1006 }
1007 }
1008
1009 /// GenerateChecksForIntrinsic - Given a specific instruction name +
1010 /// typestr + class kind, generate the proper set of FileCheck
1011 /// Patterns to check for. We could just return a string, but instead
1012 /// use a vector since it provides us with the extra flexibility of
1013 /// emitting multiple checks, which comes in handy for certain cases
1014 /// like mla where we want to check for 2 different instructions.
GenerateChecksForIntrinsic(const std::string & Name,const std::string & Proto,StringRef & OutTypeStr,StringRef & InTypeStr,ClassKind Ck,const std::string & InstName,bool IsHiddenLOp,std::vector<std::string> & Result)1015 static void GenerateChecksForIntrinsic(const std::string &Name,
1016 const std::string &Proto,
1017 StringRef &OutTypeStr,
1018 StringRef &InTypeStr,
1019 ClassKind Ck,
1020 const std::string &InstName,
1021 bool IsHiddenLOp,
1022 std::vector<std::string>& Result) {
1023
1024 // If Ck is a ClassNoTest instruction, just return so no test is
1025 // emitted.
1026 if(Ck == ClassNoTest)
1027 return;
1028
1029 if (Name == "vcvt_f32_f16") {
1030 Result.push_back("vcvt.f32.f16");
1031 return;
1032 }
1033
1034
1035 // Now we preprocess our instruction given the data we have to get the
1036 // data that we need.
1037 // Create a StringRef for String Manipulation of our Name.
1038 const StringRef NameRef(Name);
1039 // Instruction Prefix.
1040 std::string Prefix;
1041 // The type code for our out type string.
1042 std::string OutTypeCode;
1043 // To handle our different cases, we need to check for different postfixes.
1044 // Is our instruction a quad instruction.
1045 bool IsQuad = false;
1046 // Our instruction is of the form <instructionname>_n.
1047 bool HasNPostfix = false;
1048 // Our instruction is of the form <instructionname>_lane.
1049 bool HasLanePostfix = false;
1050 // Our instruction is of the form <instructionname>_dup.
1051 bool HasDupPostfix = false;
1052 // Our instruction is a vcvt instruction which requires special handling.
1053 bool IsSpecialVCvt = false;
1054 // If we have a vtbxN or vtblN instruction, this is set to N.
1055 size_t TBNumber = -1;
1056 // Register Suffix
1057 std::string RegisterSuffix;
1058
1059 PreprocessInstruction(NameRef, InstName, Prefix,
1060 HasNPostfix, HasLanePostfix, HasDupPostfix,
1061 IsSpecialVCvt, TBNumber);
1062
1063 InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode);
1064 GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad,
1065 HasLanePostfix, HasDupPostfix, TBNumber,
1066 RegisterSuffix);
1067
1068 // In the following section, we handle a bunch of special cases. You can tell
1069 // a special case by the fact we are returning early.
1070
1071 // If our instruction is a logical instruction without postfix or a
1072 // hidden LOp just return the current Prefix.
1073 if (Ck == ClassL || IsHiddenLOp) {
1074 Result.push_back(Prefix + " " + RegisterSuffix);
1075 return;
1076 }
1077
1078 // If we have a vmov, due to the many different cases, some of which
1079 // vary within the different intrinsics generated for a single
1080 // instruction type, just output a vmov. (e.g. given an instruction
1081 // A, A.u32 might be vmov and A.u8 might be vmov.8).
1082 //
1083 // FIXME: Maybe something can be done about this. The two cases that we care
1084 // about are vmov as an LType and vmov as a WType.
1085 if (Prefix == "vmov") {
1086 Result.push_back(Prefix + " " + RegisterSuffix);
1087 return;
1088 }
1089
1090 // In the following section, we handle special cases.
1091
1092 if (OutTypeCode == "64") {
1093 // If we have a 64 bit vdup/vext and are handling an uint64x1_t
1094 // type, the intrinsic will be optimized away, so just return
1095 // nothing. On the other hand if we are handling an uint64x2_t
1096 // (i.e. quad instruction), vdup/vmov instructions should be
1097 // emitted.
1098 if (Prefix == "vdup" || Prefix == "vext") {
1099 if (IsQuad) {
1100 Result.push_back("{{vmov|vdup}}");
1101 }
1102 return;
1103 }
1104
1105 // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with
1106 // multiple register operands.
1107 bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3"
1108 || Prefix == "vld4";
1109 bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3"
1110 || Prefix == "vst4";
1111 if (MultiLoadPrefix || MultiStorePrefix) {
1112 Result.push_back(NameRef.slice(0, 3).str() + "1.64");
1113 return;
1114 }
1115
1116 // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of
1117 // emitting said instructions. So return a check for
1118 // vldr/vstr/vmov/str instead.
1119 if (HasLanePostfix || HasDupPostfix) {
1120 if (Prefix == "vst1") {
1121 Result.push_back("{{str|vstr|vmov}}");
1122 return;
1123 } else if (Prefix == "vld1") {
1124 Result.push_back("{{ldr|vldr|vmov}}");
1125 return;
1126 }
1127 }
1128 }
1129
1130 // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are
1131 // sometimes disassembled as vtrn.32. We use a regex to handle both
1132 // cases.
1133 if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") {
1134 Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix);
1135 return;
1136 }
1137
1138 // Currently on most ARM processors, we do not use vmla/vmls for
1139 // quad floating point operations. Instead we output vmul + vadd. So
1140 // check if we have one of those instructions and just output a
1141 // check for vmul.
1142 if (OutTypeCode == "f32") {
1143 if (Prefix == "vmls") {
1144 Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
1145 Result.push_back("vsub." + OutTypeCode);
1146 return;
1147 } else if (Prefix == "vmla") {
1148 Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
1149 Result.push_back("vadd." + OutTypeCode);
1150 return;
1151 }
1152 }
1153
1154 // If we have vcvt, get the input type from the instruction name
1155 // (which should be of the form instname_inputtype) and append it
1156 // before the output type.
1157 if (Prefix == "vcvt") {
1158 const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1);
1159 Prefix += "." + inTypeCode;
1160 }
1161
1162 // Append output type code to get our final mangled instruction.
1163 Prefix += "." + OutTypeCode;
1164
1165 Result.push_back(Prefix + " " + RegisterSuffix);
1166 }
1167
/// UseMacro - Examine the prototype string to determine if the intrinsic
/// should be defined as a preprocessor macro instead of an inline function.
static bool UseMacro(const std::string &proto) {
  // Two situations force a macro definition:
  //  - an immediate argument ('i'), which must reach SemaChecking unmodified
  //    so the user's literal can be range checked; and
  //  - pointer arguments ('p' or 'c'), where a real function parameter would
  //    hide aligned attributes carried by the pointer type.
  return proto.find_first_of("ipc") != std::string::npos;
}
1185
/// MacroArgUsedDirectly - Return true if argument i for an intrinsic that is
/// defined as a macro should be accessed directly instead of being first
/// assigned to a local temporary.
static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
  switch (proto[i]) {
  case 'i': // constant int: must be passed through for Sema range checking
  case 'p': // pointer: keep aligned attributes visible
  case 'c': // const pointer: same as 'p'
    return true;
  default:
    return false;
  }
}
1193
1194 // Generate the string "(argtype a, argtype b, ...)"
GenArgs(const std::string & proto,StringRef typestr)1195 static std::string GenArgs(const std::string &proto, StringRef typestr) {
1196 bool define = UseMacro(proto);
1197 char arg = 'a';
1198
1199 std::string s;
1200 s += "(";
1201
1202 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1203 if (define) {
1204 // Some macro arguments are used directly instead of being assigned
1205 // to local temporaries; prepend an underscore prefix to make their
1206 // names consistent with the local temporaries.
1207 if (MacroArgUsedDirectly(proto, i))
1208 s += "__";
1209 } else {
1210 s += TypeString(proto[i], typestr) + " __";
1211 }
1212 s.push_back(arg);
1213 if ((i + 1) < e)
1214 s += ", ";
1215 }
1216
1217 s += ")";
1218 return s;
1219 }
1220
1221 // Macro arguments are not type-checked like inline function arguments, so
1222 // assign them to local temporaries to get the right type checking.
GenMacroLocals(const std::string & proto,StringRef typestr)1223 static std::string GenMacroLocals(const std::string &proto, StringRef typestr) {
1224 char arg = 'a';
1225 std::string s;
1226 bool generatedLocal = false;
1227
1228 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1229 // Do not create a temporary for an immediate argument.
1230 // That would defeat the whole point of using a macro!
1231 if (MacroArgUsedDirectly(proto, i))
1232 continue;
1233 generatedLocal = true;
1234
1235 s += TypeString(proto[i], typestr) + " __";
1236 s.push_back(arg);
1237 s += " = (";
1238 s.push_back(arg);
1239 s += "); ";
1240 }
1241
1242 if (generatedLocal)
1243 s += "\\\n ";
1244 return s;
1245 }
1246
1247 // Use the vmovl builtin to sign-extend or zero-extend a vector.
Extend(StringRef typestr,const std::string & a)1248 static std::string Extend(StringRef typestr, const std::string &a) {
1249 std::string s;
1250 s = MangleName("vmovl", typestr, ClassS);
1251 s += "(" + a + ")";
1252 return s;
1253 }
1254
Duplicate(unsigned nElts,StringRef typestr,const std::string & a)1255 static std::string Duplicate(unsigned nElts, StringRef typestr,
1256 const std::string &a) {
1257 std::string s;
1258
1259 s = "(" + TypeString('d', typestr) + "){ ";
1260 for (unsigned i = 0; i != nElts; ++i) {
1261 s += a;
1262 if ((i + 1) < nElts)
1263 s += ", ";
1264 }
1265 s += " }";
1266
1267 return s;
1268 }
1269
// Emit a shufflevector that broadcasts element 'lane' of 'vec' into every one
// of the nElts result lanes.
static std::string SplatLane(unsigned nElts, const std::string &vec,
                             const std::string &lane) {
  std::string result;
  result += "__builtin_shufflevector(";
  result += vec;
  result += ", ";
  result += vec;
  for (unsigned elt = 0; elt != nElts; ++elt) {
    result += ", ";
    result += lane;
  }
  result += ")";
  return result;
}
1278
GetNumElements(StringRef typestr,bool & quad)1279 static unsigned GetNumElements(StringRef typestr, bool &quad) {
1280 quad = false;
1281 bool dummy = false;
1282 char type = ClassifyType(typestr, quad, dummy, dummy);
1283 unsigned nElts = 0;
1284 switch (type) {
1285 case 'c': nElts = 8; break;
1286 case 's': nElts = 4; break;
1287 case 'i': nElts = 2; break;
1288 case 'l': nElts = 1; break;
1289 case 'h': nElts = 4; break;
1290 case 'f': nElts = 2; break;
1291 case 'd':
1292 nElts = 1;
1293 break;
1294 default:
1295 PrintFatalError("unhandled type!");
1296 }
1297 if (quad) nElts <<= 1;
1298 return nElts;
1299 }
1300
// Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
//
// The returned string is the body of the intrinsic: either a generic vector
// expression (+, *, shuffles, casts, ...) or calls to previously-emitted
// intrinsics such as vmull/vabd/vqdmull.  For non-macro intrinsics the body
// is prefixed with "return ".  Arguments are always referenced via their
// "__"-prefixed names (__a, __b, ...).
static std::string GenOpString(OpKind op, const std::string &proto,
                               StringRef typestr) {
  bool quad;
  unsigned nElts = GetNumElements(typestr, quad);
  bool define = UseMacro(proto);

  // ts is the return type; some cases below reuse the variable for an
  // argument type instead (see OpSelect).
  std::string ts = TypeString(proto[0], typestr);
  std::string s;
  if (!define) {
    s = "return ";
  }

  switch(op) {
  case OpAdd:
    s += "__a + __b;";
    break;
  case OpAddl:
    // Long add: widen both operands via vmovl, then add.
    s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
    break;
  case OpAddw:
    // Wide add: only the second (narrower) operand needs widening.
    s += "__a + " + Extend(typestr, "__b") + ";";
    break;
  case OpSub:
    s += "__a - __b;";
    break;
  case OpSubl:
    s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
    break;
  case OpSubw:
    s += "__a - " + Extend(typestr, "__b") + ";";
    break;
  case OpMulN:
    // Multiply by scalar: splat the scalar into a full vector first.
    s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
    break;
  case OpMulLane:
    // Multiply by one lane of __b, selected by immediate __c.
    s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
    break;
  case OpMul:
    s += "__a * __b;";
    break;
  case OpMullLane:
    s += MangleName("vmull", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  // Multiply-accumulate variants: plain, by-scalar (N) and by-lane.
  case OpMlaN:
    s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlaLane:
    s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMla:
    s += "__a + (__b * __c);";
    break;
  // Widening multiply-accumulate, built on the vmull intrinsic.
  case OpMlalN:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlalLane:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlal:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    break;
  // Multiply-subtract variants mirror the accumulate forms above.
  case OpMlsN:
    s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlsLane:
    s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMls:
    s += "__a - (__b * __c);";
    break;
  case OpMlslN:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlslLane:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlsl:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    break;
  // Saturating doubling multiply by-lane forms, built on the corresponding
  // vqd... intrinsics with a splatted lane operand.
  case OpQDMullLane:
    s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQDMlalLane:
    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMlslLane:
    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMulhLane:
    s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQRDMulhLane:
    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  // Comparisons: the vector comparison yields a signed mask vector, so cast
  // to the declared (unsigned) return type.
  case OpEq:
    s += "(" + ts + ")(__a == __b);";
    break;
  case OpGe:
    s += "(" + ts + ")(__a >= __b);";
    break;
  case OpLe:
    s += "(" + ts + ")(__a <= __b);";
    break;
  case OpGt:
    s += "(" + ts + ")(__a > __b);";
    break;
  case OpLt:
    s += "(" + ts + ")(__a < __b);";
    break;
  case OpNeg:
    s += " -__a;";
    break;
  case OpNot:
    s += " ~__a;";
    break;
  case OpAnd:
    s += "__a & __b;";
    break;
  case OpOr:
    s += "__a | __b;";
    break;
  case OpXor:
    s += "__a ^ __b;";
    break;
  case OpAndNot:
    s += "__a & ~__b;";
    break;
  case OpOrNot:
    s += "__a | ~__b;";
    break;
  case OpCast:
    s += "(" + ts + ")__a;";
    break;
  case OpConcat:
    // Concatenate two 64-bit halves into one 128-bit vector via an int64x1_t
    // shuffle, then cast to the real return type.
    s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
    s += ", (int64x1_t)__b, 0, 1);";
    break;
  case OpHi:
    // nElts is for the result vector, so the source is twice that number.
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = nElts; i < nElts * 2; ++i)
      s += ", " + utostr(i);
    s+= ");";
    break;
  case OpLo:
    // Low half: select lanes [0, nElts) of the double-width source.
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = 0; i < nElts; ++i)
      s += ", " + utostr(i);
    s+= ");";
    break;
  case OpDup:
    s += Duplicate(nElts, typestr, "__a") + ";";
    break;
  case OpDupLane:
    s += SplatLane(nElts, "__a", "__b") + ";";
    break;
  case OpSelect:
    // ((0 & 1) | (~0 & 2))
    s += "(" + ts + ")";
    // From here on, ts is the type of the mask/value operands, not the return.
    ts = TypeString(proto[1], typestr);
    s += "((__a & (" + ts + ")__b) | ";
    s += "(~__a & (" + ts + ")__c));";
    break;
  case OpRev16:
    // Reverse the order of each pair of adjacent elements.
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = 2; i <= nElts; i += 2)
      for (unsigned j = 0; j != 2; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  case OpRev32: {
    // Reverse elements within each 32-bit word.
    unsigned WordElts = nElts >> (1 + (int)quad);
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = WordElts; i <= nElts; i += WordElts)
      for (unsigned j = 0; j != WordElts; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  }
  case OpRev64: {
    // Reverse elements within each 64-bit doubleword.
    unsigned DblWordElts = nElts >> (int)quad;
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
      for (unsigned j = 0; j != DblWordElts; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  }
  case OpAbdl: {
    // Long absolute difference: vabd then widen.
    std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
    if (typestr[0] != 'U') {
      // vabd results are always unsigned and must be zero-extended.
      std::string utype = "U" + typestr.str();
      s += "(" + TypeString(proto[0], typestr) + ")";
      abd = "(" + TypeString('d', utype) + ")" + abd;
      s += Extend(utype, abd) + ";";
    } else {
      s += Extend(typestr, abd) + ";";
    }
    break;
  }
  case OpAba:
    // Absolute difference and accumulate.
    s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
    break;
  case OpAbal: {
    // Long absolute difference and accumulate; same widening dance as OpAbdl.
    s += "__a + ";
    std::string abd = MangleName("vabd", typestr, ClassS) + "(__b, __c)";
    if (typestr[0] != 'U') {
      // vabd results are always unsigned and must be zero-extended.
      std::string utype = "U" + typestr.str();
      s += "(" + TypeString(proto[0], typestr) + ")";
      abd = "(" + TypeString('d', utype) + ")" + abd;
      s += Extend(utype, abd) + ";";
    } else {
      s += Extend(typestr, abd) + ";";
    }
    break;
  }
  case OpDiv:
    s += "__a / __b;";
    break;
  default:
    PrintFatalError("unknown OpKind!");
  }
  return s;
}
1538
// Compute the NeonTypeFlags encoding (element type, signedness, quad bit)
// passed as the trailing constant argument to overloaded ClassB builtins.
static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
  unsigned mod = proto[0];

  // If the return-type modifier is 'v' or 'f', it does not determine the
  // element type; fall back to the first argument's modifier instead.
  if (mod == 'v' || mod == 'f')
    mod = proto[1];

  bool quad = false;
  bool poly = false;
  bool usgn = false;
  bool scal = false;
  bool cnst = false;
  bool pntr = false;

  // Base type to get the type string for.
  char type = ClassifyType(typestr, quad, poly, usgn);

  // Based on the modifying character, change the type and width if necessary.
  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);

  NeonTypeFlags::EltType ET;
  switch (type) {
  case 'c':
    // 8-bit types may be polynomial (Poly8) rather than integer.
    ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8;
    break;
  case 's':
    ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16;
    break;
  case 'i':
    ET = NeonTypeFlags::Int32;
    break;
  case 'l':
    ET = NeonTypeFlags::Int64;
    break;
  case 'h':
    ET = NeonTypeFlags::Float16;
    break;
  case 'f':
    ET = NeonTypeFlags::Float32;
    break;
  case 'd':
    ET = NeonTypeFlags::Float64;
    break;
  default:
    PrintFatalError("unhandled type!");
  }
  // The quad bit is suppressed when proto[1] == 'g' — presumably 'g' marks a
  // variant that always operates on the 64-bit form; confirm against ModType.
  NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g');
  return Flags.getFlags();
}
1587
// Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
//
// Builds the call to the __builtin_neon_* function implementing the
// intrinsic: casts the return value to the declared type, flattens
// multi-vector (struct) arguments into their .val[] members, splats scalar
// arguments for "_n" intrinsics, and appends the NeonTypeFlags constant for
// overloaded (ClassB) builtins.
static std::string GenBuiltin(const std::string &name, const std::string &proto,
                              StringRef typestr, ClassKind ck) {
  std::string s;

  // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
  // sret-like argument.
  bool sret = (proto[0] >= '2' && proto[0] <= '4');

  bool define = UseMacro(proto);

  // Check if the prototype has a scalar operand with the type of the vector
  // elements. If not, bitcasting the args will take care of arg checking.
  // The actual signedness etc. will be taken care of with special enums.
  if (proto.find('s') == std::string::npos)
    ck = ClassB;

  if (proto[0] != 'v') {
    std::string ts = TypeString(proto[0], typestr);

    // Non-void results either go through a local "r" (sret case) or are cast
    // directly to the declared return type.
    if (define) {
      if (sret)
        s += ts + " r; ";
      else
        s += "(" + ts + ")";
    } else if (sret) {
      s += ts + " r; ";
    } else {
      s += "return (" + ts + ")";
    }
  }

  // 'a' in the prototype marks a splatted scalar operand ("_n" intrinsics).
  bool splat = proto.find('a') != std::string::npos;

  s += "__builtin_neon_";
  if (splat) {
    // Call the non-splat builtin: chop off the "_n" suffix from the name.
    std::string vname(name, 0, name.size()-2);
    s += MangleName(vname, typestr, ck);
  } else {
    s += MangleName(name, typestr, ck);
  }
  s += "(";

  // Pass the address of the return variable as the first argument to sret-like
  // builtins.
  if (sret)
    s += "&r, ";

  char arg = 'a';
  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
    std::string args = std::string(&arg, 1);

    // Use the local temporaries instead of the macro arguments.
    args = "__" + args;

    bool argQuad = false;
    bool argPoly = false;
    bool argUsgn = false;
    bool argScalar = false;
    bool dummy = false;
    char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
    argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
                      dummy, dummy);

    // Handle multiple-vector values specially, emitting each subvector as an
    // argument to the __builtin.
    if (proto[i] >= '2' && proto[i] <= '4') {
      // Check if an explicit cast is needed.
      if (argType != 'c' || argPoly || argUsgn)
        args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;

      for (unsigned vi = 0, ve = proto[i] - '0'; vi != ve; ++vi) {
        s += args + ".val[" + utostr(vi) + "]";
        if ((vi + 1) < ve)
          s += ", ";
      }
      if ((i + 1) < e)
        s += ", ";

      continue;
    }

    // The final argument of a "_n" intrinsic is the scalar to splat.
    if (splat && (i + 1) == e)
      args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);

    // Check if an explicit cast is needed.
    if ((splat || !argScalar) &&
        ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
      std::string argTypeStr = "c";
      if (ck != ClassB)
        argTypeStr = argType;
      if (argQuad)
        argTypeStr = "Q" + argTypeStr;
      args = "(" + TypeString('d', argTypeStr) + ")" + args;
    }

    s += args;
    if ((i + 1) < e)
      s += ", ";
  }

  // Extra constant integer to hold type class enum for this function, e.g. s8
  if (ck == ClassB)
    s += ", " + utostr(GetNeonEnum(proto, typestr));

  s += ");";

  // For sret-style builtins, hand the result in "r" back to the caller.
  if (proto[0] != 'v' && sret) {
    if (define)
      s += " r;";
    else
      s += " return r;";
  }
  return s;
}
1704
GenBuiltinDef(const std::string & name,const std::string & proto,StringRef typestr,ClassKind ck)1705 static std::string GenBuiltinDef(const std::string &name,
1706 const std::string &proto,
1707 StringRef typestr, ClassKind ck) {
1708 std::string s("BUILTIN(__builtin_neon_");
1709
1710 // If all types are the same size, bitcasting the args will take care
1711 // of arg checking. The actual signedness etc. will be taken care of with
1712 // special enums.
1713 if (proto.find('s') == std::string::npos)
1714 ck = ClassB;
1715
1716 s += MangleName(name, typestr, ck);
1717 s += ", \"";
1718
1719 for (unsigned i = 0, e = proto.size(); i != e; ++i)
1720 s += BuiltinTypeString(proto[i], typestr, ck, i == 0);
1721
1722 // Extra constant integer to hold type class enum for this function, e.g. s8
1723 if (ck == ClassB)
1724 s += "i";
1725
1726 s += "\", \"n\")";
1727 return s;
1728 }
1729
GenIntrinsic(const std::string & name,const std::string & proto,StringRef outTypeStr,StringRef inTypeStr,OpKind kind,ClassKind classKind)1730 static std::string GenIntrinsic(const std::string &name,
1731 const std::string &proto,
1732 StringRef outTypeStr, StringRef inTypeStr,
1733 OpKind kind, ClassKind classKind) {
1734 assert(!proto.empty() && "");
1735 bool define = UseMacro(proto) && kind != OpUnavailable;
1736 std::string s;
1737
1738 // static always inline + return type
1739 if (define)
1740 s += "#define ";
1741 else
1742 s += "__ai " + TypeString(proto[0], outTypeStr) + " ";
1743
1744 // Function name with type suffix
1745 std::string mangledName = MangleName(name, outTypeStr, ClassS);
1746 if (outTypeStr != inTypeStr) {
1747 // If the input type is different (e.g., for vreinterpret), append a suffix
1748 // for the input type. String off a "Q" (quad) prefix so that MangleName
1749 // does not insert another "q" in the name.
1750 unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
1751 StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
1752 mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
1753 }
1754 s += mangledName;
1755
1756 // Function arguments
1757 s += GenArgs(proto, inTypeStr);
1758
1759 // Definition.
1760 if (define) {
1761 s += " __extension__ ({ \\\n ";
1762 s += GenMacroLocals(proto, inTypeStr);
1763 } else if (kind == OpUnavailable) {
1764 s += " __attribute__((unavailable));\n";
1765 return s;
1766 } else
1767 s += " {\n ";
1768
1769 if (kind != OpNone)
1770 s += GenOpString(kind, proto, outTypeStr);
1771 else
1772 s += GenBuiltin(name, proto, outTypeStr, classKind);
1773 if (define)
1774 s += " })";
1775 else
1776 s += " }";
1777 s += "\n";
1778 return s;
1779 }
1780
/// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h
/// is comprised of type definitions and function declarations.
void NeonEmitter::run(raw_ostream &OS) {
  // Emit the permissive license header carried inside the generated header.
  OS <<
  "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
  "---===\n"
  " *\n"
  " * Permission is hereby granted, free of charge, to any person obtaining "
  "a copy\n"
  " * of this software and associated documentation files (the \"Software\"),"
  " to deal\n"
  " * in the Software without restriction, including without limitation the "
  "rights\n"
  " * to use, copy, modify, merge, publish, distribute, sublicense, "
  "and/or sell\n"
  " * copies of the Software, and to permit persons to whom the Software is\n"
  " * furnished to do so, subject to the following conditions:\n"
  " *\n"
  " * The above copyright notice and this permission notice shall be "
  "included in\n"
  " * all copies or substantial portions of the Software.\n"
  " *\n"
  " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
  "EXPRESS OR\n"
  " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
  "MERCHANTABILITY,\n"
  " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
  "SHALL THE\n"
  " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
  "OTHER\n"
  " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
  "ARISING FROM,\n"
  " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
  "DEALINGS IN\n"
  " * THE SOFTWARE.\n"
  " *\n"
  " *===--------------------------------------------------------------------"
  "---===\n"
  " */\n\n";

  OS << "#ifndef __ARM_NEON_H\n";
  OS << "#define __ARM_NEON_H\n\n";

  // NOTE(review): "__AARCH_FEATURE_ADVSIMD" looks like it may be a typo for
  // the ACLE macro "__ARM_FEATURE_ADVSIMD".  The string is runtime output, so
  // it is left unchanged here — confirm against the target's predefined
  // macros before altering it.
  OS << "#if !defined(__ARM_NEON__) && !defined(__AARCH_FEATURE_ADVSIMD)\n";
  OS << "#error \"NEON support not enabled\"\n";
  OS << "#endif\n\n";

  OS << "#include <stdint.h>\n\n";

  // Emit NEON-specific scalar typedefs.
  OS << "typedef float float32_t;\n";
  OS << "typedef __fp16 float16_t;\n";

  // float64_t only exists on AArch64.
  OS << "#ifdef __aarch64__\n";
  OS << "typedef double float64_t;\n";
  OS << "#endif\n\n";

  // For now, signedness of polynomial types depends on target
  OS << "#ifdef __aarch64__\n";
  OS << "typedef uint8_t poly8_t;\n";
  OS << "typedef uint16_t poly16_t;\n";
  OS << "#else\n";
  OS << "typedef int8_t poly8_t;\n";
  OS << "typedef int16_t poly16_t;\n";
  OS << "#endif\n";

  // Emit Neon vector typedefs.
  std::string TypedefTypes(
    "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfQdPcQPcPsQPs");
  SmallVector<StringRef, 24> TDTypeVec;
  ParseTypes(0, TypedefTypes, TDTypeVec);

  // Emit vector typedefs.
  for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
    bool dummy, quad = false, poly = false;
    char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
    bool isA64 = false;

    // Quad double vectors (float64x2_t) are AArch64-only; guard them.
    if (type == 'd' && quad)
      isA64 = true;

    if (isA64)
      OS << "#ifdef __aarch64__\n";

    if (poly)
      OS << "typedef __attribute__((neon_polyvector_type(";
    else
      OS << "typedef __attribute__((neon_vector_type(";

    unsigned nElts = GetNumElements(TDTypeVec[i], quad);
    OS << utostr(nElts) << "))) ";
    // Pad single-digit element counts so the typedefs line up.
    if (nElts < 10)
      OS << " ";

    OS << TypeString('s', TDTypeVec[i]);
    OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";

    if (isA64)
      OS << "#endif\n";
  }
  OS << "\n";

  // Emit struct typedefs (the multi-vector int8x8x2_t-style types).
  for (unsigned vi = 2; vi != 5; ++vi) {
    for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
      bool dummy, quad = false, poly = false;
      char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
      bool isA64 = false;

      if (type == 'd' && quad)
        isA64 = true;

      if (isA64)
        OS << "#ifdef __aarch64__\n";

      std::string ts = TypeString('d', TDTypeVec[i]);
      std::string vs = TypeString('0' + vi, TDTypeVec[i]);
      OS << "typedef struct " << vs << " {\n";
      OS << "  " << ts << " val";
      OS << "[" << utostr(vi) << "]";
      OS << ";\n} ";
      OS << vs << ";\n";

      if (isA64)
        OS << "#endif\n";

      OS << "\n";
    }
  }

  OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";

  std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");

  StringMap<ClassKind> EmittedMap;

  // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
  // intrinsics. (Some of the saturating multiply instructions are also
  // used to implement the corresponding "_lane" variants, but tablegen
  // sorts the records into alphabetical order so that the "_lane" variants
  // come after the intrinsics they use.)
  emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap);

  // ARM intrinsics must be emitted before AArch64 intrinsics to ensure
  // common intrinsics appear only once in the output stream.
  // The check for uniqueness is done in emitIntrinsic.
  // Emit ARM intrinsics.
  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Skip AArch64 intrinsics; they will be emitted at the end.
    bool isA64 = R->getValueAsBit("isA64");
    if (isA64)
      continue;

    // VMOVL/VMULL/VABD were already emitted above.
    if (R->getName() != "VMOVL" && R->getName() != "VMULL" &&
        R->getName() != "VABD")
      emitIntrinsic(OS, R, EmittedMap);
  }

  // Emit AArch64-specific intrinsics.
  OS << "#ifdef __aarch64__\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Skip ARM intrinsics already included above.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64)
      continue;

    emitIntrinsic(OS, R, EmittedMap);
  }

  OS << "#endif\n\n";

  OS << "#undef __ai\n\n";
  OS << "#endif /* __ARM_NEON_H */\n";
}
1962
/// emitIntrinsic - Write out the arm_neon.h header file definitions for the
/// intrinsics specified by record R checking for intrinsic uniqueness.
///
/// One definition is generated per type in the record's "Types" list.  The
/// generated text is used as the key into EmittedMap, so an intrinsic that
/// was already emitted (e.g. shared between ARM and AArch64) is skipped.
void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R,
                                StringMap<ClassKind> &EmittedMap) {
  std::string name = R->getValueAsString("Name");
  std::string Proto = R->getValueAsString("Prototype");
  std::string Types = R->getValueAsString("Types");

  SmallVector<StringRef, 16> TypeVec;
  ParseTypes(R, Types, TypeVec);

  OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];

  // The class kind comes from the record's second superclass, when present.
  ClassKind classKind = ClassNone;
  if (R->getSuperClasses().size() >= 2)
    classKind = ClassMap[R->getSuperClasses()[1]];
  if (classKind == ClassNone && kind == OpNone)
    PrintFatalError(R->getLoc(), "Builtin has no class kind");

  for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
    if (kind == OpReinterpret) {
      // vreinterpret: emit a cast from every other type of the same width
      // (quad-ness must match) to this result type.
      bool outQuad = false;
      bool dummy = false;
      (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
      for (unsigned srcti = 0, srcte = TypeVec.size();
           srcti != srcte; ++srcti) {
        bool inQuad = false;
        (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
        if (srcti == ti || inQuad != outQuad)
          continue;
        std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
                                     OpCast, ClassS);
        // Skip duplicates already emitted for another record.
        if (EmittedMap.count(s))
          continue;
        EmittedMap[s] = ClassS;
        OS << s;
      }
    } else {
      std::string s =
          GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind);
      if (EmittedMap.count(s))
        continue;
      EmittedMap[s] = classKind;
      OS << s;
    }
  }
  OS << "\n";
}
2011
RangeFromType(const char mod,StringRef typestr)2012 static unsigned RangeFromType(const char mod, StringRef typestr) {
2013 // base type to get the type string for.
2014 bool quad = false, dummy = false;
2015 char type = ClassifyType(typestr, quad, dummy, dummy);
2016 type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);
2017
2018 switch (type) {
2019 case 'c':
2020 return (8 << (int)quad) - 1;
2021 case 'h':
2022 case 's':
2023 return (4 << (int)quad) - 1;
2024 case 'f':
2025 case 'i':
2026 return (2 << (int)quad) - 1;
2027 case 'l':
2028 return (1 << (int)quad) - 1;
2029 default:
2030 PrintFatalError("unhandled type!");
2031 }
2032 }
2033
/// Generate the ARM and AArch64 intrinsic range checking code for
/// shift/lane immediates, checking for unique declarations.
///
/// Emits "case <builtin>: i = <imm-index>; <range>; break;" entries, guarded
/// by GET_NEON_IMMEDIATE_CHECK or GET_NEON_AARCH64_IMMEDIATE_CHECK, for
/// consumption by Sema's immediate-argument validation.
void
NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
                                        StringMap<ClassKind> &A64IntrinsicMap,
                                        bool isA64RangeCheck) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  // Uniques the emitted case labels; the mapped OpKind value is never read.
  StringMap<OpKind> EmittedMap;

  // Generate the intrinsic range checking code for shift/lane immediates.
  if (isA64RangeCheck)
    OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n";
  else
    OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Only OpNone records become real __builtin_neon_* builtins that reach
    // Sema; operator-based intrinsics are expanded inline and need no check.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string name = R->getValueAsString("Name");
    std::string Proto = R->getValueAsString("Prototype");
    std::string Types = R->getValueAsString("Types");

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    // Functions which do not have an immediate do not need to have range
    // checking code emitted.
    size_t immPos = Proto.find('i');
    if (immPos == std::string::npos)
      continue;

    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    ClassKind ck = ClassMap[R->getSuperClasses()[1]];

    // Do not include AArch64 range checks if not generating code for AArch64.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64RangeCheck && isA64)
      continue;

    // Include ARM range checks in AArch64 but only if ARM intrinsics are not
    // redefined by AArch64 to handle new types.
    if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(name)) {
      ClassKind &A64CK = A64IntrinsicMap[name];
      if (A64CK == ck && ck != ClassNone)
        continue;
    }

    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      std::string namestr, shiftstr, rangestr;

      if (R->getValueAsBit("isVCVT_N")) {
        // VCVT between floating- and fixed-point values takes an immediate
        // in the range 1 to 32.
        ck = ClassB;
        rangestr = "l = 1; u = 31"; // upper bound = l + u
      } else if (Proto.find('s') == std::string::npos) {
        // Builtins which are overloaded by type will need to have their upper
        // bound computed at Sema time based on the type constant.
        ck = ClassB;
        if (R->getValueAsBit("isShift")) {
          shiftstr = ", true";

          // Right shifts have an 'r' in the name, left shifts do not.
          if (name.find('r') != std::string::npos)
            rangestr = "l = 1; ";
        }
        // RFT(TV...) is evaluated by the Sema-side consumer of this output.
        rangestr += "u = RFT(TV" + shiftstr + ")";
      } else {
        // The immediate generally refers to a lane in the preceding argument.
        assert(immPos > 0 && "unexpected immediate operand");
        rangestr =
            "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti]));
      }
      // Make sure cases appear only once by uniquing them in a string map.
      namestr = MangleName(name, TypeVec[ti], ck);
      if (EmittedMap.count(namestr))
        continue;
      EmittedMap[namestr] = OpNone;

      // Calculate the index of the immediate that should be range checked.
      unsigned immidx = 0;

      // Builtins that return a struct of multiple vectors have an extra
      // leading arg for the struct return.
      if (Proto[0] >= '2' && Proto[0] <= '4')
        ++immidx;

      // Add one to the index for each argument until we reach the immediate
      // to be checked. Structs of vectors are passed as multiple arguments.
      for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
        switch (Proto[ii]) {
        default:
          immidx += 1;
          break;
        case '2':
          immidx += 2;
          break;
        case '3':
          immidx += 3;
          break;
        case '4':
          immidx += 4;
          break;
        case 'i':
          // Found the immediate: clamp the loop bound so this is the final
          // iteration, leaving immidx pointing at the immediate itself.
          ie = ii + 1;
          break;
        }
      }
      if (isA64RangeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; "
         << rangestr << "; break;\n";
    }
  }
  OS << "#endif\n\n";
}
2163
2164 /// Generate the ARM and AArch64 overloaded type checking code for
2165 /// SemaChecking.cpp, checking for unique builtin declarations.
2166 void
genOverloadTypeCheckCode(raw_ostream & OS,StringMap<ClassKind> & A64IntrinsicMap,bool isA64TypeCheck)2167 NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
2168 StringMap<ClassKind> &A64IntrinsicMap,
2169 bool isA64TypeCheck) {
2170 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2171 StringMap<OpKind> EmittedMap;
2172
2173 // Generate the overloaded type checking code for SemaChecking.cpp
2174 if (isA64TypeCheck)
2175 OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n";
2176 else
2177 OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
2178
2179 for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2180 Record *R = RV[i];
2181 OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
2182 if (k != OpNone)
2183 continue;
2184
2185 std::string Proto = R->getValueAsString("Prototype");
2186 std::string Types = R->getValueAsString("Types");
2187 std::string name = R->getValueAsString("Name");
2188
2189 // Functions with 'a' (the splat code) in the type prototype should not get
2190 // their own builtin as they use the non-splat variant.
2191 if (Proto.find('a') != std::string::npos)
2192 continue;
2193
2194 // Functions which have a scalar argument cannot be overloaded, no need to
2195 // check them if we are emitting the type checking code.
2196 if (Proto.find('s') != std::string::npos)
2197 continue;
2198
2199 SmallVector<StringRef, 16> TypeVec;
2200 ParseTypes(R, Types, TypeVec);
2201
2202 if (R->getSuperClasses().size() < 2)
2203 PrintFatalError(R->getLoc(), "Builtin has no class kind");
2204
2205 // Do not include AArch64 type checks if not generating code for AArch64.
2206 bool isA64 = R->getValueAsBit("isA64");
2207 if (!isA64TypeCheck && isA64)
2208 continue;
2209
2210 // Include ARM type check in AArch64 but only if ARM intrinsics
2211 // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
2212 // redefined in AArch64 to handle an additional 2 x f64 type.
2213 ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2214 if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(name)) {
2215 ClassKind &A64CK = A64IntrinsicMap[name];
2216 if (A64CK == ck && ck != ClassNone)
2217 continue;
2218 }
2219
2220 int si = -1, qi = -1;
2221 uint64_t mask = 0, qmask = 0;
2222 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2223 // Generate the switch case(s) for this builtin for the type validation.
2224 bool quad = false, poly = false, usgn = false;
2225 (void) ClassifyType(TypeVec[ti], quad, poly, usgn);
2226
2227 if (quad) {
2228 qi = ti;
2229 qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
2230 } else {
2231 si = ti;
2232 mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
2233 }
2234 }
2235
2236 // Check if the builtin function has a pointer or const pointer argument.
2237 int PtrArgNum = -1;
2238 bool HasConstPtr = false;
2239 for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
2240 char ArgType = Proto[arg];
2241 if (ArgType == 'c') {
2242 HasConstPtr = true;
2243 PtrArgNum = arg - 1;
2244 break;
2245 }
2246 if (ArgType == 'p') {
2247 PtrArgNum = arg - 1;
2248 break;
2249 }
2250 }
2251 // For sret builtins, adjust the pointer argument index.
2252 if (PtrArgNum >= 0 && (Proto[0] >= '2' && Proto[0] <= '4'))
2253 PtrArgNum += 1;
2254
2255 // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
2256 // and vst1_lane intrinsics. Using a pointer to the vector element
2257 // type with one of those operations causes codegen to select an aligned
2258 // load/store instruction. If you want an unaligned operation,
2259 // the pointer argument needs to have less alignment than element type,
2260 // so just accept any pointer type.
2261 if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
2262 PtrArgNum = -1;
2263 HasConstPtr = false;
2264 }
2265
2266 if (mask) {
2267 if (isA64TypeCheck)
2268 OS << "case AArch64::BI__builtin_neon_";
2269 else
2270 OS << "case ARM::BI__builtin_neon_";
2271 OS << MangleName(name, TypeVec[si], ClassB) << ": mask = "
2272 << "0x" << utohexstr(mask) << "ULL";
2273 if (PtrArgNum >= 0)
2274 OS << "; PtrArgNum = " << PtrArgNum;
2275 if (HasConstPtr)
2276 OS << "; HasConstPtr = true";
2277 OS << "; break;\n";
2278 }
2279 if (qmask) {
2280 if (isA64TypeCheck)
2281 OS << "case AArch64::BI__builtin_neon_";
2282 else
2283 OS << "case ARM::BI__builtin_neon_";
2284 OS << MangleName(name, TypeVec[qi], ClassB) << ": mask = "
2285 << "0x" << utohexstr(qmask) << "ULL";
2286 if (PtrArgNum >= 0)
2287 OS << "; PtrArgNum = " << PtrArgNum;
2288 if (HasConstPtr)
2289 OS << "; HasConstPtr = true";
2290 OS << "; break;\n";
2291 }
2292 }
2293 OS << "#endif\n\n";
2294 }
2295
2296 /// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def
2297 /// declaration of builtins, checking for unique builtin declarations.
genBuiltinsDef(raw_ostream & OS,StringMap<ClassKind> & A64IntrinsicMap,bool isA64GenBuiltinDef)2298 void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
2299 StringMap<ClassKind> &A64IntrinsicMap,
2300 bool isA64GenBuiltinDef) {
2301 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2302 StringMap<OpKind> EmittedMap;
2303
2304 // Generate BuiltinsARM.def and BuiltinsAArch64.def
2305 if (isA64GenBuiltinDef)
2306 OS << "#ifdef GET_NEON_AARCH64_BUILTINS\n";
2307 else
2308 OS << "#ifdef GET_NEON_BUILTINS\n";
2309
2310 for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2311 Record *R = RV[i];
2312 OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
2313 if (k != OpNone)
2314 continue;
2315
2316 std::string Proto = R->getValueAsString("Prototype");
2317 std::string name = R->getValueAsString("Name");
2318
2319 // Functions with 'a' (the splat code) in the type prototype should not get
2320 // their own builtin as they use the non-splat variant.
2321 if (Proto.find('a') != std::string::npos)
2322 continue;
2323
2324 std::string Types = R->getValueAsString("Types");
2325 SmallVector<StringRef, 16> TypeVec;
2326 ParseTypes(R, Types, TypeVec);
2327
2328 if (R->getSuperClasses().size() < 2)
2329 PrintFatalError(R->getLoc(), "Builtin has no class kind");
2330
2331 ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2332
2333 // Do not include AArch64 BUILTIN() macros if not generating
2334 // code for AArch64
2335 bool isA64 = R->getValueAsBit("isA64");
2336 if (!isA64GenBuiltinDef && isA64)
2337 continue;
2338
2339 // Include ARM BUILTIN() macros in AArch64 but only if ARM intrinsics
2340 // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
2341 // redefined in AArch64 to handle an additional 2 x f64 type.
2342 if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(name)) {
2343 ClassKind &A64CK = A64IntrinsicMap[name];
2344 if (A64CK == ck && ck != ClassNone)
2345 continue;
2346 }
2347
2348 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2349 // Generate the declaration for this builtin, ensuring
2350 // that each unique BUILTIN() macro appears only once in the output
2351 // stream.
2352 std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
2353 if (EmittedMap.count(bd))
2354 continue;
2355
2356 EmittedMap[bd] = OpNone;
2357 OS << bd << "\n";
2358 }
2359 }
2360 OS << "#endif\n\n";
2361 }
2362
2363 /// runHeader - Emit a file with sections defining:
2364 /// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
2365 /// 2. the SemaChecking code for the type overload checking.
2366 /// 3. the SemaChecking code for validation of intrinsic immediate arguments.
runHeader(raw_ostream & OS)2367 void NeonEmitter::runHeader(raw_ostream &OS) {
2368 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2369
2370 // build a map of AArch64 intriniscs to be used in uniqueness checks.
2371 StringMap<ClassKind> A64IntrinsicMap;
2372 for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2373 Record *R = RV[i];
2374
2375 bool isA64 = R->getValueAsBit("isA64");
2376 if (!isA64)
2377 continue;
2378
2379 ClassKind CK = ClassNone;
2380 if (R->getSuperClasses().size() >= 2)
2381 CK = ClassMap[R->getSuperClasses()[1]];
2382
2383 std::string Name = R->getValueAsString("Name");
2384 if (A64IntrinsicMap.count(Name))
2385 continue;
2386 A64IntrinsicMap[Name] = CK;
2387 }
2388
2389 // Generate BuiltinsARM.def for ARM
2390 genBuiltinsDef(OS, A64IntrinsicMap, false);
2391
2392 // Generate BuiltinsAArch64.def for AArch64
2393 genBuiltinsDef(OS, A64IntrinsicMap, true);
2394
2395 // Generate ARM overloaded type checking code for SemaChecking.cpp
2396 genOverloadTypeCheckCode(OS, A64IntrinsicMap, false);
2397
2398 // Generate AArch64 overloaded type checking code for SemaChecking.cpp
2399 genOverloadTypeCheckCode(OS, A64IntrinsicMap, true);
2400
2401 // Generate ARM range checking code for shift/lane immediates.
2402 genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, false);
2403
2404 // Generate the AArch64 range checking code for shift/lane immediates.
2405 genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, true);
2406 }
2407
2408 /// GenTest - Write out a test for the intrinsic specified by the name and
2409 /// type strings, including the embedded patterns for FileCheck to match.
GenTest(const std::string & name,const std::string & proto,StringRef outTypeStr,StringRef inTypeStr,bool isShift,bool isHiddenLOp,ClassKind ck,const std::string & InstName,bool isA64,std::string & testFuncProto)2410 static std::string GenTest(const std::string &name,
2411 const std::string &proto,
2412 StringRef outTypeStr, StringRef inTypeStr,
2413 bool isShift, bool isHiddenLOp,
2414 ClassKind ck, const std::string &InstName,
2415 bool isA64,
2416 std::string & testFuncProto) {
2417 assert(!proto.empty() && "");
2418 std::string s;
2419
2420 // Function name with type suffix
2421 std::string mangledName = MangleName(name, outTypeStr, ClassS);
2422 if (outTypeStr != inTypeStr) {
2423 // If the input type is different (e.g., for vreinterpret), append a suffix
2424 // for the input type. String off a "Q" (quad) prefix so that MangleName
2425 // does not insert another "q" in the name.
2426 unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
2427 StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
2428 mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
2429 }
2430
2431 // todo: GenerateChecksForIntrinsic does not generate CHECK
2432 // for aarch64 instructions yet
2433 std::vector<std::string> FileCheckPatterns;
2434 if (!isA64) {
2435 GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
2436 isHiddenLOp, FileCheckPatterns);
2437 s+= "// CHECK_ARM: test_" + mangledName + "\n";
2438 }
2439 s += "// CHECK_AARCH64: test_" + mangledName + "\n";
2440
2441 // Emit the FileCheck patterns.
2442 // If for any reason we do not want to emit a check, mangledInst
2443 // will be the empty string.
2444 if (FileCheckPatterns.size()) {
2445 for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(),
2446 e = FileCheckPatterns.end();
2447 i != e;
2448 ++i) {
2449 s += "// CHECK_ARM: " + *i + "\n";
2450 }
2451 }
2452
2453 // Emit the start of the test function.
2454
2455 testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
2456 char arg = 'a';
2457 std::string comma;
2458 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
2459 // Do not create arguments for values that must be immediate constants.
2460 if (proto[i] == 'i')
2461 continue;
2462 testFuncProto += comma + TypeString(proto[i], inTypeStr) + " ";
2463 testFuncProto.push_back(arg);
2464 comma = ", ";
2465 }
2466 testFuncProto += ")";
2467
2468 s+= testFuncProto;
2469 s+= " {\n ";
2470
2471 if (proto[0] != 'v')
2472 s += "return ";
2473 s += mangledName + "(";
2474 arg = 'a';
2475 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
2476 if (proto[i] == 'i') {
2477 // For immediate operands, test the maximum value.
2478 if (isShift)
2479 s += "1"; // FIXME
2480 else
2481 // The immediate generally refers to a lane in the preceding argument.
2482 s += utostr(RangeFromType(proto[i-1], inTypeStr));
2483 } else {
2484 s.push_back(arg);
2485 }
2486 if ((i + 1) < e)
2487 s += ", ";
2488 }
2489 s += ");\n}\n\n";
2490 return s;
2491 }
2492
2493 /// Write out all intrinsic tests for the specified target, checking
2494 /// for intrinsic test uniqueness.
genTargetTest(raw_ostream & OS,StringMap<OpKind> & EmittedMap,bool isA64GenTest)2495 void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
2496 bool isA64GenTest) {
2497 if (isA64GenTest)
2498 OS << "#ifdef __aarch64__\n";
2499
2500 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2501 for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2502 Record *R = RV[i];
2503 std::string name = R->getValueAsString("Name");
2504 std::string Proto = R->getValueAsString("Prototype");
2505 std::string Types = R->getValueAsString("Types");
2506 bool isShift = R->getValueAsBit("isShift");
2507 std::string InstName = R->getValueAsString("InstName");
2508 bool isHiddenLOp = R->getValueAsBit("isHiddenLInst");
2509 bool isA64 = R->getValueAsBit("isA64");
2510
2511 // do not include AArch64 intrinsic test if not generating
2512 // code for AArch64
2513 if (!isA64GenTest && isA64)
2514 continue;
2515
2516 SmallVector<StringRef, 16> TypeVec;
2517 ParseTypes(R, Types, TypeVec);
2518
2519 ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2520 OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
2521 if (kind == OpUnavailable)
2522 continue;
2523 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2524 if (kind == OpReinterpret) {
2525 bool outQuad = false;
2526 bool dummy = false;
2527 (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
2528 for (unsigned srcti = 0, srcte = TypeVec.size();
2529 srcti != srcte; ++srcti) {
2530 bool inQuad = false;
2531 (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
2532 if (srcti == ti || inQuad != outQuad)
2533 continue;
2534 std::string testFuncProto;
2535 std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
2536 isShift, isHiddenLOp, ck, InstName, isA64,
2537 testFuncProto);
2538 if (EmittedMap.count(testFuncProto))
2539 continue;
2540 EmittedMap[testFuncProto] = kind;
2541 OS << s << "\n";
2542 }
2543 } else {
2544 std::string testFuncProto;
2545 std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift,
2546 isHiddenLOp, ck, InstName, isA64, testFuncProto);
2547 if (EmittedMap.count(testFuncProto))
2548 continue;
2549 EmittedMap[testFuncProto] = kind;
2550 OS << s << "\n";
2551 }
2552 }
2553 }
2554
2555 if (isA64GenTest)
2556 OS << "#endif\n";
2557 }
2558 /// runTests - Write out a complete set of tests for all of the Neon
2559 /// intrinsics.
runTests(raw_ostream & OS)2560 void NeonEmitter::runTests(raw_ostream &OS) {
2561 OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi "
2562 "apcs-gnu\\\n"
2563 "// RUN: -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
2564 "// RUN: | FileCheck %s -check-prefix=CHECK_ARM\n"
2565 "\n"
2566 "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n"
2567 "// RUN -target-feature +neon -ffreestanding -S -o - %s \\\n"
2568 "// RUN: | FileCheck %s -check-prefix=CHECK_AARCH64\n"
2569 "\n"
2570 "// REQUIRES: long_tests\n"
2571 "\n"
2572 "#include <arm_neon.h>\n"
2573 "\n";
2574
2575 // ARM tests must be emitted before AArch64 tests to ensure
2576 // tests for intrinsics that are common to ARM and AArch64
2577 // appear only once in the output stream.
2578 // The check for uniqueness is done in genTargetTest.
2579 StringMap<OpKind> EmittedMap;
2580
2581 genTargetTest(OS, EmittedMap, false);
2582
2583 genTargetTest(OS, EmittedMap, true);
2584 }
2585
namespace clang {
// TableGen entry point: emit the arm_neon.h header (see run()).
void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter(Records).run(OS);
}
// TableGen entry point: emit the Builtins*.def entries plus the SemaChecking
// overload and immediate-range validation code (see runHeader()).
void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter(Records).runHeader(OS);
}
// TableGen entry point: emit a complete set of tests for the Neon intrinsics
// (see runTests()).
void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter(Records).runTests(OS);
}
} // End namespace clang
2597