• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This tablegen backend is responsible for emitting arm_neon.h, which includes
11 // a declaration and definition of each function specified by the ARM NEON
12 // compiler interface.  See ARM document DUI0348B.
13 //
14 // Each NEON instruction is implemented in terms of 1 or more functions which
15 // are suffixed with the element type of the input vectors.  Functions may be
16 // implemented in terms of generic vector operations such as +, *, -, etc. or
17 // by calling a __builtin_-prefixed function which will be handled by clang's
18 // CodeGen library.
19 //
20 // Additional validation code can be generated by this file when runHeader() is
21 // called, rather than the normal run() entry point.  A complete set of tests
22 // for Neon intrinsics can be generated by calling the runTests() entry point.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/SmallString.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/StringExtras.h"
30 #include "llvm/ADT/StringMap.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/TableGen/Error.h"
33 #include "llvm/TableGen/Record.h"
34 #include "llvm/TableGen/TableGenBackend.h"
35 #include <string>
36 using namespace llvm;
37 
/// OpKind - The operations the emitter itself knows how to expand inline in
/// arm_neon.h.  Each value corresponds to one "OP_*" operator name used in
/// arm_neon.td (see the OpMap table in NeonEmitter's constructor).  OpNone
/// means the intrinsic maps directly onto a __builtin_* call handled by
/// clang's CodeGen library instead.
enum OpKind {
  OpNone,
  OpUnavailable,
  OpAdd,
  OpAddl,
  OpAddw,
  OpSub,
  OpSubl,
  OpSubw,
  OpMul,
  OpMla,
  OpMlal,
  OpMls,
  OpMlsl,
  OpMulN,
  OpMlaN,
  OpMlsN,
  OpMlalN,
  OpMlslN,
  OpMulLane,
  OpMullLane,
  OpMlaLane,
  OpMlsLane,
  OpMlalLane,
  OpMlslLane,
  OpQDMullLane,
  OpQDMlalLane,
  OpQDMlslLane,
  OpQDMulhLane,
  OpQRDMulhLane,
  OpEq,
  OpGe,
  OpLe,
  OpGt,
  OpLt,
  OpNeg,
  OpNot,
  OpAnd,
  OpOr,
  OpXor,
  OpAndNot,
  OpOrNot,
  OpCast,
  OpConcat,
  OpDup,
  OpDupLane,
  OpHi,
  OpLo,
  OpSelect,
  OpRev16,
  OpRev32,
  OpRev64,
  OpReinterpret,
  OpAbdl,
  OpAba,
  OpAbal,
  OpDiv
};
96 
/// ClassKind - Categorizes a builtin record; controls how the type suffix is
/// mangled onto the intrinsic name (see InstructionTypeCode / MangleName) and
/// whether a test is emitted for it.
enum ClassKind {
  ClassNone,
  ClassI,           // generic integer instruction, e.g., "i8" suffix
  ClassS,           // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
  ClassW,           // width-specific instruction, e.g., "8" suffix
  ClassB,           // bitcast arguments with enum argument to specify type
  ClassL,           // Logical instructions which are op instructions
                    // but we need to not emit any suffix for in our
                    // tests.
  ClassNoTest       // Instructions which we do not test since they are
                    // not TRUE instructions.
};
109 
/// NeonTypeFlags - Flags to identify the types for overloaded Neon
/// builtins.  These must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
namespace {
class NeonTypeFlags {
  // Bit layout of Flags: low nibble is the element type, plus two
  // single-bit modifiers.
  enum {
    EltTypeMask = 0xf,
    UnsignedFlag = 0x10,
    QuadFlag = 0x20
  };
  uint32_t Flags;

public:
  enum EltType {
    Int8,
    Int16,
    Int32,
    Int64,
    Poly8,
    Poly16,
    Float16,
    Float32,
    Float64
  };

  /// Construct from a raw flag word.
  NeonTypeFlags(unsigned F) : Flags(F) {}

  /// Construct from an element type plus the unsigned / quad-vector
  /// modifiers.
  NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad)
      : Flags(static_cast<uint32_t>(ET) | (IsUnsigned ? UnsignedFlag : 0) |
              (IsQuad ? QuadFlag : 0)) {}

  uint32_t getFlags() const { return Flags; }
};
} // end anonymous namespace
146 
147 namespace {
148 class NeonEmitter {
149   RecordKeeper &Records;
150   StringMap<OpKind> OpMap;
151   DenseMap<Record*, ClassKind> ClassMap;
152 
153 public:
NeonEmitter(RecordKeeper & R)154   NeonEmitter(RecordKeeper &R) : Records(R) {
155     OpMap["OP_NONE"]  = OpNone;
156     OpMap["OP_UNAVAILABLE"] = OpUnavailable;
157     OpMap["OP_ADD"]   = OpAdd;
158     OpMap["OP_ADDL"]  = OpAddl;
159     OpMap["OP_ADDW"]  = OpAddw;
160     OpMap["OP_SUB"]   = OpSub;
161     OpMap["OP_SUBL"]  = OpSubl;
162     OpMap["OP_SUBW"]  = OpSubw;
163     OpMap["OP_MUL"]   = OpMul;
164     OpMap["OP_MLA"]   = OpMla;
165     OpMap["OP_MLAL"]  = OpMlal;
166     OpMap["OP_MLS"]   = OpMls;
167     OpMap["OP_MLSL"]  = OpMlsl;
168     OpMap["OP_MUL_N"] = OpMulN;
169     OpMap["OP_MLA_N"] = OpMlaN;
170     OpMap["OP_MLS_N"] = OpMlsN;
171     OpMap["OP_MLAL_N"] = OpMlalN;
172     OpMap["OP_MLSL_N"] = OpMlslN;
173     OpMap["OP_MUL_LN"]= OpMulLane;
174     OpMap["OP_MULL_LN"] = OpMullLane;
175     OpMap["OP_MLA_LN"]= OpMlaLane;
176     OpMap["OP_MLS_LN"]= OpMlsLane;
177     OpMap["OP_MLAL_LN"] = OpMlalLane;
178     OpMap["OP_MLSL_LN"] = OpMlslLane;
179     OpMap["OP_QDMULL_LN"] = OpQDMullLane;
180     OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
181     OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
182     OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
183     OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
184     OpMap["OP_EQ"]    = OpEq;
185     OpMap["OP_GE"]    = OpGe;
186     OpMap["OP_LE"]    = OpLe;
187     OpMap["OP_GT"]    = OpGt;
188     OpMap["OP_LT"]    = OpLt;
189     OpMap["OP_NEG"]   = OpNeg;
190     OpMap["OP_NOT"]   = OpNot;
191     OpMap["OP_AND"]   = OpAnd;
192     OpMap["OP_OR"]    = OpOr;
193     OpMap["OP_XOR"]   = OpXor;
194     OpMap["OP_ANDN"]  = OpAndNot;
195     OpMap["OP_ORN"]   = OpOrNot;
196     OpMap["OP_CAST"]  = OpCast;
197     OpMap["OP_CONC"]  = OpConcat;
198     OpMap["OP_HI"]    = OpHi;
199     OpMap["OP_LO"]    = OpLo;
200     OpMap["OP_DUP"]   = OpDup;
201     OpMap["OP_DUP_LN"] = OpDupLane;
202     OpMap["OP_SEL"]   = OpSelect;
203     OpMap["OP_REV16"] = OpRev16;
204     OpMap["OP_REV32"] = OpRev32;
205     OpMap["OP_REV64"] = OpRev64;
206     OpMap["OP_REINT"] = OpReinterpret;
207     OpMap["OP_ABDL"]  = OpAbdl;
208     OpMap["OP_ABA"]   = OpAba;
209     OpMap["OP_ABAL"]  = OpAbal;
210     OpMap["OP_DIV"] = OpDiv;
211 
212     Record *SI = R.getClass("SInst");
213     Record *II = R.getClass("IInst");
214     Record *WI = R.getClass("WInst");
215     Record *SOpI = R.getClass("SOpInst");
216     Record *IOpI = R.getClass("IOpInst");
217     Record *WOpI = R.getClass("WOpInst");
218     Record *LOpI = R.getClass("LOpInst");
219     Record *NoTestOpI = R.getClass("NoTestOpInst");
220 
221     ClassMap[SI] = ClassS;
222     ClassMap[II] = ClassI;
223     ClassMap[WI] = ClassW;
224     ClassMap[SOpI] = ClassS;
225     ClassMap[IOpI] = ClassI;
226     ClassMap[WOpI] = ClassW;
227     ClassMap[LOpI] = ClassL;
228     ClassMap[NoTestOpI] = ClassNoTest;
229   }
230 
231   // run - Emit arm_neon.h.inc
232   void run(raw_ostream &o);
233 
234   // runHeader - Emit all the __builtin prototypes used in arm_neon.h
235   void runHeader(raw_ostream &o);
236 
237   // runTests - Emit tests for all the Neon intrinsics.
238   void runTests(raw_ostream &o);
239 
240 private:
241   void emitIntrinsic(raw_ostream &OS, Record *R,
242                      StringMap<ClassKind> &EmittedMap);
243   void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap,
244                       bool isA64GenBuiltinDef);
245   void genOverloadTypeCheckCode(raw_ostream &OS,
246                                 StringMap<ClassKind> &A64IntrinsicMap,
247                                 bool isA64TypeCheck);
248   void genIntrinsicRangeCheckCode(raw_ostream &OS,
249                                   StringMap<ClassKind> &A64IntrinsicMap,
250                                   bool isA64RangeCheck);
251   void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
252                      bool isA64TestGen);
253 };
254 } // end anonymous namespace
255 
256 /// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
257 /// which each StringRef representing a single type declared in the string.
258 /// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
259 /// 2xfloat and 4xfloat respectively.
ParseTypes(Record * r,std::string & s,SmallVectorImpl<StringRef> & TV)260 static void ParseTypes(Record *r, std::string &s,
261                        SmallVectorImpl<StringRef> &TV) {
262   const char *data = s.data();
263   int len = 0;
264 
265   for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
266     if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U')
267       continue;
268 
269     switch (data[len]) {
270       case 'c':
271       case 's':
272       case 'i':
273       case 'l':
274       case 'h':
275       case 'f':
276       case 'd':
277         break;
278       default:
279         PrintFatalError(r->getLoc(),
280                       "Unexpected letter: " + std::string(data + len, 1));
281     }
282     TV.push_back(StringRef(data, len + 1));
283     data += len + 1;
284     len = -1;
285   }
286 }
287 
288 /// Widen - Convert a type code into the next wider type.  char -> short,
289 /// short -> int, etc.
Widen(const char t)290 static char Widen(const char t) {
291   switch (t) {
292     case 'c':
293       return 's';
294     case 's':
295       return 'i';
296     case 'i':
297       return 'l';
298     case 'h':
299       return 'f';
300     default:
301       PrintFatalError("unhandled type in widen!");
302   }
303 }
304 
305 /// Narrow - Convert a type code into the next smaller type.  short -> char,
306 /// float -> half float, etc.
Narrow(const char t)307 static char Narrow(const char t) {
308   switch (t) {
309     case 's':
310       return 'c';
311     case 'i':
312       return 's';
313     case 'l':
314       return 'i';
315     case 'f':
316       return 'h';
317     default:
318       PrintFatalError("unhandled type in narrow!");
319   }
320 }
321 
322 /// For a particular StringRef, return the base type code, and whether it has
323 /// the quad-vector, polynomial, or unsigned modifiers set.
ClassifyType(StringRef ty,bool & quad,bool & poly,bool & usgn)324 static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
325   unsigned off = 0;
326 
327   // remember quad.
328   if (ty[off] == 'Q') {
329     quad = true;
330     ++off;
331   }
332 
333   // remember poly.
334   if (ty[off] == 'P') {
335     poly = true;
336     ++off;
337   }
338 
339   // remember unsigned.
340   if (ty[off] == 'U') {
341     usgn = true;
342     ++off;
343   }
344 
345   // base type to get the type string for.
346   return ty[off];
347 }
348 
/// ModType - Transform a type code and its modifiers based on a mod code. The
/// mod code definitions may be found at the top of arm_neon.td.
static char ModType(const char mod, char type, bool &quad, bool &poly,
                    bool &usgn, bool &scal, bool &cnst, bool &pntr) {
  switch (mod) {
    case 't':
      // Polynomial becomes its unsigned-integer equivalent.
      if (poly) {
        poly = false;
        usgn = true;
      }
      break;
    case 'u':
      // Force unsigned integer, mapping float widths to integer widths.
      usgn = true;
      poly = false;
      if (type == 'f')
        type = 'i';
      if (type == 'd')
        type = 'l';
      break;
    case 'x':
      // Force signed integer.
      usgn = false;
      poly = false;
      if (type == 'f')
        type = 'i';
      break;
    case 'f':
      // Force float; a half input yields the quad form.
      if (type == 'h')
        quad = true;
      type = 'f';
      usgn = false;
      break;
    case 'g':
      // Force the 64-bit (non-quad) form.
      quad = false;
      break;
    case 'w':
      // Widen the element and use the 128-bit form.
      type = Widen(type);
      quad = true;
      break;
    case 'n':
      // Widen the element only.
      type = Widen(type);
      break;
    case 'i':
      // Scalar int operand.
      type = 'i';
      scal = true;
      break;
    case 'l':
      // Scalar unsigned 64-bit operand.
      type = 'l';
      scal = true;
      usgn = true;
      break;
    case 's':
    case 'a':
      // Scalar of the element type.
      scal = true;
      break;
    case 'k':
      // Force the 128-bit (quad) form.
      quad = true;
      break;
    case 'c':
      cnst = true;
      // FALLTHROUGH: 'c' is a const pointer, so it also takes the 'p' path.
    case 'p':
      pntr = true;
      scal = true;
      break;
    case 'h':
      // Narrow the element; narrowing to half drops back to the 64-bit form.
      type = Narrow(type);
      if (type == 'h')
        quad = false;
      break;
    case 'e':
      // Narrow the element and make it unsigned.
      type = Narrow(type);
      usgn = true;
      break;
    default:
      break;
  }
  return type;
}
426 
427 /// TypeString - for a modifier and type, generate the name of the typedef for
428 /// that type.  QUc -> uint8x8_t.
TypeString(const char mod,StringRef typestr)429 static std::string TypeString(const char mod, StringRef typestr) {
430   bool quad = false;
431   bool poly = false;
432   bool usgn = false;
433   bool scal = false;
434   bool cnst = false;
435   bool pntr = false;
436 
437   if (mod == 'v')
438     return "void";
439   if (mod == 'i')
440     return "int";
441 
442   // base type to get the type string for.
443   char type = ClassifyType(typestr, quad, poly, usgn);
444 
445   // Based on the modifying character, change the type and width if necessary.
446   type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
447 
448   SmallString<128> s;
449 
450   if (usgn)
451     s.push_back('u');
452 
453   switch (type) {
454     case 'c':
455       s += poly ? "poly8" : "int8";
456       if (scal)
457         break;
458       s += quad ? "x16" : "x8";
459       break;
460     case 's':
461       s += poly ? "poly16" : "int16";
462       if (scal)
463         break;
464       s += quad ? "x8" : "x4";
465       break;
466     case 'i':
467       s += "int32";
468       if (scal)
469         break;
470       s += quad ? "x4" : "x2";
471       break;
472     case 'l':
473       s += "int64";
474       if (scal)
475         break;
476       s += quad ? "x2" : "x1";
477       break;
478     case 'h':
479       s += "float16";
480       if (scal)
481         break;
482       s += quad ? "x8" : "x4";
483       break;
484     case 'f':
485       s += "float32";
486       if (scal)
487         break;
488       s += quad ? "x4" : "x2";
489       break;
490     case 'd':
491       s += "float64";
492       if (scal)
493         break;
494       s += quad ? "x2" : "x1";
495       break;
496 
497     default:
498       PrintFatalError("unhandled type!");
499   }
500 
501   if (mod == '2')
502     s += "x2";
503   if (mod == '3')
504     s += "x3";
505   if (mod == '4')
506     s += "x4";
507 
508   // Append _t, finishing the type string typedef type.
509   s += "_t";
510 
511   if (cnst)
512     s += " const";
513 
514   if (pntr)
515     s += " *";
516 
517   return s.str();
518 }
519 
/// BuiltinTypeString - for a modifier and type, generate the clang
/// BuiltinsARM.def prototype code for the function.  See the top of clang's
/// Builtins.def for a description of the type strings.
static std::string BuiltinTypeString(const char mod, StringRef typestr,
                                     ClassKind ck, bool ret) {
  bool quad = false;
  bool poly = false;
  bool usgn = false;
  bool scal = false;
  bool cnst = false;
  bool pntr = false;

  if (mod == 'v')
    return "v"; // void
  if (mod == 'i')
    return "i"; // int

  // base type to get the type string for.
  char type = ClassifyType(typestr, quad, poly, usgn);

  // Based on the modifying character, change the type and width if necessary.
  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);

  // All pointers are void* pointers.  Change type to 'v' now.
  if (pntr) {
    usgn = false;
    poly = false;
    type = 'v';
  }
  // Treat half-float ('h') types as unsigned short ('s') types.
  if (type == 'h') {
    type = 's';
    usgn = true;
  }
  // Polynomial types and the non-float scalar operands of generic (ClassI /
  // ClassW) builtins are all spelled unsigned.
  usgn = usgn | poly | ((ck == ClassI || ck == ClassW) && scal && type != 'f');

  if (scal) {
    // Scalar prototype: optional sign marker, the base type character, then
    // optional const / pointer decorations.
    SmallString<128> s;

    if (usgn)
      s.push_back('U');
    else if (type == 'c')
      s.push_back('S'); // make chars explicitly signed

    if (type == 'l') // 64-bit long
      s += "LLi";
    else
      s.push_back(type);

    if (cnst)
      s.push_back('C');
    if (pntr)
      s.push_back('*');
    return s.str();
  }

  // Since the return value must be one type, return a vector type of the
  // appropriate width which we will bitcast.  An exception is made for
  // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
  // fashion, storing them to a pointer arg.
  if (ret) {
    if (mod >= '2' && mod <= '4')
      return "vv*"; // void result with void* first argument
    if (mod == 'f' || (ck != ClassB && type == 'f'))
      return quad ? "V4f" : "V2f";
    if (ck != ClassB && type == 's')
      return quad ? "V8s" : "V4s";
    if (ck != ClassB && type == 'i')
      return quad ? "V4i" : "V2i";
    if (ck != ClassB && type == 'l')
      return quad ? "V2LLi" : "V1LLi";

    // ClassB (and anything unmatched above) is returned as a byte vector.
    return quad ? "V16Sc" : "V8Sc";
  }

  // Non-return array types are passed as individual vectors.
  if (mod == '2')
    return quad ? "V16ScV16Sc" : "V8ScV8Sc";
  if (mod == '3')
    return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
  if (mod == '4')
    return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";

  if (mod == 'f' || (ck != ClassB && type == 'f'))
    return quad ? "V4f" : "V2f";
  if (ck != ClassB && type == 's')
    return quad ? "V8s" : "V4s";
  if (ck != ClassB && type == 'i')
    return quad ? "V4i" : "V2i";
  if (ck != ClassB && type == 'l')
    return quad ? "V2LLi" : "V1LLi";

  return quad ? "V16Sc" : "V8Sc";
}
614 
/// InstructionTypeCode - Computes the ARM argument character code and
/// quad status for a specific type string and ClassKind.  typeCode is left
/// unmodified for class kinds with no suffix (e.g. ClassB, ClassNone).
static void InstructionTypeCode(const StringRef &typeStr,
                                const ClassKind ck,
                                bool &quad,
                                std::string &typeCode) {
  bool poly = false;
  bool usgn = false;
  char type = ClassifyType(typeStr, quad, poly, usgn);

  switch (type) {
  case 'c':
    switch (ck) {
    case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break;
    case ClassI: typeCode = "i8"; break;
    case ClassW: typeCode = "8"; break;
    default: break;
    }
    break;
  case 's':
    switch (ck) {
    case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break;
    case ClassI: typeCode = "i16"; break;
    case ClassW: typeCode = "16"; break;
    default: break;
    }
    break;
  case 'i':
    switch (ck) {
    case ClassS: typeCode = usgn ? "u32" : "s32"; break;
    case ClassI: typeCode = "i32"; break;
    case ClassW: typeCode = "32"; break;
    default: break;
    }
    break;
  case 'l':
    switch (ck) {
    case ClassS: typeCode = usgn ? "u64" : "s64"; break;
    case ClassI: typeCode = "i64"; break;
    case ClassW: typeCode = "64"; break;
    default: break;
    }
    break;
  case 'h':
    switch (ck) {
    case ClassS:
    case ClassI: typeCode = "f16"; break;
    case ClassW: typeCode = "16"; break;
    default: break;
    }
    break;
  case 'f':
    switch (ck) {
    case ClassS:
    case ClassI: typeCode = "f32"; break;
    case ClassW: typeCode = "32"; break;
    default: break;
    }
    break;
  case 'd':
    // float64 has no width-only (ClassW) spelling.
    switch (ck) {
    case ClassS:
    case ClassI:
      typeCode += "f64";
      break;
    case ClassW:
      PrintFatalError("unhandled type!");
    default:
      break;
    }
    break;
  default:
    PrintFatalError("unhandled type!");
  }
}
690 
691 /// MangleName - Append a type or width suffix to a base neon function name,
692 /// and insert a 'q' in the appropriate location if the operation works on
693 /// 128b rather than 64b.   E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
MangleName(const std::string & name,StringRef typestr,ClassKind ck)694 static std::string MangleName(const std::string &name, StringRef typestr,
695                               ClassKind ck) {
696   if (name == "vcvt_f32_f16")
697     return name;
698 
699   bool quad = false;
700   std::string typeCode = "";
701 
702   InstructionTypeCode(typestr, ck, quad, typeCode);
703 
704   std::string s = name;
705 
706   if (typeCode.size() > 0) {
707     s += "_" + typeCode;
708   }
709 
710   if (ck == ClassB)
711     s += "_v";
712 
713   // Insert a 'q' before the first '_' character so that it ends up before
714   // _lane or _n on vector-scalar operations.
715   if (quad) {
716     size_t pos = s.find('_');
717     s = s.insert(pos, "q");
718   }
719 
720   return s;
721 }
722 
PreprocessInstruction(const StringRef & Name,const std::string & InstName,std::string & Prefix,bool & HasNPostfix,bool & HasLanePostfix,bool & HasDupPostfix,bool & IsSpecialVCvt,size_t & TBNumber)723 static void PreprocessInstruction(const StringRef &Name,
724                                   const std::string &InstName,
725                                   std::string &Prefix,
726                                   bool &HasNPostfix,
727                                   bool &HasLanePostfix,
728                                   bool &HasDupPostfix,
729                                   bool &IsSpecialVCvt,
730                                   size_t &TBNumber) {
731   // All of our instruction name fields from arm_neon.td are of the form
732   //   <instructionname>_...
733   // Thus we grab our instruction name via computation of said Prefix.
734   const size_t PrefixEnd = Name.find_first_of('_');
735   // If InstName is passed in, we use that instead of our name Prefix.
736   Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName;
737 
738   const StringRef Postfix = Name.slice(PrefixEnd, Name.size());
739 
740   HasNPostfix = Postfix.count("_n");
741   HasLanePostfix = Postfix.count("_lane");
742   HasDupPostfix = Postfix.count("_dup");
743   IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt");
744 
745   if (InstName.compare("vtbl") == 0 ||
746       InstName.compare("vtbx") == 0) {
747     // If we have a vtblN/vtbxN instruction, use the instruction's ASCII
748     // encoding to get its true value.
749     TBNumber = Name[Name.size()-1] - 48;
750   }
751 }
752 
/// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have
/// extracted, generate a FileCheck pattern for a Load Or Store
static void
GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef,
                                          const std::string& OutTypeCode,
                                          const bool &IsQuad,
                                          const bool &HasDupPostfix,
                                          const bool &HasLanePostfix,
                                          const size_t Count,
                                          std::string &RegisterSuffix) {
  const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1");
  // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang
  // will output a series of v{ld,st}1s, so we have to handle it specially.
  if ((Count == 3 || Count == 4) && IsQuad) {
    // Emit a list of exactly Count d-registers, each optionally decorated
    // with [] (dup) or [lane].
    RegisterSuffix += "{";
    for (size_t i = 0; i < Count; i++) {
      RegisterSuffix += "d{{[0-9]+}}";
      if (HasDupPostfix) {
        RegisterSuffix += "[]";
      }
      if (HasLanePostfix) {
        RegisterSuffix += "[{{[0-9]+}}]";
      }
      if (i < Count-1) {
        RegisterSuffix += ", ";
      }
    }
    RegisterSuffix += "}";
  } else {

    // Handle normal loads and stores.
    RegisterSuffix += "{";
    for (size_t i = 0; i < Count; i++) {
      RegisterSuffix += "d{{[0-9]+}}";
      if (HasDupPostfix) {
        RegisterSuffix += "[]";
      }
      if (HasLanePostfix) {
        RegisterSuffix += "[{{[0-9]+}}]";
      }
      // A quad operand without a lane decoration occupies a second
      // d-register per element.
      if (IsQuad && !HasLanePostfix) {
        RegisterSuffix += ", d{{[0-9]+}}";
        if (HasDupPostfix) {
          RegisterSuffix += "[]";
        }
      }
      if (i < Count-1) {
        RegisterSuffix += ", ";
      }
    }
    RegisterSuffix += "}, [r{{[0-9]+}}";

    // We only include the alignment hint if we have a vld1.*64 or
    // a dup/lane instruction.
    // NOTE(review): the check below actually emits the hint for any
    // vld1/vst1 dup/lane whose type code is not "8", which does not match
    // the ".*64" wording above -- confirm which is intended.
    if (IsLDSTOne) {
      if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") {
        RegisterSuffix += ":" + OutTypeCode;
      }
    }

    RegisterSuffix += "]";
  }
}
816 
HasNPostfixAndScalarArgs(const StringRef & NameRef,const bool & HasNPostfix)817 static bool HasNPostfixAndScalarArgs(const StringRef &NameRef,
818                                      const bool &HasNPostfix) {
819   return (NameRef.count("vmla") ||
820           NameRef.count("vmlal") ||
821           NameRef.count("vmlsl") ||
822           NameRef.count("vmull") ||
823           NameRef.count("vqdmlal") ||
824           NameRef.count("vqdmlsl") ||
825           NameRef.count("vqdmulh") ||
826           NameRef.count("vqdmull") ||
827           NameRef.count("vqrdmulh")) && HasNPostfix;
828 }
829 
/// IsFiveOperandLaneAccumulator - Predicate used by
/// NormalizeProtoForRegisterPatternCreation to detect the accumulator
/// operations whose _lane form carries five proto characters.
static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef,
                                         const bool &HasLanePostfix) {
  // NOTE(review): the (count("vmul") && size() == 3) clause can never be
  // true -- a 3-character name cannot contain the 4-character "vmul".
  // Preserved as-is; confirm the intended size (possibly 4).
  return (NameRef.count("vmla") ||
          NameRef.count("vmls") ||
          NameRef.count("vmlal") ||
          NameRef.count("vmlsl") ||
          (NameRef.count("vmul") && NameRef.size() == 3)||
          NameRef.count("vqdmlal") ||
          NameRef.count("vqdmlsl") ||
          NameRef.count("vqdmulh") ||
          NameRef.count("vqrdmulh")) && HasLanePostfix;
}
842 
IsSpecialLaneMultiply(const StringRef & NameRef,const bool & HasLanePostfix,const bool & IsQuad)843 static bool IsSpecialLaneMultiply(const StringRef &NameRef,
844                                   const bool &HasLanePostfix,
845                                   const bool &IsQuad) {
846   const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh"))
847                                && IsQuad;
848   const bool IsVMull = NameRef.count("mull") && !IsQuad;
849   return (IsVMulOrMulh || IsVMull) && HasLanePostfix;
850 }
851 
/// NormalizeProtoForRegisterPatternCreation - Reduce a proto string to a
/// normalized d/q/a/i register-shape string, so that only the small set of
/// register-check patterns in GenerateRegisterCheckPattern needs emitting.
static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
                                                     const std::string &Proto,
                                                     const bool &HasNPostfix,
                                                     const bool &IsQuad,
                                                     const bool &HasLanePostfix,
                                                     const bool &HasDupPostfix,
                                                     std::string &NormedProto) {
  // Handle generic case.
  const StringRef NameRef(Name);
  for (size_t i = 0, end = Proto.size(); i < end; i++) {
    switch (Proto[i]) {
    case 'u':
    case 'f':
    case 'd':
    case 's':
    case 'x':
    case 't':
    case 'n':
      // Plain vector operands follow the instruction's width.
      NormedProto += IsQuad? 'q' : 'd';
      break;
    case 'w':
    case 'k':
      // Always-quad operands.
      NormedProto += 'q';
      break;
    case 'g':
    case 'h':
    case 'e':
      // Always-double operands.
      NormedProto += 'd';
      break;
    case 'i':
      NormedProto += HasLanePostfix? 'a' : 'i';
      break;
    case 'a':
      if (HasLanePostfix) {
        NormedProto += 'a';
      } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) {
        NormedProto += IsQuad? 'q' : 'd';
      } else {
        NormedProto += 'i';
      }
      break;
    }
  }

  // Handle Special Cases.
  const bool IsNotVExt = !NameRef.count("vext");
  const bool IsVPADAL = NameRef.count("vpadal");
  const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef,
                                                           HasLanePostfix);
  const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix,
                                                      IsQuad);

  if (IsSpecialLaneMul) {
    // If we have a special lane multiply, overwrite the third character with
    // the fourth and drop everything from index 3 on.
    NormedProto[2] = NormedProto[3];
    NormedProto.erase(3);
  } else if (NormedProto.size() == 4 &&
             NormedProto[0] == NormedProto[1] &&
             IsNotVExt) {
    // If NormedProto.size() == 4 and the first two proto characters are the
    // same, ignore the first.
    NormedProto = NormedProto.substr(1, 3);
  } else if (Is5OpLaneAccum) {
    // If we have a 5 op lane accumulator operation, we take characters 1,2,4
    std::string tmp = NormedProto.substr(1,2);
    tmp += NormedProto[4];
    NormedProto = tmp;
  } else if (IsVPADAL) {
    // NOTE(review): the original comment said "ignore the first character",
    // but substr(0, 2) keeps the FIRST TWO characters -- confirm intent.
    NormedProto = NormedProto.substr(0, 2);
  } else if (NameRef.count("vdup") && NormedProto.size() > 2) {
    // If our instruction is a dup instruction, keep only the first and
    // last characters.
    std::string tmp = "";
    tmp += NormedProto[0];
    tmp += NormedProto[NormedProto.size()-1];
    NormedProto = tmp;
  }
}
931 
932 /// GenerateRegisterCheckPatterns - Given a bunch of data we have
933 /// extracted, generate a FileCheck pattern to check that an
934 /// instruction's arguments are correct.
GenerateRegisterCheckPattern(const std::string & Name,const std::string & Proto,const std::string & OutTypeCode,const bool & HasNPostfix,const bool & IsQuad,const bool & HasLanePostfix,const bool & HasDupPostfix,const size_t & TBNumber,std::string & RegisterSuffix)935 static void GenerateRegisterCheckPattern(const std::string &Name,
936                                          const std::string &Proto,
937                                          const std::string &OutTypeCode,
938                                          const bool &HasNPostfix,
939                                          const bool &IsQuad,
940                                          const bool &HasLanePostfix,
941                                          const bool &HasDupPostfix,
942                                          const size_t &TBNumber,
943                                          std::string &RegisterSuffix) {
944 
945   RegisterSuffix = "";
946 
947   const StringRef NameRef(Name);
948   const StringRef ProtoRef(Proto);
949 
950   if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) {
951     return;
952   }
953 
954   const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst");
955   const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx");
956 
957   if (IsLoadStore) {
958     // Grab N value from  v{ld,st}N using its ascii representation.
959     const size_t Count = NameRef[3] - 48;
960 
961     GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad,
962                                               HasDupPostfix, HasLanePostfix,
963                                               Count, RegisterSuffix);
964   } else if (IsTBXOrTBL) {
965     RegisterSuffix += "d{{[0-9]+}}, {";
966     for (size_t i = 0; i < TBNumber-1; i++) {
967       RegisterSuffix += "d{{[0-9]+}}, ";
968     }
969     RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}";
970   } else {
971     // Handle a normal instruction.
972     if (NameRef.count("vget") || NameRef.count("vset"))
973       return;
974 
975     // We first normalize our proto, since we only need to emit 4
976     // different types of checks, yet have more than 4 proto types
977     // that map onto those 4 patterns.
978     std::string NormalizedProto("");
979     NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad,
980                                              HasLanePostfix, HasDupPostfix,
981                                              NormalizedProto);
982 
983     for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) {
984       const char &c = NormalizedProto[i];
985       switch (c) {
986       case 'q':
987         RegisterSuffix += "q{{[0-9]+}}, ";
988         break;
989 
990       case 'd':
991         RegisterSuffix += "d{{[0-9]+}}, ";
992         break;
993 
994       case 'i':
995         RegisterSuffix += "#{{[0-9]+}}, ";
996         break;
997 
998       case 'a':
999         RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], ";
1000         break;
1001       }
1002     }
1003 
1004     // Remove extra ", ".
1005     RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2);
1006   }
1007 }
1008 
/// GenerateChecksForIntrinsic - Given a specific instruction name +
/// typestr + class kind, generate the proper set of FileCheck
/// Patterns to check for. We could just return a string, but instead
/// use a vector since it provides us with the extra flexibility of
/// emitting multiple checks, which comes in handy for certain cases
/// like mla where we want to check for 2 different instructions.
///
/// The function first derives the instruction prefix and postfix flags
/// via PreprocessInstruction/InstructionTypeCode, then walks a chain of
/// special cases (each of which returns early) before falling through to
/// the generic "prefix.outtype registers" pattern at the bottom.
static void GenerateChecksForIntrinsic(const std::string &Name,
                                       const std::string &Proto,
                                       StringRef &OutTypeStr,
                                       StringRef &InTypeStr,
                                       ClassKind Ck,
                                       const std::string &InstName,
                                       bool IsHiddenLOp,
                                       std::vector<std::string>& Result) {

  // If Ck is a ClassNoTest instruction, just return so no test is
  // emitted.
  if(Ck == ClassNoTest)
    return;

  // Special-cased directly by name: this conversion always emits exactly
  // this instruction.
  if (Name == "vcvt_f32_f16") {
    Result.push_back("vcvt.f32.f16");
    return;
  }


  // Now we preprocess our instruction given the data we have to get the
  // data that we need.
  // Create a StringRef for String Manipulation of our Name.
  const StringRef NameRef(Name);
  // Instruction Prefix.
  std::string Prefix;
  // The type code for our out type string.
  std::string OutTypeCode;
  // To handle our different cases, we need to check for different postfixes.
  // Is our instruction a quad instruction.
  bool IsQuad = false;
  // Our instruction is of the form <instructionname>_n.
  bool HasNPostfix = false;
  // Our instruction is of the form <instructionname>_lane.
  bool HasLanePostfix = false;
  // Our instruction is of the form <instructionname>_dup.
  bool HasDupPostfix  = false;
  // Our instruction is a vcvt instruction which requires special handling.
  bool IsSpecialVCvt = false;
  // If we have a vtbxN or vtblN instruction, this is set to N.
  // (-1 wraps to SIZE_MAX as a "not set" sentinel; presumably overwritten
  // by PreprocessInstruction for vtbl/vtbx — TODO confirm.)
  size_t TBNumber = -1;
  // Register Suffix
  std::string RegisterSuffix;

  PreprocessInstruction(NameRef, InstName, Prefix,
                        HasNPostfix, HasLanePostfix, HasDupPostfix,
                        IsSpecialVCvt, TBNumber);

  InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode);
  GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad,
                               HasLanePostfix, HasDupPostfix, TBNumber,
                               RegisterSuffix);

  // In the following section, we handle a bunch of special cases. You can tell
  // a special case by the fact we are returning early.

  // If our instruction is a logical instruction without postfix or a
  // hidden LOp just return the current Prefix.
  if (Ck == ClassL || IsHiddenLOp) {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // If we have a vmov, due to the many different cases, some of which
  // vary within the different intrinsics generated for a single
  // instruction type, just output a vmov. (e.g. given an instruction
  // A, A.u32 might be vmov and A.u8 might be vmov.8).
  //
  // FIXME: Maybe something can be done about this. The two cases that we care
  // about are vmov as an LType and vmov as a WType.
  if (Prefix == "vmov") {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // In the following section, we handle special cases.

  if (OutTypeCode == "64") {
    // If we have a 64 bit vdup/vext and are handling an uint64x1_t
    // type, the intrinsic will be optimized away, so just return
    // nothing.  On the other hand if we are handling an uint64x2_t
    // (i.e. quad instruction), vdup/vmov instructions should be
    // emitted.
    if (Prefix == "vdup" || Prefix == "vext") {
      if (IsQuad) {
        Result.push_back("{{vmov|vdup}}");
      }
      return;
    }

    // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with
    // multiple register operands.
    bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3"
                            || Prefix == "vld4";
    bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3"
                            || Prefix == "vst4";
    if (MultiLoadPrefix || MultiStorePrefix) {
      // Reuse the "vld"/"vst" stem of the name and append "1.64".
      Result.push_back(NameRef.slice(0, 3).str() + "1.64");
      return;
    }

    // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of
    // emitting said instructions. So return a check for
    // vldr/vstr/vmov/str instead.
    if (HasLanePostfix || HasDupPostfix) {
      if (Prefix == "vst1") {
        Result.push_back("{{str|vstr|vmov}}");
        return;
      } else if (Prefix == "vld1") {
        Result.push_back("{{ldr|vldr|vmov}}");
        return;
      }
    }
  }

  // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are
  // sometimes disassembled as vtrn.32. We use a regex to handle both
  // cases.
  if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") {
    Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix);
    return;
  }

  // Currently on most ARM processors, we do not use vmla/vmls for
  // quad floating point operations. Instead we output vmul + vadd. So
  // check if we have one of those instructions and just output a
  // check for vmul.
  if (OutTypeCode == "f32") {
    if (Prefix == "vmls") {
      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
      Result.push_back("vsub." + OutTypeCode);
      return;
    } else if (Prefix == "vmla") {
      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
      Result.push_back("vadd." + OutTypeCode);
      return;
    }
  }

  // If we have vcvt, get the input type from the instruction name
  // (which should be of the form instname_inputtype) and append it
  // before the output type.
  if (Prefix == "vcvt") {
    const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1);
    Prefix += "." + inTypeCode;
  }

  // Append output type code to get our final mangled instruction.
  Prefix += "." + OutTypeCode;

  Result.push_back(Prefix + " " + RegisterSuffix);
}
1167 
/// UseMacro - Examine the prototype string to determine if the intrinsic
/// should be defined as a preprocessor macro instead of an inline function.
static bool UseMacro(const std::string &proto) {
  // A macro is required in two situations:
  //
  //  * An immediate argument ('i') must be #define'd rather than declared
  //    as a function so that SemaChecking can range check the immediate
  //    value the user passes in.
  //
  //  * Pointer ('p') and const pointer ('c') arguments need macros to
  //    avoid hiding aligned attributes carried on the pointer type.
  return proto.find_first_of("ipc") != std::string::npos;
}
1185 
/// MacroArgUsedDirectly - Return true if argument i for an intrinsic that is
/// defined as a macro should be accessed directly instead of being first
/// assigned to a local temporary.
static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
  switch (proto[i]) {
  case 'i': // constant int
  case 'p': // pointer
  case 'c': // const pointer
    return true;
  default:
    return false;
  }
}
1193 
1194 // Generate the string "(argtype a, argtype b, ...)"
GenArgs(const std::string & proto,StringRef typestr)1195 static std::string GenArgs(const std::string &proto, StringRef typestr) {
1196   bool define = UseMacro(proto);
1197   char arg = 'a';
1198 
1199   std::string s;
1200   s += "(";
1201 
1202   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1203     if (define) {
1204       // Some macro arguments are used directly instead of being assigned
1205       // to local temporaries; prepend an underscore prefix to make their
1206       // names consistent with the local temporaries.
1207       if (MacroArgUsedDirectly(proto, i))
1208         s += "__";
1209     } else {
1210       s += TypeString(proto[i], typestr) + " __";
1211     }
1212     s.push_back(arg);
1213     if ((i + 1) < e)
1214       s += ", ";
1215   }
1216 
1217   s += ")";
1218   return s;
1219 }
1220 
1221 // Macro arguments are not type-checked like inline function arguments, so
1222 // assign them to local temporaries to get the right type checking.
GenMacroLocals(const std::string & proto,StringRef typestr)1223 static std::string GenMacroLocals(const std::string &proto, StringRef typestr) {
1224   char arg = 'a';
1225   std::string s;
1226   bool generatedLocal = false;
1227 
1228   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1229     // Do not create a temporary for an immediate argument.
1230     // That would defeat the whole point of using a macro!
1231     if (MacroArgUsedDirectly(proto, i))
1232       continue;
1233     generatedLocal = true;
1234 
1235     s += TypeString(proto[i], typestr) + " __";
1236     s.push_back(arg);
1237     s += " = (";
1238     s.push_back(arg);
1239     s += "); ";
1240   }
1241 
1242   if (generatedLocal)
1243     s += "\\\n  ";
1244   return s;
1245 }
1246 
1247 // Use the vmovl builtin to sign-extend or zero-extend a vector.
Extend(StringRef typestr,const std::string & a)1248 static std::string Extend(StringRef typestr, const std::string &a) {
1249   std::string s;
1250   s = MangleName("vmovl", typestr, ClassS);
1251   s += "(" + a + ")";
1252   return s;
1253 }
1254 
Duplicate(unsigned nElts,StringRef typestr,const std::string & a)1255 static std::string Duplicate(unsigned nElts, StringRef typestr,
1256                              const std::string &a) {
1257   std::string s;
1258 
1259   s = "(" + TypeString('d', typestr) + "){ ";
1260   for (unsigned i = 0; i != nElts; ++i) {
1261     s += a;
1262     if ((i + 1) < nElts)
1263       s += ", ";
1264   }
1265   s += " }";
1266 
1267   return s;
1268 }
1269 
// Broadcast a single lane of a vector across all elements by emitting a
// __builtin_shufflevector whose mask repeats the lane index nElts times.
static std::string SplatLane(unsigned nElts, const std::string &vec,
                             const std::string &lane) {
  std::string result = "__builtin_shufflevector(" + vec + ", " + vec;
  for (unsigned remaining = nElts; remaining != 0; --remaining)
    result += ", " + lane;
  result += ")";
  return result;
}
1278 
GetNumElements(StringRef typestr,bool & quad)1279 static unsigned GetNumElements(StringRef typestr, bool &quad) {
1280   quad = false;
1281   bool dummy = false;
1282   char type = ClassifyType(typestr, quad, dummy, dummy);
1283   unsigned nElts = 0;
1284   switch (type) {
1285   case 'c': nElts = 8; break;
1286   case 's': nElts = 4; break;
1287   case 'i': nElts = 2; break;
1288   case 'l': nElts = 1; break;
1289   case 'h': nElts = 4; break;
1290   case 'f': nElts = 2; break;
1291   case 'd':
1292     nElts = 1;
1293     break;
1294   default:
1295     PrintFatalError("unhandled type!");
1296   }
1297   if (quad) nElts <<= 1;
1298   return nElts;
1299 }
1300 
// Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
//
// proto[0] describes the return type and typestr the element type; the
// emitted expression refers to the (possibly macro-local) arguments
// "__a", "__b", etc.  For inline functions a leading "return " is added;
// macro bodies are statement expressions, so the bare expression suffices.
static std::string GenOpString(OpKind op, const std::string &proto,
                               StringRef typestr) {
  bool quad;
  unsigned nElts = GetNumElements(typestr, quad);
  bool define = UseMacro(proto);

  std::string ts = TypeString(proto[0], typestr);
  std::string s;
  if (!define) {
    s = "return ";
  }

  switch(op) {
  // Basic arithmetic.  The "l" (long) variants widen both operands with
  // vmovl first; the "w" (wide) variants widen only the second operand.
  case OpAdd:
    s += "__a + __b;";
    break;
  case OpAddl:
    s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
    break;
  case OpAddw:
    s += "__a + " + Extend(typestr, "__b") + ";";
    break;
  case OpSub:
    s += "__a - __b;";
    break;
  case OpSubl:
    s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
    break;
  case OpSubw:
    s += "__a - " + Extend(typestr, "__b") + ";";
    break;
  // Multiplies: "N" variants multiply by a broadcast scalar, "Lane"
  // variants by a splatted lane of the last vector argument.
  case OpMulN:
    s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
    break;
  case OpMulLane:
    s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
    break;
  case OpMul:
    s += "__a * __b;";
    break;
  case OpMullLane:
    s += MangleName("vmull", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  // Multiply-accumulate / multiply-subtract families.
  case OpMlaN:
    s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlaLane:
    s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMla:
    s += "__a + (__b * __c);";
    break;
  case OpMlalN:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlalLane:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlal:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    break;
  case OpMlsN:
    s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlsLane:
    s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMls:
    s += "__a - (__b * __c);";
    break;
  case OpMlslN:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlslLane:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlsl:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    break;
  // Saturating-doubling lane operations: implemented by calling the
  // non-lane intrinsic with a splatted lane operand.
  case OpQDMullLane:
    s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQDMlalLane:
    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMlslLane:
    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMulhLane:
    s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQRDMulhLane:
    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  // Comparisons: cast the i1-vector result back to the declared return type.
  case OpEq:
    s += "(" + ts + ")(__a == __b);";
    break;
  case OpGe:
    s += "(" + ts + ")(__a >= __b);";
    break;
  case OpLe:
    s += "(" + ts + ")(__a <= __b);";
    break;
  case OpGt:
    s += "(" + ts + ")(__a > __b);";
    break;
  case OpLt:
    s += "(" + ts + ")(__a < __b);";
    break;
  case OpNeg:
    s += " -__a;";
    break;
  case OpNot:
    s += " ~__a;";
    break;
  case OpAnd:
    s += "__a & __b;";
    break;
  case OpOr:
    s += "__a | __b;";
    break;
  case OpXor:
    s += "__a ^ __b;";
    break;
  case OpAndNot:
    s += "__a & ~__b;";
    break;
  case OpOrNot:
    s += "__a | ~__b;";
    break;
  case OpCast:
    s += "(" + ts + ")__a;";
    break;
  // Concatenate two 64-bit vectors into one 128-bit vector via shuffle.
  case OpConcat:
    s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
    s += ", (int64x1_t)__b, 0, 1);";
    break;
  case OpHi:
    // nElts is for the result vector, so the source is twice that number.
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = nElts; i < nElts * 2; ++i)
      s += ", " + utostr(i);
    s+= ");";
    break;
  case OpLo:
    // Low half: select indices [0, nElts) of the source vector.
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = 0; i < nElts; ++i)
      s += ", " + utostr(i);
    s+= ");";
    break;
  case OpDup:
    s += Duplicate(nElts, typestr, "__a") + ";";
    break;
  case OpDupLane:
    s += SplatLane(nElts, "__a", "__b") + ";";
    break;
  case OpSelect:
    // ((0 & 1) | (~0 & 2))
    // Bitwise select: mask __a chooses between __b and __c; operands are
    // cast to the mask's type for the bitwise ops, result back to ts.
    s += "(" + ts + ")";
    ts = TypeString(proto[1], typestr);
    s += "((__a & (" + ts + ")__b) | ";
    s += "(~__a & (" + ts + ")__c));";
    break;
  // Reversal ops: build a shuffle mask that reverses elements within each
  // 16-, 32-, or 64-bit chunk respectively.
  case OpRev16:
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = 2; i <= nElts; i += 2)
      for (unsigned j = 0; j != 2; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  case OpRev32: {
    unsigned WordElts = nElts >> (1 + (int)quad);
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = WordElts; i <= nElts; i += WordElts)
      for (unsigned j = 0; j != WordElts; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  }
  case OpRev64: {
    unsigned DblWordElts = nElts >> (int)quad;
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
      for (unsigned j = 0; j != DblWordElts; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  }
  case OpAbdl: {
    std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
    if (typestr[0] != 'U') {
      // vabd results are always unsigned and must be zero-extended.
      std::string utype = "U" + typestr.str();
      s += "(" + TypeString(proto[0], typestr) + ")";
      abd = "(" + TypeString('d', utype) + ")" + abd;
      s += Extend(utype, abd) + ";";
    } else {
      s += Extend(typestr, abd) + ";";
    }
    break;
  }
  case OpAba:
    s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
    break;
  case OpAbal: {
    s += "__a + ";
    std::string abd = MangleName("vabd", typestr, ClassS) + "(__b, __c)";
    if (typestr[0] != 'U') {
      // vabd results are always unsigned and must be zero-extended.
      std::string utype = "U" + typestr.str();
      s += "(" + TypeString(proto[0], typestr) + ")";
      abd = "(" + TypeString('d', utype) + ")" + abd;
      s += Extend(utype, abd) + ";";
    } else {
      s += Extend(typestr, abd) + ";";
    }
    break;
  }
  case OpDiv:
    s += "__a / __b;";
    break;
  default:
    PrintFatalError("unknown OpKind!");
  }
  return s;
}
1538 
// GetNeonEnum - Compute the NeonTypeFlags encoding (element type, signedness,
// quad-ness) for this prototype/type combination.  GenBuiltin appends this
// value as the trailing constant-integer argument of ClassB builtin calls so
// that CodeGen can recover the real element type.
static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
  unsigned mod = proto[0];

  // 'v' (void) and 'f' carry no type information of their own; use the
  // first argument's modifier instead.
  if (mod == 'v' || mod == 'f')
    mod = proto[1];

  bool quad = false;
  bool poly = false;
  bool usgn = false;
  bool scal = false;
  bool cnst = false;
  bool pntr = false;

  // Base type to get the type string for.
  char type = ClassifyType(typestr, quad, poly, usgn);

  // Based on the modifying character, change the type and width if necessary.
  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);

  // Map the resulting base type (and polynomial-ness) onto the EltType enum.
  NeonTypeFlags::EltType ET;
  switch (type) {
    case 'c':
      ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8;
      break;
    case 's':
      ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16;
      break;
    case 'i':
      ET = NeonTypeFlags::Int32;
      break;
    case 'l':
      ET = NeonTypeFlags::Int64;
      break;
    case 'h':
      ET = NeonTypeFlags::Float16;
      break;
    case 'f':
      ET = NeonTypeFlags::Float32;
      break;
    case 'd':
      ET = NeonTypeFlags::Float64;
      break;
    default:
      PrintFatalError("unhandled type!");
  }
  // 'g' forces a non-quad flag even for quad type strings — presumably for
  // intrinsics that narrow to a 64-bit result; TODO confirm against ModType.
  NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g');
  return Flags.getFlags();
}
1587 
// Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
//
// Emits the body of the header function/macro: an optional result cast or
// sret temporary, the call to the mangled __builtin_neon_* function with
// each argument cast/split as required, and (for ClassB) the trailing
// type-flags constant computed by GetNeonEnum.
static std::string GenBuiltin(const std::string &name, const std::string &proto,
                              StringRef typestr, ClassKind ck) {
  std::string s;

  // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
  // sret-like argument.
  bool sret = (proto[0] >= '2' && proto[0] <= '4');

  bool define = UseMacro(proto);

  // Check if the prototype has a scalar operand with the type of the vector
  // elements.  If not, bitcasting the args will take care of arg checking.
  // The actual signedness etc. will be taken care of with special enums.
  if (proto.find('s') == std::string::npos)
    ck = ClassB;

  if (proto[0] != 'v') {
    std::string ts = TypeString(proto[0], typestr);

    // Non-void result: either declare the sret temporary "r" or open a cast
    // of the builtin's return value to the declared type.
    if (define) {
      if (sret)
        s += ts + " r; ";
      else
        s += "(" + ts + ")";
    } else if (sret) {
      s += ts + " r; ";
    } else {
      s += "return (" + ts + ")";
    }
  }

  // An 'a' modifier means the last argument is a scalar that must be
  // broadcast; the underlying builtin is the non-"_n" variant.
  bool splat = proto.find('a') != std::string::npos;

  s += "__builtin_neon_";
  if (splat) {
    // Call the non-splat builtin: chop off the "_n" suffix from the name.
    std::string vname(name, 0, name.size()-2);
    s += MangleName(vname, typestr, ck);
  } else {
    s += MangleName(name, typestr, ck);
  }
  s += "(";

  // Pass the address of the return variable as the first argument to sret-like
  // builtins.
  if (sret)
    s += "&r, ";

  char arg = 'a';
  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
    std::string args = std::string(&arg, 1);

    // Use the local temporaries instead of the macro arguments.
    args = "__" + args;

    // Classify this argument's type so we know whether a cast is needed.
    bool argQuad = false;
    bool argPoly = false;
    bool argUsgn = false;
    bool argScalar = false;
    bool dummy = false;
    char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
    argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
                      dummy, dummy);

    // Handle multiple-vector values specially, emitting each subvector as an
    // argument to the __builtin.
    if (proto[i] >= '2' && proto[i] <= '4') {
      // Check if an explicit cast is needed.
      if (argType != 'c' || argPoly || argUsgn)
        args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;

      // Expand the struct into its .val[0] .. .val[N-1] members.
      for (unsigned vi = 0, ve = proto[i] - '0'; vi != ve; ++vi) {
        s += args + ".val[" + utostr(vi) + "]";
        if ((vi + 1) < ve)
          s += ", ";
      }
      if ((i + 1) < e)
        s += ", ";

      continue;
    }

    // Broadcast the final scalar argument of an "_n" intrinsic.
    if (splat && (i + 1) == e)
      args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);

    // Check if an explicit cast is needed.
    if ((splat || !argScalar) &&
        ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
      std::string argTypeStr = "c";
      if (ck != ClassB)
        argTypeStr = argType;
      if (argQuad)
        argTypeStr = "Q" + argTypeStr;
      args = "(" + TypeString('d', argTypeStr) + ")" + args;
    }

    s += args;
    if ((i + 1) < e)
      s += ", ";
  }

  // Extra constant integer to hold type class enum for this function, e.g. s8
  if (ck == ClassB)
    s += ", " + utostr(GetNeonEnum(proto, typestr));

  s += ");";

  // Yield the sret temporary as the result: a trailing expression in the
  // macro's statement expression, or an explicit return in a function.
  if (proto[0] != 'v' && sret) {
    if (define)
      s += " r;";
    else
      s += " return r;";
  }
  return s;
}
1704 
GenBuiltinDef(const std::string & name,const std::string & proto,StringRef typestr,ClassKind ck)1705 static std::string GenBuiltinDef(const std::string &name,
1706                                  const std::string &proto,
1707                                  StringRef typestr, ClassKind ck) {
1708   std::string s("BUILTIN(__builtin_neon_");
1709 
1710   // If all types are the same size, bitcasting the args will take care
1711   // of arg checking.  The actual signedness etc. will be taken care of with
1712   // special enums.
1713   if (proto.find('s') == std::string::npos)
1714     ck = ClassB;
1715 
1716   s += MangleName(name, typestr, ck);
1717   s += ", \"";
1718 
1719   for (unsigned i = 0, e = proto.size(); i != e; ++i)
1720     s += BuiltinTypeString(proto[i], typestr, ck, i == 0);
1721 
1722   // Extra constant integer to hold type class enum for this function, e.g. s8
1723   if (ck == ClassB)
1724     s += "i";
1725 
1726   s += "\", \"n\")";
1727   return s;
1728 }
1729 
GenIntrinsic(const std::string & name,const std::string & proto,StringRef outTypeStr,StringRef inTypeStr,OpKind kind,ClassKind classKind)1730 static std::string GenIntrinsic(const std::string &name,
1731                                 const std::string &proto,
1732                                 StringRef outTypeStr, StringRef inTypeStr,
1733                                 OpKind kind, ClassKind classKind) {
1734   assert(!proto.empty() && "");
1735   bool define = UseMacro(proto) && kind != OpUnavailable;
1736   std::string s;
1737 
1738   // static always inline + return type
1739   if (define)
1740     s += "#define ";
1741   else
1742     s += "__ai " + TypeString(proto[0], outTypeStr) + " ";
1743 
1744   // Function name with type suffix
1745   std::string mangledName = MangleName(name, outTypeStr, ClassS);
1746   if (outTypeStr != inTypeStr) {
1747     // If the input type is different (e.g., for vreinterpret), append a suffix
1748     // for the input type.  String off a "Q" (quad) prefix so that MangleName
1749     // does not insert another "q" in the name.
1750     unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
1751     StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
1752     mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
1753   }
1754   s += mangledName;
1755 
1756   // Function arguments
1757   s += GenArgs(proto, inTypeStr);
1758 
1759   // Definition.
1760   if (define) {
1761     s += " __extension__ ({ \\\n  ";
1762     s += GenMacroLocals(proto, inTypeStr);
1763   } else if (kind == OpUnavailable) {
1764     s += " __attribute__((unavailable));\n";
1765     return s;
1766   } else
1767     s += " {\n  ";
1768 
1769   if (kind != OpNone)
1770     s += GenOpString(kind, proto, outTypeStr);
1771   else
1772     s += GenBuiltin(name, proto, outTypeStr, classKind);
1773   if (define)
1774     s += " })";
1775   else
1776     s += " }";
1777   s += "\n";
1778   return s;
1779 }
1780 
1781 /// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
1782 /// is comprised of type definitions and function declarations.
run(raw_ostream & OS)1783 void NeonEmitter::run(raw_ostream &OS) {
1784   OS <<
1785     "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
1786     "---===\n"
1787     " *\n"
1788     " * Permission is hereby granted, free of charge, to any person obtaining "
1789     "a copy\n"
1790     " * of this software and associated documentation files (the \"Software\"),"
1791     " to deal\n"
1792     " * in the Software without restriction, including without limitation the "
1793     "rights\n"
1794     " * to use, copy, modify, merge, publish, distribute, sublicense, "
1795     "and/or sell\n"
1796     " * copies of the Software, and to permit persons to whom the Software is\n"
1797     " * furnished to do so, subject to the following conditions:\n"
1798     " *\n"
1799     " * The above copyright notice and this permission notice shall be "
1800     "included in\n"
1801     " * all copies or substantial portions of the Software.\n"
1802     " *\n"
1803     " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
1804     "EXPRESS OR\n"
1805     " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
1806     "MERCHANTABILITY,\n"
1807     " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
1808     "SHALL THE\n"
1809     " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
1810     "OTHER\n"
1811     " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
1812     "ARISING FROM,\n"
1813     " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
1814     "DEALINGS IN\n"
1815     " * THE SOFTWARE.\n"
1816     " *\n"
1817     " *===--------------------------------------------------------------------"
1818     "---===\n"
1819     " */\n\n";
1820 
1821   OS << "#ifndef __ARM_NEON_H\n";
1822   OS << "#define __ARM_NEON_H\n\n";
1823 
1824   OS << "#if !defined(__ARM_NEON__) && !defined(__AARCH_FEATURE_ADVSIMD)\n";
1825   OS << "#error \"NEON support not enabled\"\n";
1826   OS << "#endif\n\n";
1827 
1828   OS << "#include <stdint.h>\n\n";
1829 
1830   // Emit NEON-specific scalar typedefs.
1831   OS << "typedef float float32_t;\n";
1832   OS << "typedef __fp16 float16_t;\n";
1833 
1834   OS << "#ifdef __aarch64__\n";
1835   OS << "typedef double float64_t;\n";
1836   OS << "#endif\n\n";
1837 
1838   // For now, signedness of polynomial types depends on target
1839   OS << "#ifdef __aarch64__\n";
1840   OS << "typedef uint8_t poly8_t;\n";
1841   OS << "typedef uint16_t poly16_t;\n";
1842   OS << "#else\n";
1843   OS << "typedef int8_t poly8_t;\n";
1844   OS << "typedef int16_t poly16_t;\n";
1845   OS << "#endif\n";
1846 
1847   // Emit Neon vector typedefs.
1848   std::string TypedefTypes(
1849       "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfQdPcQPcPsQPs");
1850   SmallVector<StringRef, 24> TDTypeVec;
1851   ParseTypes(0, TypedefTypes, TDTypeVec);
1852 
1853   // Emit vector typedefs.
1854   for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
1855     bool dummy, quad = false, poly = false;
1856     char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
1857     bool isA64 = false;
1858 
1859     if (type == 'd' && quad)
1860       isA64 = true;
1861 
1862     if (isA64)
1863       OS << "#ifdef __aarch64__\n";
1864 
1865     if (poly)
1866       OS << "typedef __attribute__((neon_polyvector_type(";
1867     else
1868       OS << "typedef __attribute__((neon_vector_type(";
1869 
1870     unsigned nElts = GetNumElements(TDTypeVec[i], quad);
1871     OS << utostr(nElts) << "))) ";
1872     if (nElts < 10)
1873       OS << " ";
1874 
1875     OS << TypeString('s', TDTypeVec[i]);
1876     OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";
1877 
1878     if (isA64)
1879       OS << "#endif\n";
1880   }
1881   OS << "\n";
1882 
1883   // Emit struct typedefs.
1884   for (unsigned vi = 2; vi != 5; ++vi) {
1885     for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
1886       bool dummy, quad = false, poly = false;
1887       char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
1888       bool isA64 = false;
1889 
1890       if (type == 'd' && quad)
1891         isA64 = true;
1892 
1893       if (isA64)
1894         OS << "#ifdef __aarch64__\n";
1895 
1896       std::string ts = TypeString('d', TDTypeVec[i]);
1897       std::string vs = TypeString('0' + vi, TDTypeVec[i]);
1898       OS << "typedef struct " << vs << " {\n";
1899       OS << "  " << ts << " val";
1900       OS << "[" << utostr(vi) << "]";
1901       OS << ";\n} ";
1902       OS << vs << ";\n";
1903 
1904       if (isA64)
1905         OS << "#endif\n";
1906 
1907       OS << "\n";
1908     }
1909   }
1910 
1911   OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";
1912 
1913   std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
1914 
1915   StringMap<ClassKind> EmittedMap;
1916 
1917   // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
1918   // intrinsics.  (Some of the saturating multiply instructions are also
1919   // used to implement the corresponding "_lane" variants, but tablegen
1920   // sorts the records into alphabetical order so that the "_lane" variants
1921   // come after the intrinsics they use.)
1922   emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap);
1923   emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap);
1924   emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap);
1925 
1926   // ARM intrinsics must be emitted before AArch64 intrinsics to ensure
1927   // common intrinsics appear only once in the output stream.
1928   // The check for uniquiness is done in emitIntrinsic.
1929   // Emit ARM intrinsics.
1930   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
1931     Record *R = RV[i];
1932 
1933     // Skip AArch64 intrinsics; they will be emitted at the end.
1934     bool isA64 = R->getValueAsBit("isA64");
1935     if (isA64)
1936       continue;
1937 
1938     if (R->getName() != "VMOVL" && R->getName() != "VMULL" &&
1939         R->getName() != "VABD")
1940       emitIntrinsic(OS, R, EmittedMap);
1941   }
1942 
1943   // Emit AArch64-specific intrinsics.
1944   OS << "#ifdef __aarch64__\n";
1945 
1946   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
1947     Record *R = RV[i];
1948 
1949     // Skip ARM intrinsics already included above.
1950     bool isA64 = R->getValueAsBit("isA64");
1951     if (!isA64)
1952       continue;
1953 
1954     emitIntrinsic(OS, R, EmittedMap);
1955   }
1956 
1957   OS << "#endif\n\n";
1958 
1959   OS << "#undef __ai\n\n";
1960   OS << "#endif /* __ARM_NEON_H */\n";
1961 }
1962 
1963 /// emitIntrinsic - Write out the arm_neon.h header file definitions for the
1964 /// intrinsics specified by record R checking for intrinsic uniqueness.
emitIntrinsic(raw_ostream & OS,Record * R,StringMap<ClassKind> & EmittedMap)1965 void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R,
1966                                 StringMap<ClassKind> &EmittedMap) {
1967   std::string name = R->getValueAsString("Name");
1968   std::string Proto = R->getValueAsString("Prototype");
1969   std::string Types = R->getValueAsString("Types");
1970 
1971   SmallVector<StringRef, 16> TypeVec;
1972   ParseTypes(R, Types, TypeVec);
1973 
1974   OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
1975 
1976   ClassKind classKind = ClassNone;
1977   if (R->getSuperClasses().size() >= 2)
1978     classKind = ClassMap[R->getSuperClasses()[1]];
1979   if (classKind == ClassNone && kind == OpNone)
1980     PrintFatalError(R->getLoc(), "Builtin has no class kind");
1981 
1982   for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
1983     if (kind == OpReinterpret) {
1984       bool outQuad = false;
1985       bool dummy = false;
1986       (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
1987       for (unsigned srcti = 0, srcte = TypeVec.size();
1988            srcti != srcte; ++srcti) {
1989         bool inQuad = false;
1990         (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
1991         if (srcti == ti || inQuad != outQuad)
1992           continue;
1993         std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
1994                                      OpCast, ClassS);
1995         if (EmittedMap.count(s))
1996           continue;
1997         EmittedMap[s] = ClassS;
1998         OS << s;
1999       }
2000     } else {
2001       std::string s =
2002           GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind);
2003       if (EmittedMap.count(s))
2004         continue;
2005       EmittedMap[s] = classKind;
2006       OS << s;
2007     }
2008   }
2009   OS << "\n";
2010 }
2011 
RangeFromType(const char mod,StringRef typestr)2012 static unsigned RangeFromType(const char mod, StringRef typestr) {
2013   // base type to get the type string for.
2014   bool quad = false, dummy = false;
2015   char type = ClassifyType(typestr, quad, dummy, dummy);
2016   type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);
2017 
2018   switch (type) {
2019     case 'c':
2020       return (8 << (int)quad) - 1;
2021     case 'h':
2022     case 's':
2023       return (4 << (int)quad) - 1;
2024     case 'f':
2025     case 'i':
2026       return (2 << (int)quad) - 1;
2027     case 'l':
2028       return (1 << (int)quad) - 1;
2029     default:
2030       PrintFatalError("unhandled type!");
2031   }
2032 }
2033 
2034 /// Generate the ARM and AArch64 intrinsic range checking code for
2035 /// shift/lane immediates, checking for unique declarations.
2036 void
genIntrinsicRangeCheckCode(raw_ostream & OS,StringMap<ClassKind> & A64IntrinsicMap,bool isA64RangeCheck)2037 NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
2038                                         StringMap<ClassKind> &A64IntrinsicMap,
2039                                         bool isA64RangeCheck) {
2040   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2041   StringMap<OpKind> EmittedMap;
2042 
2043   // Generate the intrinsic range checking code for shift/lane immediates.
2044   if (isA64RangeCheck)
2045     OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n";
2046   else
2047     OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
2048 
2049   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2050     Record *R = RV[i];
2051 
2052     OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
2053     if (k != OpNone)
2054       continue;
2055 
2056     std::string name = R->getValueAsString("Name");
2057     std::string Proto = R->getValueAsString("Prototype");
2058     std::string Types = R->getValueAsString("Types");
2059 
2060     // Functions with 'a' (the splat code) in the type prototype should not get
2061     // their own builtin as they use the non-splat variant.
2062     if (Proto.find('a') != std::string::npos)
2063       continue;
2064 
2065     // Functions which do not have an immediate do not need to have range
2066     // checking code emitted.
2067     size_t immPos = Proto.find('i');
2068     if (immPos == std::string::npos)
2069       continue;
2070 
2071     SmallVector<StringRef, 16> TypeVec;
2072     ParseTypes(R, Types, TypeVec);
2073 
2074     if (R->getSuperClasses().size() < 2)
2075       PrintFatalError(R->getLoc(), "Builtin has no class kind");
2076 
2077     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2078 
2079     // Do not include AArch64 range checks if not generating code for AArch64.
2080     bool isA64 = R->getValueAsBit("isA64");
2081     if (!isA64RangeCheck && isA64)
2082       continue;
2083 
2084     // Include ARM range checks in AArch64 but only if ARM intrinsics are not
2085     // redefined by AArch64 to handle new types.
2086     if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(name)) {
2087       ClassKind &A64CK = A64IntrinsicMap[name];
2088       if (A64CK == ck && ck != ClassNone)
2089         continue;
2090     }
2091 
2092     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2093       std::string namestr, shiftstr, rangestr;
2094 
2095       if (R->getValueAsBit("isVCVT_N")) {
2096         // VCVT between floating- and fixed-point values takes an immediate
2097         // in the range 1 to 32.
2098         ck = ClassB;
2099         rangestr = "l = 1; u = 31"; // upper bound = l + u
2100       } else if (Proto.find('s') == std::string::npos) {
2101         // Builtins which are overloaded by type will need to have their upper
2102         // bound computed at Sema time based on the type constant.
2103         ck = ClassB;
2104         if (R->getValueAsBit("isShift")) {
2105           shiftstr = ", true";
2106 
2107           // Right shifts have an 'r' in the name, left shifts do not.
2108           if (name.find('r') != std::string::npos)
2109             rangestr = "l = 1; ";
2110         }
2111         rangestr += "u = RFT(TV" + shiftstr + ")";
2112       } else {
2113         // The immediate generally refers to a lane in the preceding argument.
2114         assert(immPos > 0 && "unexpected immediate operand");
2115         rangestr =
2116             "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti]));
2117       }
2118       // Make sure cases appear only once by uniquing them in a string map.
2119       namestr = MangleName(name, TypeVec[ti], ck);
2120       if (EmittedMap.count(namestr))
2121         continue;
2122       EmittedMap[namestr] = OpNone;
2123 
2124       // Calculate the index of the immediate that should be range checked.
2125       unsigned immidx = 0;
2126 
2127       // Builtins that return a struct of multiple vectors have an extra
2128       // leading arg for the struct return.
2129       if (Proto[0] >= '2' && Proto[0] <= '4')
2130         ++immidx;
2131 
2132       // Add one to the index for each argument until we reach the immediate
2133       // to be checked.  Structs of vectors are passed as multiple arguments.
2134       for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
2135         switch (Proto[ii]) {
2136         default:
2137           immidx += 1;
2138           break;
2139         case '2':
2140           immidx += 2;
2141           break;
2142         case '3':
2143           immidx += 3;
2144           break;
2145         case '4':
2146           immidx += 4;
2147           break;
2148         case 'i':
2149           ie = ii + 1;
2150           break;
2151         }
2152       }
2153       if (isA64RangeCheck)
2154         OS << "case AArch64::BI__builtin_neon_";
2155       else
2156         OS << "case ARM::BI__builtin_neon_";
2157       OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; "
2158          << rangestr << "; break;\n";
2159     }
2160   }
2161   OS << "#endif\n\n";
2162 }
2163 
2164 /// Generate the ARM and AArch64 overloaded type checking code for
2165 /// SemaChecking.cpp, checking for unique builtin declarations.
2166 void
genOverloadTypeCheckCode(raw_ostream & OS,StringMap<ClassKind> & A64IntrinsicMap,bool isA64TypeCheck)2167 NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
2168                                       StringMap<ClassKind> &A64IntrinsicMap,
2169                                       bool isA64TypeCheck) {
2170   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2171   StringMap<OpKind> EmittedMap;
2172 
2173   // Generate the overloaded type checking code for SemaChecking.cpp
2174   if (isA64TypeCheck)
2175     OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n";
2176   else
2177     OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
2178 
2179   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2180     Record *R = RV[i];
2181     OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
2182     if (k != OpNone)
2183       continue;
2184 
2185     std::string Proto = R->getValueAsString("Prototype");
2186     std::string Types = R->getValueAsString("Types");
2187     std::string name = R->getValueAsString("Name");
2188 
2189     // Functions with 'a' (the splat code) in the type prototype should not get
2190     // their own builtin as they use the non-splat variant.
2191     if (Proto.find('a') != std::string::npos)
2192       continue;
2193 
2194     // Functions which have a scalar argument cannot be overloaded, no need to
2195     // check them if we are emitting the type checking code.
2196     if (Proto.find('s') != std::string::npos)
2197       continue;
2198 
2199     SmallVector<StringRef, 16> TypeVec;
2200     ParseTypes(R, Types, TypeVec);
2201 
2202     if (R->getSuperClasses().size() < 2)
2203       PrintFatalError(R->getLoc(), "Builtin has no class kind");
2204 
2205     // Do not include AArch64 type checks if not generating code for AArch64.
2206     bool isA64 = R->getValueAsBit("isA64");
2207     if (!isA64TypeCheck && isA64)
2208       continue;
2209 
2210     // Include ARM  type check in AArch64 but only if ARM intrinsics
2211     // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
2212     // redefined in AArch64 to handle an additional 2 x f64 type.
2213     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2214     if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(name)) {
2215       ClassKind &A64CK = A64IntrinsicMap[name];
2216       if (A64CK == ck && ck != ClassNone)
2217         continue;
2218     }
2219 
2220     int si = -1, qi = -1;
2221     uint64_t mask = 0, qmask = 0;
2222     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2223       // Generate the switch case(s) for this builtin for the type validation.
2224       bool quad = false, poly = false, usgn = false;
2225       (void) ClassifyType(TypeVec[ti], quad, poly, usgn);
2226 
2227       if (quad) {
2228         qi = ti;
2229         qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
2230       } else {
2231         si = ti;
2232         mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
2233       }
2234     }
2235 
2236     // Check if the builtin function has a pointer or const pointer argument.
2237     int PtrArgNum = -1;
2238     bool HasConstPtr = false;
2239     for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
2240       char ArgType = Proto[arg];
2241       if (ArgType == 'c') {
2242         HasConstPtr = true;
2243         PtrArgNum = arg - 1;
2244         break;
2245       }
2246       if (ArgType == 'p') {
2247         PtrArgNum = arg - 1;
2248         break;
2249       }
2250     }
2251     // For sret builtins, adjust the pointer argument index.
2252     if (PtrArgNum >= 0 && (Proto[0] >= '2' && Proto[0] <= '4'))
2253       PtrArgNum += 1;
2254 
2255     // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
2256     // and vst1_lane intrinsics.  Using a pointer to the vector element
2257     // type with one of those operations causes codegen to select an aligned
2258     // load/store instruction.  If you want an unaligned operation,
2259     // the pointer argument needs to have less alignment than element type,
2260     // so just accept any pointer type.
2261     if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
2262       PtrArgNum = -1;
2263       HasConstPtr = false;
2264     }
2265 
2266     if (mask) {
2267       if (isA64TypeCheck)
2268         OS << "case AArch64::BI__builtin_neon_";
2269       else
2270         OS << "case ARM::BI__builtin_neon_";
2271       OS << MangleName(name, TypeVec[si], ClassB) << ": mask = "
2272          << "0x" << utohexstr(mask) << "ULL";
2273       if (PtrArgNum >= 0)
2274         OS << "; PtrArgNum = " << PtrArgNum;
2275       if (HasConstPtr)
2276         OS << "; HasConstPtr = true";
2277       OS << "; break;\n";
2278     }
2279     if (qmask) {
2280       if (isA64TypeCheck)
2281         OS << "case AArch64::BI__builtin_neon_";
2282       else
2283         OS << "case ARM::BI__builtin_neon_";
2284       OS << MangleName(name, TypeVec[qi], ClassB) << ": mask = "
2285          << "0x" << utohexstr(qmask) << "ULL";
2286       if (PtrArgNum >= 0)
2287         OS << "; PtrArgNum = " << PtrArgNum;
2288       if (HasConstPtr)
2289         OS << "; HasConstPtr = true";
2290       OS << "; break;\n";
2291     }
2292   }
2293   OS << "#endif\n\n";
2294 }
2295 
2296 /// genBuiltinsDef: Generate the BuiltinsARM.def and  BuiltinsAArch64.def
2297 /// declaration of builtins, checking for unique builtin declarations.
genBuiltinsDef(raw_ostream & OS,StringMap<ClassKind> & A64IntrinsicMap,bool isA64GenBuiltinDef)2298 void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
2299                                  StringMap<ClassKind> &A64IntrinsicMap,
2300                                  bool isA64GenBuiltinDef) {
2301   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2302   StringMap<OpKind> EmittedMap;
2303 
2304   // Generate BuiltinsARM.def and BuiltinsAArch64.def
2305   if (isA64GenBuiltinDef)
2306     OS << "#ifdef GET_NEON_AARCH64_BUILTINS\n";
2307   else
2308     OS << "#ifdef GET_NEON_BUILTINS\n";
2309 
2310   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2311     Record *R = RV[i];
2312     OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
2313     if (k != OpNone)
2314       continue;
2315 
2316     std::string Proto = R->getValueAsString("Prototype");
2317     std::string name = R->getValueAsString("Name");
2318 
2319     // Functions with 'a' (the splat code) in the type prototype should not get
2320     // their own builtin as they use the non-splat variant.
2321     if (Proto.find('a') != std::string::npos)
2322       continue;
2323 
2324     std::string Types = R->getValueAsString("Types");
2325     SmallVector<StringRef, 16> TypeVec;
2326     ParseTypes(R, Types, TypeVec);
2327 
2328     if (R->getSuperClasses().size() < 2)
2329       PrintFatalError(R->getLoc(), "Builtin has no class kind");
2330 
2331     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2332 
2333     // Do not include AArch64 BUILTIN() macros if not generating
2334     // code for AArch64
2335     bool isA64 = R->getValueAsBit("isA64");
2336     if (!isA64GenBuiltinDef && isA64)
2337       continue;
2338 
2339     // Include ARM  BUILTIN() macros  in AArch64 but only if ARM intrinsics
2340     // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
2341     // redefined in AArch64 to handle an additional 2 x f64 type.
2342     if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(name)) {
2343       ClassKind &A64CK = A64IntrinsicMap[name];
2344       if (A64CK == ck && ck != ClassNone)
2345         continue;
2346     }
2347 
2348     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2349       // Generate the declaration for this builtin, ensuring
2350       // that each unique BUILTIN() macro appears only once in the output
2351       // stream.
2352       std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
2353       if (EmittedMap.count(bd))
2354         continue;
2355 
2356       EmittedMap[bd] = OpNone;
2357       OS << bd << "\n";
2358     }
2359   }
2360   OS << "#endif\n\n";
2361 }
2362 
2363 /// runHeader - Emit a file with sections defining:
2364 /// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
2365 /// 2. the SemaChecking code for the type overload checking.
2366 /// 3. the SemaChecking code for validation of intrinsic immediate arguments.
runHeader(raw_ostream & OS)2367 void NeonEmitter::runHeader(raw_ostream &OS) {
2368   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2369 
2370   // build a map of AArch64 intriniscs to be used in uniqueness checks.
2371   StringMap<ClassKind> A64IntrinsicMap;
2372   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2373     Record *R = RV[i];
2374 
2375     bool isA64 = R->getValueAsBit("isA64");
2376     if (!isA64)
2377       continue;
2378 
2379     ClassKind CK = ClassNone;
2380     if (R->getSuperClasses().size() >= 2)
2381       CK = ClassMap[R->getSuperClasses()[1]];
2382 
2383     std::string Name = R->getValueAsString("Name");
2384     if (A64IntrinsicMap.count(Name))
2385       continue;
2386     A64IntrinsicMap[Name] = CK;
2387   }
2388 
2389   // Generate BuiltinsARM.def for ARM
2390   genBuiltinsDef(OS, A64IntrinsicMap, false);
2391 
2392   // Generate BuiltinsAArch64.def for AArch64
2393   genBuiltinsDef(OS, A64IntrinsicMap, true);
2394 
2395   // Generate ARM overloaded type checking code for SemaChecking.cpp
2396   genOverloadTypeCheckCode(OS, A64IntrinsicMap, false);
2397 
2398   // Generate AArch64 overloaded type checking code for SemaChecking.cpp
2399   genOverloadTypeCheckCode(OS, A64IntrinsicMap, true);
2400 
2401   // Generate ARM range checking code for shift/lane immediates.
2402   genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, false);
2403 
2404   // Generate the AArch64 range checking code for shift/lane immediates.
2405   genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, true);
2406 }
2407 
2408 /// GenTest - Write out a test for the intrinsic specified by the name and
2409 /// type strings, including the embedded patterns for FileCheck to match.
GenTest(const std::string & name,const std::string & proto,StringRef outTypeStr,StringRef inTypeStr,bool isShift,bool isHiddenLOp,ClassKind ck,const std::string & InstName,bool isA64,std::string & testFuncProto)2410 static std::string GenTest(const std::string &name,
2411                            const std::string &proto,
2412                            StringRef outTypeStr, StringRef inTypeStr,
2413                            bool isShift, bool isHiddenLOp,
2414                            ClassKind ck, const std::string &InstName,
2415 						   bool isA64,
2416 						   std::string & testFuncProto) {
2417   assert(!proto.empty() && "");
2418   std::string s;
2419 
2420   // Function name with type suffix
2421   std::string mangledName = MangleName(name, outTypeStr, ClassS);
2422   if (outTypeStr != inTypeStr) {
2423     // If the input type is different (e.g., for vreinterpret), append a suffix
2424     // for the input type.  String off a "Q" (quad) prefix so that MangleName
2425     // does not insert another "q" in the name.
2426     unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
2427     StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
2428     mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
2429   }
2430 
2431   // todo: GenerateChecksForIntrinsic does not generate CHECK
2432   // for aarch64 instructions yet
2433   std::vector<std::string> FileCheckPatterns;
2434   if (!isA64) {
2435 	GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
2436 							   isHiddenLOp, FileCheckPatterns);
2437 	s+= "// CHECK_ARM: test_" + mangledName + "\n";
2438   }
2439   s += "// CHECK_AARCH64: test_" + mangledName + "\n";
2440 
2441   // Emit the FileCheck patterns.
2442   // If for any reason we do not want to emit a check, mangledInst
2443   // will be the empty string.
2444   if (FileCheckPatterns.size()) {
2445     for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(),
2446                                                   e = FileCheckPatterns.end();
2447          i != e;
2448          ++i) {
2449       s += "// CHECK_ARM: " + *i + "\n";
2450     }
2451   }
2452 
2453   // Emit the start of the test function.
2454 
2455   testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
2456   char arg = 'a';
2457   std::string comma;
2458   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
2459     // Do not create arguments for values that must be immediate constants.
2460     if (proto[i] == 'i')
2461       continue;
2462     testFuncProto += comma + TypeString(proto[i], inTypeStr) + " ";
2463     testFuncProto.push_back(arg);
2464     comma = ", ";
2465   }
2466   testFuncProto += ")";
2467 
2468   s+= testFuncProto;
2469   s+= " {\n  ";
2470 
2471   if (proto[0] != 'v')
2472     s += "return ";
2473   s += mangledName + "(";
2474   arg = 'a';
2475   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
2476     if (proto[i] == 'i') {
2477       // For immediate operands, test the maximum value.
2478       if (isShift)
2479         s += "1"; // FIXME
2480       else
2481         // The immediate generally refers to a lane in the preceding argument.
2482         s += utostr(RangeFromType(proto[i-1], inTypeStr));
2483     } else {
2484       s.push_back(arg);
2485     }
2486     if ((i + 1) < e)
2487       s += ", ";
2488   }
2489   s += ");\n}\n\n";
2490   return s;
2491 }
2492 
2493 /// Write out all intrinsic tests for the specified target, checking
2494 /// for intrinsic test uniqueness.
genTargetTest(raw_ostream & OS,StringMap<OpKind> & EmittedMap,bool isA64GenTest)2495 void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
2496                                 bool isA64GenTest) {
2497   if (isA64GenTest)
2498 	OS << "#ifdef __aarch64__\n";
2499 
2500   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2501   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2502     Record *R = RV[i];
2503     std::string name = R->getValueAsString("Name");
2504     std::string Proto = R->getValueAsString("Prototype");
2505     std::string Types = R->getValueAsString("Types");
2506     bool isShift = R->getValueAsBit("isShift");
2507     std::string InstName = R->getValueAsString("InstName");
2508     bool isHiddenLOp = R->getValueAsBit("isHiddenLInst");
2509     bool isA64 = R->getValueAsBit("isA64");
2510 
2511     // do not include AArch64 intrinsic test if not generating
2512     // code for AArch64
2513     if (!isA64GenTest && isA64)
2514       continue;
2515 
2516     SmallVector<StringRef, 16> TypeVec;
2517     ParseTypes(R, Types, TypeVec);
2518 
2519     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2520     OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
2521     if (kind == OpUnavailable)
2522       continue;
2523     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2524       if (kind == OpReinterpret) {
2525         bool outQuad = false;
2526         bool dummy = false;
2527         (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
2528         for (unsigned srcti = 0, srcte = TypeVec.size();
2529              srcti != srcte; ++srcti) {
2530           bool inQuad = false;
2531           (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
2532           if (srcti == ti || inQuad != outQuad)
2533             continue;
2534 		  std::string testFuncProto;
2535           std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
2536                                   isShift, isHiddenLOp, ck, InstName, isA64,
2537 								  testFuncProto);
2538           if (EmittedMap.count(testFuncProto))
2539             continue;
2540           EmittedMap[testFuncProto] = kind;
2541           OS << s << "\n";
2542         }
2543       } else {
2544 		std::string testFuncProto;
2545         std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift,
2546                                 isHiddenLOp, ck, InstName, isA64, testFuncProto);
2547         if (EmittedMap.count(testFuncProto))
2548           continue;
2549         EmittedMap[testFuncProto] = kind;
2550         OS << s << "\n";
2551       }
2552     }
2553   }
2554 
2555   if (isA64GenTest)
2556 	OS << "#endif\n";
2557 }
2558 /// runTests - Write out a complete set of tests for all of the Neon
2559 /// intrinsics.
runTests(raw_ostream & OS)2560 void NeonEmitter::runTests(raw_ostream &OS) {
2561   OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi "
2562         "apcs-gnu\\\n"
2563         "// RUN:  -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
2564         "// RUN:  | FileCheck %s -check-prefix=CHECK_ARM\n"
2565 		"\n"
2566 	    "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n"
2567 	    "// RUN -target-feature +neon  -ffreestanding -S -o - %s \\\n"
2568 	    "// RUN:  | FileCheck %s -check-prefix=CHECK_AARCH64\n"
2569         "\n"
2570         "// REQUIRES: long_tests\n"
2571         "\n"
2572         "#include <arm_neon.h>\n"
2573         "\n";
2574 
2575   // ARM tests must be emitted before AArch64 tests to ensure
2576   // tests for intrinsics that are common to ARM and AArch64
2577   // appear only once in the output stream.
2578   // The check for uniqueness is done in genTargetTest.
2579   StringMap<OpKind> EmittedMap;
2580 
2581   genTargetTest(OS, EmittedMap, false);
2582 
2583   genTargetTest(OS, EmittedMap, true);
2584 }
2585 
2586 namespace clang {
EmitNeon(RecordKeeper & Records,raw_ostream & OS)2587 void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
2588   NeonEmitter(Records).run(OS);
2589 }
EmitNeonSema(RecordKeeper & Records,raw_ostream & OS)2590 void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
2591   NeonEmitter(Records).runHeader(OS);
2592 }
EmitNeonTest(RecordKeeper & Records,raw_ostream & OS)2593 void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
2594   NeonEmitter(Records).runTests(OS);
2595 }
2596 } // End namespace clang
2597