• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This tablegen backend is responsible for emitting arm_neon.h, which includes
11 // a declaration and definition of each function specified by the ARM NEON
12 // compiler interface.  See ARM document DUI0348B.
13 //
14 // Each NEON instruction is implemented in terms of 1 or more functions which
15 // are suffixed with the element type of the input vectors.  Functions may be
16 // implemented in terms of generic vector operations such as +, *, -, etc. or
17 // by calling a __builtin_-prefixed function which will be handled by clang's
18 // CodeGen library.
19 //
20 // Additional validation code can be generated by this file when runHeader() is
21 // called, rather than the normal run() entry point.  A complete set of tests
22 // for Neon intrinsics can be generated by calling the runTests() entry point.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/SmallString.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/StringExtras.h"
30 #include "llvm/ADT/StringMap.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/TableGen/Error.h"
33 #include "llvm/TableGen/Record.h"
34 #include "llvm/TableGen/TableGenBackend.h"
35 #include <string>
36 using namespace llvm;
37 
/// OpKind - Identifies the inline expansion chosen for an intrinsic.  Each
/// value corresponds to one of the "OP_*" names registered in NeonEmitter's
/// OpMap and selects the expression GenOpString produces.  The order of the
/// enumerators is preserved, so their numeric values are unchanged.
enum OpKind {
  // No inline operation / intrinsic not available.
  OpNone, OpUnavailable,

  // Addition and subtraction, including the long (l) and wide (w) forms.
  OpAdd, OpAddl, OpAddw,
  OpSub, OpSubl, OpSubw,

  // Multiplication and multiply-accumulate / multiply-subtract.
  OpMul, OpMla, OpMlal, OpMls, OpMlsl,

  // The same operations with a scalar operand duplicated across the vector.
  OpMulN, OpMlaN, OpMlsN, OpMlalN, OpMlslN,

  // The same operations with one lane of a vector as the operand.
  OpMulLane, OpMullLane, OpMlaLane, OpMlsLane, OpMlalLane, OpMlslLane,

  // Lane forms that forward to the vqdmull/vqdmlal/vqdmlsl/vqdmulh/vqrdmulh
  // intrinsics.
  OpQDMullLane, OpQDMlalLane, OpQDMlslLane, OpQDMulhLane, OpQRDMulhLane,

  // Element-wise comparisons.
  OpEq, OpGe, OpLe, OpGt, OpLt,

  // Unary and bitwise operators.
  OpNeg, OpNot, OpAnd, OpOr, OpXor, OpAndNot, OpOrNot,

  // Casts, concatenation, duplication and half extraction.
  OpCast, OpConcat, OpDup, OpDupLane, OpHi, OpLo,

  // Bitwise select.
  OpSelect,

  // Element reversal within 16-, 32- and 64-bit groups.
  OpRev16, OpRev32, OpRev64,

  OpReinterpret,

  // Absolute difference (long) and absolute-difference accumulate.
  OpAbdl, OpAba, OpAbal
};
95 
/// ClassKind - Selects the kind of type suffix MangleName appends to an
/// intrinsic's base name.
enum ClassKind {
  ClassNone,

  // Generic integer instruction, e.g., "i8" suffix.
  ClassI,

  // Signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix.
  ClassS,

  // Width-specific instruction, e.g., "8" suffix.
  ClassW,

  // Bitcast arguments with enum argument to specify type.
  ClassB
};
103 
/// NeonTypeFlags - Packed description of the vector type used by an
/// overloaded Neon builtin: the element type sits in the low four bits, with
/// separate bits marking "unsigned" and "quad".  These must be kept in sync
/// with the flags in include/clang/Basic/TargetBuiltins.h.
namespace {
class NeonTypeFlags {
  // Bit layout of the packed value.
  enum {
    EltTypeMask = 0xf,
    UnsignedFlag = 0x10,
    QuadFlag = 0x20
  };

  uint32_t Flags;

public:
  /// Element types; the enumerator value is stored in the low bits of Flags.
  enum EltType {
    Int8,
    Int16,
    Int32,
    Int64,
    Poly8,
    Poly16,
    Float16,
    Float32
  };

  /// Wrap an already-encoded flag word.
  NeonTypeFlags(unsigned F) : Flags(F) {}

  /// Encode an element type together with its unsigned and quad modifiers.
  NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad)
      : Flags(static_cast<uint32_t>(ET) |
              (IsUnsigned ? static_cast<uint32_t>(UnsignedFlag) : 0u) |
              (IsQuad ? static_cast<uint32_t>(QuadFlag) : 0u)) {}

  /// Return the encoded flag word.
  uint32_t getFlags() const { return Flags; }
};
} // end anonymous namespace
139 
140 namespace {
141 class NeonEmitter {
142   RecordKeeper &Records;
143   StringMap<OpKind> OpMap;
144   DenseMap<Record*, ClassKind> ClassMap;
145 
146 public:
NeonEmitter(RecordKeeper & R)147   NeonEmitter(RecordKeeper &R) : Records(R) {
148     OpMap["OP_NONE"]  = OpNone;
149     OpMap["OP_UNAVAILABLE"] = OpUnavailable;
150     OpMap["OP_ADD"]   = OpAdd;
151     OpMap["OP_ADDL"]  = OpAddl;
152     OpMap["OP_ADDW"]  = OpAddw;
153     OpMap["OP_SUB"]   = OpSub;
154     OpMap["OP_SUBL"]  = OpSubl;
155     OpMap["OP_SUBW"]  = OpSubw;
156     OpMap["OP_MUL"]   = OpMul;
157     OpMap["OP_MLA"]   = OpMla;
158     OpMap["OP_MLAL"]  = OpMlal;
159     OpMap["OP_MLS"]   = OpMls;
160     OpMap["OP_MLSL"]  = OpMlsl;
161     OpMap["OP_MUL_N"] = OpMulN;
162     OpMap["OP_MLA_N"] = OpMlaN;
163     OpMap["OP_MLS_N"] = OpMlsN;
164     OpMap["OP_MLAL_N"] = OpMlalN;
165     OpMap["OP_MLSL_N"] = OpMlslN;
166     OpMap["OP_MUL_LN"]= OpMulLane;
167     OpMap["OP_MULL_LN"] = OpMullLane;
168     OpMap["OP_MLA_LN"]= OpMlaLane;
169     OpMap["OP_MLS_LN"]= OpMlsLane;
170     OpMap["OP_MLAL_LN"] = OpMlalLane;
171     OpMap["OP_MLSL_LN"] = OpMlslLane;
172     OpMap["OP_QDMULL_LN"] = OpQDMullLane;
173     OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
174     OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
175     OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
176     OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
177     OpMap["OP_EQ"]    = OpEq;
178     OpMap["OP_GE"]    = OpGe;
179     OpMap["OP_LE"]    = OpLe;
180     OpMap["OP_GT"]    = OpGt;
181     OpMap["OP_LT"]    = OpLt;
182     OpMap["OP_NEG"]   = OpNeg;
183     OpMap["OP_NOT"]   = OpNot;
184     OpMap["OP_AND"]   = OpAnd;
185     OpMap["OP_OR"]    = OpOr;
186     OpMap["OP_XOR"]   = OpXor;
187     OpMap["OP_ANDN"]  = OpAndNot;
188     OpMap["OP_ORN"]   = OpOrNot;
189     OpMap["OP_CAST"]  = OpCast;
190     OpMap["OP_CONC"]  = OpConcat;
191     OpMap["OP_HI"]    = OpHi;
192     OpMap["OP_LO"]    = OpLo;
193     OpMap["OP_DUP"]   = OpDup;
194     OpMap["OP_DUP_LN"] = OpDupLane;
195     OpMap["OP_SEL"]   = OpSelect;
196     OpMap["OP_REV16"] = OpRev16;
197     OpMap["OP_REV32"] = OpRev32;
198     OpMap["OP_REV64"] = OpRev64;
199     OpMap["OP_REINT"] = OpReinterpret;
200     OpMap["OP_ABDL"]  = OpAbdl;
201     OpMap["OP_ABA"]   = OpAba;
202     OpMap["OP_ABAL"]  = OpAbal;
203 
204     Record *SI = R.getClass("SInst");
205     Record *II = R.getClass("IInst");
206     Record *WI = R.getClass("WInst");
207     ClassMap[SI] = ClassS;
208     ClassMap[II] = ClassI;
209     ClassMap[WI] = ClassW;
210   }
211 
212   // run - Emit arm_neon.h.inc
213   void run(raw_ostream &o);
214 
215   // runHeader - Emit all the __builtin prototypes used in arm_neon.h
216   void runHeader(raw_ostream &o);
217 
218   // runTests - Emit tests for all the Neon intrinsics.
219   void runTests(raw_ostream &o);
220 
221 private:
222   void emitIntrinsic(raw_ostream &OS, Record *R);
223 };
224 } // end anonymous namespace
225 
226 /// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
227 /// which each StringRef representing a single type declared in the string.
228 /// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
229 /// 2xfloat and 4xfloat respectively.
ParseTypes(Record * r,std::string & s,SmallVectorImpl<StringRef> & TV)230 static void ParseTypes(Record *r, std::string &s,
231                        SmallVectorImpl<StringRef> &TV) {
232   const char *data = s.data();
233   int len = 0;
234 
235   for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
236     if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U')
237       continue;
238 
239     switch (data[len]) {
240       case 'c':
241       case 's':
242       case 'i':
243       case 'l':
244       case 'h':
245       case 'f':
246         break;
247       default:
248         PrintFatalError(r->getLoc(),
249                       "Unexpected letter: " + std::string(data + len, 1));
250     }
251     TV.push_back(StringRef(data, len + 1));
252     data += len + 1;
253     len = -1;
254   }
255 }
256 
257 /// Widen - Convert a type code into the next wider type.  char -> short,
258 /// short -> int, etc.
Widen(const char t)259 static char Widen(const char t) {
260   switch (t) {
261     case 'c':
262       return 's';
263     case 's':
264       return 'i';
265     case 'i':
266       return 'l';
267     case 'h':
268       return 'f';
269     default:
270       PrintFatalError("unhandled type in widen!");
271   }
272 }
273 
274 /// Narrow - Convert a type code into the next smaller type.  short -> char,
275 /// float -> half float, etc.
Narrow(const char t)276 static char Narrow(const char t) {
277   switch (t) {
278     case 's':
279       return 'c';
280     case 'i':
281       return 's';
282     case 'l':
283       return 'i';
284     case 'f':
285       return 'h';
286     default:
287       PrintFatalError("unhandled type in narrow!");
288   }
289 }
290 
291 /// For a particular StringRef, return the base type code, and whether it has
292 /// the quad-vector, polynomial, or unsigned modifiers set.
ClassifyType(StringRef ty,bool & quad,bool & poly,bool & usgn)293 static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
294   unsigned off = 0;
295 
296   // remember quad.
297   if (ty[off] == 'Q') {
298     quad = true;
299     ++off;
300   }
301 
302   // remember poly.
303   if (ty[off] == 'P') {
304     poly = true;
305     ++off;
306   }
307 
308   // remember unsigned.
309   if (ty[off] == 'U') {
310     usgn = true;
311     ++off;
312   }
313 
314   // base type to get the type string for.
315   return ty[off];
316 }
317 
318 /// ModType - Transform a type code and its modifiers based on a mod code. The
319 /// mod code definitions may be found at the top of arm_neon.td.
ModType(const char mod,char type,bool & quad,bool & poly,bool & usgn,bool & scal,bool & cnst,bool & pntr)320 static char ModType(const char mod, char type, bool &quad, bool &poly,
321                     bool &usgn, bool &scal, bool &cnst, bool &pntr) {
322   switch (mod) {
323     case 't':
324       if (poly) {
325         poly = false;
326         usgn = true;
327       }
328       break;
329     case 'u':
330       usgn = true;
331       poly = false;
332       if (type == 'f')
333         type = 'i';
334       break;
335     case 'x':
336       usgn = false;
337       poly = false;
338       if (type == 'f')
339         type = 'i';
340       break;
341     case 'f':
342       if (type == 'h')
343         quad = true;
344       type = 'f';
345       usgn = false;
346       break;
347     case 'g':
348       quad = false;
349       break;
350     case 'w':
351       type = Widen(type);
352       quad = true;
353       break;
354     case 'n':
355       type = Widen(type);
356       break;
357     case 'i':
358       type = 'i';
359       scal = true;
360       break;
361     case 'l':
362       type = 'l';
363       scal = true;
364       usgn = true;
365       break;
366     case 's':
367     case 'a':
368       scal = true;
369       break;
370     case 'k':
371       quad = true;
372       break;
373     case 'c':
374       cnst = true;
375     case 'p':
376       pntr = true;
377       scal = true;
378       break;
379     case 'h':
380       type = Narrow(type);
381       if (type == 'h')
382         quad = false;
383       break;
384     case 'e':
385       type = Narrow(type);
386       usgn = true;
387       break;
388     default:
389       break;
390   }
391   return type;
392 }
393 
394 /// TypeString - for a modifier and type, generate the name of the typedef for
395 /// that type.  QUc -> uint8x8_t.
TypeString(const char mod,StringRef typestr)396 static std::string TypeString(const char mod, StringRef typestr) {
397   bool quad = false;
398   bool poly = false;
399   bool usgn = false;
400   bool scal = false;
401   bool cnst = false;
402   bool pntr = false;
403 
404   if (mod == 'v')
405     return "void";
406   if (mod == 'i')
407     return "int";
408 
409   // base type to get the type string for.
410   char type = ClassifyType(typestr, quad, poly, usgn);
411 
412   // Based on the modifying character, change the type and width if necessary.
413   type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
414 
415   SmallString<128> s;
416 
417   if (usgn)
418     s.push_back('u');
419 
420   switch (type) {
421     case 'c':
422       s += poly ? "poly8" : "int8";
423       if (scal)
424         break;
425       s += quad ? "x16" : "x8";
426       break;
427     case 's':
428       s += poly ? "poly16" : "int16";
429       if (scal)
430         break;
431       s += quad ? "x8" : "x4";
432       break;
433     case 'i':
434       s += "int32";
435       if (scal)
436         break;
437       s += quad ? "x4" : "x2";
438       break;
439     case 'l':
440       s += "int64";
441       if (scal)
442         break;
443       s += quad ? "x2" : "x1";
444       break;
445     case 'h':
446       s += "float16";
447       if (scal)
448         break;
449       s += quad ? "x8" : "x4";
450       break;
451     case 'f':
452       s += "float32";
453       if (scal)
454         break;
455       s += quad ? "x4" : "x2";
456       break;
457     default:
458       PrintFatalError("unhandled type!");
459   }
460 
461   if (mod == '2')
462     s += "x2";
463   if (mod == '3')
464     s += "x3";
465   if (mod == '4')
466     s += "x4";
467 
468   // Append _t, finishing the type string typedef type.
469   s += "_t";
470 
471   if (cnst)
472     s += " const";
473 
474   if (pntr)
475     s += " *";
476 
477   return s.str();
478 }
479 
480 /// BuiltinTypeString - for a modifier and type, generate the clang
481 /// BuiltinsARM.def prototype code for the function.  See the top of clang's
482 /// Builtins.def for a description of the type strings.
BuiltinTypeString(const char mod,StringRef typestr,ClassKind ck,bool ret)483 static std::string BuiltinTypeString(const char mod, StringRef typestr,
484                                      ClassKind ck, bool ret) {
485   bool quad = false;
486   bool poly = false;
487   bool usgn = false;
488   bool scal = false;
489   bool cnst = false;
490   bool pntr = false;
491 
492   if (mod == 'v')
493     return "v"; // void
494   if (mod == 'i')
495     return "i"; // int
496 
497   // base type to get the type string for.
498   char type = ClassifyType(typestr, quad, poly, usgn);
499 
500   // Based on the modifying character, change the type and width if necessary.
501   type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
502 
503   // All pointers are void* pointers.  Change type to 'v' now.
504   if (pntr) {
505     usgn = false;
506     poly = false;
507     type = 'v';
508   }
509   // Treat half-float ('h') types as unsigned short ('s') types.
510   if (type == 'h') {
511     type = 's';
512     usgn = true;
513   }
514   usgn = usgn | poly | ((ck == ClassI || ck == ClassW) && scal && type != 'f');
515 
516   if (scal) {
517     SmallString<128> s;
518 
519     if (usgn)
520       s.push_back('U');
521     else if (type == 'c')
522       s.push_back('S'); // make chars explicitly signed
523 
524     if (type == 'l') // 64-bit long
525       s += "LLi";
526     else
527       s.push_back(type);
528 
529     if (cnst)
530       s.push_back('C');
531     if (pntr)
532       s.push_back('*');
533     return s.str();
534   }
535 
536   // Since the return value must be one type, return a vector type of the
537   // appropriate width which we will bitcast.  An exception is made for
538   // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
539   // fashion, storing them to a pointer arg.
540   if (ret) {
541     if (mod >= '2' && mod <= '4')
542       return "vv*"; // void result with void* first argument
543     if (mod == 'f' || (ck != ClassB && type == 'f'))
544       return quad ? "V4f" : "V2f";
545     if (ck != ClassB && type == 's')
546       return quad ? "V8s" : "V4s";
547     if (ck != ClassB && type == 'i')
548       return quad ? "V4i" : "V2i";
549     if (ck != ClassB && type == 'l')
550       return quad ? "V2LLi" : "V1LLi";
551 
552     return quad ? "V16Sc" : "V8Sc";
553   }
554 
555   // Non-return array types are passed as individual vectors.
556   if (mod == '2')
557     return quad ? "V16ScV16Sc" : "V8ScV8Sc";
558   if (mod == '3')
559     return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
560   if (mod == '4')
561     return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";
562 
563   if (mod == 'f' || (ck != ClassB && type == 'f'))
564     return quad ? "V4f" : "V2f";
565   if (ck != ClassB && type == 's')
566     return quad ? "V8s" : "V4s";
567   if (ck != ClassB && type == 'i')
568     return quad ? "V4i" : "V2i";
569   if (ck != ClassB && type == 'l')
570     return quad ? "V2LLi" : "V1LLi";
571 
572   return quad ? "V16Sc" : "V8Sc";
573 }
574 
575 /// MangleName - Append a type or width suffix to a base neon function name,
576 /// and insert a 'q' in the appropriate location if the operation works on
577 /// 128b rather than 64b.   E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
MangleName(const std::string & name,StringRef typestr,ClassKind ck)578 static std::string MangleName(const std::string &name, StringRef typestr,
579                               ClassKind ck) {
580   if (name == "vcvt_f32_f16")
581     return name;
582 
583   bool quad = false;
584   bool poly = false;
585   bool usgn = false;
586   char type = ClassifyType(typestr, quad, poly, usgn);
587 
588   std::string s = name;
589 
590   switch (type) {
591   case 'c':
592     switch (ck) {
593     case ClassS: s += poly ? "_p8" : usgn ? "_u8" : "_s8"; break;
594     case ClassI: s += "_i8"; break;
595     case ClassW: s += "_8"; break;
596     default: break;
597     }
598     break;
599   case 's':
600     switch (ck) {
601     case ClassS: s += poly ? "_p16" : usgn ? "_u16" : "_s16"; break;
602     case ClassI: s += "_i16"; break;
603     case ClassW: s += "_16"; break;
604     default: break;
605     }
606     break;
607   case 'i':
608     switch (ck) {
609     case ClassS: s += usgn ? "_u32" : "_s32"; break;
610     case ClassI: s += "_i32"; break;
611     case ClassW: s += "_32"; break;
612     default: break;
613     }
614     break;
615   case 'l':
616     switch (ck) {
617     case ClassS: s += usgn ? "_u64" : "_s64"; break;
618     case ClassI: s += "_i64"; break;
619     case ClassW: s += "_64"; break;
620     default: break;
621     }
622     break;
623   case 'h':
624     switch (ck) {
625     case ClassS:
626     case ClassI: s += "_f16"; break;
627     case ClassW: s += "_16"; break;
628     default: break;
629     }
630     break;
631   case 'f':
632     switch (ck) {
633     case ClassS:
634     case ClassI: s += "_f32"; break;
635     case ClassW: s += "_32"; break;
636     default: break;
637     }
638     break;
639   default:
640     PrintFatalError("unhandled type!");
641   }
642   if (ck == ClassB)
643     s += "_v";
644 
645   // Insert a 'q' before the first '_' character so that it ends up before
646   // _lane or _n on vector-scalar operations.
647   if (quad) {
648     size_t pos = s.find('_');
649     s = s.insert(pos, "q");
650   }
651   return s;
652 }
653 
/// UseMacro - Decide from the prototype string whether the intrinsic has to
/// be emitted as a preprocessor macro rather than an inline function.  That
/// is the case when the prototype contains:
///   'i'        - an immediate argument, so that SemaChecking can range check
///                the immediate passed by the user;
///   'p' or 'c' - a pointer argument, to avoid hiding aligned attributes from
///                the pointer type.
static bool UseMacro(const std::string &proto) {
  return proto.find_first_of("ipc") != std::string::npos;
}
671 
/// MacroArgUsedDirectly - For an intrinsic defined as a macro, tell whether
/// the argument described by proto[i] is used as written instead of first
/// being copied into a local temporary.  This holds for constant ints ('i'),
/// pointers ('p') and const pointers ('c').
static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
  switch (proto[i]) {
  case 'i':
  case 'p':
  case 'c':
    return true;
  default:
    return false;
  }
}
679 
680 // Generate the string "(argtype a, argtype b, ...)"
GenArgs(const std::string & proto,StringRef typestr)681 static std::string GenArgs(const std::string &proto, StringRef typestr) {
682   bool define = UseMacro(proto);
683   char arg = 'a';
684 
685   std::string s;
686   s += "(";
687 
688   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
689     if (define) {
690       // Some macro arguments are used directly instead of being assigned
691       // to local temporaries; prepend an underscore prefix to make their
692       // names consistent with the local temporaries.
693       if (MacroArgUsedDirectly(proto, i))
694         s += "__";
695     } else {
696       s += TypeString(proto[i], typestr) + " __";
697     }
698     s.push_back(arg);
699     if ((i + 1) < e)
700       s += ", ";
701   }
702 
703   s += ")";
704   return s;
705 }
706 
707 // Macro arguments are not type-checked like inline function arguments, so
708 // assign them to local temporaries to get the right type checking.
GenMacroLocals(const std::string & proto,StringRef typestr)709 static std::string GenMacroLocals(const std::string &proto, StringRef typestr) {
710   char arg = 'a';
711   std::string s;
712   bool generatedLocal = false;
713 
714   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
715     // Do not create a temporary for an immediate argument.
716     // That would defeat the whole point of using a macro!
717     if (MacroArgUsedDirectly(proto, i))
718       continue;
719     generatedLocal = true;
720 
721     s += TypeString(proto[i], typestr) + " __";
722     s.push_back(arg);
723     s += " = (";
724     s.push_back(arg);
725     s += "); ";
726   }
727 
728   if (generatedLocal)
729     s += "\\\n  ";
730   return s;
731 }
732 
733 // Use the vmovl builtin to sign-extend or zero-extend a vector.
Extend(StringRef typestr,const std::string & a)734 static std::string Extend(StringRef typestr, const std::string &a) {
735   std::string s;
736   s = MangleName("vmovl", typestr, ClassS);
737   s += "(" + a + ")";
738   return s;
739 }
740 
Duplicate(unsigned nElts,StringRef typestr,const std::string & a)741 static std::string Duplicate(unsigned nElts, StringRef typestr,
742                              const std::string &a) {
743   std::string s;
744 
745   s = "(" + TypeString('d', typestr) + "){ ";
746   for (unsigned i = 0; i != nElts; ++i) {
747     s += a;
748     if ((i + 1) < nElts)
749       s += ", ";
750   }
751   s += " }";
752 
753   return s;
754 }
755 
// Build a __builtin_shufflevector call that shuffles vec with itself and
// repeats the index expression lane nElts times, i.e. broadcasts one lane of
// vec across an nElts-element result.
static std::string SplatLane(unsigned nElts, const std::string &vec,
                             const std::string &lane) {
  std::string call("__builtin_shufflevector(");
  call += vec;
  call += ", ";
  call += vec;

  const std::string index = ", " + lane;
  for (unsigned remaining = nElts; remaining != 0; --remaining)
    call += index;

  call += ")";
  return call;
}
764 
GetNumElements(StringRef typestr,bool & quad)765 static unsigned GetNumElements(StringRef typestr, bool &quad) {
766   quad = false;
767   bool dummy = false;
768   char type = ClassifyType(typestr, quad, dummy, dummy);
769   unsigned nElts = 0;
770   switch (type) {
771   case 'c': nElts = 8; break;
772   case 's': nElts = 4; break;
773   case 'i': nElts = 2; break;
774   case 'l': nElts = 1; break;
775   case 'h': nElts = 4; break;
776   case 'f': nElts = 2; break;
777   default:
778     PrintFatalError("unhandled type!");
779   }
780   if (quad) nElts <<= 1;
781   return nElts;
782 }
783 
784 // Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
GenOpString(OpKind op,const std::string & proto,StringRef typestr)785 static std::string GenOpString(OpKind op, const std::string &proto,
786                                StringRef typestr) {
787   bool quad;
788   unsigned nElts = GetNumElements(typestr, quad);
789   bool define = UseMacro(proto);
790 
791   std::string ts = TypeString(proto[0], typestr);
792   std::string s;
793   if (!define) {
794     s = "return ";
795   }
796 
797   switch(op) {
798   case OpAdd:
799     s += "__a + __b;";
800     break;
801   case OpAddl:
802     s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
803     break;
804   case OpAddw:
805     s += "__a + " + Extend(typestr, "__b") + ";";
806     break;
807   case OpSub:
808     s += "__a - __b;";
809     break;
810   case OpSubl:
811     s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
812     break;
813   case OpSubw:
814     s += "__a - " + Extend(typestr, "__b") + ";";
815     break;
816   case OpMulN:
817     s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
818     break;
819   case OpMulLane:
820     s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
821     break;
822   case OpMul:
823     s += "__a * __b;";
824     break;
825   case OpMullLane:
826     s += MangleName("vmull", typestr, ClassS) + "(__a, " +
827       SplatLane(nElts, "__b", "__c") + ");";
828     break;
829   case OpMlaN:
830     s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
831     break;
832   case OpMlaLane:
833     s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
834     break;
835   case OpMla:
836     s += "__a + (__b * __c);";
837     break;
838   case OpMlalN:
839     s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
840       Duplicate(nElts, typestr, "__c") + ");";
841     break;
842   case OpMlalLane:
843     s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
844       SplatLane(nElts, "__c", "__d") + ");";
845     break;
846   case OpMlal:
847     s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
848     break;
849   case OpMlsN:
850     s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
851     break;
852   case OpMlsLane:
853     s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
854     break;
855   case OpMls:
856     s += "__a - (__b * __c);";
857     break;
858   case OpMlslN:
859     s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
860       Duplicate(nElts, typestr, "__c") + ");";
861     break;
862   case OpMlslLane:
863     s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
864       SplatLane(nElts, "__c", "__d") + ");";
865     break;
866   case OpMlsl:
867     s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
868     break;
869   case OpQDMullLane:
870     s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
871       SplatLane(nElts, "__b", "__c") + ");";
872     break;
873   case OpQDMlalLane:
874     s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
875       SplatLane(nElts, "__c", "__d") + ");";
876     break;
877   case OpQDMlslLane:
878     s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
879       SplatLane(nElts, "__c", "__d") + ");";
880     break;
881   case OpQDMulhLane:
882     s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
883       SplatLane(nElts, "__b", "__c") + ");";
884     break;
885   case OpQRDMulhLane:
886     s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
887       SplatLane(nElts, "__b", "__c") + ");";
888     break;
889   case OpEq:
890     s += "(" + ts + ")(__a == __b);";
891     break;
892   case OpGe:
893     s += "(" + ts + ")(__a >= __b);";
894     break;
895   case OpLe:
896     s += "(" + ts + ")(__a <= __b);";
897     break;
898   case OpGt:
899     s += "(" + ts + ")(__a > __b);";
900     break;
901   case OpLt:
902     s += "(" + ts + ")(__a < __b);";
903     break;
904   case OpNeg:
905     s += " -__a;";
906     break;
907   case OpNot:
908     s += " ~__a;";
909     break;
910   case OpAnd:
911     s += "__a & __b;";
912     break;
913   case OpOr:
914     s += "__a | __b;";
915     break;
916   case OpXor:
917     s += "__a ^ __b;";
918     break;
919   case OpAndNot:
920     s += "__a & ~__b;";
921     break;
922   case OpOrNot:
923     s += "__a | ~__b;";
924     break;
925   case OpCast:
926     s += "(" + ts + ")__a;";
927     break;
928   case OpConcat:
929     s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
930     s += ", (int64x1_t)__b, 0, 1);";
931     break;
932   case OpHi:
933     s += "(" + ts +
934       ")__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1);";
935     break;
936   case OpLo:
937     s += "(" + ts +
938       ")__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0);";
939     break;
940   case OpDup:
941     s += Duplicate(nElts, typestr, "__a") + ";";
942     break;
943   case OpDupLane:
944     s += SplatLane(nElts, "__a", "__b") + ";";
945     break;
946   case OpSelect:
947     // ((0 & 1) | (~0 & 2))
948     s += "(" + ts + ")";
949     ts = TypeString(proto[1], typestr);
950     s += "((__a & (" + ts + ")__b) | ";
951     s += "(~__a & (" + ts + ")__c));";
952     break;
953   case OpRev16:
954     s += "__builtin_shufflevector(__a, __a";
955     for (unsigned i = 2; i <= nElts; i += 2)
956       for (unsigned j = 0; j != 2; ++j)
957         s += ", " + utostr(i - j - 1);
958     s += ");";
959     break;
960   case OpRev32: {
961     unsigned WordElts = nElts >> (1 + (int)quad);
962     s += "__builtin_shufflevector(__a, __a";
963     for (unsigned i = WordElts; i <= nElts; i += WordElts)
964       for (unsigned j = 0; j != WordElts; ++j)
965         s += ", " + utostr(i - j - 1);
966     s += ");";
967     break;
968   }
969   case OpRev64: {
970     unsigned DblWordElts = nElts >> (int)quad;
971     s += "__builtin_shufflevector(__a, __a";
972     for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
973       for (unsigned j = 0; j != DblWordElts; ++j)
974         s += ", " + utostr(i - j - 1);
975     s += ");";
976     break;
977   }
978   case OpAbdl: {
979     std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
980     if (typestr[0] != 'U') {
981       // vabd results are always unsigned and must be zero-extended.
982       std::string utype = "U" + typestr.str();
983       s += "(" + TypeString(proto[0], typestr) + ")";
984       abd = "(" + TypeString('d', utype) + ")" + abd;
985       s += Extend(utype, abd) + ";";
986     } else {
987       s += Extend(typestr, abd) + ";";
988     }
989     break;
990   }
991   case OpAba:
992     s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
993     break;
994   case OpAbal: {
995     s += "__a + ";
996     std::string abd = MangleName("vabd", typestr, ClassS) + "(__b, __c)";
997     if (typestr[0] != 'U') {
998       // vabd results are always unsigned and must be zero-extended.
999       std::string utype = "U" + typestr.str();
1000       s += "(" + TypeString(proto[0], typestr) + ")";
1001       abd = "(" + TypeString('d', utype) + ")" + abd;
1002       s += Extend(utype, abd) + ";";
1003     } else {
1004       s += Extend(typestr, abd) + ";";
1005     }
1006     break;
1007   }
1008   default:
1009     PrintFatalError("unknown OpKind!");
1010   }
1011   return s;
1012 }
1013 
GetNeonEnum(const std::string & proto,StringRef typestr)1014 static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
1015   unsigned mod = proto[0];
1016 
1017   if (mod == 'v' || mod == 'f')
1018     mod = proto[1];
1019 
1020   bool quad = false;
1021   bool poly = false;
1022   bool usgn = false;
1023   bool scal = false;
1024   bool cnst = false;
1025   bool pntr = false;
1026 
1027   // Base type to get the type string for.
1028   char type = ClassifyType(typestr, quad, poly, usgn);
1029 
1030   // Based on the modifying character, change the type and width if necessary.
1031   type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
1032 
1033   NeonTypeFlags::EltType ET;
1034   switch (type) {
1035     case 'c':
1036       ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8;
1037       break;
1038     case 's':
1039       ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16;
1040       break;
1041     case 'i':
1042       ET = NeonTypeFlags::Int32;
1043       break;
1044     case 'l':
1045       ET = NeonTypeFlags::Int64;
1046       break;
1047     case 'h':
1048       ET = NeonTypeFlags::Float16;
1049       break;
1050     case 'f':
1051       ET = NeonTypeFlags::Float32;
1052       break;
1053     default:
1054       PrintFatalError("unhandled type!");
1055   }
1056   NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g');
1057   return Flags.getFlags();
1058 }
1059 
1060 // Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
GenBuiltin(const std::string & name,const std::string & proto,StringRef typestr,ClassKind ck)1061 static std::string GenBuiltin(const std::string &name, const std::string &proto,
1062                               StringRef typestr, ClassKind ck) {
1063   std::string s;
1064 
1065   // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
1066   // sret-like argument.
1067   bool sret = (proto[0] >= '2' && proto[0] <= '4');
1068 
1069   bool define = UseMacro(proto);
1070 
1071   // Check if the prototype has a scalar operand with the type of the vector
1072   // elements.  If not, bitcasting the args will take care of arg checking.
1073   // The actual signedness etc. will be taken care of with special enums.
1074   if (proto.find('s') == std::string::npos)
1075     ck = ClassB;
1076 
1077   if (proto[0] != 'v') {
1078     std::string ts = TypeString(proto[0], typestr);
1079 
1080     if (define) {
1081       if (sret)
1082         s += ts + " r; ";
1083       else
1084         s += "(" + ts + ")";
1085     } else if (sret) {
1086       s += ts + " r; ";
1087     } else {
1088       s += "return (" + ts + ")";
1089     }
1090   }
1091 
1092   bool splat = proto.find('a') != std::string::npos;
1093 
1094   s += "__builtin_neon_";
1095   if (splat) {
1096     // Call the non-splat builtin: chop off the "_n" suffix from the name.
1097     std::string vname(name, 0, name.size()-2);
1098     s += MangleName(vname, typestr, ck);
1099   } else {
1100     s += MangleName(name, typestr, ck);
1101   }
1102   s += "(";
1103 
1104   // Pass the address of the return variable as the first argument to sret-like
1105   // builtins.
1106   if (sret)
1107     s += "&r, ";
1108 
1109   char arg = 'a';
1110   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1111     std::string args = std::string(&arg, 1);
1112 
1113     // Use the local temporaries instead of the macro arguments.
1114     args = "__" + args;
1115 
1116     bool argQuad = false;
1117     bool argPoly = false;
1118     bool argUsgn = false;
1119     bool argScalar = false;
1120     bool dummy = false;
1121     char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
1122     argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
1123                       dummy, dummy);
1124 
1125     // Handle multiple-vector values specially, emitting each subvector as an
1126     // argument to the __builtin.
1127     if (proto[i] >= '2' && proto[i] <= '4') {
1128       // Check if an explicit cast is needed.
1129       if (argType != 'c' || argPoly || argUsgn)
1130         args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;
1131 
1132       for (unsigned vi = 0, ve = proto[i] - '0'; vi != ve; ++vi) {
1133         s += args + ".val[" + utostr(vi) + "]";
1134         if ((vi + 1) < ve)
1135           s += ", ";
1136       }
1137       if ((i + 1) < e)
1138         s += ", ";
1139 
1140       continue;
1141     }
1142 
1143     if (splat && (i + 1) == e)
1144       args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);
1145 
1146     // Check if an explicit cast is needed.
1147     if ((splat || !argScalar) &&
1148         ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
1149       std::string argTypeStr = "c";
1150       if (ck != ClassB)
1151         argTypeStr = argType;
1152       if (argQuad)
1153         argTypeStr = "Q" + argTypeStr;
1154       args = "(" + TypeString('d', argTypeStr) + ")" + args;
1155     }
1156 
1157     s += args;
1158     if ((i + 1) < e)
1159       s += ", ";
1160   }
1161 
1162   // Extra constant integer to hold type class enum for this function, e.g. s8
1163   if (ck == ClassB)
1164     s += ", " + utostr(GetNeonEnum(proto, typestr));
1165 
1166   s += ");";
1167 
1168   if (proto[0] != 'v' && sret) {
1169     if (define)
1170       s += " r;";
1171     else
1172       s += " return r;";
1173   }
1174   return s;
1175 }
1176 
GenBuiltinDef(const std::string & name,const std::string & proto,StringRef typestr,ClassKind ck)1177 static std::string GenBuiltinDef(const std::string &name,
1178                                  const std::string &proto,
1179                                  StringRef typestr, ClassKind ck) {
1180   std::string s("BUILTIN(__builtin_neon_");
1181 
1182   // If all types are the same size, bitcasting the args will take care
1183   // of arg checking.  The actual signedness etc. will be taken care of with
1184   // special enums.
1185   if (proto.find('s') == std::string::npos)
1186     ck = ClassB;
1187 
1188   s += MangleName(name, typestr, ck);
1189   s += ", \"";
1190 
1191   for (unsigned i = 0, e = proto.size(); i != e; ++i)
1192     s += BuiltinTypeString(proto[i], typestr, ck, i == 0);
1193 
1194   // Extra constant integer to hold type class enum for this function, e.g. s8
1195   if (ck == ClassB)
1196     s += "i";
1197 
1198   s += "\", \"n\")";
1199   return s;
1200 }
1201 
GenIntrinsic(const std::string & name,const std::string & proto,StringRef outTypeStr,StringRef inTypeStr,OpKind kind,ClassKind classKind)1202 static std::string GenIntrinsic(const std::string &name,
1203                                 const std::string &proto,
1204                                 StringRef outTypeStr, StringRef inTypeStr,
1205                                 OpKind kind, ClassKind classKind) {
1206   assert(!proto.empty() && "");
1207   bool define = UseMacro(proto) && kind != OpUnavailable;
1208   std::string s;
1209 
1210   // static always inline + return type
1211   if (define)
1212     s += "#define ";
1213   else
1214     s += "__ai " + TypeString(proto[0], outTypeStr) + " ";
1215 
1216   // Function name with type suffix
1217   std::string mangledName = MangleName(name, outTypeStr, ClassS);
1218   if (outTypeStr != inTypeStr) {
1219     // If the input type is different (e.g., for vreinterpret), append a suffix
1220     // for the input type.  String off a "Q" (quad) prefix so that MangleName
1221     // does not insert another "q" in the name.
1222     unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
1223     StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
1224     mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
1225   }
1226   s += mangledName;
1227 
1228   // Function arguments
1229   s += GenArgs(proto, inTypeStr);
1230 
1231   // Definition.
1232   if (define) {
1233     s += " __extension__ ({ \\\n  ";
1234     s += GenMacroLocals(proto, inTypeStr);
1235   } else if (kind == OpUnavailable) {
1236     s += " __attribute__((unavailable));\n";
1237     return s;
1238   } else
1239     s += " {\n  ";
1240 
1241   if (kind != OpNone)
1242     s += GenOpString(kind, proto, outTypeStr);
1243   else
1244     s += GenBuiltin(name, proto, outTypeStr, classKind);
1245   if (define)
1246     s += " })";
1247   else
1248     s += " }";
1249   s += "\n";
1250   return s;
1251 }
1252 
1253 /// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
1254 /// is comprised of type definitions and function declarations.
run(raw_ostream & OS)1255 void NeonEmitter::run(raw_ostream &OS) {
1256   OS <<
1257     "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
1258     "---===\n"
1259     " *\n"
1260     " * Permission is hereby granted, free of charge, to any person obtaining "
1261     "a copy\n"
1262     " * of this software and associated documentation files (the \"Software\"),"
1263     " to deal\n"
1264     " * in the Software without restriction, including without limitation the "
1265     "rights\n"
1266     " * to use, copy, modify, merge, publish, distribute, sublicense, "
1267     "and/or sell\n"
1268     " * copies of the Software, and to permit persons to whom the Software is\n"
1269     " * furnished to do so, subject to the following conditions:\n"
1270     " *\n"
1271     " * The above copyright notice and this permission notice shall be "
1272     "included in\n"
1273     " * all copies or substantial portions of the Software.\n"
1274     " *\n"
1275     " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
1276     "EXPRESS OR\n"
1277     " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
1278     "MERCHANTABILITY,\n"
1279     " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
1280     "SHALL THE\n"
1281     " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
1282     "OTHER\n"
1283     " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
1284     "ARISING FROM,\n"
1285     " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
1286     "DEALINGS IN\n"
1287     " * THE SOFTWARE.\n"
1288     " *\n"
1289     " *===--------------------------------------------------------------------"
1290     "---===\n"
1291     " */\n\n";
1292 
1293   OS << "#ifndef __ARM_NEON_H\n";
1294   OS << "#define __ARM_NEON_H\n\n";
1295 
1296   OS << "#ifndef __ARM_NEON__\n";
1297   OS << "#error \"NEON support not enabled\"\n";
1298   OS << "#endif\n\n";
1299 
1300   OS << "#include <stdint.h>\n\n";
1301 
1302   // Emit NEON-specific scalar typedefs.
1303   OS << "typedef float float32_t;\n";
1304   OS << "typedef int8_t poly8_t;\n";
1305   OS << "typedef int16_t poly16_t;\n";
1306   OS << "typedef uint16_t float16_t;\n";
1307 
1308   // Emit Neon vector typedefs.
1309   std::string TypedefTypes("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfPcQPcPsQPs");
1310   SmallVector<StringRef, 24> TDTypeVec;
1311   ParseTypes(0, TypedefTypes, TDTypeVec);
1312 
1313   // Emit vector typedefs.
1314   for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
1315     bool dummy, quad = false, poly = false;
1316     (void) ClassifyType(TDTypeVec[i], quad, poly, dummy);
1317     if (poly)
1318       OS << "typedef __attribute__((neon_polyvector_type(";
1319     else
1320       OS << "typedef __attribute__((neon_vector_type(";
1321 
1322     unsigned nElts = GetNumElements(TDTypeVec[i], quad);
1323     OS << utostr(nElts) << "))) ";
1324     if (nElts < 10)
1325       OS << " ";
1326 
1327     OS << TypeString('s', TDTypeVec[i]);
1328     OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";
1329   }
1330   OS << "\n";
1331 
1332   // Emit struct typedefs.
1333   for (unsigned vi = 2; vi != 5; ++vi) {
1334     for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
1335       std::string ts = TypeString('d', TDTypeVec[i]);
1336       std::string vs = TypeString('0' + vi, TDTypeVec[i]);
1337       OS << "typedef struct " << vs << " {\n";
1338       OS << "  " << ts << " val";
1339       OS << "[" << utostr(vi) << "]";
1340       OS << ";\n} ";
1341       OS << vs << ";\n\n";
1342     }
1343   }
1344 
1345   OS<<"#define __ai static __attribute__((__always_inline__, __nodebug__))\n\n";
1346 
1347   std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
1348 
1349   // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
1350   // intrinsics.  (Some of the saturating multiply instructions are also
1351   // used to implement the corresponding "_lane" variants, but tablegen
1352   // sorts the records into alphabetical order so that the "_lane" variants
1353   // come after the intrinsics they use.)
1354   emitIntrinsic(OS, Records.getDef("VMOVL"));
1355   emitIntrinsic(OS, Records.getDef("VMULL"));
1356   emitIntrinsic(OS, Records.getDef("VABD"));
1357 
1358   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
1359     Record *R = RV[i];
1360     if (R->getName() != "VMOVL" &&
1361         R->getName() != "VMULL" &&
1362         R->getName() != "VABD")
1363       emitIntrinsic(OS, R);
1364   }
1365 
1366   OS << "#undef __ai\n\n";
1367   OS << "#endif /* __ARM_NEON_H */\n";
1368 }
1369 
1370 /// emitIntrinsic - Write out the arm_neon.h header file definitions for the
1371 /// intrinsics specified by record R.
emitIntrinsic(raw_ostream & OS,Record * R)1372 void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R) {
1373   std::string name = R->getValueAsString("Name");
1374   std::string Proto = R->getValueAsString("Prototype");
1375   std::string Types = R->getValueAsString("Types");
1376 
1377   SmallVector<StringRef, 16> TypeVec;
1378   ParseTypes(R, Types, TypeVec);
1379 
1380   OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
1381 
1382   ClassKind classKind = ClassNone;
1383   if (R->getSuperClasses().size() >= 2)
1384     classKind = ClassMap[R->getSuperClasses()[1]];
1385   if (classKind == ClassNone && kind == OpNone)
1386     PrintFatalError(R->getLoc(), "Builtin has no class kind");
1387 
1388   for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
1389     if (kind == OpReinterpret) {
1390       bool outQuad = false;
1391       bool dummy = false;
1392       (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
1393       for (unsigned srcti = 0, srcte = TypeVec.size();
1394            srcti != srcte; ++srcti) {
1395         bool inQuad = false;
1396         (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
1397         if (srcti == ti || inQuad != outQuad)
1398           continue;
1399         OS << GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
1400                            OpCast, ClassS);
1401       }
1402     } else {
1403       OS << GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti],
1404                          kind, classKind);
1405     }
1406   }
1407   OS << "\n";
1408 }
1409 
RangeFromType(const char mod,StringRef typestr)1410 static unsigned RangeFromType(const char mod, StringRef typestr) {
1411   // base type to get the type string for.
1412   bool quad = false, dummy = false;
1413   char type = ClassifyType(typestr, quad, dummy, dummy);
1414   type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);
1415 
1416   switch (type) {
1417     case 'c':
1418       return (8 << (int)quad) - 1;
1419     case 'h':
1420     case 's':
1421       return (4 << (int)quad) - 1;
1422     case 'f':
1423     case 'i':
1424       return (2 << (int)quad) - 1;
1425     case 'l':
1426       return (1 << (int)quad) - 1;
1427     default:
1428       PrintFatalError("unhandled type!");
1429   }
1430 }
1431 
1432 /// runHeader - Emit a file with sections defining:
1433 /// 1. the NEON section of BuiltinsARM.def.
1434 /// 2. the SemaChecking code for the type overload checking.
1435 /// 3. the SemaChecking code for validation of intrinsic immediate arguments.
runHeader(raw_ostream & OS)1436 void NeonEmitter::runHeader(raw_ostream &OS) {
1437   std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
1438 
1439   StringMap<OpKind> EmittedMap;
1440 
1441   // Generate BuiltinsARM.def for NEON
1442   OS << "#ifdef GET_NEON_BUILTINS\n";
1443   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
1444     Record *R = RV[i];
1445     OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
1446     if (k != OpNone)
1447       continue;
1448 
1449     std::string Proto = R->getValueAsString("Prototype");
1450 
1451     // Functions with 'a' (the splat code) in the type prototype should not get
1452     // their own builtin as they use the non-splat variant.
1453     if (Proto.find('a') != std::string::npos)
1454       continue;
1455 
1456     std::string Types = R->getValueAsString("Types");
1457     SmallVector<StringRef, 16> TypeVec;
1458     ParseTypes(R, Types, TypeVec);
1459 
1460     if (R->getSuperClasses().size() < 2)
1461       PrintFatalError(R->getLoc(), "Builtin has no class kind");
1462 
1463     std::string name = R->getValueAsString("Name");
1464     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
1465 
1466     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
1467       // Generate the BuiltinsARM.def declaration for this builtin, ensuring
1468       // that each unique BUILTIN() macro appears only once in the output
1469       // stream.
1470       std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
1471       if (EmittedMap.count(bd))
1472         continue;
1473 
1474       EmittedMap[bd] = OpNone;
1475       OS << bd << "\n";
1476     }
1477   }
1478   OS << "#endif\n\n";
1479 
1480   // Generate the overloaded type checking code for SemaChecking.cpp
1481   OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
1482   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
1483     Record *R = RV[i];
1484     OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
1485     if (k != OpNone)
1486       continue;
1487 
1488     std::string Proto = R->getValueAsString("Prototype");
1489     std::string Types = R->getValueAsString("Types");
1490     std::string name = R->getValueAsString("Name");
1491 
1492     // Functions with 'a' (the splat code) in the type prototype should not get
1493     // their own builtin as they use the non-splat variant.
1494     if (Proto.find('a') != std::string::npos)
1495       continue;
1496 
1497     // Functions which have a scalar argument cannot be overloaded, no need to
1498     // check them if we are emitting the type checking code.
1499     if (Proto.find('s') != std::string::npos)
1500       continue;
1501 
1502     SmallVector<StringRef, 16> TypeVec;
1503     ParseTypes(R, Types, TypeVec);
1504 
1505     if (R->getSuperClasses().size() < 2)
1506       PrintFatalError(R->getLoc(), "Builtin has no class kind");
1507 
1508     int si = -1, qi = -1;
1509     uint64_t mask = 0, qmask = 0;
1510     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
1511       // Generate the switch case(s) for this builtin for the type validation.
1512       bool quad = false, poly = false, usgn = false;
1513       (void) ClassifyType(TypeVec[ti], quad, poly, usgn);
1514 
1515       if (quad) {
1516         qi = ti;
1517         qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
1518       } else {
1519         si = ti;
1520         mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
1521       }
1522     }
1523 
1524     // Check if the builtin function has a pointer or const pointer argument.
1525     int PtrArgNum = -1;
1526     bool HasConstPtr = false;
1527     for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
1528       char ArgType = Proto[arg];
1529       if (ArgType == 'c') {
1530         HasConstPtr = true;
1531         PtrArgNum = arg - 1;
1532         break;
1533       }
1534       if (ArgType == 'p') {
1535         PtrArgNum = arg - 1;
1536         break;
1537       }
1538     }
1539     // For sret builtins, adjust the pointer argument index.
1540     if (PtrArgNum >= 0 && (Proto[0] >= '2' && Proto[0] <= '4'))
1541       PtrArgNum += 1;
1542 
1543     // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
1544     // and vst1_lane intrinsics.  Using a pointer to the vector element
1545     // type with one of those operations causes codegen to select an aligned
1546     // load/store instruction.  If you want an unaligned operation,
1547     // the pointer argument needs to have less alignment than element type,
1548     // so just accept any pointer type.
1549     if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
1550       PtrArgNum = -1;
1551       HasConstPtr = false;
1552     }
1553 
1554     if (mask) {
1555       OS << "case ARM::BI__builtin_neon_"
1556          << MangleName(name, TypeVec[si], ClassB)
1557          << ": mask = " << "0x" << utohexstr(mask) << "ULL";
1558       if (PtrArgNum >= 0)
1559         OS << "; PtrArgNum = " << PtrArgNum;
1560       if (HasConstPtr)
1561         OS << "; HasConstPtr = true";
1562       OS << "; break;\n";
1563     }
1564     if (qmask) {
1565       OS << "case ARM::BI__builtin_neon_"
1566          << MangleName(name, TypeVec[qi], ClassB)
1567          << ": mask = " << "0x" << utohexstr(qmask) << "ULL";
1568       if (PtrArgNum >= 0)
1569         OS << "; PtrArgNum = " << PtrArgNum;
1570       if (HasConstPtr)
1571         OS << "; HasConstPtr = true";
1572       OS << "; break;\n";
1573     }
1574   }
1575   OS << "#endif\n\n";
1576 
1577   // Generate the intrinsic range checking code for shift/lane immediates.
1578   OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
1579   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
1580     Record *R = RV[i];
1581 
1582     OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
1583     if (k != OpNone)
1584       continue;
1585 
1586     std::string name = R->getValueAsString("Name");
1587     std::string Proto = R->getValueAsString("Prototype");
1588     std::string Types = R->getValueAsString("Types");
1589 
1590     // Functions with 'a' (the splat code) in the type prototype should not get
1591     // their own builtin as they use the non-splat variant.
1592     if (Proto.find('a') != std::string::npos)
1593       continue;
1594 
1595     // Functions which do not have an immediate do not need to have range
1596     // checking code emitted.
1597     size_t immPos = Proto.find('i');
1598     if (immPos == std::string::npos)
1599       continue;
1600 
1601     SmallVector<StringRef, 16> TypeVec;
1602     ParseTypes(R, Types, TypeVec);
1603 
1604     if (R->getSuperClasses().size() < 2)
1605       PrintFatalError(R->getLoc(), "Builtin has no class kind");
1606 
1607     ClassKind ck = ClassMap[R->getSuperClasses()[1]];
1608 
1609     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
1610       std::string namestr, shiftstr, rangestr;
1611 
1612       if (R->getValueAsBit("isVCVT_N")) {
1613         // VCVT between floating- and fixed-point values takes an immediate
1614         // in the range 1 to 32.
1615         ck = ClassB;
1616         rangestr = "l = 1; u = 31"; // upper bound = l + u
1617       } else if (Proto.find('s') == std::string::npos) {
1618         // Builtins which are overloaded by type will need to have their upper
1619         // bound computed at Sema time based on the type constant.
1620         ck = ClassB;
1621         if (R->getValueAsBit("isShift")) {
1622           shiftstr = ", true";
1623 
1624           // Right shifts have an 'r' in the name, left shifts do not.
1625           if (name.find('r') != std::string::npos)
1626             rangestr = "l = 1; ";
1627         }
1628         rangestr += "u = RFT(TV" + shiftstr + ")";
1629       } else {
1630         // The immediate generally refers to a lane in the preceding argument.
1631         assert(immPos > 0 && "unexpected immediate operand");
1632         rangestr = "u = " + utostr(RangeFromType(Proto[immPos-1], TypeVec[ti]));
1633       }
1634       // Make sure cases appear only once by uniquing them in a string map.
1635       namestr = MangleName(name, TypeVec[ti], ck);
1636       if (EmittedMap.count(namestr))
1637         continue;
1638       EmittedMap[namestr] = OpNone;
1639 
1640       // Calculate the index of the immediate that should be range checked.
1641       unsigned immidx = 0;
1642 
1643       // Builtins that return a struct of multiple vectors have an extra
1644       // leading arg for the struct return.
1645       if (Proto[0] >= '2' && Proto[0] <= '4')
1646         ++immidx;
1647 
1648       // Add one to the index for each argument until we reach the immediate
1649       // to be checked.  Structs of vectors are passed as multiple arguments.
1650       for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
1651         switch (Proto[ii]) {
1652           default:  immidx += 1; break;
1653           case '2': immidx += 2; break;
1654           case '3': immidx += 3; break;
1655           case '4': immidx += 4; break;
1656           case 'i': ie = ii + 1; break;
1657         }
1658       }
1659       OS << "case ARM::BI__builtin_neon_" << MangleName(name, TypeVec[ti], ck)
1660          << ": i = " << immidx << "; " << rangestr << "; break;\n";
1661     }
1662   }
1663   OS << "#endif\n\n";
1664 }
1665 
1666 /// GenTest - Write out a test for the intrinsic specified by the name and
1667 /// type strings, including the embedded patterns for FileCheck to match.
GenTest(const std::string & name,const std::string & proto,StringRef outTypeStr,StringRef inTypeStr,bool isShift)1668 static std::string GenTest(const std::string &name,
1669                            const std::string &proto,
1670                            StringRef outTypeStr, StringRef inTypeStr,
1671                            bool isShift) {
1672   assert(!proto.empty() && "");
1673   std::string s;
1674 
1675   // Function name with type suffix
1676   std::string mangledName = MangleName(name, outTypeStr, ClassS);
1677   if (outTypeStr != inTypeStr) {
1678     // If the input type is different (e.g., for vreinterpret), append a suffix
1679     // for the input type.  String off a "Q" (quad) prefix so that MangleName
1680     // does not insert another "q" in the name.
1681     unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
1682     StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
1683     mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
1684   }
1685 
1686   // Emit the FileCheck patterns.
1687   s += "// CHECK: test_" + mangledName + "\n";
1688   // s += "// CHECK: \n"; // FIXME: + expected instruction opcode.
1689 
1690   // Emit the start of the test function.
1691   s += TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
1692   char arg = 'a';
1693   std::string comma;
1694   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1695     // Do not create arguments for values that must be immediate constants.
1696     if (proto[i] == 'i')
1697       continue;
1698     s += comma + TypeString(proto[i], inTypeStr) + " ";
1699     s.push_back(arg);
1700     comma = ", ";
1701   }
1702   s += ") {\n  ";
1703 
1704   if (proto[0] != 'v')
1705     s += "return ";
1706   s += mangledName + "(";
1707   arg = 'a';
1708   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1709     if (proto[i] == 'i') {
1710       // For immediate operands, test the maximum value.
1711       if (isShift)
1712         s += "1"; // FIXME
1713       else
1714         // The immediate generally refers to a lane in the preceding argument.
1715         s += utostr(RangeFromType(proto[i-1], inTypeStr));
1716     } else {
1717       s.push_back(arg);
1718     }
1719     if ((i + 1) < e)
1720       s += ", ";
1721   }
1722   s += ");\n}\n\n";
1723   return s;
1724 }
1725 
1726 /// runTests - Write out a complete set of tests for all of the Neon
1727 /// intrinsics.
runTests(raw_ostream & OS)1728 void NeonEmitter::runTests(raw_ostream &OS) {
1729   OS <<
1730     "// RUN: %clang_cc1 -triple thumbv7-apple-darwin \\\n"
1731     "// RUN:  -target-cpu cortex-a9 -ffreestanding -S -o - %s | FileCheck %s\n"
1732     "\n"
1733     "#include <arm_neon.h>\n"
1734     "\n";
1735 
1736   std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
1737   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
1738     Record *R = RV[i];
1739     std::string name = R->getValueAsString("Name");
1740     std::string Proto = R->getValueAsString("Prototype");
1741     std::string Types = R->getValueAsString("Types");
1742     bool isShift = R->getValueAsBit("isShift");
1743 
1744     SmallVector<StringRef, 16> TypeVec;
1745     ParseTypes(R, Types, TypeVec);
1746 
1747     OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
1748     if (kind == OpUnavailable)
1749       continue;
1750     for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
1751       if (kind == OpReinterpret) {
1752         bool outQuad = false;
1753         bool dummy = false;
1754         (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
1755         for (unsigned srcti = 0, srcte = TypeVec.size();
1756              srcti != srcte; ++srcti) {
1757           bool inQuad = false;
1758           (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
1759           if (srcti == ti || inQuad != outQuad)
1760             continue;
1761           OS << GenTest(name, Proto, TypeVec[ti], TypeVec[srcti], isShift);
1762         }
1763       } else {
1764         OS << GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift);
1765       }
1766     }
1767     OS << "\n";
1768   }
1769 }
1770 
1771 namespace clang {
EmitNeon(RecordKeeper & Records,raw_ostream & OS)1772 void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
1773   NeonEmitter(Records).run(OS);
1774 }
EmitNeonSema(RecordKeeper & Records,raw_ostream & OS)1775 void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
1776   NeonEmitter(Records).runHeader(OS);
1777 }
EmitNeonTest(RecordKeeper & Records,raw_ostream & OS)1778 void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
1779   NeonEmitter(Records).runTests(OS);
1780 }
1781 } // End namespace clang
1782