1 //===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This tablegen backend is responsible for emitting arm_neon.h, which includes
11 // a declaration and definition of each function specified by the ARM NEON
12 // compiler interface. See ARM document DUI0348B.
13 //
14 // Each NEON instruction is implemented in terms of 1 or more functions which
15 // are suffixed with the element type of the input vectors. Functions may be
16 // implemented in terms of generic vector operations such as +, *, -, etc. or
17 // by calling a __builtin_-prefixed function which will be handled by clang's
18 // CodeGen library.
19 //
20 // Additional validation code can be generated by this file when runHeader() is
21 // called, rather than the normal run() entry point. A complete set of tests
22 // for Neon intrinsics can be generated by calling the runTests() entry point.
23 //
24 //===----------------------------------------------------------------------===//
25
26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/SmallString.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/StringExtras.h"
30 #include "llvm/ADT/StringMap.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/TableGen/Error.h"
33 #include "llvm/TableGen/Record.h"
34 #include "llvm/TableGen/TableGenBackend.h"
35 #include <string>
36 using namespace llvm;
37
/// OpKind - Selects how the body of an intrinsic is produced. The values are
/// looked up by their "OP_*" spelling through NeonEmitter::OpMap and are
/// dispatched on by GenOpString. The enumerator order is preserved, so the
/// underlying integer values are unchanged.
enum OpKind {
  // Sentinels.
  OpNone, OpUnavailable,

  // Add / subtract, including the long ("l") and wide ("w") forms.
  OpAdd, OpAddl, OpAddw,
  OpSub, OpSubl, OpSubw,

  // Multiply and multiply-accumulate / multiply-subtract on whole vectors.
  OpMul, OpMla, OpMlal, OpMls, OpMlsl,

  // The same operations with a scalar ("_n") last operand.
  OpMulN, OpMlaN, OpMlsN, OpMlalN, OpMlslN,

  // The same operations with a vector-lane ("_lane") last operand.
  OpMulLane, OpMullLane, OpMlaLane, OpMlsLane, OpMlalLane, OpMlslLane,

  // Saturating doubling multiplies by lane.
  OpQDMullLane, OpQDMlalLane, OpQDMlslLane, OpQDMulhLane, OpQRDMulhLane,

  // Comparisons.
  OpEq, OpGe, OpLe, OpGt, OpLt,

  // Unary and bitwise operators.
  OpNeg, OpNot, OpAnd, OpOr, OpXor, OpAndNot, OpOrNot,

  // Casts, shuffles and element rearrangement.
  OpCast, OpConcat, OpDup, OpDupLane, OpHi, OpLo, OpSelect,
  OpRev16, OpRev32, OpRev64, OpReinterpret,

  // Absolute-difference based operations.
  OpAbdl, OpAba, OpAbal
};
95
/// ClassKind - The naming/type-checking class of an intrinsic. It decides
/// which suffix MangleName appends and how BuiltinTypeString encodes the
/// operands.
enum ClassKind {
  /// No class assigned.
  ClassNone,
  /// Generic integer instruction, e.g., "i8" suffix.
  ClassI,
  /// Signed/unsigned/poly instruction, e.g., "s8", "u8" or "p8" suffix.
  ClassS,
  /// Width-specific instruction, e.g., "8" suffix.
  ClassW,
  /// Bitcast arguments, with an extra enum argument to specify the type.
  ClassB
};
103
/// NeonTypeFlags - Flags to identify the types for overloaded Neon
/// builtins. These must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
///
/// The low four bits hold the element type; bit 4 marks an unsigned element
/// type and bit 5 marks a quad (128-bit) vector.
namespace {
class NeonTypeFlags {
  enum {
    EltTypeMask = 0xf,
    UnsignedFlag = 0x10,
    QuadFlag = 0x20
  };

  /// The packed flag word.
  uint32_t Flags;

public:
  /// Element types, stored in the low bits of the flag word.
  enum EltType {
    Int8, Int16, Int32, Int64,
    Poly8, Poly16,
    Float16, Float32
  };

  /// Wrap an already packed flag word.
  NeonTypeFlags(unsigned F) : Flags(F) {}

  /// Pack an element type together with its unsigned and quad modifiers.
  NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad)
      : Flags(static_cast<uint32_t>(ET) |
              (IsUnsigned ? static_cast<uint32_t>(UnsignedFlag) : 0u) |
              (IsQuad ? static_cast<uint32_t>(QuadFlag) : 0u)) {}

  /// Return the packed flag word.
  uint32_t getFlags() const { return Flags; }
};
} // end anonymous namespace
139
140 namespace {
141 class NeonEmitter {
142 RecordKeeper &Records;
143 StringMap<OpKind> OpMap;
144 DenseMap<Record*, ClassKind> ClassMap;
145
146 public:
NeonEmitter(RecordKeeper & R)147 NeonEmitter(RecordKeeper &R) : Records(R) {
148 OpMap["OP_NONE"] = OpNone;
149 OpMap["OP_UNAVAILABLE"] = OpUnavailable;
150 OpMap["OP_ADD"] = OpAdd;
151 OpMap["OP_ADDL"] = OpAddl;
152 OpMap["OP_ADDW"] = OpAddw;
153 OpMap["OP_SUB"] = OpSub;
154 OpMap["OP_SUBL"] = OpSubl;
155 OpMap["OP_SUBW"] = OpSubw;
156 OpMap["OP_MUL"] = OpMul;
157 OpMap["OP_MLA"] = OpMla;
158 OpMap["OP_MLAL"] = OpMlal;
159 OpMap["OP_MLS"] = OpMls;
160 OpMap["OP_MLSL"] = OpMlsl;
161 OpMap["OP_MUL_N"] = OpMulN;
162 OpMap["OP_MLA_N"] = OpMlaN;
163 OpMap["OP_MLS_N"] = OpMlsN;
164 OpMap["OP_MLAL_N"] = OpMlalN;
165 OpMap["OP_MLSL_N"] = OpMlslN;
166 OpMap["OP_MUL_LN"]= OpMulLane;
167 OpMap["OP_MULL_LN"] = OpMullLane;
168 OpMap["OP_MLA_LN"]= OpMlaLane;
169 OpMap["OP_MLS_LN"]= OpMlsLane;
170 OpMap["OP_MLAL_LN"] = OpMlalLane;
171 OpMap["OP_MLSL_LN"] = OpMlslLane;
172 OpMap["OP_QDMULL_LN"] = OpQDMullLane;
173 OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
174 OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
175 OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
176 OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
177 OpMap["OP_EQ"] = OpEq;
178 OpMap["OP_GE"] = OpGe;
179 OpMap["OP_LE"] = OpLe;
180 OpMap["OP_GT"] = OpGt;
181 OpMap["OP_LT"] = OpLt;
182 OpMap["OP_NEG"] = OpNeg;
183 OpMap["OP_NOT"] = OpNot;
184 OpMap["OP_AND"] = OpAnd;
185 OpMap["OP_OR"] = OpOr;
186 OpMap["OP_XOR"] = OpXor;
187 OpMap["OP_ANDN"] = OpAndNot;
188 OpMap["OP_ORN"] = OpOrNot;
189 OpMap["OP_CAST"] = OpCast;
190 OpMap["OP_CONC"] = OpConcat;
191 OpMap["OP_HI"] = OpHi;
192 OpMap["OP_LO"] = OpLo;
193 OpMap["OP_DUP"] = OpDup;
194 OpMap["OP_DUP_LN"] = OpDupLane;
195 OpMap["OP_SEL"] = OpSelect;
196 OpMap["OP_REV16"] = OpRev16;
197 OpMap["OP_REV32"] = OpRev32;
198 OpMap["OP_REV64"] = OpRev64;
199 OpMap["OP_REINT"] = OpReinterpret;
200 OpMap["OP_ABDL"] = OpAbdl;
201 OpMap["OP_ABA"] = OpAba;
202 OpMap["OP_ABAL"] = OpAbal;
203
204 Record *SI = R.getClass("SInst");
205 Record *II = R.getClass("IInst");
206 Record *WI = R.getClass("WInst");
207 ClassMap[SI] = ClassS;
208 ClassMap[II] = ClassI;
209 ClassMap[WI] = ClassW;
210 }
211
212 // run - Emit arm_neon.h.inc
213 void run(raw_ostream &o);
214
215 // runHeader - Emit all the __builtin prototypes used in arm_neon.h
216 void runHeader(raw_ostream &o);
217
218 // runTests - Emit tests for all the Neon intrinsics.
219 void runTests(raw_ostream &o);
220
221 private:
222 void emitIntrinsic(raw_ostream &OS, Record *R);
223 };
224 } // end anonymous namespace
225
226 /// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
227 /// which each StringRef representing a single type declared in the string.
228 /// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
229 /// 2xfloat and 4xfloat respectively.
ParseTypes(Record * r,std::string & s,SmallVectorImpl<StringRef> & TV)230 static void ParseTypes(Record *r, std::string &s,
231 SmallVectorImpl<StringRef> &TV) {
232 const char *data = s.data();
233 int len = 0;
234
235 for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
236 if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U')
237 continue;
238
239 switch (data[len]) {
240 case 'c':
241 case 's':
242 case 'i':
243 case 'l':
244 case 'h':
245 case 'f':
246 break;
247 default:
248 PrintFatalError(r->getLoc(),
249 "Unexpected letter: " + std::string(data + len, 1));
250 }
251 TV.push_back(StringRef(data, len + 1));
252 data += len + 1;
253 len = -1;
254 }
255 }
256
257 /// Widen - Convert a type code into the next wider type. char -> short,
258 /// short -> int, etc.
Widen(const char t)259 static char Widen(const char t) {
260 switch (t) {
261 case 'c':
262 return 's';
263 case 's':
264 return 'i';
265 case 'i':
266 return 'l';
267 case 'h':
268 return 'f';
269 default:
270 PrintFatalError("unhandled type in widen!");
271 }
272 }
273
274 /// Narrow - Convert a type code into the next smaller type. short -> char,
275 /// float -> half float, etc.
Narrow(const char t)276 static char Narrow(const char t) {
277 switch (t) {
278 case 's':
279 return 'c';
280 case 'i':
281 return 's';
282 case 'l':
283 return 'i';
284 case 'f':
285 return 'h';
286 default:
287 PrintFatalError("unhandled type in narrow!");
288 }
289 }
290
291 /// For a particular StringRef, return the base type code, and whether it has
292 /// the quad-vector, polynomial, or unsigned modifiers set.
ClassifyType(StringRef ty,bool & quad,bool & poly,bool & usgn)293 static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
294 unsigned off = 0;
295
296 // remember quad.
297 if (ty[off] == 'Q') {
298 quad = true;
299 ++off;
300 }
301
302 // remember poly.
303 if (ty[off] == 'P') {
304 poly = true;
305 ++off;
306 }
307
308 // remember unsigned.
309 if (ty[off] == 'U') {
310 usgn = true;
311 ++off;
312 }
313
314 // base type to get the type string for.
315 return ty[off];
316 }
317
318 /// ModType - Transform a type code and its modifiers based on a mod code. The
319 /// mod code definitions may be found at the top of arm_neon.td.
ModType(const char mod,char type,bool & quad,bool & poly,bool & usgn,bool & scal,bool & cnst,bool & pntr)320 static char ModType(const char mod, char type, bool &quad, bool &poly,
321 bool &usgn, bool &scal, bool &cnst, bool &pntr) {
322 switch (mod) {
323 case 't':
324 if (poly) {
325 poly = false;
326 usgn = true;
327 }
328 break;
329 case 'u':
330 usgn = true;
331 poly = false;
332 if (type == 'f')
333 type = 'i';
334 break;
335 case 'x':
336 usgn = false;
337 poly = false;
338 if (type == 'f')
339 type = 'i';
340 break;
341 case 'f':
342 if (type == 'h')
343 quad = true;
344 type = 'f';
345 usgn = false;
346 break;
347 case 'g':
348 quad = false;
349 break;
350 case 'w':
351 type = Widen(type);
352 quad = true;
353 break;
354 case 'n':
355 type = Widen(type);
356 break;
357 case 'i':
358 type = 'i';
359 scal = true;
360 break;
361 case 'l':
362 type = 'l';
363 scal = true;
364 usgn = true;
365 break;
366 case 's':
367 case 'a':
368 scal = true;
369 break;
370 case 'k':
371 quad = true;
372 break;
373 case 'c':
374 cnst = true;
375 case 'p':
376 pntr = true;
377 scal = true;
378 break;
379 case 'h':
380 type = Narrow(type);
381 if (type == 'h')
382 quad = false;
383 break;
384 case 'e':
385 type = Narrow(type);
386 usgn = true;
387 break;
388 default:
389 break;
390 }
391 return type;
392 }
393
394 /// TypeString - for a modifier and type, generate the name of the typedef for
395 /// that type. QUc -> uint8x8_t.
TypeString(const char mod,StringRef typestr)396 static std::string TypeString(const char mod, StringRef typestr) {
397 bool quad = false;
398 bool poly = false;
399 bool usgn = false;
400 bool scal = false;
401 bool cnst = false;
402 bool pntr = false;
403
404 if (mod == 'v')
405 return "void";
406 if (mod == 'i')
407 return "int";
408
409 // base type to get the type string for.
410 char type = ClassifyType(typestr, quad, poly, usgn);
411
412 // Based on the modifying character, change the type and width if necessary.
413 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
414
415 SmallString<128> s;
416
417 if (usgn)
418 s.push_back('u');
419
420 switch (type) {
421 case 'c':
422 s += poly ? "poly8" : "int8";
423 if (scal)
424 break;
425 s += quad ? "x16" : "x8";
426 break;
427 case 's':
428 s += poly ? "poly16" : "int16";
429 if (scal)
430 break;
431 s += quad ? "x8" : "x4";
432 break;
433 case 'i':
434 s += "int32";
435 if (scal)
436 break;
437 s += quad ? "x4" : "x2";
438 break;
439 case 'l':
440 s += "int64";
441 if (scal)
442 break;
443 s += quad ? "x2" : "x1";
444 break;
445 case 'h':
446 s += "float16";
447 if (scal)
448 break;
449 s += quad ? "x8" : "x4";
450 break;
451 case 'f':
452 s += "float32";
453 if (scal)
454 break;
455 s += quad ? "x4" : "x2";
456 break;
457 default:
458 PrintFatalError("unhandled type!");
459 }
460
461 if (mod == '2')
462 s += "x2";
463 if (mod == '3')
464 s += "x3";
465 if (mod == '4')
466 s += "x4";
467
468 // Append _t, finishing the type string typedef type.
469 s += "_t";
470
471 if (cnst)
472 s += " const";
473
474 if (pntr)
475 s += " *";
476
477 return s.str();
478 }
479
480 /// BuiltinTypeString - for a modifier and type, generate the clang
481 /// BuiltinsARM.def prototype code for the function. See the top of clang's
482 /// Builtins.def for a description of the type strings.
BuiltinTypeString(const char mod,StringRef typestr,ClassKind ck,bool ret)483 static std::string BuiltinTypeString(const char mod, StringRef typestr,
484 ClassKind ck, bool ret) {
485 bool quad = false;
486 bool poly = false;
487 bool usgn = false;
488 bool scal = false;
489 bool cnst = false;
490 bool pntr = false;
491
492 if (mod == 'v')
493 return "v"; // void
494 if (mod == 'i')
495 return "i"; // int
496
497 // base type to get the type string for.
498 char type = ClassifyType(typestr, quad, poly, usgn);
499
500 // Based on the modifying character, change the type and width if necessary.
501 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
502
503 // All pointers are void* pointers. Change type to 'v' now.
504 if (pntr) {
505 usgn = false;
506 poly = false;
507 type = 'v';
508 }
509 // Treat half-float ('h') types as unsigned short ('s') types.
510 if (type == 'h') {
511 type = 's';
512 usgn = true;
513 }
514 usgn = usgn | poly | ((ck == ClassI || ck == ClassW) && scal && type != 'f');
515
516 if (scal) {
517 SmallString<128> s;
518
519 if (usgn)
520 s.push_back('U');
521 else if (type == 'c')
522 s.push_back('S'); // make chars explicitly signed
523
524 if (type == 'l') // 64-bit long
525 s += "LLi";
526 else
527 s.push_back(type);
528
529 if (cnst)
530 s.push_back('C');
531 if (pntr)
532 s.push_back('*');
533 return s.str();
534 }
535
536 // Since the return value must be one type, return a vector type of the
537 // appropriate width which we will bitcast. An exception is made for
538 // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
539 // fashion, storing them to a pointer arg.
540 if (ret) {
541 if (mod >= '2' && mod <= '4')
542 return "vv*"; // void result with void* first argument
543 if (mod == 'f' || (ck != ClassB && type == 'f'))
544 return quad ? "V4f" : "V2f";
545 if (ck != ClassB && type == 's')
546 return quad ? "V8s" : "V4s";
547 if (ck != ClassB && type == 'i')
548 return quad ? "V4i" : "V2i";
549 if (ck != ClassB && type == 'l')
550 return quad ? "V2LLi" : "V1LLi";
551
552 return quad ? "V16Sc" : "V8Sc";
553 }
554
555 // Non-return array types are passed as individual vectors.
556 if (mod == '2')
557 return quad ? "V16ScV16Sc" : "V8ScV8Sc";
558 if (mod == '3')
559 return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
560 if (mod == '4')
561 return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";
562
563 if (mod == 'f' || (ck != ClassB && type == 'f'))
564 return quad ? "V4f" : "V2f";
565 if (ck != ClassB && type == 's')
566 return quad ? "V8s" : "V4s";
567 if (ck != ClassB && type == 'i')
568 return quad ? "V4i" : "V2i";
569 if (ck != ClassB && type == 'l')
570 return quad ? "V2LLi" : "V1LLi";
571
572 return quad ? "V16Sc" : "V8Sc";
573 }
574
575 /// MangleName - Append a type or width suffix to a base neon function name,
576 /// and insert a 'q' in the appropriate location if the operation works on
577 /// 128b rather than 64b. E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
MangleName(const std::string & name,StringRef typestr,ClassKind ck)578 static std::string MangleName(const std::string &name, StringRef typestr,
579 ClassKind ck) {
580 if (name == "vcvt_f32_f16")
581 return name;
582
583 bool quad = false;
584 bool poly = false;
585 bool usgn = false;
586 char type = ClassifyType(typestr, quad, poly, usgn);
587
588 std::string s = name;
589
590 switch (type) {
591 case 'c':
592 switch (ck) {
593 case ClassS: s += poly ? "_p8" : usgn ? "_u8" : "_s8"; break;
594 case ClassI: s += "_i8"; break;
595 case ClassW: s += "_8"; break;
596 default: break;
597 }
598 break;
599 case 's':
600 switch (ck) {
601 case ClassS: s += poly ? "_p16" : usgn ? "_u16" : "_s16"; break;
602 case ClassI: s += "_i16"; break;
603 case ClassW: s += "_16"; break;
604 default: break;
605 }
606 break;
607 case 'i':
608 switch (ck) {
609 case ClassS: s += usgn ? "_u32" : "_s32"; break;
610 case ClassI: s += "_i32"; break;
611 case ClassW: s += "_32"; break;
612 default: break;
613 }
614 break;
615 case 'l':
616 switch (ck) {
617 case ClassS: s += usgn ? "_u64" : "_s64"; break;
618 case ClassI: s += "_i64"; break;
619 case ClassW: s += "_64"; break;
620 default: break;
621 }
622 break;
623 case 'h':
624 switch (ck) {
625 case ClassS:
626 case ClassI: s += "_f16"; break;
627 case ClassW: s += "_16"; break;
628 default: break;
629 }
630 break;
631 case 'f':
632 switch (ck) {
633 case ClassS:
634 case ClassI: s += "_f32"; break;
635 case ClassW: s += "_32"; break;
636 default: break;
637 }
638 break;
639 default:
640 PrintFatalError("unhandled type!");
641 }
642 if (ck == ClassB)
643 s += "_v";
644
645 // Insert a 'q' before the first '_' character so that it ends up before
646 // _lane or _n on vector-scalar operations.
647 if (quad) {
648 size_t pos = s.find('_');
649 s = s.insert(pos, "q");
650 }
651 return s;
652 }
653
/// UseMacro - Examine the prototype string to determine if the intrinsic
/// should be defined as a preprocessor macro instead of an inline function.
///
/// A macro is needed when the prototype mentions any of:
///  - 'i': an immediate argument, so that SemaChecking can range check the
///         immediate passed by the user;
///  - 'p' or 'c': a (const) pointer argument, to avoid hiding aligned
///         attributes from the pointer type.
static bool UseMacro(const std::string &proto) {
  return proto.find_first_of("ipc") != std::string::npos;
}
671
/// MacroArgUsedDirectly - Return true if argument i for an intrinsic that is
/// defined as a macro should be accessed directly instead of being first
/// assigned to a local temporary. This holds for constant ints ('i'),
/// pointers ('p') and const pointers ('c').
static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
  switch (proto[i]) {
  case 'i':
  case 'p':
  case 'c':
    return true;
  default:
    return false;
  }
}
679
680 // Generate the string "(argtype a, argtype b, ...)"
GenArgs(const std::string & proto,StringRef typestr)681 static std::string GenArgs(const std::string &proto, StringRef typestr) {
682 bool define = UseMacro(proto);
683 char arg = 'a';
684
685 std::string s;
686 s += "(";
687
688 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
689 if (define) {
690 // Some macro arguments are used directly instead of being assigned
691 // to local temporaries; prepend an underscore prefix to make their
692 // names consistent with the local temporaries.
693 if (MacroArgUsedDirectly(proto, i))
694 s += "__";
695 } else {
696 s += TypeString(proto[i], typestr) + " __";
697 }
698 s.push_back(arg);
699 if ((i + 1) < e)
700 s += ", ";
701 }
702
703 s += ")";
704 return s;
705 }
706
707 // Macro arguments are not type-checked like inline function arguments, so
708 // assign them to local temporaries to get the right type checking.
GenMacroLocals(const std::string & proto,StringRef typestr)709 static std::string GenMacroLocals(const std::string &proto, StringRef typestr) {
710 char arg = 'a';
711 std::string s;
712 bool generatedLocal = false;
713
714 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
715 // Do not create a temporary for an immediate argument.
716 // That would defeat the whole point of using a macro!
717 if (MacroArgUsedDirectly(proto, i))
718 continue;
719 generatedLocal = true;
720
721 s += TypeString(proto[i], typestr) + " __";
722 s.push_back(arg);
723 s += " = (";
724 s.push_back(arg);
725 s += "); ";
726 }
727
728 if (generatedLocal)
729 s += "\\\n ";
730 return s;
731 }
732
733 // Use the vmovl builtin to sign-extend or zero-extend a vector.
Extend(StringRef typestr,const std::string & a)734 static std::string Extend(StringRef typestr, const std::string &a) {
735 std::string s;
736 s = MangleName("vmovl", typestr, ClassS);
737 s += "(" + a + ")";
738 return s;
739 }
740
Duplicate(unsigned nElts,StringRef typestr,const std::string & a)741 static std::string Duplicate(unsigned nElts, StringRef typestr,
742 const std::string &a) {
743 std::string s;
744
745 s = "(" + TypeString('d', typestr) + "){ ";
746 for (unsigned i = 0; i != nElts; ++i) {
747 s += a;
748 if ((i + 1) < nElts)
749 s += ", ";
750 }
751 s += " }";
752
753 return s;
754 }
755
/// SplatLane - Generate a __builtin_shufflevector call that shuffles \p vec
/// with itself and selects index \p lane for each of the \p nElts result
/// elements.
static std::string SplatLane(unsigned nElts, const std::string &vec,
                             const std::string &lane) {
  std::string call("__builtin_shufflevector(");
  call += vec;
  call += ", ";
  call += vec;

  const std::string laneArg = ", " + lane;
  for (unsigned remaining = nElts; remaining != 0; --remaining)
    call += laneArg;

  call += ")";
  return call;
}
764
GetNumElements(StringRef typestr,bool & quad)765 static unsigned GetNumElements(StringRef typestr, bool &quad) {
766 quad = false;
767 bool dummy = false;
768 char type = ClassifyType(typestr, quad, dummy, dummy);
769 unsigned nElts = 0;
770 switch (type) {
771 case 'c': nElts = 8; break;
772 case 's': nElts = 4; break;
773 case 'i': nElts = 2; break;
774 case 'l': nElts = 1; break;
775 case 'h': nElts = 4; break;
776 case 'f': nElts = 2; break;
777 default:
778 PrintFatalError("unhandled type!");
779 }
780 if (quad) nElts <<= 1;
781 return nElts;
782 }
783
// Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
//
// The result is the text of one statement written in terms of the operands
// __a, __b, __c and __d. When the intrinsic is not emitted as a macro (see
// UseMacro) the statement is prefixed with "return ".
//
//   op      - the operation to expand; kinds without a case below (e.g.
//             OpNone) are a fatal error.
//   proto   - prototype string; proto[0] is the return-type modifier and
//             proto[1] the modifier of the first operand.
//   typestr - the type string the intrinsic is being instantiated for.
static std::string GenOpString(OpKind op, const std::string &proto,
                               StringRef typestr) {
  // quad is always assigned by GetNumElements before it is read.
  bool quad;
  unsigned nElts = GetNumElements(typestr, quad);
  bool define = UseMacro(proto);

  // Name of the return type; used by the cases that cast their result.
  std::string ts = TypeString(proto[0], typestr);
  std::string s;
  if (!define) {
    s = "return ";
  }

  switch(op) {
  case OpAdd:
    s += "__a + __b;";
    break;
  case OpAddl:
    // Long form: both operands are extended first.
    s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
    break;
  case OpAddw:
    // Wide form: only the second operand is extended.
    s += "__a + " + Extend(typestr, "__b") + ";";
    break;
  case OpSub:
    s += "__a - __b;";
    break;
  case OpSubl:
    s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
    break;
  case OpSubw:
    s += "__a - " + Extend(typestr, "__b") + ";";
    break;
  case OpMulN:
    // The scalar operand is duplicated into every element.
    s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
    break;
  case OpMulLane:
    // The selected lane is splatted across a whole vector.
    s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
    break;
  case OpMul:
    s += "__a * __b;";
    break;
  case OpMullLane:
    s += MangleName("vmull", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpMlaN:
    s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlaLane:
    s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMla:
    s += "__a + (__b * __c);";
    break;
  case OpMlalN:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlalLane:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlal:
    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    break;
  case OpMlsN:
    s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlsLane:
    s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMls:
    s += "__a - (__b * __c);";
    break;
  case OpMlslN:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      Duplicate(nElts, typestr, "__c") + ");";
    break;
  case OpMlslLane:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpMlsl:
    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
    break;
  case OpQDMullLane:
    s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQDMlalLane:
    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMlslLane:
    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
      SplatLane(nElts, "__c", "__d") + ");";
    break;
  case OpQDMulhLane:
    s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  case OpQRDMulhLane:
    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
      SplatLane(nElts, "__b", "__c") + ");";
    break;
  // Comparisons: the result is cast to the return type.
  case OpEq:
    s += "(" + ts + ")(__a == __b);";
    break;
  case OpGe:
    s += "(" + ts + ")(__a >= __b);";
    break;
  case OpLe:
    s += "(" + ts + ")(__a <= __b);";
    break;
  case OpGt:
    s += "(" + ts + ")(__a > __b);";
    break;
  case OpLt:
    s += "(" + ts + ")(__a < __b);";
    break;
  case OpNeg:
    s += " -__a;";
    break;
  case OpNot:
    s += " ~__a;";
    break;
  case OpAnd:
    s += "__a & __b;";
    break;
  case OpOr:
    s += "__a | __b;";
    break;
  case OpXor:
    s += "__a ^ __b;";
    break;
  case OpAndNot:
    s += "__a & ~__b;";
    break;
  case OpOrNot:
    s += "__a | ~__b;";
    break;
  case OpCast:
    s += "(" + ts + ")__a;";
    break;
  case OpConcat:
    // Both operands are viewed as int64x1_t and shuffled into one vector.
    s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
    s += ", (int64x1_t)__b, 0, 1);";
    break;
  case OpHi:
    // Element 1 of the operand viewed as int64x2_t.
    s += "(" + ts +
      ")__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1);";
    break;
  case OpLo:
    // Element 0 of the operand viewed as int64x2_t.
    s += "(" + ts +
      ")__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0);";
    break;
  case OpDup:
    s += Duplicate(nElts, typestr, "__a") + ";";
    break;
  case OpDupLane:
    s += SplatLane(nElts, "__a", "__b") + ";";
    break;
  case OpSelect:
    // ((0 & 1) | (~0 & 2))
    s += "(" + ts + ")";
    // From here on ts names the type of the first operand (the mask); the
    // other two operands are cast to it.
    ts = TypeString(proto[1], typestr);
    s += "((__a & (" + ts + ")__b) | ";
    s += "(~__a & (" + ts + ")__c));";
    break;
  case OpRev16:
    // Reverse the elements within each group of two.
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = 2; i <= nElts; i += 2)
      for (unsigned j = 0; j != 2; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  case OpRev32: {
    // Group size: nElts halved once, and once more for a quad vector.
    unsigned WordElts = nElts >> (1 + (int)quad);
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = WordElts; i <= nElts; i += WordElts)
      for (unsigned j = 0; j != WordElts; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  }
  case OpRev64: {
    // Group size: nElts, halved for a quad vector.
    unsigned DblWordElts = nElts >> (int)quad;
    s += "__builtin_shufflevector(__a, __a";
    for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
      for (unsigned j = 0; j != DblWordElts; ++j)
        s += ", " + utostr(i - j - 1);
    s += ");";
    break;
  }
  case OpAbdl: {
    std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
    if (typestr[0] != 'U') {
      // vabd results are always unsigned and must be zero-extended.
      std::string utype = "U" + typestr.str();
      s += "(" + TypeString(proto[0], typestr) + ")";
      abd = "(" + TypeString('d', utype) + ")" + abd;
      s += Extend(utype, abd) + ";";
    } else {
      s += Extend(typestr, abd) + ";";
    }
    break;
  }
  case OpAba:
    s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
    break;
  case OpAbal: {
    s += "__a + ";
    std::string abd = MangleName("vabd", typestr, ClassS) + "(__b, __c)";
    if (typestr[0] != 'U') {
      // vabd results are always unsigned and must be zero-extended.
      std::string utype = "U" + typestr.str();
      s += "(" + TypeString(proto[0], typestr) + ")";
      abd = "(" + TypeString('d', utype) + ")" + abd;
      s += Extend(utype, abd) + ";";
    } else {
      s += Extend(typestr, abd) + ";";
    }
    break;
  }
  default:
    PrintFatalError("unknown OpKind!");
  }
  return s;
}
1013
GetNeonEnum(const std::string & proto,StringRef typestr)1014 static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
1015 unsigned mod = proto[0];
1016
1017 if (mod == 'v' || mod == 'f')
1018 mod = proto[1];
1019
1020 bool quad = false;
1021 bool poly = false;
1022 bool usgn = false;
1023 bool scal = false;
1024 bool cnst = false;
1025 bool pntr = false;
1026
1027 // Base type to get the type string for.
1028 char type = ClassifyType(typestr, quad, poly, usgn);
1029
1030 // Based on the modifying character, change the type and width if necessary.
1031 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
1032
1033 NeonTypeFlags::EltType ET;
1034 switch (type) {
1035 case 'c':
1036 ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8;
1037 break;
1038 case 's':
1039 ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16;
1040 break;
1041 case 'i':
1042 ET = NeonTypeFlags::Int32;
1043 break;
1044 case 'l':
1045 ET = NeonTypeFlags::Int64;
1046 break;
1047 case 'h':
1048 ET = NeonTypeFlags::Float16;
1049 break;
1050 case 'f':
1051 ET = NeonTypeFlags::Float32;
1052 break;
1053 default:
1054 PrintFatalError("unhandled type!");
1055 }
1056 NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g');
1057 return Flags.getFlags();
1058 }
1059
1060 // Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
GenBuiltin(const std::string & name,const std::string & proto,StringRef typestr,ClassKind ck)1061 static std::string GenBuiltin(const std::string &name, const std::string &proto,
1062 StringRef typestr, ClassKind ck) {
1063 std::string s;
1064
1065 // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
1066 // sret-like argument.
1067 bool sret = (proto[0] >= '2' && proto[0] <= '4');
1068
1069 bool define = UseMacro(proto);
1070
1071 // Check if the prototype has a scalar operand with the type of the vector
1072 // elements. If not, bitcasting the args will take care of arg checking.
1073 // The actual signedness etc. will be taken care of with special enums.
1074 if (proto.find('s') == std::string::npos)
1075 ck = ClassB;
1076
1077 if (proto[0] != 'v') {
1078 std::string ts = TypeString(proto[0], typestr);
1079
1080 if (define) {
1081 if (sret)
1082 s += ts + " r; ";
1083 else
1084 s += "(" + ts + ")";
1085 } else if (sret) {
1086 s += ts + " r; ";
1087 } else {
1088 s += "return (" + ts + ")";
1089 }
1090 }
1091
1092 bool splat = proto.find('a') != std::string::npos;
1093
1094 s += "__builtin_neon_";
1095 if (splat) {
1096 // Call the non-splat builtin: chop off the "_n" suffix from the name.
1097 std::string vname(name, 0, name.size()-2);
1098 s += MangleName(vname, typestr, ck);
1099 } else {
1100 s += MangleName(name, typestr, ck);
1101 }
1102 s += "(";
1103
1104 // Pass the address of the return variable as the first argument to sret-like
1105 // builtins.
1106 if (sret)
1107 s += "&r, ";
1108
1109 char arg = 'a';
1110 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1111 std::string args = std::string(&arg, 1);
1112
1113 // Use the local temporaries instead of the macro arguments.
1114 args = "__" + args;
1115
1116 bool argQuad = false;
1117 bool argPoly = false;
1118 bool argUsgn = false;
1119 bool argScalar = false;
1120 bool dummy = false;
1121 char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
1122 argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
1123 dummy, dummy);
1124
1125 // Handle multiple-vector values specially, emitting each subvector as an
1126 // argument to the __builtin.
1127 if (proto[i] >= '2' && proto[i] <= '4') {
1128 // Check if an explicit cast is needed.
1129 if (argType != 'c' || argPoly || argUsgn)
1130 args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;
1131
1132 for (unsigned vi = 0, ve = proto[i] - '0'; vi != ve; ++vi) {
1133 s += args + ".val[" + utostr(vi) + "]";
1134 if ((vi + 1) < ve)
1135 s += ", ";
1136 }
1137 if ((i + 1) < e)
1138 s += ", ";
1139
1140 continue;
1141 }
1142
1143 if (splat && (i + 1) == e)
1144 args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);
1145
1146 // Check if an explicit cast is needed.
1147 if ((splat || !argScalar) &&
1148 ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
1149 std::string argTypeStr = "c";
1150 if (ck != ClassB)
1151 argTypeStr = argType;
1152 if (argQuad)
1153 argTypeStr = "Q" + argTypeStr;
1154 args = "(" + TypeString('d', argTypeStr) + ")" + args;
1155 }
1156
1157 s += args;
1158 if ((i + 1) < e)
1159 s += ", ";
1160 }
1161
1162 // Extra constant integer to hold type class enum for this function, e.g. s8
1163 if (ck == ClassB)
1164 s += ", " + utostr(GetNeonEnum(proto, typestr));
1165
1166 s += ");";
1167
1168 if (proto[0] != 'v' && sret) {
1169 if (define)
1170 s += " r;";
1171 else
1172 s += " return r;";
1173 }
1174 return s;
1175 }
1176
GenBuiltinDef(const std::string & name,const std::string & proto,StringRef typestr,ClassKind ck)1177 static std::string GenBuiltinDef(const std::string &name,
1178 const std::string &proto,
1179 StringRef typestr, ClassKind ck) {
1180 std::string s("BUILTIN(__builtin_neon_");
1181
1182 // If all types are the same size, bitcasting the args will take care
1183 // of arg checking. The actual signedness etc. will be taken care of with
1184 // special enums.
1185 if (proto.find('s') == std::string::npos)
1186 ck = ClassB;
1187
1188 s += MangleName(name, typestr, ck);
1189 s += ", \"";
1190
1191 for (unsigned i = 0, e = proto.size(); i != e; ++i)
1192 s += BuiltinTypeString(proto[i], typestr, ck, i == 0);
1193
1194 // Extra constant integer to hold type class enum for this function, e.g. s8
1195 if (ck == ClassB)
1196 s += "i";
1197
1198 s += "\", \"n\")";
1199 return s;
1200 }
1201
GenIntrinsic(const std::string & name,const std::string & proto,StringRef outTypeStr,StringRef inTypeStr,OpKind kind,ClassKind classKind)1202 static std::string GenIntrinsic(const std::string &name,
1203 const std::string &proto,
1204 StringRef outTypeStr, StringRef inTypeStr,
1205 OpKind kind, ClassKind classKind) {
1206 assert(!proto.empty() && "");
1207 bool define = UseMacro(proto) && kind != OpUnavailable;
1208 std::string s;
1209
1210 // static always inline + return type
1211 if (define)
1212 s += "#define ";
1213 else
1214 s += "__ai " + TypeString(proto[0], outTypeStr) + " ";
1215
1216 // Function name with type suffix
1217 std::string mangledName = MangleName(name, outTypeStr, ClassS);
1218 if (outTypeStr != inTypeStr) {
1219 // If the input type is different (e.g., for vreinterpret), append a suffix
1220 // for the input type. String off a "Q" (quad) prefix so that MangleName
1221 // does not insert another "q" in the name.
1222 unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
1223 StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
1224 mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
1225 }
1226 s += mangledName;
1227
1228 // Function arguments
1229 s += GenArgs(proto, inTypeStr);
1230
1231 // Definition.
1232 if (define) {
1233 s += " __extension__ ({ \\\n ";
1234 s += GenMacroLocals(proto, inTypeStr);
1235 } else if (kind == OpUnavailable) {
1236 s += " __attribute__((unavailable));\n";
1237 return s;
1238 } else
1239 s += " {\n ";
1240
1241 if (kind != OpNone)
1242 s += GenOpString(kind, proto, outTypeStr);
1243 else
1244 s += GenBuiltin(name, proto, outTypeStr, classKind);
1245 if (define)
1246 s += " })";
1247 else
1248 s += " }";
1249 s += "\n";
1250 return s;
1251 }
1252
1253 /// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h
1254 /// is comprised of type definitions and function declarations.
run(raw_ostream & OS)1255 void NeonEmitter::run(raw_ostream &OS) {
1256 OS <<
1257 "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
1258 "---===\n"
1259 " *\n"
1260 " * Permission is hereby granted, free of charge, to any person obtaining "
1261 "a copy\n"
1262 " * of this software and associated documentation files (the \"Software\"),"
1263 " to deal\n"
1264 " * in the Software without restriction, including without limitation the "
1265 "rights\n"
1266 " * to use, copy, modify, merge, publish, distribute, sublicense, "
1267 "and/or sell\n"
1268 " * copies of the Software, and to permit persons to whom the Software is\n"
1269 " * furnished to do so, subject to the following conditions:\n"
1270 " *\n"
1271 " * The above copyright notice and this permission notice shall be "
1272 "included in\n"
1273 " * all copies or substantial portions of the Software.\n"
1274 " *\n"
1275 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
1276 "EXPRESS OR\n"
1277 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
1278 "MERCHANTABILITY,\n"
1279 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
1280 "SHALL THE\n"
1281 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
1282 "OTHER\n"
1283 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
1284 "ARISING FROM,\n"
1285 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
1286 "DEALINGS IN\n"
1287 " * THE SOFTWARE.\n"
1288 " *\n"
1289 " *===--------------------------------------------------------------------"
1290 "---===\n"
1291 " */\n\n";
1292
1293 OS << "#ifndef __ARM_NEON_H\n";
1294 OS << "#define __ARM_NEON_H\n\n";
1295
1296 OS << "#ifndef __ARM_NEON__\n";
1297 OS << "#error \"NEON support not enabled\"\n";
1298 OS << "#endif\n\n";
1299
1300 OS << "#include <stdint.h>\n\n";
1301
1302 // Emit NEON-specific scalar typedefs.
1303 OS << "typedef float float32_t;\n";
1304 OS << "typedef int8_t poly8_t;\n";
1305 OS << "typedef int16_t poly16_t;\n";
1306 OS << "typedef uint16_t float16_t;\n";
1307
1308 // Emit Neon vector typedefs.
1309 std::string TypedefTypes("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfPcQPcPsQPs");
1310 SmallVector<StringRef, 24> TDTypeVec;
1311 ParseTypes(0, TypedefTypes, TDTypeVec);
1312
1313 // Emit vector typedefs.
1314 for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
1315 bool dummy, quad = false, poly = false;
1316 (void) ClassifyType(TDTypeVec[i], quad, poly, dummy);
1317 if (poly)
1318 OS << "typedef __attribute__((neon_polyvector_type(";
1319 else
1320 OS << "typedef __attribute__((neon_vector_type(";
1321
1322 unsigned nElts = GetNumElements(TDTypeVec[i], quad);
1323 OS << utostr(nElts) << "))) ";
1324 if (nElts < 10)
1325 OS << " ";
1326
1327 OS << TypeString('s', TDTypeVec[i]);
1328 OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";
1329 }
1330 OS << "\n";
1331
1332 // Emit struct typedefs.
1333 for (unsigned vi = 2; vi != 5; ++vi) {
1334 for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
1335 std::string ts = TypeString('d', TDTypeVec[i]);
1336 std::string vs = TypeString('0' + vi, TDTypeVec[i]);
1337 OS << "typedef struct " << vs << " {\n";
1338 OS << " " << ts << " val";
1339 OS << "[" << utostr(vi) << "]";
1340 OS << ";\n} ";
1341 OS << vs << ";\n\n";
1342 }
1343 }
1344
1345 OS<<"#define __ai static __attribute__((__always_inline__, __nodebug__))\n\n";
1346
1347 std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
1348
1349 // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
1350 // intrinsics. (Some of the saturating multiply instructions are also
1351 // used to implement the corresponding "_lane" variants, but tablegen
1352 // sorts the records into alphabetical order so that the "_lane" variants
1353 // come after the intrinsics they use.)
1354 emitIntrinsic(OS, Records.getDef("VMOVL"));
1355 emitIntrinsic(OS, Records.getDef("VMULL"));
1356 emitIntrinsic(OS, Records.getDef("VABD"));
1357
1358 for (unsigned i = 0, e = RV.size(); i != e; ++i) {
1359 Record *R = RV[i];
1360 if (R->getName() != "VMOVL" &&
1361 R->getName() != "VMULL" &&
1362 R->getName() != "VABD")
1363 emitIntrinsic(OS, R);
1364 }
1365
1366 OS << "#undef __ai\n\n";
1367 OS << "#endif /* __ARM_NEON_H */\n";
1368 }
1369
1370 /// emitIntrinsic - Write out the arm_neon.h header file definitions for the
1371 /// intrinsics specified by record R.
emitIntrinsic(raw_ostream & OS,Record * R)1372 void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R) {
1373 std::string name = R->getValueAsString("Name");
1374 std::string Proto = R->getValueAsString("Prototype");
1375 std::string Types = R->getValueAsString("Types");
1376
1377 SmallVector<StringRef, 16> TypeVec;
1378 ParseTypes(R, Types, TypeVec);
1379
1380 OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
1381
1382 ClassKind classKind = ClassNone;
1383 if (R->getSuperClasses().size() >= 2)
1384 classKind = ClassMap[R->getSuperClasses()[1]];
1385 if (classKind == ClassNone && kind == OpNone)
1386 PrintFatalError(R->getLoc(), "Builtin has no class kind");
1387
1388 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
1389 if (kind == OpReinterpret) {
1390 bool outQuad = false;
1391 bool dummy = false;
1392 (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
1393 for (unsigned srcti = 0, srcte = TypeVec.size();
1394 srcti != srcte; ++srcti) {
1395 bool inQuad = false;
1396 (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
1397 if (srcti == ti || inQuad != outQuad)
1398 continue;
1399 OS << GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
1400 OpCast, ClassS);
1401 }
1402 } else {
1403 OS << GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti],
1404 kind, classKind);
1405 }
1406 }
1407 OS << "\n";
1408 }
1409
RangeFromType(const char mod,StringRef typestr)1410 static unsigned RangeFromType(const char mod, StringRef typestr) {
1411 // base type to get the type string for.
1412 bool quad = false, dummy = false;
1413 char type = ClassifyType(typestr, quad, dummy, dummy);
1414 type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);
1415
1416 switch (type) {
1417 case 'c':
1418 return (8 << (int)quad) - 1;
1419 case 'h':
1420 case 's':
1421 return (4 << (int)quad) - 1;
1422 case 'f':
1423 case 'i':
1424 return (2 << (int)quad) - 1;
1425 case 'l':
1426 return (1 << (int)quad) - 1;
1427 default:
1428 PrintFatalError("unhandled type!");
1429 }
1430 }
1431
1432 /// runHeader - Emit a file with sections defining:
1433 /// 1. the NEON section of BuiltinsARM.def.
1434 /// 2. the SemaChecking code for the type overload checking.
1435 /// 3. the SemaChecking code for validation of intrinsic immediate arguments.
runHeader(raw_ostream & OS)1436 void NeonEmitter::runHeader(raw_ostream &OS) {
1437 std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
1438
1439 StringMap<OpKind> EmittedMap;
1440
1441 // Generate BuiltinsARM.def for NEON
1442 OS << "#ifdef GET_NEON_BUILTINS\n";
1443 for (unsigned i = 0, e = RV.size(); i != e; ++i) {
1444 Record *R = RV[i];
1445 OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
1446 if (k != OpNone)
1447 continue;
1448
1449 std::string Proto = R->getValueAsString("Prototype");
1450
1451 // Functions with 'a' (the splat code) in the type prototype should not get
1452 // their own builtin as they use the non-splat variant.
1453 if (Proto.find('a') != std::string::npos)
1454 continue;
1455
1456 std::string Types = R->getValueAsString("Types");
1457 SmallVector<StringRef, 16> TypeVec;
1458 ParseTypes(R, Types, TypeVec);
1459
1460 if (R->getSuperClasses().size() < 2)
1461 PrintFatalError(R->getLoc(), "Builtin has no class kind");
1462
1463 std::string name = R->getValueAsString("Name");
1464 ClassKind ck = ClassMap[R->getSuperClasses()[1]];
1465
1466 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
1467 // Generate the BuiltinsARM.def declaration for this builtin, ensuring
1468 // that each unique BUILTIN() macro appears only once in the output
1469 // stream.
1470 std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
1471 if (EmittedMap.count(bd))
1472 continue;
1473
1474 EmittedMap[bd] = OpNone;
1475 OS << bd << "\n";
1476 }
1477 }
1478 OS << "#endif\n\n";
1479
1480 // Generate the overloaded type checking code for SemaChecking.cpp
1481 OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
1482 for (unsigned i = 0, e = RV.size(); i != e; ++i) {
1483 Record *R = RV[i];
1484 OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
1485 if (k != OpNone)
1486 continue;
1487
1488 std::string Proto = R->getValueAsString("Prototype");
1489 std::string Types = R->getValueAsString("Types");
1490 std::string name = R->getValueAsString("Name");
1491
1492 // Functions with 'a' (the splat code) in the type prototype should not get
1493 // their own builtin as they use the non-splat variant.
1494 if (Proto.find('a') != std::string::npos)
1495 continue;
1496
1497 // Functions which have a scalar argument cannot be overloaded, no need to
1498 // check them if we are emitting the type checking code.
1499 if (Proto.find('s') != std::string::npos)
1500 continue;
1501
1502 SmallVector<StringRef, 16> TypeVec;
1503 ParseTypes(R, Types, TypeVec);
1504
1505 if (R->getSuperClasses().size() < 2)
1506 PrintFatalError(R->getLoc(), "Builtin has no class kind");
1507
1508 int si = -1, qi = -1;
1509 uint64_t mask = 0, qmask = 0;
1510 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
1511 // Generate the switch case(s) for this builtin for the type validation.
1512 bool quad = false, poly = false, usgn = false;
1513 (void) ClassifyType(TypeVec[ti], quad, poly, usgn);
1514
1515 if (quad) {
1516 qi = ti;
1517 qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
1518 } else {
1519 si = ti;
1520 mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
1521 }
1522 }
1523
1524 // Check if the builtin function has a pointer or const pointer argument.
1525 int PtrArgNum = -1;
1526 bool HasConstPtr = false;
1527 for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
1528 char ArgType = Proto[arg];
1529 if (ArgType == 'c') {
1530 HasConstPtr = true;
1531 PtrArgNum = arg - 1;
1532 break;
1533 }
1534 if (ArgType == 'p') {
1535 PtrArgNum = arg - 1;
1536 break;
1537 }
1538 }
1539 // For sret builtins, adjust the pointer argument index.
1540 if (PtrArgNum >= 0 && (Proto[0] >= '2' && Proto[0] <= '4'))
1541 PtrArgNum += 1;
1542
1543 // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
1544 // and vst1_lane intrinsics. Using a pointer to the vector element
1545 // type with one of those operations causes codegen to select an aligned
1546 // load/store instruction. If you want an unaligned operation,
1547 // the pointer argument needs to have less alignment than element type,
1548 // so just accept any pointer type.
1549 if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
1550 PtrArgNum = -1;
1551 HasConstPtr = false;
1552 }
1553
1554 if (mask) {
1555 OS << "case ARM::BI__builtin_neon_"
1556 << MangleName(name, TypeVec[si], ClassB)
1557 << ": mask = " << "0x" << utohexstr(mask) << "ULL";
1558 if (PtrArgNum >= 0)
1559 OS << "; PtrArgNum = " << PtrArgNum;
1560 if (HasConstPtr)
1561 OS << "; HasConstPtr = true";
1562 OS << "; break;\n";
1563 }
1564 if (qmask) {
1565 OS << "case ARM::BI__builtin_neon_"
1566 << MangleName(name, TypeVec[qi], ClassB)
1567 << ": mask = " << "0x" << utohexstr(qmask) << "ULL";
1568 if (PtrArgNum >= 0)
1569 OS << "; PtrArgNum = " << PtrArgNum;
1570 if (HasConstPtr)
1571 OS << "; HasConstPtr = true";
1572 OS << "; break;\n";
1573 }
1574 }
1575 OS << "#endif\n\n";
1576
1577 // Generate the intrinsic range checking code for shift/lane immediates.
1578 OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
1579 for (unsigned i = 0, e = RV.size(); i != e; ++i) {
1580 Record *R = RV[i];
1581
1582 OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
1583 if (k != OpNone)
1584 continue;
1585
1586 std::string name = R->getValueAsString("Name");
1587 std::string Proto = R->getValueAsString("Prototype");
1588 std::string Types = R->getValueAsString("Types");
1589
1590 // Functions with 'a' (the splat code) in the type prototype should not get
1591 // their own builtin as they use the non-splat variant.
1592 if (Proto.find('a') != std::string::npos)
1593 continue;
1594
1595 // Functions which do not have an immediate do not need to have range
1596 // checking code emitted.
1597 size_t immPos = Proto.find('i');
1598 if (immPos == std::string::npos)
1599 continue;
1600
1601 SmallVector<StringRef, 16> TypeVec;
1602 ParseTypes(R, Types, TypeVec);
1603
1604 if (R->getSuperClasses().size() < 2)
1605 PrintFatalError(R->getLoc(), "Builtin has no class kind");
1606
1607 ClassKind ck = ClassMap[R->getSuperClasses()[1]];
1608
1609 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
1610 std::string namestr, shiftstr, rangestr;
1611
1612 if (R->getValueAsBit("isVCVT_N")) {
1613 // VCVT between floating- and fixed-point values takes an immediate
1614 // in the range 1 to 32.
1615 ck = ClassB;
1616 rangestr = "l = 1; u = 31"; // upper bound = l + u
1617 } else if (Proto.find('s') == std::string::npos) {
1618 // Builtins which are overloaded by type will need to have their upper
1619 // bound computed at Sema time based on the type constant.
1620 ck = ClassB;
1621 if (R->getValueAsBit("isShift")) {
1622 shiftstr = ", true";
1623
1624 // Right shifts have an 'r' in the name, left shifts do not.
1625 if (name.find('r') != std::string::npos)
1626 rangestr = "l = 1; ";
1627 }
1628 rangestr += "u = RFT(TV" + shiftstr + ")";
1629 } else {
1630 // The immediate generally refers to a lane in the preceding argument.
1631 assert(immPos > 0 && "unexpected immediate operand");
1632 rangestr = "u = " + utostr(RangeFromType(Proto[immPos-1], TypeVec[ti]));
1633 }
1634 // Make sure cases appear only once by uniquing them in a string map.
1635 namestr = MangleName(name, TypeVec[ti], ck);
1636 if (EmittedMap.count(namestr))
1637 continue;
1638 EmittedMap[namestr] = OpNone;
1639
1640 // Calculate the index of the immediate that should be range checked.
1641 unsigned immidx = 0;
1642
1643 // Builtins that return a struct of multiple vectors have an extra
1644 // leading arg for the struct return.
1645 if (Proto[0] >= '2' && Proto[0] <= '4')
1646 ++immidx;
1647
1648 // Add one to the index for each argument until we reach the immediate
1649 // to be checked. Structs of vectors are passed as multiple arguments.
1650 for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
1651 switch (Proto[ii]) {
1652 default: immidx += 1; break;
1653 case '2': immidx += 2; break;
1654 case '3': immidx += 3; break;
1655 case '4': immidx += 4; break;
1656 case 'i': ie = ii + 1; break;
1657 }
1658 }
1659 OS << "case ARM::BI__builtin_neon_" << MangleName(name, TypeVec[ti], ck)
1660 << ": i = " << immidx << "; " << rangestr << "; break;\n";
1661 }
1662 }
1663 OS << "#endif\n\n";
1664 }
1665
1666 /// GenTest - Write out a test for the intrinsic specified by the name and
1667 /// type strings, including the embedded patterns for FileCheck to match.
GenTest(const std::string & name,const std::string & proto,StringRef outTypeStr,StringRef inTypeStr,bool isShift)1668 static std::string GenTest(const std::string &name,
1669 const std::string &proto,
1670 StringRef outTypeStr, StringRef inTypeStr,
1671 bool isShift) {
1672 assert(!proto.empty() && "");
1673 std::string s;
1674
1675 // Function name with type suffix
1676 std::string mangledName = MangleName(name, outTypeStr, ClassS);
1677 if (outTypeStr != inTypeStr) {
1678 // If the input type is different (e.g., for vreinterpret), append a suffix
1679 // for the input type. String off a "Q" (quad) prefix so that MangleName
1680 // does not insert another "q" in the name.
1681 unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
1682 StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
1683 mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
1684 }
1685
1686 // Emit the FileCheck patterns.
1687 s += "// CHECK: test_" + mangledName + "\n";
1688 // s += "// CHECK: \n"; // FIXME: + expected instruction opcode.
1689
1690 // Emit the start of the test function.
1691 s += TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
1692 char arg = 'a';
1693 std::string comma;
1694 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1695 // Do not create arguments for values that must be immediate constants.
1696 if (proto[i] == 'i')
1697 continue;
1698 s += comma + TypeString(proto[i], inTypeStr) + " ";
1699 s.push_back(arg);
1700 comma = ", ";
1701 }
1702 s += ") {\n ";
1703
1704 if (proto[0] != 'v')
1705 s += "return ";
1706 s += mangledName + "(";
1707 arg = 'a';
1708 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1709 if (proto[i] == 'i') {
1710 // For immediate operands, test the maximum value.
1711 if (isShift)
1712 s += "1"; // FIXME
1713 else
1714 // The immediate generally refers to a lane in the preceding argument.
1715 s += utostr(RangeFromType(proto[i-1], inTypeStr));
1716 } else {
1717 s.push_back(arg);
1718 }
1719 if ((i + 1) < e)
1720 s += ", ";
1721 }
1722 s += ");\n}\n\n";
1723 return s;
1724 }
1725
1726 /// runTests - Write out a complete set of tests for all of the Neon
1727 /// intrinsics.
runTests(raw_ostream & OS)1728 void NeonEmitter::runTests(raw_ostream &OS) {
1729 OS <<
1730 "// RUN: %clang_cc1 -triple thumbv7-apple-darwin \\\n"
1731 "// RUN: -target-cpu cortex-a9 -ffreestanding -S -o - %s | FileCheck %s\n"
1732 "\n"
1733 "#include <arm_neon.h>\n"
1734 "\n";
1735
1736 std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
1737 for (unsigned i = 0, e = RV.size(); i != e; ++i) {
1738 Record *R = RV[i];
1739 std::string name = R->getValueAsString("Name");
1740 std::string Proto = R->getValueAsString("Prototype");
1741 std::string Types = R->getValueAsString("Types");
1742 bool isShift = R->getValueAsBit("isShift");
1743
1744 SmallVector<StringRef, 16> TypeVec;
1745 ParseTypes(R, Types, TypeVec);
1746
1747 OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
1748 if (kind == OpUnavailable)
1749 continue;
1750 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
1751 if (kind == OpReinterpret) {
1752 bool outQuad = false;
1753 bool dummy = false;
1754 (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
1755 for (unsigned srcti = 0, srcte = TypeVec.size();
1756 srcti != srcte; ++srcti) {
1757 bool inQuad = false;
1758 (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
1759 if (srcti == ti || inQuad != outQuad)
1760 continue;
1761 OS << GenTest(name, Proto, TypeVec[ti], TypeVec[srcti], isShift);
1762 }
1763 } else {
1764 OS << GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift);
1765 }
1766 }
1767 OS << "\n";
1768 }
1769 }
1770
1771 namespace clang {
EmitNeon(RecordKeeper & Records,raw_ostream & OS)1772 void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
1773 NeonEmitter(Records).run(OS);
1774 }
EmitNeonSema(RecordKeeper & Records,raw_ostream & OS)1775 void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
1776 NeonEmitter(Records).runHeader(OS);
1777 }
EmitNeonTest(RecordKeeper & Records,raw_ostream & OS)1778 void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
1779 NeonEmitter(Records).runTests(OS);
1780 }
1781 } // End namespace clang
1782