//===---- AMDCallingConv.td - Calling Conventions for Radeon GPUs ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This describes the calling conventions for the AMD Radeon GPUs.
//
//===----------------------------------------------------------------------===//

// Inversion of CCIfInReg
class CCIfNotInReg<CCAction A> : CCIf<"!ArgFlags.isInReg()", A> {}
class CCIfExtend<CCAction A>
  : CCIf<"ArgFlags.isSExt() || ArgFlags.isZExt()", A>;

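// A note on the predicate strings: TableGen splices them verbatim into the
// generated C++ matcher, so a use of CCIfNotInReg<A> compiles to roughly
// the following (an illustrative sketch of the emitter's output, not an
// exact quote):
//
//   if (!ArgFlags.isInReg()) {
//     // ...code generated for the action A...
//   }
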
// Calling convention for SI
def CC_SI : CallingConv<[

  CCIfInReg<CCIfType<[f32, i32, f16], CCAssignToReg<[
    SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
    SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
    SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
    SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31,
    SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39
  ]>>>,

  // We have no way of referring to the generated register tuples
  // here, so use a custom function (see the sketch after this
  // definition).
  CCIfInReg<CCIfType<[i64], CCCustom<"allocateSGPRTuple">>>,
  CCIfByVal<CCIfType<[i64], CCCustom<"allocateSGPRTuple">>>,

  // 32*4 + 4 = 132 registers is the minimum for a fetch shader consumer
  // with 32 inputs; the VGPR0-VGPR135 pool below covers it.
  CCIfNotInReg<CCIfType<[f32, i32, f16], CCAssignToReg<[
    VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
    VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
    VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
    VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31,
    VGPR32, VGPR33, VGPR34, VGPR35, VGPR36, VGPR37, VGPR38, VGPR39,
    VGPR40, VGPR41, VGPR42, VGPR43, VGPR44, VGPR45, VGPR46, VGPR47,
    VGPR48, VGPR49, VGPR50, VGPR51, VGPR52, VGPR53, VGPR54, VGPR55,
    VGPR56, VGPR57, VGPR58, VGPR59, VGPR60, VGPR61, VGPR62, VGPR63,
    VGPR64, VGPR65, VGPR66, VGPR67, VGPR68, VGPR69, VGPR70, VGPR71,
    VGPR72, VGPR73, VGPR74, VGPR75, VGPR76, VGPR77, VGPR78, VGPR79,
    VGPR80, VGPR81, VGPR82, VGPR83, VGPR84, VGPR85, VGPR86, VGPR87,
    VGPR88, VGPR89, VGPR90, VGPR91, VGPR92, VGPR93, VGPR94, VGPR95,
    VGPR96, VGPR97, VGPR98, VGPR99, VGPR100, VGPR101, VGPR102, VGPR103,
    VGPR104, VGPR105, VGPR106, VGPR107, VGPR108, VGPR109, VGPR110, VGPR111,
    VGPR112, VGPR113, VGPR114, VGPR115, VGPR116, VGPR117, VGPR118, VGPR119,
    VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127,
    VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135
  ]>>>
]>;

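// allocateSGPRTuple itself is a C++ function in the backend's lowering
// code; it cannot be expressed in TableGen. A minimal sketch of its shape,
// assuming the usual CCCustom handler contract (return true once the value
// has been recorded in State, false to let later entries try it):
//
//   static bool allocateSGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT,
//                                 CCValAssign::LocInfo LocInfo,
//                                 ISD::ArgFlagsTy &ArgFlags,
//                                 CCState &State) {
//     // Find a free, properly aligned SGPR pair, mark its registers as
//     // allocated, add a CCValAssign for it to State, and return true.
//   }
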
def RetCC_SI_Shader : CallingConv<[
  CCIfType<[i32], CCAssignToReg<[
    SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
    SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
    SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
    SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31,
    SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39
  ]>>,

66  // 32*4 + 4 is the minimum for a fetch shader with 32 outputs.
  CCIfType<[f32, f16], CCAssignToReg<[
    VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
    VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
    VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
    VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31,
    VGPR32, VGPR33, VGPR34, VGPR35, VGPR36, VGPR37, VGPR38, VGPR39,
    VGPR40, VGPR41, VGPR42, VGPR43, VGPR44, VGPR45, VGPR46, VGPR47,
    VGPR48, VGPR49, VGPR50, VGPR51, VGPR52, VGPR53, VGPR54, VGPR55,
    VGPR56, VGPR57, VGPR58, VGPR59, VGPR60, VGPR61, VGPR62, VGPR63,
    VGPR64, VGPR65, VGPR66, VGPR67, VGPR68, VGPR69, VGPR70, VGPR71,
    VGPR72, VGPR73, VGPR74, VGPR75, VGPR76, VGPR77, VGPR78, VGPR79,
    VGPR80, VGPR81, VGPR82, VGPR83, VGPR84, VGPR85, VGPR86, VGPR87,
    VGPR88, VGPR89, VGPR90, VGPR91, VGPR92, VGPR93, VGPR94, VGPR95,
    VGPR96, VGPR97, VGPR98, VGPR99, VGPR100, VGPR101, VGPR102, VGPR103,
    VGPR104, VGPR105, VGPR106, VGPR107, VGPR108, VGPR109, VGPR110, VGPR111,
    VGPR112, VGPR113, VGPR114, VGPR115, VGPR116, VGPR117, VGPR118, VGPR119,
    VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127,
    VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135
  ]>>
]>;

def CSR_AMDGPU_VGPRs_24_255 : CalleeSavedRegs<
  (sequence "VGPR%u", 24, 255)
>;

def CSR_AMDGPU_VGPRs_32_255 : CalleeSavedRegs<
  (sequence "VGPR%u", 32, 255)
>;

def CSR_AMDGPU_SGPRs_32_103 : CalleeSavedRegs<
  (sequence "SGPR%u", 32, 103)
>;

def CSR_AMDGPU_HighRegs : CalleeSavedRegs<
  (add CSR_AMDGPU_VGPRs_32_255, CSR_AMDGPU_SGPRs_32_103)
>;

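// `sequence` expands a printf-style template over an inclusive range, so
// (sequence "VGPR%u", 24, 255) is shorthand for
// (add VGPR24, VGPR25, ..., VGPR255). As a small illustration, a
// hypothetical
//
//   def CSR_Example : CalleeSavedRegs<(sequence "SGPR%u", 0, 3)>;
//
// is equivalent to
//
//   def CSR_Example : CalleeSavedRegs<(add SGPR0, SGPR1, SGPR2, SGPR3)>;
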
// Calling convention for leaf functions
def CC_AMDGPU_Func : CallingConv<[
  CCIfByVal<CCPassByVal<4, 4>>,
  CCIfType<[i1], CCPromoteToType<i32>>,
  CCIfType<[i1, i8, i16], CCIfExtend<CCPromoteToType<i32>>>,
  CCIfType<[i32, f32, i16, f16, v2i16, v2f16, i1], CCAssignToReg<[
    VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
    VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
    VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
    VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>,
  CCIfType<[i64, f64, v2i32, v2f32, v4i32, v4f32, v8i32, v8f32, v16i32,
            v16f32, v2i64, v2f64, v4i16, v4f16],
           CCCustom<"allocateVGPRTuple">>,
  CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>>,
  CCIfType<[i64, f64, v2i32, v2f32], CCAssignToStack<8, 4>>,
  CCIfType<[v4i32, v4f32, v2i64, v2f64], CCAssignToStack<16, 4>>,
  CCIfType<[v8i32, v8f32], CCAssignToStack<32, 4>>,
  CCIfType<[v16i32, v16f32], CCAssignToStack<64, 4>>
]>;

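// The entries above are tried in order: byval aggregates are passed on the
// stack; i1/i8/i16 values are promoted to i32; values of 32 bits or fewer
// take the first free register from VGPR0-VGPR31; wider values are split
// into VGPR tuples by allocateVGPRTuple; anything still unassigned falls
// through to a 4-byte-aligned stack slot. As a hypothetical example, for
//
//   void f(float a, double b);
//
// `a` would land in VGPR0 and `b` in a VGPR pair such as VGPR1-VGPR2; if
// VGPR0-VGPR31 were already exhausted, `b` would instead take the 8-byte,
// 4-byte-aligned stack slot.
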
// Return-value calling convention for leaf functions
def RetCC_AMDGPU_Func : CallingConv<[
  CCIfType<[i1], CCPromoteToType<i32>>,
  CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,
  CCIfType<[i32, f32, i16, f16, v2i16, v2f16], CCAssignToReg<[
    VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
    VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
    VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
    VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>,
  CCIfType<[i64, f64, v2i32, v2f32, v4i32, v4f32, v8i32, v8f32, v16i32,
            v16f32, v2i64, v2f64, v4i16, v4f16],
           CCCustom<"allocateVGPRTuple">>
]>;

def CC_AMDGPU : CallingConv<[
   CCIf<"static_cast<const GCNSubtarget&>"
         "(State.getMachineFunction().getSubtarget()).getGeneration() >= "
           "AMDGPUSubtarget::SOUTHERN_ISLANDS",
        CCDelegateTo<CC_SI>>,
   CCIf<"static_cast<const GCNSubtarget&>"
         "(State.getMachineFunction().getSubtarget()).getGeneration() >= "
           "AMDGPUSubtarget::SOUTHERN_ISLANDS && State.getCallingConv() == "
           "CallingConv::C",
        CCDelegateTo<CC_AMDGPU_Func>>
]>;

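// CCDelegateTo hands the value to another convention function. In the
// generated matcher, a delegate that assigns the value ends the search,
// while one that cannot assign it falls through to the next entry. A rough
// sketch of the emitted C++ (these functions return false on a successful
// assignment):
//
//   if (!CC_SI(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
//     return false;  // CC_SI assigned the value.
//   // Otherwise fall through and try CC_AMDGPU_Func.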