// Exercises the .amdhsa_kernel directive family when assembling for gfx1010
// with +xnack. The same input is verified three ways:
//   * prefix ASM     - textual output of llvm-mc (directives are re-printed),
//   * prefix READOBJ - ELF section headers, relocations and symbols,
//   * prefix OBJDUMP - raw bytes of the kernel descriptors placed in .rodata.

// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack -filetype=obj < %s > %t
// RUN: llvm-readobj -elf-output-style=GNU -sections -symbols -relocations %t | FileCheck --check-prefix=READOBJ %s
// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s

// Section alignment follows the largest .p2align used in each section below
// (256 for .text, 64 for .rodata); .rodata holds three 64-byte descriptors.
// READOBJ: Section Headers
// READOBJ: .text   PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}}        0000c0 {{[0-9]+}}  A {{[0-9]+}} {{[0-9]+}} 64

// One relocation against .text is expected per descriptor, 0x10 bytes into it.
// READOBJ: Relocation section '.rela.rodata' at offset
// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10
// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110
// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210

// Each kernel yields a FUNC symbol in .text and a 64-byte <name>.kd OBJECT
// symbol in .rodata.
// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries:
// READOBJ: {{[0-9]+}}: 0000000000000100  0 FUNC    LOCAL  PROTECTED 2 complete
// READOBJ: {{[0-9]+}}: 0000000000000040 64 OBJECT  LOCAL  DEFAULT   3 complete.kd
// READOBJ: {{[0-9]+}}: 0000000000000000  0 FUNC    LOCAL  PROTECTED 2 minimal
// READOBJ: {{[0-9]+}}: 0000000000000000 64 OBJECT  LOCAL  DEFAULT   3 minimal.kd
// READOBJ: {{[0-9]+}}: 0000000000000200  0 FUNC    LOCAL  PROTECTED 2 special_sgpr
// READOBJ: {{[0-9]+}}: 0000000000000080 64 OBJECT  LOCAL  DEFAULT   3 special_sgpr.kd

// OBJDUMP: Contents of section .rodata
// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here.
// minimal
// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0030 0000ac60 80000000 00000000 00000000
// complete
// OBJDUMP-NEXT: 0040 01000000 01000000 00000000 00000000
// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0070 015001e4 1f0f007f 7f040000 00000000
// special_sgpr
// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 00b0 00000060 80000000 00000000 00000000

.text
// ASM: .text

.amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack"
// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack"

// Kernel entry points. Each one is aligned to 256 bytes (.p2align 8), which is
// why the symbol checks above expect .text offsets 0x000, 0x100 and 0x200.
.p2align 8
.type minimal,@function
minimal:
  s_endpgm

.p2align 8
.type complete,@function
complete:
  s_endpgm

.p2align 8
.type special_sgpr,@function
special_sgpr:
  s_endpgm

// Kernel descriptors. The .p2align 6 in front of each .amdhsa_kernel block keeps
// the descriptors on 64-byte boundaries (0x00, 0x40, 0x80 in the checks above).
.rodata
// ASM: .rodata

// Test that only specifying required directives is allowed, and that defaulted
// values are omitted.
.p2align 6
.amdhsa_kernel minimal
  .amdhsa_next_free_vgpr 0
  .amdhsa_next_free_sgpr 0
.end_amdhsa_kernel

// ASM: .amdhsa_kernel minimal
// ASM: .amdhsa_next_free_vgpr 0
// ASM-NEXT: .amdhsa_next_free_sgpr 0
// ASM: .end_amdhsa_kernel

// Test that we can specify all available directives with non-default values.
.p2align 6
.amdhsa_kernel complete
  .amdhsa_group_segment_fixed_size 1
  .amdhsa_private_segment_fixed_size 1
  .amdhsa_user_sgpr_private_segment_buffer 1
  .amdhsa_user_sgpr_dispatch_ptr 1
  .amdhsa_user_sgpr_queue_ptr 1
  .amdhsa_user_sgpr_kernarg_segment_ptr 1
  .amdhsa_user_sgpr_dispatch_id 1
  .amdhsa_user_sgpr_flat_scratch_init 1
  .amdhsa_user_sgpr_private_segment_size 1
  .amdhsa_wavefront_size32 1
  .amdhsa_system_sgpr_private_segment_wavefront_offset 1
  .amdhsa_system_sgpr_workgroup_id_x 0
  .amdhsa_system_sgpr_workgroup_id_y 1
  .amdhsa_system_sgpr_workgroup_id_z 1
  .amdhsa_system_sgpr_workgroup_info 1
  .amdhsa_system_vgpr_workitem_id 1
  .amdhsa_next_free_vgpr 9
  .amdhsa_next_free_sgpr 27
  .amdhsa_reserve_vcc 0
  .amdhsa_reserve_flat_scratch 0
  .amdhsa_reserve_xnack_mask 0
  .amdhsa_float_round_mode_32 1
  .amdhsa_float_round_mode_16_64 1
  .amdhsa_float_denorm_mode_32 1
  .amdhsa_float_denorm_mode_16_64 0
  .amdhsa_dx10_clamp 0
  .amdhsa_ieee_mode 0
  .amdhsa_fp16_overflow 1
  .amdhsa_workgroup_processor_mode 1
  .amdhsa_memory_ordered 1
  .amdhsa_forward_progress 1
  .amdhsa_exception_fp_ieee_invalid_op 1
  .amdhsa_exception_fp_denorm_src 1
  .amdhsa_exception_fp_ieee_div_zero 1
  .amdhsa_exception_fp_ieee_overflow 1
  .amdhsa_exception_fp_ieee_underflow 1
  .amdhsa_exception_fp_ieee_inexact 1
  .amdhsa_exception_int_div_zero 1
.end_amdhsa_kernel

// The printed form must list every directive, in exactly this order, with
// nothing in between.
// ASM: .amdhsa_kernel complete
// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1
// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1
// ASM-NEXT: .amdhsa_wavefront_size32 1
// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1
// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1
// ASM-NEXT: .amdhsa_next_free_vgpr 9
// ASM-NEXT: .amdhsa_next_free_sgpr 27
// ASM-NEXT: .amdhsa_reserve_vcc 0
// ASM-NEXT: .amdhsa_reserve_flat_scratch 0
// ASM-NEXT: .amdhsa_reserve_xnack_mask 0
// ASM-NEXT: .amdhsa_float_round_mode_32 1
// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
// ASM-NEXT: .amdhsa_float_denorm_mode_32 1
// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0
// ASM-NEXT: .amdhsa_dx10_clamp 0
// ASM-NEXT: .amdhsa_ieee_mode 0
// ASM-NEXT: .amdhsa_fp16_overflow 1
// ASM-NEXT: .amdhsa_workgroup_processor_mode 1
// ASM-NEXT: .amdhsa_memory_ordered 1
// ASM-NEXT: .amdhsa_forward_progress 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1
// ASM-NEXT: .amdhsa_exception_int_div_zero 1
// ASM-NEXT: .end_amdhsa_kernel

// Test that we are including special SGPR usage in the granulated count.
.p2align 6
.amdhsa_kernel special_sgpr
  // Same next_free_sgpr as "complete", but...
  .amdhsa_next_free_sgpr 27
  // ...on GFX10+ this should require an additional 6 SGPRs, pushing us from
  // 3 granules to 4
  // NOTE(review): the OBJDUMP expectation for offset 00b0 above (00000060) has
  // the SGPR granule bits of compute_pgm_rsrc1 clear, the same as "complete".
  // The "3 granules to 4" wording looks inherited from the pre-GFX10 variant of
  // this test and may not describe GFX10 behaviour - TODO confirm.
  .amdhsa_reserve_flat_scratch 1

  .amdhsa_reserve_vcc 0
  .amdhsa_reserve_xnack_mask 0

  .amdhsa_float_denorm_mode_16_64 0
  .amdhsa_dx10_clamp 0
  .amdhsa_ieee_mode 0
  // Given last in the input; the checks below expect it to be printed before
  // .amdhsa_next_free_sgpr.
  .amdhsa_next_free_vgpr 0
.end_amdhsa_kernel

// ASM: .amdhsa_kernel special_sgpr
// ASM: .amdhsa_next_free_vgpr 0
// ASM-NEXT: .amdhsa_next_free_sgpr 27
// ASM-NEXT: .amdhsa_reserve_vcc 0
// ASM-NEXT: .amdhsa_reserve_xnack_mask 0
// ASM: .amdhsa_float_denorm_mode_16_64 0
// ASM-NEXT: .amdhsa_dx10_clamp 0
// ASM-NEXT: .amdhsa_ieee_mode 0
// ASM: .end_amdhsa_kernel

// Assembler-maintained symbols, evaluated through .byte so that their current
// value shows up in the printed output.
.section .foo

.byte .amdgcn.gfx_generation_number
// ASM: .byte 10

// Both register counters are expected to read 0 before any register is used
// in an instruction.
.byte .amdgcn.next_free_vgpr
// ASM: .byte 0
.byte .amdgcn.next_free_sgpr
// ASM: .byte 0

v_mov_b32_e32 v7, s10

// After using v7 and s10 the counters are expected to be one past the highest
// register index seen.
.byte .amdgcn.next_free_vgpr
// ASM: .byte 8
.byte .amdgcn.next_free_sgpr
// ASM: .byte 11

// The counters are ordinary symbols and can be reset by hand.
.set .amdgcn.next_free_vgpr, 0
.set .amdgcn.next_free_sgpr, 0

.byte .amdgcn.next_free_vgpr
// ASM: .byte 0
.byte .amdgcn.next_free_sgpr
// ASM: .byte 0

v_mov_b32_e32 v16, s3

// Counting resumes from the reset values: v16 gives 17 and s3 gives 4.
.byte .amdgcn.next_free_vgpr
// ASM: .byte 17
.byte .amdgcn.next_free_sgpr
// ASM: .byte 4
