1 /* libs/pixelflinger/codeflinger/ARMAssembler.cpp
2 **
3 ** Copyright 2006, The Android Open Source Project
4 **
5 ** Licensed under the Apache License, Version 2.0 (the "License");
6 ** you may not use this file except in compliance with the License.
7 ** You may obtain a copy of the License at
8 **
9 ** http://www.apache.org/licenses/LICENSE-2.0
10 **
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 */
17
18 #define LOG_TAG "ARMAssembler"
19
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <cutils/log.h>
23 #include <cutils/properties.h>
24
25 #if defined(WITH_LIB_HARDWARE)
26 #include <hardware_legacy/qemu_tracing.h>
27 #endif
28
29 #include <private/pixelflinger/ggl_context.h>
30
31 #include "codeflinger/ARMAssembler.h"
32 #include "codeflinger/CodeCache.h"
33 #include "codeflinger/disassem.h"
34
35 // ----------------------------------------------------------------------------
36
37 namespace android {
38
39 // ----------------------------------------------------------------------------
40 #if 0
41 #pragma mark -
42 #pragma mark ARMAssembler...
43 #endif
44
ARMAssembler(const sp<Assembly> & assembly)45 ARMAssembler::ARMAssembler(const sp<Assembly>& assembly)
46 : ARMAssemblerInterface(),
47 mAssembly(assembly)
48 {
49 mBase = mPC = (uint32_t *)assembly->base();
50 mDuration = ggl_system_time();
51 #if defined(WITH_LIB_HARDWARE)
52 mQemuTracing = true;
53 #endif
54 }
55
~ARMAssembler()56 ARMAssembler::~ARMAssembler()
57 {
58 }
59
pc() const60 uint32_t* ARMAssembler::pc() const
61 {
62 return mPC;
63 }
64
base() const65 uint32_t* ARMAssembler::base() const
66 {
67 return mBase;
68 }
69
reset()70 void ARMAssembler::reset()
71 {
72 mBase = mPC = (uint32_t *)mAssembly->base();
73 mBranchTargets.clear();
74 mLabels.clear();
75 mLabelsInverseMapping.clear();
76 mComments.clear();
77 }
78
79 // ----------------------------------------------------------------------------
80
disassemble(const char * name)81 void ARMAssembler::disassemble(const char* name)
82 {
83 if (name) {
84 printf("%s:\n", name);
85 }
86 size_t count = pc()-base();
87 uint32_t* i = base();
88 while (count--) {
89 ssize_t label = mLabelsInverseMapping.indexOfKey(i);
90 if (label >= 0) {
91 printf("%s:\n", mLabelsInverseMapping.valueAt(label));
92 }
93 ssize_t comment = mComments.indexOfKey(i);
94 if (comment >= 0) {
95 printf("; %s\n", mComments.valueAt(comment));
96 }
97 printf("%08x: %08x ", int(i), int(i[0]));
98 ::disassemble((u_int)i);
99 i++;
100 }
101 }
102
comment(const char * string)103 void ARMAssembler::comment(const char* string)
104 {
105 mComments.add(mPC, string);
106 }
107
label(const char * theLabel)108 void ARMAssembler::label(const char* theLabel)
109 {
110 mLabels.add(theLabel, mPC);
111 mLabelsInverseMapping.add(mPC, theLabel);
112 }
113
B(int cc,const char * label)114 void ARMAssembler::B(int cc, const char* label)
115 {
116 mBranchTargets.add(branch_target_t(label, mPC));
117 *mPC++ = (cc<<28) | (0xA<<24) | 0;
118 }
119
BL(int cc,const char * label)120 void ARMAssembler::BL(int cc, const char* label)
121 {
122 mBranchTargets.add(branch_target_t(label, mPC));
123 *mPC++ = (cc<<28) | (0xB<<24) | 0;
124 }
125
126 #if 0
127 #pragma mark -
128 #pragma mark Prolog/Epilog & Generate...
129 #endif
130
131
prolog()132 void ARMAssembler::prolog()
133 {
134 // write dummy prolog code
135 mPrologPC = mPC;
136 STM(AL, FD, SP, 1, LSAVED);
137 }
138
epilog(uint32_t touched)139 void ARMAssembler::epilog(uint32_t touched)
140 {
141 touched &= LSAVED;
142 if (touched) {
143 // write prolog code
144 uint32_t* pc = mPC;
145 mPC = mPrologPC;
146 STM(AL, FD, SP, 1, touched | LLR);
147 mPC = pc;
148 // write epilog code
149 LDM(AL, FD, SP, 1, touched | LLR);
150 BX(AL, LR);
151 } else { // heh, no registers to save!
152 // write prolog code
153 uint32_t* pc = mPC;
154 mPC = mPrologPC;
155 MOV(AL, 0, R0, R0); // NOP
156 mPC = pc;
157 // write epilog code
158 BX(AL, LR);
159 }
160 }
161
generate(const char * name)162 int ARMAssembler::generate(const char* name)
163 {
164 // fixup all the branches
165 size_t count = mBranchTargets.size();
166 while (count--) {
167 const branch_target_t& bt = mBranchTargets[count];
168 uint32_t* target_pc = mLabels.valueFor(bt.label);
169 LOG_ALWAYS_FATAL_IF(!target_pc,
170 "error resolving branch targets, target_pc is null");
171 int32_t offset = int32_t(target_pc - (bt.pc+2));
172 *bt.pc |= offset & 0xFFFFFF;
173 }
174
175 mAssembly->resize( int(pc()-base())*4 );
176
177 // the instruction cache is flushed by CodeCache
178 const int64_t duration = ggl_system_time() - mDuration;
179 const char * const format = "generated %s (%d ins) at [%p:%p] in %lld ns\n";
180 LOGI(format, name, int(pc()-base()), base(), pc(), duration);
181
182 #if defined(WITH_LIB_HARDWARE)
183 if (__builtin_expect(mQemuTracing, 0)) {
184 int err = qemu_add_mapping(int(base()), name);
185 mQemuTracing = (err >= 0);
186 }
187 #endif
188
189 char value[PROPERTY_VALUE_MAX];
190 property_get("debug.pf.disasm", value, "0");
191 if (atoi(value) != 0) {
192 printf(format, name, int(pc()-base()), base(), pc(), duration);
193 disassemble(name);
194 }
195
196 return NO_ERROR;
197 }
198
pcForLabel(const char * label)199 uint32_t* ARMAssembler::pcForLabel(const char* label)
200 {
201 return mLabels.valueFor(label);
202 }
203
204 // ----------------------------------------------------------------------------
205
206 #if 0
207 #pragma mark -
208 #pragma mark Data Processing...
209 #endif
210
dataProcessing(int opcode,int cc,int s,int Rd,int Rn,uint32_t Op2)211 void ARMAssembler::dataProcessing(int opcode, int cc,
212 int s, int Rd, int Rn, uint32_t Op2)
213 {
214 *mPC++ = (cc<<28) | (opcode<<21) | (s<<20) | (Rn<<16) | (Rd<<12) | Op2;
215 }
216
217 #if 0
218 #pragma mark -
219 #pragma mark Multiply...
220 #endif
221
222 // multiply...
MLA(int cc,int s,int Rd,int Rm,int Rs,int Rn)223 void ARMAssembler::MLA(int cc, int s,
224 int Rd, int Rm, int Rs, int Rn) {
225 if (Rd == Rm) { int t = Rm; Rm=Rs; Rs=t; }
226 LOG_FATAL_IF(Rd==Rm, "MLA(r%u,r%u,r%u,r%u)", Rd,Rm,Rs,Rn);
227 *mPC++ = (cc<<28) | (1<<21) | (s<<20) |
228 (Rd<<16) | (Rn<<12) | (Rs<<8) | 0x90 | Rm;
229 }
MUL(int cc,int s,int Rd,int Rm,int Rs)230 void ARMAssembler::MUL(int cc, int s,
231 int Rd, int Rm, int Rs) {
232 if (Rd == Rm) { int t = Rm; Rm=Rs; Rs=t; }
233 LOG_FATAL_IF(Rd==Rm, "MUL(r%u,r%u,r%u)", Rd,Rm,Rs);
234 *mPC++ = (cc<<28) | (s<<20) | (Rd<<16) | (Rs<<8) | 0x90 | Rm;
235 }
UMULL(int cc,int s,int RdLo,int RdHi,int Rm,int Rs)236 void ARMAssembler::UMULL(int cc, int s,
237 int RdLo, int RdHi, int Rm, int Rs) {
238 LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi,
239 "UMULL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs);
240 *mPC++ = (cc<<28) | (1<<23) | (s<<20) |
241 (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm;
242 }
UMUAL(int cc,int s,int RdLo,int RdHi,int Rm,int Rs)243 void ARMAssembler::UMUAL(int cc, int s,
244 int RdLo, int RdHi, int Rm, int Rs) {
245 LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi,
246 "UMUAL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs);
247 *mPC++ = (cc<<28) | (1<<23) | (1<<21) | (s<<20) |
248 (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm;
249 }
SMULL(int cc,int s,int RdLo,int RdHi,int Rm,int Rs)250 void ARMAssembler::SMULL(int cc, int s,
251 int RdLo, int RdHi, int Rm, int Rs) {
252 LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi,
253 "SMULL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs);
254 *mPC++ = (cc<<28) | (1<<23) | (1<<22) | (s<<20) |
255 (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm;
256 }
SMUAL(int cc,int s,int RdLo,int RdHi,int Rm,int Rs)257 void ARMAssembler::SMUAL(int cc, int s,
258 int RdLo, int RdHi, int Rm, int Rs) {
259 LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi,
260 "SMUAL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs);
261 *mPC++ = (cc<<28) | (1<<23) | (1<<22) | (1<<21) | (s<<20) |
262 (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm;
263 }
264
265 #if 0
266 #pragma mark -
267 #pragma mark Branches...
268 #endif
269
270 // branches...
B(int cc,uint32_t * pc)271 void ARMAssembler::B(int cc, uint32_t* pc)
272 {
273 int32_t offset = int32_t(pc - (mPC+2));
274 *mPC++ = (cc<<28) | (0xA<<24) | (offset & 0xFFFFFF);
275 }
276
BL(int cc,uint32_t * pc)277 void ARMAssembler::BL(int cc, uint32_t* pc)
278 {
279 int32_t offset = int32_t(pc - (mPC+2));
280 *mPC++ = (cc<<28) | (0xB<<24) | (offset & 0xFFFFFF);
281 }
282
BX(int cc,int Rn)283 void ARMAssembler::BX(int cc, int Rn)
284 {
285 *mPC++ = (cc<<28) | 0x12FFF10 | Rn;
286 }
287
288 #if 0
289 #pragma mark -
290 #pragma mark Data Transfer...
291 #endif
292
// data transfer...
LDR(int cc,int Rd,int Rn,uint32_t offset)294 void ARMAssembler::LDR(int cc, int Rd, int Rn, uint32_t offset) {
295 *mPC++ = (cc<<28) | (1<<26) | (1<<20) | (Rn<<16) | (Rd<<12) | offset;
296 }
LDRB(int cc,int Rd,int Rn,uint32_t offset)297 void ARMAssembler::LDRB(int cc, int Rd, int Rn, uint32_t offset) {
298 *mPC++ = (cc<<28) | (1<<26) | (1<<22) | (1<<20) | (Rn<<16) | (Rd<<12) | offset;
299 }
STR(int cc,int Rd,int Rn,uint32_t offset)300 void ARMAssembler::STR(int cc, int Rd, int Rn, uint32_t offset) {
301 *mPC++ = (cc<<28) | (1<<26) | (Rn<<16) | (Rd<<12) | offset;
302 }
STRB(int cc,int Rd,int Rn,uint32_t offset)303 void ARMAssembler::STRB(int cc, int Rd, int Rn, uint32_t offset) {
304 *mPC++ = (cc<<28) | (1<<26) | (1<<22) | (Rn<<16) | (Rd<<12) | offset;
305 }
306
LDRH(int cc,int Rd,int Rn,uint32_t offset)307 void ARMAssembler::LDRH(int cc, int Rd, int Rn, uint32_t offset) {
308 *mPC++ = (cc<<28) | (1<<20) | (Rn<<16) | (Rd<<12) | 0xB0 | offset;
309 }
LDRSB(int cc,int Rd,int Rn,uint32_t offset)310 void ARMAssembler::LDRSB(int cc, int Rd, int Rn, uint32_t offset) {
311 *mPC++ = (cc<<28) | (1<<20) | (Rn<<16) | (Rd<<12) | 0xD0 | offset;
312 }
LDRSH(int cc,int Rd,int Rn,uint32_t offset)313 void ARMAssembler::LDRSH(int cc, int Rd, int Rn, uint32_t offset) {
314 *mPC++ = (cc<<28) | (1<<20) | (Rn<<16) | (Rd<<12) | 0xF0 | offset;
315 }
STRH(int cc,int Rd,int Rn,uint32_t offset)316 void ARMAssembler::STRH(int cc, int Rd, int Rn, uint32_t offset) {
317 *mPC++ = (cc<<28) | (Rn<<16) | (Rd<<12) | 0xB0 | offset;
318 }
319
320 #if 0
321 #pragma mark -
322 #pragma mark Block Data Transfer...
323 #endif
324
325 // block data transfer...
LDM(int cc,int dir,int Rn,int W,uint32_t reg_list)326 void ARMAssembler::LDM(int cc, int dir,
327 int Rn, int W, uint32_t reg_list)
328 { // ED FD EA FA IB IA DB DA
329 const uint8_t P[8] = { 1, 0, 1, 0, 1, 0, 1, 0 };
330 const uint8_t U[8] = { 1, 1, 0, 0, 1, 1, 0, 0 };
331 *mPC++ = (cc<<28) | (4<<25) | (uint32_t(P[dir])<<24) |
332 (uint32_t(U[dir])<<23) | (1<<20) | (W<<21) | (Rn<<16) | reg_list;
333 }
334
STM(int cc,int dir,int Rn,int W,uint32_t reg_list)335 void ARMAssembler::STM(int cc, int dir,
336 int Rn, int W, uint32_t reg_list)
337 { // FA EA FD ED IB IA DB DA
338 const uint8_t P[8] = { 0, 1, 0, 1, 1, 0, 1, 0 };
339 const uint8_t U[8] = { 0, 0, 1, 1, 1, 1, 0, 0 };
340 *mPC++ = (cc<<28) | (4<<25) | (uint32_t(P[dir])<<24) |
341 (uint32_t(U[dir])<<23) | (0<<20) | (W<<21) | (Rn<<16) | reg_list;
342 }
343
344 #if 0
345 #pragma mark -
346 #pragma mark Special...
347 #endif
348
349 // special...
SWP(int cc,int Rn,int Rd,int Rm)350 void ARMAssembler::SWP(int cc, int Rn, int Rd, int Rm) {
351 *mPC++ = (cc<<28) | (2<<23) | (Rn<<16) | (Rd << 12) | 0x90 | Rm;
352 }
SWPB(int cc,int Rn,int Rd,int Rm)353 void ARMAssembler::SWPB(int cc, int Rn, int Rd, int Rm) {
354 *mPC++ = (cc<<28) | (2<<23) | (1<<22) | (Rn<<16) | (Rd << 12) | 0x90 | Rm;
355 }
SWI(int cc,uint32_t comment)356 void ARMAssembler::SWI(int cc, uint32_t comment) {
357 *mPC++ = (cc<<28) | (0xF<<24) | comment;
358 }
359
360 #if 0
361 #pragma mark -
362 #pragma mark DSP instructions...
363 #endif
364
365 // DSP instructions...
PLD(int Rn,uint32_t offset)366 void ARMAssembler::PLD(int Rn, uint32_t offset) {
367 LOG_ALWAYS_FATAL_IF(!((offset&(1<<24)) && !(offset&(1<<21))),
368 "PLD only P=1, W=0");
369 *mPC++ = 0xF550F000 | (Rn<<16) | offset;
370 }
371
CLZ(int cc,int Rd,int Rm)372 void ARMAssembler::CLZ(int cc, int Rd, int Rm)
373 {
374 *mPC++ = (cc<<28) | 0x16F0F10| (Rd<<12) | Rm;
375 }
376
QADD(int cc,int Rd,int Rm,int Rn)377 void ARMAssembler::QADD(int cc, int Rd, int Rm, int Rn)
378 {
379 *mPC++ = (cc<<28) | 0x1000050 | (Rn<<16) | (Rd<<12) | Rm;
380 }
381
QDADD(int cc,int Rd,int Rm,int Rn)382 void ARMAssembler::QDADD(int cc, int Rd, int Rm, int Rn)
383 {
384 *mPC++ = (cc<<28) | 0x1400050 | (Rn<<16) | (Rd<<12) | Rm;
385 }
386
QSUB(int cc,int Rd,int Rm,int Rn)387 void ARMAssembler::QSUB(int cc, int Rd, int Rm, int Rn)
388 {
389 *mPC++ = (cc<<28) | 0x1200050 | (Rn<<16) | (Rd<<12) | Rm;
390 }
391
QDSUB(int cc,int Rd,int Rm,int Rn)392 void ARMAssembler::QDSUB(int cc, int Rd, int Rm, int Rn)
393 {
394 *mPC++ = (cc<<28) | 0x1600050 | (Rn<<16) | (Rd<<12) | Rm;
395 }
396
SMUL(int cc,int xy,int Rd,int Rm,int Rs)397 void ARMAssembler::SMUL(int cc, int xy,
398 int Rd, int Rm, int Rs)
399 {
400 *mPC++ = (cc<<28) | 0x1600080 | (Rd<<16) | (Rs<<8) | (xy<<4) | Rm;
401 }
402
SMULW(int cc,int y,int Rd,int Rm,int Rs)403 void ARMAssembler::SMULW(int cc, int y,
404 int Rd, int Rm, int Rs)
405 {
406 *mPC++ = (cc<<28) | 0x12000A0 | (Rd<<16) | (Rs<<8) | (y<<4) | Rm;
407 }
408
SMLA(int cc,int xy,int Rd,int Rm,int Rs,int Rn)409 void ARMAssembler::SMLA(int cc, int xy,
410 int Rd, int Rm, int Rs, int Rn)
411 {
412 *mPC++ = (cc<<28) | 0x1000080 | (Rd<<16) | (Rn<<12) | (Rs<<8) | (xy<<4) | Rm;
413 }
414
SMLAL(int cc,int xy,int RdHi,int RdLo,int Rs,int Rm)415 void ARMAssembler::SMLAL(int cc, int xy,
416 int RdHi, int RdLo, int Rs, int Rm)
417 {
418 *mPC++ = (cc<<28) | 0x1400080 | (RdHi<<16) | (RdLo<<12) | (Rs<<8) | (xy<<4) | Rm;
419 }
420
SMLAW(int cc,int y,int Rd,int Rm,int Rs,int Rn)421 void ARMAssembler::SMLAW(int cc, int y,
422 int Rd, int Rm, int Rs, int Rn)
423 {
424 *mPC++ = (cc<<28) | 0x1200080 | (Rd<<16) | (Rn<<12) | (Rs<<8) | (y<<4) | Rm;
425 }
426
427 }; // namespace android
428
429