1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 *  linux/arch/arm/boot/compressed/head.S
4 *
5 *  Copyright (C) 1996-2002 Russell King
6 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
7 */
8#include <linux/linkage.h>
9#include <asm/assembler.h>
10#include <asm/v7m.h>
11
12#include "efi-header.S"
13
14 AR_CLASS(	.arch	armv7-a	)
15 M_CLASS(	.arch	armv7-m	)
16
17/*
18 * Debugging stuff
19 *
20 * Note that these macros must not contain any code which is not
21 * 100% relocatable.  Any attempt to do so will result in a crash.
22 * Please select one of the following when turning on debugging.
23 */
24#ifdef DEBUG
25
26#if defined(CONFIG_DEBUG_ICEDCC)
27
28#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
29		.macro	loadsp, rb, tmp1, tmp2
30		.endm
31		.macro	writeb, ch, rb, tmp
32		mcr	p14, 0, \ch, c0, c5, 0
33		.endm
34#elif defined(CONFIG_CPU_XSCALE)
35		.macro	loadsp, rb, tmp1, tmp2
36		.endm
37		.macro	writeb, ch, rb, tmp
38		mcr	p14, 0, \ch, c8, c0, 0
39		.endm
40#else
41		.macro	loadsp, rb, tmp1, tmp2
42		.endm
43		.macro	writeb, ch, rb, tmp
44		mcr	p14, 0, \ch, c1, c0, 0
45		.endm
46#endif
47
48#else
49
50#include CONFIG_DEBUG_LL_INCLUDE
51
52		.macro	writeb,	ch, rb, tmp
53#ifdef CONFIG_DEBUG_UART_FLOW_CONTROL
54		waituartcts \tmp, \rb
55#endif
56		waituarttxrdy \tmp, \rb
57		senduart \ch, \rb
58		busyuart \tmp, \rb
59		.endm
60
61#if defined(CONFIG_ARCH_SA1100)
62		.macro	loadsp, rb, tmp1, tmp2
63		mov	\rb, #0x80000000	@ physical base address
64#ifdef CONFIG_DEBUG_LL_SER3
65		add	\rb, \rb, #0x00050000	@ Ser3
66#else
67		add	\rb, \rb, #0x00010000	@ Ser1
68#endif
69		.endm
70#else
71		.macro	loadsp,	rb, tmp1, tmp2
72		addruart \rb, \tmp1, \tmp2
73		.endm
74#endif
75#endif
76#endif
77
78		.macro	kputc,val
79		mov	r0, \val
80		bl	putc
81		.endm
82
83		.macro	kphex,val,len
84		mov	r0, \val
85		mov	r1, #\len
86		bl	phex
87		.endm
88
89		/*
90		 * Debug kernel copy by printing the memory addresses involved
91		 */
92		.macro dbgkc, begin, end, cbegin, cend
93#ifdef DEBUG
94		kputc   #'C'
95		kputc   #':'
96		kputc   #'0'
97		kputc   #'x'
98		kphex   \begin, 8	/* Start of compressed kernel */
99		kputc	#'-'
100		kputc	#'0'
101		kputc	#'x'
102		kphex	\end, 8		/* End of compressed kernel */
103		kputc	#'-'
104		kputc	#'>'
105		kputc   #'0'
106		kputc   #'x'
107		kphex   \cbegin, 8	/* Start of kernel copy */
108		kputc	#'-'
109		kputc	#'0'
110		kputc	#'x'
111		kphex	\cend, 8	/* End of kernel copy */
112		kputc	#'\n'
113#endif
114		.endm
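		/*
		 * When DEBUG is defined, the macro above emits a line of the
		 * form "C:0x<begin>-0x<end>->0x<cbegin>-0x<cend>" on the
		 * low-level debug output.
		 */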
115
116		/*
117		 * Debug print of the final appended DTB location
118		 */
119		.macro dbgadtb, begin, size
120#ifdef DEBUG
121		kputc   #'D'
122		kputc   #'T'
123		kputc   #'B'
124		kputc   #':'
125		kputc   #'0'
126		kputc   #'x'
127		kphex   \begin, 8	/* Start of appended DTB */
128		kputc	#' '
129		kputc	#'('
130		kputc	#'0'
131		kputc	#'x'
132		kphex	\size, 8	/* Size of appended DTB */
133		kputc	#')'
134		kputc	#'\n'
135#endif
136		.endm
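		/*
		 * Likewise, dbgadtb emits "DTB:0x<begin> (0x<size>)" when
		 * DEBUG is defined.
		 */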
137
138		.macro	enable_cp15_barriers, reg
139		mrc	p15, 0, \reg, c1, c0, 0	@ read SCTLR
140		tst	\reg, #(1 << 5)		@ CP15BEN bit set?
141		bne	.L_\@
142		orr	\reg, \reg, #(1 << 5)	@ CP15 barrier instructions
143		mcr	p15, 0, \reg, c1, c0, 0	@ write SCTLR
144 ARM(		.inst   0xf57ff06f		@ v7+ isb	)
145 THUMB(		isb						)
146.L_\@:
147		.endm
148
149		/*
150		 * The kernel build system appends the size of the
151		 * decompressed kernel at the end of the compressed data
152		 * in little-endian form.
153		 */
154		.macro	get_inflated_image_size, res:req, tmp1:req, tmp2:req
155		adr_l	\res, .Linflated_image_size_offset
156		ldr	\tmp1, [\res]
157		add	\tmp1, \tmp1, \res	@ address of inflated image size
158
159		ldrb	\res, [\tmp1]		@ get_unaligned_le32
160		ldrb	\tmp2, [\tmp1, #1]
161		orr	\res, \res, \tmp2, lsl #8
162		ldrb	\tmp2, [\tmp1, #2]
163		ldrb	\tmp1, [\tmp1, #3]
164		orr	\res, \res, \tmp2, lsl #16
165		orr	\res, \res, \tmp1, lsl #24
166		.endm
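		/*
		 * Illustrative C equivalent of the byte-wise load above
		 * (an open-coded get_unaligned_le32(), using byte loads so
		 * that no unaligned word access is needed):
		 *
		 *	res = p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24;
		 */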
167
168		.macro  record_seed
169#ifdef CONFIG_RANDOMIZE_BASE
170		sub	ip, r1, ip, ror #1	@ poor man's kaslr seed, will
171		sub	ip, r2, ip, ror #2	@ be superseded by kaslr-seed
172		sub	ip, r3, ip, ror #3	@ from /chosen if present
173		sub	ip, r4, ip, ror #5
174		sub	ip, r5, ip, ror #8
175		sub	ip, r6, ip, ror #13
176		sub	ip, r7, ip, ror #21
177		sub	ip, r8, ip, ror #3
178		sub	ip, r9, ip, ror #24
179		sub	ip, r10, ip, ror #27
180		sub	ip, r11, ip, ror #19
181		sub	ip, r13, ip, ror #14
182		sub	ip, r14, ip, ror #2
183		str_l	ip, __kaslr_seed, r9
184#endif
185		.endm
186
187		.section ".start", "ax"
188/*
189 * sort out different calling conventions
190 */
191		.align
192		/*
193		 * Always enter in ARM state for CPUs that support the ARM ISA.
194		 * As of today (2014) that's exactly the members of the A and R
195		 * classes.
196		 */
197 AR_CLASS(	.arm	)
198start:
199		.type	start,#function
		/*
		 * These 7 nops, along with the 1 nop immediately below for
		 * !THUMB2, form 8 nops that make the compressed kernel bootable
		 * on legacy ARM systems that assumed the kernel was in a.out
		 * binary format. The boot loaders on these systems would
		 * jump 32 bytes into the image to skip the a.out header.
		 * With these 8 nops filling exactly 32 bytes, things still
		 * work as expected on these legacy systems. Thumb2 mode keeps
		 * 7 of the nops, as it turns out that some boot loaders
		 * were patching the initial instructions of the kernel, i.e.
		 * had started to exploit this "patch area".
		 */
212		.rept	7
213		__nop
214		.endr
215#ifndef CONFIG_THUMB2_KERNEL
216		__nop
217#else
218 AR_CLASS(	sub	pc, pc, #3	)	@ A/R: switch to Thumb2 mode
219  M_CLASS(	nop.w			)	@ M: already in Thumb2 mode
220		.thumb
221#endif
222		W(b)	1f
223
224		.word	_magic_sig	@ Magic numbers to help the loader
225		.word	_magic_start	@ absolute load/run zImage address
226		.word	_magic_end	@ zImage end address
227		.word	0x04030201	@ endianness flag
228		.word	0x45454545	@ another magic number to indicate
229		.word	_magic_table	@ additional data table
230
231		__EFI_HEADER
2321:
233 ARM_BE8(	setend	be		)	@ go BE8 if compiled for BE8
234		record_seed
235 AR_CLASS(	mrs	r9, cpsr	)
236#ifdef CONFIG_ARM_VIRT_EXT
237		bl	__hyp_stub_install	@ get into SVC mode, reversibly
238#endif
239		mov	r7, r1			@ save architecture ID
240		mov	r8, r2			@ save atags pointer
241
242#ifndef CONFIG_CPU_V7M
243		/*
244		 * Booting from Angel - need to enter SVC mode and disable
245		 * FIQs/IRQs (numeric definitions from angel arm.h source).
246		 * We only do this if we were in user mode on entry.
247		 */
248		mrs	r2, cpsr		@ get current mode
249		tst	r2, #3			@ not user?
250		bne	not_angel
251		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
252 ARM(		swi	0x123456	)	@ angel_SWI_ARM
253 THUMB(		svc	0xab		)	@ angel_SWI_THUMB
254not_angel:
255		safe_svcmode_maskall r0
256		msr	spsr_cxsf, r9		@ Save the CPU boot mode in
257						@ SPSR
258#endif
259		/*
260		 * Note that some cache flushing and other stuff may
261		 * be needed here - is there an Angel SWI call for this?
262		 */
263
		/*
		 * Some architecture-specific code can be inserted
		 * by the linker here, but it should preserve r7, r8, and r9.
		 */
268
269		.text
270
271#ifdef CONFIG_AUTO_ZRELADDR
272		/*
273		 * Find the start of physical memory.  As we are executing
274		 * without the MMU on, we are in the physical address space.
275		 * We just need to get rid of any offset by aligning the
276		 * address.
277		 *
278		 * This alignment is a balance between the requirements of
279		 * different platforms - we have chosen 128MB to allow
280		 * platforms which align the start of their physical memory
281		 * to 128MB to use this feature, while allowing the zImage
282		 * to be placed within the first 128MB of memory on other
283		 * platforms.  Increasing the alignment means we place
284		 * stricter alignment requirements on the start of physical
285		 * memory, but relaxing it means that we break people who
286		 * are already placing their zImage in (eg) the top 64MB
287		 * of this range.
288		 */
289		mov	r4, pc
290		and	r4, r4, #0xf8000000
291		/* Determine final kernel image address. */
292		add	r4, r4, #TEXT_OFFSET
293#else
294		ldr	r4, =zreladdr
295#endif
296
		/*
		 * Set up a page table only if it won't overwrite ourselves.
		 * That means r4 < pc || r4 - 16k page directory > &_end.
		 * Given that r4 > &_end is quite infrequent, we add a rough
		 * additional 1MB of room for a possible appended DTB.
		 */
303		mov	r0, pc
304		cmp	r0, r4
305		ldrcc	r0, .Lheadroom
306		addcc	r0, r0, pc
307		cmpcc	r4, r0
308		orrcc	r4, r4, #1		@ remember we skipped cache_on
309		blcs	cache_on
310
311restart:	adr_l	r0, LC1
312		ldr	sp, [r0]
313		ldr	r6, [r0, #4]
314		add	sp, sp, r0
315		add	r6, r6, r0
316
317		get_inflated_image_size	r9, r10, lr
318
319#ifndef CONFIG_ZBOOT_ROM
320		/* malloc space is above the relocated stack (64k max) */
321		add	r10, sp, #MALLOC_SIZE
322#else
		/*
		 * With ZBOOT_ROM the bss/stack is not relocatable,
		 * but someone could still run this code from RAM,
		 * in which case our reference is _edata.
		 */
328		mov	r10, r6
329#endif
330
331		mov	r5, #0			@ init dtb size to 0
332#ifdef CONFIG_ARM_APPENDED_DTB
/*
 *   r4  = final kernel address (possibly with LSB set)
 *   r5  = appended dtb size (still unknown)
 *   r6  = _edata
 *   r7  = architecture ID
 *   r8  = atags/device tree pointer
 *   r9  = size of decompressed image
 *   r10 = end of this image, including bss/stack/malloc space if non-XIP
 *   sp  = stack pointer
 *
 * If there are device trees (dtb) appended to the zImage, advance r10 so that
 * the dtb data will get relocated along with the kernel if necessary.
 */
346
347		ldr	lr, [r6, #0]
348#ifndef __ARMEB__
349		ldr	r1, =0xedfe0dd0		@ sig is 0xd00dfeed big endian
350#else
351		ldr	r1, =0xd00dfeed
352#endif
353		cmp	lr, r1
354		bne	dtb_check_done		@ not found
355
356#ifdef CONFIG_ARM_ATAG_DTB_COMPAT
		/*
		 * OK... Let's do some funky business here.
		 * If we do have a DTB appended to the zImage, and we do have
		 * an ATAG list around, we want the latter to be translated
		 * and folded into the former here. No GOT fixup has occurred
		 * yet, but none of the code we're about to call uses any
		 * global variables.
		 */
365
366		/* Get the initial DTB size */
367		ldr	r5, [r6, #4]
368#ifndef __ARMEB__
369		/* convert to little endian */
370		eor	r1, r5, r5, ror #16
371		bic	r1, r1, #0x00ff0000
372		mov	r5, r5, ror #8
373		eor	r5, r5, r1, lsr #8
374#endif
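		/*
		 * The eor/bic/ror/eor sequence above is the classic
		 * byte-reverse idiom for cores without the rev instruction:
		 * the DT header stores its values big-endian, so the size
		 * must be byte-swapped on little-endian configurations.
		 */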
375		dbgadtb	r6, r5
376		/* 50% DTB growth should be good enough */
377		add	r5, r5, r5, lsr #1
378		/* preserve 64-bit alignment */
379		add	r5, r5, #7
380		bic	r5, r5, #7
381		/* clamp to 32KB min and 1MB max */
382		cmp	r5, #(1 << 15)
383		movlo	r5, #(1 << 15)
384		cmp	r5, #(1 << 20)
385		movhi	r5, #(1 << 20)
386		/* temporarily relocate the stack past the DTB work space */
387		add	sp, sp, r5
388
389		mov	r0, r8
390		mov	r1, r6
391		mov	r2, r5
392		bl	atags_to_fdt
393
		/*
		 * If the returned value is 1, there is no ATAG list at the
		 * location pointed to by r8.  Try the typical 0x100 offset
		 * from the start of RAM and hope for the best.
		 */
399		cmp	r0, #1
400		sub	r0, r4, #TEXT_OFFSET
401		bic	r0, r0, #1
402		add	r0, r0, #0x100
403		mov	r1, r6
404		mov	r2, r5
405		bleq	atags_to_fdt
406
407		sub	sp, sp, r5
408#endif
409
410		mov	r8, r6			@ use the appended device tree
411
412		/*
413		 * Make sure that the DTB doesn't end up in the final
414		 * kernel's .bss area. To do so, we adjust the decompressed
415		 * kernel size to compensate if that .bss size is larger
416		 * than the relocated code.
417		 */
418		ldr	r5, =_kernel_bss_size
419		adr	r1, wont_overwrite
420		sub	r1, r6, r1
421		subs	r1, r5, r1
422		addhi	r9, r9, r1
423
424		/* Get the current DTB size */
425		ldr	r5, [r6, #4]
426#ifndef __ARMEB__
427		/* convert r5 (dtb size) to little endian */
428		eor	r1, r5, r5, ror #16
429		bic	r1, r1, #0x00ff0000
430		mov	r5, r5, ror #8
431		eor	r5, r5, r1, lsr #8
432#endif
433
434		/* preserve 64-bit alignment */
435		add	r5, r5, #7
436		bic	r5, r5, #7
437
438		/* relocate some pointers past the appended dtb */
439		add	r6, r6, r5
440		add	r10, r10, r5
441		add	sp, sp, r5
442dtb_check_done:
443#endif
444
445#ifdef CONFIG_RANDOMIZE_BASE
446		ldr	r1, __kaslr_offset	@ check if the kaslr_offset is
447		cmp	r1, #0			@ already set
448		bne	1f
449
450		stmfd	sp!, {r0-r3, ip, lr}
451#ifdef CONFIG_ARCH_HISI
452#ifdef CONFIG_ARM_APPENDED_DTB
453#ifdef CONFIG_START_MEM_2M_ALIGN
		mov	r0, r4
#ifdef CONFIG_CORTEX_A9
		lsr	r0, r0, #20
		lsl	r0, r0, #20
#else
		lsr	r0, r0, #21
		lsl	r0, r0, #21
#endif
		add	r0, r0, #0x1000
		ldr	r1, [r0]
#ifndef __ARMEB__
		ldr	r2, =0xedfe0dd0		@ sig is 0xd00dfeed big endian
#else
		ldr	r2, =0xd00dfeed
#endif
		cmp	r1, r2
		moveq	r8, r0
471#endif
472#endif
473#endif
474		adr_l	r2, _text		@ start of zImage
475		stmfd	sp!, {r2, r8, r10}	@ pass stack arguments
476
477		ldr_l	r3, __kaslr_seed
478#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
479		/*
480		 * Get some pseudo-entropy from the low bits of the generic
481		 * timer if it is implemented.
482		 */
483		mrc	p15, 0, r1, c0, c1, 1	@ read ID_PFR1 register
484		tst	r1, #0x10000		@ have generic timer?
485		mrrcne	p15, 1, r3, r1, c14	@ read CNTVCT
486#endif
487		adr_l	r0, __kaslr_offset	@ pass &__kaslr_offset in r0
488		mov	r1, r4			@ pass base address
489		mov	r2, r9			@ pass decompressed image size
490		eor	r3, r3, r3, ror #16	@ pass pseudorandom seed
491		bl	kaslr_early_init
492		add	sp, sp, #12
493		cmp	r0, #0
494		addne	r4, r4, r0		@ add offset to base address
495#ifdef CONFIG_VXBOOT
496#ifdef CONFIG_START_MEM_2M_ALIGN
497#ifdef CONFIG_CORTEX_A9
		adr	r1, vx_edata
		strne	r6, [r1]
500#endif
501#endif
502#endif
503		ldmfd	sp!, {r0-r3, ip, lr}
504		bne	restart
5051:
506#endif
507
/*
 * Check to see if we will overwrite ourselves.
 *   r4  = final kernel address (possibly with LSB set)
 *   r9  = size of decompressed image
 *   r10 = end of this image, including bss/stack/malloc space if non-XIP
 * We basically want:
 *   r4 - 16k page directory >= r10 -> OK
 *   r4 + image length <= address of wont_overwrite -> OK
 * Note: the possible LSB in r4 is harmless here.
 */
518		add	r10, r10, #16384
519		cmp	r4, r10
520		bhs	wont_overwrite
521		add	r10, r4, r9
522		adr	r9, wont_overwrite
523		cmp	r10, r9
524		bls	wont_overwrite
525
526/*
527 * Relocate ourselves past the end of the decompressed kernel.
528 *   r6  = _edata
529 *   r10 = end of the decompressed kernel
530 * Because we always copy ahead, we need to do it from the end and go
531 * backward in case the source and destination overlap.
532 */
		/*
		 * Bump to the next 256-byte boundary with the size of
		 * the relocation code added. This avoids overwriting
		 * ourselves when the offset is small.
		 */
538		add	r10, r10, #((reloc_code_end - restart + 256) & ~255)
539		bic	r10, r10, #255
540
541		/* Get start of code we want to copy and align it down. */
542		adr	r5, restart
543		bic	r5, r5, #31
544
545/* Relocate the hyp vector base if necessary */
546#ifdef CONFIG_ARM_VIRT_EXT
547		mrs	r0, spsr
548		and	r0, r0, #MODE_MASK
549		cmp	r0, #HYP_MODE
550		bne	1f
551
552		/*
553		 * Compute the address of the hyp vectors after relocation.
554		 * Call __hyp_set_vectors with the new address so that we
555		 * can HVC again after the copy.
556		 */
557		adr_l	r0, __hyp_stub_vectors
558		sub	r0, r0, r5
559		add	r0, r0, r10
560		bl	__hyp_set_vectors
5611:
562#endif
563
564		sub	r9, r6, r5		@ size to copy
565		add	r9, r9, #31		@ rounded up to a multiple
566		bic	r9, r9, #31		@ ... of 32 bytes
567		add	r6, r9, r5
568		add	r9, r9, r10
569
570#ifdef DEBUG
571		sub     r10, r6, r5
572		sub     r10, r9, r10
		/*
		 * We are about to copy the kernel to a new memory area.
		 * The boundaries of the new memory area are in r10 and r9,
		 * whilst r5 and r6 contain the boundaries of the memory we
		 * are going to copy.  Calling dbgkc prints this information.
		 */
581		dbgkc	r5, r6, r10, r9
582#endif
583
5841:		ldmdb	r6!, {r0 - r3, r10 - r12, lr}
585		cmp	r6, r5
586		stmdb	r9!, {r0 - r3, r10 - r12, lr}
587		bhi	1b
588
589		/* Preserve offset to relocated code. */
590		sub	r6, r9, r6
591
592		mov	r0, r9			@ start of relocated zImage
593		add	r1, sp, r6		@ end of relocated zImage
594		bl	cache_clean_flush
595
596		badr	r0, restart
597		add	r0, r0, r6
598		mov	pc, r0
599
600wont_overwrite:
601		adr	r0, LC0
602		ldmia	r0, {r1, r2, r3, r11, r12}
603		sub	r0, r0, r1		@ calculate the delta offset
604
605/*
606 * If delta is zero, we are running at the address we were linked at.
607 *   r0  = delta
608 *   r2  = BSS start
609 *   r3  = BSS end
610 *   r4  = kernel execution address (possibly with LSB set)
611 *   r5  = appended dtb size (0 if not present)
612 *   r7  = architecture ID
613 *   r8  = atags pointer
614 *   r11 = GOT start
615 *   r12 = GOT end
616 *   sp  = stack pointer
617 */
618		orrs	r1, r0, r5
619		beq	not_relocated
620
621		add	r11, r11, r0
622		add	r12, r12, r0
623
624#ifndef CONFIG_ZBOOT_ROM
		/*
		 * If we're running fully PIC (i.e. CONFIG_ZBOOT_ROM=n),
		 * we need to fix up pointers into the BSS region.
		 * Note that the stack pointer has already been fixed up.
		 */
630		add	r2, r2, r0
631		add	r3, r3, r0
632
633		/*
634		 * Relocate all entries in the GOT table.
635		 * Bump bss entries to _edata + dtb size
636		 */
6371:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
638		add	r1, r1, r0		@ This fixes up C references
639		cmp	r1, r2			@ if entry >= bss_start &&
640		cmphs	r3, r1			@       bss_end > entry
641		addhi	r1, r1, r5		@    entry += dtb size
642		str	r1, [r11], #4		@ next entry
643		cmp	r11, r12
644		blo	1b
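		/*
		 * Illustrative C equivalent of the loop above, with the
		 * delta in r0 and the dtb size in r5:
		 *
		 *	for (p = got_start; p < got_end; p++) {
		 *		entry = *p + delta;
		 *		if (entry >= bss_start && entry < bss_end)
		 *			entry += dtb_size;
		 *		*p = entry;
		 *	}
		 */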
645
646		/* bump our bss pointers too */
647		add	r2, r2, r5
648		add	r3, r3, r5
649
650#else
651
652		/*
653		 * Relocate entries in the GOT table.  We only relocate
654		 * the entries that are outside the (relocated) BSS region.
655		 */
6561:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
657		cmp	r1, r2			@ entry < bss_start ||
658		cmphs	r3, r1			@ _end < entry
659		addlo	r1, r1, r0		@ table.  This fixes up the
660		str	r1, [r11], #4		@ C references.
661		cmp	r11, r12
662		blo	1b
663#endif
664
665not_relocated:	mov	r0, #0
6661:		str	r0, [r2], #4		@ clear bss
667		str	r0, [r2], #4
668		str	r0, [r2], #4
669		str	r0, [r2], #4
670		cmp	r2, r3
671		blo	1b
672
673		/*
674		 * Did we skip the cache setup earlier?
675		 * That is indicated by the LSB in r4.
676		 * Do it now if so.
677		 */
678		tst	r4, #1
679		bic	r4, r4, #1
680		blne	cache_on
681
/*
 * The C runtime environment should now be set up sufficiently.
 * Set up some pointers, and start decompressing.
 *   r4  = kernel execution address
 *   r7  = architecture ID
 *   r8  = atags pointer
 */
689		mov	r0, r4
690		mov	r1, sp			@ malloc space above stack
691		add	r2, sp, #MALLOC_SIZE	@ 64k max
692		mov	r3, r7
693		bl	decompress_kernel
694
695		get_inflated_image_size	r1, r2, r3
696
697		mov	r0, r4			@ start of inflated image
698		add	r1, r1, r0		@ end of inflated image
699		bl	cache_clean_flush
700		bl	cache_off
701
702#ifdef CONFIG_ARM_VIRT_EXT
703		mrs	r0, spsr		@ Get saved CPU boot mode
704		and	r0, r0, #MODE_MASK
705		cmp	r0, #HYP_MODE		@ if not booted in HYP mode...
706		bne	__enter_kernel		@ boot kernel directly
707
708		adr_l	r0, __hyp_reentry_vectors
709		bl	__hyp_set_vectors
710		__HVC(0)			@ otherwise bounce to hyp mode
711
712		b	.			@ should never be reached
713#else
714		b	__enter_kernel
715#endif
716
717		.align	2
718		.type	LC0, #object
719LC0:		.word	LC0			@ r1
720		.word	__bss_start		@ r2
721		.word	_end			@ r3
722		.word	_got_start		@ r11
723		.word	_got_end		@ ip
724		.size	LC0, . - LC0
725
726		.type	LC1, #object
727LC1:		.word	.L_user_stack_end - LC1	@ sp
728		.word	_edata - LC1		@ r6
729		.size	LC1, . - LC1
730
731.Lheadroom:
732		.word	_end - restart + 16384 + 1024*1024
733
734.Linflated_image_size_offset:
735		.long	(input_data_end - 4) - .
736
737#ifdef CONFIG_ARCH_RPC
738		.globl	params
739params:		ldr	r0, =0x10000100		@ params_phys for RPC
740		mov	pc, lr
741		.ltorg
742		.align
743#endif
744
745/*
746 * dcache_line_size - get the minimum D-cache line size from the CTR register
747 * on ARMv7.
748 */
749		.macro	dcache_line_size, reg, tmp
750#ifdef CONFIG_CPU_V7M
751		movw	\tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR
752		movt	\tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR
753		ldr	\tmp, [\tmp]
754#else
755		mrc	p15, 0, \tmp, c0, c0, 1		@ read ctr
756#endif
757		lsr	\tmp, \tmp, #16
758		and	\tmp, \tmp, #0xf		@ cache line size encoding
759		mov	\reg, #4			@ bytes per word
760		mov	\reg, \reg, lsl \tmp		@ actual cache line size
761		.endm
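		/*
		 * CTR[19:16] (DminLine) holds log2 of the smallest D-cache
		 * line in words, so the value computed above is
		 * 4 << CTR[19:16] bytes.
		 */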
762
/*
 * Turn on the cache.  We need to set up some page tables so that we
 * can have both the I and D caches on.
 *
 * We place the page tables 16k down from the kernel execution address,
 * and we hope that nothing else is using that memory.  If it is
 * already in use, we will go pop!
 *
 * On entry,
 *  r4 = kernel execution address
 *  r7 = architecture number
 *  r8 = atags pointer
 * On exit,
 *  r0, r1, r2, r3, r9, r10, r12 corrupted
 * This routine must preserve:
 *  r4, r7, r8
 */
780		.align	5
781cache_on:	mov	r3, #8			@ cache_on function
782		b	call_cache_fn
783
/*
 * Initialize the highest priority protection region, PR7,
 * to cover the whole 32-bit address space, cacheable and bufferable.
 */
788__armv4_mpu_cache_on:
789		mov	r0, #0x3f		@ 4G, the whole
790		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
791		mcr 	p15, 0, r0, c6, c7, 1
792
793		mov	r0, #0x80		@ PR7
794		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
795		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
796		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
797
798		mov	r0, #0xc000
799		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
800		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission
801
802		mov	r0, #0
803		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
804		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
805		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
806		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
807						@ ...I .... ..D. WC.M
808		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
809		orr	r0, r0, #0x1000		@ ...1 .... .... ....
810
811		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
812
813		mov	r0, #0
814		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
815		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
816		mov	pc, lr
817
818__armv3_mpu_cache_on:
819		mov	r0, #0x3f		@ 4G, the whole
820		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
821
822		mov	r0, #0x80		@ PR7
823		mcr	p15, 0, r0, c2, c0, 0	@ cache on
824		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
825
826		mov	r0, #0xc000
827		mcr	p15, 0, r0, c5, c0, 0	@ access permission
828
829		mov	r0, #0
830		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
831		/*
832		 * ?? ARMv3 MMU does not allow reading the control register,
833		 * does this really work on ARMv3 MPU?
834		 */
835		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
836						@ .... .... .... WC.M
837		orr	r0, r0, #0x000d		@ .... .... .... 11.1
838		/* ?? this overwrites the value constructed above? */
839		mov	r0, #0
840		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
841
842		/* ?? invalidate for the second time? */
843		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
844		mov	pc, lr
845
846#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
847#define CB_BITS 0x08
848#else
849#define CB_BITS 0x0c
850#endif
851
852__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
853		bic	r3, r3, #0xff		@ Align the pointer
854		bic	r3, r3, #0x3f00
855/*
856 * Initialise the page tables, turning on the cacheable and bufferable
857 * bits for the RAM area only.
858 */
859		mov	r0, r3
860		mov	r9, r0, lsr #18
861		mov	r9, r9, lsl #18		@ start of RAM
862		add	r10, r9, #0x10000000	@ a reasonable RAM size
863		mov	r1, #0x12		@ XN|U + section mapping
864		orr	r1, r1, #3 << 10	@ AP=11
865		add	r2, r3, #16384
8661:		cmp	r1, r9			@ if virt > start of RAM
867		cmphs	r10, r1			@   && end of RAM > virt
868		bic	r1, r1, #0x1c		@ clear XN|U + C + B
869		orrlo	r1, r1, #0x10		@ Set XN|U for non-RAM
870		orrhs	r1, r1, r6		@ set RAM section settings
871		str	r1, [r0], #4		@ 1:1 mapping
872		add	r1, r1, #1048576
873		teq	r0, r2
874		bne	1b
/*
 * Make sure our entire executable image (including payload) is mapped
 * cacheable, in case it is located outside the region we covered above.
 * (This may be the case if running from flash or with randomization enabled.)
 * If the regions happen to overlap, we just duplicate some of the above.
 */
881		orr	r1, r6, #0x04		@ ensure B is set for this
882		orr	r1, r1, #3 << 10
883		mov	r2, pc
884		adr_l	r9, _end
885		mov	r2, r2, lsr #20
886		mov	r9, r9, lsr #20
887		orr	r1, r1, r2, lsl #20
888		add	r0, r3, r2, lsl #2
889		add	r9, r3, r9, lsl #2
8900:		str	r1, [r0], #4
891		add	r1, r1, #1048576
892		cmp	r0, r9
893		bls	0b
894		mov	pc, lr
895ENDPROC(__setup_mmu)
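/*
 * The loops above build a flat 1:1 mapping of 1MB sections: the 256MB
 * region assumed to be RAM (starting near the page directory) gets the
 * cacheable/bufferable bits from r6, everything else is mapped XN and
 * uncached, and the second loop maps the zImage itself cacheable too.
 */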
896
897@ Enable unaligned access on v6, to allow better code generation
898@ for the decompressor C code:
899__armv6_mmu_cache_on:
900		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
901		bic	r0, r0, #2		@ A (no unaligned access fault)
902		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
903		mcr	p15, 0, r0, c1, c0, 0	@ write SCTLR
904		b	__armv4_mmu_cache_on
905
906__arm926ejs_mmu_cache_on:
907#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
908		mov	r0, #4			@ put dcache in WT mode
909		mcr	p15, 7, r0, c15, c0, 0
910#endif
911
912__armv4_mmu_cache_on:
913		mov	r12, lr
914#ifdef CONFIG_MMU
915		mov	r6, #CB_BITS | 0x12	@ U
916		bl	__setup_mmu
917		mov	r0, #0
918		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
919		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
920		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
921		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
922		orr	r0, r0, #0x0030
923 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
924		bl	__common_mmu_cache_on
925		mov	r0, #0
926		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
927#endif
928		mov	pc, r12
929
930__armv7_mmu_cache_on:
931		enable_cp15_barriers	r11
932		mov	r12, lr
933#ifdef CONFIG_MMU
934		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
935		tst	r11, #0xf		@ VMSA
936		movne	r6, #CB_BITS | 0x02	@ !XN
937		blne	__setup_mmu
938		mov	r0, #0
939		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
940		tst	r11, #0xf		@ VMSA
941		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
942#endif
943		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
944		bic	r0, r0, #1 << 28	@ clear SCTLR.TRE
945		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
946		orr	r0, r0, #0x003c		@ write buffer
947		bic	r0, r0, #2		@ A (no unaligned access fault)
948		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
949						@ (needed for ARM1176)
950#ifdef CONFIG_MMU
951 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
952		mrcne   p15, 0, r6, c2, c0, 2   @ read ttb control reg
953		orrne	r0, r0, #1		@ MMU enabled
954		movne	r1, #0xfffffffd		@ domain 0 = client
955		bic     r6, r6, #1 << 31        @ 32-bit translation system
956		bic     r6, r6, #(7 << 0) | (1 << 4)	@ use only ttbr0
957		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
958		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
959		mcrne   p15, 0, r6, c2, c0, 2   @ load ttb control
960#endif
961		mcr	p15, 0, r0, c7, c5, 4	@ ISB
962		mcr	p15, 0, r0, c1, c0, 0	@ load control register
963		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
964		mov	r0, #0
965		mcr	p15, 0, r0, c7, c5, 4	@ ISB
966		mov	pc, r12
967
968__fa526_cache_on:
969		mov	r12, lr
970		mov	r6, #CB_BITS | 0x12	@ U
971		bl	__setup_mmu
972		mov	r0, #0
973		mcr	p15, 0, r0, c7, c7, 0	@ Invalidate whole cache
974		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
975		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
976		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
977		orr	r0, r0, #0x1000		@ I-cache enable
978		bl	__common_mmu_cache_on
979		mov	r0, #0
980		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
981		mov	pc, r12
982
983__common_mmu_cache_on:
984#ifndef CONFIG_THUMB2_KERNEL
985#ifndef DEBUG
986		orr	r0, r0, #0x000d		@ Write buffer, mmu
987#endif
988		mov	r1, #-1
989		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
990		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
991		b	1f
992		.align	5			@ cache line aligned
9931:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
994		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
995		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
996#endif
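/*
 * The "sub pc, lr, r0, lsr #32" above is effectively "mov pc, lr"
 * (a right shift by 32 yields zero), but it makes the return depend on
 * the value just read back from the control register, so the pipeline
 * is flushed only after the MMU/cache enable has taken effect.
 */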
997
998#define PROC_ENTRY_SIZE (4*5)
999
1000/*
1001 * Here follow the relocatable cache support functions for the
1002 * various processors.  This is a generic hook for locating an
1003 * entry and jumping to an instruction at the specified offset
1004 * from the start of the block.  Please note this is all position
1005 * independent code.
1006 *
1007 *  r1  = corrupted
1008 *  r2  = corrupted
1009 *  r3  = block offset
1010 *  r9  = corrupted
1011 *  r12 = corrupted
1012 */
1013
1014call_cache_fn:	adr	r12, proc_types
1015#ifdef CONFIG_CPU_CP15
1016		mrc	p15, 0, r9, c0, c0	@ get processor ID
1017#elif defined(CONFIG_CPU_V7M)
		/*
		 * On v7-M the processor ID is located in the V7M_SCB_CPUID
		 * register, but as cache handling is IMPLEMENTATION DEFINED on
		 * v7-M (if existent at all) we just return early here.
		 * If V7M_SCB_CPUID were used, the CPU ID functions (i.e.
		 * __armv7_mmu_cache_{on,off,flush}) would be selected, which
		 * use cp15 registers that are not implemented on v7-M.
		 */
1026		bx	lr
1027#else
1028		ldr	r9, =CONFIG_PROCESSOR_ID
1029#endif
10301:		ldr	r1, [r12, #0]		@ get value
1031		ldr	r2, [r12, #4]		@ get mask
1032		eor	r1, r1, r9		@ (real ^ match)
1033		tst	r1, r2			@       & mask
1034 ARM(		addeq	pc, r12, r3		) @ call cache function
1035 THUMB(		addeq	r12, r3			)
1036 THUMB(		moveq	pc, r12			) @ call cache function
1037		add	r12, r12, #PROC_ENTRY_SIZE
1038		b	1b
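/*
 * Illustrative C equivalent of the table walk above:
 *
 *	for (p = proc_types; ; p++)
 *		if (((processor_id ^ p->match) & p->mask) == 0)
 *			goto *(p->methods + block_offset);
 *
 * The terminating all-zeroes entry matches any ID, so the walk always
 * ends.
 */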
1039
1040/*
1041 * Table for cache operations.  This is basically:
1042 *   - CPU ID match
1043 *   - CPU ID mask
1044 *   - 'cache on' method instruction
1045 *   - 'cache off' method instruction
1046 *   - 'cache flush' method instruction
1047 *
1048 * We match an entry using: ((real_id ^ match) & mask) == 0
1049 *
1050 * Writethrough caches generally only need 'on' and 'off'
1051 * methods.  Writeback caches _must_ have the flush method
1052 * defined.
1053 */
1054		.align	2
1055		.type	proc_types,#object
1056proc_types:
1057		.word	0x41000000		@ old ARM ID
1058		.word	0xff00f000
1059		mov	pc, lr
1060 THUMB(		nop				)
1061		mov	pc, lr
1062 THUMB(		nop				)
1063		mov	pc, lr
1064 THUMB(		nop				)
1065
1066		.word	0x41007000		@ ARM7/710
1067		.word	0xfff8fe00
1068		mov	pc, lr
1069 THUMB(		nop				)
1070		mov	pc, lr
1071 THUMB(		nop				)
1072		mov	pc, lr
1073 THUMB(		nop				)
1074
1075		.word	0x41807200		@ ARM720T (writethrough)
1076		.word	0xffffff00
1077		W(b)	__armv4_mmu_cache_on
1078		W(b)	__armv4_mmu_cache_off
1079		mov	pc, lr
1080 THUMB(		nop				)
1081
1082		.word	0x41007400		@ ARM74x
1083		.word	0xff00ff00
1084		W(b)	__armv3_mpu_cache_on
1085		W(b)	__armv3_mpu_cache_off
1086		W(b)	__armv3_mpu_cache_flush
1087
1088		.word	0x41009400		@ ARM94x
1089		.word	0xff00ff00
1090		W(b)	__armv4_mpu_cache_on
1091		W(b)	__armv4_mpu_cache_off
1092		W(b)	__armv4_mpu_cache_flush
1093
1094		.word	0x41069260		@ ARM926EJ-S (v5TEJ)
1095		.word	0xff0ffff0
1096		W(b)	__arm926ejs_mmu_cache_on
1097		W(b)	__armv4_mmu_cache_off
1098		W(b)	__armv5tej_mmu_cache_flush
1099
1100		.word	0x00007000		@ ARM7 IDs
1101		.word	0x0000f000
1102		mov	pc, lr
1103 THUMB(		nop				)
1104		mov	pc, lr
1105 THUMB(		nop				)
1106		mov	pc, lr
1107 THUMB(		nop				)
1108
1109		@ Everything from here on will be the new ID system.
1110
1111		.word	0x4401a100		@ sa110 / sa1100
1112		.word	0xffffffe0
1113		W(b)	__armv4_mmu_cache_on
1114		W(b)	__armv4_mmu_cache_off
1115		W(b)	__armv4_mmu_cache_flush
1116
1117		.word	0x6901b110		@ sa1110
1118		.word	0xfffffff0
1119		W(b)	__armv4_mmu_cache_on
1120		W(b)	__armv4_mmu_cache_off
1121		W(b)	__armv4_mmu_cache_flush
1122
1123		.word	0x56056900
1124		.word	0xffffff00		@ PXA9xx
1125		W(b)	__armv4_mmu_cache_on
1126		W(b)	__armv4_mmu_cache_off
1127		W(b)	__armv4_mmu_cache_flush
1128
1129		.word	0x56158000		@ PXA168
1130		.word	0xfffff000
1131		W(b)	__armv4_mmu_cache_on
1132		W(b)	__armv4_mmu_cache_off
1133		W(b)	__armv5tej_mmu_cache_flush
1134
1135		.word	0x56050000		@ Feroceon
1136		.word	0xff0f0000
1137		W(b)	__armv4_mmu_cache_on
1138		W(b)	__armv4_mmu_cache_off
1139		W(b)	__armv5tej_mmu_cache_flush
1140
1141#ifdef CONFIG_CPU_FEROCEON_OLD_ID
1142		/* this conflicts with the standard ARMv5TE entry */
1143		.long	0x41009260		@ Old Feroceon
1144		.long	0xff00fff0
1145		b	__armv4_mmu_cache_on
1146		b	__armv4_mmu_cache_off
1147		b	__armv5tej_mmu_cache_flush
1148#endif
1149
1150		.word	0x66015261		@ FA526
1151		.word	0xff01fff1
1152		W(b)	__fa526_cache_on
1153		W(b)	__armv4_mmu_cache_off
1154		W(b)	__fa526_cache_flush
1155
1156		@ These match on the architecture ID
1157
1158		.word	0x00020000		@ ARMv4T
1159		.word	0x000f0000
1160		W(b)	__armv4_mmu_cache_on
1161		W(b)	__armv4_mmu_cache_off
1162		W(b)	__armv4_mmu_cache_flush
1163
1164		.word	0x00050000		@ ARMv5TE
1165		.word	0x000f0000
1166		W(b)	__armv4_mmu_cache_on
1167		W(b)	__armv4_mmu_cache_off
1168		W(b)	__armv4_mmu_cache_flush
1169
1170		.word	0x00060000		@ ARMv5TEJ
1171		.word	0x000f0000
1172		W(b)	__armv4_mmu_cache_on
1173		W(b)	__armv4_mmu_cache_off
1174		W(b)	__armv5tej_mmu_cache_flush
1175
1176		.word	0x0007b000		@ ARMv6
1177		.word	0x000ff000
1178		W(b)	__armv6_mmu_cache_on
1179		W(b)	__armv4_mmu_cache_off
1180		W(b)	__armv6_mmu_cache_flush
1181
1182		.word	0x000f0000		@ new CPU Id
1183		.word	0x000f0000
1184		W(b)	__armv7_mmu_cache_on
1185		W(b)	__armv7_mmu_cache_off
1186		W(b)	__armv7_mmu_cache_flush
1187
1188		.word	0			@ unrecognised type
1189		.word	0
1190		mov	pc, lr
1191 THUMB(		nop				)
1192		mov	pc, lr
1193 THUMB(		nop				)
1194		mov	pc, lr
1195 THUMB(		nop				)
1196
1197		.size	proc_types, . - proc_types
1198
1199		/*
1200		 * If you get a "non-constant expression in ".if" statement"
1201		 * error from the assembler on this line, check that you have
1202		 * not accidentally written a "b" instruction where you should
1203		 * have written W(b).
1204		 */
1205		.if (. - proc_types) % PROC_ENTRY_SIZE != 0
1206		.error "The size of one or more proc_types entries is wrong."
1207		.endif
1208
1209/*
1210 * Turn off the Cache and MMU.  ARMv3 does not support
1211 * reading the control register, but ARMv4 does.
1212 *
1213 * On exit,
1214 *  r0, r1, r2, r3, r9, r12 corrupted
1215 * This routine must preserve:
1216 *  r4, r7, r8
1217 */
1218		.align	5
1219cache_off:	mov	r3, #12			@ cache_off function
1220		b	call_cache_fn
1221
1222__armv4_mpu_cache_off:
1223		mrc	p15, 0, r0, c1, c0
1224		bic	r0, r0, #0x000d
1225		mcr	p15, 0, r0, c1, c0	@ turn MPU and cache off
1226		mov	r0, #0
1227		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
1228		mcr	p15, 0, r0, c7, c6, 0	@ flush D-Cache
1229		mcr	p15, 0, r0, c7, c5, 0	@ flush I-Cache
1230		mov	pc, lr
1231
1232__armv3_mpu_cache_off:
1233		mrc	p15, 0, r0, c1, c0
1234		bic	r0, r0, #0x000d
1235		mcr	p15, 0, r0, c1, c0, 0	@ turn MPU and cache off
1236		mov	r0, #0
1237		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
1238		mov	pc, lr
1239
1240__armv4_mmu_cache_off:
1241#ifdef CONFIG_MMU
1242		mrc	p15, 0, r0, c1, c0
1243		bic	r0, r0, #0x000d
1244		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
1245		mov	r0, #0
1246		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
1247		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
1248#endif
1249		mov	pc, lr
1250
1251__armv7_mmu_cache_off:
1252		mrc	p15, 0, r0, c1, c0
1253#ifdef CONFIG_MMU
1254		bic	r0, r0, #0x0005
1255#else
1256		bic	r0, r0, #0x0004
1257#endif
1258		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
1259		mov	r0, #0
1260#ifdef CONFIG_MMU
1261		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
1262#endif
1263		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
1264		mcr	p15, 0, r0, c7, c10, 4	@ DSB
1265		mcr	p15, 0, r0, c7, c5, 4	@ ISB
1266		mov	pc, lr
1267
1268/*
1269 * Clean and flush the cache to maintain consistency.
1270 *
1271 * On entry,
1272 *  r0 = start address
1273 *  r1 = end address (exclusive)
1274 * On exit,
1275 *  r1, r2, r3, r9, r10, r11, r12 corrupted
1276 * This routine must preserve:
1277 *  r4, r6, r7, r8
1278 */
1279		.align	5
1280cache_clean_flush:
1281		mov	r3, #16
1282		mov	r11, r1
1283		b	call_cache_fn
1284
1285__armv4_mpu_cache_flush:
1286		tst	r4, #1
1287		movne	pc, lr
1288		mov	r2, #1
1289		mov	r3, #0
1290		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
1291		mov	r1, #7 << 5		@ 8 segments
12921:		orr	r3, r1, #63 << 26	@ 64 entries
12932:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
1294		subs	r3, r3, #1 << 26
1295		bcs	2b			@ entries 63 to 0
1296		subs 	r1, r1, #1 << 5
1297		bcs	1b			@ segments 7 to 0
1298
1299		teq	r2, #0
1300		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
1301		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
1302		mov	pc, lr
1303
1304__fa526_cache_flush:
1305		tst	r4, #1
1306		movne	pc, lr
1307		mov	r1, #0
1308		mcr	p15, 0, r1, c7, c14, 0	@ clean and invalidate D cache
1309		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
1310		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1311		mov	pc, lr
1312
1313__armv6_mmu_cache_flush:
1314		mov	r1, #0
1315		tst	r4, #1
1316		mcreq	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
1317		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
1318		mcreq	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
1319		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1320		mov	pc, lr
1321
1322__armv7_mmu_cache_flush:
1323		enable_cp15_barriers	r10
1324		tst	r4, #1
1325		bne	iflush
1326		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
1327		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
1328		mov	r10, #0
1329		beq	hierarchical
1330		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
1331		b	iflush
1332hierarchical:
1333		dcache_line_size r1, r2		@ r1 := dcache min line size
1334		sub	r2, r1, #1		@ r2 := line size mask
1335		bic	r0, r0, r2		@ round down start to line size
1336		sub	r11, r11, #1		@ end address is exclusive
1337		bic	r11, r11, r2		@ round down end to line size
13380:		cmp	r0, r11			@ finished?
1339		bgt	iflush
1340		mcr	p15, 0, r0, c7, c14, 1	@ Dcache clean/invalidate by VA
1341		add	r0, r0, r1
1342		b	0b
1343iflush:
1344		mcr	p15, 0, r10, c7, c10, 4	@ DSB
1345		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
1346		mcr	p15, 0, r10, c7, c10, 4	@ DSB
1347		mcr	p15, 0, r10, c7, c5, 4	@ ISB
1348		mov	pc, lr
1349
1350__armv5tej_mmu_cache_flush:
1351		tst	r4, #1
1352		movne	pc, lr
13531:		mrc	p15, 0, APSR_nzcv, c7, c14, 3	@ test,clean,invalidate D cache
1354		bne	1b
1355		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
1356		mcr	p15, 0, r0, c7, c10, 4	@ drain WB
1357		mov	pc, lr
1358
1359__armv4_mmu_cache_flush:
1360		tst	r4, #1
1361		movne	pc, lr
1362		mov	r2, #64*1024		@ default: 32K dcache size (*2)
1363		mov	r11, #32		@ default: 32 byte line size
1364		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
1365		teq	r3, r9			@ cache ID register present?
1366		beq	no_cache_id
1367		mov	r1, r3, lsr #18
1368		and	r1, r1, #7
1369		mov	r2, #1024
1370		mov	r2, r2, lsl r1		@ base dcache size *2
1371		tst	r3, #1 << 14		@ test M bit
1372		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
1373		mov	r3, r3, lsr #12
1374		and	r3, r3, #3
1375		mov	r11, #8
1376		mov	r11, r11, lsl r3	@ cache line size in bytes
1377no_cache_id:
1378		mov	r1, pc
1379		bic	r1, r1, #63		@ align to longest cache line
1380		add	r2, r1, r2
13811:
1382 ARM(		ldr	r3, [r1], r11		) @ s/w flush D cache
1383 THUMB(		ldr     r3, [r1]		) @ s/w flush D cache
1384 THUMB(		add     r1, r1, r11		)
1385		teq	r1, r2
1386		bne	1b
1387
1388		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
1389		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
1390		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1391		mov	pc, lr
1392
1393__armv3_mmu_cache_flush:
1394__armv3_mpu_cache_flush:
1395		tst	r4, #1
1396		movne	pc, lr
1397		mov	r1, #0
1398		mcr	p15, 0, r1, c7, c0, 0	@ invalidate whole cache v3
1399		mov	pc, lr
1400
1401/*
1402 * Various debugging routines for printing hex characters and
1403 * memory, which again must be relocatable.
1404 */
1405#ifdef DEBUG
1406		.align	2
1407		.type	phexbuf,#object
1408phexbuf:	.space	12
1409		.size	phexbuf, . - phexbuf
1410
1411@ phex corrupts {r0, r1, r2, r3}
1412phex:		adr	r3, phexbuf
1413		mov	r2, #0
1414		strb	r2, [r3, r1]
14151:		subs	r1, r1, #1
1416		movmi	r0, r3
1417		bmi	puts
1418		and	r2, r0, #15
1419		mov	r0, r0, lsr #4
1420		cmp	r2, #10
1421		addge	r2, r2, #7
1422		add	r2, r2, #'0'
1423		strb	r2, [r3, r1]
1424		b	1b
1425
1426@ puts corrupts {r0, r1, r2, r3}
1427puts:		loadsp	r3, r2, r1
14281:		ldrb	r2, [r0], #1
1429		teq	r2, #0
1430		moveq	pc, lr
14312:		writeb	r2, r3, r1
1432		mov	r1, #0x00020000
14333:		subs	r1, r1, #1
1434		bne	3b
1435		teq	r2, #'\n'
1436		moveq	r2, #'\r'
1437		beq	2b
1438		teq	r0, #0
1439		bne	1b
1440		mov	pc, lr
1441@ putc corrupts {r0, r1, r2, r3}
1442putc:
1443		mov	r2, r0
1444		loadsp	r3, r1, r0
1445		mov	r0, #0
1446		b	2b
1447
1448@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
1449memdump:	mov	r12, r0
1450		mov	r10, lr
1451		mov	r11, #0
14522:		mov	r0, r11, lsl #2
1453		add	r0, r0, r12
1454		mov	r1, #8
1455		bl	phex
1456		mov	r0, #':'
1457		bl	putc
14581:		mov	r0, #' '
1459		bl	putc
1460		ldr	r0, [r12, r11, lsl #2]
1461		mov	r1, #8
1462		bl	phex
1463		and	r0, r11, #7
1464		teq	r0, #3
1465		moveq	r0, #' '
1466		bleq	putc
1467		and	r0, r11, #7
1468		add	r11, r11, #1
1469		teq	r0, #7
1470		bne	1b
1471		mov	r0, #'\n'
1472		bl	putc
1473		cmp	r11, #64
1474		blt	2b
1475		mov	pc, r10
1476#endif
1477
1478		.ltorg
1479
1480#ifdef CONFIG_ARM_VIRT_EXT
1481.align 5
1482__hyp_reentry_vectors:
1483		W(b)	.			@ reset
1484		W(b)	.			@ undef
1485#ifdef CONFIG_EFI_STUB
1486		W(b)	__enter_kernel_from_hyp	@ hvc from HYP
1487#else
1488		W(b)	.			@ svc
1489#endif
1490		W(b)	.			@ pabort
1491		W(b)	.			@ dabort
1492		W(b)	__enter_kernel		@ hyp
1493		W(b)	.			@ irq
1494		W(b)	.			@ fiq
1495#endif /* CONFIG_ARM_VIRT_EXT */
1496
1497__enter_kernel:
1498		mov	r0, #0			@ must be 0
1499		mov	r1, r7			@ restore architecture number
1500		mov	r2, r8			@ restore atags pointer
1501#ifdef CONFIG_RANDOMIZE_BASE
1502		ldr	r3, __kaslr_offset
1503		add	r4, r4, #4		@ skip first instruction
1504#endif
1505 ARM(		mov	pc, r4		)	@ call kernel
1506 M_CLASS(	add	r4, r4, #1	)	@ enter in Thumb mode for M class
1507 THUMB(		bx	r4		)	@ entry point is always ARM for A/R classes
1508
1509#ifdef CONFIG_RANDOMIZE_BASE
		/*
		 * Minimal implementation of CRC-16 that does not use a
		 * lookup table and uses 32-bit wide loads, so it still
		 * performs reasonably well with the D-cache off. Equivalent
		 * to lib/crc16.c for input sizes that are 4-byte multiples.
		 */
1516ENTRY(__crc16)
1517		push	{r4, lr}
1518		ldr	r3, =0xa001     @ CRC-16 polynomial
15190:		subs	r2, r2, #4
1520		popmi	{r4, pc}
1521		ldr	r4, [r1], #4
1522#ifdef __ARMEB__
1523		eor	ip, r4, r4, ror #16     @ endian swap
1524		bic	ip, ip, #0x00ff0000
1525		mov	r4, r4, ror #8
1526		eor	r4, r4, ip, lsr #8
1527#endif
1528		eor	r0, r0, r4
1529		.rept	32
1530		lsrs	r0, r0, #1
1531		eorcs	r0, r0, r3
1532		.endr
1533		b	0b
1534ENDPROC(__crc16)
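		/*
		 * Illustrative C equivalent of the inner loop, using the
		 * reflected CRC-16 polynomial 0xa001 and consuming one
		 * 32-bit word per iteration:
		 *
		 *	crc ^= *buf++;
		 *	for (i = 0; i < 32; i++)
		 *		crc = (crc & 1) ? (crc >> 1) ^ 0xa001 : crc >> 1;
		 */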
1535
1536		.align	2
1537__kaslr_seed:	.long	0
1538__kaslr_offset:	.long	0
1539#endif
1540
1541reloc_code_end:
1542
1543#ifdef CONFIG_EFI_STUB
1544__enter_kernel_from_hyp:
1545		mrc	p15, 4, r0, c1, c0, 0	@ read HSCTLR
1546		bic	r0, r0, #0x5		@ disable MMU and caches
1547		mcr	p15, 4, r0, c1, c0, 0	@ write HSCTLR
1548		isb
1549		b	__enter_kernel
1550
1551ENTRY(efi_enter_kernel)
1552		mov	r4, r0			@ preserve image base
1553		mov	r8, r1			@ preserve DT pointer
1554
1555		adr_l	r0, call_cache_fn
1556		adr	r1, 0f			@ clean the region of code we
1557		bl	cache_clean_flush	@ may run with the MMU off
1558
1559#ifdef CONFIG_ARM_VIRT_EXT
1560		@
1561		@ The EFI spec does not support booting on ARM in HYP mode,
1562		@ since it mandates that the MMU and caches are on, with all
1563		@ 32-bit addressable DRAM mapped 1:1 using short descriptors.
1564		@
1565		@ While the EDK2 reference implementation adheres to this,
1566		@ U-Boot might decide to enter the EFI stub in HYP mode
1567		@ anyway, with the MMU and caches either on or off.
1568		@
1569		mrs	r0, cpsr		@ get the current mode
1570		msr	spsr_cxsf, r0		@ record boot mode
1571		and	r0, r0, #MODE_MASK	@ are we running in HYP mode?
1572		cmp	r0, #HYP_MODE
1573		bne	.Lefi_svc
1574
1575		mrc	p15, 4, r1, c1, c0, 0	@ read HSCTLR
1576		tst	r1, #0x1		@ MMU enabled at HYP?
1577		beq	1f
1578
1579		@
1580		@ When running in HYP mode with the caches on, we're better
1581		@ off just carrying on using the cached 1:1 mapping that the
1582		@ firmware provided. Set up the HYP vectors so HVC instructions
1583		@ issued from HYP mode take us to the correct handler code. We
1584		@ will disable the MMU before jumping to the kernel proper.
1585		@
1586 ARM(		bic	r1, r1, #(1 << 30)	) @ clear HSCTLR.TE
1587 THUMB(		orr	r1, r1, #(1 << 30)	) @ set HSCTLR.TE
1588		mcr	p15, 4, r1, c1, c0, 0
1589		adr	r0, __hyp_reentry_vectors
1590		mcr	p15, 4, r0, c12, c0, 0	@ set HYP vector base (HVBAR)
1591		isb
1592		b	.Lefi_hyp
1593
1594		@
1595		@ When running in HYP mode with the caches off, we need to drop
1596		@ into SVC mode now, and let the decompressor set up its cached
1597		@ 1:1 mapping as usual.
1598		@
15991:		mov	r9, r4			@ preserve image base
1600		bl	__hyp_stub_install	@ install HYP stub vectors
1601		safe_svcmode_maskall	r1	@ drop to SVC mode
1602		msr	spsr_cxsf, r0		@ record boot mode
1603		orr	r4, r9, #1		@ restore image base and set LSB
1604		b	.Lefi_hyp
1605.Lefi_svc:
1606#endif
1607		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
1608		tst	r0, #0x1		@ MMU enabled?
1609		orreq	r4, r4, #1		@ set LSB if not
1610
1611.Lefi_hyp:
1612		mov	r0, r8			@ DT start
1613		add	r1, r8, r2		@ DT end
1614		bl	cache_clean_flush
1615
1616		adr	r0, 0f			@ switch to our stack
1617		ldr	sp, [r0]
1618		add	sp, sp, r0
1619
1620		mov	r5, #0			@ appended DTB size
1621		mov	r7, #0xFFFFFFFF		@ machine ID
1622		b	wont_overwrite
1623ENDPROC(efi_enter_kernel)
16240:		.long	.L_user_stack_end - .
1625#endif
1626
1627		.align
1628		.section ".stack", "aw", %nobits
1629.L_user_stack:	.space	4096
1630.L_user_stack_end:
1631