• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * Copyright © 2008 Mozilla Corporation
3 * Copyright © 2010 Nokia Corporation
4 *
5 * Permission to use, copy, modify, distribute, and sell this software and its
6 * documentation for any purpose is hereby granted without fee, provided that
7 * the above copyright notice appear in all copies and that both that
8 * copyright notice and this permission notice appear in supporting
9 * documentation, and that the name of Mozilla Corporation not be used in
10 * advertising or publicity pertaining to distribution of the software without
11 * specific, written prior permission.  Mozilla Corporation makes no
12 * representations about the suitability of this software for any purpose.  It
13 * is provided "as is" without express or implied warranty.
14 *
15 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
16 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
17 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
20 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
21 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
22 * SOFTWARE.
23 *
24 * Author:  Jeff Muizelaar (jeff@infidigm.net)
25 *
26 */
27
28/* Prevent the stack from becoming executable */
29#if defined(__linux__) && defined(__ELF__)
30.section .note.GNU-stack,"",%progbits
31#endif
32
33	.text			/* everything below is emitted into the text section */
34	.arch armv6		/* instruction set the assembler accepts for this file */
35	.object_arch armv4	/* architecture recorded in the object file, overriding the .arch value */
36	.arm			/* ARM (not Thumb) instruction encoding */
37	.altmacro		/* alternate macro mode; the macro below uses bare parameter names and the "&" separator */
38	.p2align 2		/* align to 1 << 2 = 4 bytes */
39
/*
 * pixman_asm_function, used by the macro below, is not defined in this file;
 * it is expected to come from this header (confirm there, together with the
 * directive that the closing .endfunc pairs with).
 */
40#include "pixman-arm-asm.h"
41
42/*
43 * Note: This code is only using armv5te instructions (not even armv6),
44 *       but is scheduled for ARM Cortex-A8 pipeline. So it might need to
45 *       be split into a few variants, tuned for each microarchitecture.
46 *
47 * TODO: In order to get good performance on ARM9/ARM11 cores (which don't
48 * have efficient write combining), it needs to be changed to use 16-byte
49 * aligned writes using STM instruction.
50 *
51 * Nearest scanline scaler macro template uses the following arguments:
52 *  fname                     - name of the function to generate
53 *  bpp_shift                 - (1 << bpp_shift) is the size of pixel in bytes
54 *  t                         - type suffix for LDR/STR instructions
55 *  prefetch_distance         - prefetch in the source image by that many
56 *                              pixels ahead
57 *  prefetch_braking_distance - stop prefetching when that many pixels are
58 *                              remaining before the end of scanline
59 */
60
/*
 * generate_nearest_scanline_func
 *
 * Emits one function called fname (declared through pixman_asm_function)
 * that writes W pixels to DST, each one picked from SRC by the fixed-point
 * coordinate VX.
 *
 * Inputs, as read by the code below:
 *   r0 = W                pixels still to write
 *   r1 = DST              destination pointer, post-incremented by the
 *                         pixel size on every store
 *   r2 = SRC              base address for all source loads
 *   r3 = VX               source coordinate, 16 fractional bits
 *   [sp]     at entry     UNIT_X, added to VX after every pixel
 *   [sp, #4] at entry     SRC_WIDTH_FIXED (loaded as [sp, #28] after the
 *                         24-byte push)
 *
 * Work done for each pixel:
 *   offset = (VX asr (16 - bpp_shift)) & ~((1 << bpp_shift) - 1)
 *   store the pixel loaded from SRC + offset to DST, advance DST
 *   VX += UNIT_X
 *   while (VX >= 0) VX -= SRC_WIDTH_FIXED
 *
 * The wrap step leaves VX negative after every pixel, so the byte offsets
 * are negative as well; presumably the caller passes SRC pointing past the
 * end of the source scanline - confirm against the C caller.
 *
 * The code is software pipelined: the byte offset of the next pixel is
 * always computed (into TMP1 or TMP2) before the current pixel is stored,
 * so TMP1 holds a ready offset whenever scale_2_pixels is entered or left.
 *
 * r4-r8 and r10 are saved on entry and restored before returning.
 */
61.macro generate_nearest_scanline_func fname, bpp_shift, t,      \
62                                      prefetch_distance,        \
63                                      prefetch_braking_distance
64
65pixman_asm_function fname
66	W		.req	r0
67	DST		.req	r1
68	SRC		.req	r2
69	VX		.req	r3
70	UNIT_X		.req	ip
71	TMP1		.req	r4
72	TMP2		.req	r5
73	VXMASK		.req	r6
74	PF_OFFS		.req	r7
75	SRC_WIDTH_FIXED	.req	r8
76
	/* UNIT_X must be fetched before the push moves sp */
77	ldr	UNIT_X, [sp]
	/*
	 * Six registers = 24 bytes. r10 has no alias and is not used in this
	 * block; presumably it is included to keep the register count even
	 * (8-byte stack alignment) - confirm.
	 */
78	push	{r4, r5, r6, r7, r8, r10}
	/* VXMASK = ~(pixel size - 1): clears the low bpp_shift bits of an offset */
79	mvn	VXMASK, #((1 << bpp_shift) - 1)
	/* 28 = 24 bytes just pushed + 4: the stack word following UNIT_X */
80	ldr	SRC_WIDTH_FIXED, [sp, #28]
81
82	/* define helper macro */
	/*
	 * scale_2_pixels: emits two pixels.
	 * On entry TMP1 holds the byte offset of the first pixel; on exit it
	 * holds the byte offset of the pixel that follows the second one.
	 * TMP2 is used for the second pixel. Updates VX and DST, changes flags.
	 */
83	.macro	scale_2_pixels
		/* load the pixel whose offset was prepared earlier (TMP1 reused as data) */
84		ldr&t	TMP1, [SRC, TMP1]
		/* byte offset of the following pixel: integer part of VX times pixel size */
85		and	TMP2, VXMASK, VX, asr #(16 - bpp_shift)
		/* advance the coordinate; the flags set here drive the wrap loop below */
86		adds	VX, VX, UNIT_X
		/* the store does not touch the flags set by adds */
87		str&t	TMP1, [DST], #(1 << bpp_shift)
		/* while VX is non-negative, pull it back by SRC_WIDTH_FIXED */
889:		subpls	VX, VX, SRC_WIDTH_FIXED
89		bpl	9b
90
		/* second pixel: same sequence with TMP1 and TMP2 swapped */
91		ldr&t	TMP2, [SRC, TMP2]
92		and	TMP1, VXMASK, VX, asr #(16 - bpp_shift)
93		adds	VX, VX, UNIT_X
94		str&t	TMP2, [DST], #(1 << bpp_shift)
959:		subpls	VX, VX, SRC_WIDTH_FIXED
96		bpl	9b
97	.endm
98
99	/* now do the scaling */
	/* prime the pipeline: offset of the first pixel, then step and wrap VX once */
100	and	TMP1, VXMASK, VX, asr #(16 - bpp_shift)
101	adds	VX, VX, UNIT_X
1029:	subpls	VX, VX, SRC_WIDTH_FIXED
103	bpl	9b
	/*
	 * Bias W so that it goes negative once fewer than
	 * (8 + prefetch_braking_distance) pixels remain; below that the
	 * prefetching loop is skipped.
	 */
104	subs	W, W, #(8 + prefetch_braking_distance)
105	blt	2f
106	/* calculate prefetch offset */
	/* PF_OFFS = UNIT_X * prefetch_distance + VX (VX has already been stepped once) */
107	mov	PF_OFFS, #prefetch_distance
108	mla	PF_OFFS, UNIT_X, PF_OFFS, VX
1091:	/* main loop, process 8 pixels per iteration with prefetch */
	/* the prefetch address uses the same coordinate-to-byte shift as the pixel loads */
110	pld	[SRC, PF_OFFS, asr #(16 - bpp_shift)]
	/* advance by 8 pixels; PF_OFFS is never wrapped by SRC_WIDTH_FIXED */
111	add	PF_OFFS, UNIT_X, lsl #3
112	scale_2_pixels
113	scale_2_pixels
114	scale_2_pixels
115	scale_2_pixels
116	subs	W, W, #8
117	bge	1b
1182:
	/* undo the bias applied above and apply a new one of 4: W = remaining - 4 */
119	subs	W, W, #(4 - 8 - prefetch_braking_distance)
120	blt	2f
1211:	/* process the remaining pixels */
	/* same work as the main loop, 4 pixels per iteration and no prefetch */
122	scale_2_pixels
123	scale_2_pixels
124	subs	W, W, #4
125	bge	1b
1262:
	/*
	 * W is now (remaining - 4) with remaining in 0..3. Subtracting 4 leaves
	 * the two low bits unchanged, so bit 1 and bit 0 of W say whether two
	 * and one more pixels are left.
	 */
127	tst	W, #2
128	beq	2f
129	scale_2_pixels
1302:
131	tst	W, #1
	/* last odd pixel: TMP1 already holds its offset; DST and VX need no further update */
132	ldrne&t	TMP1, [SRC, TMP1]
133	strne&t	TMP1, [DST]
134	/* cleanup helper macro */
	/* purge so that the next expansion of this macro can define scale_2_pixels again */
135	.purgem	scale_2_pixels
	/* drop the register aliases; the pop below uses plain register names */
136	.unreq	DST
137	.unreq	SRC
138	.unreq	W
139	.unreq	VX
140	.unreq	UNIT_X
141	.unreq	TMP1
142	.unreq	TMP2
143	.unreq	VXMASK
144	.unreq	PF_OFFS
145	.unreq  SRC_WIDTH_FIXED
146	/* return */
	/* restores exactly the register list pushed on entry */
147	pop	{r4, r5, r6, r7, r8, r10}
148	bx	lr
149.endfunc
150.endm
151
/*
 * 0565 -> 0565 variant: bpp_shift = 1 (2-byte pixels), t = h so the pixel
 * loads and stores become ldrh/strh, prefetch 80 pixels ahead, stop
 * prefetching 32 pixels before the end of the scanline.
 */
152generate_nearest_scanline_func \
153    pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32
154
/*
 * 8888 -> 8888 variant: bpp_shift = 2 (4-byte pixels), empty t so the pixel
 * loads and stores are plain word ldr/str, prefetch 48 pixels ahead, stop
 * prefetching 32 pixels before the end of the scanline.
 */
155generate_nearest_scanline_func \
156    pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2,  , 48, 32
157