/*
 * Copyright © 2008 Mozilla Corporation
 * Copyright © 2010 Nokia Corporation
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the name of Mozilla Corporation not be used in
 * advertising or publicity pertaining to distribution of the software without
 * specific, written prior permission. Mozilla Corporation makes no
 * representations about the suitability of this software for any purpose. It
 * is provided "as is" without express or implied warranty.
 *
 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 *
 * Author: Jeff Muizelaar (jeff@infidigm.net)
 *
 */

/* Prevent the stack from becoming executable */
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

        .text
        .arch armv6
        .object_arch armv4
        .arm
        .altmacro
        .p2align 2

#include "pixman-arm-asm.h"

/*
 * Note: This code is only using armv5te instructions (not even armv6),
 *       but is scheduled for ARM Cortex-A8 pipeline. So it might need to
 *       be split into a few variants, tuned for each microarchitecture.
 *
 * TODO: In order to get good performance on ARM9/ARM11 cores (which don't
 * have efficient write combining), it needs to be changed to use 16-byte
 * aligned writes using STM instruction.
 *
 * Nearest scanline scaler macro template uses the following arguments:
 *  fname                     - name of the function to generate
 *  bpp_shift                 - (1 << bpp_shift) is the size of pixel in bytes
 *  t                         - type suffix for LDR/STR instructions
 *  prefetch_distance         - prefetch in the source image by that many
 *                              pixels ahead
 *  prefetch_braking_distance - stop prefetching when that many pixels are
 *                              remaining before the end of scanline
 *
 * Because the file is assembled in .altmacro mode, the macro arguments are
 * referenced by their bare names inside the body and "&" glues the type
 * suffix onto a mnemonic (ldr&t, str&t).
 *
 * Inputs of every generated function, as read by the code below:
 *  r0           - W, number of pixels to produce
 *  r1           - DST, destination pointer, advanced by one pixel per store
 *  r2           - SRC, base address added to the computed byte offsets
 *  r3           - VX, source position; the byte offset of a pixel is
 *                 (VX >> 16) * pixel size, i.e. VX is used as a value with
 *                 16 fractional bits
 *  [sp] at entry     - UNIT_X, increment added to VX after each pixel
 *  [sp, #4] at entry - SRC_WIDTH_FIXED, amount subtracted from VX while VX
 *                      is non-negative after an increment
 *
 * NOTE(review): the wrap step only fires while VX is non-negative, so the
 * caller presumably passes VX already biased by -SRC_WIDTH_FIXED together
 * with a matching SRC pointer - confirm against the C caller.
 *
 * r4-r8 and r10 are saved on entry and restored on exit. r10 is not used
 * by the visible code; presumably it is pushed only to keep the stack
 * adjustment a multiple of 8 bytes - confirm.
 */

.macro generate_nearest_scanline_func fname, bpp_shift, t,      \
                                      prefetch_distance,        \
                                      prefetch_braking_distance

pixman_asm_function fname
        /* register roles, released again with .unreq before returning */
        W               .req    r0
        DST             .req    r1
        SRC             .req    r2
        VX              .req    r3
        UNIT_X          .req    ip
        TMP1            .req    r4
        TMP2            .req    r5
        VXMASK          .req    r6
        PF_OFFS         .req    r7
        SRC_WIDTH_FIXED .req    r8

        /* first stack argument, fetched before sp moves */
        ldr     UNIT_X, [sp]
        push    {r4-r8, r10}
        /* mask that clears the sub-pixel bits of a shifted VX */
        mvn     VXMASK, #((1 << bpp_shift) - 1)
        /* 24 bytes were pushed, so this is the second stack argument */
        ldr     SRC_WIDTH_FIXED, [sp, #28]

        /*
         * Helper: emit two pixels.
         *
         * On entry TMP1 holds the byte offset of the next source pixel and
         * on exit it holds the offset of the pixel after the two emitted
         * here. TMP1 and TMP2 alternate between "offset" and "pixel value"
         * so that each offset is computed before the load that uses it.
         * After every VX increment the "9:" loop subtracts SRC_WIDTH_FIXED
         * for as long as the result stays non-negative.
         */
        .macro  scale_2_pixels
                ldr&t   TMP1, [SRC, TMP1]
                and     TMP2, VXMASK, VX, asr #(16 - bpp_shift)
                adds    VX, VX, UNIT_X
                str&t   TMP1, [DST], #(1 << bpp_shift)
9:              subpls  VX, VX, SRC_WIDTH_FIXED
                bpl     9b

                ldr&t   TMP2, [SRC, TMP2]
                and     TMP1, VXMASK, VX, asr #(16 - bpp_shift)
                adds    VX, VX, UNIT_X
                str&t   TMP2, [DST], #(1 << bpp_shift)
9:              subpls  VX, VX, SRC_WIDTH_FIXED
                bpl     9b
        .endm

        /* prime the pipeline: offset of the first pixel, then advance VX */
        and     TMP1, VXMASK, VX, asr #(16 - bpp_shift)
        adds    VX, VX, UNIT_X
9:      subpls  VX, VX, SRC_WIDTH_FIXED
        bpl     9b

        /*
         * Bias W so that the prefetching loop only runs while at least
         * (8 + prefetch_braking_distance) pixels remain.
         */
        subs    W, W, #(8 + prefetch_braking_distance)
        blt     2f

        /*
         * Prefetch position: PF_OFFS = UNIT_X * prefetch_distance + VX.
         * It is advanced by 8 * UNIT_X per iteration and is neither masked
         * nor reduced by SRC_WIDTH_FIXED.
         */
        mov     PF_OFFS, #prefetch_distance
        mla     PF_OFFS, UNIT_X, PF_OFFS, VX

1:
        /* main loop, 8 pixels per iteration with prefetch */
        pld     [SRC, PF_OFFS, asr #(16 - bpp_shift)]
        add     PF_OFFS, PF_OFFS, UNIT_X, lsl #3
        scale_2_pixels
        scale_2_pixels
        scale_2_pixels
        scale_2_pixels
        subs    W, W, #8
        bge     1b

2:
        /* undo the bias above; W becomes (pixels remaining - 4) */
        subs    W, W, #(4 - 8 - prefetch_braking_distance)
        blt     2f

1:
        /* remaining pixels, 4 per iteration, no prefetch */
        scale_2_pixels
        scale_2_pixels
        subs    W, W, #4
        bge     1b

2:
        /*
         * W is now (pixels remaining - 4) with 0..3 pixels remaining, so
         * its two low bits equal those of the remaining pixel count.
         */
        tst     W, #2
        beq     2f
        scale_2_pixels

2:
        /* last odd pixel, stored without advancing DST */
        tst     W, #1
        ldrne&t TMP1, [SRC, TMP1]
        strne&t TMP1, [DST]

        /* drop the helper so the next instantiation can define it again */
        .purgem scale_2_pixels

        .unreq  DST
        .unreq  SRC
        .unreq  W
        .unreq  VX
        .unreq  UNIT_X
        .unreq  TMP1
        .unreq  TMP2
        .unreq  VXMASK
        .unreq  PF_OFFS
        .unreq  SRC_WIDTH_FIXED

        /* return */
        pop     {r4-r8, r10}
        bx      lr
.endfunc
.endm

/* 16 bits per pixel: halfword loads/stores, prefetch 80 pixels ahead */
generate_nearest_scanline_func \
    pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32

/* 32 bits per pixel: word loads/stores, prefetch 48 pixels ahead */
generate_nearest_scanline_func \
    pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2,  , 48, 32