/*
 * Copyright (C) 2013 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Target: AArch64 (A64), GNU assembler syntax.
 */
    .text
    // A64 instructions are 4 bytes wide; state the alignment explicitly
    // instead of relying on the assembler's default for a bare .align.
    .p2align 2

    .global scanline_t32cb16blend_arm64

/*
 * .macro pixel
 *
 * Alpha blends one RGB565 destination pixel, located in either the bottom
 * or the top 16 bits of DREG, with the 32-bit SRC pixel and places the
 * result in the matching half of FB.
 *
 * \DREG is a 32-bit register containing *two* original destination RGB565
 *       pixels, with the even one in the low 16 bits, and the odd one in
 *       the high 16 bits.
 *
 * \SRC  is a 32-bit 0xAABBGGRR pixel value, with pre-multiplied colors.
 *
 * \FB   is a target register that will contain the blended pixel values.
 *       The ODD=0 expansion overwrites FB (its red step writes FB without
 *       reading it); the ODD=1 expansion only ORs its result into the
 *       upper half. For a pixel pair, expand ODD=0 first, then ODD=1.
 *
 * \ODD  is either 0 or 1 and indicates if we're blending the lower or
 *       upper 16-bit pixel of DREG into FB.
 *
 * Per channel the result is
 *       min(channel_max, s + ((d * (0x100 - (sA + (sA >> 7)))) >> 8))
 * where s is the source channel truncated to 5 or 6 bits and d is the
 * destination channel.
 *
 * clobbered: w6, w7, w15, w16, w17, condition flags
 *
 * x18 is deliberately not used: AAPCS64 designates it the platform
 * register, so it must not serve as scratch here.
 */

.macro pixel,   DREG, SRC, FB, ODD

    // w7 = 0x100 - (sA + (sA >> 7)): weight applied to the destination.
    // Adding sA >> 7 maps sA = 0xFF to a weight of exactly 0.
    lsr     w7, \SRC, #24
    add     w7, w7, w7, lsr #7
    mov     w6, #0x100
    sub     w7, w6, w7

.if \ODD
    // ---- odd pixel: top 16 bits of DREG, top 16 bits of FB ----

    // red: bits 31..27, no mask needed after the shift
    lsr     w16, \DREG, #(16 + 11)
    mul     w16, w7, w16
    lsr     w6, \SRC, #3
    and     w6, w6, #0x1F
    add     w16, w6, w16, lsr #8
    cmp     w16, #0x1F
    orr     w17, \FB, #(0x1F << (16 + 11))      // saturated candidate
    orr     w15, \FB, w16, lsl #(16 + 11)       // in-range candidate
    csel    \FB, w17, w15, hi

    // green: bits 26..21
    and     w6, \DREG, #(0x3F << (16 + 5))
    lsr     w17, w6, #(16 + 5)
    mul     w6, w7, w17
    lsr     w16, \SRC, #(8 + 2)
    and     w16, w16, #0x3F
    add     w6, w16, w6, lsr #8
    cmp     w6, #0x3F
    orr     w17, \FB, #(0x3F << (16 + 5))
    orr     w15, \FB, w6, lsl #(16 + 5)
    csel    \FB, w17, w15, hi

    // blue: bits 20..16
    and     w16, \DREG, #(0x1F << 16)
    lsr     w17, w16, #16
    mul     w16, w7, w17
    lsr     w6, \SRC, #(8 + 8 + 3)
    and     w6, w6, #0x1F
    add     w16, w6, w16, lsr #8
    cmp     w16, #0x1F
    orr     w17, \FB, #(0x1F << 16)
    orr     w15, \FB, w16, lsl #16
    csel    \FB, w17, w15, hi

.else
    // ---- even pixel: bottom 16 bits of DREG, bottom 16 bits of FB ----

    // red: bits 15..11. This step initialises FB (mov/lsl, not orr).
    lsr     w16, \DREG, #11
    and     w16, w16, #0x1F
    mul     w16, w7, w16
    lsr     w6, \SRC, #3
    and     w6, w6, #0x1F
    add     w16, w6, w16, lsr #8
    cmp     w16, #0x1F
    mov     w17, #(0x1F << 11)                  // saturated candidate
    lsl     w15, w16, #11                       // in-range candidate
    csel    \FB, w17, w15, hi

    // green: bits 10..5. The field is multiplied in place, so the
    // product is shifted down by 5 + 8 afterwards.
    and     w6, \DREG, #(0x3F << 5)
    mul     w6, w7, w6
    lsr     w16, \SRC, #(8 + 2)
    and     w16, w16, #0x3F
    add     w6, w16, w6, lsr #(5 + 8)
    cmp     w6, #0x3F
    orr     w17, \FB, #(0x3F << 5)
    orr     w15, \FB, w6, lsl #5
    csel    \FB, w17, w15, hi

    // blue: bits 4..0
    and     w16, \DREG, #0x1F
    mul     w16, w7, w16
    lsr     w6, \SRC, #(8 + 8 + 3)
    and     w6, w6, #0x1F
    add     w16, w6, w16, lsr #8
    cmp     w16, #0x1F
    orr     w17, \FB, #0x1F
    orr     w15, \FB, w16
    csel    \FB, w17, w15, hi

.endif

.endm // End of pixel macro


/*
 * scanline_t32cb16blend_arm64
 *
 * Blends `count` 32-bit 0xAABBGGRR source pixels over RGB565 destination
 * pixels, in place in the destination buffer.
 *
 * C-equivalent prototype (presumed from the register usage below; confirm
 * against the caller's declaration):
 *   void scanline_t32cb16blend_arm64(uint16_t *dst, const uint32_t *src,
 *                                    size_t count);
 *
 * In:
 *   x0: dst ptr (16-bit pixels; advanced as pixels are written)
 *   x1: src ptr (32-bit pixels; advanced as pixels are read)
 *   w2: count   (only the low 32 bits are consumed)
 *
 * Working registers:
 *   w3:  destination pixel(s); also scratch for the all-zero source test
 *   w4:  s0, source for the even (or single) pixel
 *   w5:  s1, source for the odd pixel
 *   w12: blended result produced by the pixel macro
 *   w6, w7, w15, w16, w17: clobbered by the pixel macro
 *
 * Leaf routine: no stack use, no calls, only caller-saved registers.
 */
scanline_t32cb16blend_arm64:

    // Align DST to 32 bits: when dst is not 4-byte aligned, blend a single
    // leading pixel so the main loop can use 32-bit loads and stores.
    tst     x0, #0x3
    b.eq    aligned
    subs    w2, w2, #1
    b.lo    return                      // count was 0

last:
    // Blend exactly one pixel with 16-bit accesses. Reached for the
    // leading unaligned pixel and again for a trailing odd pixel.
    ldr     w4, [x1], #4
    ldrh    w3, [x0]
    pixel   w3, w4, w12, 0
    strh    w12, [x0], #2

aligned:
    // From here on w2 is kept biased by -2 (remaining pixels minus 2).
    subs    w2, w2, #2
    b.lo    9f                          // fewer than 2 pixels left

    // The main loop is unrolled twice and processes 4 pixels
8:
    ldp     w4, w5, [x1], #8
    add     x0, x0, #4
    // both source pixels are all zero: leave this destination pair alone
    orr     w3, w4, w5
    cbz     w3, 7f

    // load the destination pair, blend both halves, store it back
    ldr     w3, [x0, #-4]
    pixel   w3, w4, w12, 0
    pixel   w3, w5, w12, 1
    str     w12, [x0, #-4]

    // 2nd (unrolled) iteration of the loop
    subs    w2, w2, #2
    b.lo    9f                          // fewer than 2 pixels left
    ldp     w4, w5, [x1], #8
    add     x0, x0, #4
    orr     w3, w4, w5
    cbz     w3, 7f
    ldr     w3, [x0, #-4]
    pixel   w3, w4, w12, 0
    pixel   w3, w5, w12, 1
    str     w12, [x0, #-4]

7:  subs    w2, w2, #2
    b.hs    8b

    // w2 is now -2 (nothing left) or -1 (one pixel left).
9:  adds    w2, w2, #1
    b.lo    return                      // no carry: w2 was -2
    b       last                        // carry:    w2 was -1, now 0

return:
    ret