; PowerPC optimized drawing methods for Goom
; © 2003 Guillaume Borios
; This library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Library General Public
; License as published by the Free Software Foundation; either
; version 2 of the License, or (at your option) any later version.
;
; This library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
; Library General Public License for more details.
;
; You should have received a copy of the GNU Library General Public
; License along with this library; if not, write to the
; Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
; Boston, MA 02110-1301, USA.

; Change log :
; 30 May 2003 : File creation

; Section definition : We use a read only code section for the whole file
.section __TEXT,__text,regular,pure_instructions


; --------------------------------------------------------------------------------------
; Single 32b pixel drawing macros
; Usage :
;       DRAWMETHOD_XXXX_MACRO *pixelIN, *pixelOUT, COLOR, WR1, WR2, WR3, WR4
;       Only the work registers (WR) can be touched by the macros
;       (PLUS and HALF also update CR0 through "andi.")
;
; Macro arguments are positional : $0 = *pixelIN, $1 = *pixelOUT, $2 = COLOR,
; $3..$6 = WR1..WR4. Not every method needs all four work registers.
;
; Available methods :
;       DRAWMETHOD_DFLT_MACRO : Default drawing method (Actually PLUS)
;       DRAWMETHOD_PLUS_MACRO : RVB Saturated per channel addition (SLOWEST)
;       DRAWMETHOD_HALF_MACRO : 50% Transparency color drawing
;       DRAWMETHOD_OVRW_MACRO : Direct COLOR drawing (FASTEST)
;       DRAWMETHOD_B_OR_MACRO : Bitwise OR
;       DRAWMETHOD_BAND_MACRO : Bitwise AND
;       DRAWMETHOD_BXOR_MACRO : Bitwise XOR
;       DRAWMETHOD_BNOT_MACRO : Bitwise NOT
; --------------------------------------------------------------------------------------

;; Overwrite : the source pixel is never read.
.macro DRAWMETHOD_OVRW_MACRO
        stw     $2,0($1)                ;; *$1 <- $2
.endmacro

;; Bitwise OR of the source pixel and the color.
.macro DRAWMETHOD_B_OR_MACRO
        lwz     $3,0($0)                ;; $3 <- *$0
        or      $3,$3,$2                ;; $3 <- $3 | $2
        stw     $3,0($1)                ;; *$1 <- $3
.endmacro

;; Bitwise AND of the source pixel and the color.
.macro DRAWMETHOD_BAND_MACRO
        lwz     $3,0($0)                ;; $3 <- *$0
        and     $3,$3,$2                ;; $3 <- $3 & $2
        stw     $3,0($1)                ;; *$1 <- $3
.endmacro

;; Bitwise XOR of the source pixel and the color.
.macro DRAWMETHOD_BXOR_MACRO
        lwz     $3,0($0)                ;; $3 <- *$0
        xor     $3,$3,$2                ;; $3 <- $3 ^ $2
        stw     $3,0($1)                ;; *$1 <- $3
.endmacro

;; Bitwise NOT of the source pixel (the color argument is ignored).
.macro DRAWMETHOD_BNOT_MACRO
        lwz     $3,0($0)                ;; $3 <- *$0
        nand    $3,$3,$3                ;; $3 <- ~$3
        stw     $3,0($1)                ;; *$1 <- $3
.endmacro

;; Saturated per-channel addition.
;; The 0x0000FF00 channel is summed on its own; the 0x00FF0000 and 0x000000FF
;; channels are summed together in one register. Each channel's carry-out bit
;; is moved to the sign position and smeared back down with an algebraic shift
;; so that an overflowing channel is forced to 0xFF.
.macro DRAWMETHOD_PLUS_MACRO
        lwz     $4,0($0)                ;; $4 <- *$0
        andi.   $3,$4,0xFF00            ;; $3 <- $4 & 0x0000FF00
        andi.   $5,$2,0xFF00            ;; $5 <- $2 & 0x0000FF00
        add     $3,$3,$5                ;; $3 <- $3 + $5
        rlwinm  $5,$3,15,0,0            ;; $5 <- 0 | ($3[15] << 15)
        srawi   $5,$5,23                ;; $5 <- $5 >> 23 (algebraic for sign extension)
        or      $3,$3,$5                ;; $3 <- $3 | $5
        lis     $5,0xFF                 ;; $5 <- 0x00FF00FF
        addi    $5,$5,0xFF
        and     $4,$4,$5                ;; $4 <- $4 & $5
        and     $6,$2,$5                ;; $6 <- $2 & $5
        add     $4,$4,$6                ;; $4 <- $4 + $6
        rlwinm  $6,$4,7,0,0             ;; $6 <- 0 | ($4[7] << 7)
        srawi   $6,$6,15                ;; $6 <- $6 >> 15 (algebraic for sign extension)
        rlwinm  $5,$4,23,0,0            ;; $5 <- 0 | ($4[23] << 23)
        srawi   $5,$5,31                ;; $5 <- $5 >> 31 (algebraic for sign extension)
        rlwimi  $6,$5,0,24,31           ;; $6[24..31] <- $5[24..31]
        or      $4,$4,$6                ;; $4 <- $4 | $6
        rlwimi  $4,$3,0,16,23           ;; $4[16..23] <- $3[16..23]
        stw     $4,0($1)                ;; *$1 <- $4
.endmacro

;; 50% blend : per-channel (source + color) / 2.
;; Same channel split as PLUS; the sums are halved instead of saturated.
.macro DRAWMETHOD_HALF_MACRO
        lwz     $4,0($0)                ;; $4 <- *$0
        andi.   $3,$4,0xFF00            ;; $3 <- $4 & 0x0000FF00
        andi.   $5,$2,0xFF00            ;; $5 <- $2 & 0x0000FF00
        add     $3,$3,$5                ;; $3 <- $3 + $5
        lis     $5,0xFF                 ;; $5 <- 0x00FF00FF
        addi    $5,$5,0xFF
        and     $4,$4,$5                ;; $4 <- $4 & $5
        and     $5,$2,$5                ;; $5 <- $2 & $5
        add     $4,$4,$5                ;; $4 <- $4 + $5
        srwi    $4,$4,1                 ;; $4 <- $4 >> 1
        rlwimi  $4,$3,31,16,23          ;; $4[16..23] <- $3[15..22]
        stw     $4,0($1)                ;; *$1 <- $4
.endmacro

;; Default method : forwards every argument to the PLUS method.
;; (Without the explicit $0..$6 the inner macro would expand with empty operands.)
.macro DRAWMETHOD_DFLT_MACRO
        DRAWMETHOD_PLUS_MACRO $0,$1,$2,$3,$4,$5,$6
.endmacro

; --------------------------------------------------------------------------------------



; **************************************************************************************
; void DRAWMETHOD_PLUS_PPC(unsigned int * buf, unsigned int _col);
; void DRAWMETHOD_PLUS_2_PPC(unsigned * in, unsigned int * out, unsigned int _col);
;
; Saturated addition of _col onto one pixel.
;   _PLUS_2_ : r3 = in, r4 = out, r5 = _col ; work registers r6-r9
;   _PLUS_   : r3 = buf (read and written in place), r4 = _col ; work registers r5-r7, r9
; CR0 is modified by both.
; **************************************************************************************
.globl _DRAWMETHOD_PLUS_2_PPC
.align 3
_DRAWMETHOD_PLUS_2_PPC:
        DRAWMETHOD_PLUS_MACRO r3,r4,r5,r6,r7,r8,r9
        blr                             ;; return

.globl _DRAWMETHOD_PLUS_PPC
.align 3
_DRAWMETHOD_PLUS_PPC:
        DRAWMETHOD_PLUS_MACRO r3,r3,r4,r5,r6,r7,r9
        blr                             ;; return


; **************************************************************************************
; void DRAWMETHOD_HALF_PPC(unsigned int * buf, unsigned int _col);
; void DRAWMETHOD_HALF_2_PPC(unsigned * in, unsigned int * out, unsigned int _col);
;
; 50% blend of _col with one pixel.
;   _HALF_2_ : r3 = in, r4 = out, r5 = _col ; work registers r6-r8
;   _HALF_   : r3 = buf (read and written in place), r4 = _col ; work registers r5-r7
; CR0 is modified by both.
; **************************************************************************************
.globl _DRAWMETHOD_HALF_2_PPC
.align 3
_DRAWMETHOD_HALF_2_PPC:
        DRAWMETHOD_HALF_MACRO r3,r4,r5,r6,r7,r8
        blr                             ;; return

.globl _DRAWMETHOD_HALF_PPC
.align 3
_DRAWMETHOD_HALF_PPC:
        DRAWMETHOD_HALF_MACRO r3,r3,r4,r5,r6,r7
        blr                             ;; return


; **************************************************************************************
; void DRAW_LINE_PPC(unsigned int *data, int x1, int y1, int x2, int y2, unsigned int col,
;                    unsigned int screenx, unsigned int screeny)
;
; Placeholder : returns immediately without drawing anything.
; **************************************************************************************
.globl _DRAW_LINE_PPC
.align 3
_DRAW_LINE_PPC:
        ;; NOT IMPLEMENTED YET
        blr                             ;; return


; **************************************************************************************
; void _ppc_brightness(Pixel * src, Pixel * dest, unsigned int size, unsigned int coeff)
;
; Three implementations share this signature :
;       _ppc_brightness_G4      : AltiVec, with a "dst" data stream touch in the loop
;       _ppc_brightness_G5      : AltiVec, same loop without the "dst"
;       _ppc_brightness_generic : integer only, one pixel per iteration
;
; In : r3 = src, r4 = dest, r5 = size (in 32-bit pixels), r6 = coeff
; Every byte handled is replaced by min(255, (byte * coeff) >> 8).
; **************************************************************************************


;; Permute control vectors used by the AltiVec routines.
;; The first 16 bytes are zero and are skipped by the code. Each following row
;; spreads four consecutive source bytes into four words : index 0x10 selects
;; byte 0 of the second vperm operand (an all-zero vector in the callers),
;; index 0x0N selects byte N of the pixel vector.
.const
.align 4
vectorZERO:
        .long 0,0,0,0
        .long 0x10101000, 0x10101001, 0x10101002, 0x10101003
        .long 0x10101004, 0x10101005, 0x10101006, 0x10101007
        .long 0x10101008, 0x10101009, 0x1010100A, 0x1010100B
        .long 0x1010100C, 0x1010100D, 0x1010100E, 0x1010100F


.section __TEXT,__text,regular,pure_instructions

; --------------------------------------------------------------------------------------
; _ppc_brightness_G4
; Processes size/4 vectors of 16 bytes; any remainder (size % 4 pixels) is not processed.
; src and dest are accessed with lvx/stvx, which ignore the low 4 address bits,
; so both are expected to be 16-byte aligned.
; All four bytes of every pixel are scaled.
; Clobbers : r0, r5-r7, r9-r12, CR0, CTR, v0, v1, v4-v13. LR and VRSAVE are restored.
; NOTE(review): coeff is staged through the single static buffer vectortmpwork,
; so concurrent calls share that buffer.
; --------------------------------------------------------------------------------------
.globl _ppc_brightness_G4
.align 3
_ppc_brightness_G4:

;; PowerPC Altivec code
        srwi    r5,r5,2                 ;; r5 <- number of 4-pixel vectors
        cmplwi  r5,0
        beqlr                           ;; nothing to do : with CTR = 0 the bdnz loop
                                        ;; below would wrap and run 2^32 times
        mtctr   r5

;;vrsave
        mfspr   r11,256                 ;; r11 <- caller's VRSAVE
        lis     r12,0xCFFC              ;; mark v0, v1 and v4..v13 as in use
        mtspr   256,r12

        mflr    r0                      ;; position independent base -> r10
        bcl     20,31,"L00000000001$pb"
"L00000000001$pb":
        mflr    r10
        mtlr    r0

        addis   r9,r10,ha16(vectorZERO-"L00000000001$pb")
        addi    r9,r9,lo16(vectorZERO-"L00000000001$pb")

        vxor    v0,v0,v0                ;; V0 = NULL vector

        addi    r9,r9,16                ;; v10..v13 <- the four permute rows
        lvx     v10,0,r9
        addi    r9,r9,16
        lvx     v11,0,r9
        addi    r9,r9,16
        lvx     v12,0,r9
        addi    r9,r9,16
        lvx     v13,0,r9

        addis   r9,r10,ha16(vectortmpwork-"L00000000001$pb")
        addi    r9,r9,lo16(vectortmpwork-"L00000000001$pb")
        stw     r6,0(r9)                ;; word 0 <- coeff
        li      r6,8
        stw     r6,4(r9)                ;; word 1 <- shift count (8)
        lvx     v9,0,r9
        vspltw  v8,v9,0                 ;; v8 <- coeff in every word
        vspltw  v9,v9,1                 ;; v9 <- 8 in every word

;; elt counter
        li      r9,0                    ;; r9 <- byte offset into src / dest
        lis     r7,0x0F01               ;; control word for the dst below
        b       L7
.align 4
L7:
        lvx     v1,r9,r3                ;; v1 <- 16 source bytes

        vperm   v4,v1,v0,v10            ;; bytes 0..3   -> 4 words
        ;*********************
        add     r10,r9,r3               ;; address handed to dst
        ;*********************
        vperm   v5,v1,v0,v11            ;; bytes 4..7   -> 4 words
        vperm   v6,v1,v0,v12            ;; bytes 8..11  -> 4 words
        vperm   v7,v1,v0,v13            ;; bytes 12..15 -> 4 words

        vmulouh v4,v4,v8                ;; byte * (low 16 bits of coeff)
        ;*********************
        dst     r10,r7,3
        ;*********************
        vmulouh v5,v5,v8
        vmulouh v6,v6,v8
        vmulouh v7,v7,v8
        vsrw    v4,v4,v9                ;; >> 8
        vsrw    v5,v5,v9
        vsrw    v6,v6,v9
        vsrw    v7,v7,v9

        vpkuwus v4,v4,v5                ;; words -> halfwords, unsigned saturate
        vpkuwus v6,v6,v7
        vpkuhus v1,v4,v6                ;; halfwords -> bytes, unsigned saturate

        stvx    v1,r9,r4                ;; store 16 result bytes
        addi    r9,r9,16

        bdnz    L7

        mtspr   256,r11                 ;; restore VRSAVE
        blr


; --------------------------------------------------------------------------------------
; _ppc_brightness_G5
; Same contract and arithmetic as _ppc_brightness_G4, without the "dst" in the loop.
; Processes size/4 vectors of 16 bytes; any remainder (size % 4 pixels) is not processed.
; Clobbers : r0, r5, r6, r9-r12, CR0, CTR, v0, v1, v4-v13. LR and VRSAVE are restored.
; NOTE(review): coeff is staged through the single static buffer vectortmpwork,
; so concurrent calls share that buffer.
; --------------------------------------------------------------------------------------
.globl _ppc_brightness_G5
.align 3
_ppc_brightness_G5:

;; PowerPC Altivec G5 code
        srwi    r5,r5,2                 ;; r5 <- number of 4-pixel vectors
        cmplwi  r5,0
        beqlr                           ;; nothing to do : with CTR = 0 the bdnz loop
                                        ;; below would wrap and run 2^32 times
        mtctr   r5

;;vrsave
        mfspr   r11,256                 ;; r11 <- caller's VRSAVE
        lis     r12,0xCFFC              ;; mark v0, v1 and v4..v13 as in use
        mtspr   256,r12

        mflr    r0                      ;; position independent base -> r10
        bcl     20,31,"L00000000002$pb"
"L00000000002$pb":
        mflr    r10
        mtlr    r0

        addis   r9,r10,ha16(vectorZERO-"L00000000002$pb")
        addi    r9,r9,lo16(vectorZERO-"L00000000002$pb")

        vxor    v0,v0,v0                ;; V0 = NULL vector

        addi    r9,r9,16                ;; v10..v13 <- the four permute rows
        lvx     v10,0,r9
        addi    r9,r9,16
        lvx     v11,0,r9
        addi    r9,r9,16
        lvx     v12,0,r9
        addi    r9,r9,16
        lvx     v13,0,r9

        addis   r9,r10,ha16(vectortmpwork-"L00000000002$pb")
        addi    r9,r9,lo16(vectortmpwork-"L00000000002$pb")
        stw     r6,0(r9)                ;; word 0 <- coeff
        li      r6,8
        stw     r6,4(r9)                ;; word 1 <- shift count (8)
        lvx     v9,0,r9
        vspltw  v8,v9,0                 ;; v8 <- coeff in every word
        vspltw  v9,v9,1                 ;; v9 <- 8 in every word

;; elt counter
        li      r9,0                    ;; r9 <- byte offset into src / dest
        b       L6
.align 4
L6:
        lvx     v1,r9,r3                ;; v1 <- 16 source bytes

        vperm   v4,v1,v0,v10            ;; bytes 0..3   -> 4 words
        vperm   v5,v1,v0,v11            ;; bytes 4..7   -> 4 words
        vperm   v6,v1,v0,v12            ;; bytes 8..11  -> 4 words
        vperm   v7,v1,v0,v13            ;; bytes 12..15 -> 4 words

        vmulouh v4,v4,v8                ;; byte * (low 16 bits of coeff)
        vmulouh v5,v5,v8
        vmulouh v6,v6,v8
        vmulouh v7,v7,v8
        vsrw    v4,v4,v9                ;; >> 8
        vsrw    v5,v5,v9
        vsrw    v6,v6,v9
        vsrw    v7,v7,v9

        vpkuwus v4,v4,v5                ;; words -> halfwords, unsigned saturate
        vpkuwus v6,v6,v7
        vpkuhus v1,v4,v6                ;; halfwords -> bytes, unsigned saturate

        stvx    v1,r9,r4                ;; store 16 result bytes
        addi    r9,r9,16

        bdnz    L6

        mtspr   256,r11                 ;; restore VRSAVE
        blr


; --------------------------------------------------------------------------------------
; _ppc_brightness_generic
; Integer version : one pixel per iteration, "size" iterations.
; The three low bytes of each pixel are scaled and clamped to 0xFF; the top byte
; of every stored pixel is 0.
; Clobbers : r3, r4, r7-r11, CR0, CTR.
; --------------------------------------------------------------------------------------
.globl _ppc_brightness_generic
.align 3
_ppc_brightness_generic:
        cmplwi  r5,0
        beqlr                           ;; nothing to do : with CTR = 0 the bdnz loop
                                        ;; below would wrap and run 2^32 times
        subi    r3,r3,4                 ;; pre-decrement for the update-form accesses
        subi    r4,r4,4
        mtctr   r5
        b       L1
.align 4
L1:
        lwzu    r7,4(r3)                ;; r7 <- next source pixel

        rlwinm  r8,r7,16,24,31          ;; r8  <- byte 0x00FF0000
        rlwinm  r9,r7,24,24,31          ;; r9  <- byte 0x0000FF00
        mullw   r8,r8,r6
        rlwinm  r10,r7,0,24,31          ;; r10 <- byte 0x000000FF
        mullw   r9,r9,r6
        srwi    r8,r8,8
        mullw   r10,r10,r6
        srwi    r9,r9,8

        rlwinm. r11,r8,0,0,23           ;; anything above 8 bits ?
        beq     L2
        li      r8,0xFF                 ;; yes : clamp
L2:
        srwi    r10,r10,8
        rlwinm. r11,r9,0,0,23
        beq     L3
        li      r9,0xFF
L3:
        rlwinm  r7,r8,16,8,15           ;; r7 <- r8 placed at 0x00FF0000, rest cleared
        rlwinm. r11,r10,0,0,23
        beq     L4
        li      r10,0xFF
L4:
        rlwimi  r7,r9,8,16,23           ;; insert r9 at 0x0000FF00
        rlwimi  r7,r10,0,24,31          ;; insert r10 at 0x000000FF

        stwu    r7,4(r4)                ;; store the pixel, advance dest
        bdnz    L1

        blr



;; 16-byte scratch vector used by the AltiVec routines to move coeff and the
;; shift count from integer registers into a vector register.
.static_data
.align 4
vectortmpwork:
        .long 0,0,0,0
