/* Intel SIMD MMX implementation of Viterbi ACS butterflies
   for 64-state (k=7) convolutional code
   Copyright 2004 Phil Karn, KA9Q
   This code may be used under the terms of the GNU Lesser General Public License (LGPL)

   int update_viterbi27_blk_mmx(struct v27 *vp,unsigned char *syms,int nbits) ;

   Stack arguments (cdecl, after the frame is set up):
      8(%ebp)  = vp     decoder state; only the DP, OLDMETRICS and NEWMETRICS
                        pointer fields are read and written back here
     12(%ebp)  = syms   two symbol bytes are consumed per decoded bit
     16(%ebp)  = nbits  number of bits (loop iterations) to process

   Returns 0 in %eax on success, -1 in %eax if vp is NULL.

   For every bit the routine runs 32 add-compare-select butterflies
   (4 macro invocations of 8 each), writes 64 bytes of decisions at the
   decision pointer, advances that pointer by 64, and swaps the old/new
   metric pointers. On exit the three pointers are stored back into *vp.
   The syms and nbits argument slots on the stack are reused as loop state.
*/
	# MMX (64-bit SIMD) version
	# requires Pentium-MMX, Pentium-II or better

	# These are offsets into struct v27, defined in viterbi27_mmx.c
	.set DP,128
	.set OLDMETRICS,132
	.set NEWMETRICS,136
	.text
	.global update_viterbi27_blk_mmx,Mettab27_1,Mettab27_2
	.type update_viterbi27_blk_mmx,@function
	.align 16

update_viterbi27_blk_mmx:
	pushl %ebp
	movl %esp,%ebp
	pushl %esi
	pushl %edi
	pushl %edx
	pushl %ebx

	movl 8(%ebp),%edx	# edx = vp
	testl %edx,%edx
	jnz 0f
	# vp is NULL: return -1. The $ prefix is required; a bare -1 would be
	# an absolute memory operand and the load itself would fault.
	movl $-1,%eax
	jmp err			# no MMX state touched yet, so emms can be skipped
0:	movl OLDMETRICS(%edx),%esi	# esi -> old metrics
	movl NEWMETRICS(%edx),%edi	# edi -> new metrics
	movl DP(%edx),%edx	# edx -> decisions

	# Top of the per-bit loop
1:	movl 16(%ebp),%eax	# eax = nbits
	decl %eax
	jl 2f			# passed zero, we're done
	movl %eax,16(%ebp)	# remaining count lives in the argument slot

	movl 12(%ebp),%ebx	# ebx = syms
	movw (%ebx),%ax		# ax = second symbol : first symbol
	addl $2,%ebx
	movl %ebx,12(%ebp)	# advance syms past the pair just read

	movb %ah,%bl		# bl = second symbol
	andl $255,%eax		# eax = first symbol, zero-extended
	andl $255,%ebx		# ebx = second symbol, zero-extended

	# shift into first array index dimension slot
	# (symbol value * 32 = byte offset of that symbol's row in the metric table)
	shll $5,%eax
	shll $5,%ebx

	# each invocation of this macro will do 8 butterflies in parallel
	# GROUP selects which 8-byte slice of the tables and metric arrays is used.
	# The macro definition emits no code by itself; it is expanded below.
	.MACRO butterfly GROUP
	# Compute branch metrics
	movq (Mettab27_1+8*\GROUP)(%eax),%mm3
	movq fifteens,%mm0

	paddb (Mettab27_2+8*\GROUP)(%ebx),%mm3
	paddb ones,%mm3		# emulate pavgb - this may not be necessary
	psrlq $1,%mm3		# 64-bit shift: a bit from each byte leaks into its lower neighbour
	pand %mm0,%mm3		# keep low 4 bits of each byte, discarding the leaked bits

	movq (8*\GROUP)(%esi),%mm6	# Incoming path metric, high bit = 0
	movq ((8*\GROUP)+32)(%esi),%mm2	# Incoming path metric, high bit = 1
	movq %mm6,%mm1
	movq %mm2,%mm7

	paddb %mm3,%mm6
	paddb %mm3,%mm2
	pxor %mm0,%mm3		# invert branch metric
	paddb %mm3,%mm7		# path metric for inverted symbols
	paddb %mm3,%mm1

	# live registers 1 2 6 7
	# Compare mm6 and mm7; mm1 and mm2
	pxor %mm3,%mm3		# mm3 = 0 for the signed compares below
	movq %mm6,%mm4
	movq %mm1,%mm5
	psubb %mm7,%mm4		# mm4 = mm6 - mm7
	psubb %mm2,%mm5		# mm5 = mm1 - mm2
	pcmpgtb %mm3,%mm4	# mm4 = first set of decisions (ff = 1 better)
	pcmpgtb %mm3,%mm5	# mm5 = second set of decisions

	# live registers 1 2 4 5 6 7
	# select survivors: per byte, take mm7 (resp. mm2) where the decision
	# mask is ff, otherwise mm6 (resp. mm1)
	movq %mm4,%mm0
	pand %mm4,%mm7
	movq %mm5,%mm3
	pand %mm5,%mm2
	pandn %mm6,%mm0
	pandn %mm1,%mm3
	por %mm0,%mm7		# mm7 = first set of survivors
	por %mm3,%mm2		# mm2 = second set of survivors

	# live registers 2 4 5 7
	# interleave & store decisions in mm4, mm5
	# interleave & store new path metrics in mm2, mm7
	movq %mm4,%mm3
	movq %mm7,%mm0
	punpckhbw %mm5,%mm4	# high halves of the two decision sets, interleaved
	punpcklbw %mm5,%mm3	# low halves of the two decision sets, interleaved
	punpcklbw %mm2,%mm7	# low halves of the two survivor sets, interleaved
	punpckhbw %mm2,%mm0	# high halves of the two survivor sets, interleaved
	movq %mm4,(16*\GROUP+8)(%edx)
	movq %mm3,(16*\GROUP)(%edx)
	movq %mm7,(16*\GROUP)(%edi)
	movq %mm0,(16*\GROUP+8)(%edi)

	.endm

# invoke macro 4 times for a total of 32 butterflies
	butterfly GROUP=0
	butterfly GROUP=1
	butterfly GROUP=2
	butterfly GROUP=3

	addl $64,%edx		# bump decision pointer

	# swap metrics
	movl %esi,%eax
	movl %edi,%esi
	movl %eax,%edi
	jmp 1b

	# Normal exit: leave MMX state and write the updated pointers back to *vp
2:	emms
	movl 8(%ebp),%ebx	# ebx = vp
	# stash metric pointers
	movl %esi,OLDMETRICS(%ebx)
	movl %edi,NEWMETRICS(%ebx)
	movl %edx,DP(%ebx)	# stash incremented value of vp->dp
	xorl %eax,%eax		# return 0
	# Common epilogue; eax already holds the return value
err:	popl %ebx
	popl %edx
	popl %edi
	popl %esi
	popl %ebp
	ret

	.data
	.align 8
	# Per-byte mask 0x0f: clamps the averaged branch metric and, via pxor, inverts it
fifteens:
	.byte 15,15,15,15,15,15,15,15

	.align 8
	# Per-byte rounding constant added before the halving shift
ones:	.byte 1,1,1,1,1,1,1,1