/* Intel SIMD MMX implementation of Viterbi ACS butterflies
   for 256-state (k=9) convolutional code
   Copyright 2004 Phil Karn, KA9Q
   This code may be used under the terms of the GNU Lesser General Public License (LGPL)

   void update_viterbi29_blk_mmx(struct v29 *vp,unsigned char *syms,int nbits);

   Syntax: GNU as, AT&T operand order (op src,dst), 32-bit x86.
   Arguments are read from the stack relative to %ebp:
       8(%ebp)  = vp
       12(%ebp) = syms   (two symbol bytes are consumed per decoded bit)
       16(%ebp) = nbits  (loop ends as soon as the decremented count is negative)
   Both the syms and nbits argument slots are updated in place while looping.

   NOTE(review): the C prototype above is declared void, but the code leaves
   a status in %eax: 0 after the loop completes, -1 when vp is NULL.

   Register roles inside the loop:
       %esi -> old path metrics      %edi -> new path metrics
       %edx -> decision output       %eax,%ebx = byte offsets into the
                                                 Mettab29_1 / Mettab29_2 tables
   %esi, %edi, %edx and %ebx are saved and restored; %eax and %mm0-%mm7 are
   clobbered (emms is executed before returning from the normal path).
*/

	# These are offsets into struct v29, defined in viterbi29.h
	.set DP,512
	.set OLDMETRICS,516
	.set NEWMETRICS,520

	.text
	.global update_viterbi29_blk_mmx,Mettab29_1,Mettab29_2
	.type update_viterbi29_blk_mmx,@function
	.align 16

	# MMX (64-bit SIMD) version
	# requires Pentium-MMX, Pentium-II or better

update_viterbi29_blk_mmx:
	pushl %ebp
	movl %esp,%ebp
	pushl %esi
	pushl %edi
	pushl %edx
	pushl %ebx

	movl 8(%ebp),%edx		# edx = vp
	testl %edx,%edx
	jnz  0f
	# vp == NULL: return -1.  The '$' is required; without it AT&T syntax
	# treats -1 as a memory address and the load faults.
	movl $-1,%eax
	jmp  err			# no MMX state touched yet, so no emms needed
0:	movl OLDMETRICS(%edx),%esi	# esi -> old metrics
	movl NEWMETRICS(%edx),%edi	# edi -> new metrics
	movl DP(%edx),%edx		# edx -> decisions

	# ---- per-bit loop -------------------------------------------------
1:	movl 16(%ebp),%eax		# eax = nbits
	decl %eax
	jl   2f				# passed zero, we're done
	movl %eax,16(%ebp)		# store remaining count back in the arg slot

	movl 12(%ebp),%ebx		# ebx = syms
	movzbl (%ebx),%eax		# eax = first symbol, zero-extended
	addl $2,12(%ebp)		# syms += 2 for the next iteration
	movzbl 1(%ebx),%ebx		# ebx = second symbol, zero-extended

	# shift into first array index dimension slot
	# (each symbol value selects a 128-byte row of its metric table)
	shll $7,%eax
	shll $7,%ebx

	# each invocation of this macro will do 8 butterflies in parallel
	#
	# In:   %eax, %ebx = row offsets into Mettab29_1 / Mettab29_2
	#       %esi -> old metrics, %edi -> new metrics, %edx -> decisions
	# Out:  16 new metric bytes at (16*GROUP)(%edi),
	#       16 decision bytes   at (16*GROUP)(%edx)
	# Clobbers: %mm0-%mm7
	.MACRO butterfly GROUP
	# Compute branch metrics:
	# ((tab1 + tab2 + 1) >> 1) & 15 per byte.  psrlq shifts the whole
	# quadword, so the pand with 15 also discards bits that crossed in
	# from the neighbouring byte.
	movq (Mettab29_1+8*\GROUP)(%eax),%mm3
	movq fifteens,%mm0
	paddb (Mettab29_2+8*\GROUP)(%ebx),%mm3
	paddb ones,%mm3			# emulate pavgb - this may not be necessary
	psrlq $1,%mm3
	pand %mm0,%mm3

	movq (8*\GROUP)(%esi),%mm6	# Incoming path metric, high bit = 0
	movq ((8*\GROUP)+128)(%esi),%mm2 # Incoming path metric, high bit = 1
	movq %mm6,%mm1
	movq %mm2,%mm7

	paddb %mm3,%mm6
	paddb %mm3,%mm2
	pxor %mm0,%mm3			# invert branch metric (xor with 15)
	paddb %mm3,%mm7			# path metric for inverted symbols
	paddb %mm3,%mm1

	# live registers 1 2 6 7
	# Compare mm6 and mm7;  mm1 and mm2
	# The comparison is done on the byte-wise difference against zero,
	# i.e. as a signed test of the modular difference.
	pxor %mm3,%mm3
	movq %mm6,%mm4
	movq %mm1,%mm5
	psubb %mm7,%mm4			# mm4 = mm6 - mm7
	psubb %mm2,%mm5			# mm5 = mm1 - mm2
	pcmpgtb %mm3,%mm4		# mm4 = first set of decisions (ff = 1 better)
	pcmpgtb %mm3,%mm5		# mm5 = second set of decisions

	# live registers 1 2 4 5 6 7
	# select survivors: where the decision byte is ff take mm7 / mm2,
	# otherwise take mm6 / mm1
	movq %mm4,%mm0
	pand %mm4,%mm7
	movq %mm5,%mm3
	pand %mm5,%mm2
	pandn %mm6,%mm0
	pandn %mm1,%mm3
	por %mm0,%mm7			# mm7 = first set of survivors
	por %mm3,%mm2			# mm2 = second set of survivors

	# live registers 2 4 5 7
	# interleave & store decisions in mm4, mm5
	# interleave & store new path metrics in mm2, mm7
	movq %mm4,%mm3
	movq %mm7,%mm0
	punpckhbw %mm5,%mm4
	punpcklbw %mm5,%mm3
	punpcklbw %mm2,%mm7		# interleave low 8 bytes of new metrics
	punpckhbw %mm2,%mm0		# interleave high 8 bytes of new metrics
	movq %mm4,(16*\GROUP+8)(%edx)
	movq %mm3,(16*\GROUP)(%edx)
	movq %mm7,(16*\GROUP)(%edi)
	movq %mm0,(16*\GROUP+8)(%edi)

	.endm

	# invoke macro 16 times for a total of 128 butterflies
	butterfly GROUP=0
	butterfly GROUP=1
	butterfly GROUP=2
	butterfly GROUP=3
	butterfly GROUP=4
	butterfly GROUP=5
	butterfly GROUP=6
	butterfly GROUP=7
	butterfly GROUP=8
	butterfly GROUP=9
	butterfly GROUP=10
	butterfly GROUP=11
	butterfly GROUP=12
	butterfly GROUP=13
	butterfly GROUP=14
	butterfly GROUP=15

	addl $256,%edx			# bump decision pointer

	# swap metrics: this bit's new metrics are the next bit's old metrics
	movl %esi,%eax
	movl %edi,%esi
	movl %eax,%edi
	jmp  1b

	# ---- normal exit --------------------------------------------------
2:	emms				# leave MMX state clean for FPU users
	movl 8(%ebp),%ebx		# ebx = vp
	# stash metric pointers
	movl %esi,OLDMETRICS(%ebx)
	movl %edi,NEWMETRICS(%ebx)
	movl %edx,DP(%ebx)		# stash incremented value of vp->dp
	xorl %eax,%eax			# return 0

	# common epilogue; eax already holds the return value
err:	popl %ebx
	popl %edx
	popl %edi
	popl %esi
	popl %ebp
	ret

	.data
	.align 8
fifteens:
	.byte 15,15,15,15,15,15,15,15	# per-byte 4-bit mask / inversion constant

	.align 8
ones:	.byte 1,1,1,1,1,1,1,1		# per-byte rounding term for the average