• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// -*- c++ -*-
2
3unsigned char* pDest;              // output cursor; positioned from pWeaveDest below
4const unsigned char* pSrcP;        // cursor set from pWeaveSrcP below
5const unsigned char* pSrc;         // cursor set from pWeaveSrc below
6const unsigned char* pBob;         // cursor set from pCopySrc below
7const unsigned char* pBobP;        // cursor set from pCopySrcP below
8
9// long is int32 on ARCH_386, int64 on ARCH_AMD64. Declaring the pitches and
10// counters this way saves a lot of xor's to delete 64bit garbage.
11
12#if defined(DBL_RESIZE) || defined(USE_FOR_DSCALER)
13long	    src_pitch2 = src_pitch;			// even & odd lines are not interleaved in DScaler
14#else
15long	    src_pitch2 = 2 * src_pitch;		// even & odd lines are interleaved in Avisynth
16#endif
17
18// dst_pitch2 is always twice the destination pitch, independent of the build variant.
19long	    dst_pitch2 = 2 * dst_pitch;
20long        y;
21// Last8 is assigned right before the per-line loop: rowsize-8 (asm path) or rowsize-4 (C path).
22long     Last8;
23
24// XXX: silence unused-but-set warnings, which -Werror turns into errors
25(void) pSrc;
26(void) pSrcP;
27(void) pBob;
28(void) pBobP;
29
30	pSrc  = pWeaveSrc;			// points 1 weave line above
31	pSrcP = pWeaveSrcP;			// likewise, 1 weave line above
32
33#ifdef DBL_RESIZE
34// DBL_RESIZE: pDest starts 2*dst_pitch (vertical filter) or 3*dst_pitch past pWeaveDest
35#ifdef USE_VERTICAL_FILTER
36	pDest = pWeaveDest + dst_pitch2;
37#else
38	pDest = pWeaveDest + 3*dst_pitch;
39#endif
40
41#else
42// otherwise: pDest starts 1*dst_pitch (vertical filter) or 2*dst_pitch past pWeaveDest
43#ifdef USE_VERTICAL_FILTER
44	pDest = pWeaveDest + dst_pitch;
45#else
46	pDest = pWeaveDest + dst_pitch2;
47#endif
48
49#endif
50
51	if (TopFirst)
52	{
53		pBob = pCopySrc + src_pitch2;      // remember one weave line just copied previously
54		pBobP = pCopySrcP + src_pitch2;
55	}
56	else
57	{
58		pBob =  pCopySrc;                  // start at the copy source itself, no src_pitch2 offset
59		pBobP =  pCopySrcP;
60	}
61
62#ifndef IS_C
63
64#ifndef _pBob	// define the operand-name table only if it is not already defined
65#define _pBob       "%0"	// each name expands to a positional asm operand placeholder, "%0" .. "%18"
66#define _src_pitch2 "%1"	// NOTE(review): the numbering must match the asm operand list, which is outside this chunk
67#define _ShiftMask  "%2"
68#define _pDest      "%3"
69#define _dst_pitchw "%4"
70#define _Last8      "%5"
71#define _pSrc       "%6"
72#define _pSrcP      "%7"
73#define _pBobP      "%8"
74#define _DiffThres  "%9"
75#define _Min_Vals   "%10"
76#define _Max_Vals   "%11"
77#define _FOURS      "%12"
78#define _TENS       "%13"
79#define _ONES       "%14"
80#define _UVMask     "%15"
81#define _Max_Mov    "%16"
82#define _YMask      "%17"
83#define _oldbx      "%18"
84#endif
85        Last8 = (rowsize-8);	// byte offset of the last 8-byte group in a row
86        // y visits lines 1 .. FldHeight-2; the loop body is not closed within this fragment.
87	for (y=1; y < FldHeight-1; y++)
88	{
89          long	dst_pitchw = dst_pitch; // local copy so the asm can reference it
90          int64_t Max_Mov   = 0x0404040404040404ull; // 0x04 in every byte
91          int64_t DiffThres = 0x0f0f0f0f0f0f0f0full; // 0x0f in every byte
92          int64_t YMask     = 0x00ff00ff00ff00ffull; // keeps only luma
93          int64_t UVMask    = 0xff00ff00ff00ff00ull; // keeps only chroma
94          int64_t TENS      = 0x0a0a0a0a0a0a0a0aull; // 10 in every byte
95          int64_t FOURS     = 0x0404040404040404ull; // 4 in every byte
96          int64_t ONES      = 0x0101010101010101ull; // 1 in every byte
97          int64_t Min_Vals  = 0x0000000000000000ull;
98          int64_t Max_Vals  = 0x0000000000000000ull;
99          int64_t ShiftMask = 0xfefffefffefffeffull; // passed to V_PAVGB below as _ShiftMask
100
101          long oldbx = 0; // receives the saved XBX (first asm instruction below)
102
103		// pretend it's indented -->>
104        __asm__ __volatile__
105            (
106             // Loop general reg usage
107             //
108             // XAX - pBobP, then pDest
109             // XBX - pBob
110             // XCX - src_pitch2
111             // XDX - current offset
112             // XDI - prev weave pixels, 1 line up
113             // XSI - next weave pixels, 1 line up
114
115             // Save "XBX" in oldbx before it is overwritten (-fPIC)
116	     MOVX" %%"XBX", "_oldbx"\n\t"
117
118             // simple bob first 8 bytes
119             MOVX"	"_pBob",        %%"XBX"\n\t"
120             MOVX"	"_src_pitch2",  %%"XCX"\n\t"
121             // From here on XBX = pBob and XCX = src_pitch2 for both variants below.
122#ifdef USE_VERTICAL_FILTER
123             "movq	    (%%"XBX"),        %%mm0\n\t"
124             "movq	    (%%"XBX", %%"XCX"), %%mm1\n\t" //, qword ptr["XBX"+"XCX"]
125             "movq	    %%mm0,          %%mm2\n\t"
126             V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask)		// halfway between
127             V_PAVGB ("%%mm0", "%%mm2", "%%mm3", _ShiftMask)		// 1/4 way
128             V_PAVGB ("%%mm1", "%%mm2", "%%mm3", _ShiftMask)		// 3/4 way
129             MOVX"		"_pDest",       %%"XDI"\n\t"
130             MOVX"		"_dst_pitchw",  %%"XAX"\n\t"
131             V_MOVNTQ	("(%%"XDI")", "%%mm0")
132             V_MOVNTQ	("(%%"XDI", %%"XAX")", "%%mm1") // qword ptr["XDI"+"XAX"], mm1
133             // The vertical-filter variant writes two output rows: [XDI] and [XDI+dst_pitchw].
134             // simple bob last 8 bytes
135             MOVX"		"_Last8", %%"XDX"\n\t"
136             LEAX"		(%%"XBX", %%"XDX"), %%"XSI"\n\t"  // ["XBX"+"XDX"]
137             "movq	    (%%"XSI"), %%mm0\n\t"
138             "movq	    (%%"XSI", %%"XCX"), %%mm1\n\t"    // qword ptr["XSI"+"XCX"]
139             "movq	    %%mm0, %%mm2\n\t"
140             V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask)		// halfway between
141             V_PAVGB ("%%mm0", "%%mm2", "%%mm3", _ShiftMask)		// 1/4 way
142             V_PAVGB ("%%mm1", "%%mm2", "%%mm3", _ShiftMask)		// 3/4 way
143             ADDX"		%%"XDX", %%"XDI"\n\t"						// last 8 bytes of dest
144             V_MOVNTQ	("(%%"XDI")", "%%mm0")					// store to memory at [XDI]; the bare register was named here before
145             V_MOVNTQ	("(%%"XDI", %%"XAX")", "%%mm1") // qword ptr["XDI"+"XAX"], mm1)
146             // XDI was advanced by Last8 above; it is reloaded from pSrcP before the main loop.
147#else
148             "movq	(%%"XBX"), %%mm0\n\t"
149             //		pavgb	mm0, qword ptr["XBX"+"XCX"]
150             V_PAVGB ("%%mm0", "(%%"XBX", %%"XCX")", "%%mm2", _ShiftMask) // qword ptr["XBX"+"XCX"], mm2, ShiftMask)
151             MOVX"		"_pDest", %%"XDI"\n\t"
152             V_MOVNTQ	("(%%"XDI")", "%%mm0")
153             // The unfiltered variant writes a single output row.
154             // simple bob last 8 bytes
155             MOVX"		"_Last8", %%"XDX"\n\t"
156             LEAX"		(%%"XBX", %%"XDX"), %%"XSI"\n\t" //"XSI", ["XBX"+"XDX"]
157             "movq	    (%%"XSI"), %%mm0\n\t"
158             //		pavgb	mm0, qword ptr["XSI"+"XCX"]
159             V_PAVGB	("%%mm0", "(%%"XSI", %%"XCX")", "%%mm2", _ShiftMask) // qword ptr["XSI"+"XCX"], mm2, ShiftMask)
160             V_MOVNTQ	("(%%"XDI", %%"XDX")", "%%mm0") // qword ptr["XDI"+"XDX"], mm0)
161#endif
162             // now loop and get the middle qwords
163             MOVX"		"_pSrc", %%"XSI"\n\t"
164             MOVX"		"_pSrcP", %%"XDI"\n\t"
165             MOVX"		$8, %%"XDX"\n\t"				// curr offset into all lines
166             // Local label 1: top of the per-qword loop; the back-edge is not visible in this fragment.
167             "1:\n\t"
168             MOVX"		"_pBobP", %%"XAX"\n\t"
169             ADDX"		$8, %%"XDI"\n\t"
170             ADDX"		$8, %%"XSI"\n\t"
171             ADDX"		$8, %%"XBX"\n\t"
172             ADDX"		%%"XDX", %%"XAX"\n\t"				// XAX = pBobP + current offset
173
174#ifdef USE_STRANGE_BOB
175#include "StrangeBob.inc"
176#else
177#include "WierdBob.inc"
178#endif
179
180             // For non-SSE2:
181             // through out most of the rest of this loop we will maintain
182             //	mm4		our min bob value
183             //	mm5		best weave pixels so far
184             //	mm6		our max Bob value
185             //	mm7		best weighted pixel ratings so far
186
187             // We will keep a slight bias to using the weave pixels
188             // from the current location, by rating them by the min distance
189             // from the Bob value instead of the avg distance from that value.
190             // our best and only rating so far
191             "pcmpeqb	%%mm7, %%mm7\n\t"			// ffff, say we didn't find anything good yet
192
193#else
194        Last8 = (rowsize - 4);	// C path: byte offset of the last 4-byte group in a row
195        // y visits lines 1 .. FldHeight-2; the loop body is not closed within this fragment.
196	for (y=1; y < FldHeight-1; y++)
197	{
198	  #ifdef USE_STRANGE_BOB
199	  long DiffThres = 0x0f;	// same 0x0f value the asm path replicates into every byte
200	  #endif
201
202	  #ifndef SKIP_SEARCH
203	  long weave[2], MaxVals[2], MinVals[2];
204	  #endif
205	  // Two-element work arrays: the inner loop below advances x by 2 per iteration.
206	  long diff[2], best[2], avg[2], diff2[2], out[2], x;
207
208#ifdef USE_VERTICAL_FILTER	// edge columns get a plain vertical blend of pBob and the line src_pitch2 below it
209             pDest[0] = (3 * pBob[0] + pBob[src_pitch2]) / 4;	// first output row: 3/4 upper + 1/4 lower
210             pDest[1] = (3 * pBob[1] + pBob[src_pitch2 + 1]) / 4;
211             pDest[2] = (3 * pBob[2] + pBob[src_pitch2 + 2]) / 4;
212             pDest[3] = (3 * pBob[3] + pBob[src_pitch2 + 3]) / 4;
213	     pDest[dst_pitch] = (pBob[0] + 3 * pBob[src_pitch2]) / 4;	// second output row: dst_pitch, since dst_pitchw exists only in the asm branch
214	     pDest[dst_pitch + 1] = (pBob[1] + 3 * pBob[src_pitch2 + 1]) / 4;
215	     pDest[dst_pitch + 2] = (pBob[2] + 3 * pBob[src_pitch2 + 2]) / 4;
216	     pDest[dst_pitch + 3] = (pBob[3] + 3 * pBob[src_pitch2 + 3]) / 4;
217
218             // simple bob last 4 bytes
219	     pDest[Last8] = (3 * pBob[Last8] + pBob[Last8 + src_pitch2]) / 4;
220	     pDest[Last8 + 1] = (3 * pBob[Last8 + 1] + pBob[Last8 + src_pitch2 + 1]) / 4;
221	     pDest[Last8 + 2] = (3 * pBob[Last8 + 2] + pBob[Last8 + src_pitch2 + 2]) / 4;
222	     pDest[Last8 + 3] = (3 * pBob[Last8 + 3] + pBob[Last8 + src_pitch2 + 3]) / 4;
223	     pDest[Last8 + dst_pitch] = (pBob[Last8] + 3 * pBob[Last8 + src_pitch2]) / 4;	// destination row offset uses the destination pitch, not src_pitch2
224	     pDest[Last8 + dst_pitch + 1] = (pBob[Last8 + 1] + 3 * pBob[Last8 + src_pitch2 + 1]) / 4;
225	     pDest[Last8 + dst_pitch + 2] = (pBob[Last8 + 2] + 3 * pBob[Last8 + src_pitch2 + 2]) / 4;
226	     pDest[Last8 + dst_pitch + 3] = (pBob[Last8 + 3] + 3 * pBob[Last8 + src_pitch2 + 3]) / 4;
227#else
228             pDest[0] = (pBob[0] + pBob[src_pitch2]) / 2;	// column 0 pairs with column 0 of the lower line (was src_pitch2 + 1)
229             pDest[1] = (pBob[1] + pBob[src_pitch2 + 1]) / 2;
230             pDest[2] = (pBob[2] + pBob[src_pitch2 + 2]) / 2;
231             pDest[3] = (pBob[3] + pBob[src_pitch2 + 3]) / 2;
232
233             // simple bob last 4 bytes
234	     pDest[Last8] = (pBob[Last8] + pBob[Last8 + src_pitch2]) / 2;
235	     pDest[Last8 + 1] = (pBob[Last8 + 1] + pBob[Last8 + src_pitch2 + 1]) / 2;
236	     pDest[Last8 + 2] = (pBob[Last8 + 2] + pBob[Last8 + src_pitch2 + 2]) / 2;
237	     pDest[Last8 + 3] = (pBob[Last8 + 3] + pBob[Last8 + src_pitch2 + 3]) / 2;
238#endif
239
240             pBob += 4;	// advance all four source cursors by 4 bytes before the middle loop, which starts at x=4
241	     pBobP += 4;
242	     pSrc += 4;
243	     pSrcP += 4;
244
245             for (x=4; x < Last8; x += 2) {	// middle bytes, two per iteration; the body continues past this fragment
246
247#ifdef USE_STRANGE_BOB
248#include "StrangeBob.inc"
249#else
250#include "WierdBob.inc"
251#endif
252
253             // We will keep a slight bias to using the weave pixels
254             // from the current location, by rating them by the min distance
255             // from the Bob value instead of the avg distance from that value.
256             // our best and only rating so far
257             diff[0] = diff[1] = 255;	// start both ratings at 255; the asm path starts mm7 at all-ones ("nothing good yet")
258
259
260#endif
261