• Home
  • Raw
  • Download

Lines Matching refs:ido

80 static NEVER_INLINE(void) passf2_ps(int ido, int l1, const v4sf *cc, v4sf *ch, const float *wa1, fl…  in passf2_ps()  argument
82 int l1ido = l1*ido; in passf2_ps()
83 if (ido <= 2) { in passf2_ps()
84 for (k=0; k < l1ido; k += ido, ch += ido, cc+= 2*ido) { in passf2_ps()
85 ch[0] = VADD(cc[0], cc[ido+0]); in passf2_ps()
86 ch[l1ido] = VSUB(cc[0], cc[ido+0]); in passf2_ps()
87 ch[1] = VADD(cc[1], cc[ido+1]); in passf2_ps()
88 ch[l1ido + 1] = VSUB(cc[1], cc[ido+1]); in passf2_ps()
91 for (k=0; k < l1ido; k += ido, ch += ido, cc += 2*ido) { in passf2_ps()
92 for (i=0; i<ido-1; i+=2) { in passf2_ps()
93 v4sf tr2 = VSUB(cc[i+0], cc[i+ido+0]); in passf2_ps()
94 v4sf ti2 = VSUB(cc[i+1], cc[i+ido+1]); in passf2_ps()
96 ch[i] = VADD(cc[i+0], cc[i+ido+0]); in passf2_ps()
97 ch[i+1] = VADD(cc[i+1], cc[i+ido+1]); in passf2_ps()
109 static NEVER_INLINE(void) passf3_ps(int ido, int l1, const v4sf *cc, v4sf *ch, in passf3_ps() argument
115 int l1ido = l1*ido; in passf3_ps()
117 assert(ido > 2); in passf3_ps()
118 for (k=0; k< l1ido; k += ido, cc+= 3*ido, ch +=ido) { in passf3_ps()
119 for (i=0; i<ido-1; i+=2) { in passf3_ps()
120 tr2 = VADD(cc[i+ido], cc[i+2*ido]); in passf3_ps()
123 ti2 = VADD(cc[i+ido+1], cc[i+2*ido+1]); in passf3_ps()
126 cr3 = SVMUL(taui, VSUB(cc[i+ido], cc[i+2*ido])); in passf3_ps()
127 ci3 = SVMUL(taui, VSUB(cc[i+ido+1], cc[i+2*ido+1])); in passf3_ps()
143 static NEVER_INLINE(void) passf4_ps(int ido, int l1, const v4sf *cc, v4sf *ch, in passf4_ps() argument
149 int l1ido = l1*ido; in passf4_ps()
150 if (ido == 2) { in passf4_ps()
151 for (k=0; k < l1ido; k += ido, ch += ido, cc += 4*ido) { in passf4_ps()
152 tr1 = VSUB(cc[0], cc[2*ido + 0]); in passf4_ps()
153 tr2 = VADD(cc[0], cc[2*ido + 0]); in passf4_ps()
154 ti1 = VSUB(cc[1], cc[2*ido + 1]); in passf4_ps()
155 ti2 = VADD(cc[1], cc[2*ido + 1]); in passf4_ps()
156 ti4 = VMUL(VSUB(cc[1*ido + 0], cc[3*ido + 0]), LD_PS1(fsign)); in passf4_ps()
157 tr4 = VMUL(VSUB(cc[3*ido + 1], cc[1*ido + 1]), LD_PS1(fsign)); in passf4_ps()
158 tr3 = VADD(cc[ido + 0], cc[3*ido + 0]); in passf4_ps()
159 ti3 = VADD(cc[ido + 1], cc[3*ido + 1]); in passf4_ps()
171 for (k=0; k < l1ido; k += ido, ch+=ido, cc += 4*ido) { in passf4_ps()
172 for (i=0; i<ido-1; i+=2) { in passf4_ps()
174 tr1 = VSUB(cc[i + 0], cc[i + 2*ido + 0]); in passf4_ps()
175 tr2 = VADD(cc[i + 0], cc[i + 2*ido + 0]); in passf4_ps()
176 ti1 = VSUB(cc[i + 1], cc[i + 2*ido + 1]); in passf4_ps()
177 ti2 = VADD(cc[i + 1], cc[i + 2*ido + 1]); in passf4_ps()
178 tr4 = VMUL(VSUB(cc[i + 3*ido + 1], cc[i + 1*ido + 1]), LD_PS1(fsign)); in passf4_ps()
179 ti4 = VMUL(VSUB(cc[i + 1*ido + 0], cc[i + 3*ido + 0]), LD_PS1(fsign)); in passf4_ps()
180 tr3 = VADD(cc[i + ido + 0], cc[i + 3*ido + 0]); in passf4_ps()
181 ti3 = VADD(cc[i + ido + 1], cc[i + 3*ido + 1]); in passf4_ps()
214 static NEVER_INLINE(void) passf5_ps(int ido, int l1, const v4sf *cc, v4sf *ch, in passf5_ps() argument
229 #define cc_ref(a_1,a_2) cc[(a_2-1)*ido + a_1 + 1] in passf5_ps()
230 #define ch_ref(a_1,a_3) ch[(a_3-1)*l1*ido + a_1 + 1] in passf5_ps()
232 assert(ido > 2); in passf5_ps()
233 for (k = 0; k < l1; ++k, cc += 5*ido, ch += ido) { in passf5_ps()
234 for (i = 0; i < ido-1; i += 2) { in passf5_ps()
281 static NEVER_INLINE(void) radf2_ps(int ido, int l1, const v4sf * RESTRICT cc, v4sf * RESTRICT ch, c… in radf2_ps() argument
283 int i, k, l1ido = l1*ido; in radf2_ps()
284 for (k=0; k < l1ido; k += ido) { in radf2_ps()
287 ch[2*(k+ido)-1] = VSUB(a, b); in radf2_ps()
289 if (ido < 2) return; in radf2_ps()
290 if (ido != 2) { in radf2_ps()
291 for (k=0; k < l1ido; k += ido) { in radf2_ps()
292 for (i=2; i<ido; i+=2) { in radf2_ps()
297 ch[2*(k+ido) - i] = VSUB(ti2, bi); in radf2_ps()
299 ch[2*(k+ido) - i -1] = VSUB(br, tr2); in radf2_ps()
302 if (ido % 2 == 1) return; in radf2_ps()
304 for (k=0; k < l1ido; k += ido) { in radf2_ps()
305 ch[2*k + ido] = SVMUL(minus_one, cc[ido-1 + k + l1ido]); in radf2_ps()
306 ch[2*k + ido-1] = cc[k + ido-1]; in radf2_ps()
311 static NEVER_INLINE(void) radb2_ps(int ido, int l1, const v4sf *cc, v4sf *ch, const float *wa1) { in radb2_ps() argument
313 int i, k, l1ido = l1*ido; in radb2_ps()
315 for (k=0; k < l1ido; k += ido) { in radb2_ps()
316 a = cc[2*k]; b = cc[2*(k+ido) - 1]; in radb2_ps()
320 if (ido < 2) return; in radb2_ps()
321 if (ido != 2) { in radb2_ps()
322 for (k = 0; k < l1ido; k += ido) { in radb2_ps()
323 for (i = 2; i < ido; i += 2) { in radb2_ps()
324 a = cc[i-1 + 2*k]; b = cc[2*(k + ido) - i - 1]; in radb2_ps()
325 c = cc[i+0 + 2*k]; d = cc[2*(k + ido) - i + 0]; in radb2_ps()
335 if (ido % 2 == 1) return; in radb2_ps()
337 for (k = 0; k < l1ido; k += ido) { in radb2_ps()
338 a = cc[2*k + ido-1]; b = cc[2*k + ido]; in radb2_ps()
339 ch[k + ido-1] = VADD(a,a); in radb2_ps()
340 ch[k + ido-1 + l1ido] = SVMUL(minus_two, b); in radb2_ps()
344 static void radf3_ps(int ido, int l1, const v4sf * RESTRICT cc, v4sf * RESTRICT ch, in radf3_ps() argument
351 cr2 = VADD(cc[(k + l1)*ido], cc[(k + 2*l1)*ido]); in radf3_ps()
352 ch[3*k*ido] = VADD(cc[k*ido], cr2); in radf3_ps()
353 ch[(3*k+2)*ido] = SVMUL(taui, VSUB(cc[(k + l1*2)*ido], cc[(k + l1)*ido])); in radf3_ps()
354 ch[ido-1 + (3*k + 1)*ido] = VADD(cc[k*ido], SVMUL(taur, cr2)); in radf3_ps()
356 if (ido == 1) return; in radf3_ps()
358 for (i=2; i<ido; i+=2) { in radf3_ps()
359 ic = ido - i; in radf3_ps()
361 dr2 = cc[i - 1 + (k + l1)*ido]; di2 = cc[i + (k + l1)*ido]; in radf3_ps()
365 dr3 = cc[i - 1 + (k + l1*2)*ido]; di3 = cc[i + (k + l1*2)*ido]; in radf3_ps()
370 ch[i - 1 + 3*k*ido] = VADD(cc[i - 1 + k*ido], cr2); in radf3_ps()
371 ch[i + 3*k*ido] = VADD(cc[i + k*ido], ci2); in radf3_ps()
372 tr2 = VADD(cc[i - 1 + k*ido], SVMUL(taur, cr2)); in radf3_ps()
373 ti2 = VADD(cc[i + k*ido], SVMUL(taur, ci2)); in radf3_ps()
376 ch[i - 1 + (3*k + 2)*ido] = VADD(tr2, tr3); in radf3_ps()
377 ch[ic - 1 + (3*k + 1)*ido] = VSUB(tr2, tr3); in radf3_ps()
378 ch[i + (3*k + 2)*ido] = VADD(ti2, ti3); in radf3_ps()
379 ch[ic + (3*k + 1)*ido] = VSUB(ti3, ti2); in radf3_ps()
385 static void radb3_ps(int ido, int l1, const v4sf *RESTRICT cc, v4sf *RESTRICT ch, in radb3_ps() argument
394 tr2 = cc[ido-1 + (3*k + 1)*ido]; tr2 = VADD(tr2,tr2); in radb3_ps()
395 cr2 = VMADD(LD_PS1(taur), tr2, cc[3*k*ido]); in radb3_ps()
396 ch[k*ido] = VADD(cc[3*k*ido], tr2); in radb3_ps()
397 ci3 = SVMUL(taui_2, cc[(3*k + 2)*ido]); in radb3_ps()
398 ch[(k + l1)*ido] = VSUB(cr2, ci3); in radb3_ps()
399 ch[(k + 2*l1)*ido] = VADD(cr2, ci3); in radb3_ps()
401 if (ido == 1) return; in radb3_ps()
403 for (i=2; i<ido; i+=2) { in radb3_ps()
404 ic = ido - i; in radb3_ps()
405 tr2 = VADD(cc[i - 1 + (3*k + 2)*ido], cc[ic - 1 + (3*k + 1)*ido]); in radb3_ps()
406 cr2 = VMADD(LD_PS1(taur), tr2, cc[i - 1 + 3*k*ido]); in radb3_ps()
407 ch[i - 1 + k*ido] = VADD(cc[i - 1 + 3*k*ido], tr2); in radb3_ps()
408 ti2 = VSUB(cc[i + (3*k + 2)*ido], cc[ic + (3*k + 1)*ido]); in radb3_ps()
409 ci2 = VMADD(LD_PS1(taur), ti2, cc[i + 3*k*ido]); in radb3_ps()
410 ch[i + k*ido] = VADD(cc[i + 3*k*ido], ti2); in radb3_ps()
411 cr3 = SVMUL(taui, VSUB(cc[i - 1 + (3*k + 2)*ido], cc[ic - 1 + (3*k + 1)*ido])); in radb3_ps()
412 ci3 = SVMUL(taui, VADD(cc[i + (3*k + 2)*ido], cc[ic + (3*k + 1)*ido])); in radb3_ps()
418 ch[i - 1 + (k + l1)*ido] = dr2; in radb3_ps()
419 ch[i + (k + l1)*ido] = di2; in radb3_ps()
421 ch[i - 1 + (k + 2*l1)*ido] = dr3; in radb3_ps()
422 ch[i + (k + 2*l1)*ido] = di3; in radb3_ps()
427 static NEVER_INLINE(void) radf4_ps(int ido, int l1, const v4sf *RESTRICT cc, v4sf * RESTRICT ch, in radf4_ps() argument
431 int i, k, l1ido = l1*ido; in radf4_ps()
441 ch[2*ido-1] = VSUB(a0, a2); in radf4_ps()
442 ch[2*ido ] = VSUB(a3, a1); in radf4_ps()
444 ch[4*ido-1] = VSUB(tr2, tr1); in radf4_ps()
445 cc += ido; ch += 4*ido; in radf4_ps()
449 if (ido < 2) return; in radf4_ps()
450 if (ido != 2) { in radf4_ps()
451 for (k = 0; k < l1ido; k += ido) { in radf4_ps()
453 for (i=2; i<ido; i += 2, pc += 2) { in radf4_ps()
454 int ic = ido - i; in radf4_ps()
483 ch[ic - 1 + 4*k + 3*ido] = VSUB(tr2,tr1); /* at this point tr1 and tr2 can be disposed */ in radf4_ps()
486 ch[i - 1 + 4*k + 2*ido] = VADD(ti4,tr3); in radf4_ps()
487 ch[ic - 1 + 4*k + 1*ido] = VSUB(tr3,ti4); /* dispose tr3, ti4 */ in radf4_ps()
491 ch[ic + 4*k + 3*ido] = VSUB(ti1, ti2); in radf4_ps()
492 ch[i + 4*k + 2*ido] = VADD(tr4, ti3); in radf4_ps()
493 ch[ic + 4*k + 1*ido] = VSUB(tr4, ti3); in radf4_ps()
496 if (ido % 2 == 1) return; in radf4_ps()
498 for (k=0; k<l1ido; k += ido) { in radf4_ps()
499 v4sf a = cc[ido-1 + k + l1ido], b = cc[ido-1 + k + 3*l1ido]; in radf4_ps()
500 v4sf c = cc[ido-1 + k], d = cc[ido-1 + k + 2*l1ido]; in radf4_ps()
503 ch[ido-1 + 4*k] = VADD(tr1, c); in radf4_ps()
504 ch[ido-1 + 4*k + 2*ido] = VSUB(c, tr1); in radf4_ps()
505 ch[4*k + 1*ido] = VSUB(ti1, d); in radf4_ps()
506 ch[4*k + 3*ido] = VADD(ti1, d); in radf4_ps()
511 static NEVER_INLINE(void) radb4_ps(int ido, int l1, const v4sf * RESTRICT cc, v4sf * RESTRICT ch, in radb4_ps() argument
516 int i, k, l1ido = l1*ido; in radb4_ps()
522 v4sf a = cc[0], b = cc[4*ido-1]; in radb4_ps()
523 v4sf c = cc[2*ido], d = cc[2*ido-1]; in radb4_ps()
533 cc += 4*ido; ch += ido; in radb4_ps()
537 if (ido < 2) return; in radb4_ps()
538 if (ido != 2) { in radb4_ps()
539 for (k = 0; k < l1ido; k += ido) { in radb4_ps()
542 for (i = 2; i < ido; i += 2) { in radb4_ps()
544 tr1 = VSUB(pc[i], pc[4*ido - i]); in radb4_ps()
545 tr2 = VADD(pc[i], pc[4*ido - i]); in radb4_ps()
546 ti4 = VSUB(pc[2*ido + i], pc[2*ido - i]); in radb4_ps()
547 tr3 = VADD(pc[2*ido + i], pc[2*ido - i]); in radb4_ps()
551 ti3 = VSUB(pc[2*ido + i + 1], pc[2*ido - i + 1]); in radb4_ps()
552 tr4 = VADD(pc[2*ido + i + 1], pc[2*ido - i + 1]); in radb4_ps()
556 ti1 = VADD(pc[i + 1], pc[4*ido - i + 1]); in radb4_ps()
557 ti2 = VSUB(pc[i + 1], pc[4*ido - i + 1]); in radb4_ps()
574 if (ido % 2 == 1) return; in radb4_ps()
576 for (k=0; k < l1ido; k+=ido) { in radb4_ps()
577 int i0 = 4*k + ido; in radb4_ps()
578 v4sf c = cc[i0-1], d = cc[i0 + 2*ido-1]; in radb4_ps()
579 v4sf a = cc[i0+0], b = cc[i0 + 2*ido+0]; in radb4_ps()
584 ch[ido-1 + k + 0*l1ido] = VADD(tr2,tr2); in radb4_ps()
585 ch[ido-1 + k + 1*l1ido] = SVMUL(minus_sqrt2, VSUB(ti1, tr1)); in radb4_ps()
586 ch[ido-1 + k + 2*l1ido] = VADD(ti2, ti2); in radb4_ps()
587 ch[ido-1 + k + 3*l1ido] = SVMUL(minus_sqrt2, VADD(ti1, tr1)); in radb4_ps()
591 static void radf5_ps(int ido, int l1, const v4sf * RESTRICT cc, v4sf * RESTRICT ch, in radf5_ps() argument
609 #define cc_ref(a_1,a_2,a_3) cc[((a_3)*l1 + (a_2))*ido + a_1] in radf5_ps()
610 #define ch_ref(a_1,a_2,a_3) ch[((a_3)*5 + (a_2))*ido + a_1] in radf5_ps()
613 ch_offset = 1 + ido * 6; in radf5_ps()
615 cc_offset = 1 + ido * (1 + l1); in radf5_ps()
625 ch_ref(ido, 2, k) = VADD(cc_ref(1, k, 1), VADD(SVMUL(tr11, cr2), SVMUL(tr12, cr3))); in radf5_ps()
627 ch_ref(ido, 4, k) = VADD(cc_ref(1, k, 1), VADD(SVMUL(tr12, cr2), SVMUL(tr11, cr3))); in radf5_ps()
631 if (ido == 1) { in radf5_ps()
634 idp2 = ido + 2; in radf5_ps()
636 for (i = 3; i <= ido; i += 2) { in radf5_ps()
678 static void radb5_ps(int ido, int l1, const v4sf *RESTRICT cc, v4sf *RESTRICT ch, in radb5_ps() argument
694 #define cc_ref(a_1,a_2,a_3) cc[((a_3)*5 + (a_2))*ido + a_1] in radb5_ps()
695 #define ch_ref(a_1,a_2,a_3) ch[((a_3)*l1 + (a_2))*ido + a_1] in radb5_ps()
698 ch_offset = 1 + ido * (1 + l1); in radb5_ps()
700 cc_offset = 1 + ido * 6; in radb5_ps()
707 tr2 = VADD(cc_ref(ido, 2, k), cc_ref(ido, 2, k)); in radb5_ps()
708 tr3 = VADD(cc_ref(ido, 4, k), cc_ref(ido, 4, k)); in radb5_ps()
719 if (ido == 1) { in radb5_ps()
722 idp2 = ido + 2; in radb5_ps()
724 for (i = 3; i <= ido; i += 2) { in radb5_ps()
779 int ido = n / l2; in rfftf1_ps() local
780 iw -= (ip - 1)*ido; in rfftf1_ps()
783 int ix2 = iw + ido; in rfftf1_ps()
784 int ix3 = ix2 + ido; in rfftf1_ps()
785 int ix4 = ix3 + ido; in rfftf1_ps()
786 radf5_ps(ido, l1, in, out, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4]); in rfftf1_ps()
789 int ix2 = iw + ido; in rfftf1_ps()
790 int ix3 = ix2 + ido; in rfftf1_ps()
791 radf4_ps(ido, l1, in, out, &wa[iw], &wa[ix2], &wa[ix3]); in rfftf1_ps()
794 int ix2 = iw + ido; in rfftf1_ps()
795 radf3_ps(ido, l1, in, out, &wa[iw], &wa[ix2]); in rfftf1_ps()
798 radf2_ps(ido, l1, in, out, &wa[iw]); in rfftf1_ps()
825 int ido = n / l2; in rfftb1_ps() local
828 int ix2 = iw + ido; in rfftb1_ps()
829 int ix3 = ix2 + ido; in rfftb1_ps()
830 int ix4 = ix3 + ido; in rfftb1_ps()
831 radb5_ps(ido, l1, in, out, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4]); in rfftb1_ps()
834 int ix2 = iw + ido; in rfftb1_ps()
835 int ix3 = ix2 + ido; in rfftb1_ps()
836 radb4_ps(ido, l1, in, out, &wa[iw], &wa[ix2], &wa[ix3]); in rfftb1_ps()
839 int ix2 = iw + ido; in rfftb1_ps()
840 radb3_ps(ido, l1, in, out, &wa[iw], &wa[ix2]); in rfftb1_ps()
843 radb2_ps(ido, l1, in, out, &wa[iw]); in rfftb1_ps()
850 iw += (ip - 1)*ido; in rfftb1_ps()
902 int ido = n / l2; in rffti1_ps() local
909 for (ii = 3; ii <= ido; ii += 2) { in rffti1_ps()
915 is += ido; in rffti1_ps()
934 int ido = n / l2; in cffti1_ps() local
935 int idot = ido + ido + 2; in cffti1_ps()
970 int ido = n / l2; in cfftf1_ps() local
971 int idot = ido + ido; in cfftf1_ps()