1 /********************************************************************
3 * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
8 * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2002 *
9 * by the XIPHOPHORUS Company http://www.xiph.org/ *
11 ********************************************************************
13 function: *unnormalized* fft transform
14 last mod: $Id: smallft.c,v 1.17 2002/07/11 06:40:50 xiphmont Exp $
16 ********************************************************************/
18 /* FFT implementation from OggSquish, minus cosine transforms,
19 * minus all but radix 2/4 case. In Vorbis we only need this
22 * To do more than just power-of-two sized vectors, see the full
23 * version I wrote for NetLib.
25 * Note that the packing is a little strange; rather than the FFT r/i
26 * packing following R_0, I_n, R_1, I_1, R_2, I_2 ... R_n-1, I_n-1,
27 * it follows R_0, R_1, I_1, R_2, I_2 ... R_n-1, I_n-1, I_n like the
/* drfti1: setup helper invoked by fdrffti, which passes wsave+n as wa.
 * Only scattered lines of this function are present in this excerpt, so
 * the notes here cover just the statements that are shown.
 * NOTE(review): presumably this factors n into ifac and fills wa with
 * trigonometric values -- confirm against the complete source. */
37 static void drfti1(int n, float *wa, int *ifac){
/* four candidate values in a fixed order (4,2,3,5); presumably the trial
 * factors for n -- the code that consumes them is not shown */
38 static int ntryh[4] = { 4,2,3,5 };
/* numerically 2*pi */
39 static float tpi = 6.28318530717958648f;
40 float arg,argh,argld,fi;
43 int ld, ii, ip, is, nq, nr;
83 for (k1=0;k1<nfm1;k1++){
/* ii advances by two, starting at 2 and stopping before ido */
95 for (ii=2;ii<ido;ii+=2){
/* fdrffti: initialization entry used by drft_init.
 *   n     - transform length, forwarded unchanged
 *   wsave - work array; drfti1 receives the region starting at wsave+n
 *   ifac  - integer array forwarded unchanged to drfti1
 * Any statements between the signature and the call are not visible in
 * this excerpt. */
107 static void fdrffti(int n, float *wsave, int *ifac){
110 drfti1(n, wsave+n, ifac);
/* dradf2: pass invoked from drftf1 with the c/ch buffers in either order.
 * In the visible statements cc and wa1 are only read and ch is only
 * written.
 * NOTE(review): presumably the radix-2 forward butterfly (name plus the
 * file header's "radix 2/4" remark) -- confirm. Loop headers and index
 * updates are not part of this excerpt. */
113 static void dradf2(int ido,int l1,float *cc,float *ch,float *wa1){
116 int t0,t1,t2,t3,t4,t5,t6;
/* sum and difference of cc[t1] and cc[t2] */
122 ch[t1<<1]=cc[t1]+cc[t2];
123 ch[(t1<<1)+t3-1]=cc[t1]-cc[t2];
/* reading (cc[t3-1],cc[t3]) and (wa1[i-2],wa1[i-1]) as (re,im) pairs,
 * (tr2,ti2) is the cc pair times the conjugate of the wa1 pair */
143 tr2=wa1[i-2]*cc[t3-1]+wa1[i-1]*cc[t3];
144 ti2=wa1[i-2]*cc[t3]-wa1[i-1]*cc[t3-1];
/* cc[t5-1] plus / minus the rotated real part */
147 ch[t6-1]=cc[t5-1]+tr2;
148 ch[t4-1]=cc[t5-1]-tr2;
/* dradf4: pass invoked from drftf1 with the c/ch buffers in either order
 * and three coefficient pointers (wa1, wa2, wa3). In the visible
 * statements cc and the wa arrays are only read and ch is only written.
 * NOTE(review): presumably the radix-4 forward butterfly -- confirm.
 * Loop headers and most index updates are not part of this excerpt. */
168 static void dradf4(int ido,int l1,float *cc,float *ch,float *wa1,
169 float *wa2,float *wa3){
/* numerically sqrt(2)/2 */
170 static float hsqt2 = .70710678118654752f;
171 int i,k,t0,t1,t2,t3,t4,t5,t6;
172 float ci2,ci3,ci4,cr2,cr3,cr4,ti1,ti2,ti3,ti4,tr1,tr2,tr3,tr4;
/* t5 is assigned inside the subscript and reused by the next lines */
184 ch[t5=t3<<2]=tr1+tr2;
185 ch[(ido<<2)+t5-1]=tr2-tr1;
/* t5 is advanced by 2*ido inside the subscript */
186 ch[(t5+=(ido<<1))-1]=cc[t3]-cc[t4];
187 ch[t5]=cc[t2]-cc[t1];
/* three rotations of the same form, one per coefficient array: reading
 * the operands as (re,im) pairs, each is a cc pair times the conjugate
 * of a wa pair. Updates to t3 between them, if any, are not shown. */
210 cr2=wa1[i-2]*cc[t3-1]+wa1[i-1]*cc[t3];
211 ci2=wa1[i-2]*cc[t3]-wa1[i-1]*cc[t3-1];
213 cr3=wa2[i-2]*cc[t3-1]+wa2[i-1]*cc[t3];
214 ci3=wa2[i-2]*cc[t3]-wa2[i-1]*cc[t3-1];
216 cr4=wa3[i-2]*cc[t3-1]+wa3[i-1]*cc[t3];
217 ci4=wa3[i-2]*cc[t3]-wa3[i-1]*cc[t3-1];
247 t2=(t1=t0+ido-1)+(t0<<1);
/* sum and difference of cc[t1], cc[t2] scaled by hsqt2 (sum negated) */
254 ti1=-hsqt2*(cc[t1]+cc[t2]);
255 tr1=hsqt2*(cc[t1]-cc[t2]);
257 ch[t4-1]=tr1+cc[t6-1];
258 ch[t4+t5-1]=cc[t6-1]-tr1;
260 ch[t4]=ti1-cc[t1+t0];
261 ch[t4+t5]=ti1+cc[t1+t0];
/* dradfg: pass invoked from drftf1 in its third dispatch branch.
 * At both call sites visible in drftf1, cc, c1 and c2 are the same
 * pointer and ch, ch2 are the same pointer, so the five array parameters
 * name only two distinct buffers there.
 * NOTE(review): presumably the general-radix forward pass with ip as the
 * radix -- confirm. Loop headers, index updates and any labels are not
 * part of this excerpt. */
270 static void dradfg(int ido,int ip,int l1,int idl1,float *cc,float *c1,
271 float *c2,float *ch,float *ch2,float *wa){
/* numerically 2*pi */
273 static float tpi=6.283185307179586f;
274 int idij,ipph,i,j,k,l,ic,ik,is;
275 int t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10;
276 float dc2,ai1,ai2,ar1,ar2,ds2;
278 float dcp,arg,dsp,ar1h,ar2h;
/* copy the first idl1 elements of c2 into ch2 */
292 for(ik=0;ik<idl1;ik++)ch2[ik]=c2[ik];
/* reading the operands as (re,im) pairs: c1 pair times the conjugate of
 * (wa[idij-1],wa[idij]). The same two statements occur again below; the
 * loops that distinguish the two copies are not shown. */
318 ch[t3-1]=wa[idij-1]*c1[t3-1]+wa[idij]*c1[t3];
319 ch[t3]=wa[idij-1]*c1[t3]-wa[idij]*c1[t3-1];
335 ch[t3-1]=wa[idij-1]*c1[t3-1]+wa[idij]*c1[t3];
336 ch[t3]=wa[idij-1]*c1[t3]-wa[idij]*c1[t3-1];
/* sums and differences of ch entries at t5 and t6, stored into c1; this
 * group of four statements also occurs twice */
359 c1[t5-1]=ch[t5-1]+ch[t6-1];
360 c1[t6-1]=ch[t5]-ch[t6];
361 c1[t5]=ch[t5]+ch[t6];
362 c1[t6]=ch[t6-1]-ch[t5-1];
378 c1[t5-1]=ch[t5-1]+ch[t6-1];
379 c1[t6-1]=ch[t5]-ch[t6];
380 c1[t5]=ch[t5]+ch[t6];
381 c1[t6]=ch[t6-1]-ch[t5-1];
/* copy the first idl1 elements of ch2 back into c2 */
390 for(ik=0;ik<idl1;ik++)c2[ik]=ch2[ik];
402 c1[t3]=ch[t3]+ch[t4];
403 c1[t4]=ch[t4]-ch[t3];
/* ar1h is a temporary built from (ar1,ai1) and (dcp,dsp); the statement
 * that consumes it is not shown */
415 ar1h=dcp*ar1-dsp*ai1;
423 for(ik=0;ik<idl1;ik++){
424 ch2[t4++]=c2[ik]+ar1*c2[t7++];
425 ch2[t5++]=ai1*c2[t6++];
/* same form as the ar1h line, using (ar2,ai2) and (dc2,ds2) */
439 ar2h=dc2*ar2-ds2*ai2;
/* accumulate into ch2 entries written earlier */
447 for(ik=0;ik<idl1;ik++){
448 ch2[t6++]+=ar2*c2[t8++];
449 ch2[t7++]+=ai2*c2[t9++];
458 for(ik=0;ik<idl1;ik++)ch2[ik]+=c2[t2++];
/* copy ido consecutive elements from ch (from t3) to cc (from t4) */
468 for(i=0;i<ido;i++)cc[t4++]=ch[t3++];
/* sums and differences of ch entries stored into cc; the indexed (i/ic)
 * form and the plain form below perform the same arithmetic */
529 cc[i+t7-1]=ch[i+t8-1]+ch[i+t9-1];
530 cc[ic+t6-1]=ch[i+t8-1]-ch[i+t9-1];
531 cc[i+t7]=ch[i+t8]+ch[i+t9];
532 cc[ic+t6]=ch[i+t9]-ch[i+t8];
559 cc[t7-1]=ch[t8-1]+ch[t9-1];
560 cc[t6-1]=ch[t8-1]-ch[t9-1];
561 cc[t7]=ch[t8]+ch[t9];
562 cc[t6]=ch[t9]-ch[t8];
/* drftf1: driver called by drft_forward with c = caller data,
 * ch = start of the trig cache, wa = trig cache + n, ifac = split cache.
 * Within a loop of nf iterations it dispatches to dradf4, dradf2 or
 * dradfg. Each has two call forms with c and ch swapped, so one buffer is
 * the input and the other the output of a given pass. The conditions
 * selecting the branch and the call form are not part of this excerpt.
 * Coefficient pointers are passed as wa+offset-1; the callees index them
 * as wa1[i-2]/wa1[i-1] (or wa[idij-1]/wa[idij] in dradfg). */
572 static void drftf1(int n,float *c,float *ch,float *wa,int *ifac){
575 int ip,iw,ido,idl1,ix2,ix3;
582 for(k1=0;k1<nf;k1++){
596 dradf4(ido,l1,ch,c,wa+iw-1,wa+ix2-1,wa+ix3-1);
598 dradf4(ido,l1,c,ch,wa+iw-1,wa+ix2-1,wa+ix3-1);
605 dradf2(ido,l1,c,ch,wa+iw-1);
609 dradf2(ido,l1,ch,c,wa+iw-1);
/* dradfg receives the same buffer for cc, c1, c2 and the other buffer
 * for ch, ch2 */
616 dradfg(ido,ip,l1,idl1,c,c,c,ch,ch,wa+iw-1);
621 dradfg(ido,ip,l1,idl1,ch,ch,ch,c,c,wa+iw-1);
/* copy n elements from ch into c; presumably only when the final result
 * ended up in ch -- the guarding condition is not shown */
630 for(i=0;i<n;i++)c[i]=ch[i];
/* dradb2: pass invoked from drftb1 with the c/ch buffers in either order.
 * In the visible statements cc and wa1 are only read and ch is only
 * written.
 * NOTE(review): presumably the radix-2 backward butterfly -- confirm.
 * Loop headers and index updates are not part of this excerpt. */
633 static void dradb2(int ido,int l1,float *cc,float *ch,float *wa1){
634 int i,k,t0,t1,t2,t3,t4,t5,t6;
/* sum and difference of cc[t2] and cc[t3+t2] */
643 ch[t1]=cc[t2]+cc[t3+t2];
644 ch[t1+t0]=cc[t2]-cc[t3+t2];
662 ch[t3-1]=cc[t4-1]+cc[t5-1];
663 tr2=cc[t4-1]-cc[t5-1];
664 ch[t3]=cc[t4]-cc[t5];
/* reading the operands as (re,im) pairs: (tr2,ti2) times
 * (wa1[i-2],wa1[i-1]), not conjugated; the assignment to ti2 is not
 * shown */
666 ch[t6-1]=wa1[i-2]*tr2-wa1[i-1]*ti2;
667 ch[t6]=wa1[i-2]*ti2+wa1[i-1]*tr2;
/* twice cc[t2], and minus twice cc[t2+1] */
678 ch[t1]=cc[t2]+cc[t2];
679 ch[t1+t0]=-(cc[t2+1]+cc[t2+1]);
/* dradb3: pass invoked from drftb1 with six arguments. The parameter list
 * continues on a line that is not part of this excerpt; the body reads
 * wa2[...], so wa2 is presumably the sixth parameter.
 * NOTE(review): presumably the radix-3 backward butterfly -- confirm.
 * Loop headers and index updates are not shown. */
685 static void dradb3(int ido,int l1,float *cc,float *ch,float *wa1,
/* -0.5 and sqrt(3)/2 numerically, i.e. cos and sin of 2*pi/3 */
687 static float taur = -.5f;
688 static float taui = .8660254037844386f;
689 int i,k,t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10;
690 float ci2,ci3,di2,di3,cr2,cr3,dr2,dr3,ti2,tr2;
/* twice cc[t3-1] */
699 tr2=cc[t3-1]+cc[t3-1];
700 cr2=cc[t5]+(taur*tr2);
/* taui times twice cc[t3] */
702 ci3=taui*(cc[t3]+cc[t3]);
727 tr2=cc[t5-1]+cc[t6-1];
728 cr2=cc[t7-1]+(taur*tr2);
729 ch[t8-1]=cc[t7-1]+tr2;
731 ci2=cc[t7]+(taur*ti2);
733 cr3=taui*(cc[t5-1]-cc[t6-1]);
734 ci3=taui*(cc[t5]+cc[t6]);
/* reading the operands as (re,im) pairs: (dr2,di2) times the wa1 pair
 * and (dr3,di3) times the wa2 pair, not conjugated; the assignments to
 * dr2, di2, dr3 and di3 are not shown */
739 ch[t9-1]=wa1[i-2]*dr2-wa1[i-1]*di2;
740 ch[t9]=wa1[i-2]*di2+wa1[i-1]*dr2;
741 ch[t10-1]=wa2[i-2]*dr3-wa2[i-1]*di3;
742 ch[t10]=wa2[i-2]*di3+wa2[i-1]*dr3;
/* dradb4: pass invoked from drftb1 with the c/ch buffers in either order
 * and three coefficient pointers (wa1, wa2, wa3). In the visible
 * statements cc and the wa arrays are only read and ch is only written.
 * NOTE(review): presumably the radix-4 backward butterfly -- confirm.
 * Loop headers and most index updates are not part of this excerpt. */
748 static void dradb4(int ido,int l1,float *cc,float *ch,float *wa1,
749 float *wa2,float *wa3){
/* numerically sqrt(2) */
750 static float sqrt2=1.414213562373095f;
751 int i,k,t0,t1,t2,t3,t4,t5,t6,t7,t8;
752 float ci2,ci3,ci4,cr2,cr3,cr4,ti1,ti2,ti3,ti4,tr1,tr2,tr3,tr4;
/* twice cc[t4-1] */
762 tr3=cc[t4-1]+cc[t4-1];
/* t4 is advanced by t6 inside the subscript */
764 tr1=cc[t3]-cc[(t4+=t6)-1];
/* chained assignment: t2=t1*4, t3=t2+t6, t4=t3, t5=t4+t6 */
779 t5=(t4=(t3=(t2=t1<<2)+t6))+t6;
791 tr1=cc[t2-1]-cc[t5-1];
792 tr2=cc[t2-1]+cc[t5-1];
793 ti4=cc[t3-1]-cc[t4-1];
794 tr3=cc[t3-1]+cc[t4-1];
/* three non-conjugated rotations by the wa1, wa2 and wa3 pairs; t8 is
 * set to t7+t0 and then advanced by t0 inside the subscripts */
804 ch[(t8=t7+t0)-1]=wa1[i-2]*cr2-wa1[i-1]*ci2;
805 ch[t8]=wa1[i-2]*ci2+wa1[i-1]*cr2;
806 ch[(t8+=t0)-1]=wa2[i-2]*cr3-wa2[i-1]*ci3;
807 ch[t8]=wa2[i-2]*ci3+wa2[i-1]*cr3;
808 ch[(t8+=t0)-1]=wa3[i-2]*cr4-wa3[i-1]*ci4;
809 ch[t8]=wa3[i-2]*ci4+wa3[i-1]*cr4;
/* leave here when ido%2 equals 1; the statements that follow therefore
 * run only for the remaining values of ido */
814 if(ido%2 == 1)return;
826 tr1=cc[t1-1]-cc[t4-1];
827 tr2=cc[t1-1]+cc[t4-1];
/* t5 is advanced by t0 inside each subscript before the store */
829 ch[t5+=t0]=sqrt2*(tr1-ti1);
831 ch[t5+=t0]=-sqrt2*(tr1+ti1);
/* dradbg: pass invoked from drftb1 in its last dispatch branch.
 * At both call sites visible in drftb1, cc, c1 and c2 are the same
 * pointer and ch, ch2 are the same pointer, so the five array parameters
 * name only two distinct buffers there.
 * Control flow uses goto to the labels L116, L128, L132 and L139, none of
 * which appear in this excerpt. The int declaration ends in a comma, so
 * it continues on a line that is not shown; t11, t12 and nbd, used below,
 * are presumably declared there.
 * NOTE(review): presumably the general-radix backward pass with ip as the
 * radix -- confirm. */
839 static void dradbg(int ido,int ip,int l1,int idl1,float *cc,float *c1,
840 float *c2,float *ch,float *ch2,float *wa){
/* numerically 2*pi */
841 static float tpi=6.283185307179586f;
842 int idij,ipph,i,j,k,l,ik,is,t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,
844 float dc2,ai1,ai2,ar1,ar2,ds2;
846 float dcp,arg,dsp,ar1h,ar2h;
/* twice cc[t6-1] and twice cc[t6] */
898 ch[t3]=cc[t6-1]+cc[t6-1];
899 ch[t4]=cc[t6]+cc[t6];
907 if (ido == 1)goto L116;
/* sums and differences of cc entries stored into ch; the same four-line
 * pattern appears twice with different index variables */
931 ch[t5-1]=cc[t9-1]+cc[t11-1];
932 ch[t6-1]=cc[t9-1]-cc[t11-1];
933 ch[t5]=cc[t9]-cc[t11];
934 ch[t6]=cc[t9]+cc[t11];
965 ch[t5-1]=cc[t11-1]+cc[t12-1];
966 ch[t6-1]=cc[t11-1]-cc[t12-1];
967 ch[t5]=cc[t11]-cc[t12];
968 ch[t6]=cc[t11]+cc[t12];
/* ar1h is a temporary built from (ar1,ai1) and (dcp,dsp); the statement
 * that consumes it is not shown */
987 ar1h=dcp*ar1-dsp*ai1;
995 for(ik=0;ik<idl1;ik++){
996 c2[t4++]=ch2[t6++]+ar1*ch2[t7++];
997 c2[t5++]=ai1*ch2[t8++];
1006 for(j=2;j<ipph;j++){
/* (ar2,ai2) advanced by a rotation with (dc2,ds2); ar2h holds the new
 * ar2 so the ai2 update can still read the old ar2 */
1009 ar2h=dc2*ar2-ds2*ai2;
1010 ai2=dc2*ai2+ds2*ar2;
/* accumulate into c2 entries written earlier */
1016 for(ik=0;ik<idl1;ik++){
1017 c2[t4++]+=ar2*ch2[t11++];
1018 c2[t5++]+=ai2*ch2[t12++];
1024 for(j=1;j<ipph;j++){
/* add idl1 elements of ch2, read from offset t2 on, onto the first idl1
 * elements of ch2 */
1027 for(ik=0;ik<idl1;ik++)ch2[ik]+=ch2[t2++];
1032 for(j=1;j<ipph;j++){
1038 ch[t3]=c1[t3]-c1[t4];
1039 ch[t4]=c1[t3]+c1[t4];
1045 if(ido==1)goto L132;
/* nbd versus l1 selects between two code paths; the definition of nbd
 * is not shown */
1046 if(nbd<l1)goto L128;
1050 for(j=1;j<ipph;j++){
1058 for(i=2;i<ido;i+=2){
1061 ch[t5-1]=c1[t5-1]-c1[t6];
1062 ch[t6-1]=c1[t5-1]+c1[t6];
1063 ch[t5]=c1[t5]+c1[t6-1];
1064 ch[t6]=c1[t5]-c1[t6-1];
1075 for(j=1;j<ipph;j++){
1080 for(i=2;i<ido;i+=2){
1086 ch[t5-1]=c1[t5-1]-c1[t6];
1087 ch[t6-1]=c1[t5-1]+c1[t6];
1088 ch[t5]=c1[t5]+c1[t6-1];
1089 ch[t6]=c1[t5]-c1[t6-1];
/* copy the first idl1 elements of ch2 into c2 */
1099 for(ik=0;ik<idl1;ik++)c2[ik]=ch2[ik];
1110 if(nbd>l1)goto L139;
1119 for(i=2;i<ido;i+=2){
/* reading the operands as (re,im) pairs: ch pair times
 * (wa[idij-1],wa[idij]), not conjugated; repeated in the second loop */
1124 c1[t3-1]=wa[idij-1]*ch[t3-1]-wa[idij]*ch[t3];
1125 c1[t3]=wa[idij-1]*ch[t3]+wa[idij]*ch[t3-1];
1142 for(i=2;i<ido;i+=2){
1145 c1[t3-1]=wa[idij-1]*ch[t3-1]-wa[idij]*ch[t3];
1146 c1[t3]=wa[idij-1]*ch[t3]+wa[idij]*ch[t3-1];
/* drftb1: driver called by drft_backward with c = caller data,
 * ch = start of the trig cache, wa = trig cache + n, ifac = split cache.
 * Within a loop of nf iterations it dispatches to dradb4, dradb2, dradb3
 * or dradbg. Each has two call forms with c and ch swapped, so one buffer
 * is the input and the other the output of a given pass. The conditions
 * selecting the branch and the call form are not part of this excerpt.
 * Coefficient pointers are passed as wa+offset-1. dradbg receives the
 * same buffer for cc, c1, c2 and the other buffer for ch, ch2.
 * The dradb5 calls sit after a comment opener whose terminator is not in
 * this excerpt, so they appear to be disabled code -- confirm.
 * The final loop copies n elements from ch into c; presumably only when
 * the result ended up in ch -- the guarding condition is not shown. */
1153 static void drftb1(int n, float *c, float *ch, float *wa, int *ifac){
1156 int nf,ip,iw,ix2,ix3,ido,idl1;
1163 for(k1=0;k1<nf;k1++){
1173 dradb4(ido,l1,ch,c,wa+iw-1,wa+ix2-1,wa+ix3-1);
1175 dradb4(ido,l1,c,ch,wa+iw-1,wa+ix2-1,wa+ix3-1);
1183 dradb2(ido,l1,ch,c,wa+iw-1);
1185 dradb2(ido,l1,c,ch,wa+iw-1);
1194 dradb3(ido,l1,ch,c,wa+iw-1,wa+ix2-1);
1196 dradb3(ido,l1,c,ch,wa+iw-1,wa+ix2-1);
1201 /* The radix five case can be translated later..... */
1202 /* if(ip!=5)goto L112;
1208 dradb5(ido,l1,ch,c,wa+iw-1,wa+ix2-1,wa+ix3-1,wa+ix4-1);
1210 dradb5(ido,l1,c,ch,wa+iw-1,wa+ix2-1,wa+ix3-1,wa+ix4-1);
1216 dradbg(ido,ip,l1,idl1,ch,ch,ch,c,c,wa+iw-1);
1218 dradbg(ido,ip,l1,idl1,c,c,c,ch,ch,wa+iw-1);
1228 for(i=0;i<n;i++)c[i]=ch[i];
/* drft_forward: public entry for the forward transform.
 *   l    - lookup holding n, trigcache and splitcache
 *   data - array handed to drftf1 as its c argument
 * The first n floats of l->trigcache are passed as drftf1's ch buffer and
 * the region from l->trigcache+l->n on as its wa argument.
 * Any statement between the signature and the call is not visible in
 * this excerpt. */
1231 void drft_forward(drft_lookup *l,float *data){
1233 drftf1(l->n,data,l->trigcache,l->trigcache+l->n,l->splitcache);
/* drft_backward: public entry for the backward transform.
 *   l    - lookup holding n, trigcache and splitcache
 *   data - array handed to drftb1 as its c argument
 * The first n floats of l->trigcache are passed as drftb1's ch buffer and
 * the region from l->trigcache+l->n on as its wa argument.
 * Any statement between the signature and the call is not visible in
 * this excerpt. */
1236 void drft_backward(drft_lookup *l,float *data){
1238 drftb1(l->n,data,l->trigcache,l->trigcache+l->n,l->splitcache);
/* drft_init: allocates the two caches of a drft_lookup and runs fdrffti
 * on them.
 *   l - lookup to fill in
 *   n - transform length
 * trigcache gets 3*n zero-initialised elements; drft_forward and
 * drft_backward use its first n as a work buffer and pass the rest as
 * the coefficient area. splitcache gets 32 zero-initialised elements.
 * NOTE(review): in the visible lines neither _ogg_calloc result is
 * checked before fdrffti uses them, and 3*n is computed in int -- confirm
 * against the complete source and the callers' range for n.
 * Any statement between the signature and the first allocation is not
 * visible in this excerpt. */
1241 void drft_init(drft_lookup *l,int n){
1243 l->trigcache=_ogg_calloc(3*n,sizeof(*l->trigcache));
1244 l->splitcache=_ogg_calloc(32,sizeof(*l->splitcache));
1245 fdrffti(n, l->trigcache, l->splitcache);
/* drft_clear: releases what drft_init allocated.
 *   l - lookup to tear down
 * Each cache is freed only when its pointer is non-null, then the whole
 * struct is overwritten with zero bytes so stale pointers are not left
 * behind.
 * NOTE(review): l itself is dereferenced in the visible lines; whether an
 * enclosing null check on l exists cannot be told from this excerpt. */
1248 void drft_clear(drft_lookup *l){
1250 if(l->trigcache)_ogg_free(l->trigcache);
1251 if(l->splitcache)_ogg_free(l->splitcache);
1252 memset(l,0,sizeof(*l));