402 lines
11 KiB
C
402 lines
11 KiB
C
/*
|
|
* Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
|
|
* This file is part of KISS FFT - https://github.com/mborgerding/kissfft
|
|
*
|
|
* SPDX-License-Identifier: BSD-3-Clause
|
|
* See COPYING file for more information.
|
|
*/
|
|
|
|
|
|
#include "_kiss_fft_guts.h"
|
|
/* The guts header contains all the multiplication and addition macros that are defined for
|
|
fixed or floating point complex numbers. It also delares the kf_ internal functions.
|
|
*/
|
|
|
|
static void kf_bfly2(
|
|
kiss_fft_cpx * Fout,
|
|
const size_t fstride,
|
|
const kiss_fft_cfg st,
|
|
int m
|
|
)
|
|
{
|
|
kiss_fft_cpx * Fout2;
|
|
kiss_fft_cpx * tw1 = st->twiddles;
|
|
kiss_fft_cpx t;
|
|
Fout2 = Fout + m;
|
|
do{
|
|
C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
|
|
|
|
C_MUL (t, *Fout2 , *tw1);
|
|
tw1 += fstride;
|
|
C_SUB( *Fout2 , *Fout , t );
|
|
C_ADDTO( *Fout , t );
|
|
++Fout2;
|
|
++Fout;
|
|
}while (--m);
|
|
}
|
|
|
|
static void kf_bfly4(
|
|
kiss_fft_cpx * Fout,
|
|
const size_t fstride,
|
|
const kiss_fft_cfg st,
|
|
const size_t m
|
|
)
|
|
{
|
|
kiss_fft_cpx *tw1,*tw2,*tw3;
|
|
kiss_fft_cpx scratch[6];
|
|
size_t k=m;
|
|
const size_t m2=2*m;
|
|
const size_t m3=3*m;
|
|
|
|
|
|
tw3 = tw2 = tw1 = st->twiddles;
|
|
|
|
do {
|
|
C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
|
|
|
|
C_MUL(scratch[0],Fout[m] , *tw1 );
|
|
C_MUL(scratch[1],Fout[m2] , *tw2 );
|
|
C_MUL(scratch[2],Fout[m3] , *tw3 );
|
|
|
|
C_SUB( scratch[5] , *Fout, scratch[1] );
|
|
C_ADDTO(*Fout, scratch[1]);
|
|
C_ADD( scratch[3] , scratch[0] , scratch[2] );
|
|
C_SUB( scratch[4] , scratch[0] , scratch[2] );
|
|
C_SUB( Fout[m2], *Fout, scratch[3] );
|
|
tw1 += fstride;
|
|
tw2 += fstride*2;
|
|
tw3 += fstride*3;
|
|
C_ADDTO( *Fout , scratch[3] );
|
|
|
|
if(st->inverse) {
|
|
Fout[m].r = scratch[5].r - scratch[4].i;
|
|
Fout[m].i = scratch[5].i + scratch[4].r;
|
|
Fout[m3].r = scratch[5].r + scratch[4].i;
|
|
Fout[m3].i = scratch[5].i - scratch[4].r;
|
|
}else{
|
|
Fout[m].r = scratch[5].r + scratch[4].i;
|
|
Fout[m].i = scratch[5].i - scratch[4].r;
|
|
Fout[m3].r = scratch[5].r - scratch[4].i;
|
|
Fout[m3].i = scratch[5].i + scratch[4].r;
|
|
}
|
|
++Fout;
|
|
}while(--k);
|
|
}
|
|
|
|
static void kf_bfly3(
|
|
kiss_fft_cpx * Fout,
|
|
const size_t fstride,
|
|
const kiss_fft_cfg st,
|
|
size_t m
|
|
)
|
|
{
|
|
size_t k=m;
|
|
const size_t m2 = 2*m;
|
|
kiss_fft_cpx *tw1,*tw2;
|
|
kiss_fft_cpx scratch[5];
|
|
kiss_fft_cpx epi3;
|
|
epi3 = st->twiddles[fstride*m];
|
|
|
|
tw1=tw2=st->twiddles;
|
|
|
|
do{
|
|
C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
|
|
|
|
C_MUL(scratch[1],Fout[m] , *tw1);
|
|
C_MUL(scratch[2],Fout[m2] , *tw2);
|
|
|
|
C_ADD(scratch[3],scratch[1],scratch[2]);
|
|
C_SUB(scratch[0],scratch[1],scratch[2]);
|
|
tw1 += fstride;
|
|
tw2 += fstride*2;
|
|
|
|
Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
|
|
Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
|
|
|
|
C_MULBYSCALAR( scratch[0] , epi3.i );
|
|
|
|
C_ADDTO(*Fout,scratch[3]);
|
|
|
|
Fout[m2].r = Fout[m].r + scratch[0].i;
|
|
Fout[m2].i = Fout[m].i - scratch[0].r;
|
|
|
|
Fout[m].r -= scratch[0].i;
|
|
Fout[m].i += scratch[0].r;
|
|
|
|
++Fout;
|
|
}while(--k);
|
|
}
|
|
|
|
static void kf_bfly5(
|
|
kiss_fft_cpx * Fout,
|
|
const size_t fstride,
|
|
const kiss_fft_cfg st,
|
|
int m
|
|
)
|
|
{
|
|
kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
|
|
int u;
|
|
kiss_fft_cpx scratch[13];
|
|
kiss_fft_cpx * twiddles = st->twiddles;
|
|
kiss_fft_cpx *tw;
|
|
kiss_fft_cpx ya,yb;
|
|
ya = twiddles[fstride*m];
|
|
yb = twiddles[fstride*2*m];
|
|
|
|
Fout0=Fout;
|
|
Fout1=Fout0+m;
|
|
Fout2=Fout0+2*m;
|
|
Fout3=Fout0+3*m;
|
|
Fout4=Fout0+4*m;
|
|
|
|
tw=st->twiddles;
|
|
for ( u=0; u<m; ++u ) {
|
|
C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
|
|
scratch[0] = *Fout0;
|
|
|
|
C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
|
|
C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
|
|
C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
|
|
C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
|
|
|
|
C_ADD( scratch[7],scratch[1],scratch[4]);
|
|
C_SUB( scratch[10],scratch[1],scratch[4]);
|
|
C_ADD( scratch[8],scratch[2],scratch[3]);
|
|
C_SUB( scratch[9],scratch[2],scratch[3]);
|
|
|
|
Fout0->r += scratch[7].r + scratch[8].r;
|
|
Fout0->i += scratch[7].i + scratch[8].i;
|
|
|
|
scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
|
|
scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
|
|
|
|
scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
|
|
scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
|
|
|
|
C_SUB(*Fout1,scratch[5],scratch[6]);
|
|
C_ADD(*Fout4,scratch[5],scratch[6]);
|
|
|
|
scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
|
|
scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
|
|
scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
|
|
scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
|
|
|
|
C_ADD(*Fout2,scratch[11],scratch[12]);
|
|
C_SUB(*Fout3,scratch[11],scratch[12]);
|
|
|
|
++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
|
|
}
|
|
}
|
|
|
|
/* perform the butterfly for one stage of a mixed radix FFT */
|
|
static void kf_bfly_generic(
|
|
kiss_fft_cpx * Fout,
|
|
const size_t fstride,
|
|
const kiss_fft_cfg st,
|
|
int m,
|
|
int p
|
|
)
|
|
{
|
|
int u,k,q1,q;
|
|
kiss_fft_cpx * twiddles = st->twiddles;
|
|
kiss_fft_cpx t;
|
|
int Norig = st->nfft;
|
|
|
|
kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p);
|
|
|
|
for ( u=0; u<m; ++u ) {
|
|
k=u;
|
|
for ( q1=0 ; q1<p ; ++q1 ) {
|
|
scratch[q1] = Fout[ k ];
|
|
C_FIXDIV(scratch[q1],p);
|
|
k += m;
|
|
}
|
|
|
|
k=u;
|
|
for ( q1=0 ; q1<p ; ++q1 ) {
|
|
int twidx=0;
|
|
Fout[ k ] = scratch[0];
|
|
for (q=1;q<p;++q ) {
|
|
twidx += fstride * k;
|
|
if (twidx>=Norig) twidx-=Norig;
|
|
C_MUL(t,scratch[q] , twiddles[twidx] );
|
|
C_ADDTO( Fout[ k ] ,t);
|
|
}
|
|
k += m;
|
|
}
|
|
}
|
|
KISS_FFT_TMP_FREE(scratch);
|
|
}
|
|
|
|
static
|
|
void kf_work(
|
|
kiss_fft_cpx * Fout,
|
|
const kiss_fft_cpx * f,
|
|
const size_t fstride,
|
|
int in_stride,
|
|
int * factors,
|
|
const kiss_fft_cfg st
|
|
)
|
|
{
|
|
kiss_fft_cpx * Fout_beg=Fout;
|
|
const int p=*factors++; /* the radix */
|
|
const int m=*factors++; /* stage's fft length/p */
|
|
const kiss_fft_cpx * Fout_end = Fout + p*m;
|
|
|
|
#ifdef _OPENMP
|
|
// use openmp extensions at the
|
|
// top-level (not recursive)
|
|
if (fstride==1 && p<=5)
|
|
{
|
|
int k;
|
|
|
|
// execute the p different work units in different threads
|
|
# pragma omp parallel for
|
|
for (k=0;k<p;++k)
|
|
kf_work( Fout +k*m, f+ fstride*in_stride*k,fstride*p,in_stride,factors,st);
|
|
// all threads have joined by this point
|
|
|
|
switch (p) {
|
|
case 2: kf_bfly2(Fout,fstride,st,m); break;
|
|
case 3: kf_bfly3(Fout,fstride,st,m); break;
|
|
case 4: kf_bfly4(Fout,fstride,st,m); break;
|
|
case 5: kf_bfly5(Fout,fstride,st,m); break;
|
|
default: kf_bfly_generic(Fout,fstride,st,m,p); break;
|
|
}
|
|
return;
|
|
}
|
|
#endif
|
|
|
|
if (m==1) {
|
|
do{
|
|
*Fout = *f;
|
|
f += fstride*in_stride;
|
|
}while(++Fout != Fout_end );
|
|
}else{
|
|
do{
|
|
// recursive call:
|
|
// DFT of size m*p performed by doing
|
|
// p instances of smaller DFTs of size m,
|
|
// each one takes a decimated version of the input
|
|
kf_work( Fout , f, fstride*p, in_stride, factors,st);
|
|
f += fstride*in_stride;
|
|
}while( (Fout += m) != Fout_end );
|
|
}
|
|
|
|
Fout=Fout_beg;
|
|
|
|
// recombine the p smaller DFTs
|
|
switch (p) {
|
|
case 2: kf_bfly2(Fout,fstride,st,m); break;
|
|
case 3: kf_bfly3(Fout,fstride,st,m); break;
|
|
case 4: kf_bfly4(Fout,fstride,st,m); break;
|
|
case 5: kf_bfly5(Fout,fstride,st,m); break;
|
|
default: kf_bfly_generic(Fout,fstride,st,m,p); break;
|
|
}
|
|
}
|
|
|
|
/* facbuf is populated by p1,m1,p2,m2, ...
|
|
where
|
|
p[i] * m[i] = m[i-1]
|
|
m0 = n */
|
|
static
|
|
void kf_factor(int n,int * facbuf)
|
|
{
|
|
int p=4;
|
|
double floor_sqrt;
|
|
floor_sqrt = floor( sqrt((double)n) );
|
|
|
|
/*factor out powers of 4, powers of 2, then any remaining primes */
|
|
do {
|
|
while (n % p) {
|
|
switch (p) {
|
|
case 4: p = 2; break;
|
|
case 2: p = 3; break;
|
|
default: p += 2; break;
|
|
}
|
|
if (p > floor_sqrt)
|
|
p = n; /* no more factors, skip to end */
|
|
}
|
|
n /= p;
|
|
*facbuf++ = p;
|
|
*facbuf++ = n;
|
|
} while (n > 1);
|
|
}
|
|
|
|
/*
|
|
*
|
|
* User-callable function to allocate all necessary storage space for the fft.
|
|
*
|
|
* The return value is a contiguous block of memory, allocated with malloc. As such,
|
|
* It can be freed with free(), rather than a kiss_fft-specific function.
|
|
* */
|
|
kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
|
|
{
|
|
kiss_fft_cfg st=NULL;
|
|
size_t memneeded = sizeof(struct kiss_fft_state)
|
|
+ sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/
|
|
|
|
if ( lenmem==NULL ) {
|
|
st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
|
|
}else{
|
|
if (mem != NULL && *lenmem >= memneeded)
|
|
st = (kiss_fft_cfg)mem;
|
|
*lenmem = memneeded;
|
|
}
|
|
if (st) {
|
|
int i;
|
|
st->nfft=nfft;
|
|
st->inverse = inverse_fft;
|
|
|
|
for (i=0;i<nfft;++i) {
|
|
const double pi=3.141592653589793238462643383279502884197169399375105820974944;
|
|
double phase = -2*pi*i / nfft;
|
|
if (st->inverse)
|
|
phase *= -1;
|
|
kf_cexp(st->twiddles+i, phase );
|
|
}
|
|
|
|
kf_factor(nfft,st->factors);
|
|
}
|
|
return st;
|
|
}
|
|
|
|
|
|
void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
|
|
{
|
|
if (fin == fout) {
|
|
//NOTE: this is not really an in-place FFT algorithm.
|
|
//It just performs an out-of-place FFT into a temp buffer
|
|
kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft);
|
|
kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
|
|
memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
|
|
KISS_FFT_TMP_FREE(tmpbuf);
|
|
}else{
|
|
kf_work( fout, fin, 1,in_stride, st->factors,st );
|
|
}
|
|
}
|
|
|
|
void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
|
|
{
|
|
kiss_fft_stride(cfg,fin,fout,1);
|
|
}
|
|
|
|
|
|
void kiss_fft_cleanup(void)
|
|
{
|
|
// nothing needed any more
|
|
}
|
|
|
|
int kiss_fft_next_fast_size(int n)
|
|
{
|
|
while(1) {
|
|
int m=n;
|
|
while ( (m%2) == 0 ) m/=2;
|
|
while ( (m%3) == 0 ) m/=3;
|
|
while ( (m%5) == 0 ) m/=5;
|
|
if (m<=1)
|
|
break; /* n is completely factorable by twos, threes, and fives */
|
|
n++;
|
|
}
|
|
return n;
|
|
}
|