Merged origin/master into feature/digitalmods

2017-04-18 11:34:13 +02:00 · 2017-04-18 11:34:13 +02:00 · 2c3fa2fde9
commit 2c3fa2fde9
parent 9c5b050560 47333b6690
6 changed files with 2213 additions and 1554 deletions
--- a/README.md
+++ b/README.md
@ -554,11 +554,28 @@ The output sample rate will be `interpolation / decimation × input_sample_rate`

 Syntax: 

-	csdr fractional_decimator_ff <decimation_rate> [transition_bw [window]]
+	csdr fractional_decimator_ff <decimation_rate> [num_poly_points ( [transition_bw [window]] | --prefilter )]

 It can decimate by a floating point ratio.

-`transition_bw` and `window` are the parameters of the filter.
+It uses Lagrange interpolation, where `num_poly_points` (12 by default) input samples are taken into consideration while calculating one output sample. 
+
+It can filter the signal with an anti-aliasing FIR filter before applying the Lagrange interpolation. This filter is inactive by default, but can be activated by:
+
+* passing only the `transition_bw`, or both the `transition_bw` and the `window` parameters of the filter,
+* using the `--prefilter` switch after `num_poly_points` to switch this filter on with the default parameters.
+
+----
+
+### [old_fractional_decimator_ff](#old_fractional_decimator_ff)
+
+Syntax: 
+
+	csdr old_fractional_decimator_ff <decimation_rate> [transition_bw [window]]
+
+This is the deprecated, old algorithm to decimate by a floating point ratio, superseded by `fractional_decimator_ff`. 
+
+(It uses linear interpolation, and its filter cuts at 59% of the passband.)

 ----

--- a/csdr.c
+++ b/csdr.c
@ -99,7 +99,8 @@ char usage[]=
 "    agc_ff [hang_time [reference [attack_rate [decay_rate [max_gain [attack_wait [filter_alpha]]]]]]]\n"
 "    fastagc_ff [block_size [reference]]\n"
 "    rational_resampler_ff <interpolation> <decimation> [transition_bw [window]]\n"
-"    fractional_decimator_ff <decimation_rate> [transition_bw [window]]\n"
+"    old_fractional_decimator_ff <decimation_rate> [transition_bw [window]]\n"
+"    fractional_decimator_ff <decimation_rate> [num_poly_points ( [transition_bw [window]] | --prefilter )]\n"
 "    fft_cc <fft_size> <out_of_every_n_samples> [window [--octave] [--benchmark]]\n"
 "    logpower_cf [add_db]\n"
 "    fft_benchmark <fft_size> <fft_cycles> [--benchmark]\n"
@ -1093,7 +1094,7 @@ int main(int argc, char *argv[])
 		padded_taps_length = taps_length+(NEON_ALIGNMENT/4)-1 - ((taps_length+(NEON_ALIGNMENT/4)-1)%(NEON_ALIGNMENT/4));
 		fprintf(stderr,"padded_taps_length = %d\n", padded_taps_length);

-		taps = (float*) (float*)malloc(padded_taps_length+NEON_ALIGNMENT);
+		taps = (float*) (float*)malloc((padded_taps_length+NEON_ALIGNMENT)*sizeof(float));
 		fprintf(stderr,"taps = %x\n", taps);
 		taps =  (float*)((((unsigned)taps)+NEON_ALIGNMENT-1) & ~(NEON_ALIGNMENT-1));
 		fprintf(stderr,"taps = %x\n", taps);
@ -1414,6 +1415,68 @@ int main(int argc, char *argv[])
 		float rate;
 		sscanf(argv[2],"%g",&rate);

+		int num_poly_points = 12;
+		if(argc>=4) sscanf(argv[3],"%d",&num_poly_points);
+		if(num_poly_points&1) return badsyntax("num_poly_points should be even");
+		if(num_poly_points<2) return badsyntax("num_poly_points should be >= 2");
+
+		int use_prefilter = 0;
+		float transition_bw=0.03;
+		window_t window = WINDOW_DEFAULT;
+		if(argc>=5)
+		{
+			if(!strcmp(argv[4], "--prefilter")) 
+			{
+				fprintf(stderr, "fractional_decimator_ff: using prefilter with default values\n");
+				use_prefilter = 1;
+			}
+			else 
+			{
+				sscanf(argv[4],"%g",&transition_bw);
+				if(argc>=6) window = firdes_get_window_from_string(argv[5]);
+			}
+		}
+		fprintf(stderr,"fractional_decimator_ff: use_prefilter = %d, num_poly_points = %d, transition_bw = %g, window = %s\n", 
+			use_prefilter, num_poly_points, transition_bw, firdes_get_string_from_window(window));
+
+		if(!initialize_buffers()) return -2;
+		sendbufsize(the_bufsize / rate);
+
+		if(rate==1) clone_(the_bufsize); //copy input to output in this special case (and stick in this function).
+
+		//Generate filter taps
+		int taps_length = 0;
+		float* taps = NULL;
+		if(use_prefilter)
+		{
+			taps_length = firdes_filter_len(transition_bw);
+			fprintf(stderr,"fractional_decimator_ff: taps_length = %d\n",taps_length);
+			taps = (float*)malloc(sizeof(float)*taps_length);
+			firdes_lowpass_f(taps, taps_length, 0.5/(rate-transition_bw), window); //0.6 const to compensate rolloff
+			//for(int=0;i<taps_length; i++) fprintf(stderr,"%g ",taps[i]);
+		}
+		else fprintf(stderr,"fractional_decimator_ff: not using taps\n");
+		fractional_decimator_ff_t d = fractional_decimator_ff_init(rate, num_poly_points, taps, taps_length); 
+		for(;;)
+		{
+			FEOF_CHECK;
+			if(d.input_processed==0) d.input_processed=the_bufsize;
+			else memcpy(input_buffer, input_buffer+d.input_processed, sizeof(float)*(the_bufsize-d.input_processed));
+			fread(input_buffer+(the_bufsize-d.input_processed), sizeof(float), d.input_processed, stdin);
+			fractional_decimator_ff(input_buffer, output_buffer, the_bufsize, &d);
+			fwrite(output_buffer, sizeof(float), d.output_size, stdout);
+			//fprintf(stderr, "os = %d, ip = %d\n", d.output_size, d.input_processed);
+			TRY_YIELD;
+		}
+	}
+
+	if(!strcmp(argv[1],"old_fractional_decimator_ff"))
+	{
+		//Process the params
+		if(argc<=2) return badsyntax("need required parameters (rate)");
+		float rate;
+		sscanf(argv[2],"%g",&rate);
+
 		float transition_bw=0.03;
 		if(argc>=4) sscanf(argv[3],"%g",&transition_bw);

@ -1422,7 +1485,7 @@ int main(int argc, char *argv[])
 		{
 			window = firdes_get_window_from_string(argv[4]);
 		}
-		else fprintf(stderr,"fractional_decimator_ff: window = %s\n",firdes_get_string_from_window(window));
+		else fprintf(stderr,"old_fractional_decimator_ff: window = %s\n",firdes_get_string_from_window(window));

 		if(!initialize_buffers()) return -2;
 		sendbufsize(the_bufsize / rate);
@ -1431,19 +1494,19 @@ int main(int argc, char *argv[])

 		//Generate filter taps
 		int taps_length = firdes_filter_len(transition_bw);
-		fprintf(stderr,"fractional_decimator_ff: taps_length = %d\n",taps_length);
+		fprintf(stderr,"old_fractional_decimator_ff: taps_length = %d\n",taps_length);
 		float* taps = (float*)malloc(sizeof(float)*taps_length);
 		firdes_lowpass_f(taps, taps_length, 0.59*0.5/(rate-transition_bw), window); //0.6 const to compensate rolloff
 		//for(int=0;i<taps_length; i++) fprintf(stderr,"%g ",taps[i]);

-		static fractional_decimator_ff_t d; //in .bss => initialized to zero
+		static old_fractional_decimator_ff_t d; //in .bss => initialized to zero
 		for(;;)
 		{
 			FEOF_CHECK;
 			if(d.input_processed==0) d.input_processed=the_bufsize;
 			else memcpy(input_buffer, input_buffer+d.input_processed, sizeof(float)*(the_bufsize-d.input_processed));
 			fread(input_buffer+(the_bufsize-d.input_processed), sizeof(float), d.input_processed, stdin);
-			d = fractional_decimator_ff(input_buffer, output_buffer, the_bufsize, rate, taps, taps_length, d);
+			d = old_fractional_decimator_ff(input_buffer, output_buffer, the_bufsize, rate, taps, taps_length, d);
 			fwrite(output_buffer, sizeof(float), d.output_size, stdout);
 			TRY_YIELD;
 		}
--- a/grc_tests/test_fractional_decimator.grc
+++ b/grc_tests/test_fractional_decimator.grc
--- a/grc_tests/test_rational_resampler.grc
+++ b/grc_tests/test_rational_resampler.grc
--- a/libcsdr.c
+++ b/libcsdr.c
@ -481,11 +481,7 @@ int fir_decimate_cc(complexf *input, complexf *output, int input_size, int decim
 	for(int i=0; i<input_size; i+=decimation) //@fir_decimate_cc: outer loop
 	{
 		if(i+taps_length>input_size) break;
-		register float acci=0;
-		register float accq=0;
-
-		register int ti=0;
-		register float* pinput=(float*)&(input[i+ti]);
+		register float* pinput=(float*)&(input[i]);
 		register float* ptaps=taps;
 		register float* ptaps_end=taps+taps_length;
 		float quad_acciq [8];
@ -498,8 +494,8 @@ q4, q5: accumulator for I branch and Q branch (will be the output)
 */

 		asm volatile(
-			"		vmov.f32 q4, #0.0\n\t" //another way to null the accumulators
-			"		vmov.f32 q5, #0.0\n\t"
+			"		veor q4, q4\n\t"
+			"		veor q5, q5\n\t"
 			"for_fdccasm: vld2.32	{q0-q1}, [%[pinput]]!\n\t" //load q0 and q1 directly from the memory address stored in pinput, with interleaving (so that we get the I samples in q0 and the Q samples in q1), also increment the memory address in pinput (hence the "!" mark) //http://community.arm.com/groups/processors/blog/2010/03/17/coding-for-neon--part-1-load-and-stores
 			"		vld1.32	{q2}, [%[ptaps]]!\n\t"
 			"		vmla.f32 q4, q0, q2\n\t" //quad_acc_i += quad_input_i * quad_taps_1 //http://stackoverflow.com/questions/3240440/how-to-use-the-multiply-and-accumulate-intrinsics-in-arm-cortex-a8 //http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0489e/CIHEJBIE.html
@ -683,7 +679,7 @@ float inline fir_one_pass_ff(float* input, float* taps, int taps_length)
 	return acc;
 }

-fractional_decimator_ff_t fractional_decimator_ff(float* input, float* output, int input_size, float rate, float *taps, int taps_length, fractional_decimator_ff_t d)
+old_fractional_decimator_ff_t old_fractional_decimator_ff(float* input, float* output, int input_size, float rate, float *taps, int taps_length, old_fractional_decimator_ff_t d)
 {
 	if(rate<=1.0) return d; //sanity check, can't decimate <=1.0
 	//This routine can handle floating point decimation rates.
@ -716,6 +712,104 @@ fractional_decimator_ff_t fractional_decimator_ff(float* input, float* output, i
 	return d;
 }

+fractional_decimator_ff_t fractional_decimator_ff_init(float rate, int num_poly_points, float* taps, int taps_length)
+{
+	fractional_decimator_ff_t d;
+	d.num_poly_points = num_poly_points&~1; //num_poly_points needs to be even!
+	d.poly_precalc_denomiator = (float*)malloc(d.num_poly_points*sizeof(float));
+	//x0..x3
+	//-1,0,1,2
+	//-(4/2)+1
+	//x0..x5
+	//-2,-1,0,1,2,3
+	d.xifirst=-(num_poly_points/2)+1, d.xilast=num_poly_points/2;
+	int id = 0; //index in poly_precalc_denomiator
+	for(int xi=d.xifirst;xi<=d.xilast;xi++)
+	{
+		d.poly_precalc_denomiator[id]=1;
+		for(int xj=d.xifirst;xj<=d.xilast;xj++)
+		{
+			if(xi!=xj) d.poly_precalc_denomiator[id] *= (xi-xj); //poly_precalc_denomiator could be integer as well. But that would later add a necessary conversion.
+		}
+		id++;
+	}
+	d.where=-d.xifirst;
+	d.coeffs_buf=(float*)malloc(d.num_poly_points*sizeof(float)); 
+	d.filtered_buf=(float*)malloc(d.num_poly_points*sizeof(float)); 
+	//d.last_inputs_circbuf = (float)malloc(d.num_poly_points*sizeof(float));
+	//d.last_inputs_startsat = 0; 
+	//d.last_inputs_samplewhere = -1;
+	//for(int i=0;i<num_poly_points; i++) d.last_inputs_circbuf[i] = 0;
+	d.rate = rate;
+	d.taps = taps;
+	d.taps_length = taps_length;
+	d.input_processed = 0;
+	return d;
+}
+
+#define DEBUG_ASSERT 1
+void fractional_decimator_ff(float* input, float* output, int input_size, fractional_decimator_ff_t* d)
+{
+	//This routine can handle floating point decimation rates.
+	//It applies polynomial interpolation to samples that are taken into consideration from a pre-filtered input.
+	//The pre-filter can be switched off by applying taps=NULL.
+	//fprintf(stderr, "drate=%f\n", d->rate);
+	if(DEBUG_ASSERT) assert(d->rate > 1.0); 
+	if(DEBUG_ASSERT) assert(d->where >= -d->xifirst);
+	int oi=0; //output index
+	int index_high; 
+#define FD_INDEX_LOW (index_high-1)
+	//we optimize to calculate ceilf(where) only once every iteration, so we do it here:
+	for(;(index_high=ceilf(d->where))+d->num_poly_points+d->taps_length<input_size;d->where+=d->rate) //@fractional_decimator_ff
+	{
+		//d->num_poly_points above is theoretically more than we could have here, but this makes the spectrum look good
+		int sxifirst = FD_INDEX_LOW + d->xifirst; 
+		int sxilast = FD_INDEX_LOW + d->xilast; 
+		if(d->taps) 
+			for(int wi=0;wi<d->num_poly_points;wi++) d->filtered_buf[wi] = fir_one_pass_ff(input+FD_INDEX_LOW+wi, d->taps, d->taps_length);
+		else
+			for(int wi=0;wi<d->num_poly_points;wi++) d->filtered_buf[wi] = *(input+FD_INDEX_LOW+wi);
+		int id=0;
+		float xwhere = d->where - FD_INDEX_LOW;
+		for(int xi=d->xifirst;xi<=d->xilast;xi++)
+		{
+			d->coeffs_buf[id]=1;
+			for(int xj=d->xifirst;xj<=d->xilast;xj++)
+			{
+				if(xi!=xj) d->coeffs_buf[id] *= (xwhere-xj);
+			}
+			id++;		
+		}
+		float acc = 0;
+		for(int i=0;i<d->num_poly_points;i++)
+		{
+			acc += (d->coeffs_buf[i]/d->poly_precalc_denomiator[i])*d->filtered_buf[i];  //(xnom/xden)*yn
+		}
+		output[oi++]=acc;
+	}
+	d->input_processed = FD_INDEX_LOW + d->xifirst;
+	d->where -= d->input_processed;
+	d->output_size = oi;
+}
+
+/*
+ * Some notes to myself on the circular buffer I wanted to implement here:
+		int last_input_samplewhere_shouldbe = (index_high-1)+xifirst;
+		int last_input_offset = last_input_samplewhere_shouldbe - d->last_input_samplewhere;
+		if(last_input_offset < num_poly_points)
+		{
+			//if we can move the last_input circular buffer, we move, and add the new samples at the end
+			d->last_inputs_startsat += last_input_offset;
+			d->last_inputs_startsat %= num_poly_points;
+			int num_copied_samples = 0;
+			for(int i=0; i<last_input_offset; i++)
+			{
+				d->last_inputs_circbuf[i]=
+			}
+			d->last_input_samplewhere = d->las
+		}
+	However, I think I should just rather do a continuous big buffer.
+*/

 void apply_fir_fft_cc(FFT_PLAN_T* plan, FFT_PLAN_T* plan_inverse, complexf* taps_fft, complexf* last_overlap, int overlap_size)
 {
--- a/libcsdr.h
+++ b/libcsdr.h
@ -148,12 +148,33 @@ void accumulate_power_cf(complexf* input, float* output, int size);
 void log_ff(float* input, float* output, int size, float add_db);

 typedef struct fractional_decimator_ff_s
+{
+	float where;
+	int input_processed;
+	int output_size;
+	int num_poly_points; //number of samples that the Lagrange interpolator will use
+	float* poly_precalc_denomiator; //we don't precalculate coefficients here as in a Farrow structure, because this is a fractional interpolator; instead we precalculate part of the interpolator expression
+	//float* last_inputs_circbuf; //circular buffer to store the last (num_poly_points) number of input samples.
+	//int last_inputs_startsat; //where the circular buffer starts now
+	//int last_inputs_samplewhere; 
+	float* coeffs_buf;
+	float* filtered_buf;
+	int xifirst; 
+	int xilast; 
+	float rate;
+	float *taps;
+	int taps_length;
+} fractional_decimator_ff_t;
+fractional_decimator_ff_t fractional_decimator_ff_init(float rate, int num_poly_points, float* taps, int taps_length);
+void fractional_decimator_ff(float* input, float* output, int input_size, fractional_decimator_ff_t* d);
+
+typedef struct old_fractional_decimator_ff_s
 {
 	float remain;
 	int input_processed;
 	int output_size;
-} fractional_decimator_ff_t;
-fractional_decimator_ff_t fractional_decimator_ff(float* input, float* output, int input_size, float rate, float *taps, int taps_length, fractional_decimator_ff_t d);
+} old_fractional_decimator_ff_t;
+old_fractional_decimator_ff_t old_fractional_decimator_ff(float* input, float* output, int input_size, float rate, float *taps, int taps_length, old_fractional_decimator_ff_t d);

 typedef struct shift_table_data_s
 {