From 7ba726af5b4ce523bd0496ddfd2eee6eb3f66182 Mon Sep 17 00:00:00 2001 From: Rico van Genugten Date: Thu, 6 Oct 2016 09:12:34 +0000 Subject: [PATCH 1/2] Fixed issue simonyiszk/csdr#15 by using veor instead of vmov to zero accumulators in fir_decimate_cc. Also removed some unused variables --- libcsdr.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/libcsdr.c b/libcsdr.c index 737a786..3f197cb 100644 --- a/libcsdr.c +++ b/libcsdr.c @@ -280,11 +280,7 @@ int fir_decimate_cc(complexf *input, complexf *output, int input_size, int decim for(int i=0; iinput_size) break; - register float acci=0; - register float accq=0; - - register int ti=0; - register float* pinput=(float*)&(input[i+ti]); + register float* pinput=(float*)&(input[i]); register float* ptaps=taps; register float* ptaps_end=taps+taps_length; float quad_acciq [8]; @@ -297,8 +293,8 @@ q4, q5: accumulator for I branch and Q branch (will be the output) */ asm volatile( - " vmov.f32 q4, #0.0\n\t" //another way to null the accumulators - " vmov.f32 q5, #0.0\n\t" + " veor q4, q4\n\t" + " veor q5, q5\n\t" "for_fdccasm: vld2.32 {q0-q1}, [%[pinput]]!\n\t" //load q0 and q1 directly from the memory address stored in pinput, with interleaving (so that we get the I samples in q0 and the Q samples in q1), also increment the memory address in pinput (hence the "!" 
mark) //http://community.arm.com/groups/processors/blog/2010/03/17/coding-for-neon--part-1-load-and-stores " vld1.32 {q2}, [%[ptaps]]!\n\t" " vmla.f32 q4, q0, q2\n\t" //quad_acc_i += quad_input_i * quad_taps_1 //http://stackoverflow.com/questions/3240440/how-to-use-the-multiply-and-accumulate-intrinsics-in-arm-cortex-a8 //http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0489e/CIHEJBIE.html From 12d7db8b4973551cd8f0f2b43f3d9e9165b455b4 Mon Sep 17 00:00:00 2001 From: Rico van Genugten Date: Thu, 6 Oct 2016 11:28:34 +0000 Subject: [PATCH 2/2] Fixed issue simonyiszk/csdr#15 by allocating the taps buffer with a size in bytes instead of a size in number of taps --- csdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csdr.c b/csdr.c index ec08e27..b2b8678 100644 --- a/csdr.c +++ b/csdr.c @@ -940,7 +940,7 @@ int main(int argc, char *argv[]) padded_taps_length = taps_length+(NEON_ALIGNMENT/4)-1 - ((taps_length+(NEON_ALIGNMENT/4)-1)%(NEON_ALIGNMENT/4)); fprintf(stderr,"padded_taps_length = %d\n", padded_taps_length); - taps = (float*) (float*)malloc(padded_taps_length+NEON_ALIGNMENT); + taps = (float*) (float*)malloc((padded_taps_length+NEON_ALIGNMENT)*sizeof(float)); fprintf(stderr,"taps = %x\n", taps); taps = (float*)((((unsigned)taps)+NEON_ALIGNMENT-1) & ~(NEON_ALIGNMENT-1)); fprintf(stderr,"taps = %x\n", taps);