From 965ea631fcd46e208d5051bffc87351d29dcb659 Mon Sep 17 00:00:00 2001 From: ha7ilm Date: Sun, 29 Nov 2015 20:47:00 +0000 Subject: [PATCH] Now at least we get a continuous output signal from shift_addfast_cc/NEON. And we get a good result if the shift_rate is 0! (Okay, we get a bad result for anything else...) --- grc_tests/test_shift_remote.grc | 63 +++++++++++++++++++++++++++++++-- grc_tests/test_shift_remote.sh | 2 +- libcsdr.c | 10 +++--- 3 files changed, 67 insertions(+), 8 deletions(-) diff --git a/grc_tests/test_shift_remote.grc b/grc_tests/test_shift_remote.grc index 516635f..9b4f589 100644 --- a/grc_tests/test_shift_remote.grc +++ b/grc_tests/test_shift_remote.grc @@ -69,6 +69,65 @@ + + variable_slider + + comment + + + + converver + float_converter + + + value + 0 + + + _enabled + True + + + _coordinate + (24, 331) + + + _rotation + 0 + + + grid_pos + + + + id + gen_freq + + + label + Frequency: + + + max + samp_rate/2 + + + min + -samp_rate/2 + + + notebook + + + + num_steps + 100 + + + style + wx.SL_HORIZONTAL + + variable @@ -147,7 +206,7 @@ freq - 20000 + gen_freq _coordinate @@ -183,7 +242,7 @@ waveform - analog.GR_CONST_WAVE + analog.GR_SIN_WAVE diff --git a/grc_tests/test_shift_remote.sh b/grc_tests/test_shift_remote.sh index 65c7192..14f061d 100755 --- a/grc_tests/test_shift_remote.sh +++ b/grc_tests/test_shift_remote.sh @@ -2,7 +2,7 @@ # Run this script on a Raspberry Pi 2, while running test_shift_remote.grc on your PC. # It allows you to debug the NEON-accelerated version of specific DSP algorithms on the target hardware. TEMPSCRIPT="/tmp/test_shift_remote_exec.sh" -echo '#!/bin/sh\ncsdr shift_addfast_cc -0.1' > $TEMPSCRIPT +echo '#!/bin/sh\ncsdr shift_addfast_cc -0' > $TEMPSCRIPT cat $TEMPSCRIPT chmod +x $TEMPSCRIPT ncat -vvl 5321 -e $TEMPSCRIPT diff --git a/libcsdr.c b/libcsdr.c index 64099bb..d0c3b2d 100644 --- a/libcsdr.c +++ b/libcsdr.c @@ -295,9 +295,10 @@ float shift_addfast_cc(complexf *input, complexf* output, int input_size, shift_ float* pdcos = d->dcos; float* pdsin = d->dsin; register float* pinput = (float*)input; - register float* pinput_end = ((float*)input)+input_size; + register float* pinput_end = (float*)(input+input_size); register float* poutput = (float*)output; + //Register map: #define RDCOS "q0" //dcos, dsin #define RDSIN "q1" #define RCOSST "q2" //cos_start, sin_start @@ -324,7 +325,7 @@ float shift_addfast_cc(complexf *input, complexf* output, int input_size, shift_ " vmul.f32 " R3(RCOSV, RCOSST, RDCOS) //cos_vals[i] = cos_start * d->dcos[i] " vmls.f32 " R3(RCOSV, RSINST, RDSIN) //cos_vals[i] -= sin_start * d->dsin[i] " vmul.f32 " R3(RSINV, RSINST, RDCOS) //sin_vals[i] = sin_start * d->dcos[i] - " vmla.f32 " R3(RCOSV, RSINST, RDSIN) //sin_vals[i] += cos_start * d->dsin[i] + " vmla.f32 " R3(RSINV, RCOSST, RDSIN) //sin_vals[i] += cos_start * d->dsin[i] //C version: //iof(output,4*i+j)=cos_vals[j]*iof(input,4*i+j)-sin_vals[j]*qof(input,4*i+j); @@ -334,8 +335,8 @@ float shift_addfast_cc(complexf *input, complexf* output, int input_size, shift_ " vmul.f32 " R3(ROUTQ, RSINV, RINPI) //sin_vals[i] = sin_start * d->dcos[i] " vmla.f32 " R3(ROUTQ, RCOSV, RINPQ) //sin_vals[i] += cos_start * d->dsin[i] - " vst2.32 {" ROUTI "-" ROUTQ "}, [%[poutput]]!\n\t" //store the outputs in memory - + " vst2.32 {" ROUTI "-" ROUTQ "}, [%[poutput]]\n\t" //store the outputs in memory + " add %[poutput],%[poutput],#32\n\t" " vdup.32 " RCOSST ", d5[1]\n\t" // cos_start[0-3] = cos_vals[3] " vdup.32 " RSINST ", d7[1]\n\t" // sin_start[0-3] = sin_vals[3] @@ -348,7 +349,6 @@ float shift_addfast_cc(complexf *input, complexf* output, int input_size, shift_ : "memory", "q0", "q1", "q2", "q4", "q5", "q6", "q7", "q8", "q9", "cc" //clobber list ); - return phase+input_size*d->phase_increment; }