Now at least we get a continuous output signal from shift_addfast_cc/NEON. And we get a good result if the shift_rate is 0! (Okay, we get a bad result for anything else...)

This commit is contained in:
ha7ilm 2015-11-29 20:47:00 +00:00
parent d28edcca3d
commit 965ea631fc
3 changed files with 67 additions and 8 deletions

View file

@ -69,6 +69,65 @@
<value></value>
</param>
</block>
<block>
<key>variable_slider</key>
<param>
<key>comment</key>
<value></value>
</param>
<param>
<key>converver</key>
<value>float_converter</value>
</param>
<param>
<key>value</key>
<value>0</value>
</param>
<param>
<key>_enabled</key>
<value>True</value>
</param>
<param>
<key>_coordinate</key>
<value>(24, 331)</value>
</param>
<param>
<key>_rotation</key>
<value>0</value>
</param>
<param>
<key>grid_pos</key>
<value></value>
</param>
<param>
<key>id</key>
<value>gen_freq</value>
</param>
<param>
<key>label</key>
<value>Frequency:</value>
</param>
<param>
<key>max</key>
<value>samp_rate/2</value>
</param>
<param>
<key>min</key>
<value>-samp_rate/2</value>
</param>
<param>
<key>notebook</key>
<value></value>
</param>
<param>
<key>num_steps</key>
<value>100</value>
</param>
<param>
<key>style</key>
<value>wx.SL_HORIZONTAL</value>
</param>
</block>
<block>
<key>variable</key>
<param>
@ -147,7 +206,7 @@
</param>
<param>
<key>freq</key>
<value>20000</value>
<value>gen_freq</value>
</param>
<param>
<key>_coordinate</key>
@ -183,7 +242,7 @@
</param>
<param>
<key>waveform</key>
<value>analog.GR_CONST_WAVE</value>
<value>analog.GR_SIN_WAVE</value>
</param>
</block>
<block>

View file

@ -2,7 +2,7 @@
# Run this script on a Raspberry Pi 2, while running test_shift_remote.grc on your PC.
# It allows you to debug the NEON-accelerated version of specific DSP algorithms on the target hardware.
TEMPSCRIPT="/tmp/test_shift_remote_exec.sh"
echo '#!/bin/sh\ncsdr shift_addfast_cc -0.1' > $TEMPSCRIPT
echo '#!/bin/sh\ncsdr shift_addfast_cc -0' > $TEMPSCRIPT
cat $TEMPSCRIPT
chmod +x $TEMPSCRIPT
ncat -vvl 5321 -e $TEMPSCRIPT

View file

@ -295,9 +295,10 @@ float shift_addfast_cc(complexf *input, complexf* output, int input_size, shift_
float* pdcos = d->dcos;
float* pdsin = d->dsin;
register float* pinput = (float*)input;
register float* pinput_end = ((float*)input)+input_size;
register float* pinput_end = (float*)(input+input_size);
register float* poutput = (float*)output;
//Register map:
#define RDCOS "q0" //dcos, dsin
#define RDSIN "q1"
#define RCOSST "q2" //cos_start, sin_start
@ -324,7 +325,7 @@ float shift_addfast_cc(complexf *input, complexf* output, int input_size, shift_
" vmul.f32 " R3(RCOSV, RCOSST, RDCOS) //cos_vals[i] = cos_start * d->dcos[i]
" vmls.f32 " R3(RCOSV, RSINST, RDSIN) //cos_vals[i] -= sin_start * d->dsin[i]
" vmul.f32 " R3(RSINV, RSINST, RDCOS) //sin_vals[i] = sin_start * d->dcos[i]
" vmla.f32 " R3(RCOSV, RSINST, RDSIN) //sin_vals[i] += cos_start * d->dsin[i]
" vmla.f32 " R3(RSINV, RCOSST, RDSIN) //sin_vals[i] += cos_start * d->dsin[i]
//C version:
//iof(output,4*i+j)=cos_vals[j]*iof(input,4*i+j)-sin_vals[j]*qof(input,4*i+j);
@ -334,8 +335,8 @@ float shift_addfast_cc(complexf *input, complexf* output, int input_size, shift_
" vmul.f32 " R3(ROUTQ, RSINV, RINPI) //sin_vals[i] = sin_start * d->dcos[i]
" vmla.f32 " R3(ROUTQ, RCOSV, RINPQ) //sin_vals[i] += cos_start * d->dsin[i]
" vst2.32 {" ROUTI "-" ROUTQ "}, [%[poutput]]!\n\t" //store the outputs in memory
" vst2.32 {" ROUTI "-" ROUTQ "}, [%[poutput]]\n\t" //store the outputs in memory
" add %[poutput],%[poutput],#32\n\t"
" vdup.32 " RCOSST ", d5[1]\n\t" // cos_start[0-3] = cos_vals[3]
" vdup.32 " RSINST ", d7[1]\n\t" // sin_start[0-3] = sin_vals[3]
@ -348,7 +349,6 @@ float shift_addfast_cc(complexf *input, complexf* output, int input_size, shift_
:
"memory", "q0", "q1", "q2", "q4", "q5", "q6", "q7", "q8", "q9", "cc" //clobber list
);
return phase+input_size*d->phase_increment;
}