Hi list,
I am trying to speed up my own gnuradio block using volk. Besides using
volk on gnuradio's input and output buffers, I also want to use it with my
own arrays. I found this thread in the mailing list archive
http://lists.gnu.org/archive/html/discuss-gnuradio/2012-09/msg00055.html
which shows how to allocate aligned memory, and I also tried using
posix_memalign directly (see the code below). Both end in the same
result: if I run my block with the aligned volk_32fc_x2_multiply_32fc_a
kernel, my application crashes with a segmentation fault. I checked with
a debugger, and the segfault happens at the volk call. I get no
segfaults for the unaligned version volk_32fc_x2_multiply_32fc_u. My
flowgraph is created with grc and consists only of
signal_source->my_block->null_source
Running volk_profile, sse3 is selected for both kernels (and the aligned
version does not segfault here), so there has to be an issue with my
code.
I am working on an Intel Xeon X5690 CPU running Linux 3.2.0-32-generic
#51-Ubuntu SMP Wed Sep 26 21:33:09 UTC 2012 x86_64 x86_64 x86_64
GNU/Linux
and gnuradio-3.6.0.
I know that there is a newer version of gnuradio available, but before
turning my whole system upside-down I'd like to know if there is
anything wrong with the minimal code example below.
Any help is appreciated.
Yours
Martin
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <ccsds_fll_cc.h>
#include <gr_io_signature.h>
#include <volk/volk.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <algorithm>
#include <fftw3.h>
// Factory function: creates a new ccsds_fll_cc block on the heap and hands
// it back wrapped in the block's smart-pointer type.
ccsds_fll_cc_sptr
ccsds_make_fll_cc ()
{
  ccsds_fll_cc *block = new ccsds_fll_cc ();
  return ccsds_fll_cc_sptr (block);
}
// Constructs the block with exactly one complex input stream and one complex
// output stream, and asks for stream buffers aligned for VOLK.
ccsds_fll_cc::ccsds_fll_cc ()
  : gr_block ("ccsds_fll_cc",
              gr_make_io_signature (1, 1, sizeof (gr_complex)),
              gr_make_io_signature (1, 1, sizeof (gr_complex)))
{
  // general_work() consults is_unaligned() before choosing the aligned VOLK
  // kernel, so an alignment has to be requested here, expressed in items
  // (VOLK's byte alignment divided by the item size, but never below 1).
  // NOTE(review): presumably is_unaligned() is only meaningful once
  // set_alignment() has been called -- confirm against the gr_block docs.
  const int alignment_multiple =
    static_cast<int> (volk_get_alignment () / sizeof (gr_complex));
  set_alignment (std::max (1, alignment_multiple));
}
// Destructor: performs no cleanup of its own.
ccsds_fll_cc::~ccsds_fll_cc ()
{
}
int ccsds_fll_cc::general_work (int noutput_items,
gr_vector_int
&ninput_items,
gr_vector_const_void_star
&input_items,
gr_vector_void_star
&output_items)
{
const gr_complex *in = (const gr_complex *) input_items[0];
gr_complex *out = (gr_complex *) output_items[0];
unsigned int num = (noutput_items > ninput_items[0]) ? ninput_items[0]
:
noutput_items;
/* use posix_memalign instead of fftwf_malloc
gr_complex rot;
int mem = posix_memalign((void*)&rot, volk_get_alignment(),
num*sizeof(gr_complex));
if(mem != 0) {
fprintf(stderr,“ERROR: allocation of aligned memory failed\n”);
exit(EXIT_FAILURE);
return 0;
}
*/
gr_complex rot = (gr_complex)fftwf_malloc(sizeof(gr_complex)*num);
if(rot == 0) {
fprintf(stderr,“ERROR: allocation of aligned memory failed\n”);
exit(EXIT_FAILURE);
return 0;
}
// asign values to rot
for(size_t i=0;i<num;i++) {
rot[i] = gr_complex(0.1f,0.0f);
}
// invoking the unaligned version does not segfault
//volk_32fc_x2_multiply_32fc_u(out, in, rot, num);
if(is_unaligned()) {
volk_32fc_x2_multiply_32fc_u(out, in, rot, num);
} else {
// segfault
volk_32fc_x2_multiply_32fc_a(out, in, rot, num);
}
// use free for posix_memalign
//free(rot);
fftw_free(rot);
consume_each(num);
return num;
}