Skip to content

Commit

Permalink
Use fftw_malloc instead of _alloc_real; wrap the c2r transform in broadcastloop so the scratch buffer is malloc'ed only once
Browse files Browse the repository at this point in the history
  • Loading branch information
mohawk2 committed Nov 28, 2024
1 parent 6c2f2e9 commit 2cecd0d
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 12 deletions.
4 changes: 2 additions & 2 deletions Makefile.PL
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,10 @@ $descriptor{INC} = '' unless defined $descriptor{INC};
$descriptor{INC} .= " $cflags";

$descriptor{PREREQ_PM} = {
'PDL' => '2.049', # as_native
'PDL' => '2.075', # broadcastloop
};
$descriptor{CONFIGURE_REQUIRES} = {
'PDL' => '2.049',
'PDL' => '2.075',
'IPC::Run' =>0,
# 'Alien::FFTW3' =>0,
};
Expand Down
20 changes: 10 additions & 10 deletions fftw3.pd
Original file line number Diff line number Diff line change
Expand Up @@ -52,22 +52,22 @@ static_assert_fftw(sizeof($GENERIC()) == sizeof($TGC(fftwf_,fftw_)complex));
$TGC(fftwf_,fftw_)plan plan = INT2PTR($TGC(fftwf_,fftw_)plan, $COMP(plan));
// FFTW inverse real (complex-to-real) transforms clobber their input, so
// copy the input into a separate buffer and run the transform from that copy.
unsigned long nelem = 1;
PDL_Indx i, rank = $PRIV(vtable)->par_realdims[0];
for( i=0; i<rank; i++ )
nelem *= $PDL(complexv)->dims[i];
unsigned long elem_scale = sizeof($GENERIC()) / sizeof( $TGC(float,double) ); /* native complex */
void *input_copy = $TGC(fftwf_,fftw_)alloc_real( nelem * elem_scale );
memcpy( input_copy, $P(complexv), sizeof($GENERIC()) * nelem );
$TGC(fftwf_,fftw_)execute_dft_c2r( plan, (void*)input_copy, (void*)$P(real) );
$TGC(fftwf_,fftw_)free( input_copy );
PDL_Indx i, nbytes = sizeof($GENERIC());
PDL_Indx rank = $PRIV(vtable)->par_realdims[0], *dims = $PDL(complexv)->dims;
for (i=0; i<rank; i++) nbytes *= dims[i];
void *input_copy = fftw_malloc(nbytes);
broadcastloop %{
memcpy(input_copy, $P(complexv), nbytes);
$TGC(fftwf_,fftw_)execute_dft_c2r(plan, input_copy, (void*)$P(real));
%}
fftw_free(input_copy);
EOF
my $TEMPLATE_COMPLEX = <<'EOF';
// This is the template used by PP to generate the FFTW routines.
// make sure the PDL data type I'm using matches the FFTW data type
static_assert_fftw(sizeof($GENERIC())*2 == sizeof($TFD(fftwf_,fftw_)complex));
$TFD(fftwf_,fftw_)plan plan = INT2PTR($TFD(fftwf_,fftw_)plan, $COMP(plan));
$TFD(fftwf_,fftw_)execute_dft( plan, (void*)$P(in), (void*)$P(out) );
$TFD(fftwf_,fftw_)execute_dft(plan, (void*)$P(in), (void*)$P(out));
EOF

# I define up to rank-10 FFTs. This is annoyingly arbitrary, but hopefully
Expand Down

0 comments on commit 2cecd0d

Please sign in to comment.