Implements a CUDA version [7] of the Resamp FFT-based resampling algorithm for computing the \(\mathcal{F}\)-statistic [9] .
Prototypes | |
int | XLALSetupFstatResampCUDA (void **method_data, FstatCommon *common, FstatMethodFuncs *funcs, MultiSFTVector *multiSFTs, const FstatOptionalArgs *optArgs) |
int | XLALExtractResampledTimeseries_ResampCUDA (MultiCOMPLEX8TimeSeries **multiTimeSeries_SRC_a, MultiCOMPLEX8TimeSeries **multiTimeSeries_SRC_b, void *method_data) |
int | XLALGetFstatTiming_ResampCUDA (const void *method_data, FstatTimingGeneric *timingGeneric, FstatTimingModel *timingModel) |
static int | XLALComputeFstatResampCUDA (FstatResults *Fstats, const FstatCommon *common, void *method_data) |
static int | XLALApplySpindownAndFreqShiftCUDA (cuComplex *xOut, const COMPLEX8TimeSeries *xIn, const PulsarDopplerParams *doppler, REAL8 freqShift) |
static int | XLALBarycentricResampleMultiCOMPLEX8TimeSeriesCUDA (ResampCUDAMethodData *resamp, const PulsarDopplerParams *thisPoint, const FstatCommon *common) |
Performs barycentric resampling on a multi-detector timeseries, updates resampling buffer with results. More... | |
static int | XLALComputeFaFb_ResampCUDA (ResampCUDAMethodData *resamp, ResampCUDAWorkspace *ws, const PulsarDopplerParams thisPoint, REAL8 dFreq, UINT4 numFreqBins, const COMPLEX8TimeSeries *TimeSeries_SRC_a, const COMPLEX8TimeSeries *TimeSeries_SRC_b) |
static COMPLEX8Vector * | CreateCOMPLEX8VectorCUDA (UINT4 length) |
static REAL8Vector * | CreateREAL8VectorCUDA (UINT4 length) |
static void | DestroyCOMPLEX8VectorCUDA (COMPLEX8Vector *vec) |
static void | DestroyREAL8VectorCUDA (REAL8Vector *vec) |
static void | DestroyCOMPLEX8TimeSeriesCUDA (COMPLEX8TimeSeries *series) |
static int | MoveCOMPLEX8TimeSeriesHtoD (COMPLEX8TimeSeries *series) |
static int | MoveMultiCOMPLEX8TimeSeriesHtoD (MultiCOMPLEX8TimeSeries *multi) |
static int | CopyCOMPLEX8TimeSeriesDtoH (COMPLEX8TimeSeries **dst, COMPLEX8TimeSeries *src) |
static int | CopyMultiCOMPLEX8TimeSeriesDtoH (MultiCOMPLEX8TimeSeries **dst, MultiCOMPLEX8TimeSeries *src) |
static void | DestroyMultiCOMPLEX8TimeSeriesCUDA (MultiCOMPLEX8TimeSeries *multi) |
static void | XLALDestroyResampCUDAWorkspace (void *workspace) |
static void | XLALDestroyResampCUDAMethodData (void *method_data) |
__global__ void | CUDAAddToFaFb (cuComplex *Fa_k, cuComplex *Fb_k, cuComplex *FaX_k, cuComplex *FbX_k, UINT4 numFreqBins) |
__global__ void | CUDAComputeTwoF (REAL4 *twoF, cuComplex *Fa_k, cuComplex *Fb_k, REAL4 A, REAL4 B, REAL4 C, REAL4 E, REAL4 Dinv, UINT4 numFreqBins) |
__global__ void | CUDANormFaFb (cuComplex *Fa_out, cuComplex *Fb_out, REAL8 FreqOut0, REAL8 dFreq, REAL8 dtauX, REAL8 dt_SRC, UINT4 numFreqBins) |
__global__ void | CUDAPopulateFaFbFromRaw (cuComplex *out, cuComplex *in, UINT4 numFreqBins, UINT4 offset_bins, UINT4 decimateFFT) |
static int | XLALComputeFaFb_ResampCUDA (ResampCUDAMethodData *resamp, ResampCUDAWorkspace *ws, const PulsarDopplerParams thisPoint, REAL8 dFreq, UINT4 numFreqBins, const COMPLEX8TimeSeries *__restrict__ TimeSeries_SRC_a, const COMPLEX8TimeSeries *__restrict__ TimeSeries_SRC_b) |
__global__ void | CUDAApplySpindownAndFreqShift (cuComplex *out, cuComplex *in, PulsarDopplerParams doppler, REAL8 freqShift, REAL8 dt, REAL8 Dtau0, UINT4 s_max, UINT4 numSamplesIn) |
static int | XLALApplySpindownAndFreqShiftCUDA (cuComplex *__restrict__ xOut, const COMPLEX8TimeSeries *__restrict__ xIn, const PulsarDopplerParams *__restrict__ doppler, REAL8 freqShift) |
__global__ void | CUDAApplyHetAMInterp (cuComplex *TS_a, cuComplex *TS_b, cuComplex *TStmp1, cuComplex *TStmp2, UINT4 numSamples) |
__global__ void | CUDAApplyHetAMSrc (cuComplex *TStmp1_SRC_data, cuComplex *TStmp2_SRC_data, REAL8 *ti_DET_data, UINT4 iStart_SRC_al, REAL8 tStart_SRC_0, REAL8 tStart_DET_0, REAL8 dt_SRC, REAL8 tMid_DET_al, REAL8 tMid_SRC_al, REAL8 Tdot_al, REAL8 fHet, REAL4 a_al, REAL4 b_al, UINT4 numSamples) |
__global__ void | CUDASincInterp (cuComplex *out, REAL8 *t_out, cuComplex *in, REAL8 *win, UINT4 Dterms, UINT4 numSamplesIn, UINT4 numSamplesOut, REAL8 tmin, REAL8 dt, REAL8 oodt) |
__global__ void | CUDACreateHammingWindow (REAL8 *out, UINT4 length) |
int | SincInterp (COMPLEX8Vector *y_out, const REAL8Vector *t_out, const COMPLEX8TimeSeries *ts_in, UINT4 Dterms) |
Data Structures | |
struct | ResampCUDAWorkspace |
struct | ResampCUDAMethodData |
Macros | |
#define | CUDA_BLOCK_SIZE 512 |
#define | XLAL_CHECK_CUDA_CALL(...) |
#define | XLAL_CHECK_CUFFT_CALL(...) |
#define | CPLX_MULT(x, y) (crectf(crealf(x)*crealf(y) - cimagf(x)*cimagf(y), cimagf(x)*crealf(y) + crealf(x)*cimagf(y))) |
Variables | |
__constant__ REAL8 | PI = M_PI |
__device__ __constant__ REAL8 | lal_fact_inv [LAL_FACT_MAX] |
int XLALSetupFstatResampCUDA | ( | void ** | method_data, |
FstatCommon * | common, | ||
FstatMethodFuncs * | funcs, | ||
MultiSFTVector * | multiSFTs, | ||
const FstatOptionalArgs * | optArgs | ||
) |
Definition at line 307 of file ComputeFstat_Resamp_CUDA.cu.
int XLALExtractResampledTimeseries_ResampCUDA | ( | MultiCOMPLEX8TimeSeries ** | multiTimeSeries_SRC_a, |
MultiCOMPLEX8TimeSeries ** | multiTimeSeries_SRC_b, | ||
void * | method_data | ||
) |
Definition at line 1349 of file ComputeFstat_Resamp_CUDA.cu.
int XLALGetFstatTiming_ResampCUDA | ( | const void * | method_data, |
FstatTimingGeneric * | timingGeneric, | ||
FstatTimingModel * | timingModel | ||
) |
Definition at line 1368 of file ComputeFstat_Resamp_CUDA.cu.
|
static |
Definition at line 522 of file ComputeFstat_Resamp_CUDA.cu.
|
static |
|
static |
Performs barycentric resampling on a multi-detector timeseries, updates resampling buffer with results.
NOTE Buffering: this function does check 1) whether the previously-buffered solution can be completely reused (same sky-position and binary parameters), or 2) if at least sky-dependent quantities can be re-used (antenna-patterns + timings) in case only binary parameters changed
Definition at line 1164 of file ComputeFstat_Resamp_CUDA.cu.
|
static |
|
static |
Definition at line 153 of file ComputeFstat_Resamp_CUDA.cu.
|
static |
Definition at line 162 of file ComputeFstat_Resamp_CUDA.cu.
|
static |
Definition at line 171 of file ComputeFstat_Resamp_CUDA.cu.
|
static |
Definition at line 182 of file ComputeFstat_Resamp_CUDA.cu.
|
static |
Definition at line 193 of file ComputeFstat_Resamp_CUDA.cu.
|
static |
Definition at line 201 of file ComputeFstat_Resamp_CUDA.cu.
|
static |
Definition at line 211 of file ComputeFstat_Resamp_CUDA.cu.
|
static |
Definition at line 220 of file ComputeFstat_Resamp_CUDA.cu.
|
static |
Definition at line 242 of file ComputeFstat_Resamp_CUDA.cu.
|
static |
Definition at line 257 of file ComputeFstat_Resamp_CUDA.cu.
|
static |
Definition at line 271 of file ComputeFstat_Resamp_CUDA.cu.
|
static |
Definition at line 285 of file ComputeFstat_Resamp_CUDA.cu.
__global__ void CUDAAddToFaFb | ( | cuComplex * | Fa_k, |
cuComplex * | Fb_k, | ||
cuComplex * | FaX_k, | ||
cuComplex * | FbX_k, | ||
UINT4 | numFreqBins | ||
) |
Definition at line 486 of file ComputeFstat_Resamp_CUDA.cu.
__global__ void CUDAComputeTwoF | ( | REAL4 * | twoF, |
cuComplex * | Fa_k, | ||
cuComplex * | Fb_k, | ||
REAL4 | A, | ||
REAL4 | B, | ||
REAL4 | C, | ||
REAL4 | E, | ||
REAL4 | Dinv, | ||
UINT4 | numFreqBins | ||
) |
Definition at line 496 of file ComputeFstat_Resamp_CUDA.cu.
__global__ void CUDANormFaFb | ( | cuComplex * | Fa_out, |
cuComplex * | Fb_out, | ||
REAL8 | FreqOut0, | ||
REAL8 | dFreq, | ||
REAL8 | dtauX, | ||
REAL8 | dt_SRC, | ||
UINT4 | numFreqBins | ||
) |
Definition at line 801 of file ComputeFstat_Resamp_CUDA.cu.
__global__ void CUDAPopulateFaFbFromRaw | ( | cuComplex * | out, |
cuComplex * | in, | ||
UINT4 | numFreqBins, | ||
UINT4 | offset_bins, | ||
UINT4 | decimateFFT | ||
) |
Definition at line 823 of file ComputeFstat_Resamp_CUDA.cu.
|
static |
[in,out] | resamp | buffered resampling data and workspace |
[in,out] | ws | resampling workspace (memory-sharing across segments) |
[in] | thisPoint | Doppler point to compute {FaX,FbX} for |
[in] | dFreq | output frequency resolution |
[in] | numFreqBins | number of output frequency bins |
[in] | TimeSeries_SRC_a | SRC-frame single-IFO timeseries * a(t) |
[in] | TimeSeries_SRC_b | SRC-frame single-IFO timeseries * b(t) |
Definition at line 833 of file ComputeFstat_Resamp_CUDA.cu.
__global__ void CUDAApplySpindownAndFreqShift | ( | cuComplex * | out, |
cuComplex * | in, | ||
PulsarDopplerParams | doppler, | ||
REAL8 | freqShift, | ||
REAL8 | dt, | ||
REAL8 | Dtau0, | ||
UINT4 | s_max, | ||
UINT4 | numSamplesIn | ||
) |
Definition at line 941 of file ComputeFstat_Resamp_CUDA.cu.
|
static |
[out] | xOut | the spindown-corrected SRC-frame timeseries |
[in] | xIn | the input SRC-frame timeseries |
[in] | doppler | containing spindown parameters |
[in] | freqShift | frequency-shift to apply, sign is "new - old" |
Definition at line 970 of file ComputeFstat_Resamp_CUDA.cu.
__global__ void CUDAApplyHetAMInterp | ( | cuComplex * | TS_a, |
cuComplex * | TS_b, | ||
cuComplex * | TStmp1, | ||
cuComplex * | TStmp2, | ||
UINT4 | numSamples | ||
) |
Definition at line 999 of file ComputeFstat_Resamp_CUDA.cu.
__global__ void CUDAApplyHetAMSrc | ( | cuComplex * | TStmp1_SRC_data, |
cuComplex * | TStmp2_SRC_data, | ||
REAL8 * | ti_DET_data, | ||
UINT4 | iStart_SRC_al, | ||
REAL8 | tStart_SRC_0, | ||
REAL8 | tStart_DET_0, | ||
REAL8 | dt_SRC, | ||
REAL8 | tMid_DET_al, | ||
REAL8 | tMid_SRC_al, | ||
REAL8 | Tdot_al, | ||
REAL8 | fHet, | ||
REAL4 | a_al, | ||
REAL4 | b_al, | ||
UINT4 | numSamples | ||
) |
Definition at line 1009 of file ComputeFstat_Resamp_CUDA.cu.
__global__ void CUDASincInterp | ( | cuComplex * | out, |
REAL8 * | t_out, | ||
cuComplex * | in, | ||
REAL8 * | win, | ||
UINT4 | Dterms, | ||
UINT4 | numSamplesIn, | ||
UINT4 | numSamplesOut, | ||
REAL8 | tmin, | ||
REAL8 | dt, | ||
REAL8 | oodt | ||
) |
Definition at line 1055 of file ComputeFstat_Resamp_CUDA.cu.
Definition at line 1111 of file ComputeFstat_Resamp_CUDA.cu.
int SincInterp | ( | COMPLEX8Vector * | y_out, |
const REAL8Vector * | t_out, | ||
const COMPLEX8TimeSeries * | ts_in, | ||
UINT4 | Dterms | ||
) |
[out] | y_out | output series of interpolated y-values [must be same size as t_out] |
[in] | t_out | output time-steps to interpolate input to |
[in] | ts_in | regularly-spaced input timeseries |
[in] | Dterms | window sinc kernel sum to +-Dterms around max |
Definition at line 1126 of file ComputeFstat_Resamp_CUDA.cu.
#define CUDA_BLOCK_SIZE 512 |
Definition at line 56 of file ComputeFstat_Resamp_CUDA.cu.
#define XLAL_CHECK_CUDA_CALL | ( | ... | ) |
Definition at line 64 of file ComputeFstat_Resamp_CUDA.cu.
#define XLAL_CHECK_CUFFT_CALL | ( | ... | ) |
Definition at line 68 of file ComputeFstat_Resamp_CUDA.cu.
#define CPLX_MULT | ( | x, | |
y | |||
) | (crectf(crealf(x)*crealf(y) - cimagf(x)*cimagf(y), cimagf(x)*crealf(y) + crealf(x)*cimagf(y))) |
Definition at line 73 of file ComputeFstat_Resamp_CUDA.cu.
__constant__ REAL8 PI = M_PI |
Definition at line 58 of file ComputeFstat_Resamp_CUDA.cu.
__device__ __constant__ REAL8 lal_fact_inv[LAL_FACT_MAX] |
Definition at line 60 of file ComputeFstat_Resamp_CUDA.cu.