Loading [MathJax]/extensions/TeX/AMSmath.js
LALPulsar 7.1.1.1-5e288d3
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Modules Pages
ComputeFstat_Resamp_CUDA.cu
Go to the documentation of this file.
1//
2// Copyright (C) 2019, 2020 Liam Dunn
3// Copyright (C) 2009, 2014--2015 Reinhard Prix
4// Copyright (C) 2012--2015 Karl Wette
5// Copyright (C) 2009 Chris Messenger, Pinkesh Patel, Xavier Siemens, Holger Pletsch
6//
7// This program is free software; you can redistribute it and/or modify
8// it under the terms of the GNU General Public License as published by
9// the Free Software Foundation; either version 2 of the License, or
10// (at your option) any later version.
11//
12// This program is distributed in the hope that it will be useful,
13// but WITHOUT ANY WARRANTY; without even the implied warranty of
14// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15// GNU General Public License for more details.
16//
17// You should have received a copy of the GNU General Public License
18// along with with program; see the file COPYING. If not, write to the
19// Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20// MA 02110-1301 USA
21//
22
23#include <stdlib.h>
24#include <stdio.h>
25#include <math.h>
26#include <complex.h>
27#include <cuda.h>
28#include <cuda_runtime_api.h>
29#include <cuComplex.h>
30#include <cufft.h>
31
34
35#include <lal/Factorial.h>
36#include <lal/LFTandTSutils.h>
37#include <lal/LogPrintf.h>
38#include <lal/SinCosLUT.h>
39#include <lal/TimeSeries.h>
40#include <lal/Units.h>
41#include <lal/Window.h>
42
43///
44/// \defgroup ComputeFstat_Resamp_CUDA_cu Module ComputeFstat_Resamp_CUDA.cu
45/// \ingroup ComputeFstat_h
46/// \brief Implements a CUDA version \cite DunnEtAl2022 of the \a Resamp FFT-based resampling algorithm for
47/// computing the \f$\mathcal{F}\f$-statistic \cite JKS98 .
48///
49
50/// @{
51
52// ========== Resamp internals ==========
53
54// ----- local constants ----------
55
56#define CUDA_BLOCK_SIZE 512
57
58__constant__ REAL8 PI = M_PI;
59
60__device__ __constant__ REAL8 lal_fact_inv[LAL_FACT_MAX];
61
62// ----- local macros ----------
63
64#define XLAL_CHECK_CUDA_CALL(...) do { \
65 cudaError_t retn; \
66 XLAL_CHECK ( ( retn = (__VA_ARGS__) ) == cudaSuccess, XLAL_EERR, "%s failed with return code %i", #__VA_ARGS__, retn ); \
67 } while(0)
68#define XLAL_CHECK_CUFFT_CALL(...) do { \
69 cufftResult retn; \
70 XLAL_CHECK ( ( retn = (__VA_ARGS__) ) == CUFFT_SUCCESS, XLAL_EERR, "%s failed with return code %i", #__VA_ARGS__, retn ); \
71 } while(0)
72
73#define CPLX_MULT(x, y) (crectf(crealf(x)*crealf(y) - cimagf(x)*cimagf(y), cimagf(x)*crealf(y) + crealf(x)*cimagf(y)))
74
75// ----- local types ----------
76
77// ----- workspace ----------
78typedef struct tagResampCUDAWorkspace {
79 // intermediate quantities to interpolate and operate on SRC-frame timeseries
80 COMPLEX8Vector *TStmp1_SRC; // can hold a single-detector SRC-frame spindown-corrected timeseries [without zero-padding]
81 COMPLEX8Vector *TStmp2_SRC; // can hold a single-detector SRC-frame spindown-corrected timeseries [without zero-padding]
82 REAL8Vector *SRCtimes_DET; // holds uniformly-spaced SRC-frame timesteps translated into detector frame [for interpolation]
83
84 // input padded timeseries ts(t) and output Fab(f) of length 'numSamplesFFT'
85 UINT4 numSamplesFFTAlloc; // allocated number of zero-padded SRC-frame time samples (related to dFreq)
86 cuComplex *TS_FFT; // zero-padded, spindown-corr SRC-frame TS
87 cuComplex *FabX_Raw; // raw full-band FFT result Fa,Fb
88
89 // arrays of size numFreqBinsOut over frequency bins f_k:
90 cuComplex *FaX_k; // properly normalized F_a^X(f_k) over output bins
91 cuComplex *FbX_k; // properly normalized F_b^X(f_k) over output bins
92 cuComplex *Fa_k; // properly normalized F_a(f_k) over output bins
93 cuComplex *Fb_k; // properly normalized F_b(f_k) over output bins
94 UINT4 numFreqBinsAlloc; // internal: keep track of allocated length of frequency-arrays
95
97
98typedef struct {
99 UINT4 Dterms; // Number of terms to use (on either side) in Windowed-Sinc interpolation kernel
100 MultiCOMPLEX8TimeSeries *multiTimeSeries_DET; // input SFTs converted into a heterodyned timeseries
101 // ----- buffering -----
102 PulsarDopplerParams prev_doppler; // buffering: previous phase-evolution ("doppler") parameters
103 MultiAMCoeffs *multiAMcoef; // buffered antenna-pattern functions
104 MultiSSBtimes *multiSSBtimes; // buffered SSB times, including *only* sky-position corrections, not binary
105 MultiSSBtimes *multiBinaryTimes; // buffered SRC times, including both sky- and binary corrections [to avoid re-allocating this]
106
107 AntennaPatternMatrix Mmunu; // combined multi-IFO antenna-pattern coefficients {A,B,C,E}
108 AntennaPatternMatrix MmunuX[PULSAR_MAX_DETECTORS]; // per-IFO antenna-pattern coefficients {AX,BX,CX,EX}
109
110 MultiCOMPLEX8TimeSeries *multiTimeSeries_SRC_a; // multi-detector SRC-frame timeseries, multiplied by AM function a(t)
111 MultiCOMPLEX8TimeSeries *multiTimeSeries_SRC_b; // multi-detector SRC-frame timeseries, multiplied by AM function b(t)
112
113 MultiCOMPLEX8TimeSeries *hostCopy4ExtractTS_SRC_a; // host copy of multi-detector SRC-frame timeseries, multiplied by AM function a(t), for time series extraction
114 MultiCOMPLEX8TimeSeries *hostCopy4ExtractTS_SRC_b; // host copy of multi-detector SRC-frame timeseries, multiplied by AM function b(t), for time series extraction
115
116 UINT4 numSamplesFFT; // length of zero-padded SRC-frame timeseries (related to dFreq)
117 UINT4 decimateFFT; // output every n-th frequency bin, with n>1 iff (dFreq > 1/Tspan), and was internally decreased by n
118 cufftHandle fftplan; // FFT plan
119
120 // ----- timing -----
121 BOOLEAN collectTiming; // flag whether or not to collect timing information
122 FstatTimingGeneric timingGeneric; // measured (generic) F-statistic timing values
123 FstatTimingResamp timingResamp; // measured Resamp-specific timing model data
124
126
127
128// ----- local prototypes ----------
129
130extern "C" int XLALSetupFstatResampCUDA( void **method_data, FstatCommon *common, FstatMethodFuncs *funcs, MultiSFTVector *multiSFTs, const FstatOptionalArgs *optArgs );
131extern "C" int XLALExtractResampledTimeseries_ResampCUDA( MultiCOMPLEX8TimeSeries **multiTimeSeries_SRC_a, MultiCOMPLEX8TimeSeries **multiTimeSeries_SRC_b, void *method_data );
132extern "C" int XLALGetFstatTiming_ResampCUDA( const void *method_data, FstatTimingGeneric *timingGeneric, FstatTimingModel *timingModel );
133
134static int XLALComputeFstatResampCUDA( FstatResults *Fstats, const FstatCommon *common, void *method_data );
135static int XLALApplySpindownAndFreqShiftCUDA( cuComplex *xOut, const COMPLEX8TimeSeries *xIn, const PulsarDopplerParams *doppler, REAL8 freqShift );
137static int XLALComputeFaFb_ResampCUDA( ResampCUDAMethodData *resamp, ResampCUDAWorkspace *ws, const PulsarDopplerParams thisPoint, REAL8 dFreq, UINT4 numFreqBins, const COMPLEX8TimeSeries *TimeSeries_SRC_a, const COMPLEX8TimeSeries *TimeSeries_SRC_b );
139static REAL8Vector *CreateREAL8VectorCUDA( UINT4 length );
140static void DestroyCOMPLEX8VectorCUDA( COMPLEX8Vector *vec );
141static void DestroyREAL8VectorCUDA( REAL8Vector *vec );
148static void XLALDestroyResampCUDAWorkspace( void *workspace );
149static void XLALDestroyResampCUDAMethodData( void *method_data );
150
151// ==================== function definitions ====================
152
154{
155 COMPLEX8Vector *vec;
156 XLAL_CHECK_NULL( ( vec = ( COMPLEX8Vector * )XLALMalloc( sizeof( COMPLEX8Vector ) ) ) != NULL, XLAL_ENOMEM );
157 vec->length = length;
158 XLAL_CHECK_NULL( cudaMalloc( ( void ** )&vec->data, sizeof( COMPLEX8 )*length ) == cudaSuccess, XLAL_ENOMEM );
159 return vec;
160}
161
163{
164 REAL8Vector *vec;
165 XLAL_CHECK_NULL( ( vec = ( REAL8Vector * )XLALMalloc( sizeof( REAL8Vector ) ) ) != NULL, XLAL_ENOMEM );
166 vec->length = length;
167 XLAL_CHECK_NULL( cudaMalloc( ( void ** )&vec->data, sizeof( REAL8 )*length ) == cudaSuccess, XLAL_ENOMEM );
168 return vec;
169}
170
172{
173 if ( vec != NULL ) {
174 if ( vec->data != NULL ) {
175 cudaFree( vec->data );
176 }
177 vec->data = NULL;
178 XLALFree( vec );
179 }
180}
181
183{
184 if ( vec != NULL ) {
185 if ( vec->data != NULL ) {
186 cudaFree( vec->data );
187 }
188 vec->data = NULL;
189 XLALFree( vec );
190 }
191}
192
194{
195 if ( series != NULL ) {
197 XLALFree( series );
198 }
199}
200
202{
203 XLAL_CHECK( series != NULL, XLAL_EFAULT );
204 COMPLEX8 *cpu_data = series->data->data;
205 XLAL_CHECK( cudaMalloc( ( void ** )&series->data->data, sizeof( COMPLEX8 )*series->data->length ) == cudaSuccess, XLAL_ENOMEM );
206 XLAL_CHECK_CUDA_CALL( cudaMemcpy( ( void * )series->data->data, cpu_data, sizeof( COMPLEX8 )*series->data->length, cudaMemcpyHostToDevice ) );
207 XLALFree( cpu_data );
208 return XLAL_SUCCESS;
209}
210
212{
213 XLAL_CHECK( multi != NULL, XLAL_EFAULT );
214 for ( UINT4 X = 0; X < multi->length; X++ ) {
216 }
217 return XLAL_SUCCESS;
218}
219
221{
222 XLAL_CHECK( dst != NULL, XLAL_EFAULT );
223 XLAL_CHECK( src != NULL, XLAL_EFAULT );
224 if ( *dst == NULL ) {
225 XLAL_CHECK( ( ( *dst ) = ( COMPLEX8TimeSeries * )XLALCalloc( 1, sizeof( COMPLEX8TimeSeries ) ) ) != NULL, XLAL_ENOMEM );
226 XLAL_CHECK( ( ( *dst )->data = ( COMPLEX8Sequence * )XLALCalloc( 1, sizeof( COMPLEX8Sequence ) ) ) != NULL, XLAL_ENOMEM );
227 ( *dst )->data->length = 0;
228 }
229 {
230 COMPLEX8Sequence *dst_data = ( *dst )->data;
231 *( *dst ) = *src;
232 ( *dst )->data = dst_data;
233 }
234 if ( ( ( *dst )->data->data == NULL ) || ( ( *dst )->data->length < src->data->length ) ) {
235 XLAL_CHECK( ( ( *dst )->data->data = ( COMPLEX8 * )XLALRealloc( ( *dst )->data->data, src->data->length * sizeof( COMPLEX8 ) ) ) != NULL, XLAL_ENOMEM );
236 ( *dst )->data->length = src->data->length;
237 }
238 XLAL_CHECK_CUDA_CALL( cudaMemcpy( ( void * )( *dst )->data->data, src->data->data, sizeof( COMPLEX8 )*src->data->length, cudaMemcpyDeviceToHost ) );
239 return XLAL_SUCCESS;
240}
241
243{
244 XLAL_CHECK( dst != NULL, XLAL_EFAULT );
245 XLAL_CHECK( src != NULL, XLAL_EFAULT );
246 if ( *dst == NULL ) {
247 XLAL_CHECK( ( ( *dst ) = ( MultiCOMPLEX8TimeSeries * )XLALCalloc( 1, sizeof( MultiCOMPLEX8TimeSeries ) ) ) != NULL, XLAL_ENOMEM );
248 XLAL_CHECK( ( ( *dst )->data = ( COMPLEX8TimeSeries ** )XLALCalloc( src->length, sizeof( COMPLEX8TimeSeries ) ) ) != NULL, XLAL_ENOMEM );
249 ( *dst )->length = src->length;
250 }
251 for ( UINT4 X = 0; X < src->length; X++ ) {
252 XLAL_CHECK( CopyCOMPLEX8TimeSeriesDtoH( &( *dst )->data[X], src->data[X] ) == XLAL_SUCCESS, XLAL_EFUNC );
253 }
254 return XLAL_SUCCESS;
255}
256
258{
259 if ( multi != NULL ) {
260 if ( multi->data != NULL ) {
261 UINT4 numDetectors = multi->length;
262 for ( UINT4 X = 0; X < numDetectors; X++ ) {
264 }
265 XLALFree( multi->data );
266 }
267 XLALFree( multi );
268 }
269}
270
271static void XLALDestroyResampCUDAWorkspace( void *workspace )
272{
273 ResampCUDAWorkspace *ws = ( ResampCUDAWorkspace * ) workspace;
274
278
279 cudaFree( ws->FabX_Raw );
280 cudaFree( ws->TS_FFT );
281
282 XLALFree( ws );
283} // XLALDestroyResampCUDAWorkspace()
284
285static void XLALDestroyResampCUDAMethodData( void *method_data )
286{
287 ResampCUDAMethodData *resamp = ( ResampCUDAMethodData * ) method_data;
288
290
291 // ----- free buffer
299
300 cufftDestroy( resamp->fftplan );
301
302 XLALFree( resamp );
303} // XLALDestroyResampCUDAMethodData()
304
305
306extern "C" int
307XLALSetupFstatResampCUDA( void **method_data,
308 FstatCommon *common,
309 FstatMethodFuncs *funcs,
311 const FstatOptionalArgs *optArgs
312 )
313{
314 // Check input
315 XLAL_CHECK( method_data != NULL, XLAL_EFAULT );
316 XLAL_CHECK( common != NULL, XLAL_EFAULT );
317 XLAL_CHECK( funcs != NULL, XLAL_EFAULT );
318 XLAL_CHECK( multiSFTs != NULL, XLAL_EFAULT );
319 XLAL_CHECK( optArgs != NULL, XLAL_EFAULT );
320
321 // Allocate method data
322 *method_data = XLALCalloc( 1, sizeof( ResampCUDAMethodData ) );
323 ResampCUDAMethodData *resamp = ( ResampCUDAMethodData * )*method_data;
324 XLAL_CHECK( resamp != NULL, XLAL_ENOMEM );
325
326 resamp->Dterms = optArgs->Dterms;
327
328 // Set method function pointers
332
333 // Copy the inverse factorial lookup table to GPU memory
334 XLAL_CHECK_CUDA_CALL( cudaMemcpyToSymbol( lal_fact_inv, ( void * )&LAL_FACT_INV, sizeof( REAL8 )*LAL_FACT_MAX, 0, cudaMemcpyHostToDevice ) );
335
336 // Extra band needed for resampling: Hamming-windowed sinc used for interpolation has a transition bandwith of
337 // TB=(4/L)*fSamp, where L=2*Dterms+1 is the window-length, and here fSamp=Band (i.e. the full SFT frequency band)
338 // However, we're only interested in the physical band and we'll be throwing away all bins outside of this.
339 // This implies that we're only affected by *half* the transition band TB/2 on either side, as the other half of TB is outside of the band of interest
340 // (and will actually get aliased, i.e. the region [-fNy - TB/2, -fNy] overlaps with [fNy-TB/2,fNy] and vice-versa: [fNy,fNy+TB/2] overlaps with [-fNy,-fNy+TB/2])
341 // ==> therefore we only need to add an extra TB/2 on each side to be able to safely avoid the transition-band effects
342 REAL8 f0 = multiSFTs->data[0]->data[0].f0;
343 REAL8 dFreq = multiSFTs->data[0]->data[0].deltaF;
344 REAL8 Band = multiSFTs->data[0]->data[0].data->length * dFreq;
345 REAL8 extraBand = 2.0 / ( 2 * optArgs->Dterms + 1 ) * Band;
346 XLAL_CHECK( XLALMultiSFTVectorResizeBand( multiSFTs, f0 - extraBand, Band + 2 * extraBand ) == XLAL_SUCCESS, XLAL_EFUNC );
347 // Convert SFTs into heterodyned complex timeseries [in detector frame]
349
350 XLALDestroyMultiSFTVector( multiSFTs ); // don't need them SFTs any more ...
351
353 REAL8 dt_DET = resamp->multiTimeSeries_DET->data[0]->deltaT;
354 REAL8 fHet = resamp->multiTimeSeries_DET->data[0]->f0;
355 REAL8 Tsft = common->multiTimestamps->data[0]->deltaT;
356
357 // determine resampled timeseries parameters
358 REAL8 TspanFFT = 1.0 / common->dFreq;
359
360 // check that TspanFFT >= TspanX for all detectors X, otherwise increase TspanFFT by an integer factor 'decimateFFT' such that this is true
361 REAL8 TspanXMax = 0;
362 for ( UINT4 X = 0; X < numDetectors; X ++ ) {
363 UINT4 numSamples_DETX = resamp->multiTimeSeries_DET->data[X]->data->length;
364 REAL8 TspanX = numSamples_DETX * dt_DET;
365 TspanXMax = fmax( TspanXMax, TspanX );
366 }
367 UINT4 decimateFFT = ( UINT4 )ceil( TspanXMax / TspanFFT ); // larger than 1 means we need to artificially increase dFreqFFT by 'decimateFFT'
368 if ( decimateFFT > 1 ) {
369 XLALPrintWarning( "WARNING: Frequency spacing larger than 1/Tspan, we'll internally decimate FFT frequency bins by a factor of %" LAL_UINT4_FORMAT "\n", decimateFFT );
370 }
371 TspanFFT *= decimateFFT;
372 resamp->decimateFFT = decimateFFT;
373
374 UINT4 numSamplesFFT0 = ( UINT4 ) ceil( TspanFFT / dt_DET ); // we use ceil() so that we artificially widen the band rather than reduce it
375 UINT4 numSamplesFFT = 0;
376 if ( optArgs->resampFFTPowerOf2 ) {
377 numSamplesFFT = ( UINT4 ) pow( 2, ceil( log2( ( double ) numSamplesFFT0 ) ) ); // round numSamplesFFT up to next power of 2 for most effiecient FFT
378 } else {
379 numSamplesFFT = ( UINT4 ) 2 * ceil( numSamplesFFT0 / 2 ); // always ensure numSamplesFFT is even
380 }
381
382 REAL8 dt_SRC = TspanFFT / numSamplesFFT; // adjust sampling rate to allow achieving exact requested dFreq=1/TspanFFT !
383
384 resamp->numSamplesFFT = numSamplesFFT;
385 // ----- allocate buffer Memory ----------
386
387 // Move detector time series over to GPU
389
390 // header for SRC-frame resampled timeseries buffer
394
398
399 LIGOTimeGPS XLAL_INIT_DECL( epoch0 ); // will be set to corresponding SRC-frame epoch when barycentering
400 UINT4 numSamplesMax_SRC = 0;
401 for ( UINT4 X = 0; X < numDetectors; X ++ ) {
402 // ----- check input consistency ----------
403 REAL8 dt_DETX = resamp->multiTimeSeries_DET->data[X]->deltaT;
404 XLAL_CHECK( dt_DET == dt_DETX, XLAL_EINVAL, "Input timeseries must have identical 'deltaT(X=%d)' (%.16g != %.16g)\n", X, dt_DET, dt_DETX );
405
406 REAL8 fHetX = resamp->multiTimeSeries_DET->data[X]->f0;
407 XLAL_CHECK( fabs( fHet - fHetX ) < LAL_REAL8_EPS * fHet, XLAL_EINVAL, "Input timeseries must have identical heterodyning frequency 'f0(X=%d)' (%.16g != %.16g)\n", X, fHet, fHetX );
408
409 REAL8 TsftX = common->multiTimestamps->data[X]->deltaT;
410 XLAL_CHECK( Tsft == TsftX, XLAL_EINVAL, "Input timestamps must have identical stepsize 'Tsft(X=%d)' (%.16g != %.16g)\n", X, Tsft, TsftX );
411
412 // ----- prepare Memory fo SRC-frame timeseries and AM coefficients
413 const char *nameX = resamp->multiTimeSeries_DET->data[X]->name;
414 UINT4 numSamples_DETX = resamp->multiTimeSeries_DET->data[X]->data->length;
415 UINT4 numSamples_SRCX = ( UINT4 )ceil( numSamples_DETX * dt_DET / dt_SRC );
416
417 XLAL_CHECK( ( resamp->multiTimeSeries_SRC_a->data[X] = XLALCreateCOMPLEX8TimeSeries( nameX, &epoch0, fHet, dt_SRC, &lalDimensionlessUnit, numSamples_SRCX ) ) != NULL, XLAL_EFUNC );
418 XLAL_CHECK( ( resamp->multiTimeSeries_SRC_b->data[X] = XLALCreateCOMPLEX8TimeSeries( nameX, &epoch0, fHet, dt_SRC, &lalDimensionlessUnit, numSamples_SRCX ) ) != NULL, XLAL_EFUNC );
419
420 numSamplesMax_SRC = MYMAX( numSamplesMax_SRC, numSamples_SRCX );
421 } // for X < numDetectors
422
425 XLAL_CHECK( numSamplesFFT >= numSamplesMax_SRC, XLAL_EFAILED, "[numSamplesFFT = %d] < [numSamplesMax_SRC = %d]\n", numSamplesFFT, numSamplesMax_SRC );
426
427 // ---- re-use shared workspace, or allocate here ----------
429 if ( ws != NULL ) {
430 if ( numSamplesFFT > ws->numSamplesFFTAlloc ) {
431 cudaFree( ws->FabX_Raw );
432 XLAL_CHECK( cudaMalloc( ( void ** )&ws->FabX_Raw, numSamplesFFT * sizeof( COMPLEX8 ) ) == cudaSuccess, XLAL_ENOMEM );
433 cudaFree( ws->TS_FFT );
434 XLAL_CHECK( cudaMalloc( ( void ** )&ws->TS_FFT, numSamplesFFT * sizeof( COMPLEX8 ) ) == cudaSuccess, XLAL_ENOMEM );
435
436 ws->numSamplesFFTAlloc = numSamplesFFT;
437 }
438
439 // adjust maximal SRC-frame timeseries length, if necessary - in the CPU version this was realloc'd, but I don't think we can do that here.
440 if ( numSamplesMax_SRC > ws->TStmp1_SRC->length ) {
442 XLAL_CHECK( ( ws->TStmp1_SRC = CreateCOMPLEX8VectorCUDA( numSamplesMax_SRC ) ) != NULL, XLAL_EFUNC );
443
445 XLAL_CHECK( ( ws->TStmp2_SRC = CreateCOMPLEX8VectorCUDA( numSamplesMax_SRC ) ) != NULL, XLAL_EFUNC );
446
448 XLAL_CHECK( ( ws->SRCtimes_DET = CreateREAL8VectorCUDA( numSamplesMax_SRC ) ) != NULL, XLAL_EFUNC );
449 }
450
451 } // end: if shared workspace given
452 else {
453 XLAL_CHECK( ( ws = ( ResampCUDAWorkspace * )XLALCalloc( 1, sizeof( *ws ) ) ) != NULL, XLAL_ENOMEM );
454 XLAL_CHECK( ( ws->TStmp1_SRC = CreateCOMPLEX8VectorCUDA( numSamplesMax_SRC ) ) != NULL, XLAL_EFUNC );
455 XLAL_CHECK( ( ws->TStmp2_SRC = CreateCOMPLEX8VectorCUDA( numSamplesMax_SRC ) ) != NULL, XLAL_EFUNC );
456 XLAL_CHECK( ( ws->SRCtimes_DET = CreateREAL8VectorCUDA( numSamplesMax_SRC ) ) != NULL, XLAL_EFUNC );
457 XLAL_CHECK( cudaMalloc( ( void ** )&ws->FabX_Raw, numSamplesFFT * sizeof( COMPLEX8 ) ) == cudaSuccess, XLAL_ENOMEM );
458 XLAL_CHECK( cudaMalloc( ( void ** )&ws->TS_FFT, numSamplesFFT * sizeof( COMPLEX8 ) ) == cudaSuccess, XLAL_ENOMEM );
459 ws->numSamplesFFTAlloc = numSamplesFFT;
460
461 common->workspace = ws;
462 } // end: if we create our own workspace
463
464 // ----- compute and buffer FFT plan ----------
465 XLAL_CHECK( cufftPlan1d( &resamp->fftplan, resamp->numSamplesFFT, CUFFT_C2C, 1 ) == CUFFT_SUCCESS, XLAL_ESYS );
466 XLAL_CHECK_CUDA_CALL( cudaDeviceSynchronize() );
467
468 // turn on timing collection if requested
469 resamp->collectTiming = optArgs->collectTiming;
470
471 // initialize struct for collecting timing data, store invariant 'meta' quantities about this setup
472 if ( resamp->collectTiming ) {
473 XLAL_INIT_MEM( resamp->timingGeneric );
475
476 XLAL_INIT_MEM( resamp->timingResamp );
477 resamp->timingResamp.Resolution = TspanXMax / TspanFFT;
478 resamp->timingResamp.NsampFFT0 = numSamplesFFT0;
479 resamp->timingResamp.NsampFFT = numSamplesFFT;
480 }
481
482 return XLAL_SUCCESS;
483
484} // XLALSetupFstatResampCUDA()
485
486__global__ void CUDAAddToFaFb( cuComplex *Fa_k, cuComplex *Fb_k, cuComplex *FaX_k, cuComplex *FbX_k, UINT4 numFreqBins )
487{
488 int k = threadIdx.x + blockDim.x * blockIdx.x;
489 if ( k >= numFreqBins ) {
490 return;
491 }
492 Fa_k[k] = cuCaddf( Fa_k[k], FaX_k[k] );
493 Fb_k[k] = cuCaddf( Fb_k[k], FbX_k[k] );
494}
495
496__global__ void CUDAComputeTwoF( REAL4 *twoF, cuComplex *Fa_k, cuComplex *Fb_k, REAL4 A, REAL4 B, REAL4 C, REAL4 E, REAL4 Dinv, UINT4 numFreqBins )
497{
498 int k = threadIdx.x + blockDim.x * blockIdx.x;
499 if ( k >= numFreqBins ) {
500 return;
501 }
502 cuComplex Fa = Fa_k[k];
503 cuComplex Fb = Fb_k[k];
504 REAL4 Fa_re = cuCrealf( Fa );
505 REAL4 Fa_im = cuCimagf( Fa );
506 REAL4 Fb_re = cuCrealf( Fb );
507 REAL4 Fb_im = cuCimagf( Fb );
508
509 REAL4 twoF_k = 4; // default fallback = E[2F] in noise when Dinv == 0 due to ill-conditionness of M_munu
510 if ( Dinv > 0 ) {
511 twoF_k = 2.0f * Dinv * ( B * ( SQ( Fa_re ) + SQ( Fa_im ) )
512 + A * ( SQ( Fb_re ) + SQ( Fb_im ) )
513 - 2.0 * C * ( Fa_re * Fb_re + Fa_im * Fb_im )
514 - 2.0 * E * ( - Fa_re * Fb_im + Fa_im * Fb_re ) // nonzero only in RAA case where Ed!=0
515 );
516 }
517
518 twoF[k] = twoF_k;
519}
520
521static int
523 const FstatCommon *common,
524 void *method_data
525 )
526{
527 // Check input
528 XLAL_CHECK( Fstats != NULL, XLAL_EFAULT );
529 XLAL_CHECK( common != NULL, XLAL_EFAULT );
530 XLAL_CHECK( method_data != NULL, XLAL_EFAULT );
531
532 ResampCUDAMethodData *resamp = ( ResampCUDAMethodData * ) method_data;
533
534 const FstatQuantities whatToCompute = Fstats->whatWasComputed;
535 XLAL_CHECK( !( whatToCompute & FSTATQ_ATOMS_PER_DET ), XLAL_EINVAL, "Resampling does not currently support atoms per detector" );
536
538
539 // ----- handy shortcuts ----------
540 PulsarDopplerParams thisPoint = Fstats->doppler;
541 const MultiCOMPLEX8TimeSeries *multiTimeSeries_DET = resamp->multiTimeSeries_DET;
542 UINT4 numDetectors = multiTimeSeries_DET->length;
543
544 // collect internal timing info
545 BOOLEAN collectTiming = resamp->collectTiming;
546 Timings_t *Tau = &( resamp->timingResamp.Tau );
547 XLAL_INIT_MEM( ( *Tau ) ); // these need to be initialized to 0 for each call
548
549 REAL8 ticStart = 0, tocEnd = 0;
550 REAL8 tic = 0, toc = 0;
551 if ( collectTiming ) {
552 XLAL_INIT_MEM( ( *Tau ) ); // re-set all timings to 0 at beginning of each Fstat-call
553 ticStart = XLALGetCPUTime();
554 }
555
556 // Note: all buffering is done within that function
558
559 if ( whatToCompute == FSTATQ_NONE ) {
560 return XLAL_SUCCESS;
561 }
562
563 MultiCOMPLEX8TimeSeries *multiTimeSeries_SRC_a = resamp->multiTimeSeries_SRC_a;
564 MultiCOMPLEX8TimeSeries *multiTimeSeries_SRC_b = resamp->multiTimeSeries_SRC_b;
565
566 // ============================== check workspace is properly allocated and initialized ===========
567
568 // ----- workspace that depends on maximal number of output frequency bins 'numFreqBins' ----------
569 UINT4 numFreqBins = Fstats->numFreqBins;
570
571 if ( collectTiming ) {
572 tic = XLALGetCPUTime();
573 }
574
575 // NOTE: we try to use as much existing memory as possible in FstatResults, so we only
576 // allocate local 'workspace' storage in case there's not already a vector allocated in FstatResults for it
577 // this also avoid having to copy these results in case the user asked for them to be returned
578 // TODO: Reallocating existing memory has not been implemented in the CUDA version.
579 /*
580 if ( whatToCompute & FSTATQ_FAFB )
581 {
582 XLALFree ( ws->Fa_k ); // avoid memory leak if allocated in previous call
583 ws->Fa_k = Fstats->Fa;
584 XLALFree ( ws->Fb_k ); // avoid memory leak if allocated in previous call
585 ws->Fb_k = Fstats->Fb;
586 } // end: if returning FaFb we can use that return-struct as 'workspace'
587 else // otherwise: we (re)allocate it locally
588 {
589 if ( numFreqBins > ws->numFreqBinsAlloc )
590 {
591 XLAL_CHECK ( (ws->Fa_k = (COMPLEX8 *)XLALRealloc ( ws->Fa_k, numFreqBins * sizeof(COMPLEX8))) != NULL, XLAL_ENOMEM );
592 XLAL_CHECK ( (ws->Fb_k = (COMPLEX8 *)XLALRealloc ( ws->Fb_k, numFreqBins * sizeof(COMPLEX8))) != NULL, XLAL_ENOMEM );
593 } // only increase workspace arrays
594 }
595 */
596 XLAL_CHECK( cudaMalloc( ( void ** )&ws->FaX_k, numFreqBins * sizeof( COMPLEX8 ) ) == cudaSuccess, XLAL_ENOMEM );
597 XLAL_CHECK( cudaMalloc( ( void ** )&ws->FbX_k, numFreqBins * sizeof( COMPLEX8 ) ) == cudaSuccess, XLAL_ENOMEM );
598 XLAL_CHECK( cudaMalloc( ( void ** )&ws->Fa_k, numFreqBins * sizeof( COMPLEX8 ) ) == cudaSuccess, XLAL_ENOMEM );
599 XLAL_CHECK( cudaMalloc( ( void ** )&ws->Fb_k, numFreqBins * sizeof( COMPLEX8 ) ) == cudaSuccess, XLAL_ENOMEM );
600 /*if ( whatToCompute & FSTATQ_FAFB_PER_DET )
601 {
602 XLALFree ( ws->FaX_k ); // avoid memory leak if allocated in previous call
603 ws->FaX_k = NULL; // will be set in loop over detectors X
604 XLALFree ( ws->FbX_k ); // avoid memory leak if allocated in previous call
605 ws->FbX_k = NULL; // will be set in loop over detectors X
606 } // end: if returning FaFbPerDet we can use that return-struct as 'workspace'
607 else // otherwise: we (re)allocate it locally
608 {
609 if ( numFreqBins > ws->numFreqBinsAlloc )
610 {
611 XLAL_CHECK ( (ws->FaX_k = (COMPLEX8 *)XLALRealloc ( ws->FaX_k, numFreqBins * sizeof(COMPLEX8))) != NULL, XLAL_ENOMEM );
612 XLAL_CHECK ( (ws->FbX_k = (COMPLEX8 *)XLALRealloc ( ws->FbX_k, numFreqBins * sizeof(COMPLEX8))) != NULL, XLAL_ENOMEM );
613 } // only increase workspace arrays
614 }*/
615 if ( numFreqBins > ws->numFreqBinsAlloc ) {
616 ws->numFreqBinsAlloc = numFreqBins; // keep track of allocated array length
617 }
618
619 if ( collectTiming ) {
620 toc = XLALGetCPUTime();
621 Tau->Mem = ( toc - tic ); // this one doesn't scale with number of detector!
622 }
623 // ====================================================================================================
624
625 // loop over detectors
626 for ( UINT4 X = 0; X < numDetectors; X++ ) {
627 const COMPLEX8TimeSeries *TimeSeriesX_SRC_a = multiTimeSeries_SRC_a->data[X];
628 const COMPLEX8TimeSeries *TimeSeriesX_SRC_b = multiTimeSeries_SRC_b->data[X];
629
630 // compute {Fa^X(f_k), Fb^X(f_k)}: results returned via workspace ws
631 XLAL_CHECK( XLALComputeFaFb_ResampCUDA( resamp, ws, thisPoint, common->dFreq, numFreqBins, TimeSeriesX_SRC_a, TimeSeriesX_SRC_b ) == XLAL_SUCCESS, XLAL_EFUNC );
632
633 if ( collectTiming ) {
634 tic = XLALGetCPUTime();
635 }
636
637 if ( X == 0 ) {
638 // avoid having to memset this array: for the first detector we *copy* results
639 XLAL_CHECK_CUDA_CALL( cudaMemcpy( ws->Fa_k, ws->FaX_k, sizeof( cuComplex )*numFreqBins, cudaMemcpyDeviceToDevice ) );
640 XLAL_CHECK_CUDA_CALL( cudaMemcpy( ws->Fb_k, ws->FbX_k, sizeof( cuComplex )*numFreqBins, cudaMemcpyDeviceToDevice ) );
641 } // end: if X==0
642 else {
643 // for subsequent detectors we *add to* them
644 CUDAAddToFaFb <<< ( numFreqBins + CUDA_BLOCK_SIZE - 1 ) / CUDA_BLOCK_SIZE, CUDA_BLOCK_SIZE >>> ( ws->Fa_k, ws->Fb_k, ws->FaX_k, ws->FbX_k, numFreqBins );
645 XLAL_CHECK_CUDA_CALL( cudaGetLastError() );
646 } // end:if X>0
647
648 if ( whatToCompute & FSTATQ_FAFB_PER_DET ) {
649 XLAL_CHECK_CUDA_CALL( cudaMemcpy( Fstats->FaPerDet[X], ws->FaX_k, sizeof( cuComplex )*numFreqBins, cudaMemcpyDeviceToHost ) );
650 XLAL_CHECK_CUDA_CALL( cudaMemcpy( Fstats->FbPerDet[X], ws->FbX_k, sizeof( cuComplex )*numFreqBins, cudaMemcpyDeviceToHost ) );
651 }
652
653 if ( collectTiming ) {
654 toc = XLALGetCPUTime();
655 Tau->SumFabX += ( toc - tic );
656 tic = toc;
657 }
658
659 // ----- if requested: compute per-detector Fstat_X_k
660 if ( whatToCompute & FSTATQ_2F_PER_DET ) {
661 const REAL4 Ad = resamp->MmunuX[X].Ad;
662 const REAL4 Bd = resamp->MmunuX[X].Bd;
663 const REAL4 Cd = resamp->MmunuX[X].Cd;
664 const REAL4 Ed = resamp->MmunuX[X].Ed;
665 const REAL4 Dd_inv = 1.0f / resamp->MmunuX[X].Dd;
666 REAL4 *twoF_gpu;
667 XLAL_CHECK( cudaMalloc( ( void ** )&twoF_gpu, sizeof( REAL4 )*numFreqBins ) == cudaSuccess, XLAL_ENOMEM );
668 CUDAComputeTwoF <<< ( numFreqBins + CUDA_BLOCK_SIZE - 1 ) / CUDA_BLOCK_SIZE, CUDA_BLOCK_SIZE >>> ( twoF_gpu, ws->FaX_k, ws->FbX_k, Ad, Bd, Cd, Ed, Dd_inv, numFreqBins );
669 XLAL_CHECK_CUDA_CALL( cudaGetLastError() );
670 XLAL_CHECK_CUDA_CALL( cudaMemcpy( Fstats->twoFPerDet[X], twoF_gpu, sizeof( REAL4 )*numFreqBins, cudaMemcpyDeviceToHost ) );
671 cudaFree( twoF_gpu );
672
673 } // end: if compute F_X
674
675 if ( collectTiming ) {
676 toc = XLALGetCPUTime();
677 Tau->Fab2F += ( toc - tic );
678 }
679
680 } // for X < numDetectors
681
682 if ( whatToCompute & FSTATQ_FAFB ) {
683 XLAL_CHECK_CUDA_CALL( cudaMemcpy( Fstats->Fa, ws->Fa_k, sizeof( cuComplex )*numFreqBins, cudaMemcpyDeviceToHost ) );
684 XLAL_CHECK_CUDA_CALL( cudaMemcpy( Fstats->Fb, ws->Fb_k, sizeof( cuComplex )*numFreqBins, cudaMemcpyDeviceToHost ) );
685 }
686
687 if ( whatToCompute & FSTATQ_FAFB_CUDA ) {
688 if ( Fstats->Fa_CUDA == NULL ) {
689 XLAL_CHECK( cudaMalloc( ( void ** )&Fstats->Fa_CUDA, sizeof( cuComplex )*numFreqBins ) == cudaSuccess, XLAL_ENOMEM );
690 }
691
692 if ( Fstats->Fb_CUDA == NULL ) {
693 XLAL_CHECK( cudaMalloc( ( void ** )&Fstats->Fb_CUDA, sizeof( cuComplex )*numFreqBins ) == cudaSuccess, XLAL_ENOMEM );
694 }
695
696 XLAL_CHECK_CUDA_CALL( cudaMemcpy( Fstats->Fa_CUDA, ws->Fa_k, sizeof( cuComplex )*numFreqBins, cudaMemcpyDeviceToDevice ) );
697 XLAL_CHECK_CUDA_CALL( cudaMemcpy( Fstats->Fb_CUDA, ws->Fb_k, sizeof( cuComplex )*numFreqBins, cudaMemcpyDeviceToDevice ) );
698 }
699
700 if ( collectTiming ) {
701 Tau->SumFabX /= numDetectors;
702 Tau->Fab2F /= numDetectors;
703 tic = XLALGetCPUTime();
704 }
705
706 if ( ( whatToCompute & FSTATQ_2F ) || ( whatToCompute & FSTATQ_2F_CUDA ) ) {
707 const REAL4 Ad = resamp->Mmunu.Ad;
708 const REAL4 Bd = resamp->Mmunu.Bd;
709 const REAL4 Cd = resamp->Mmunu.Cd;
710 const REAL4 Ed = resamp->Mmunu.Ed;
711 const REAL4 Dd_inv = 1.0f / resamp->Mmunu.Dd;
712 if ( Fstats->twoF_CUDA == NULL ) {
713 XLAL_CHECK( cudaMalloc( ( void ** )&Fstats->twoF_CUDA, sizeof( REAL4 )*numFreqBins ) == cudaSuccess, XLAL_ENOMEM );
714 }
715 CUDAComputeTwoF <<< ( numFreqBins + CUDA_BLOCK_SIZE - 1 ) / CUDA_BLOCK_SIZE, CUDA_BLOCK_SIZE >>> ( Fstats->twoF_CUDA, ws->Fa_k, ws->Fb_k, Ad, Bd, Cd, Ed, Dd_inv, numFreqBins );
716 XLAL_CHECK_CUDA_CALL( cudaGetLastError() );
717 if ( whatToCompute & FSTATQ_2F ) {
718 XLAL_CHECK_CUDA_CALL( cudaMemcpy( Fstats->twoF, Fstats->twoF_CUDA, sizeof( REAL4 )*numFreqBins, cudaMemcpyDeviceToHost ) );
719 }
720 if ( !( whatToCompute & FSTATQ_2F_CUDA ) ) {
721 cudaFree( Fstats->twoF_CUDA );
722 Fstats->twoF_CUDA = NULL;
723 }
724 } // if FSTATQ_2F
725
726 if ( collectTiming ) {
727 toc = XLALGetCPUTime();
728 Tau->Fab2F += ( toc - tic );
729 }
730
731 // Return F-atoms per detector
732 if ( whatToCompute & FSTATQ_ATOMS_PER_DET ) {
733 XLAL_ERROR( XLAL_EFAILED, "NOT implemented!" );
734 }
735
736 // Return antenna-pattern matrix
737 Fstats->Mmunu = resamp->Mmunu;
738
739 // return per-detector antenna-pattern matrices
740 for ( UINT4 X = 0; X < numDetectors; X ++ ) {
741 Fstats->MmunuX[X] = resamp->MmunuX[X];
742 }
743
744 // ----- workspace memory management:
745 // if we used the return struct directly to store Fa,Fb results,
746 // make sure to wipe those pointers to avoid mistakenly considering them as 'local' memory
747 // and re-allocing it in another call to this function
748 cudaFree( ws->Fa_k );
749 cudaFree( ws->Fb_k );
750 cudaFree( ws->FaX_k );
751 cudaFree( ws->FbX_k );
752 if ( whatToCompute & FSTATQ_FAFB ) {
753 ws->Fa_k = NULL;
754 ws->Fb_k = NULL;
755 }
756 if ( whatToCompute & FSTATQ_FAFB_PER_DET ) {
757 ws->FaX_k = NULL;
758 ws->FbX_k = NULL;
759 }
760
761 if ( collectTiming ) {
762 tocEnd = XLALGetCPUTime();
763
764 FstatTimingGeneric *tiGen = &( resamp->timingGeneric );
765 FstatTimingResamp *tiRS = &( resamp->timingResamp );
766 XLAL_CHECK( numDetectors == tiGen->Ndet, XLAL_EINVAL, "Inconsistent number of detectors between XLALCreateSetup() [%d] and XLALComputeFstat() [%d]\n", tiGen->Ndet, numDetectors );
767
768 Tau->Total = ( tocEnd - ticStart );
769 // rescale all relevant timings to per-detector
770 Tau->Total /= numDetectors;
771 Tau->Bary /= numDetectors;
772 Tau->Spin /= numDetectors;
773 Tau->FFT /= numDetectors;
774 Tau->Norm /= numDetectors;
775 Tau->Copy /= numDetectors;
776 REAL8 Tau_buffer = Tau->Bary;
777 // compute generic F-stat timing model contributions
778 UINT4 NFbin = Fstats->numFreqBins;
779 REAL8 tauF_eff = Tau->Total / NFbin;
780 REAL8 tauF_core = ( Tau->Total - Tau_buffer ) / NFbin;
781
782 // compute resampling timing model coefficients
783 REAL8 tau0_Fbin = ( Tau->Copy + Tau->Norm + Tau->SumFabX + Tau->Fab2F ) / NFbin;
784 REAL8 tau0_spin = Tau->Spin / ( tiRS->Resolution * tiRS->NsampFFT );
785 REAL8 tau0_FFT = Tau->FFT / ( 5.0 * tiRS->NsampFFT * log2( ( double ) tiRS->NsampFFT ) );
786
787 // update the averaged timing-model quantities
788 tiGen->NCalls ++; // keep track of number of Fstat-calls for timing
789#define updateAvgF(q) tiGen->q = ((tiGen->q *(tiGen->NCalls-1) + q)/(tiGen->NCalls))
790 updateAvgF( tauF_eff );
791 updateAvgF( tauF_core );
792 // we also average NFbin, which can be different between different calls to XLALComputeFstat() (contrary to Ndet)
793 updateAvgF( NFbin );
794
795#define updateAvgRS(q) tiRS->q = ((tiRS->q *(tiGen->NCalls-1) + q)/(tiGen->NCalls))
796 updateAvgRS( tau0_Fbin );
797 updateAvgRS( tau0_spin );
798 updateAvgRS( tau0_FFT );
799
800 // buffer-quantities only updated if buffer was actually recomputed
801 if ( Tau->BufferRecomputed ) {
802 REAL8 tau0_bary = Tau_buffer / ( tiRS->Resolution * tiRS->NsampFFT );
803 REAL8 tauF_buffer = Tau_buffer / NFbin;
804
805 updateAvgF( tauF_buffer );
806 updateAvgRS( tau0_bary );
807 } // if BufferRecomputed
808
809 } // if collectTiming
810 return XLAL_SUCCESS;
811
812} // XLALComputeFstatResamp()
813
814__global__ void CUDANormFaFb( cuComplex *Fa_out,
815 cuComplex *Fb_out,
816 REAL8 FreqOut0,
817 REAL8 dFreq,
818 REAL8 dtauX,
819 REAL8 dt_SRC,
820 UINT4 numFreqBins )
821{
822 int k = threadIdx.x + blockDim.x * blockIdx.x;
823 if ( k >= numFreqBins ) {
824 return;
825 }
826 REAL8 f_k = FreqOut0 + k * dFreq;
827 REAL8 cycles = - f_k * dtauX;
828 REAL8 sinphase, cosphase;
829 sincospi( 2 * cycles, &sinphase, &cosphase );
830 cuComplex normX_k = { ( float )( dt_SRC * cosphase ), ( float )( dt_SRC * sinphase ) };
831
832 Fa_out[k] = cuCmulf( Fa_out[k], normX_k );
833 Fb_out[k] = cuCmulf( Fb_out[k], normX_k );
834}
835
836__global__ void CUDAPopulateFaFbFromRaw( cuComplex *out, cuComplex *in, UINT4 numFreqBins, UINT4 offset_bins, UINT4 decimateFFT )
837{
838 int k = threadIdx.x + blockIdx.x * blockDim.x;
839 if ( k >= numFreqBins ) {
840 return;
841 }
842 out[k] = in[offset_bins + k * decimateFFT];
843}
844
845static int
846XLALComputeFaFb_ResampCUDA( ResampCUDAMethodData *resamp, //!< [in,out] buffered resampling data and workspace
847 ResampCUDAWorkspace *ws, //!< [in,out] resampling workspace (memory-sharing across segments)
848 const PulsarDopplerParams thisPoint, //!< [in] Doppler point to compute {FaX,FbX} for
849 REAL8 dFreq, //!< [in] output frequency resolution
850 UINT4 numFreqBins, //!< [in] number of output frequency bins
851 const COMPLEX8TimeSeries *__restrict__ TimeSeries_SRC_a, //!< [in] SRC-frame single-IFO timeseries * a(t)
852 const COMPLEX8TimeSeries *__restrict__ TimeSeries_SRC_b //!< [in] SRC-frame single-IFO timeseries * b(t)
853 )
854{
855 XLAL_CHECK( ( resamp != NULL ) && ( ws != NULL ) && ( TimeSeries_SRC_a != NULL ) && ( TimeSeries_SRC_b != NULL ), XLAL_EINVAL );
856 XLAL_CHECK( dFreq > 0, XLAL_EINVAL );
857 XLAL_CHECK( numFreqBins <= ws->numFreqBinsAlloc, XLAL_EINVAL );
858
859 REAL8 FreqOut0 = thisPoint.fkdot[0];
860
861 // compute frequency shift to align heterodyne frequency with output frequency bins
862 REAL8 fHet = TimeSeries_SRC_a->f0;
863 REAL8 dt_SRC = TimeSeries_SRC_a->deltaT;
864
865 REAL8 dFreqFFT = dFreq / resamp->decimateFFT; // internally may be using higher frequency resolution dFreqFFT than requested
866 REAL8 freqShift = remainder( FreqOut0 - fHet, dFreq ); // frequency shift to closest bin
867 REAL8 fMinFFT = fHet + freqShift - dFreqFFT * ( resamp->numSamplesFFT / 2 ); // we'll shift DC into the *middle bin* N/2 [N always even!]
868 XLAL_CHECK( FreqOut0 >= fMinFFT, XLAL_EDOM, "Lowest output frequency outside the available frequency band: [FreqOut0 = %.16g] < [fMinFFT = %.16g]\n", FreqOut0, fMinFFT );
869 UINT4 offset_bins = ( UINT4 ) lround( ( FreqOut0 - fMinFFT ) / dFreqFFT );
870 UINT4 maxOutputBin = offset_bins + ( numFreqBins - 1 ) * resamp->decimateFFT;
871 XLAL_CHECK( maxOutputBin < resamp->numSamplesFFT, XLAL_EDOM, "Highest output frequency bin outside available band: [maxOutputBin = %d] >= [numSamplesFFT = %d]\n", maxOutputBin, resamp->numSamplesFFT );
872
873 FstatTimingResamp *tiRS = &( resamp->timingResamp );
874 BOOLEAN collectTiming = resamp->collectTiming;
875 REAL8 tic = 0, toc = 0;
876
877 XLAL_CHECK( resamp->numSamplesFFT >= TimeSeries_SRC_a->data->length, XLAL_EFAILED, "[numSamplesFFT = %d] < [len(TimeSeries_SRC_a) = %d]\n", resamp->numSamplesFFT, TimeSeries_SRC_a->data->length );
878 XLAL_CHECK( resamp->numSamplesFFT >= TimeSeries_SRC_b->data->length, XLAL_EFAILED, "[numSamplesFFT = %d] < [len(TimeSeries_SRC_b) = %d]\n", resamp->numSamplesFFT, TimeSeries_SRC_b->data->length );
879
880 if ( collectTiming ) {
881 tic = XLALGetCPUTime();
882 }
883 XLAL_CHECK_CUDA_CALL( cudaMemset( ws->TS_FFT, 0, resamp->numSamplesFFT * sizeof( ws->TS_FFT[0] ) ) );
884
885 // ----- compute FaX_k
886 // apply spindown phase-factors, store result in zero-padded timeseries for 'FFT'ing
887 XLAL_CHECK( XLALApplySpindownAndFreqShiftCUDA( ws->TS_FFT, TimeSeries_SRC_a, &thisPoint, freqShift ) == XLAL_SUCCESS, XLAL_EFUNC );
888
889 if ( collectTiming ) {
890 toc = XLALGetCPUTime();
891 tiRS->Tau.Spin += ( toc - tic );
892 tic = toc;
893 }
894
895 // Fourier transform the resampled Fa(t)
896 XLAL_CHECK_CUDA_CALL( cudaDeviceSynchronize() );
897 XLAL_CHECK_CUFFT_CALL( cufftExecC2C( resamp->fftplan, ws->TS_FFT, ws->FabX_Raw, CUFFT_FORWARD ) );
898 XLAL_CHECK_CUDA_CALL( cudaDeviceSynchronize() );
899 if ( collectTiming ) {
900 toc = XLALGetCPUTime();
901 tiRS->Tau.FFT += ( toc - tic );
902 tic = toc;
903 }
904
905 CUDAPopulateFaFbFromRaw <<< ( numFreqBins + CUDA_BLOCK_SIZE - 1 ) / CUDA_BLOCK_SIZE, CUDA_BLOCK_SIZE >>> ( ws->FaX_k, ws->FabX_Raw, numFreqBins, offset_bins, resamp->decimateFFT );
906 XLAL_CHECK_CUDA_CALL( cudaGetLastError() );
907 if ( collectTiming ) {
908 toc = XLALGetCPUTime();
909 tiRS->Tau.Copy += ( toc - tic );
910 tic = toc;
911 }
912
913 // ----- compute FbX_k
914 // apply spindown phase-factors, store result in zero-padded timeseries for 'FFT'ing
915 XLAL_CHECK( XLALApplySpindownAndFreqShiftCUDA( ws->TS_FFT, TimeSeries_SRC_b, &thisPoint, freqShift ) == XLAL_SUCCESS, XLAL_EFUNC );
916
917 if ( collectTiming ) {
918 toc = XLALGetCPUTime();
919 tiRS->Tau.Spin += ( toc - tic );
920 tic = toc;
921 }
922
923 // Fourier transform the resampled Fb(t)
924 XLAL_CHECK_CUFFT_CALL( cufftExecC2C( resamp->fftplan, ws->TS_FFT, ws->FabX_Raw, CUFFT_FORWARD ) );
925
926 if ( collectTiming ) {
927 toc = XLALGetCPUTime();
928 tiRS->Tau.FFT += ( toc - tic );
929 tic = toc;
930 }
931 CUDAPopulateFaFbFromRaw <<< ( numFreqBins + CUDA_BLOCK_SIZE - 1 ) / CUDA_BLOCK_SIZE, CUDA_BLOCK_SIZE >>> ( ws->FbX_k, ws->FabX_Raw, numFreqBins, offset_bins, resamp->decimateFFT );
932 XLAL_CHECK_CUDA_CALL( cudaGetLastError() );
933
934 if ( collectTiming ) {
935 toc = XLALGetCPUTime();
936 tiRS->Tau.Copy += ( toc - tic );
937 tic = toc;
938 }
939
940 // ----- normalization factors to be applied to Fa and Fb:
941 const REAL8 dtauX = GPSDIFF( TimeSeries_SRC_a->epoch, thisPoint.refTime );
942 CUDANormFaFb <<< ( numFreqBins + CUDA_BLOCK_SIZE - 1 ) / CUDA_BLOCK_SIZE, CUDA_BLOCK_SIZE >>> ( ws->FaX_k, ws->FbX_k, FreqOut0, dFreq, dtauX, dt_SRC, numFreqBins );
943 XLAL_CHECK_CUDA_CALL( cudaGetLastError() );
944 if ( collectTiming ) {
945 toc = XLALGetCPUTime();
946 tiRS->Tau.Norm += ( toc - tic );
947 tic = toc;
948 }
949
950 return XLAL_SUCCESS;
951} // XLALComputeFaFb_ResampCUDA()
952
953
954__global__ void CUDAApplySpindownAndFreqShift( cuComplex *out,
955 cuComplex *in,
956 PulsarDopplerParams doppler,
957 REAL8 freqShift,
958 REAL8 dt,
959 REAL8 Dtau0,
960 UINT4 s_max,
961 UINT4 numSamplesIn )
962
963{
964 int j = threadIdx.x + blockIdx.x * blockDim.x;
965 if ( j >= numSamplesIn ) {
966 return;
967 }
968 REAL8 taup_j = j * dt;
969 REAL8 Dtau_alpha_j = Dtau0 + taup_j;
970 REAL8 cycles = -freqShift * taup_j;
971 REAL8 Dtau_pow_kp1 = Dtau_alpha_j;
972 for ( UINT4 k = 1; k <= s_max; k++ ) {
973 Dtau_pow_kp1 *= Dtau_alpha_j;
974 cycles += - lal_fact_inv[k + 1] * doppler.fkdot[k] * Dtau_pow_kp1;
975 }
976 REAL8 cosphase, sinphase;
977 sincospi( 2 * cycles, &sinphase, &cosphase );
978 cuComplex em2piphase = {( float )cosphase, ( float )sinphase};
979 out[j] = cuCmulf( in[j], em2piphase );
980}
981
982static int
983XLALApplySpindownAndFreqShiftCUDA( cuComplex *__restrict__ xOut, ///< [out] the spindown-corrected SRC-frame timeseries
984 const COMPLEX8TimeSeries *__restrict__ xIn, ///< [in] the input SRC-frame timeseries
985 const PulsarDopplerParams *__restrict__ doppler, ///< [in] containing spindown parameters
986 REAL8 freqShift ///< [in] frequency-shift to apply, sign is "new - old"
987 )
988{
989 // input sanity checks
990 XLAL_CHECK( xOut != NULL, XLAL_EINVAL );
991 XLAL_CHECK( xIn != NULL, XLAL_EINVAL );
992 XLAL_CHECK( doppler != NULL, XLAL_EINVAL );
993
994 // determine number of spin downs to include
995 UINT4 s_max = PULSAR_MAX_SPINS - 1;
996 while ( ( s_max > 0 ) && ( doppler->fkdot[s_max] == 0 ) ) {
997 s_max --;
998 }
999 REAL8 dt = xIn->deltaT;
1000 UINT4 numSamplesIn = xIn->data->length;
1001 LIGOTimeGPS epoch = xIn->epoch;
1002 REAL8 Dtau0 = GPSDIFF( epoch, doppler->refTime );
1003
1004 XLAL_CHECK_CUDA_CALL( cudaDeviceSynchronize() );
1005 CUDAApplySpindownAndFreqShift <<< ( numSamplesIn + CUDA_BLOCK_SIZE - 1 ) / CUDA_BLOCK_SIZE, CUDA_BLOCK_SIZE >>> ( xOut, reinterpret_cast<cuComplex *>( xIn->data->data ), *doppler, freqShift, dt, Dtau0, s_max, numSamplesIn );
1006 XLAL_CHECK_CUDA_CALL( cudaGetLastError() );
1007 XLAL_CHECK_CUDA_CALL( cudaDeviceSynchronize() );
1008
1009 return XLAL_SUCCESS;
1010} // XLALApplySpindownAndFreqShiftCUDA()
1011
1012__global__ void CUDAApplyHetAMInterp( cuComplex *TS_a, cuComplex *TS_b, cuComplex *TStmp1, cuComplex *TStmp2, UINT4 numSamples )
1013{
1014 int j = threadIdx.x + blockIdx.x * blockDim.x;
1015 if ( j >= numSamples ) {
1016 return;
1017 }
1018 TS_b[j] = cuCmulf( TS_a[j], TStmp2[j] );
1019 TS_a[j] = cuCmulf( TS_a[j], TStmp1[j] );
1020}
1021
1022__global__ void CUDAApplyHetAMSrc( cuComplex *TStmp1_SRC_data,
1023 cuComplex *TStmp2_SRC_data,
1024 REAL8 *ti_DET_data,
1025 UINT4 iStart_SRC_al,
1026 REAL8 tStart_SRC_0,
1027 REAL8 tStart_DET_0,
1028 REAL8 dt_SRC,
1029 REAL8 tMid_DET_al,
1030 REAL8 tMid_SRC_al,
1031 REAL8 Tdot_al,
1032 REAL8 fHet,
1033 REAL4 a_al,
1034 REAL4 b_al,
1035 UINT4 numSamples )
1036{
1037 int j = threadIdx.x + blockIdx.x * blockDim.x;
1038 if ( j >= numSamples ) {
1039 return;
1040 }
1041
1042 UINT4 iSRC_al_j = iStart_SRC_al + j;
1043
1044 // for each time sample in the SRC frame, we estimate the corresponding detector time,
1045 // using a linear approximation expanding around the midpoint of each SFT
1046 REAL8 t_SRC = tStart_SRC_0 + iSRC_al_j * dt_SRC;
1047 REAL8 ti_DET_al_j = tMid_DET_al + ( t_SRC - tMid_SRC_al ) / Tdot_al;
1048 ti_DET_data [ iSRC_al_j ] = ti_DET_al_j;
1049
1050 // pre-compute correction factors due to non-zero heterodyne frequency of input
1051 REAL8 tDiff = iSRC_al_j * dt_SRC + ( tStart_DET_0 - ti_DET_al_j ); // tSRC_al_j - tDET(tSRC_al_j)
1052 REAL8 cycles = fmod( fHet * tDiff, 1.0 ); // the accumulated heterodyne cycles
1053
1054 REAL8 cosphase, sinphase;
1055 sincospi( -2 * cycles, &sinphase, &cosphase ); // the real and imaginary parts of the phase correction
1056
1057 cuComplex ei2piphase = {( float ) cosphase, ( float ) sinphase};
1058
1059 // apply AM coefficients a(t), b(t) to SRC frame timeseries [alternate sign to get final FFT return DC in the middle]
1060 // equivalent to: REAL4 signum = signumLUT [ (iSRC_al_j % 2) ];
1061 cuComplex signum = {1.0f - 2 * ( iSRC_al_j % 2 ), 0}; // alternating sign, avoid branching
1062 ei2piphase = cuCmulf( ei2piphase, signum );
1063
1064 TStmp1_SRC_data[iSRC_al_j] = make_cuComplex( ei2piphase.x * a_al, ei2piphase.y * a_al );
1065 TStmp2_SRC_data[iSRC_al_j] = make_cuComplex( ei2piphase.x * b_al, ei2piphase.y * b_al );
1066}
1067
1068__global__ void CUDASincInterp( cuComplex *out,
1069 REAL8 *t_out,
1070 cuComplex *in,
1071 REAL8 *win,
1072 UINT4 Dterms,
1073 UINT4 numSamplesIn,
1074 UINT4 numSamplesOut,
1075 REAL8 tmin,
1076 REAL8 dt,
1077 REAL8 oodt )
1078{
1079 int l = threadIdx.x + blockDim.x * blockIdx.x;
1080 if ( l >= numSamplesOut ) {
1081 return;
1082 }
1083 REAL8 t = t_out[l] - tmin; // measure time since start of input timeseries
1084
1085 // samples outside of input timeseries are returned as 0
1086 if ( ( t < 0 ) || ( t > ( numSamplesIn - 1 ) * dt ) ) { // avoid any extrapolations!
1087 out[l] = make_cuComplex( 0, 0 );
1088 return;
1089 }
1090
1091 REAL8 t_by_dt = t * oodt;
1092 INT8 jstar = lround( t_by_dt ); // bin closest to 't', guaranteed to be in [0, numSamples-1]
1093
1094 if ( fabs( t_by_dt - jstar ) < 2.0e-4 ) { // avoid numerical problems near peak
1095 out[l] = in[jstar]; // known analytic solution for exact bin
1096 return;
1097 }
1098
1099 INT4 jStart0 = jstar - Dterms;
1100 UINT4 jEnd0 = jstar + Dterms;
1101 UINT4 jStart = max( jStart0, 0 );
1102 UINT4 jEnd = min( jEnd0, numSamplesIn - 1 );
1103
1104 REAL4 delta_jStart = ( t_by_dt - jStart );
1105 REAL8 sin0 = sinpi( delta_jStart );
1106
1107 REAL4 sin0oopi = sin0 * 1.0 / M_PI;
1108
1109 cuComplex y_l = {0, 0};
1110 REAL8 delta_j = delta_jStart;
1111 for ( UINT8 j = jStart; j <= jEnd; j ++ ) {
1112 cuComplex Cj = {( float )( win[j - jStart0] * sin0oopi / delta_j ), 0};
1113
1114 y_l = cuCaddf( y_l, cuCmulf( Cj, in[j] ) );
1115
1116 sin0oopi = -sin0oopi; // sin-term flips sign every step
1117 delta_j --;
1118 } // for j in [j* - Dterms, ... ,j* + Dterms]
1119
1120 out[l] = y_l;
1121}
1122
1123// Reimplementation of XLALCreateHammingREAL8Window
1124__global__ void CUDACreateHammingWindow( REAL8 *out, UINT4 length )
1125{
1126
1127 int i = threadIdx.x + blockDim.x * blockIdx.x;
1128 if ( i >= ( length + 1 ) / 2 ) {
1129 return;
1130 }
1131
1132 int length_reduced = length - 1;
1133 double Y = length_reduced > 0 ? ( 2 * i - length_reduced ) / ( double ) length_reduced : 0;
1134 out[i] = 0.08 + 0.92 * pow( cos( M_PI / 2 * Y ), 2 );
1135 out[length - i - 1] = 0.08 + 0.92 * pow( cos( M_PI / 2 * Y ), 2 );
1136}
1137
1138int
1139SincInterp( COMPLEX8Vector *y_out, ///< [out] output series of interpolated y-values [must be same size as t_out]
1140 const REAL8Vector *t_out, ///< [in] output time-steps to interpolate input to
1141 const COMPLEX8TimeSeries *ts_in, ///< [in] regularly-spaced input timeseries
1142 UINT4 Dterms ///< [in] window sinc kernel sum to +-Dterms around max
1143 )
1144{
1145 XLAL_CHECK( y_out != NULL, XLAL_EINVAL );
1146 XLAL_CHECK( t_out != NULL, XLAL_EINVAL );
1147 XLAL_CHECK( ts_in != NULL, XLAL_EINVAL );
1148 XLAL_CHECK( y_out->length == t_out->length, XLAL_EINVAL );
1149
1150 UINT4 numSamplesOut = t_out->length;
1151 UINT4 numSamplesIn = ts_in->data->length;
1152 REAL8 dt = ts_in->deltaT;
1153 REAL8 tmin = XLALGPSGetREAL8( &( ts_in->epoch ) ); // time of first bin in input timeseries
1154
1155 UINT4 winLen = 2 * Dterms + 1;
1156
1157 const REAL8 oodt = 1.0 / dt;
1158 REAL8 *win_gpu;
1159 XLAL_CHECK( cudaMalloc( ( void ** )&win_gpu, sizeof( REAL8 )*winLen ) == cudaSuccess, XLAL_ENOMEM );
1160 CUDACreateHammingWindow <<< ( winLen + CUDA_BLOCK_SIZE - 1 ) / CUDA_BLOCK_SIZE, CUDA_BLOCK_SIZE >>> ( win_gpu, winLen );
1161 XLAL_CHECK_CUDA_CALL( cudaGetLastError() );
1162 CUDASincInterp <<< ( numSamplesOut + CUDA_BLOCK_SIZE - 1 ) / CUDA_BLOCK_SIZE, CUDA_BLOCK_SIZE >>> ( ( cuComplex * )( y_out->data ), t_out->data, ( cuComplex * )( ts_in->data->data ), win_gpu, Dterms, numSamplesIn, numSamplesOut, tmin, dt, oodt );
1163 XLAL_CHECK_CUDA_CALL( cudaGetLastError() );
1164 cudaFree( win_gpu );
1165
1166 return XLAL_SUCCESS;
1167}
1168
1169///
1170/// Performs barycentric resampling on a multi-detector timeseries, updates resampling buffer with results
1171///
1172/// NOTE Buffering: this function does check
1173/// 1) whether the previously-buffered solution can be completely reused (same sky-position and binary parameters), or
1174/// 2) if at least sky-dependent quantities can be re-used (antenna-patterns + timings) in case only binary parameters changed
1175///
1176static int
1178 ResampCUDAMethodData *resamp, // [in/out] resampling input and buffer (to store resampling TS)
1179 const PulsarDopplerParams *thisPoint, // [in] current skypoint and reftime
1180 const FstatCommon *common // [in] various input quantities and parameters used here
1181)
1182{
1183 // check input sanity
1184 XLAL_CHECK( thisPoint != NULL, XLAL_EINVAL );
1185 XLAL_CHECK( common != NULL, XLAL_EINVAL );
1186 XLAL_CHECK( resamp != NULL, XLAL_EINVAL );
1187 XLAL_CHECK( resamp->multiTimeSeries_DET != NULL, XLAL_EINVAL );
1188 XLAL_CHECK( resamp->multiTimeSeries_SRC_a != NULL, XLAL_EINVAL );
1189 XLAL_CHECK( resamp->multiTimeSeries_SRC_b != NULL, XLAL_EINVAL );
1190
1192
1194 XLAL_CHECK( resamp->multiTimeSeries_SRC_a->length == numDetectors, XLAL_EINVAL, "Inconsistent number of detectors tsDET(%d) != tsSRC(%d)\n", numDetectors, resamp->multiTimeSeries_SRC_a->length );
1195 XLAL_CHECK( resamp->multiTimeSeries_SRC_b->length == numDetectors, XLAL_EINVAL, "Inconsistent number of detectors tsDET(%d) != tsSRC(%d)\n", numDetectors, resamp->multiTimeSeries_SRC_b->length );
1196
1197 // ============================== BEGIN: handle buffering =============================
1198 BOOLEAN same_skypos = ( resamp->prev_doppler.Alpha == thisPoint->Alpha ) && ( resamp->prev_doppler.Delta == thisPoint->Delta );
1199 BOOLEAN same_refTime = ( GPSDIFF( resamp->prev_doppler.refTime, thisPoint->refTime ) == 0 );
1200 BOOLEAN same_binary = \
1201 ( resamp->prev_doppler.asini == thisPoint->asini ) &&
1202 ( resamp->prev_doppler.period == thisPoint->period ) &&
1203 ( resamp->prev_doppler.ecc == thisPoint->ecc ) &&
1204 ( GPSDIFF( resamp->prev_doppler.tp, thisPoint->tp ) == 0 ) &&
1205 ( resamp->prev_doppler.argp == thisPoint->argp );
1206
1207 Timings_t *Tau = &( resamp->timingResamp.Tau );
1208 REAL8 tic = 0, toc = 0;
1209 BOOLEAN collectTiming = resamp->collectTiming;
1210
1211 // if same sky-position *and* same binary, we can simply return as there's nothing to be done here
1212 if ( same_skypos && same_refTime && same_binary ) {
1213 Tau->BufferRecomputed = 0;
1214 return XLAL_SUCCESS;
1215 }
1216 // else: keep track of 'buffer miss', ie we need to recompute the buffer
1217 Tau->BufferRecomputed = 1;
1218 resamp->timingGeneric.NBufferMisses ++;
1219 if ( collectTiming ) {
1220 tic = XLALGetCPUTime();
1221 }
1222
1223 MultiSSBtimes *multiSRCtimes = NULL;
1224
1225 // only if different sky-position: re-compute antenna-patterns and SSB timings, re-use from buffer otherwise
1226 if ( !( same_skypos && same_refTime ) ) {
1227 SkyPosition skypos;
1229 skypos.longitude = thisPoint->Alpha;
1230 skypos.latitude = thisPoint->Delta;
1231
1233 XLAL_CHECK( ( resamp->multiAMcoef = XLALComputeMultiAMCoeffs( common->multiDetectorStates, common->multiNoiseWeights, skypos ) ) != NULL, XLAL_EFUNC );
1234 resamp->Mmunu = resamp->multiAMcoef->Mmunu;
1235 for ( UINT4 X = 0; X < numDetectors; X ++ ) {
1236 resamp->MmunuX[X].Ad = resamp->multiAMcoef->data[X]->A;
1237 resamp->MmunuX[X].Bd = resamp->multiAMcoef->data[X]->B;
1238 resamp->MmunuX[X].Cd = resamp->multiAMcoef->data[X]->C;
1239 resamp->MmunuX[X].Ed = 0;
1240 resamp->MmunuX[X].Dd = resamp->multiAMcoef->data[X]->D;
1241 }
1242
1244 XLAL_CHECK( ( resamp->multiSSBtimes = XLALGetMultiSSBtimes( common->multiDetectorStates, skypos, thisPoint->refTime, common->SSBprec ) ) != NULL, XLAL_EFUNC );
1245
1246 } // if cannot re-use buffered solution ie if !(same_skypos && same_binary)
1247
1248 if ( thisPoint->asini > 0 ) { // binary case
1250 multiSRCtimes = resamp->multiBinaryTimes;
1251 } else { // isolated case
1252 multiSRCtimes = resamp->multiSSBtimes;
1253 }
1254
1255 // record barycenter parameters in order to allow re-usal of this result ('buffering')
1256 resamp->prev_doppler = ( *thisPoint );
1257
1258 // shorthands
1259 REAL8 fHet = resamp->multiTimeSeries_DET->data[0]->f0;
1260 REAL8 Tsft = common->multiTimestamps->data[0]->deltaT;
1261 REAL8 dt_SRC = resamp->multiTimeSeries_SRC_a->data[0]->deltaT;
1262
1263 // loop over detectors X
1264 for ( UINT4 X = 0; X < numDetectors; X++ ) {
1265 // shorthand pointers: input
1266 const COMPLEX8TimeSeries *TimeSeries_DETX = resamp->multiTimeSeries_DET->data[X];
1267 const LIGOTimeGPSVector *Timestamps_DETX = common->multiTimestamps->data[X];
1268 const SSBtimes *SRCtimesX = multiSRCtimes->data[X];
1269 const AMCoeffs *AMcoefX = resamp->multiAMcoef->data[X];
1270
1271 // shorthand pointers: output
1272 COMPLEX8TimeSeries *TimeSeries_SRCX_a = resamp->multiTimeSeries_SRC_a->data[X];
1273 COMPLEX8TimeSeries *TimeSeries_SRCX_b = resamp->multiTimeSeries_SRC_b->data[X];
1274 REAL8Vector *ti_DET = ws->SRCtimes_DET;
1275
1276 // useful shorthands
1277 REAL8 refTime8 = GPSGETREAL8( &SRCtimesX->refTime );
1278 UINT4 numSFTsX = Timestamps_DETX->length;
1279 UINT4 numSamples_DETX = TimeSeries_DETX->data->length;
1280 UINT4 numSamples_SRCX = TimeSeries_SRCX_a->data->length;
1281
1282 // sanity checks on input data
1283 XLAL_CHECK( numSamples_SRCX == TimeSeries_SRCX_b->data->length, XLAL_EINVAL );
1284 XLAL_CHECK( dt_SRC == TimeSeries_SRCX_a->deltaT, XLAL_EINVAL );
1285 XLAL_CHECK( dt_SRC == TimeSeries_SRCX_b->deltaT, XLAL_EINVAL );
1286 XLAL_CHECK( numSamples_DETX > 0, XLAL_EINVAL, "Input timeseries for detector X=%d has zero samples. Can't handle that!\n", X );
1287 XLAL_CHECK( ( SRCtimesX->DeltaT->length == numSFTsX ) && ( SRCtimesX->Tdot->length == numSFTsX ), XLAL_EINVAL );
1288 REAL8 fHetX = resamp->multiTimeSeries_DET->data[X]->f0;
1289 XLAL_CHECK( fabs( fHet - fHetX ) < LAL_REAL8_EPS * fHet, XLAL_EINVAL, "Input timeseries must have identical heterodyning frequency 'f0(X=%d)' (%.16g != %.16g)\n", X, fHet, fHetX );
1290 REAL8 TsftX = common->multiTimestamps->data[X]->deltaT;
1291 XLAL_CHECK( Tsft == TsftX, XLAL_EINVAL, "Input timestamps must have identical stepsize 'Tsft(X=%d)' (%.16g != %.16g)\n", X, Tsft, TsftX );
1292
1293 TimeSeries_SRCX_a->f0 = fHet;
1294 TimeSeries_SRCX_b->f0 = fHet;
1295 // set SRC-frame time-series start-time
1296 REAL8 tStart_SRC_0 = refTime8 + SRCtimesX->DeltaT->data[0] - ( 0.5 * Tsft ) * SRCtimesX->Tdot->data[0];
1298 GPSSETREAL8( epoch, tStart_SRC_0 );
1299 TimeSeries_SRCX_a->epoch = epoch;
1300 TimeSeries_SRCX_b->epoch = epoch;
1301
1302 // make sure all output samples are initialized to zero first, in case of gaps
1303 XLAL_CHECK_CUDA_CALL( cudaMemset( TimeSeries_SRCX_a->data->data, 0, TimeSeries_SRCX_a->data->length * sizeof( TimeSeries_SRCX_a->data->data[0] ) ) );
1304 XLAL_CHECK_CUDA_CALL( cudaMemset( TimeSeries_SRCX_b->data->data, 0, TimeSeries_SRCX_b->data->length * sizeof( TimeSeries_SRCX_b->data->data[0] ) ) );
1305 // make sure detector-frame timesteps to interpolate to are initialized to 0, in case of gaps
1306 XLAL_CHECK_CUDA_CALL( cudaMemset( ws->SRCtimes_DET->data, 0, ws->SRCtimes_DET->length * sizeof( ws->SRCtimes_DET->data[0] ) ) );
1307
1308 XLAL_CHECK_CUDA_CALL( cudaMemset( ws->TStmp1_SRC->data, 0, ws->TStmp1_SRC->length * sizeof( ws->TStmp1_SRC->data[0] ) ) );
1309 XLAL_CHECK_CUDA_CALL( cudaMemset( ws->TStmp2_SRC->data, 0, ws->TStmp2_SRC->length * sizeof( ws->TStmp2_SRC->data[0] ) ) );
1310
1311 REAL8 tStart_DET_0 = GPSGETREAL8( &( Timestamps_DETX->data[0] ) ); // START time of the SFT at the detector
1312 // loop over SFT timestamps and compute the detector frame time samples corresponding to uniformly sampled SRC time samples
1313 for ( UINT4 alpha = 0; alpha < numSFTsX; alpha ++ ) {
1314 // define some useful shorthands
1315 REAL8 Tdot_al = SRCtimesX->Tdot->data [ alpha ]; // the instantaneous time derivitive dt_SRC/dt_DET at the MID-POINT of the SFT
1316 REAL8 tMid_SRC_al = refTime8 + SRCtimesX->DeltaT->data[alpha]; // MID-POINT time of the SFT at the SRC
1317 REAL8 tStart_SRC_al = tMid_SRC_al - 0.5 * Tsft * Tdot_al; // approximate START time of the SFT at the SRC
1318 REAL8 tEnd_SRC_al = tMid_SRC_al + 0.5 * Tsft * Tdot_al; // approximate END time of the SFT at the SRC
1319
1320 REAL8 tStart_DET_al = GPSGETREAL8( &( Timestamps_DETX->data[alpha] ) ); // START time of the SFT at the detector
1321 REAL8 tMid_DET_al = tStart_DET_al + 0.5 * Tsft; // MID-POINT time of the SFT at the detector
1322
1323 // indices of first and last SRC-frame sample corresponding to this SFT
1324 UINT4 iStart_SRC_al = lround( ( tStart_SRC_al - tStart_SRC_0 ) / dt_SRC ); // the index of the resampled timeseries corresponding to the start of the SFT
1325 UINT4 iEnd_SRC_al = lround( ( tEnd_SRC_al - tStart_SRC_0 ) / dt_SRC ); // the index of the resampled timeseries corresponding to the end of the SFT
1326
1327 // truncate to actual SRC-frame timeseries
1328 iStart_SRC_al = MYMIN( iStart_SRC_al, numSamples_SRCX - 1 );
1329 iEnd_SRC_al = MYMIN( iEnd_SRC_al, numSamples_SRCX - 1 );
1330 UINT4 numSamplesSFT_SRC_al = iEnd_SRC_al - iStart_SRC_al + 1; // the number of samples in the SRC-frame for this SFT
1331
1332 REAL4 a_al = AMcoefX->a->data[alpha];
1333 REAL4 b_al = AMcoefX->b->data[alpha];
1334 CUDAApplyHetAMSrc <<< ( numSamplesSFT_SRC_al + CUDA_BLOCK_SIZE - 1 ) / CUDA_BLOCK_SIZE, CUDA_BLOCK_SIZE >>> ( ( cuComplex * )( ws->TStmp1_SRC->data ), ( cuComplex * )( ws->TStmp2_SRC->data ), ti_DET->data, iStart_SRC_al, tStart_SRC_0, tStart_DET_0, dt_SRC, tMid_DET_al, tMid_SRC_al, Tdot_al, fHet, a_al, b_al, numSamplesSFT_SRC_al );
1335 XLAL_CHECK_CUDA_CALL( cudaGetLastError() );
1336 } // for alpha < numSFTsX
1337
1338 XLAL_CHECK( ti_DET->length >= TimeSeries_SRCX_a->data->length, XLAL_EINVAL );
1339 UINT4 bak_length = ti_DET->length;
1340 ti_DET->length = TimeSeries_SRCX_a->data->length;
1341
1342 XLAL_CHECK( SincInterp( TimeSeries_SRCX_a->data, ti_DET, TimeSeries_DETX, resamp->Dterms ) == XLAL_SUCCESS, XLAL_EFUNC );
1343
1344 ti_DET->length = bak_length;
1345
1346 // apply heterodyne correction and AM-functions a(t) and b(t) to interpolated timeseries
1347 CUDAApplyHetAMInterp <<< ( numSamples_SRCX + CUDA_BLOCK_SIZE - 1 ) / CUDA_BLOCK_SIZE, CUDA_BLOCK_SIZE >>> ( ( cuComplex * )( TimeSeries_SRCX_a->data->data ), ( cuComplex * )( TimeSeries_SRCX_b->data->data ), ( cuComplex * )( ws->TStmp1_SRC->data ), ( cuComplex * )( ws->TStmp2_SRC->data ), numSamples_SRCX );
1348 XLAL_CHECK_CUDA_CALL( cudaGetLastError() );
1349
1350 } // for X < numDetectors
1351
1352 if ( collectTiming ) {
1353 toc = XLALGetCPUTime();
1354 Tau->Bary = ( toc - tic );
1355 }
1356
1357 return XLAL_SUCCESS;
1358
1359} // XLALBarycentricResampleMultiCOMPLEX8TimeSeriesCUDA()
1360
1361extern "C" int
1362XLALExtractResampledTimeseries_ResampCUDA( MultiCOMPLEX8TimeSeries **multiTimeSeries_SRC_a, MultiCOMPLEX8TimeSeries **multiTimeSeries_SRC_b, void *method_data )
1363{
1364 XLAL_CHECK( method_data != NULL, XLAL_EINVAL );
1365 XLAL_CHECK( ( multiTimeSeries_SRC_a != NULL ) && ( multiTimeSeries_SRC_b != NULL ), XLAL_EINVAL );
1366 XLAL_CHECK( method_data != NULL, XLAL_EINVAL );
1367
1368 ResampCUDAMethodData *resamp = ( ResampCUDAMethodData * ) method_data;
1369
1372
1373 *multiTimeSeries_SRC_a = resamp->hostCopy4ExtractTS_SRC_a;
1374 *multiTimeSeries_SRC_b = resamp->hostCopy4ExtractTS_SRC_b;
1375
1376 return XLAL_SUCCESS;
1377
1378} // XLALExtractResampledTimeseries_intern()
1379
1380extern "C" int
1381XLALGetFstatTiming_ResampCUDA( const void *method_data, FstatTimingGeneric *timingGeneric, FstatTimingModel *timingModel )
1382{
1383 XLAL_CHECK( method_data != NULL, XLAL_EINVAL );
1384 XLAL_CHECK( timingGeneric != NULL, XLAL_EINVAL );
1385 XLAL_CHECK( timingModel != NULL, XLAL_EINVAL );
1386
1387 const ResampCUDAMethodData *resamp = ( const ResampCUDAMethodData * ) method_data;
1388 XLAL_CHECK( resamp != NULL, XLAL_EINVAL );
1389
1390 ( *timingGeneric ) = resamp->timingGeneric; // struct-copy generic timing measurements
1391
1393
1394 return XLAL_SUCCESS;
1395} // XLALGetFstatTiming_ResampCUDA()
1396
1397/// @}
1398
1399// Local Variables:
1400// mode: c
1401// End:
#define updateAvgF(q)
#define updateAvgRS(q)
#define GPSGETREAL8(x)
#define GPSSETREAL8(gps, r8)
#define GPSDIFF(x, y)
static int XLALGetFstatTiming_Resamp_intern(const FstatTimingResamp *tiRS, FstatTimingModel *timingModel)
#define SQ(x)
#define min(a, b)
int j
int k
UINT2 A
Definition: SFTnaming.c:46
UINT2 B
Definition: SFTnaming.c:47
int l
int SincInterp(COMPLEX8Vector *y_out, const REAL8Vector *t_out, const COMPLEX8TimeSeries *ts_in, UINT4 Dterms)
static int XLALComputeFstatResampCUDA(FstatResults *Fstats, const FstatCommon *common, void *method_data)
__global__ void CUDAApplySpindownAndFreqShift(cuComplex *out, cuComplex *in, PulsarDopplerParams doppler, REAL8 freqShift, REAL8 dt, REAL8 Dtau0, UINT4 s_max, UINT4 numSamplesIn)
__global__ void CUDACreateHammingWindow(REAL8 *out, UINT4 length)
static void XLALDestroyResampCUDAWorkspace(void *workspace)
static void DestroyMultiCOMPLEX8TimeSeriesCUDA(MultiCOMPLEX8TimeSeries *multi)
static void DestroyCOMPLEX8TimeSeriesCUDA(COMPLEX8TimeSeries *series)
__global__ void CUDAPopulateFaFbFromRaw(cuComplex *out, cuComplex *in, UINT4 numFreqBins, UINT4 offset_bins, UINT4 decimateFFT)
static int XLALComputeFaFb_ResampCUDA(ResampCUDAMethodData *resamp, ResampCUDAWorkspace *ws, const PulsarDopplerParams thisPoint, REAL8 dFreq, UINT4 numFreqBins, const COMPLEX8TimeSeries *TimeSeries_SRC_a, const COMPLEX8TimeSeries *TimeSeries_SRC_b)
__global__ void CUDASincInterp(cuComplex *out, REAL8 *t_out, cuComplex *in, REAL8 *win, UINT4 Dterms, UINT4 numSamplesIn, UINT4 numSamplesOut, REAL8 tmin, REAL8 dt, REAL8 oodt)
__constant__ REAL8 PI
static int CopyCOMPLEX8TimeSeriesDtoH(COMPLEX8TimeSeries **dst, COMPLEX8TimeSeries *src)
static void DestroyCOMPLEX8VectorCUDA(COMPLEX8Vector *vec)
static COMPLEX8Vector * CreateCOMPLEX8VectorCUDA(UINT4 length)
__device__ __constant__ REAL8 lal_fact_inv[LAL_FACT_MAX]
int XLALExtractResampledTimeseries_ResampCUDA(MultiCOMPLEX8TimeSeries **multiTimeSeries_SRC_a, MultiCOMPLEX8TimeSeries **multiTimeSeries_SRC_b, void *method_data)
__global__ void CUDAComputeTwoF(REAL4 *twoF, cuComplex *Fa_k, cuComplex *Fb_k, REAL4 A, REAL4 B, REAL4 C, REAL4 E, REAL4 Dinv, UINT4 numFreqBins)
static int MoveCOMPLEX8TimeSeriesHtoD(COMPLEX8TimeSeries *series)
static REAL8Vector * CreateREAL8VectorCUDA(UINT4 length)
static int XLALBarycentricResampleMultiCOMPLEX8TimeSeriesCUDA(ResampCUDAMethodData *resamp, const PulsarDopplerParams *thisPoint, const FstatCommon *common)
Performs barycentric resampling on a multi-detector timeseries, updates resampling buffer with result...
__global__ void CUDAAddToFaFb(cuComplex *Fa_k, cuComplex *Fb_k, cuComplex *FaX_k, cuComplex *FbX_k, UINT4 numFreqBins)
static void DestroyREAL8VectorCUDA(REAL8Vector *vec)
#define CUDA_BLOCK_SIZE
int XLALSetupFstatResampCUDA(void **method_data, FstatCommon *common, FstatMethodFuncs *funcs, MultiSFTVector *multiSFTs, const FstatOptionalArgs *optArgs)
#define XLAL_CHECK_CUDA_CALL(...)
static int CopyMultiCOMPLEX8TimeSeriesDtoH(MultiCOMPLEX8TimeSeries **dst, MultiCOMPLEX8TimeSeries *src)
__global__ void CUDAApplyHetAMInterp(cuComplex *TS_a, cuComplex *TS_b, cuComplex *TStmp1, cuComplex *TStmp2, UINT4 numSamples)
int XLALGetFstatTiming_ResampCUDA(const void *method_data, FstatTimingGeneric *timingGeneric, FstatTimingModel *timingModel)
__global__ void CUDAApplyHetAMSrc(cuComplex *TStmp1_SRC_data, cuComplex *TStmp2_SRC_data, REAL8 *ti_DET_data, UINT4 iStart_SRC_al, REAL8 tStart_SRC_0, REAL8 tStart_DET_0, REAL8 dt_SRC, REAL8 tMid_DET_al, REAL8 tMid_SRC_al, REAL8 Tdot_al, REAL8 fHet, REAL4 a_al, REAL4 b_al, UINT4 numSamples)
static int MoveMultiCOMPLEX8TimeSeriesHtoD(MultiCOMPLEX8TimeSeries *multi)
#define XLAL_CHECK_CUFFT_CALL(...)
__global__ void CUDANormFaFb(cuComplex *Fa_out, cuComplex *Fb_out, REAL8 FreqOut0, REAL8 dFreq, REAL8 dtauX, REAL8 dt_SRC, UINT4 numFreqBins)
static int XLALApplySpindownAndFreqShiftCUDA(cuComplex *xOut, const COMPLEX8TimeSeries *xIn, const PulsarDopplerParams *doppler, REAL8 freqShift)
static void XLALDestroyResampCUDAMethodData(void *method_data)
FstatQuantities
Bit-field of -statistic quantities which can be computed by XLALComputeFstat().
Definition: ComputeFstat.h:94
@ FSTATQ_2F
Compute multi-detector .
Definition: ComputeFstat.h:96
@ FSTATQ_FAFB_PER_DET
Compute and for each detector.
Definition: ComputeFstat.h:99
@ FSTATQ_FAFB
Compute multi-detector and .
Definition: ComputeFstat.h:97
@ FSTATQ_NONE
Do not compute -statistic, still compute buffered quantities.
Definition: ComputeFstat.h:95
@ FSTATQ_2F_CUDA
Compute multi-detector , store results on CUDA GPU (CUDA implementation of Resamp only).
Definition: ComputeFstat.h:101
@ FSTATQ_ATOMS_PER_DET
Compute per-SFT -statistic atoms for each detector (Demod only).
Definition: ComputeFstat.h:100
@ FSTATQ_2F_PER_DET
Compute for each detector.
Definition: ComputeFstat.h:98
@ FSTATQ_FAFB_CUDA
Compute multi-detector and , store results on CUDA GPU (CUDA implementation of Resamp only).
Definition: ComputeFstat.h:102
static const REAL8 LAL_FACT_INV[]
#define LAL_FACT_MAX
void XLALDestroyMultiAMCoeffs(MultiAMCoeffs *multiAMcoef)
Destroy a MultiAMCoeffs structure.
Definition: LALComputeAM.c:469
MultiAMCoeffs * XLALComputeMultiAMCoeffs(const MultiDetectorStateSeries *multiDetStates, const MultiNoiseWeights *multiWeights, SkyPosition skypos)
Multi-IFO version of XLALComputeAMCoeffs().
Definition: LALComputeAM.c:379
#define LAL_REAL8_EPS
unsigned char BOOLEAN
#define XLAL_INIT_MEM(x)
uint64_t UINT8
double REAL8
#define XLAL_INIT_DECL(var,...)
int64_t INT8
uint32_t UINT4
float complex COMPLEX8
int32_t INT4
float REAL4
void * XLALMalloc(size_t n)
void * XLALCalloc(size_t m, size_t n)
void * XLALRealloc(void *p, size_t n)
void XLALFree(void *p)
#define LAL_UINT4_FORMAT
MultiCOMPLEX8TimeSeries * XLALMultiSFTVectorToCOMPLEX8TimeSeries(const MultiSFTVector *multisfts)
Turn the given multiSFTvector into multiple long COMPLEX8TimeSeries, properly dealing with gaps.
void XLALDestroyMultiCOMPLEX8TimeSeries(MultiCOMPLEX8TimeSeries *multiTimes)
Destroy a MultiCOMPLEX8TimeSeries structure.
REAL8 XLALGetCPUTime(void)
#define PULSAR_MAX_DETECTORS
maximal number of detectors we can handle (for static arrays of detector quantities)
#define PULSAR_MAX_SPINS
maximal number of spin-parameters (Freq + spindowns) we can handle
void XLALDestroyMultiSFTVector(MultiSFTVector *multvect)
Destroy a multi SFT-vector.
Definition: SFTtypes.c:424
int XLALMultiSFTVectorResizeBand(MultiSFTVector *multiSFTs, REAL8 f0, REAL8 Band)
Resize the frequency-band of a given multi-SFT vector to [f0, f0+Band].
Definition: SFTtypes.c:946
MultiSSBtimes * XLALGetMultiSSBtimes(const MultiDetectorStateSeries *multiDetStates, SkyPosition skypos, LIGOTimeGPS refTime, SSBprecision precision)
Multi-IFO version of XLALGetSSBtimes().
Definition: SSBtimes.c:657
void XLALDestroyMultiSSBtimes(MultiSSBtimes *multiSSB)
Destroy a MultiSSBtimes structure.
Definition: SSBtimes.c:845
int XLALAddMultiBinaryTimes(MultiSSBtimes **multiSSBOut, const MultiSSBtimes *multiSSBIn, const PulsarDopplerParams *Doppler)
Multi-IFO version of XLALAddBinaryTimes().
Definition: SSBtimes.c:413
COORDINATESYSTEM_EQUATORIAL
COMPLEX8TimeSeries * XLALCreateCOMPLEX8TimeSeries(const CHAR *name, const LIGOTimeGPS *epoch, REAL8 f0, REAL8 deltaT, const LALUnit *sampleUnits, size_t length)
const LALUnit lalDimensionlessUnit
#define XLAL_ERROR(...)
#define XLAL_CHECK(assertion,...)
int int XLALPrintWarning(const char *fmt,...) _LAL_GCC_PRINTF_FORMAT_(1
#define XLAL_CHECK_NULL(assertion,...)
XLAL_ENOMEM
XLAL_SUCCESS
XLAL_EFAULT
XLAL_EFUNC
XLAL_EDOM
XLAL_ESYS
XLAL_EINVAL
XLAL_EFAILED
REAL8 XLALGPSGetREAL8(const LIGOTimeGPS *epoch)
dst
src
out
#define MYMIN(x, y)
#define MYMAX(x, y)
double alpha
size_t numDetectors
This structure contains the per-SFT (weighted) antenna-pattern functions , with the SFT-index,...
Definition: LALComputeAM.h:63
REAL4 B
summed antenna-pattern matrix coefficient:
Definition: LALComputeAM.h:67
REAL4Vector * b
(weighted) per-SFT antenna-pattern function
Definition: LALComputeAM.h:65
REAL4 A
summed antenna-pattern matrix coefficient:
Definition: LALComputeAM.h:66
REAL4 C
summed antenna-pattern matrix coefficient:
Definition: LALComputeAM.h:68
REAL4Vector * a
(weighted) per-SFT antenna-pattern function
Definition: LALComputeAM.h:64
REAL4 D
determinant
Definition: LALComputeAM.h:69
Struct holding the "antenna-pattern" matrix , in terms of the multi-detector scalar product.
Definition: LALComputeAM.h:127
REAL4 Dd
determinant factor , such that
Definition: LALComputeAM.h:132
CHAR name[LALNameLength]
LIGOTimeGPS epoch
COMPLEX8Sequence * data
COMPLEX8 * data
MultiDetectorStateSeries * multiDetectorStates
SSBprecision SSBprec
MultiLIGOTimeGPSVector * multiTimestamps
MultiNoiseWeights * multiNoiseWeights
int(* compute_func)(FstatResults *, const FstatCommon *, void *)
void(* workspace_destroy_func)(void *)
void(* method_data_destroy_func)(void *)
Struct of optional 'advanced level' and (potentially method-specific) arguments to be passed to the ...
Definition: ComputeFstat.h:137
BOOLEAN resampFFTPowerOf2
Resamp: round up FFT lengths to next power of 2; see FstatMethodType.
Definition: ComputeFstat.h:148
BOOLEAN collectTiming
a flag to turn on/off the collection of F-stat-method-specific timing-data
Definition: ComputeFstat.h:147
UINT4 Dterms
Number of Dirichlet kernel terms, used by some Demod methods; see FstatMethodType.
Definition: ComputeFstat.h:140
XLALComputeFstat() computed results structure.
Definition: ComputeFstat.h:202
AntennaPatternMatrix MmunuX[PULSAR_MAX_DETECTORS]
Per detector antenna pattern matrix , used in computing .
Definition: ComputeFstat.h:231
COMPLEX8 * Fb
Definition: ComputeFstat.h:260
AntennaPatternMatrix Mmunu
Antenna pattern matrix , used in computing .
Definition: ComputeFstat.h:228
REAL4 * twoFPerDet[PULSAR_MAX_DETECTORS]
If whatWasComputed & FSTATQ_2F_PER_DET is true, the values computed at numFreqBins frequencies space...
Definition: ComputeFstat.h:277
FstatQuantities whatWasComputed
Bit-field of which -statistic quantities were computed.
Definition: ComputeFstat.h:234
REAL4 * twoF_CUDA
If whatWasComputed & FSTATQ_2F_CUDA is true, the multi-detector values as for twoF,...
Definition: ComputeFstat.h:248
COMPLEX8 * FaPerDet[PULSAR_MAX_DETECTORS]
If whatWasComputed & FSTATQ_PARTS_PER_DET is true, the and values computed at numFreqBins frequenci...
Definition: ComputeFstat.h:287
UINT4 numFreqBins
Number of frequencies at which the were computed.
Definition: ComputeFstat.h:217
REAL4 * twoF
If whatWasComputed & FSTATQ_2F is true, the multi-detector values computed at numFreqBins frequencie...
Definition: ComputeFstat.h:242
COMPLEX8 * Fb_CUDA
Definition: ComputeFstat.h:267
COMPLEX8 * Fa
If whatWasComputed & FSTATQ_PARTS is true, the multi-detector and computed at numFreqBins frequenci...
Definition: ComputeFstat.h:259
COMPLEX8 * FbPerDet[PULSAR_MAX_DETECTORS]
Definition: ComputeFstat.h:288
COMPLEX8 * Fa_CUDA
If whatWasComputed & FSTAT_FAFB_CUDA is true, the multi-detector and computed at numFreqBins freque...
Definition: ComputeFstat.h:266
PulsarDopplerParams doppler
Doppler parameters, including the starting frequency, at which the were computed.
Definition: ComputeFstat.h:206
Generic F-stat timing coefficients (times in seconds) [see https://dcc.ligo.org/LIGO-T1600531-v4 for ...
Definition: ComputeFstat.h:316
Struct to carry the -statistic method-specific timing model in terms of a list of variable names and...
Definition: ComputeFstat.h:335
A vector of 'timestamps' of type LIGOTimeGPS.
Definition: SFTfileIO.h:188
REAL8 deltaT
'length' of each timestamp (e.g.
Definition: SFTfileIO.h:194
LIGOTimeGPS * data
array of timestamps
Definition: SFTfileIO.h:193
UINT4 length
number of timestamps
Definition: SFTfileIO.h:192
Multi-IFO container for antenna-pattern coefficients and atenna-pattern matrix .
Definition: LALComputeAM.h:137
AMCoeffs ** data
noise-weighted AM-coeffs , and
Definition: LALComputeAM.h:142
AntennaPatternMatrix Mmunu
antenna-pattern matrix
Definition: LALComputeAM.h:143
Multi-IFO container for COMPLEX8 resampled timeseries.
Definition: LFTandTSutils.h:52
COMPLEX8TimeSeries ** data
array of COMPLEX8TimeSeries (pointers)
Definition: LFTandTSutils.h:57
UINT4 length
number of IFOs
Definition: LFTandTSutils.h:56
LIGOTimeGPSVector ** data
timestamps vector for each ifo
Definition: SFTfileIO.h:203
A collection of SFT vectors – one for each IFO in a multi-IFO search.
Definition: SFTfileIO.h:179
Multi-IFO container for SSB timings.
Definition: SSBtimes.h:67
SSBtimes ** data
array of SSBtimes (pointers)
Definition: SSBtimes.h:72
Type containing the 'Doppler-parameters' affecting the time-evolution of the phase.
REAL8 period
Binary: orbital period (sec)
LIGOTimeGPS tp
Binary: time of observed periapsis passage (in SSB)
PulsarSpins fkdot
Intrinsic spins: [Freq, f1dot, f2dot, ... ] where fkdot = d^kFreq/dt^k.
REAL8 Delta
Sky position: DEC (latitude) in equatorial coords and radians.
LIGOTimeGPS refTime
Reference time of pulsar parameters (in SSB!)
REAL8 Alpha
Sky position: RA (longitude) in equatorial coords and radians.
REAL8 ecc
Binary: orbital eccentricity.
REAL8 asini
Binary: projected, normalized orbital semi-major axis (s).
REAL8 argp
Binary: argument of periapsis (radians)
REAL4 * data
REAL8 * data
MultiCOMPLEX8TimeSeries * hostCopy4ExtractTS_SRC_b
AntennaPatternMatrix MmunuX[PULSAR_MAX_DETECTORS]
PulsarDopplerParams prev_doppler
MultiCOMPLEX8TimeSeries * hostCopy4ExtractTS_SRC_a
MultiCOMPLEX8TimeSeries * multiTimeSeries_SRC_b
MultiCOMPLEX8TimeSeries * multiTimeSeries_SRC_a
FstatTimingGeneric timingGeneric
MultiCOMPLEX8TimeSeries * multiTimeSeries_DET
Simple container for two REAL8-vectors, namely the SSB-timings DeltaT_alpha and Tdot_alpha,...
Definition: SSBtimes.h:60
REAL8Vector * Tdot
dT/dt : time-derivative of SSB-time wrt local time for SFT-alpha
Definition: SSBtimes.h:63
REAL8Vector * DeltaT
Time-difference of SFT-alpha - tau0 in SSB-frame.
Definition: SSBtimes.h:62
LIGOTimeGPS refTime
reference-time 'tau0'
Definition: SSBtimes.h:61
REAL8 longitude
REAL8 latitude
CoordinateSystem system
#define max(a, b)
enum @1 epoch