LALPulsar  6.1.0.1-89842e6
ComputeFstat_Resamp_CUDA.cu
Go to the documentation of this file.
1 //
2 // Copyright (C) 2019, 2020 Liam Dunn
3 // Copyright (C) 2009, 2014--2015 Reinhard Prix
4 // Copyright (C) 2012--2015 Karl Wette
5 // Copyright (C) 2009 Chris Messenger, Pinkesh Patel, Xavier Siemens, Holger Pletsch
6 //
7 // This program is free software; you can redistribute it and/or modify
8 // it under the terms of the GNU General Public License as published by
9 // the Free Software Foundation; either version 2 of the License, or
10 // (at your option) any later version.
11 //
12 // This program is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 // GNU General Public License for more details.
16 //
17 // You should have received a copy of the GNU General Public License
18 // along with with program; see the file COPYING. If not, write to the
19 // Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20 // MA 02110-1301 USA
21 //
22 
23 #include <stdlib.h>
24 #include <stdio.h>
25 #include <math.h>
26 #include <complex.h>
27 #include <cuda.h>
28 #include <cuda_runtime_api.h>
29 #include <cuComplex.h>
30 #include <cufft.h>
31 
32 #include "ComputeFstat_internal.h"
34 
35 #include <lal/Factorial.h>
36 #include <lal/LFTandTSutils.h>
37 #include <lal/LogPrintf.h>
38 #include <lal/SinCosLUT.h>
39 #include <lal/TimeSeries.h>
40 #include <lal/Units.h>
41 #include <lal/Window.h>
42 
43 ///
44 /// \defgroup ComputeFstat_Resamp_CUDA_cu Module ComputeFstat_Resamp_CUDA.cu
45 /// \ingroup ComputeFstat_h
46 /// \brief Implements a CUDA version \cite DunnEtAl2022 of the \a Resamp FFT-based resampling algorithm for
47 /// computing the \f$\mathcal{F}\f$-statistic \cite JKS98 .
48 ///
49 
50 /// @{
51 
52 // ========== Resamp internals ==========
53 
54 // ----- local constants ----------
55 
56 #define CUDA_BLOCK_SIZE 512
57 
58 __constant__ REAL8 PI = M_PI;
59 
60 __device__ __constant__ REAL8 lal_fact_inv[LAL_FACT_MAX];
61 
62 // ----- local macros ----------
63 
64 #define XLAL_CHECK_CUDA_CALL(...) do { \
65  cudaError_t retn; \
66  XLAL_CHECK ( ( retn = (__VA_ARGS__) ) == cudaSuccess, XLAL_EERR, "%s failed with return code %i", #__VA_ARGS__, retn ); \
67  } while(0)
68 #define XLAL_CHECK_CUFFT_CALL(...) do { \
69  cufftResult retn; \
70  XLAL_CHECK ( ( retn = (__VA_ARGS__) ) == CUFFT_SUCCESS, XLAL_EERR, "%s failed with return code %i", #__VA_ARGS__, retn ); \
71  } while(0)
72 
73 #define CPLX_MULT(x, y) (crectf(crealf(x)*crealf(y) - cimagf(x)*cimagf(y), cimagf(x)*crealf(y) + crealf(x)*cimagf(y)))
74 
75 // ----- local types ----------
76 
77 // ----- workspace ----------
78 typedef struct tagResampCUDAWorkspace {
79  // intermediate quantities to interpolate and operate on SRC-frame timeseries
80  COMPLEX8Vector *TStmp1_SRC; // can hold a single-detector SRC-frame spindown-corrected timeseries [without zero-padding]
81  COMPLEX8Vector *TStmp2_SRC; // can hold a single-detector SRC-frame spindown-corrected timeseries [without zero-padding]
82  REAL8Vector *SRCtimes_DET; // holds uniformly-spaced SRC-frame timesteps translated into detector frame [for interpolation]
83 
84  // input padded timeseries ts(t) and output Fab(f) of length 'numSamplesFFT'
85  UINT4 numSamplesFFTAlloc; // allocated number of zero-padded SRC-frame time samples (related to dFreq)
86  cuComplex *TS_FFT; // zero-padded, spindown-corr SRC-frame TS
87  cuComplex *FabX_Raw; // raw full-band FFT result Fa,Fb
88 
89  // arrays of size numFreqBinsOut over frequency bins f_k:
90  cuComplex *FaX_k; // properly normalized F_a^X(f_k) over output bins
91  cuComplex *FbX_k; // properly normalized F_b^X(f_k) over output bins
92  cuComplex *Fa_k; // properly normalized F_a(f_k) over output bins
93  cuComplex *Fb_k; // properly normalized F_b(f_k) over output bins
94  UINT4 numFreqBinsAlloc; // internal: keep track of allocated length of frequency-arrays
95 
97 
98 typedef struct {
99  UINT4 Dterms; // Number of terms to use (on either side) in Windowed-Sinc interpolation kernel
100  MultiCOMPLEX8TimeSeries *multiTimeSeries_DET; // input SFTs converted into a heterodyned timeseries
101  // ----- buffering -----
102  PulsarDopplerParams prev_doppler; // buffering: previous phase-evolution ("doppler") parameters
103  MultiAMCoeffs *multiAMcoef; // buffered antenna-pattern functions
104  MultiSSBtimes *multiSSBtimes; // buffered SSB times, including *only* sky-position corrections, not binary
105  MultiSSBtimes *multiBinaryTimes; // buffered SRC times, including both sky- and binary corrections [to avoid re-allocating this]
106 
107  AntennaPatternMatrix Mmunu; // combined multi-IFO antenna-pattern coefficients {A,B,C,E}
108  AntennaPatternMatrix MmunuX[PULSAR_MAX_DETECTORS]; // per-IFO antenna-pattern coefficients {AX,BX,CX,EX}
109 
110  MultiCOMPLEX8TimeSeries *multiTimeSeries_SRC_a; // multi-detector SRC-frame timeseries, multiplied by AM function a(t)
111  MultiCOMPLEX8TimeSeries *multiTimeSeries_SRC_b; // multi-detector SRC-frame timeseries, multiplied by AM function b(t)
112 
113  MultiCOMPLEX8TimeSeries *hostCopy4ExtractTS_SRC_a; // host copy of multi-detector SRC-frame timeseries, multiplied by AM function a(t), for time series extraction
114  MultiCOMPLEX8TimeSeries *hostCopy4ExtractTS_SRC_b; // host copy of multi-detector SRC-frame timeseries, multiplied by AM function b(t), for time series extraction
115 
116  UINT4 numSamplesFFT; // length of zero-padded SRC-frame timeseries (related to dFreq)
117  UINT4 decimateFFT; // output every n-th frequency bin, with n>1 iff (dFreq > 1/Tspan), and was internally decreased by n
118  cufftHandle fftplan; // FFT plan
119 
120  // ----- timing -----
121  BOOLEAN collectTiming; // flag whether or not to collect timing information
122  FstatTimingGeneric timingGeneric; // measured (generic) F-statistic timing values
123  FstatTimingResamp timingResamp; // measured Resamp-specific timing model data
124 
126 
127 
128 // ----- local prototypes ----------
129 
130 extern "C" int XLALSetupFstatResampCUDA( void **method_data, FstatCommon *common, FstatMethodFuncs *funcs, MultiSFTVector *multiSFTs, const FstatOptionalArgs *optArgs );
131 extern "C" int XLALExtractResampledTimeseries_ResampCUDA( MultiCOMPLEX8TimeSeries **multiTimeSeries_SRC_a, MultiCOMPLEX8TimeSeries **multiTimeSeries_SRC_b, void *method_data );
132 extern "C" int XLALGetFstatTiming_ResampCUDA( const void *method_data, FstatTimingGeneric *timingGeneric, FstatTimingModel *timingModel );
133 
134 static int XLALComputeFstatResampCUDA( FstatResults *Fstats, const FstatCommon *common, void *method_data );
135 static int XLALApplySpindownAndFreqShiftCUDA( cuComplex *xOut, const COMPLEX8TimeSeries *xIn, const PulsarDopplerParams *doppler, REAL8 freqShift );
137 static int XLALComputeFaFb_ResampCUDA( ResampCUDAMethodData *resamp, ResampCUDAWorkspace *ws, const PulsarDopplerParams thisPoint, REAL8 dFreq, UINT4 numFreqBins, const COMPLEX8TimeSeries *TimeSeries_SRC_a, const COMPLEX8TimeSeries *TimeSeries_SRC_b );
139 static REAL8Vector *CreateREAL8VectorCUDA( UINT4 length );
140 static void DestroyCOMPLEX8VectorCUDA( COMPLEX8Vector *vec );
141 static void DestroyREAL8VectorCUDA( REAL8Vector *vec );
143 static int MoveCOMPLEX8TimeSeriesHtoD( COMPLEX8TimeSeries *series );
148 static void XLALDestroyResampCUDAWorkspace( void *workspace );
149 static void XLALDestroyResampCUDAMethodData( void *method_data );
150 
151 // ==================== function definitions ====================
152 
154 {
155  COMPLEX8Vector *vec;
156  XLAL_CHECK_NULL( ( vec = ( COMPLEX8Vector * )XLALMalloc( sizeof( COMPLEX8Vector ) ) ) != NULL, XLAL_ENOMEM );
157  vec->length = length;
158  XLAL_CHECK_NULL( cudaMalloc( ( void ** )&vec->data, sizeof( COMPLEX8 )*length ) == cudaSuccess, XLAL_ENOMEM );
159  return vec;
160 }
161 
163 {
164  REAL8Vector *vec;
165  XLAL_CHECK_NULL( ( vec = ( REAL8Vector * )XLALMalloc( sizeof( REAL8Vector ) ) ) != NULL, XLAL_ENOMEM );
166  vec->length = length;
167  XLAL_CHECK_NULL( cudaMalloc( ( void ** )&vec->data, sizeof( REAL8 )*length ) == cudaSuccess, XLAL_ENOMEM );
168  return vec;
169 }
170 
172 {
173  if ( vec != NULL ) {
174  if ( vec->data != NULL ) {
175  cudaFree( vec->data );
176  }
177  vec->data = NULL;
178  XLALFree( vec );
179  }
180 }
181 
183 {
184  if ( vec != NULL ) {
185  if ( vec->data != NULL ) {
186  cudaFree( vec->data );
187  }
188  vec->data = NULL;
189  XLALFree( vec );
190  }
191 }
192 
194 {
195  if ( series != NULL ) {
196  DestroyCOMPLEX8VectorCUDA( series->data );
197  XLALFree( series );
198  }
199 }
200 
202 {
203  XLAL_CHECK( series != NULL, XLAL_EFAULT );
204  COMPLEX8 *cpu_data = series->data->data;
205  XLAL_CHECK( cudaMalloc( ( void ** )&series->data->data, sizeof( COMPLEX8 )*series->data->length ) == cudaSuccess, XLAL_ENOMEM );
206  XLAL_CHECK_CUDA_CALL( cudaMemcpy( ( void * )series->data->data, cpu_data, sizeof( COMPLEX8 )*series->data->length, cudaMemcpyHostToDevice ) );
207  XLALFree( cpu_data );
208  return XLAL_SUCCESS;
209 }
210 
212 {
213  XLAL_CHECK( multi != NULL, XLAL_EFAULT );
214  for ( UINT4 X = 0; X < multi->length; X++ ) {
216  }
217  return XLAL_SUCCESS;
218 }
219 
221 {
222  XLAL_CHECK( dst != NULL, XLAL_EFAULT );
223  XLAL_CHECK( src != NULL, XLAL_EFAULT );
224  if ( *dst == NULL ) {
225  XLAL_CHECK( ( ( *dst ) = ( COMPLEX8TimeSeries * )XLALCalloc( 1, sizeof( COMPLEX8TimeSeries ) ) ) != NULL, XLAL_ENOMEM );
226  XLAL_CHECK( ( ( *dst )->data = ( COMPLEX8Sequence * )XLALCalloc( 1, sizeof( COMPLEX8Sequence ) ) ) != NULL, XLAL_ENOMEM );
227  ( *dst )->data->length = 0;
228  }
229  {
230  COMPLEX8Sequence *dst_data = ( *dst )->data;
231  *( *dst ) = *src;
232  ( *dst )->data = dst_data;
233  }
234  if ( ( ( *dst )->data->data == NULL ) || ( ( *dst )->data->length < src->data->length ) ) {
235  XLAL_CHECK( ( ( *dst )->data->data = ( COMPLEX8 * )XLALRealloc( ( *dst )->data->data, src->data->length * sizeof( COMPLEX8 ) ) ) != NULL, XLAL_ENOMEM );
236  ( *dst )->data->length = src->data->length;
237  }
238  XLAL_CHECK_CUDA_CALL( cudaMemcpy( ( void * )( *dst )->data->data, src->data->data, sizeof( COMPLEX8 )*src->data->length, cudaMemcpyDeviceToHost ) );
239  return XLAL_SUCCESS;
240 }
241 
243 {
244  XLAL_CHECK( dst != NULL, XLAL_EFAULT );
245  XLAL_CHECK( src != NULL, XLAL_EFAULT );
246  if ( *dst == NULL ) {
247  XLAL_CHECK( ( ( *dst ) = ( MultiCOMPLEX8TimeSeries * )XLALCalloc( 1, sizeof( MultiCOMPLEX8TimeSeries ) ) ) != NULL, XLAL_ENOMEM );
248  XLAL_CHECK( ( ( *dst )->data = ( COMPLEX8TimeSeries ** )XLALCalloc( src->length, sizeof( COMPLEX8TimeSeries ) ) ) != NULL, XLAL_ENOMEM );
249  ( *dst )->length = src->length;
250  }
251  for ( UINT4 X = 0; X < src->length; X++ ) {
252  XLAL_CHECK( CopyCOMPLEX8TimeSeriesDtoH( &( *dst )->data[X], src->data[X] ) == XLAL_SUCCESS, XLAL_EFUNC );
253  }
254  return XLAL_SUCCESS;
255 }
256 
258 {
259  if ( multi != NULL ) {
260  if ( multi->data != NULL ) {
261  UINT4 numDetectors = multi->length;
262  for ( UINT4 X = 0; X < numDetectors; X++ ) {
263  DestroyCOMPLEX8TimeSeriesCUDA( multi->data[X] );
264  }
265  XLALFree( multi->data );
266  }
267  XLALFree( multi );
268  }
269 }
270 
271 static void XLALDestroyResampCUDAWorkspace( void *workspace )
272 {
273  ResampCUDAWorkspace *ws = ( ResampCUDAWorkspace * ) workspace;
274 
278 
279  cudaFree( ws->FabX_Raw );
280  cudaFree( ws->TS_FFT );
281 
282  XLALFree( ws );
283 } // XLALDestroyResampCUDAWorkspace()
284 
285 static void XLALDestroyResampCUDAMethodData( void *method_data )
286 {
287  ResampCUDAMethodData *resamp = ( ResampCUDAMethodData * ) method_data;
288 
290 
291  // ----- free buffer
299 
300  cufftDestroy( resamp->fftplan );
301 
302  XLALFree( resamp );
303 } // XLALDestroyResampCUDAMethodData()
304 
305 
306 extern "C" int
307 XLALSetupFstatResampCUDA( void **method_data,
308  FstatCommon *common,
309  FstatMethodFuncs *funcs,
311  const FstatOptionalArgs *optArgs
312  )
313 {
314  // Check input
315  XLAL_CHECK( method_data != NULL, XLAL_EFAULT );
316  XLAL_CHECK( common != NULL, XLAL_EFAULT );
317  XLAL_CHECK( funcs != NULL, XLAL_EFAULT );
318  XLAL_CHECK( multiSFTs != NULL, XLAL_EFAULT );
319  XLAL_CHECK( optArgs != NULL, XLAL_EFAULT );
320 
321  // Allocate method data
322  *method_data = XLALCalloc( 1, sizeof( ResampCUDAMethodData ) );
323  ResampCUDAMethodData *resamp = ( ResampCUDAMethodData * )*method_data;
324  XLAL_CHECK( resamp != NULL, XLAL_ENOMEM );
325 
326  resamp->Dterms = optArgs->Dterms;
327 
328  // Set method function pointers
332 
333  // Copy the inverse factorial lookup table to GPU memory
334  XLAL_CHECK_CUDA_CALL( cudaMemcpyToSymbol( lal_fact_inv, ( void * )&LAL_FACT_INV, sizeof( REAL8 )*LAL_FACT_MAX, 0, cudaMemcpyHostToDevice ) );
335 
336  // Extra band needed for resampling: Hamming-windowed sinc used for interpolation has a transition bandwith of
337  // TB=(4/L)*fSamp, where L=2*Dterms+1 is the window-length, and here fSamp=Band (i.e. the full SFT frequency band)
338  // However, we're only interested in the physical band and we'll be throwing away all bins outside of this.
339  // This implies that we're only affected by *half* the transition band TB/2 on either side, as the other half of TB is outside of the band of interest
340  // (and will actually get aliased, i.e. the region [-fNy - TB/2, -fNy] overlaps with [fNy-TB/2,fNy] and vice-versa: [fNy,fNy+TB/2] overlaps with [-fNy,-fNy+TB/2])
341  // ==> therefore we only need to add an extra TB/2 on each side to be able to safely avoid the transition-band effects
342  REAL8 f0 = multiSFTs->data[0]->data[0].f0;
343  REAL8 dFreq = multiSFTs->data[0]->data[0].deltaF;
344  REAL8 Band = multiSFTs->data[0]->data[0].data->length * dFreq;
345  REAL8 extraBand = 2.0 / ( 2 * optArgs->Dterms + 1 ) * Band;
346  XLAL_CHECK( XLALMultiSFTVectorResizeBand( multiSFTs, f0 - extraBand, Band + 2 * extraBand ) == XLAL_SUCCESS, XLAL_EFUNC );
347  // Convert SFTs into heterodyned complex timeseries [in detector frame]
349 
350  XLALDestroyMultiSFTVector( multiSFTs ); // don't need them SFTs any more ...
351 
353  REAL8 dt_DET = resamp->multiTimeSeries_DET->data[0]->deltaT;
354  REAL8 fHet = resamp->multiTimeSeries_DET->data[0]->f0;
355  REAL8 Tsft = common->multiTimestamps->data[0]->deltaT;
356 
357  // determine resampled timeseries parameters
358  REAL8 TspanFFT = 1.0 / common->dFreq;
359 
360  // check that TspanFFT >= TspanX for all detectors X, otherwise increase TspanFFT by an integer factor 'decimateFFT' such that this is true
361  REAL8 TspanXMax = 0;
362  for ( UINT4 X = 0; X < numDetectors; X ++ ) {
363  UINT4 numSamples_DETX = resamp->multiTimeSeries_DET->data[X]->data->length;
364  REAL8 TspanX = numSamples_DETX * dt_DET;
365  TspanXMax = fmax( TspanXMax, TspanX );
366  }
367  UINT4 decimateFFT = ( UINT4 )ceil( TspanXMax / TspanFFT ); // larger than 1 means we need to artificially increase dFreqFFT by 'decimateFFT'
368  if ( decimateFFT > 1 ) {
369  XLALPrintWarning( "WARNING: Frequency spacing larger than 1/Tspan, we'll internally decimate FFT frequency bins by a factor of %" LAL_UINT4_FORMAT "\n", decimateFFT );
370  }
371  TspanFFT *= decimateFFT;
372  resamp->decimateFFT = decimateFFT;
373 
374  UINT4 numSamplesFFT0 = ( UINT4 ) ceil( TspanFFT / dt_DET ); // we use ceil() so that we artificially widen the band rather than reduce it
375  UINT4 numSamplesFFT = 0;
376  if ( optArgs->resampFFTPowerOf2 ) {
377  numSamplesFFT = ( UINT4 ) pow( 2, ceil( log2( ( double ) numSamplesFFT0 ) ) ); // round numSamplesFFT up to next power of 2 for most effiecient FFT
378  } else {
379  numSamplesFFT = ( UINT4 ) 2 * ceil( numSamplesFFT0 / 2 ); // always ensure numSamplesFFT is even
380  }
381 
382  REAL8 dt_SRC = TspanFFT / numSamplesFFT; // adjust sampling rate to allow achieving exact requested dFreq=1/TspanFFT !
383 
384  resamp->numSamplesFFT = numSamplesFFT;
385  // ----- allocate buffer Memory ----------
386 
387  // Move detector time series over to GPU
389 
390  // header for SRC-frame resampled timeseries buffer
394 
398 
399  LIGOTimeGPS XLAL_INIT_DECL( epoch0 ); // will be set to corresponding SRC-frame epoch when barycentering
400  UINT4 numSamplesMax_SRC = 0;
401  for ( UINT4 X = 0; X < numDetectors; X ++ ) {
402  // ----- check input consistency ----------
403  REAL8 dt_DETX = resamp->multiTimeSeries_DET->data[X]->deltaT;
404  XLAL_CHECK( dt_DET == dt_DETX, XLAL_EINVAL, "Input timeseries must have identical 'deltaT(X=%d)' (%.16g != %.16g)\n", X, dt_DET, dt_DETX );
405 
406  REAL8 fHetX = resamp->multiTimeSeries_DET->data[X]->f0;
407  XLAL_CHECK( fabs( fHet - fHetX ) < LAL_REAL8_EPS * fHet, XLAL_EINVAL, "Input timeseries must have identical heterodyning frequency 'f0(X=%d)' (%.16g != %.16g)\n", X, fHet, fHetX );
408 
409  REAL8 TsftX = common->multiTimestamps->data[X]->deltaT;
410  XLAL_CHECK( Tsft == TsftX, XLAL_EINVAL, "Input timestamps must have identical stepsize 'Tsft(X=%d)' (%.16g != %.16g)\n", X, Tsft, TsftX );
411 
412  // ----- prepare Memory fo SRC-frame timeseries and AM coefficients
413  const char *nameX = resamp->multiTimeSeries_DET->data[X]->name;
414  UINT4 numSamples_DETX = resamp->multiTimeSeries_DET->data[X]->data->length;
415  UINT4 numSamples_SRCX = ( UINT4 )ceil( numSamples_DETX * dt_DET / dt_SRC );
416 
417  XLAL_CHECK( ( resamp->multiTimeSeries_SRC_a->data[X] = XLALCreateCOMPLEX8TimeSeries( nameX, &epoch0, fHet, dt_SRC, &lalDimensionlessUnit, numSamples_SRCX ) ) != NULL, XLAL_EFUNC );
418  XLAL_CHECK( ( resamp->multiTimeSeries_SRC_b->data[X] = XLALCreateCOMPLEX8TimeSeries( nameX, &epoch0, fHet, dt_SRC, &lalDimensionlessUnit, numSamples_SRCX ) ) != NULL, XLAL_EFUNC );
419 
420  numSamplesMax_SRC = MYMAX( numSamplesMax_SRC, numSamples_SRCX );
421  } // for X < numDetectors
422 
425  XLAL_CHECK( numSamplesFFT >= numSamplesMax_SRC, XLAL_EFAILED, "[numSamplesFFT = %d] < [numSamplesMax_SRC = %d]\n", numSamplesFFT, numSamplesMax_SRC );
426 
427  // ---- re-use shared workspace, or allocate here ----------
429  if ( ws != NULL ) {
430  if ( numSamplesFFT > ws->numSamplesFFTAlloc ) {
431  cudaFree( ws->FabX_Raw );
432  XLAL_CHECK( cudaMalloc( ( void ** )&ws->FabX_Raw, numSamplesFFT * sizeof( COMPLEX8 ) ) == cudaSuccess, XLAL_ENOMEM );
433  cudaFree( ws->TS_FFT );
434  XLAL_CHECK( cudaMalloc( ( void ** )&ws->TS_FFT, numSamplesFFT * sizeof( COMPLEX8 ) ) == cudaSuccess, XLAL_ENOMEM );
435 
436  ws->numSamplesFFTAlloc = numSamplesFFT;
437  }
438 
439  // adjust maximal SRC-frame timeseries length, if necessary - in the CPU version this was realloc'd, but I don't think we can do that here.
440  if ( numSamplesMax_SRC > ws->TStmp1_SRC->length ) {
442  XLAL_CHECK( ( ws->TStmp1_SRC = CreateCOMPLEX8VectorCUDA( numSamplesMax_SRC ) ) != NULL, XLAL_EFUNC );
443 
445  XLAL_CHECK( ( ws->TStmp2_SRC = CreateCOMPLEX8VectorCUDA( numSamplesMax_SRC ) ) != NULL, XLAL_EFUNC );
446 
448  XLAL_CHECK( ( ws->SRCtimes_DET = CreateREAL8VectorCUDA( numSamplesMax_SRC ) ) != NULL, XLAL_EFUNC );
449  }
450 
451  } // end: if shared workspace given
452  else {
453  XLAL_CHECK( ( ws = ( ResampCUDAWorkspace * )XLALCalloc( 1, sizeof( *ws ) ) ) != NULL, XLAL_ENOMEM );
454  XLAL_CHECK( ( ws->TStmp1_SRC = CreateCOMPLEX8VectorCUDA( numSamplesMax_SRC ) ) != NULL, XLAL_EFUNC );
455  XLAL_CHECK( ( ws->TStmp2_SRC = CreateCOMPLEX8VectorCUDA( numSamplesMax_SRC ) ) != NULL, XLAL_EFUNC );
456  XLAL_CHECK( ( ws->SRCtimes_DET = CreateREAL8VectorCUDA( numSamplesMax_SRC ) ) != NULL, XLAL_EFUNC );
457  XLAL_CHECK( cudaMalloc( ( void ** )&ws->FabX_Raw, numSamplesFFT * sizeof( COMPLEX8 ) ) == cudaSuccess, XLAL_ENOMEM );
458  XLAL_CHECK( cudaMalloc( ( void ** )&ws->TS_FFT, numSamplesFFT * sizeof( COMPLEX8 ) ) == cudaSuccess, XLAL_ENOMEM );
459  ws->numSamplesFFTAlloc = numSamplesFFT;
460 
461  common->workspace = ws;
462  } // end: if we create our own workspace
463 
464  // ----- compute and buffer FFT plan ----------
465  XLAL_CHECK( cufftPlan1d( &resamp->fftplan, resamp->numSamplesFFT, CUFFT_C2C, 1 ) == CUFFT_SUCCESS, XLAL_ESYS );
466  XLAL_CHECK_CUDA_CALL( cudaDeviceSynchronize() );
467 
468  // turn on timing collection if requested
469  resamp->collectTiming = optArgs->collectTiming;
470 
471  // initialize struct for collecting timing data, store invariant 'meta' quantities about this setup
472  if ( resamp->collectTiming ) {
473  XLAL_INIT_MEM( resamp->timingGeneric );
474  resamp->timingGeneric.Ndet = numDetectors;
475 
476  XLAL_INIT_MEM( resamp->timingResamp );
477  resamp->timingResamp.Resolution = TspanXMax / TspanFFT;
478  resamp->timingResamp.NsampFFT0 = numSamplesFFT0;
479  resamp->timingResamp.NsampFFT = numSamplesFFT;
480  }
481 
482  return XLAL_SUCCESS;
483 
484 } // XLALSetupFstatResampCUDA()
485 
486 __global__ void CUDAAddToFaFb( cuComplex *Fa_k, cuComplex *Fb_k, cuComplex *FaX_k, cuComplex *FbX_k, UINT4 numFreqBins )
487 {
488  int k = threadIdx.x + blockDim.x * blockIdx.x;
489  if ( k >= numFreqBins ) {
490  return;
491  }
492  Fa_k[k] = cuCaddf( Fa_k[k], FaX_k[k] );
493  Fb_k[k] = cuCaddf( Fb_k[k], FbX_k[k] );
494 }
495 
496 __global__ void CUDAComputeTwoF( REAL4 *twoF, cuComplex *Fa_k, cuComplex *Fb_k, REAL4 A, REAL4 B, REAL4 C, REAL4 E, REAL4 Dinv, UINT4 numFreqBins )
497 {
498  int k = threadIdx.x + blockDim.x * blockIdx.x;
499  if ( k >= numFreqBins ) {
500  return;
501  }
502  cuComplex Fa = Fa_k[k];
503  cuComplex Fb = Fb_k[k];
504  REAL4 Fa_re = cuCrealf( Fa );
505  REAL4 Fa_im = cuCimagf( Fa );
506  REAL4 Fb_re = cuCrealf( Fb );
507  REAL4 Fb_im = cuCimagf( Fb );
508 
509  REAL4 twoF_k = 4; // default fallback = E[2F] in noise when Dinv == 0 due to ill-conditionness of M_munu
510  if ( Dinv > 0 ) {
511  twoF_k = 2.0f * Dinv * ( B * ( SQ( Fa_re ) + SQ( Fa_im ) )
512  + A * ( SQ( Fb_re ) + SQ( Fb_im ) )
513  - 2.0 * C * ( Fa_re * Fb_re + Fa_im * Fb_im )
514  - 2.0 * E * ( - Fa_re * Fb_im + Fa_im * Fb_re ) // nonzero only in RAA case where Ed!=0
515  );
516  }
517 
518  twoF[k] = twoF_k;
519 }
520 
521 static int
523  const FstatCommon *common,
524  void *method_data
525  )
526 {
527  // Check input
528  XLAL_CHECK( Fstats != NULL, XLAL_EFAULT );
529  XLAL_CHECK( common != NULL, XLAL_EFAULT );
530  XLAL_CHECK( method_data != NULL, XLAL_EFAULT );
531 
532  ResampCUDAMethodData *resamp = ( ResampCUDAMethodData * ) method_data;
533 
534  const FstatQuantities whatToCompute = Fstats->whatWasComputed;
535  XLAL_CHECK( !( whatToCompute & FSTATQ_ATOMS_PER_DET ), XLAL_EINVAL, "Resampling does not currently support atoms per detector" );
536 
538 
539  // ----- handy shortcuts ----------
540  PulsarDopplerParams thisPoint = Fstats->doppler;
541  const MultiCOMPLEX8TimeSeries *multiTimeSeries_DET = resamp->multiTimeSeries_DET;
542  UINT4 numDetectors = multiTimeSeries_DET->length;
543 
544  // collect internal timing info
545  BOOLEAN collectTiming = resamp->collectTiming;
546  Timings_t *Tau = &( resamp->timingResamp.Tau );
547  XLAL_INIT_MEM( ( *Tau ) ); // these need to be initialized to 0 for each call
548 
549  REAL8 ticStart = 0, tocEnd = 0;
550  REAL8 tic = 0, toc = 0;
551  if ( collectTiming ) {
552  XLAL_INIT_MEM( ( *Tau ) ); // re-set all timings to 0 at beginning of each Fstat-call
553  ticStart = XLALGetCPUTime();
554  }
555 
556  // Note: all buffering is done within that function
558 
559  if ( whatToCompute == FSTATQ_NONE ) {
560  return XLAL_SUCCESS;
561  }
562 
563  MultiCOMPLEX8TimeSeries *multiTimeSeries_SRC_a = resamp->multiTimeSeries_SRC_a;
564  MultiCOMPLEX8TimeSeries *multiTimeSeries_SRC_b = resamp->multiTimeSeries_SRC_b;
565 
566  // ============================== check workspace is properly allocated and initialized ===========
567 
568  // ----- workspace that depends on maximal number of output frequency bins 'numFreqBins' ----------
569  UINT4 numFreqBins = Fstats->numFreqBins;
570 
571  if ( collectTiming ) {
572  tic = XLALGetCPUTime();
573  }
574 
575  // NOTE: we try to use as much existing memory as possible in FstatResults, so we only
576  // allocate local 'workspace' storage in case there's not already a vector allocated in FstatResults for it
577  // this also avoid having to copy these results in case the user asked for them to be returned
578  // TODO: Reallocating existing memory has not been implemented in the CUDA version.
579  /*
580  if ( whatToCompute & FSTATQ_FAFB )
581  {
582  XLALFree ( ws->Fa_k ); // avoid memory leak if allocated in previous call
583  ws->Fa_k = Fstats->Fa;
584  XLALFree ( ws->Fb_k ); // avoid memory leak if allocated in previous call
585  ws->Fb_k = Fstats->Fb;
586  } // end: if returning FaFb we can use that return-struct as 'workspace'
587  else // otherwise: we (re)allocate it locally
588  {
589  if ( numFreqBins > ws->numFreqBinsAlloc )
590  {
591  XLAL_CHECK ( (ws->Fa_k = (COMPLEX8 *)XLALRealloc ( ws->Fa_k, numFreqBins * sizeof(COMPLEX8))) != NULL, XLAL_ENOMEM );
592  XLAL_CHECK ( (ws->Fb_k = (COMPLEX8 *)XLALRealloc ( ws->Fb_k, numFreqBins * sizeof(COMPLEX8))) != NULL, XLAL_ENOMEM );
593  } // only increase workspace arrays
594  }
595  */
596  XLAL_CHECK( cudaMalloc( ( void ** )&ws->FaX_k, numFreqBins * sizeof( COMPLEX8 ) ) == cudaSuccess, XLAL_ENOMEM );
597  XLAL_CHECK( cudaMalloc( ( void ** )&ws->FbX_k, numFreqBins * sizeof( COMPLEX8 ) ) == cudaSuccess, XLAL_ENOMEM );
598  XLAL_CHECK( cudaMalloc( ( void ** )&ws->Fa_k, numFreqBins * sizeof( COMPLEX8 ) ) == cudaSuccess, XLAL_ENOMEM );
599  XLAL_CHECK( cudaMalloc( ( void ** )&ws->Fb_k, numFreqBins * sizeof( COMPLEX8 ) ) == cudaSuccess, XLAL_ENOMEM );
600  /*if ( whatToCompute & FSTATQ_FAFB_PER_DET )
601  {
602  XLALFree ( ws->FaX_k ); // avoid memory leak if allocated in previous call
603  ws->FaX_k = NULL; // will be set in loop over detectors X
604  XLALFree ( ws->FbX_k ); // avoid memory leak if allocated in previous call
605  ws->FbX_k = NULL; // will be set in loop over detectors X
606  } // end: if returning FaFbPerDet we can use that return-struct as 'workspace'
607  else // otherwise: we (re)allocate it locally
608  {
609  if ( numFreqBins > ws->numFreqBinsAlloc )
610  {
611  XLAL_CHECK ( (ws->FaX_k = (COMPLEX8 *)XLALRealloc ( ws->FaX_k, numFreqBins * sizeof(COMPLEX8))) != NULL, XLAL_ENOMEM );
612  XLAL_CHECK ( (ws->FbX_k = (COMPLEX8 *)XLALRealloc ( ws->FbX_k, numFreqBins * sizeof(COMPLEX8))) != NULL, XLAL_ENOMEM );
613  } // only increase workspace arrays
614  }*/
615  if ( numFreqBins > ws->numFreqBinsAlloc ) {
616  ws->numFreqBinsAlloc = numFreqBins; // keep track of allocated array length
617  }
618 
619  if ( collectTiming ) {
620  toc = XLALGetCPUTime();
621  Tau->Mem = ( toc - tic ); // this one doesn't scale with number of detector!
622  }
623  // ====================================================================================================
624 
625  // loop over detectors
626  for ( UINT4 X = 0; X < numDetectors; X++ ) {
627  const COMPLEX8TimeSeries *TimeSeriesX_SRC_a = multiTimeSeries_SRC_a->data[X];
628  const COMPLEX8TimeSeries *TimeSeriesX_SRC_b = multiTimeSeries_SRC_b->data[X];
629 
630  // compute {Fa^X(f_k), Fb^X(f_k)}: results returned via workspace ws
631  XLAL_CHECK( XLALComputeFaFb_ResampCUDA( resamp, ws, thisPoint, common->dFreq, numFreqBins, TimeSeriesX_SRC_a, TimeSeriesX_SRC_b ) == XLAL_SUCCESS, XLAL_EFUNC );
632 
633  if ( collectTiming ) {
634  tic = XLALGetCPUTime();
635  }
636 
637  if ( X == 0 ) {
638  // avoid having to memset this array: for the first detector we *copy* results
639  XLAL_CHECK_CUDA_CALL( cudaMemcpy( ws->Fa_k, ws->FaX_k, sizeof( cuComplex )*numFreqBins, cudaMemcpyDeviceToDevice ) );
640  XLAL_CHECK_CUDA_CALL( cudaMemcpy( ws->Fb_k, ws->FbX_k, sizeof( cuComplex )*numFreqBins, cudaMemcpyDeviceToDevice ) );
641  } // end: if X==0
642  else {
643  // for subsequent detectors we *add to* them
644  CUDAAddToFaFb <<< ( numFreqBins + CUDA_BLOCK_SIZE - 1 ) / CUDA_BLOCK_SIZE, CUDA_BLOCK_SIZE >>> ( ws->Fa_k, ws->Fb_k, ws->FaX_k, ws->FbX_k, numFreqBins );
645  XLAL_CHECK_CUDA_CALL( cudaGetLastError() );
646  } // end:if X>0
647 
648  if ( whatToCompute & FSTATQ_FAFB_PER_DET ) {
649  XLAL_CHECK_CUDA_CALL( cudaMemcpy( Fstats->FaPerDet[X], ws->FaX_k, sizeof( cuComplex )*numFreqBins, cudaMemcpyDeviceToHost ) );
650  XLAL_CHECK_CUDA_CALL( cudaMemcpy( Fstats->FbPerDet[X], ws->FbX_k, sizeof( cuComplex )*numFreqBins, cudaMemcpyDeviceToHost ) );
651  }
652 
653  if ( collectTiming ) {
654  toc = XLALGetCPUTime();
655  Tau->SumFabX += ( toc - tic );
656  tic = toc;
657  }
658 
659  // ----- if requested: compute per-detector Fstat_X_k
660  if ( whatToCompute & FSTATQ_2F_PER_DET ) {
661  const REAL4 Ad = resamp->MmunuX[X].Ad;
662  const REAL4 Bd = resamp->MmunuX[X].Bd;
663  const REAL4 Cd = resamp->MmunuX[X].Cd;
664  const REAL4 Ed = resamp->MmunuX[X].Ed;
665  const REAL4 Dd_inv = 1.0f / resamp->MmunuX[X].Dd;
666  REAL4 *twoF_gpu;
667  XLAL_CHECK( cudaMalloc( ( void ** )&twoF_gpu, sizeof( REAL4 )*numFreqBins ) == cudaSuccess, XLAL_ENOMEM );
668  CUDAComputeTwoF <<< ( numFreqBins + CUDA_BLOCK_SIZE - 1 ) / CUDA_BLOCK_SIZE, CUDA_BLOCK_SIZE >>> ( twoF_gpu, ws->FaX_k, ws->FbX_k, Ad, Bd, Cd, Ed, Dd_inv, numFreqBins );
669  XLAL_CHECK_CUDA_CALL( cudaGetLastError() );
670  XLAL_CHECK_CUDA_CALL( cudaMemcpy( Fstats->twoFPerDet[X], twoF_gpu, sizeof( REAL4 )*numFreqBins, cudaMemcpyDeviceToHost ) );
671  cudaFree( twoF_gpu );
672 
673  } // end: if compute F_X
674 
675  if ( collectTiming ) {
676  toc = XLALGetCPUTime();
677  Tau->Fab2F += ( toc - tic );
678  }
679 
680  } // for X < numDetectors
681 
682  if ( whatToCompute & FSTATQ_FAFB ) {
683  XLAL_CHECK_CUDA_CALL( cudaMemcpy( Fstats->Fa, ws->Fa_k, sizeof( cuComplex )*numFreqBins, cudaMemcpyDeviceToHost ) );
684  XLAL_CHECK_CUDA_CALL( cudaMemcpy( Fstats->Fb, ws->Fb_k, sizeof( cuComplex )*numFreqBins, cudaMemcpyDeviceToHost ) );
685  }
686 
687  if ( collectTiming ) {
688  Tau->SumFabX /= numDetectors;
689  Tau->Fab2F /= numDetectors;
690  tic = XLALGetCPUTime();
691  }
692 
693  if ( ( whatToCompute & FSTATQ_2F ) || ( whatToCompute & FSTATQ_2F_CUDA ) ) {
694  const REAL4 Ad = resamp->Mmunu.Ad;
695  const REAL4 Bd = resamp->Mmunu.Bd;
696  const REAL4 Cd = resamp->Mmunu.Cd;
697  const REAL4 Ed = resamp->Mmunu.Ed;
698  const REAL4 Dd_inv = 1.0f / resamp->Mmunu.Dd;
699  if ( Fstats->twoF_CUDA == NULL ) {
700  XLAL_CHECK( cudaMalloc( ( void ** )&Fstats->twoF_CUDA, sizeof( REAL4 )*numFreqBins ) == cudaSuccess, XLAL_ENOMEM );
701  }
702  CUDAComputeTwoF <<< ( numFreqBins + CUDA_BLOCK_SIZE - 1 ) / CUDA_BLOCK_SIZE, CUDA_BLOCK_SIZE >>> ( Fstats->twoF_CUDA, ws->Fa_k, ws->Fb_k, Ad, Bd, Cd, Ed, Dd_inv, numFreqBins );
703  XLAL_CHECK_CUDA_CALL( cudaGetLastError() );
704  if ( whatToCompute & FSTATQ_2F ) {
705  XLAL_CHECK_CUDA_CALL( cudaMemcpy( Fstats->twoF, Fstats->twoF_CUDA, sizeof( REAL4 )*numFreqBins, cudaMemcpyDeviceToHost ) );
706  }
707  if ( !( whatToCompute & FSTATQ_2F_CUDA ) ) {
708  cudaFree( Fstats->twoF_CUDA );
709  Fstats->twoF_CUDA = NULL;
710  }
711  } // if FSTATQ_2F
712 
713  if ( collectTiming ) {
714  toc = XLALGetCPUTime();
715  Tau->Fab2F += ( toc - tic );
716  }
717 
718  // Return F-atoms per detector
719  if ( whatToCompute & FSTATQ_ATOMS_PER_DET ) {
720  XLAL_ERROR( XLAL_EFAILED, "NOT implemented!" );
721  }
722 
723  // Return antenna-pattern matrix
724  Fstats->Mmunu = resamp->Mmunu;
725 
726  // return per-detector antenna-pattern matrices
727  for ( UINT4 X = 0; X < numDetectors; X ++ ) {
728  Fstats->MmunuX[X] = resamp->MmunuX[X];
729  }
730 
731  // ----- workspace memory management:
732  // if we used the return struct directly to store Fa,Fb results,
733  // make sure to wipe those pointers to avoid mistakenly considering them as 'local' memory
734  // and re-allocing it in another call to this function
735  cudaFree( ws->Fa_k );
736  cudaFree( ws->Fb_k );
737  cudaFree( ws->FaX_k );
738  cudaFree( ws->FbX_k );
739  if ( whatToCompute & FSTATQ_FAFB ) {
740  ws->Fa_k = NULL;
741  ws->Fb_k = NULL;
742  }
743  if ( whatToCompute & FSTATQ_FAFB_PER_DET ) {
744  ws->FaX_k = NULL;
745  ws->FbX_k = NULL;
746  }
747 
748  if ( collectTiming ) {
749  tocEnd = XLALGetCPUTime();
750 
751  FstatTimingGeneric *tiGen = &( resamp->timingGeneric );
752  FstatTimingResamp *tiRS = &( resamp->timingResamp );
753  XLAL_CHECK( numDetectors == tiGen->Ndet, XLAL_EINVAL, "Inconsistent number of detectors between XLALCreateSetup() [%d] and XLALComputeFstat() [%d]\n", tiGen->Ndet, numDetectors );
754 
755  Tau->Total = ( tocEnd - ticStart );
756  // rescale all relevant timings to per-detector
757  Tau->Total /= numDetectors;
758  Tau->Bary /= numDetectors;
759  Tau->Spin /= numDetectors;
760  Tau->FFT /= numDetectors;
761  Tau->Norm /= numDetectors;
762  Tau->Copy /= numDetectors;
763  REAL8 Tau_buffer = Tau->Bary;
764  // compute generic F-stat timing model contributions
765  UINT4 NFbin = Fstats->numFreqBins;
766  REAL8 tauF_eff = Tau->Total / NFbin;
767  REAL8 tauF_core = ( Tau->Total - Tau_buffer ) / NFbin;
768 
769  // compute resampling timing model coefficients
770  REAL8 tau0_Fbin = ( Tau->Copy + Tau->Norm + Tau->SumFabX + Tau->Fab2F ) / NFbin;
771  REAL8 tau0_spin = Tau->Spin / ( tiRS->Resolution * tiRS->NsampFFT );
772  REAL8 tau0_FFT = Tau->FFT / ( 5.0 * tiRS->NsampFFT * log2( ( double ) tiRS->NsampFFT ) );
773 
774  // update the averaged timing-model quantities
775  tiGen->NCalls ++; // keep track of number of Fstat-calls for timing
776 #define updateAvgF(q) tiGen->q = ((tiGen->q *(tiGen->NCalls-1) + q)/(tiGen->NCalls))
777  updateAvgF( tauF_eff );
778  updateAvgF( tauF_core );
779  // we also average NFbin, which can be different between different calls to XLALComputeFstat() (contrary to Ndet)
780  updateAvgF( NFbin );
781 
782 #define updateAvgRS(q) tiRS->q = ((tiRS->q *(tiGen->NCalls-1) + q)/(tiGen->NCalls))
783  updateAvgRS( tau0_Fbin );
784  updateAvgRS( tau0_spin );
785  updateAvgRS( tau0_FFT );
786 
787  // buffer-quantities only updated if buffer was actually recomputed
788  if ( Tau->BufferRecomputed ) {
789  REAL8 tau0_bary = Tau_buffer / ( tiRS->Resolution * tiRS->NsampFFT );
790  REAL8 tauF_buffer = Tau_buffer / NFbin;
791 
792  updateAvgF( tauF_buffer );
793  updateAvgRS( tau0_bary );
794  } // if BufferRecomputed
795 
796  } // if collectTiming
797  return XLAL_SUCCESS;
798 
799 } // XLALComputeFstatResamp()
800 
801 __global__ void CUDANormFaFb( cuComplex *Fa_out,
802  cuComplex *Fb_out,
803  REAL8 FreqOut0,
804  REAL8 dFreq,
805  REAL8 dtauX,
806  REAL8 dt_SRC,
807  UINT4 numFreqBins )
808 {
809  int k = threadIdx.x + blockDim.x * blockIdx.x;
810  if ( k >= numFreqBins ) {
811  return;
812  }
813  REAL8 f_k = FreqOut0 + k * dFreq;
814  REAL8 cycles = - f_k * dtauX;
815  REAL8 sinphase, cosphase;
816  sincospi( 2 * cycles, &sinphase, &cosphase );
817  cuComplex normX_k = { ( float )( dt_SRC * cosphase ), ( float )( dt_SRC * sinphase ) };
818 
819  Fa_out[k] = cuCmulf( Fa_out[k], normX_k );
820  Fb_out[k] = cuCmulf( Fb_out[k], normX_k );
821 }
822 
823 __global__ void CUDAPopulateFaFbFromRaw( cuComplex *out, cuComplex *in, UINT4 numFreqBins, UINT4 offset_bins, UINT4 decimateFFT )
824 {
825  int k = threadIdx.x + blockIdx.x * blockDim.x;
826  if ( k >= numFreqBins ) {
827  return;
828  }
829  out[k] = in[offset_bins + k * decimateFFT];
830 }
831 
832 static int
833 XLALComputeFaFb_ResampCUDA( ResampCUDAMethodData *resamp, //!< [in,out] buffered resampling data and workspace
834  ResampCUDAWorkspace *ws, //!< [in,out] resampling workspace (memory-sharing across segments)
835  const PulsarDopplerParams thisPoint, //!< [in] Doppler point to compute {FaX,FbX} for
836  REAL8 dFreq, //!< [in] output frequency resolution
837  UINT4 numFreqBins, //!< [in] number of output frequency bins
838  const COMPLEX8TimeSeries *__restrict__ TimeSeries_SRC_a, //!< [in] SRC-frame single-IFO timeseries * a(t)
839  const COMPLEX8TimeSeries *__restrict__ TimeSeries_SRC_b //!< [in] SRC-frame single-IFO timeseries * b(t)
840  )
841 {
842  XLAL_CHECK( ( resamp != NULL ) && ( ws != NULL ) && ( TimeSeries_SRC_a != NULL ) && ( TimeSeries_SRC_b != NULL ), XLAL_EINVAL );
843  XLAL_CHECK( dFreq > 0, XLAL_EINVAL );
844  XLAL_CHECK( numFreqBins <= ws->numFreqBinsAlloc, XLAL_EINVAL );
845 
846  REAL8 FreqOut0 = thisPoint.fkdot[0];
847 
848  // compute frequency shift to align heterodyne frequency with output frequency bins
849  REAL8 fHet = TimeSeries_SRC_a->f0;
850  REAL8 dt_SRC = TimeSeries_SRC_a->deltaT;
851 
852  REAL8 dFreqFFT = dFreq / resamp->decimateFFT; // internally may be using higher frequency resolution dFreqFFT than requested
853  REAL8 freqShift = remainder( FreqOut0 - fHet, dFreq ); // frequency shift to closest bin
854  REAL8 fMinFFT = fHet + freqShift - dFreqFFT * ( resamp->numSamplesFFT / 2 ); // we'll shift DC into the *middle bin* N/2 [N always even!]
855  XLAL_CHECK( FreqOut0 >= fMinFFT, XLAL_EDOM, "Lowest output frequency outside the available frequency band: [FreqOut0 = %.16g] < [fMinFFT = %.16g]\n", FreqOut0, fMinFFT );
856  UINT4 offset_bins = ( UINT4 ) lround( ( FreqOut0 - fMinFFT ) / dFreqFFT );
857  UINT4 maxOutputBin = offset_bins + ( numFreqBins - 1 ) * resamp->decimateFFT;
858  XLAL_CHECK( maxOutputBin < resamp->numSamplesFFT, XLAL_EDOM, "Highest output frequency bin outside available band: [maxOutputBin = %d] >= [numSamplesFFT = %d]\n", maxOutputBin, resamp->numSamplesFFT );
859 
860  FstatTimingResamp *tiRS = &( resamp->timingResamp );
861  BOOLEAN collectTiming = resamp->collectTiming;
862  REAL8 tic = 0, toc = 0;
863 
864  XLAL_CHECK( resamp->numSamplesFFT >= TimeSeries_SRC_a->data->length, XLAL_EFAILED, "[numSamplesFFT = %d] < [len(TimeSeries_SRC_a) = %d]\n", resamp->numSamplesFFT, TimeSeries_SRC_a->data->length );
865  XLAL_CHECK( resamp->numSamplesFFT >= TimeSeries_SRC_b->data->length, XLAL_EFAILED, "[numSamplesFFT = %d] < [len(TimeSeries_SRC_b) = %d]\n", resamp->numSamplesFFT, TimeSeries_SRC_b->data->length );
866 
867  if ( collectTiming ) {
868  tic = XLALGetCPUTime();
869  }
870  XLAL_CHECK_CUDA_CALL( cudaMemset( ws->TS_FFT, 0, resamp->numSamplesFFT * sizeof( ws->TS_FFT[0] ) ) );
871 
872  // ----- compute FaX_k
873  // apply spindown phase-factors, store result in zero-padded timeseries for 'FFT'ing
874  XLAL_CHECK( XLALApplySpindownAndFreqShiftCUDA( ws->TS_FFT, TimeSeries_SRC_a, &thisPoint, freqShift ) == XLAL_SUCCESS, XLAL_EFUNC );
875 
876  if ( collectTiming ) {
877  toc = XLALGetCPUTime();
878  tiRS->Tau.Spin += ( toc - tic );
879  tic = toc;
880  }
881 
882  // Fourier transform the resampled Fa(t)
883  XLAL_CHECK_CUDA_CALL( cudaDeviceSynchronize() );
884  XLAL_CHECK_CUFFT_CALL( cufftExecC2C( resamp->fftplan, ws->TS_FFT, ws->FabX_Raw, CUFFT_FORWARD ) );
885  XLAL_CHECK_CUDA_CALL( cudaDeviceSynchronize() );
886  if ( collectTiming ) {
887  toc = XLALGetCPUTime();
888  tiRS->Tau.FFT += ( toc - tic );
889  tic = toc;
890  }
891 
892  CUDAPopulateFaFbFromRaw <<< ( numFreqBins + CUDA_BLOCK_SIZE - 1 ) / CUDA_BLOCK_SIZE, CUDA_BLOCK_SIZE >>> ( ws->FaX_k, ws->FabX_Raw, numFreqBins, offset_bins, resamp->decimateFFT );
893  XLAL_CHECK_CUDA_CALL( cudaGetLastError() );
894  if ( collectTiming ) {
895  toc = XLALGetCPUTime();
896  tiRS->Tau.Copy += ( toc - tic );
897  tic = toc;
898  }
899 
900  // ----- compute FbX_k
901  // apply spindown phase-factors, store result in zero-padded timeseries for 'FFT'ing
902  XLAL_CHECK( XLALApplySpindownAndFreqShiftCUDA( ws->TS_FFT, TimeSeries_SRC_b, &thisPoint, freqShift ) == XLAL_SUCCESS, XLAL_EFUNC );
903 
904  if ( collectTiming ) {
905  toc = XLALGetCPUTime();
906  tiRS->Tau.Spin += ( toc - tic );
907  tic = toc;
908  }
909 
910  // Fourier transform the resampled Fb(t)
911  XLAL_CHECK_CUFFT_CALL( cufftExecC2C( resamp->fftplan, ws->TS_FFT, ws->FabX_Raw, CUFFT_FORWARD ) );
912 
913  if ( collectTiming ) {
914  toc = XLALGetCPUTime();
915  tiRS->Tau.FFT += ( toc - tic );
916  tic = toc;
917  }
918  CUDAPopulateFaFbFromRaw <<< ( numFreqBins + CUDA_BLOCK_SIZE - 1 ) / CUDA_BLOCK_SIZE, CUDA_BLOCK_SIZE >>> ( ws->FbX_k, ws->FabX_Raw, numFreqBins, offset_bins, resamp->decimateFFT );
919  XLAL_CHECK_CUDA_CALL( cudaGetLastError() );
920 
921  if ( collectTiming ) {
922  toc = XLALGetCPUTime();
923  tiRS->Tau.Copy += ( toc - tic );
924  tic = toc;
925  }
926 
927  // ----- normalization factors to be applied to Fa and Fb:
928  const REAL8 dtauX = GPSDIFF( TimeSeries_SRC_a->epoch, thisPoint.refTime );
929  CUDANormFaFb <<< ( numFreqBins + CUDA_BLOCK_SIZE - 1 ) / CUDA_BLOCK_SIZE, CUDA_BLOCK_SIZE >>> ( ws->FaX_k, ws->FbX_k, FreqOut0, dFreq, dtauX, dt_SRC, numFreqBins );
930  XLAL_CHECK_CUDA_CALL( cudaGetLastError() );
931  if ( collectTiming ) {
932  toc = XLALGetCPUTime();
933  tiRS->Tau.Norm += ( toc - tic );
934  tic = toc;
935  }
936 
937  return XLAL_SUCCESS;
938 } // XLALComputeFaFb_ResampCUDA()
939 
940 
941 __global__ void CUDAApplySpindownAndFreqShift( cuComplex *out,
942  cuComplex *in,
943  PulsarDopplerParams doppler,
944  REAL8 freqShift,
945  REAL8 dt,
946  REAL8 Dtau0,
947  UINT4 s_max,
948  UINT4 numSamplesIn )
949 
950 {
951  int j = threadIdx.x + blockIdx.x * blockDim.x;
952  if ( j >= numSamplesIn ) {
953  return;
954  }
955  REAL8 taup_j = j * dt;
956  REAL8 Dtau_alpha_j = Dtau0 + taup_j;
957  REAL8 cycles = -freqShift * taup_j;
958  REAL8 Dtau_pow_kp1 = Dtau_alpha_j;
959  for ( UINT4 k = 1; k <= s_max; k++ ) {
960  Dtau_pow_kp1 *= Dtau_alpha_j;
961  cycles += - lal_fact_inv[k + 1] * doppler.fkdot[k] * Dtau_pow_kp1;
962  }
963  REAL8 cosphase, sinphase;
964  sincospi( 2 * cycles, &sinphase, &cosphase );
965  cuComplex em2piphase = {( float )cosphase, ( float )sinphase};
966  out[j] = cuCmulf( in[j], em2piphase );
967 }
968 
969 static int
970 XLALApplySpindownAndFreqShiftCUDA( cuComplex *__restrict__ xOut, ///< [out] the spindown-corrected SRC-frame timeseries
971  const COMPLEX8TimeSeries *__restrict__ xIn, ///< [in] the input SRC-frame timeseries
972  const PulsarDopplerParams *__restrict__ doppler, ///< [in] containing spindown parameters
973  REAL8 freqShift ///< [in] frequency-shift to apply, sign is "new - old"
974  )
975 {
976  // input sanity checks
977  XLAL_CHECK( xOut != NULL, XLAL_EINVAL );
978  XLAL_CHECK( xIn != NULL, XLAL_EINVAL );
979  XLAL_CHECK( doppler != NULL, XLAL_EINVAL );
980 
981  // determine number of spin downs to include
982  UINT4 s_max = PULSAR_MAX_SPINS - 1;
983  while ( ( s_max > 0 ) && ( doppler->fkdot[s_max] == 0 ) ) {
984  s_max --;
985  }
986  REAL8 dt = xIn->deltaT;
987  UINT4 numSamplesIn = xIn->data->length;
988  LIGOTimeGPS epoch = xIn->epoch;
989  REAL8 Dtau0 = GPSDIFF( epoch, doppler->refTime );
990 
991  XLAL_CHECK_CUDA_CALL( cudaDeviceSynchronize() );
992  CUDAApplySpindownAndFreqShift <<< ( numSamplesIn + CUDA_BLOCK_SIZE - 1 ) / CUDA_BLOCK_SIZE, CUDA_BLOCK_SIZE >>> ( xOut, reinterpret_cast<cuComplex *>( xIn->data->data ), *doppler, freqShift, dt, Dtau0, s_max, numSamplesIn );
993  XLAL_CHECK_CUDA_CALL( cudaGetLastError() );
994  XLAL_CHECK_CUDA_CALL( cudaDeviceSynchronize() );
995 
996  return XLAL_SUCCESS;
997 } // XLALApplySpindownAndFreqShiftCUDA()
998 
999 __global__ void CUDAApplyHetAMInterp( cuComplex *TS_a, cuComplex *TS_b, cuComplex *TStmp1, cuComplex *TStmp2, UINT4 numSamples )
1000 {
1001  int j = threadIdx.x + blockIdx.x * blockDim.x;
1002  if ( j >= numSamples ) {
1003  return;
1004  }
1005  TS_b[j] = cuCmulf( TS_a[j], TStmp2[j] );
1006  TS_a[j] = cuCmulf( TS_a[j], TStmp1[j] );
1007 }
1008 
1009 __global__ void CUDAApplyHetAMSrc( cuComplex *TStmp1_SRC_data,
1010  cuComplex *TStmp2_SRC_data,
1011  REAL8 *ti_DET_data,
1012  UINT4 iStart_SRC_al,
1013  REAL8 tStart_SRC_0,
1014  REAL8 tStart_DET_0,
1015  REAL8 dt_SRC,
1016  REAL8 tMid_DET_al,
1017  REAL8 tMid_SRC_al,
1018  REAL8 Tdot_al,
1019  REAL8 fHet,
1020  REAL4 a_al,
1021  REAL4 b_al,
1022  UINT4 numSamples )
1023 {
1024  int j = threadIdx.x + blockIdx.x * blockDim.x;
1025  if ( j >= numSamples ) {
1026  return;
1027  }
1028 
1029  UINT4 iSRC_al_j = iStart_SRC_al + j;
1030 
1031  // for each time sample in the SRC frame, we estimate the corresponding detector time,
1032  // using a linear approximation expanding around the midpoint of each SFT
1033  REAL8 t_SRC = tStart_SRC_0 + iSRC_al_j * dt_SRC;
1034  REAL8 ti_DET_al_j = tMid_DET_al + ( t_SRC - tMid_SRC_al ) / Tdot_al;
1035  ti_DET_data [ iSRC_al_j ] = ti_DET_al_j;
1036 
1037  // pre-compute correction factors due to non-zero heterodyne frequency of input
1038  REAL8 tDiff = iSRC_al_j * dt_SRC + ( tStart_DET_0 - ti_DET_al_j ); // tSRC_al_j - tDET(tSRC_al_j)
1039  REAL8 cycles = fmod( fHet * tDiff, 1.0 ); // the accumulated heterodyne cycles
1040 
1041  REAL8 cosphase, sinphase;
1042  sincospi( -2 * cycles, &sinphase, &cosphase ); // the real and imaginary parts of the phase correction
1043 
1044  cuComplex ei2piphase = {( float ) cosphase, ( float ) sinphase};
1045 
1046  // apply AM coefficients a(t), b(t) to SRC frame timeseries [alternate sign to get final FFT return DC in the middle]
1047  // equivalent to: REAL4 signum = signumLUT [ (iSRC_al_j % 2) ];
1048  cuComplex signum = {1.0f - 2 * ( iSRC_al_j % 2 ), 0}; // alternating sign, avoid branching
1049  ei2piphase = cuCmulf( ei2piphase, signum );
1050 
1051  TStmp1_SRC_data[iSRC_al_j] = make_cuComplex( ei2piphase.x * a_al, ei2piphase.y * a_al );
1052  TStmp2_SRC_data[iSRC_al_j] = make_cuComplex( ei2piphase.x * b_al, ei2piphase.y * b_al );
1053 }
1054 
1055 __global__ void CUDASincInterp( cuComplex *out,
1056  REAL8 *t_out,
1057  cuComplex *in,
1058  REAL8 *win,
1059  UINT4 Dterms,
1060  UINT4 numSamplesIn,
1061  UINT4 numSamplesOut,
1062  REAL8 tmin,
1063  REAL8 dt,
1064  REAL8 oodt )
1065 {
1066  int l = threadIdx.x + blockDim.x * blockIdx.x;
1067  if ( l >= numSamplesOut ) {
1068  return;
1069  }
1070  REAL8 t = t_out[l] - tmin; // measure time since start of input timeseries
1071 
1072  // samples outside of input timeseries are returned as 0
1073  if ( ( t < 0 ) || ( t > ( numSamplesIn - 1 )*dt ) ) { // avoid any extrapolations!
1074  out[l] = make_cuComplex( 0, 0 );
1075  return;
1076  }
1077 
1078  REAL8 t_by_dt = t * oodt;
1079  INT8 jstar = lround( t_by_dt ); // bin closest to 't', guaranteed to be in [0, numSamples-1]
1080 
1081  if ( fabs( t_by_dt - jstar ) < 2.0e-4 ) { // avoid numerical problems near peak
1082  out[l] = in[jstar]; // known analytic solution for exact bin
1083  return;
1084  }
1085 
1086  INT4 jStart0 = jstar - Dterms;
1087  UINT4 jEnd0 = jstar + Dterms;
1088  UINT4 jStart = max( jStart0, 0 );
1089  UINT4 jEnd = min( jEnd0, numSamplesIn - 1 );
1090 
1091  REAL4 delta_jStart = ( t_by_dt - jStart );
1092  REAL8 sin0 = sinpi( delta_jStart );
1093 
1094  REAL4 sin0oopi = sin0 * 1.0 / M_PI;
1095 
1096  cuComplex y_l = {0, 0};
1097  REAL8 delta_j = delta_jStart;
1098  for ( UINT8 j = jStart; j <= jEnd; j ++ ) {
1099  cuComplex Cj = {( float )( win[j - jStart0] * sin0oopi / delta_j ), 0};
1100 
1101  y_l = cuCaddf( y_l, cuCmulf( Cj, in[j] ) );
1102 
1103  sin0oopi = -sin0oopi; // sin-term flips sign every step
1104  delta_j --;
1105  } // for j in [j* - Dterms, ... ,j* + Dterms]
1106 
1107  out[l] = y_l;
1108 }
1109 
1110 // Reimplementation of XLALCreateHammingREAL8Window
1111 __global__ void CUDACreateHammingWindow( REAL8 *out, UINT4 length )
1112 {
1113 
1114  int i = threadIdx.x + blockDim.x * blockIdx.x;
1115  if ( i >= ( length + 1 ) / 2 ) {
1116  return;
1117  }
1118 
1119  int length_reduced = length - 1;
1120  double Y = length_reduced > 0 ? ( 2 * i - length_reduced ) / ( double ) length_reduced : 0;
1121  out[i] = 0.08 + 0.92 * pow( cos( M_PI / 2 * Y ), 2 );
1122  out[length - i - 1] = 0.08 + 0.92 * pow( cos( M_PI / 2 * Y ), 2 );
1123 }
1124 
1125 int
1126 SincInterp( COMPLEX8Vector *y_out, ///< [out] output series of interpolated y-values [must be same size as t_out]
1127  const REAL8Vector *t_out, ///< [in] output time-steps to interpolate input to
1128  const COMPLEX8TimeSeries *ts_in, ///< [in] regularly-spaced input timeseries
1129  UINT4 Dterms ///< [in] window sinc kernel sum to +-Dterms around max
1130  )
1131 {
1132  XLAL_CHECK( y_out != NULL, XLAL_EINVAL );
1133  XLAL_CHECK( t_out != NULL, XLAL_EINVAL );
1134  XLAL_CHECK( ts_in != NULL, XLAL_EINVAL );
1135  XLAL_CHECK( y_out->length == t_out->length, XLAL_EINVAL );
1136 
1137  UINT4 numSamplesOut = t_out->length;
1138  UINT4 numSamplesIn = ts_in->data->length;
1139  REAL8 dt = ts_in->deltaT;
1140  REAL8 tmin = XLALGPSGetREAL8( &( ts_in->epoch ) ); // time of first bin in input timeseries
1141 
1142  UINT4 winLen = 2 * Dterms + 1;
1143 
1144  const REAL8 oodt = 1.0 / dt;
1145  REAL8 *win_gpu;
1146  XLAL_CHECK( cudaMalloc( ( void ** )&win_gpu, sizeof( REAL8 )*winLen ) == cudaSuccess, XLAL_ENOMEM );
1147  CUDACreateHammingWindow <<< ( winLen + CUDA_BLOCK_SIZE - 1 ) / CUDA_BLOCK_SIZE, CUDA_BLOCK_SIZE >>> ( win_gpu, winLen );
1148  XLAL_CHECK_CUDA_CALL( cudaGetLastError() );
1149  CUDASincInterp <<< ( numSamplesOut + CUDA_BLOCK_SIZE - 1 ) / CUDA_BLOCK_SIZE, CUDA_BLOCK_SIZE >>> ( ( cuComplex * )( y_out->data ), t_out->data, ( cuComplex * )( ts_in->data->data ), win_gpu, Dterms, numSamplesIn, numSamplesOut, tmin, dt, oodt );
1150  XLAL_CHECK_CUDA_CALL( cudaGetLastError() );
1151  cudaFree( win_gpu );
1152 
1153  return XLAL_SUCCESS;
1154 }
1155 
1156 ///
1157 /// Performs barycentric resampling on a multi-detector timeseries, updates resampling buffer with results
1158 ///
1159 /// NOTE Buffering: this function does check
1160 /// 1) whether the previously-buffered solution can be completely reused (same sky-position and binary parameters), or
1161 /// 2) if at least sky-dependent quantities can be re-used (antenna-patterns + timings) in case only binary parameters changed
1162 ///
1163 static int
1165  ResampCUDAMethodData *resamp, // [in/out] resampling input and buffer (to store resampling TS)
1166  const PulsarDopplerParams *thisPoint, // [in] current skypoint and reftime
1167  const FstatCommon *common // [in] various input quantities and parameters used here
1168 )
1169 {
1170  // check input sanity
1171  XLAL_CHECK( thisPoint != NULL, XLAL_EINVAL );
1172  XLAL_CHECK( common != NULL, XLAL_EINVAL );
1173  XLAL_CHECK( resamp != NULL, XLAL_EINVAL );
1174  XLAL_CHECK( resamp->multiTimeSeries_DET != NULL, XLAL_EINVAL );
1175  XLAL_CHECK( resamp->multiTimeSeries_SRC_a != NULL, XLAL_EINVAL );
1176  XLAL_CHECK( resamp->multiTimeSeries_SRC_b != NULL, XLAL_EINVAL );
1177 
1179 
1181  XLAL_CHECK( resamp->multiTimeSeries_SRC_a->length == numDetectors, XLAL_EINVAL, "Inconsistent number of detectors tsDET(%d) != tsSRC(%d)\n", numDetectors, resamp->multiTimeSeries_SRC_a->length );
1182  XLAL_CHECK( resamp->multiTimeSeries_SRC_b->length == numDetectors, XLAL_EINVAL, "Inconsistent number of detectors tsDET(%d) != tsSRC(%d)\n", numDetectors, resamp->multiTimeSeries_SRC_b->length );
1183 
1184  // ============================== BEGIN: handle buffering =============================
1185  BOOLEAN same_skypos = ( resamp->prev_doppler.Alpha == thisPoint->Alpha ) && ( resamp->prev_doppler.Delta == thisPoint->Delta );
1186  BOOLEAN same_refTime = ( GPSDIFF( resamp->prev_doppler.refTime, thisPoint->refTime ) == 0 );
1187  BOOLEAN same_binary = \
1188  ( resamp->prev_doppler.asini == thisPoint->asini ) &&
1189  ( resamp->prev_doppler.period == thisPoint->period ) &&
1190  ( resamp->prev_doppler.ecc == thisPoint->ecc ) &&
1191  ( GPSDIFF( resamp->prev_doppler.tp, thisPoint->tp ) == 0 ) &&
1192  ( resamp->prev_doppler.argp == thisPoint->argp );
1193 
1194  Timings_t *Tau = &( resamp->timingResamp.Tau );
1195  REAL8 tic = 0, toc = 0;
1196  BOOLEAN collectTiming = resamp->collectTiming;
1197 
1198  // if same sky-position *and* same binary, we can simply return as there's nothing to be done here
1199  if ( same_skypos && same_refTime && same_binary ) {
1200  Tau->BufferRecomputed = 0;
1201  return XLAL_SUCCESS;
1202  }
1203  // else: keep track of 'buffer miss', ie we need to recompute the buffer
1204  Tau->BufferRecomputed = 1;
1205  resamp->timingGeneric.NBufferMisses ++;
1206  if ( collectTiming ) {
1207  tic = XLALGetCPUTime();
1208  }
1209 
1210  MultiSSBtimes *multiSRCtimes = NULL;
1211 
1212  // only if different sky-position: re-compute antenna-patterns and SSB timings, re-use from buffer otherwise
1213  if ( !( same_skypos && same_refTime ) ) {
1214  SkyPosition skypos;
1216  skypos.longitude = thisPoint->Alpha;
1217  skypos.latitude = thisPoint->Delta;
1218 
1220  XLAL_CHECK( ( resamp->multiAMcoef = XLALComputeMultiAMCoeffs( common->multiDetectorStates, common->multiNoiseWeights, skypos ) ) != NULL, XLAL_EFUNC );
1221  resamp->Mmunu = resamp->multiAMcoef->Mmunu;
1222  for ( UINT4 X = 0; X < numDetectors; X ++ ) {
1223  resamp->MmunuX[X].Ad = resamp->multiAMcoef->data[X]->A;
1224  resamp->MmunuX[X].Bd = resamp->multiAMcoef->data[X]->B;
1225  resamp->MmunuX[X].Cd = resamp->multiAMcoef->data[X]->C;
1226  resamp->MmunuX[X].Ed = 0;
1227  resamp->MmunuX[X].Dd = resamp->multiAMcoef->data[X]->D;
1228  }
1229 
1231  XLAL_CHECK( ( resamp->multiSSBtimes = XLALGetMultiSSBtimes( common->multiDetectorStates, skypos, thisPoint->refTime, common->SSBprec ) ) != NULL, XLAL_EFUNC );
1232 
1233  } // if cannot re-use buffered solution ie if !(same_skypos && same_binary)
1234 
1235  if ( thisPoint->asini > 0 ) { // binary case
1237  multiSRCtimes = resamp->multiBinaryTimes;
1238  } else { // isolated case
1239  multiSRCtimes = resamp->multiSSBtimes;
1240  }
1241 
1242  // record barycenter parameters in order to allow re-usal of this result ('buffering')
1243  resamp->prev_doppler = ( *thisPoint );
1244 
1245  // shorthands
1246  REAL8 fHet = resamp->multiTimeSeries_DET->data[0]->f0;
1247  REAL8 Tsft = common->multiTimestamps->data[0]->deltaT;
1248  REAL8 dt_SRC = resamp->multiTimeSeries_SRC_a->data[0]->deltaT;
1249 
1250  // loop over detectors X
1251  for ( UINT4 X = 0; X < numDetectors; X++ ) {
1252  // shorthand pointers: input
1253  const COMPLEX8TimeSeries *TimeSeries_DETX = resamp->multiTimeSeries_DET->data[X];
1254  const LIGOTimeGPSVector *Timestamps_DETX = common->multiTimestamps->data[X];
1255  const SSBtimes *SRCtimesX = multiSRCtimes->data[X];
1256  const AMCoeffs *AMcoefX = resamp->multiAMcoef->data[X];
1257 
1258  // shorthand pointers: output
1259  COMPLEX8TimeSeries *TimeSeries_SRCX_a = resamp->multiTimeSeries_SRC_a->data[X];
1260  COMPLEX8TimeSeries *TimeSeries_SRCX_b = resamp->multiTimeSeries_SRC_b->data[X];
1261  REAL8Vector *ti_DET = ws->SRCtimes_DET;
1262 
1263  // useful shorthands
1264  REAL8 refTime8 = GPSGETREAL8( &SRCtimesX->refTime );
1265  UINT4 numSFTsX = Timestamps_DETX->length;
1266  UINT4 numSamples_DETX = TimeSeries_DETX->data->length;
1267  UINT4 numSamples_SRCX = TimeSeries_SRCX_a->data->length;
1268 
1269  // sanity checks on input data
1270  XLAL_CHECK( numSamples_SRCX == TimeSeries_SRCX_b->data->length, XLAL_EINVAL );
1271  XLAL_CHECK( dt_SRC == TimeSeries_SRCX_a->deltaT, XLAL_EINVAL );
1272  XLAL_CHECK( dt_SRC == TimeSeries_SRCX_b->deltaT, XLAL_EINVAL );
1273  XLAL_CHECK( numSamples_DETX > 0, XLAL_EINVAL, "Input timeseries for detector X=%d has zero samples. Can't handle that!\n", X );
1274  XLAL_CHECK( ( SRCtimesX->DeltaT->length == numSFTsX ) && ( SRCtimesX->Tdot->length == numSFTsX ), XLAL_EINVAL );
1275  REAL8 fHetX = resamp->multiTimeSeries_DET->data[X]->f0;
1276  XLAL_CHECK( fabs( fHet - fHetX ) < LAL_REAL8_EPS * fHet, XLAL_EINVAL, "Input timeseries must have identical heterodyning frequency 'f0(X=%d)' (%.16g != %.16g)\n", X, fHet, fHetX );
1277  REAL8 TsftX = common->multiTimestamps->data[X]->deltaT;
1278  XLAL_CHECK( Tsft == TsftX, XLAL_EINVAL, "Input timestamps must have identical stepsize 'Tsft(X=%d)' (%.16g != %.16g)\n", X, Tsft, TsftX );
1279 
1280  TimeSeries_SRCX_a->f0 = fHet;
1281  TimeSeries_SRCX_b->f0 = fHet;
1282  // set SRC-frame time-series start-time
1283  REAL8 tStart_SRC_0 = refTime8 + SRCtimesX->DeltaT->data[0] - ( 0.5 * Tsft ) * SRCtimesX->Tdot->data[0];
1285  GPSSETREAL8( epoch, tStart_SRC_0 );
1286  TimeSeries_SRCX_a->epoch = epoch;
1287  TimeSeries_SRCX_b->epoch = epoch;
1288 
1289  // make sure all output samples are initialized to zero first, in case of gaps
1290  XLAL_CHECK_CUDA_CALL( cudaMemset( TimeSeries_SRCX_a->data->data, 0, TimeSeries_SRCX_a->data->length * sizeof( TimeSeries_SRCX_a->data->data[0] ) ) );
1291  XLAL_CHECK_CUDA_CALL( cudaMemset( TimeSeries_SRCX_b->data->data, 0, TimeSeries_SRCX_b->data->length * sizeof( TimeSeries_SRCX_b->data->data[0] ) ) );
1292  // make sure detector-frame timesteps to interpolate to are initialized to 0, in case of gaps
1293  XLAL_CHECK_CUDA_CALL( cudaMemset( ws->SRCtimes_DET->data, 0, ws->SRCtimes_DET->length * sizeof( ws->SRCtimes_DET->data[0] ) ) );
1294 
1295  XLAL_CHECK_CUDA_CALL( cudaMemset( ws->TStmp1_SRC->data, 0, ws->TStmp1_SRC->length * sizeof( ws->TStmp1_SRC->data[0] ) ) );
1296  XLAL_CHECK_CUDA_CALL( cudaMemset( ws->TStmp2_SRC->data, 0, ws->TStmp2_SRC->length * sizeof( ws->TStmp2_SRC->data[0] ) ) );
1297 
1298  REAL8 tStart_DET_0 = GPSGETREAL8( &( Timestamps_DETX->data[0] ) ); // START time of the SFT at the detector
1299  // loop over SFT timestamps and compute the detector frame time samples corresponding to uniformly sampled SRC time samples
1300  for ( UINT4 alpha = 0; alpha < numSFTsX; alpha ++ ) {
1301  // define some useful shorthands
1302  REAL8 Tdot_al = SRCtimesX->Tdot->data [ alpha ]; // the instantaneous time derivitive dt_SRC/dt_DET at the MID-POINT of the SFT
1303  REAL8 tMid_SRC_al = refTime8 + SRCtimesX->DeltaT->data[alpha]; // MID-POINT time of the SFT at the SRC
1304  REAL8 tStart_SRC_al = tMid_SRC_al - 0.5 * Tsft * Tdot_al; // approximate START time of the SFT at the SRC
1305  REAL8 tEnd_SRC_al = tMid_SRC_al + 0.5 * Tsft * Tdot_al; // approximate END time of the SFT at the SRC
1306 
1307  REAL8 tStart_DET_al = GPSGETREAL8( &( Timestamps_DETX->data[alpha] ) ); // START time of the SFT at the detector
1308  REAL8 tMid_DET_al = tStart_DET_al + 0.5 * Tsft; // MID-POINT time of the SFT at the detector
1309 
1310  // indices of first and last SRC-frame sample corresponding to this SFT
1311  UINT4 iStart_SRC_al = lround( ( tStart_SRC_al - tStart_SRC_0 ) / dt_SRC ); // the index of the resampled timeseries corresponding to the start of the SFT
1312  UINT4 iEnd_SRC_al = lround( ( tEnd_SRC_al - tStart_SRC_0 ) / dt_SRC ); // the index of the resampled timeseries corresponding to the end of the SFT
1313 
1314  // truncate to actual SRC-frame timeseries
1315  iStart_SRC_al = MYMIN( iStart_SRC_al, numSamples_SRCX - 1 );
1316  iEnd_SRC_al = MYMIN( iEnd_SRC_al, numSamples_SRCX - 1 );
1317  UINT4 numSamplesSFT_SRC_al = iEnd_SRC_al - iStart_SRC_al + 1; // the number of samples in the SRC-frame for this SFT
1318 
1319  REAL4 a_al = AMcoefX->a->data[alpha];
1320  REAL4 b_al = AMcoefX->b->data[alpha];
1321  CUDAApplyHetAMSrc <<< ( numSamplesSFT_SRC_al + CUDA_BLOCK_SIZE - 1 ) / CUDA_BLOCK_SIZE, CUDA_BLOCK_SIZE >>> ( ( cuComplex * )( ws->TStmp1_SRC->data ), ( cuComplex * )( ws->TStmp2_SRC->data ), ti_DET->data, iStart_SRC_al, tStart_SRC_0, tStart_DET_0, dt_SRC, tMid_DET_al, tMid_SRC_al, Tdot_al, fHet, a_al, b_al, numSamplesSFT_SRC_al );
1322  XLAL_CHECK_CUDA_CALL( cudaGetLastError() );
1323  } // for alpha < numSFTsX
1324 
1325  XLAL_CHECK( ti_DET->length >= TimeSeries_SRCX_a->data->length, XLAL_EINVAL );
1326  UINT4 bak_length = ti_DET->length;
1327  ti_DET->length = TimeSeries_SRCX_a->data->length;
1328 
1329  XLAL_CHECK( SincInterp( TimeSeries_SRCX_a->data, ti_DET, TimeSeries_DETX, resamp->Dterms ) == XLAL_SUCCESS, XLAL_EFUNC );
1330 
1331  ti_DET->length = bak_length;
1332 
1333  // apply heterodyne correction and AM-functions a(t) and b(t) to interpolated timeseries
1334  CUDAApplyHetAMInterp <<< ( numSamples_SRCX + CUDA_BLOCK_SIZE - 1 ) / CUDA_BLOCK_SIZE, CUDA_BLOCK_SIZE >>> ( ( cuComplex * )( TimeSeries_SRCX_a->data->data ), ( cuComplex * )( TimeSeries_SRCX_b->data->data ), ( cuComplex * )( ws->TStmp1_SRC->data ), ( cuComplex * )( ws->TStmp2_SRC->data ), numSamples_SRCX );
1335  XLAL_CHECK_CUDA_CALL( cudaGetLastError() );
1336 
1337  } // for X < numDetectors
1338 
1339  if ( collectTiming ) {
1340  toc = XLALGetCPUTime();
1341  Tau->Bary = ( toc - tic );
1342  }
1343 
1344  return XLAL_SUCCESS;
1345 
1346 } // XLALBarycentricResampleMultiCOMPLEX8TimeSeriesCUDA()
1347 
1348 extern "C" int
1349 XLALExtractResampledTimeseries_ResampCUDA( MultiCOMPLEX8TimeSeries **multiTimeSeries_SRC_a, MultiCOMPLEX8TimeSeries **multiTimeSeries_SRC_b, void *method_data )
1350 {
1351  XLAL_CHECK( method_data != NULL, XLAL_EINVAL );
1352  XLAL_CHECK( ( multiTimeSeries_SRC_a != NULL ) && ( multiTimeSeries_SRC_b != NULL ), XLAL_EINVAL );
1353  XLAL_CHECK( method_data != NULL, XLAL_EINVAL );
1354 
1355  ResampCUDAMethodData *resamp = ( ResampCUDAMethodData * ) method_data;
1356 
1359 
1360  *multiTimeSeries_SRC_a = resamp->hostCopy4ExtractTS_SRC_a;
1361  *multiTimeSeries_SRC_b = resamp->hostCopy4ExtractTS_SRC_b;
1362 
1363  return XLAL_SUCCESS;
1364 
1365 } // XLALExtractResampledTimeseries_intern()
1366 
1367 extern "C" int
1368 XLALGetFstatTiming_ResampCUDA( const void *method_data, FstatTimingGeneric *timingGeneric, FstatTimingModel *timingModel )
1369 {
1370  XLAL_CHECK( method_data != NULL, XLAL_EINVAL );
1371  XLAL_CHECK( timingGeneric != NULL, XLAL_EINVAL );
1372  XLAL_CHECK( timingModel != NULL, XLAL_EINVAL );
1373 
1374  const ResampCUDAMethodData *resamp = ( const ResampCUDAMethodData * ) method_data;
1375  XLAL_CHECK( resamp != NULL, XLAL_EINVAL );
1376 
1377  ( *timingGeneric ) = resamp->timingGeneric; // struct-copy generic timing measurements
1378 
1380 
1381  return XLAL_SUCCESS;
1382 } // XLALGetFstatTiming_ResampCUDA()
1383 
1384 /// @}
1385 
1386 // Local Variables:
1387 // mode: c
1388 // End:
#define updateAvgF(q)
#define updateAvgRS(q)
#define GPSGETREAL8(x)
#define GPSSETREAL8(gps, r8)
#define GPSDIFF(x, y)
static int XLALGetFstatTiming_Resamp_intern(const FstatTimingResamp *tiRS, FstatTimingModel *timingModel)
#define SQ(x)
#define min(a, b)
int j
int k
UINT2 A
Definition: SFTnaming.c:46
UINT2 B
Definition: SFTnaming.c:47
int l
int SincInterp(COMPLEX8Vector *y_out, const REAL8Vector *t_out, const COMPLEX8TimeSeries *ts_in, UINT4 Dterms)
static int XLALComputeFstatResampCUDA(FstatResults *Fstats, const FstatCommon *common, void *method_data)
__global__ void CUDAApplySpindownAndFreqShift(cuComplex *out, cuComplex *in, PulsarDopplerParams doppler, REAL8 freqShift, REAL8 dt, REAL8 Dtau0, UINT4 s_max, UINT4 numSamplesIn)
__global__ void CUDACreateHammingWindow(REAL8 *out, UINT4 length)
static void XLALDestroyResampCUDAWorkspace(void *workspace)
static void DestroyMultiCOMPLEX8TimeSeriesCUDA(MultiCOMPLEX8TimeSeries *multi)
static void DestroyCOMPLEX8TimeSeriesCUDA(COMPLEX8TimeSeries *series)
__global__ void CUDAPopulateFaFbFromRaw(cuComplex *out, cuComplex *in, UINT4 numFreqBins, UINT4 offset_bins, UINT4 decimateFFT)
static int XLALComputeFaFb_ResampCUDA(ResampCUDAMethodData *resamp, ResampCUDAWorkspace *ws, const PulsarDopplerParams thisPoint, REAL8 dFreq, UINT4 numFreqBins, const COMPLEX8TimeSeries *TimeSeries_SRC_a, const COMPLEX8TimeSeries *TimeSeries_SRC_b)
__global__ void CUDASincInterp(cuComplex *out, REAL8 *t_out, cuComplex *in, REAL8 *win, UINT4 Dterms, UINT4 numSamplesIn, UINT4 numSamplesOut, REAL8 tmin, REAL8 dt, REAL8 oodt)
__constant__ REAL8 PI
static int CopyCOMPLEX8TimeSeriesDtoH(COMPLEX8TimeSeries **dst, COMPLEX8TimeSeries *src)
static void DestroyCOMPLEX8VectorCUDA(COMPLEX8Vector *vec)
static COMPLEX8Vector * CreateCOMPLEX8VectorCUDA(UINT4 length)
__device__ __constant__ REAL8 lal_fact_inv[LAL_FACT_MAX]
int XLALExtractResampledTimeseries_ResampCUDA(MultiCOMPLEX8TimeSeries **multiTimeSeries_SRC_a, MultiCOMPLEX8TimeSeries **multiTimeSeries_SRC_b, void *method_data)
__global__ void CUDAComputeTwoF(REAL4 *twoF, cuComplex *Fa_k, cuComplex *Fb_k, REAL4 A, REAL4 B, REAL4 C, REAL4 E, REAL4 Dinv, UINT4 numFreqBins)
static int MoveCOMPLEX8TimeSeriesHtoD(COMPLEX8TimeSeries *series)
static REAL8Vector * CreateREAL8VectorCUDA(UINT4 length)
static int XLALBarycentricResampleMultiCOMPLEX8TimeSeriesCUDA(ResampCUDAMethodData *resamp, const PulsarDopplerParams *thisPoint, const FstatCommon *common)
Performs barycentric resampling on a multi-detector timeseries, updates resampling buffer with result...
__global__ void CUDAAddToFaFb(cuComplex *Fa_k, cuComplex *Fb_k, cuComplex *FaX_k, cuComplex *FbX_k, UINT4 numFreqBins)
static void DestroyREAL8VectorCUDA(REAL8Vector *vec)
#define CUDA_BLOCK_SIZE
int XLALSetupFstatResampCUDA(void **method_data, FstatCommon *common, FstatMethodFuncs *funcs, MultiSFTVector *multiSFTs, const FstatOptionalArgs *optArgs)
#define XLAL_CHECK_CUDA_CALL(...)
static int CopyMultiCOMPLEX8TimeSeriesDtoH(MultiCOMPLEX8TimeSeries **dst, MultiCOMPLEX8TimeSeries *src)
__global__ void CUDAApplyHetAMInterp(cuComplex *TS_a, cuComplex *TS_b, cuComplex *TStmp1, cuComplex *TStmp2, UINT4 numSamples)
int XLALGetFstatTiming_ResampCUDA(const void *method_data, FstatTimingGeneric *timingGeneric, FstatTimingModel *timingModel)
__global__ void CUDAApplyHetAMSrc(cuComplex *TStmp1_SRC_data, cuComplex *TStmp2_SRC_data, REAL8 *ti_DET_data, UINT4 iStart_SRC_al, REAL8 tStart_SRC_0, REAL8 tStart_DET_0, REAL8 dt_SRC, REAL8 tMid_DET_al, REAL8 tMid_SRC_al, REAL8 Tdot_al, REAL8 fHet, REAL4 a_al, REAL4 b_al, UINT4 numSamples)
static int MoveMultiCOMPLEX8TimeSeriesHtoD(MultiCOMPLEX8TimeSeries *multi)
#define XLAL_CHECK_CUFFT_CALL(...)
__global__ void CUDANormFaFb(cuComplex *Fa_out, cuComplex *Fb_out, REAL8 FreqOut0, REAL8 dFreq, REAL8 dtauX, REAL8 dt_SRC, UINT4 numFreqBins)
static int XLALApplySpindownAndFreqShiftCUDA(cuComplex *xOut, const COMPLEX8TimeSeries *xIn, const PulsarDopplerParams *doppler, REAL8 freqShift)
static void XLALDestroyResampCUDAMethodData(void *method_data)
FstatQuantities
Bit-field of -statistic quantities which can be computed by XLALComputeFstat().
Definition: ComputeFstat.h:94
@ FSTATQ_2F
Compute multi-detector .
Definition: ComputeFstat.h:96
@ FSTATQ_FAFB_PER_DET
Compute and for each detector.
Definition: ComputeFstat.h:99
@ FSTATQ_FAFB
Compute multi-detector and .
Definition: ComputeFstat.h:97
@ FSTATQ_NONE
Do not compute -statistic, still compute buffered quantities.
Definition: ComputeFstat.h:95
@ FSTATQ_2F_CUDA
Compute multi-detector , store results on CUDA GPU (CUDA implementation of Resamp only).
Definition: ComputeFstat.h:101
@ FSTATQ_ATOMS_PER_DET
Compute per-SFT -statistic atoms for each detector (Demod only).
Definition: ComputeFstat.h:100
@ FSTATQ_2F_PER_DET
Compute for each detector.
Definition: ComputeFstat.h:98
static const REAL8 LAL_FACT_INV[]
#define LAL_FACT_MAX
void XLALDestroyMultiAMCoeffs(MultiAMCoeffs *multiAMcoef)
Destroy a MultiAMCoeffs structure.
Definition: LALComputeAM.c:469
MultiAMCoeffs * XLALComputeMultiAMCoeffs(const MultiDetectorStateSeries *multiDetStates, const MultiNoiseWeights *multiWeights, SkyPosition skypos)
Multi-IFO version of XLALComputeAMCoeffs().
Definition: LALComputeAM.c:379
#define LAL_REAL8_EPS
unsigned char BOOLEAN
#define XLAL_INIT_MEM(x)
uint64_t UINT8
double REAL8
#define XLAL_INIT_DECL(var,...)
int64_t INT8
uint32_t UINT4
float complex COMPLEX8
int32_t INT4
float REAL4
void * XLALCalloc(size_t m, size_t n)
void * XLALMalloc(size_t n)
void * XLALRealloc(void *p, size_t n)
void XLALFree(void *p)
#define LAL_UINT4_FORMAT
MultiCOMPLEX8TimeSeries * XLALMultiSFTVectorToCOMPLEX8TimeSeries(const MultiSFTVector *multisfts)
Turn the given multiSFTvector into multiple long COMPLEX8TimeSeries, properly dealing with gaps.
void XLALDestroyMultiCOMPLEX8TimeSeries(MultiCOMPLEX8TimeSeries *multiTimes)
Destroy a MultiCOMPLEX8TimeSeries structure.
REAL8 XLALGetCPUTime(void)
#define PULSAR_MAX_DETECTORS
maximal number of detectors we can handle (for static arrays of detector quantities)
#define PULSAR_MAX_SPINS
maximal number of spin-parameters (Freq + spindowns) we can handle
void XLALDestroyMultiSFTVector(MultiSFTVector *multvect)
Destroy a multi SFT-vector.
Definition: SFTtypes.c:424
int XLALMultiSFTVectorResizeBand(MultiSFTVector *multiSFTs, REAL8 f0, REAL8 Band)
Resize the frequency-band of a given multi-SFT vector to [f0, f0+Band].
Definition: SFTtypes.c:946
MultiSSBtimes * XLALGetMultiSSBtimes(const MultiDetectorStateSeries *multiDetStates, SkyPosition skypos, LIGOTimeGPS refTime, SSBprecision precision)
Multi-IFO version of XLALGetSSBtimes().
Definition: SSBtimes.c:657
void XLALDestroyMultiSSBtimes(MultiSSBtimes *multiSSB)
Destroy a MultiSSBtimes structure.
Definition: SSBtimes.c:845
int XLALAddMultiBinaryTimes(MultiSSBtimes **multiSSBOut, const MultiSSBtimes *multiSSBIn, const PulsarDopplerParams *Doppler)
Multi-IFO version of XLALAddBinaryTimes().
Definition: SSBtimes.c:413
COORDINATESYSTEM_EQUATORIAL
COMPLEX8TimeSeries * XLALCreateCOMPLEX8TimeSeries(const CHAR *name, const LIGOTimeGPS *epoch, REAL8 f0, REAL8 deltaT, const LALUnit *sampleUnits, size_t length)
const LALUnit lalDimensionlessUnit
#define XLAL_ERROR(...)
#define XLAL_CHECK(assertion,...)
int int XLALPrintWarning(const char *fmt,...) _LAL_GCC_PRINTF_FORMAT_(1
#define XLAL_CHECK_NULL(assertion,...)
XLAL_ENOMEM
XLAL_SUCCESS
XLAL_EFAULT
XLAL_EFUNC
XLAL_EDOM
XLAL_ESYS
XLAL_EINVAL
XLAL_EFAILED
REAL8 XLALGPSGetREAL8(const LIGOTimeGPS *epoch)
dst
src
out
#define MYMIN(x, y)
#define MYMAX(x, y)
double alpha
size_t numDetectors
This structure contains the per-SFT (weighted) antenna-pattern functions , with the SFT-index,...
Definition: LALComputeAM.h:63
REAL4 B
summed antenna-pattern matrix coefficient:
Definition: LALComputeAM.h:67
REAL4Vector * b
(weighted) per-SFT antenna-pattern function
Definition: LALComputeAM.h:65
REAL4 A
summed antenna-pattern matrix coefficient:
Definition: LALComputeAM.h:66
REAL4 C
summed antenna-pattern matrix coefficient:
Definition: LALComputeAM.h:68
REAL4Vector * a
(weighted) per-SFT antenna-pattern function
Definition: LALComputeAM.h:64
REAL4 D
determinant
Definition: LALComputeAM.h:69
Struct holding the "antenna-pattern" matrix , in terms of the multi-detector scalar product.
Definition: LALComputeAM.h:127
REAL4 Dd
determinant factor , such that
Definition: LALComputeAM.h:132
CHAR name[LALNameLength]
LIGOTimeGPS epoch
COMPLEX8Sequence * data
COMPLEX8 * data
MultiDetectorStateSeries * multiDetectorStates
SSBprecision SSBprec
MultiLIGOTimeGPSVector * multiTimestamps
MultiNoiseWeights * multiNoiseWeights
void(* workspace_destroy_func)(void *)
void(* method_data_destroy_func)(void *)
int(* compute_func)(FstatResults *, const FstatCommon *, void *)
Struct of optional 'advanced level' and (potentially method-specific) arguments to be passed to the ...
Definition: ComputeFstat.h:136
BOOLEAN resampFFTPowerOf2
Resamp: round up FFT lengths to next power of 2; see FstatMethodType.
Definition: ComputeFstat.h:147
BOOLEAN collectTiming
a flag to turn on/off the collection of F-stat-method-specific timing-data
Definition: ComputeFstat.h:146
UINT4 Dterms
Number of Dirichlet kernel terms, used by some Demod methods; see FstatMethodType.
Definition: ComputeFstat.h:139
XLALComputeFstat() computed results structure.
Definition: ComputeFstat.h:201
AntennaPatternMatrix MmunuX[PULSAR_MAX_DETECTORS]
Per detector antenna pattern matrix , used in computing .
Definition: ComputeFstat.h:230
COMPLEX8 * Fb
Definition: ComputeFstat.h:259
AntennaPatternMatrix Mmunu
Antenna pattern matrix , used in computing .
Definition: ComputeFstat.h:227
REAL4 * twoFPerDet[PULSAR_MAX_DETECTORS]
If whatWasComputed & FSTATQ_2F_PER_DET is true, the values computed at numFreqBins frequencies space...
Definition: ComputeFstat.h:268
FstatQuantities whatWasComputed
Bit-field of which -statistic quantities were computed.
Definition: ComputeFstat.h:233
REAL4 * twoF_CUDA
If whatWasComputed & FSTATQ_2F_CUDA is true, the multi-detector values as for twoF,...
Definition: ComputeFstat.h:247
COMPLEX8 * FaPerDet[PULSAR_MAX_DETECTORS]
If whatWasComputed & FSTATQ_PARTS_PER_DET is true, the and values computed at numFreqBins frequenci...
Definition: ComputeFstat.h:278
UINT4 numFreqBins
Number of frequencies at which the were computed.
Definition: ComputeFstat.h:216
REAL4 * twoF
If whatWasComputed & FSTATQ_2F is true, the multi-detector values computed at numFreqBins frequencie...
Definition: ComputeFstat.h:241
COMPLEX8 * Fa
If whatWasComputed & FSTATQ_PARTS is true, the multi-detector and computed at numFreqBins frequenci...
Definition: ComputeFstat.h:258
COMPLEX8 * FbPerDet[PULSAR_MAX_DETECTORS]
Definition: ComputeFstat.h:279
PulsarDopplerParams doppler
Doppler parameters, including the starting frequency, at which the were computed.
Definition: ComputeFstat.h:205
Generic F-stat timing coefficients (times in seconds) [see https://dcc.ligo.org/LIGO-T1600531-v4 for ...
Definition: ComputeFstat.h:307
Struct to carry the -statistic method-specific timing model in terms of a list of variable names and...
Definition: ComputeFstat.h:326
A vector of 'timestamps' of type LIGOTimeGPS.
Definition: SFTfileIO.h:188
REAL8 deltaT
'length' of each timestamp (e.g.
Definition: SFTfileIO.h:194
LIGOTimeGPS * data
array of timestamps
Definition: SFTfileIO.h:193
UINT4 length
number of timestamps
Definition: SFTfileIO.h:192
Multi-IFO container for antenna-pattern coefficients and atenna-pattern matrix .
Definition: LALComputeAM.h:137
AMCoeffs ** data
noise-weighted AM-coeffs , and
Definition: LALComputeAM.h:139
AntennaPatternMatrix Mmunu
antenna-pattern matrix
Definition: LALComputeAM.h:140
Multi-IFO container for COMPLEX8 resampled timeseries.
Definition: LFTandTSutils.h:52
COMPLEX8TimeSeries ** data
array of COMPLEX8TimeSeries (pointers)
Definition: LFTandTSutils.h:54
UINT4 length
number of IFOs
Definition: LFTandTSutils.h:53
LIGOTimeGPSVector ** data
timestamps vector for each ifo
Definition: SFTfileIO.h:203
A collection of SFT vectors – one for each IFO in a multi-IFO search.
Definition: SFTfileIO.h:179
Multi-IFO container for SSB timings.
Definition: SSBtimes.h:67
SSBtimes ** data
array of SSBtimes (pointers)
Definition: SSBtimes.h:69
Type containing the 'Doppler-parameters' affecting the time-evolution of the phase.
REAL8 period
Binary: orbital period (sec)
LIGOTimeGPS tp
Binary: time of observed periapsis passage (in SSB)
PulsarSpins fkdot
Intrinsic spins: [Freq, f1dot, f2dot, ...
REAL8 Delta
Sky position: DEC (latitude) in equatorial coords and radians.
LIGOTimeGPS refTime
Reference time of pulsar parameters (in SSB!)
REAL8 Alpha
Sky position: RA (longitude) in equatorial coords and radians.
REAL8 ecc
Binary: orbital eccentricity.
REAL8 asini
Binary: projected, normalized orbital semi-major axis (s).
REAL8 argp
Binary: argument of periapsis (radians)
REAL4 * data
REAL8 * data
MultiCOMPLEX8TimeSeries * hostCopy4ExtractTS_SRC_b
AntennaPatternMatrix MmunuX[PULSAR_MAX_DETECTORS]
PulsarDopplerParams prev_doppler
MultiCOMPLEX8TimeSeries * hostCopy4ExtractTS_SRC_a
MultiCOMPLEX8TimeSeries * multiTimeSeries_SRC_b
MultiCOMPLEX8TimeSeries * multiTimeSeries_SRC_a
FstatTimingGeneric timingGeneric
MultiCOMPLEX8TimeSeries * multiTimeSeries_DET
Simple container for two REAL8-vectors, namely the SSB-timings DeltaT_alpha and Tdot_alpha,...
Definition: SSBtimes.h:60
REAL8Vector * Tdot
dT/dt : time-derivative of SSB-time wrt local time for SFT-alpha
Definition: SSBtimes.h:63
REAL8Vector * DeltaT
Time-difference of SFT-alpha - tau0 in SSB-frame.
Definition: SSBtimes.h:62
LIGOTimeGPS refTime
reference-time 'tau0'
Definition: SSBtimes.h:61
REAL8 longitude
REAL8 latitude
CoordinateSystem system
#define max(a, b)
enum @1 epoch