LALPulsar  6.1.0.1-fe68b98
ComputeFstat_Demod_ComputeFaFb.c
Go to the documentation of this file.
1 //
2 // Copyright (C) 2012--2015 Karl Wette
3 // Copyright (C) 2005--2007, 2009, 2010, 2012, 2014 Reinhard Prix
4 // Copyright (C) 2007--2010, 2012 Bernd Machenschalk
5 // Copyright (C) 2007 Chris Messenger
6 // Copyright (C) 2006 John T. Whelan, Badri Krishnan
7 //
8 // This program is free software; you can redistribute it and/or modify
9 // it under the terms of the GNU General Public License as published by
10 // the Free Software Foundation; either version 2 of the License, or
11 // (at your option) any later version.
12 //
13 // This program is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 // GNU General Public License for more details.
17 //
18 // You should have received a copy of the GNU General Public License
19 // along with with program; see the file COPYING. If not, write to the
20 // Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
21 // MA 02110-1301 USA
22 //
23 
24 // this function definition 'template' requires 2 macros to be set:
25 // FUNC: the function name
26 // HOTLOOP_SOURCE: the filename to be included containing the hotloop source
27 
28 int FUNC( COMPLEX8 *Fa, COMPLEX8 *Fb, FstatAtomVector **FstatAtoms, const SFTVector *sfts,
29  const PulsarSpins fkdot, const SSBtimes *tSSB, const AMCoeffs *amcoe, const UINT4 Dterms );
30 
31 // ComputeFaFb: DTERMS define used for loop unrolling in some hotloop variants
32 #define DTERMS 8
33 #define LD_SMALL4 (2.0e-4) /* "small" number for REAL4*/
34 #define OOTWOPI (1.0 / LAL_TWOPI) /* 1/2pi */
35 #define TWOPI_FLOAT 6.28318530717958f /* single-precision 2*pi */
36 #define OOTWOPI_FLOAT (1.0f / TWOPI_FLOAT) /* single-precision 1 / (2pi) */
37 
38 // somehow the branch prediction of gcc-4.1.2 terribly fails
39 // So let's give gcc a hint which path has a higher probablility
40 #ifdef __GNUC__
41 #define likely(x) __builtin_expect((x),1)
42 #else
43 #define likely(x) (x)
44 #endif
45 
46 // Revamped version of LALDemod() (based on TestLALDemod() in CFS).
47 // Compute JKS's Fa and Fb, which are ingredients for calculating the F-statistic.
48 int
49 FUNC( COMPLEX8 *Fa, /* [out] Fa returned */
50  COMPLEX8 *Fb, /* [out] Fb returned */
51  FstatAtomVector **FstatAtoms, /* [in,out] if !NULL: return Fstat atoms vector */
52  const SFTVector *sfts, /* [in] input SFTs */
53  const PulsarSpins fkdot, /* [in] frequency and derivatives fkdot = d^kf/dt^k */
54  const SSBtimes *tSSB, /* [in] SSB timing series for particular sky-direction */
55  const AMCoeffs *amcoe, /* [in] antenna-pattern coefficients for this sky-direction */
56  const UINT4 Dterms /* [in] Dterms to keep in Dirichlet kernel */
57  )
58 {
59 
60  /* ----- check validity of input */
61  if ( !Fa || !Fb ) {
62  XLALPrintError( "\nOutput-pointer is NULL !\n\n" );
64  }
65 
66  if ( !sfts || !sfts->data ) {
67  XLALPrintError( "\nInput SFTs are NULL!\n\n" );
69  }
70 
71  if ( !tSSB || !tSSB->DeltaT || !tSSB->Tdot || !amcoe || !amcoe->a || !amcoe->b ) {
72  XLALPrintError( "\nIllegal NULL in input !\n\n" );
74  }
75 
77  XLALPrintError( "\nInverse factorials table only up to order s=%d, can't handle %d spin-order\n\n",
80  }
81 
82  UINT4 alpha; /* loop index over SFTs */
83  UINT4 spdnOrder; /* maximal spindown-orders */
84  UINT4 numSFTs; /* number of SFTs (M in the Notes) */
85  REAL8 Tsft; /* length of SFTs in seconds */
86  INT4 freqIndex0; /* index of first frequency-bin in SFTs */
87  INT4 freqIndex1; /* index of last frequency-bin in SFTs */
88 
89  REAL4 *a_al, *b_al; /* pointer to alpha-arrays over a and b */
90  REAL8 *DeltaT_al, *Tdot_al; /* pointer to alpha-arrays of SSB-timings */
91  SFTtype *SFT_al; /* SFT alpha */
92 
93  /* ----- prepare convenience variables */
94  numSFTs = sfts->length;
95  Tsft = 1.0 / sfts->data[0].deltaF;
96  {
97  REAL8 dFreq = sfts->data[0].deltaF;
98  freqIndex0 = ( UINT4 )( sfts->data[0].f0 / dFreq + 0.5 ); /* lowest freqency-index */
99  freqIndex1 = freqIndex0 + sfts->data[0].data->length;
100  }
101 
102  // locally initialize sin/cos lookuptable, as some hotloops use that directly
103  static int firstcall = 1;
104  if ( firstcall ) {
106  firstcall = 0;
107  }
108 
109  /* ----- prepare return of 'FstatAtoms' if requested */
110  if ( FstatAtoms != NULL ) {
111  XLAL_CHECK( ( *FstatAtoms == NULL ), XLAL_EINVAL );
112  XLAL_CHECK( ( ( *FstatAtoms ) = XLALCreateFstatAtomVector( numSFTs ) ) != NULL, XLAL_EFUNC );
113 
114  ( *FstatAtoms )->TAtom = Tsft; /* time-baseline of returned atoms is Tsft */
115 
116  } /* if returnAtoms */
117 
118  // ----- find highest non-zero spindown-entry ----------
119  for ( spdnOrder = PULSAR_MAX_SPINS - 1; spdnOrder > 0 ; spdnOrder -- )
120  if ( fkdot[spdnOrder] != 0.0 ) {
121  break;
122  }
123 
124  ( *Fa ) = 0.0f;
125  ( *Fb ) = 0.0f;
126 
127  a_al = amcoe->a->data; /* point to beginning of alpha-arrays */
128  b_al = amcoe->b->data;
129  DeltaT_al = tSSB->DeltaT->data;
130  Tdot_al = tSSB->Tdot->data;
131  SFT_al = sfts->data;
132 
133  /* Loop over all SFTs */
134  for ( alpha = 0; alpha < numSFTs; alpha++ ) {
135  REAL4 a_alpha, b_alpha;
136 
137  INT4 kstar; /* central frequency-bin k* = round(xhat_alpha) */
138  INT4 k0, k1;
139 
140  COMPLEX8 *Xalpha = SFT_al->data->data; /* pointer to current SFT-data */
141  REAL4 realQ, imagQ; /* Re and Im of Q = e^{i 2 pi lambda_alpha} */
142  REAL4 realXP, imagXP; /* Re/Im of sum_k X_ak * P_ak */
143  REAL4 realQXP, imagQXP; /* Re/Im of Q_alpha R_alpha */
144 
145  REAL8 lambda_alpha, kappa_star;
146 
147  /* ----- calculate lambda_alpha */
148  {
149  UINT4 s; /* loop-index over spindown-order */
150  REAL8 phi_alpha, Dphi_alpha, DT_al;
151  REAL8 Tas; /* temporary variable to calculate (DeltaT_alpha)^s */
152  REAL8 TAS_invfact_s;
153 
154  /* init for s=0 */
155  phi_alpha = 0.0;
156  Dphi_alpha = 0.0;
157  DT_al = ( *DeltaT_al );
158  Tas = 1.0; /* DeltaT_alpha ^ 0 */
159  TAS_invfact_s = 1.0; /* TAS / s! */
160 
161  for ( s = 0; s <= spdnOrder; s++ ) {
162  REAL8 fsdot = fkdot[s];
163  Dphi_alpha += fsdot * TAS_invfact_s; /* here: DT^s/s! */
164  Tas *= DT_al; /* now: DT^(s+1) */
165  TAS_invfact_s = Tas * LAL_FACT_INV[s + 1];
166  phi_alpha += fsdot * TAS_invfact_s;
167  } /* for s <= spdnOrder */
168 
169  /* Step 3: apply global factors to complete Dphi_alpha */
170  Dphi_alpha *= Tsft * ( *Tdot_al ); /* guaranteed > 0 ! */
171  lambda_alpha = 0.5 * Dphi_alpha - phi_alpha;
172 
173  kstar = ( INT4 )( Dphi_alpha ); /* k* = floor(Dphi_alpha) for positive Dphi */
174  kappa_star = Dphi_alpha - 1.0 * kstar; /* remainder of Dphi_alpha: >= 0 ! */
175 
176  /* ----- check that required frequency-bins are found in the SFTs ----- */
177  k0 = kstar - Dterms + 1;
178  k1 = kstar + Dterms;
179 
180  if ( ( k0 < freqIndex0 ) || ( k1 > freqIndex1 ) ) {
181  XLAL_ERROR( XLAL_EDOM, "Required frequency-bins [%d, %d] not covered by SFT-interval [%d, %d]\n"
182  "\t\t[Parameters: alpha:%d, Dphi_alpha:%e, Tsft:%e, *Tdot_al:%e]\n",
183  k0, k1, freqIndex0, freqIndex1, alpha, Dphi_alpha, Tsft, *Tdot_al );
184  }
185 
186  } /* compute kappa_star, lambda_alpha */
187 
188  /* ---------- calculate the (truncated to Dterms) sum over k ---------- */
189 
190  /* ---------- ATTENTION: this the "hot-loop", which will be
191  * executed many millions of times, so anything in here
192  * has a HUGE impact on the whole performance of the code.
193  *
194  * DON'T touch *anything* in here unless you really know
195  * what you're doing !!
196  *------------------------------------------------------------
197  */
198  {
199  // ----- start: hotloop ----------
200  COMPLEX8 *Xalpha_l = Xalpha + k0 - freqIndex0; /* first frequency-bin in sum */
201 
202  /* somehow the branch prediction of gcc-4.1.2 terribly failes
203  with the current case distinction in the hot-loop,
204  having a severe impact on runtime of the E@H Linux App.
205  So let's allow to give gcc a hint which path has a higher probablility */
206  if ( likely( ( kappa_star > LD_SMALL4 ) && ( kappa_star < 1.0 - LD_SMALL4 ) ) ) {
207  /* if |remainder| > LD_SMALL4, ie no danger of denominator -> 0 */
208 
209 #include HOTLOOP_SOURCE
210 
211  } /* if */
212  else {
213  /* otherwise: lim_{rem->0}P_alpha,k = 2pi delta_{k,kstar} */
214  UINT4 ind0;
215 
216  /* real- and imaginary part of e^{i 2 pi lambda_alpha } */
217  XLALSinCos2PiLUT( &imagQ, &realQ, lambda_alpha );
218 
219  if ( kappa_star <= LD_SMALL4 ) {
220  ind0 = Dterms - 1;
221  } else {
222  ind0 = Dterms;
223  }
224 
225  realXP = TWOPI_FLOAT * crealf( Xalpha_l[ind0] );
226  imagXP = TWOPI_FLOAT * cimagf( Xalpha_l[ind0] );
227 
228  } /* if |remainder| <= LD_SMALL4 */
229 
230  } // ----- end: hotloop ----------
231 
232  /* real- and imaginary part of e^{i 2 pi lambda_alpha } */
233  realQXP = realQ * realXP - imagQ * imagXP;
234  imagQXP = realQ * imagXP + imagQ * realXP;
235 
236  /* we're done: ==> combine these into Fa and Fb */
237  a_alpha = ( *a_al );
238  b_alpha = ( *b_al );
239 
240  COMPLEX8 Fa_alpha = crect( a_alpha * realQXP, a_alpha * imagQXP );
241  ( *Fa ) += Fa_alpha;
242 
243  COMPLEX8 Fb_alpha = crect( b_alpha * realQXP, b_alpha * imagQXP );
244  ( *Fb ) += Fb_alpha;
245 
246  /* store per-SFT F-stat 'atoms' for transient-CW search */
247  if ( FstatAtoms != NULL ) {
248  ( *FstatAtoms )->data[alpha].timestamp = SFT_al->epoch.gpsSeconds;
249  ( *FstatAtoms )->data[alpha].a2_alpha = a_alpha * a_alpha;
250  ( *FstatAtoms )->data[alpha].b2_alpha = b_alpha * b_alpha;
251  ( *FstatAtoms )->data[alpha].ab_alpha = a_alpha * b_alpha;
252  ( *FstatAtoms )->data[alpha].Fa_alpha = OOTWOPI * Fa_alpha;
253  ( *FstatAtoms )->data[alpha].Fb_alpha = OOTWOPI * Fb_alpha;
254  }
255 
256  /* advance pointers over alpha */
257  a_al ++;
258  b_al ++;
259  DeltaT_al ++;
260  Tdot_al ++;
261  SFT_al ++;
262 
263  } /* for alpha < numSFTs */
264 
265  /* return result */
266  ( *Fa ) *= OOTWOPI;
267  ( *Fb ) *= OOTWOPI;
268 
269  return XLAL_SUCCESS;
270 
271 } // FUNC()
#define likely(x)
#define TWOPI_FLOAT
int FUNC(COMPLEX8 *Fa, COMPLEX8 *Fb, FstatAtomVector **FstatAtoms, const SFTVector *sfts, const PulsarSpins fkdot, const SSBtimes *tSSB, const AMCoeffs *amcoe, const UINT4 Dterms)
int s
coeffs k0
coeffs k1
FstatAtomVector * XLALCreateFstatAtomVector(const UINT4 length)
Create a FstatAtomVector of the given length.
Definition: ComputeFstat.c:276
static const REAL8 LAL_FACT_INV[]
#define LAL_FACT_MAX
#define crect(re, im)
double REAL8
uint32_t UINT4
float complex COMPLEX8
int32_t INT4
float REAL4
REAL8 PulsarSpins[PULSAR_MAX_SPINS]
Typedef for fixed-size array holding GW frequency and derivatives fk = d^k Freq/dt^k|(tau_ref)
#define PULSAR_MAX_SPINS
maximal number of spin-parameters (Freq + spindowns) we can handle
int XLALSinCos2PiLUT(REAL4 *sin2pix, REAL4 *cos2pix, REAL8 x)
Calculate sin(2*pi*x) and cos(2*pi*x) to roughly 1e-7 precision using a lookup-table and Taylor-expan...
Definition: SinCosLUT.c:97
void XLALSinCosLUTInit(void)
Definition: SinCosLUT.c:49
#define XLAL_ERROR(...)
#define XLAL_CHECK(assertion,...)
int XLALPrintError(const char *fmt,...) _LAL_GCC_PRINTF_FORMAT_(1
XLAL_SUCCESS
XLAL_EFUNC
XLAL_EDOM
XLAL_EINVAL
double alpha
This structure contains the per-SFT (weighted) antenna-pattern functions , with the SFT-index,...
Definition: LALComputeAM.h:63
REAL4Vector * b
(weighted) per-SFT antenna-pattern function
Definition: LALComputeAM.h:65
REAL4Vector * a
(weighted) per-SFT antenna-pattern function
Definition: LALComputeAM.h:64
COMPLEX8Sequence * data
A vector of COMPLEX8FrequencySeries.
COMPLEX8FrequencySeries * data
Pointer to the data array.
UINT4 length
Number of elements in array.
COMPLEX8 * data
A vector of -statistic 'atoms', i.e.
Definition: ComputeFstat.h:174
REAL4 * data
REAL8 * data
Simple container for two REAL8-vectors, namely the SSB-timings DeltaT_alpha and Tdot_alpha,...
Definition: SSBtimes.h:60
REAL8Vector * Tdot
dT/dt : time-derivative of SSB-time wrt local time for SFT-alpha
Definition: SSBtimes.h:63
REAL8Vector * DeltaT
Time-difference of SFT-alpha - tau0 in SSB-frame.
Definition: SSBtimes.h:62