LALPulsar 7.1.1.1-b246709
ppe_utils.c
/*
* Copyright (C) 2014 Matthew Pitkin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to the
* Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301 USA
*/

/******************************************************************************/
/*                             HELPER FUNCTIONS                               */
/******************************************************************************/

#include "config.h"
#include "ppe_utils.h"
#include "ppe_models.h"

/** \brief Compute the noise variance for each data segment
 *
 * Once the data has been split into segments, calculate the noise variance (using
 * both the real and imaginary parts) in each segment and fill in the associated
 * noise vector. To calculate the noise, the running median should first be
 * subtracted from the data.
 *
 * \param data [in] the LALInferenceIFOData variable
 * \param model [in] the LALInferenceIFOModel variable
 */
void compute_variance( LALInferenceIFOData *data, LALInferenceIFOModel *model )
{
  REAL8 chunkLength = 0.;

  INT4 i = 0, j = 0, length = 0, cl = 0, counter = 0;

  COMPLEX16Vector *meddata = NULL; /* data with running median removed */

  /* subtract a running median value from the data to remove any underlying
     trends (e.g. caused by a strong signal) */
  meddata = subtract_running_median( data->compTimeData->data );

  UINT4Vector *chunkLengths = NULL;
  chunkLengths = *( UINT4Vector ** )LALInferenceGetVariable( model->params, "chunkLength" );

  length = data->compTimeData->data->length;

  for ( i = 0, counter = 0; i < length; i += chunkLength, counter++ ) {
    REAL8 vari = 0., meani = 0.;

    chunkLength = ( REAL8 )chunkLengths->data[counter];
    cl = i + ( INT4 )chunkLength;

    /* get the mean (should be close to zero given the running median
     * subtraction, but probably worth computing anyway) */
    for ( j = i ; j < cl ; j++ ) {
      meani += ( creal( meddata->data[j] ) + cimag( meddata->data[j] ) );
    }

    meani /= ( 2.*chunkLength );

    for ( j = i ; j < cl ; j++ ) {
      vari += SQUARE( ( creal( meddata->data[j] ) - meani ) );
      vari += SQUARE( ( cimag( meddata->data[j] ) - meani ) );
    }

    vari /= ( 2.*chunkLength - 1. ); /* unbiased sample variance */

    /* fill in variance vector */
    for ( j = i ; j < cl ; j++ ) {
      data->varTimeData->data->data[j] = vari;
    }
  }
}

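/* Usage sketch (added for illustration; not part of the original file). It assumes
 * `ifodata` and `ifomodel` are populated LALInferenceIFOData/LALInferenceIFOModel
 * structures; compute_variance() requires that a "chunkLength" vector has already
 * been placed in the model parameters, e.g. by chop_n_merge() below. */
#if 0
UINT4Vector *seglens = chop_n_merge( ifodata, 5, 0, 0 );
LALInferenceAddVariable( ifomodel->params, "chunkLength", &seglens,
                         LALINFERENCE_UINT4Vector_t, LALINFERENCE_PARAM_FIXED );
compute_variance( ifodata, ifomodel );
#endif
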
/**
 * \brief Split the data into segments
 *
 * This function is deprecated in favour of \c chop_n_merge, but retains the functionality of the old code.
 *
 * It cuts the data into as many contiguous segments as possible of length \c chunkMax, where contiguous is
 * defined as containing consecutive points within two sample times of each other. Segments that do not fill
 * a whole \c chunkMax length are also included at their shorter lengths.
 *
 * \param ifo [in] the LALInferenceIFOModel variable
 * \param chunkMax [in] the maximum length of a data chunk/segment
 *
 * \return A vector of chunk/segment lengths
 */
UINT4Vector *get_chunk_lengths( LALInferenceIFOModel *ifo, UINT4 chunkMax )
{
  UINT4 i = 0, j = 0, count = 0;
  UINT4 length;

  REAL8 t1, t2;

  UINT4Vector *chunkLengths = NULL;

  length = IFO_XTRA_DATA( ifo )->times->length;

  chunkLengths = XLALCreateUINT4Vector( length );

  REAL8 dt = *( REAL8 * )LALInferenceGetVariable( ifo->params, "dt" );

  /* create vector of data segment lengths */
  while ( 1 ) {
    count++; /* counter */

    /* break clause */
    if ( i > length - 2 ) {
      /* set final value of chunkLength */
      chunkLengths->data[j] = count;
      j++;
      break;
    }

    i++;

    t1 = XLALGPSGetREAL8( &IFO_XTRA_DATA( ifo )->times->data[i - 1] );
    t2 = XLALGPSGetREAL8( &IFO_XTRA_DATA( ifo )->times->data[i] );

    /* if consecutive points are within two sample times of each other count as in the same chunk */
    if ( t2 - t1 > 2.*dt || count == chunkMax ) {
      chunkLengths->data[j] = count;
      count = 0; /* reset counter */

      j++;
    }
  }

  chunkLengths = XLALResizeUINT4Vector( chunkLengths, j );

  return chunkLengths;
}

/**
 * \brief Chops and remerges data into stationary segments
 *
 * This function uses change point analysis to find segments of data that appear to be stationary, i.e. lengths of
 * data that look like they have the same statistics, e.g. the same standard deviation.
 *
 * The function first attempts to chop up the data into as many stationary segments as possible. The splitting may not
 * be optimal, so it then tries remerging consecutive segments to see if the merged segments show more evidence of
 * stationarity. <b>[NOTE: Remerging is currently turned off and will make very little difference to the algorithm]</b>.
 * It then, if necessary, chops the segments again to make sure there are none greater than the required \c chunkMax.
 * The default \c chunkMax is 0, so this rechopping will not normally happen.
 *
 * This is all performed on data that has had a running median subtracted, to try to remove any underlying trends in
 * the data (e.g. those caused by a strong signal), which might affect the calculations (which assume the data is
 * Gaussian with zero mean).
 *
 * If \c outputchunks is non-zero then a list of the segments will be output to a file called \c data_segment_list.txt,
 * prefixed with the detector name.
 *
 * \param data [in] A data structure
 * \param chunkMin [in] The minimum length of a segment
 * \param chunkMax [in] The maximum length of a segment
 * \param outputchunks [in] A flag to check whether to output the segments
 *
 * \return A vector of segment/chunk lengths
 *
 * \sa subtract_running_median
 * \sa chop_data
 * \sa merge_data
 * \sa rechop_data
 */
UINT4Vector *chop_n_merge( LALInferenceIFOData *data, UINT4 chunkMin, UINT4 chunkMax, UINT4 outputchunks )
{
  UINT4 j = 0;

  UINT4Vector *chunkLengths = NULL;
  UINT4Vector *chunkIndex = NULL;

  COMPLEX16Vector *meddata = NULL;

  /* subtract a running median value from the data to remove any underlying trends (e.g. caused by a strong signal) that
   * might affect the chunk calculations (which can assume the data is Gaussian with zero mean). */
  meddata = subtract_running_median( data->compTimeData->data );

  /* pass chop_data a gsl_vector_view, so that internally it can use vector views rather than having to create new vectors */
  gsl_vector_complex_view meddatagsl = gsl_vector_complex_view_array( ( double * )meddata->data, meddata->length );
  chunkIndex = chop_data( &meddatagsl.vector, chunkMin );

  /* DON'T BOTHER WITH THE MERGING AS IT WILL MAKE VERY LITTLE DIFFERENCE */
  /* merge_data( meddata, &chunkIndex ); */

  XLALDestroyCOMPLEX16Vector( meddata ); /* free memory */

  /* if a maximum chunk length is defined then rechop up the data, to segment any chunks longer than this value */
  if ( chunkMax > chunkMin ) {
    rechop_data( &chunkIndex, chunkMax, chunkMin );
  }

  chunkLengths = XLALCreateUINT4Vector( chunkIndex->length );

  /* go through segments and turn into vector of chunk lengths */
  for ( j = 0; j < chunkIndex->length; j++ ) {
    if ( j == 0 ) {
      chunkLengths->data[j] = chunkIndex->data[j];
    } else {
      chunkLengths->data[j] = chunkIndex->data[j] - chunkIndex->data[j - 1];
    }
  }

  /* if required output the chunk end indices and lengths to a file */
  if ( outputchunks ) {
    FILE *fpsegs = NULL;

    /* set detector name as prefix */
    CHAR *outfile = NULL;
    outfile = XLALStringDuplicate( data->detector->frDetector.prefix );
    outfile = XLALStringAppend( outfile, "data_segment_list.txt" );

    /* check if file exists, i.e. given multiple frequency harmonics, and if so open for appending */
    FILE *fpcheck = NULL;
    if ( ( fpcheck = fopen( outfile, "r" ) ) != NULL ) {
      fclose( fpcheck );
      if ( ( fpsegs = fopen( outfile, "a" ) ) == NULL ) {
        fprintf( stderr, "Non-fatal error opening file to output segment list.\n" );
        return chunkLengths;
      }
    } else {
      if ( ( fpsegs = fopen( outfile, "w" ) ) == NULL ) {
        fprintf( stderr, "Non-fatal error opening file to output segment list.\n" );
        return chunkLengths;
      }
    }
    XLALFree( outfile );

    for ( j = 0; j < chunkIndex->length; j++ ) {
      fprintf( fpsegs, "%u\t%u\n", chunkIndex->data[j], chunkLengths->data[j] );
    }

    /* add space at the end so that you can separate lists from different detector data streams */
    fprintf( fpsegs, "\n" );

    fclose( fpsegs );
  }

  XLALDestroyUINT4Vector( chunkIndex );

  return chunkLengths;
}

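/* Worked illustration (added; not part of the original file): chop_data() returns
 * cumulative segment end indices, so the consecutive differences taken above give
 * the segment lengths. For example, end indices {120, 200, 230} yield lengths
 * {120, 80, 30}. */
#if 0
UINT4 ends[3] = { 120, 200, 230 }, lens[3];
for ( UINT4 k = 0; k < 3; k++ ) {
  lens[k] = ( k == 0 ) ? ends[0] : ends[k] - ends[k - 1];
}
#endif
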
/**
 * \brief Subtract the running median from complex data
 *
 * This function uses \c gsl_stats_median_from_sorted_data to subtract a running median, calculated from the 30
 * consecutive points around each point, from the data. At the start of the data the median is calculated from
 * the first 15+i points, and at the end from the final 15+(N-i) points, where i is the (1-based) point index and
 * N is the total number of data points.
 *
 * \param data [in] A complex data vector
 *
 * \return A complex vector containing data with the running median removed
 */
COMPLEX16Vector *subtract_running_median( COMPLEX16Vector *data )
{
  COMPLEX16Vector *submed = NULL;
  UINT4 length = data->length, i = 0, j = 0, n = 0;
  UINT4 mrange = 0;
  UINT4 N = 0;
  INT4 sidx = 0;

  if ( length > 30 ) {
    mrange = 30; /* perform running median with 30 data points */
  } else {
    mrange = 2 * floor( ( length - 1 ) / 2 ); /* an even number less than length */
  }
  N = ( UINT4 )floor( mrange / 2 );

  submed = XLALCreateCOMPLEX16Vector( length );

  for ( i = 1; i < length + 1; i++ ) {
    REAL8 *dre = NULL;
    REAL8 *dim = NULL;

    /* get median of data within RANGE */
    if ( i < N ) {
      n = N + i;
      sidx = 0;
    } else {
      if ( i > length - N ) {
        n = length - i + N;
      } else {
        n = mrange;
      }

      sidx = i - N;
    }

    dre = XLALCalloc( n, sizeof( REAL8 ) );
    dim = XLALCalloc( n, sizeof( REAL8 ) );

    for ( j = 0; j < n; j++ ) {
      dre[j] = creal( data->data[j + sidx] );
      dim[j] = cimag( data->data[j + sidx] );
    }

    /* sort data */
    gsl_sort( dre, 1, n );
    gsl_sort( dim, 1, n );

    /* get median and subtract from data */
    submed->data[i - 1] = ( creal( data->data[i - 1] ) - gsl_stats_median_from_sorted_data( dre, 1, n ) )
                          + I * ( cimag( data->data[i - 1] ) - gsl_stats_median_from_sorted_data( dim, 1, n ) );

    XLALFree( dre );
    XLALFree( dim );
  }

  return submed;
}

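/* Illustration (added; not part of the original file): the window sizes used above
 * for a 100-point vector (mrange = 30, N = 15). The first point uses a 16-point
 * window, central points the full 30 points, and the final point the last 15. */
#if 0
UINT4 len = 100, Nhalf = 15, win = 0;
for ( UINT4 i = 1; i <= len; i++ ) {
  if ( i < Nhalf ) {
    win = Nhalf + i;               /* growing window at the start */
  } else if ( i > len - Nhalf ) {
    win = len - i + Nhalf;         /* shrinking window at the end */
  } else {
    win = 30;                      /* full running-median window */
  }
}
#endif
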
/**
 * \brief Chops the data into stationary segments based on Bayesian change point analysis
 *
 * This function splits data into two (and recursively runs on those two segments) if it is found that the odds ratio
 * for them being from two independent Gaussian distributions is greater than a certain threshold.
 *
 * The threshold for the natural logarithm of the odds ratio is empirically set to be
 * \f[
 * T = 4.07 + 1.33\log_{10}{N},
 * \f]
 * where \f$ N \f$ is the length in samples of the dataset. This is based on Monte Carlo simulations of
 * many realisations of Gaussian noise for data of different lengths. The threshold comes from a linear
 * fit to the log odds ratios required to give a 1% chance of splitting Gaussian data (drawn from a single
 * distribution) for data of various lengths. Note, however, that this relation does not hold well for stretches of
 * data shorter than about 30 points, and is in fact rather conservative for such short stretches, i.e. they will
 * require relatively larger odds ratios to be split than longer stretches.
 *
 * \param data [in] A complex data vector
 * \param chunkMin [in] The minimum allowed segment length
 *
 * \return A vector of segment lengths
 *
 * \sa find_change_point
 */
UINT4Vector *chop_data( gsl_vector_complex *data, UINT4 chunkMin )
{
  UINT4Vector *chunkIndex = NULL;

  UINT4 length = ( UINT4 )data->size;

  REAL8 logodds = 0.;
  UINT4 changepoint = 0;

  REAL8 threshold = 0.; /* may need tuning or setting globally */

  chunkIndex = XLALCreateUINT4Vector( 1 );

  changepoint = find_change_point( data, &logodds, chunkMin );

  /* threshold scaling for a 0.5% false alarm probability of splitting Gaussian data */
  threshold = 4.07 + 1.33 * log10( ( REAL8 )length );

  if ( logodds > threshold ) {
    UINT4Vector *cp1 = NULL;
    UINT4Vector *cp2 = NULL;

    gsl_vector_complex_view data1 = gsl_vector_complex_subvector( data, 0, changepoint );
    gsl_vector_complex_view data2 = gsl_vector_complex_subvector( data, changepoint, length - changepoint );

    UINT4 i = 0, l = 0;

    cp1 = chop_data( &data1.vector, chunkMin );
    cp2 = chop_data( &data2.vector, chunkMin );

    l = cp1->length + cp2->length;

    chunkIndex = XLALResizeUINT4Vector( chunkIndex, l );

    /* combine new chunks */
    for ( i = 0; i < cp1->length; i++ ) {
      chunkIndex->data[i] = cp1->data[i];
    }
    for ( i = 0; i < cp2->length; i++ ) {
      chunkIndex->data[i + cp1->length] = cp2->data[i] + changepoint;
    }

    XLALDestroyUINT4Vector( cp1 ); /* free memory from the recursive calls */
    XLALDestroyUINT4Vector( cp2 );
  } else {
    chunkIndex->data[0] = length;
  }

  return chunkIndex;
}

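/* Worked example (added; not part of the original file): for a 1440-point dataset
 * the empirical split threshold is T = 4.07 + 1.33*log10(1440) ~= 8.27, so the
 * data are only split where the log odds ratio exceeds about 8.27. */
#if 0
REAL8 T = 4.07 + 1.33 * log10( 1440. ); /* ~= 8.27 */
#endif
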
/**
 * \brief Find a change point in complex data
 *
 * This function is based on the Bayesian Blocks algorithm of \cite Scargle1998 that finds "change points" in data -
 * points at which the statistics of the data change. It is based on calculating evidence, or odds, ratios. The
 * function first computes the marginal likelihood (or evidence) that the whole of the data is described by a single
 * Gaussian (with mean of zero). This comes from taking a Gaussian likelihood function and analytically marginalising
 * over the standard deviation (using a prior on the standard deviation of \f$ 1/\sigma \f$ ), giving (see
 * [\cite DupuisWoan2005]) a Student's t-distribution (see
 * <a href="https://wiki.ligo.org/foswiki/pub/CW/PulsarParameterEstimationNestedSampling/studentst.pdf">here</a>).
 * Following this, the data are split into two segments (with lengths greater than, or equal to, the minimum chunk
 * length) for all possible combinations, and the joint evidence that the two segments consist of independent
 * Gaussians (basically multiplying the above equation calculated for each segment separately) is calculated and the
 * split point recorded. However, the value required for comparing to that for the whole data set, to give the odds
 * ratio, is the evidence that having any split is better than having no split, so the individual split data evidences
 * need to be added incoherently to give the total evidence for a split. The index at which the evidence for a single
 * split is maximum (i.e. the most favoured split point) is that which is returned.
 *
 * \param data [in] a complex data vector
 * \param logodds [in] a pointer to return the natural logarithm of the odds ratio/Bayes factor
 * \param minlength [in] the minimum chunk length
 *
 * \return The position of the change point
 */
UINT4 find_change_point( gsl_vector_complex *data, REAL8 *logodds, UINT4 minlength )
{
  UINT4 changepoint = 0, i = 0;
  UINT4 length = ( UINT4 )data->size, lsum = 0;

  REAL8 datasum = 0.;

  REAL8 logsingle = 0., logtot = -INFINITY;
  REAL8 logdouble = 0., logdouble_min = -INFINITY;
  REAL8 logratio = 0.;

  REAL8 sumforward = 0., sumback = 0.;
  gsl_complex dval;

  /* check that data is at least twice the minimum length, if not return an odds ratio of zero (log odds = -inf [or close to that!]) */
  if ( length < ( UINT4 )( 2 * minlength ) ) {
    logratio = -INFINITY;
    memcpy( logodds, &logratio, sizeof( REAL8 ) );
    return 0;
  }

  /* calculate the sum of the data squared */
  for ( i = 0; i < length; i++ ) {
    dval = gsl_vector_complex_get( data, i );
    datasum += SQUARE( gsl_complex_abs( dval ) );
  }

  /* calculate the evidence that the data consist of Gaussian data with a single standard deviation */
  logsingle = -LAL_LN2 - ( REAL8 )length * LAL_LNPI + gsl_sf_lnfact( length - 1 ) - ( REAL8 )length * log( datasum );

  lsum = length - 2 * minlength + 1;

  for ( i = 0; i < length; i++ ) {
    dval = gsl_vector_complex_get( data, i );
    if ( i < minlength - 1 ) {
      sumforward += SQUARE( gsl_complex_abs( dval ) );
    } else {
      sumback += SQUARE( gsl_complex_abs( dval ) );
    }
  }

  /* go through each possible change point and calculate the evidence for the data consisting of two independent
   * Gaussians either side of the change point. Also calculate the total evidence for any change point.
   * Don't allow single points, so start at the second data point. */
  for ( i = 0; i < lsum; i++ ) {
    UINT4 ln1 = i + minlength, ln2 = ( length - i - minlength );

    REAL8 log_1 = 0., log_2 = 0.;

    dval = gsl_vector_complex_get( data, ln1 - 1 );
    REAL8 adval = SQUARE( gsl_complex_abs( dval ) );
    sumforward += adval;
    sumback -= adval;

    /* get log evidences for the individual segments */
    log_1 = -LAL_LN2 - ( REAL8 )ln1 * LAL_LNPI + gsl_sf_lnfact( ln1 - 1 ) - ( REAL8 )ln1 * log( sumforward );
    log_2 = -LAL_LN2 - ( REAL8 )ln2 * LAL_LNPI + gsl_sf_lnfact( ln2 - 1 ) - ( REAL8 )ln2 * log( sumback );

    /* get evidence for the two segments */
    logdouble = log_1 + log_2;

    /* add to total evidence for a change point */
    logtot = LOGPLUS( logtot, logdouble );

    /* find maximum value of logdouble and record that as the change point */
    if ( logdouble > logdouble_min ) {
      changepoint = ln1;
      logdouble_min = logdouble;
    }
  }

  /* get the log odds ratio of segmented versus non-segmented model */
  logratio = logtot - logsingle;
  memcpy( logodds, &logratio, sizeof( REAL8 ) );

  return changepoint;
}

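/* Illustration (added; not part of the original file): the per-segment log
 * evidence used above, for n complex points with summed squared moduli s, is
 *   log L(n, s) = -ln(2) - n*ln(pi) + ln((n-1)!) - n*ln(s).
 * A hypothetical helper encoding this, for clarity only: */
#if 0
static REAL8 log_segment_evidence( UINT4 n, REAL8 s )
{
  return -LAL_LN2 - ( REAL8 )n * LAL_LNPI + gsl_sf_lnfact( n - 1 ) - ( REAL8 )n * log( s );
}
#endif
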
/**
 * \brief Chop up the data into chunks smaller than the maximum allowed length
 *
 * This function chops any chunks that are greater than \c chunkMax into chunks smaller than, or equal to, \c chunkMax,
 * and greater than \c chunkMin. On some occasions this might result in a segment smaller than \c chunkMin, but these
 * are ignored in the likelihood calculation anyway.
 *
 * \param chunkIndex [in] a vector of segment split positions
 * \param chunkMax [in] the maximum allowed segment/chunk length
 * \param chunkMin [in] the minimum allowed segment/chunk length
 */
void rechop_data( UINT4Vector **chunkIndex, UINT4 chunkMax, UINT4 chunkMin )
{
  UINT4 i = 0, j = 0, count = 0;
  UINT4Vector *cip = *chunkIndex; /* pointer to chunkIndex */
  UINT4 length = cip->length;
  UINT4 endIndex = cip->data[length - 1];
  UINT4 startindex = 0, chunklength = 0;
  UINT4 newindex[( UINT4 )ceil( ( REAL8 )endIndex / ( REAL8 )chunkMin )];

  /* chop any chunks that are greater than chunkMax into chunks smaller than, or equal to chunkMax, and greater than chunkMin */
  for ( i = 0; i < length; i++ ) {
    if ( i == 0 ) {
      startindex = 0;
    } else {
      startindex = cip->data[i - 1];
    }

    chunklength = cip->data[i] - startindex;

    if ( chunklength > chunkMax ) {
      UINT4 remain = chunklength % chunkMax;

      /* cut segment into as many chunkMax chunks as possible */
      for ( j = 0; j < floor( chunklength / chunkMax ); j++ ) {
        newindex[count] = startindex + ( j + 1 ) * chunkMax;
        count++;
      }

      /* last chunk values */
      if ( remain != 0 ) {
        /* set final value */
        newindex[count] = startindex + j * chunkMax + remain;

        if ( remain < chunkMin ) {
          /* split the last two cells into one that is chunkMin long and one that is (chunkMax+remainder)-chunkMin long
           * - this may leave a cell shorter than chunkMin, but we'll have to live with that! */
          UINT4 n1 = ( chunkMax + remain ) - chunkMin;

          /* reset the second-to-last value */
          newindex[count - 1] = newindex[count] - chunkMin;

          if ( n1 < chunkMin ) {
            XLAL_PRINT_WARNING( "Non-fatal error... segment no. %d is %d long, which is less than chunkMin = %d.\n", count, n1, chunkMin );
          }
        }

        count++;
      }
    } else {
      newindex[count] = cip->data[i];
      count++;
    }
  }

  cip = XLALResizeUINT4Vector( cip, count );

  for ( i = 0; i < count; i++ ) {
    cip->data[i] = newindex[i];
  }
}

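/* Worked example (added; not part of the original file): with chunkMax = 10 and
 * chunkMin = 5, a single 23-point segment is first cut as {10, 10, 3}; the 3-point
 * remainder is below chunkMin, so the last two cells are rebalanced to lengths
 * {10, 8, 5}, forcing the final cell up to chunkMin. */
#if 0
UINT4Vector *idx = XLALCreateUINT4Vector( 1 );
idx->data[0] = 23;                 /* one segment ending at index 23 */
rechop_data( &idx, 10, 5 );        /* idx->data becomes {10, 18, 23} */
XLALDestroyUINT4Vector( idx );
#endif
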
/**
 * \brief Merge adjacent segments
 *
 * This function will attempt to remerge adjacent segments if statistically favourable (as calculated by the odds
 * ratio). For each pair of adjacent segments the joint likelihood of them being from two independent distributions is
 * compared to the likelihood that, combined, they are from one distribution. If the likelihood is highest for the
 * combined segments they are merged.
 *
 * \param data [in] A complex data vector
 * \param segments [in] A vector of split segment indexes
 */
void merge_data( COMPLEX16Vector *data, UINT4Vector **segments )
{
  UINT4 j = 0;
  REAL8 threshold = 0.; /* may need to be passed to function in the future, or defined globally */
  UINT4Vector *segs = *segments;

  /* loop until stopping criterion is reached */
  while ( 1 ) {
    UINT4 ncells = segs->length;

    UINT4 mergepoint = 0;
    REAL8 logodds = 0., minl = -LAL_REAL8_MAX;

    for ( j = 1; j < ncells; j++ ) {
      REAL8 summerged = 0., sum1 = 0., sum2 = 0.;
      UINT4 i = 0, n1 = 0, n2 = 0, nm = 0;
      UINT4 cellstarts1 = 0, cellends1 = 0, cellstarts2 = 0, cellends2 = 0;
      REAL8 log_merged = 0., log_individual = 0.;

      /* get the evidence for merged adjacent cells */
      if ( j == 1 ) {
        cellstarts1 = 0;
      } else {
        cellstarts1 = segs->data[j - 2];
      }

      cellends1 = segs->data[j - 1];

      cellstarts2 = segs->data[j - 1];
      cellends2 = segs->data[j];

      n1 = cellends1 - cellstarts1;
      n2 = cellends2 - cellstarts2;
      nm = cellends2 - cellstarts1;

      for ( i = cellstarts1; i < cellends1; i++ ) {
        sum1 += SQUARE( cabs( data->data[i] ) );
      }

      for ( i = cellstarts2; i < cellends2; i++ ) {
        sum2 += SQUARE( cabs( data->data[i] ) );
      }

      summerged = sum1 + sum2;

      /* calculate evidences */
      log_merged = -2 + gsl_sf_lnfact( nm - 1 ) - ( REAL8 )nm * log( summerged );

      log_individual = -2 + gsl_sf_lnfact( n1 - 1 ) - ( REAL8 )n1 * log( sum1 );
      log_individual += -2 + gsl_sf_lnfact( n2 - 1 ) - ( REAL8 )n2 * log( sum2 );

      logodds = log_merged - log_individual;

      if ( logodds > minl ) {
        mergepoint = j - 1;
        minl = logodds;
      }
    }

    /* set break criterion */
    if ( minl < threshold ) {
      break;
    } else { /* merge cells */
      /* remove the cell end value between the two being merged and shift */
      for ( UINT4 i = 0; i < ncells - ( mergepoint + 1 ); i++ ) {
        segs->data[mergepoint + i] = segs->data[mergepoint + i + 1];
      }

      segs = XLALResizeUINT4Vector( segs, ncells - 1 );
    }
  }
}

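/* Illustration (added; not part of the original file): written out, the merge
 * criterion computed above is
 *   log O = [ -2 + ln((nm-1)!) - nm*ln(sm) ]
 *           - [ -2 + ln((n1-1)!) - n1*ln(s1) ] - [ -2 + ln((n2-1)!) - n2*ln(s2) ],
 * where n1, n2 and nm = n1 + n2 are the cell lengths, and s1, s2 and sm = s1 + s2
 * are the summed squared moduli of the data in each cell. The best-scoring pair of
 * adjacent cells is merged while its log odds ratio stays at or above the
 * threshold (currently zero). */
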
/**
 * \brief Counts the number of comma separated values in a string
 *
 * This function counts the number of comma separated values in a given input string.
 *
 * \param csvline [in] Any string
 *
 * \return The number of comma separated values in the input string
 */
INT4 count_csv( CHAR *csvline )
{
  CHAR *inputstr = NULL;
  INT4 count = 0;

  inputstr = XLALStringDuplicate( csvline );

  /* count number of commas */
  while ( 1 ) {
    if ( XLALStringToken( &inputstr, ",", 0 ) == NULL ) {
      XLAL_ERROR( XLAL_EFUNC, "Error... problem counting number of commas!" );
    }

    if ( inputstr == NULL ) {
      break;
    }

    count++;
  }

  XLALFree( inputstr );

  return count + 1;
}

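/* Usage sketch (added; not part of the original file): */
#if 0
CHAR detectors[] = "H1,L1,V1";
INT4 ndets = count_csv( detectors ); /* returns 3 */
#endif
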
/**
 * \brief Automatically set the solar system ephemeris file based on environment variables and data time span
 *
 * This function will attempt to construct the file names for the Sun, Earth and time correction ephemeris files
 * based on the ephemeris used for the equivalent TEMPO(2) pulsar timing information. It assumes that the
 * ephemeris files are those constructed between 2000 and 2040. The path to the files is not required as this
 * will be found in \c XLALInitBarycenter.
 *
 * \param efile [in] a string that will return the Earth ephemeris file
 * \param sfile [in] a string that will return the Sun ephemeris file
 * \param tfile [in] a string that will return the time correction file
 * \param pulsar [in] the pulsar parameters read from a .par file
 * \param gpsstart [in] the GPS time of the start of the data
 * \param gpsend [in] the GPS time of the end of the data
 *
 * \return The TimeCorrectionType e.g. TDB or TCB
 */
TimeCorrectionType XLALAutoSetEphemerisFiles( CHAR **efile, CHAR **sfile, CHAR **tfile,
                                              PulsarParameters *pulsar, INT4 gpsstart, INT4 gpsend )
{
  /* set the times that the ephemeris files span */
  INT4 ephemstart = 630720013;  /* GPS time of Jan 1, 2000, 00:00:00 UTC */
  INT4 ephemend = 1893024018;   /* GPS time of Jan 1, 2040, 00:00:00 UTC */
  TimeCorrectionType ttype = TIMECORRECTION_NONE;

  if ( gpsstart < ephemstart || gpsend < ephemstart || gpsstart > ephemend || gpsend > ephemend ) {
    XLAL_ERROR( XLAL_EFUNC, "Start and end times are outside the ephemeris file ranges!" );
  }

  *efile = XLALStringDuplicate( "earth00-40-" );
  *sfile = XLALStringDuplicate( "sun00-40-" );

  if ( !PulsarCheckParam( pulsar, "EPHEM" ) ) {
    /* default to use DE405 */
    *efile = XLALStringAppend( *efile, "DE405" );
    *sfile = XLALStringAppend( *sfile, "DE405" );
  } else {
    if ( !strcmp( PulsarGetStringParam( pulsar, "EPHEM" ), "DE405" ) || !strcmp( PulsarGetStringParam( pulsar, "EPHEM" ), "DE200" ) ||
         !strcmp( PulsarGetStringParam( pulsar, "EPHEM" ), "DE414" ) || !strcmp( PulsarGetStringParam( pulsar, "EPHEM" ), "DE421" ) ||
         !strcmp( PulsarGetStringParam( pulsar, "EPHEM" ), "DE430" ) || !strcmp( PulsarGetStringParam( pulsar, "EPHEM" ), "DE436" ) ) {
      *efile = XLALStringAppend( *efile, PulsarGetStringParam( pulsar, "EPHEM" ) );
      *sfile = XLALStringAppend( *sfile, PulsarGetStringParam( pulsar, "EPHEM" ) );
    } else {
      XLAL_ERROR( XLAL_EFUNC, "Unknown ephemeris %s in par file.", PulsarGetStringParam( pulsar, "EPHEM" ) );
    }
  }

  /* add .dat.gz extension */
  *efile = XLALStringAppend( *efile, ".dat.gz" );
  *sfile = XLALStringAppend( *sfile, ".dat.gz" );

  if ( !PulsarCheckParam( pulsar, "UNITS" ) ) {
    /* default to using TCB units */
    *tfile = XLALStringDuplicate( "te405_2000-2040.dat.gz" );
    ttype = TIMECORRECTION_TCB;
  } else {
    if ( !strcmp( PulsarGetStringParam( pulsar, "UNITS" ), "TDB" ) ) {
      *tfile = XLALStringDuplicate( "tdb_2000-2040.dat.gz" );
      ttype = TIMECORRECTION_TDB;
    } else if ( !strcmp( PulsarGetStringParam( pulsar, "UNITS" ), "TCB" ) ) {
      *tfile = XLALStringDuplicate( "te405_2000-2040.dat.gz" );
      ttype = TIMECORRECTION_TCB;
    } else {
      XLAL_ERROR( XLAL_EFUNC, "Error... unknown units %s in par file!", PulsarGetStringParam( pulsar, "UNITS" ) );
    }
  }

  return ttype;
}

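/* Usage sketch (added; not part of the original file). The .par file path and the
 * GPS times are assumptions; XLALReadTEMPOParFile() provides the PulsarParameters. */
#if 0
PulsarParameters *pars = XLALReadTEMPOParFile( "J0534+2200.par" );
CHAR *efile = NULL, *sfile = NULL, *tfile = NULL;
TimeCorrectionType ttype = XLALAutoSetEphemerisFiles( &efile, &sfile, &tfile, pars,
                                                      900000000, 1000000000 );
#endif
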
/**
 * \brief Add a variable, checking first if it already exists and is of type \c LALINFERENCE_PARAM_FIXED
 * and if so removing it before re-adding it
 *
 * This function is for use as an alternative to \c LALInferenceAddVariable, which does not
 * allow \c LALINFERENCE_PARAM_FIXED variables to be changed. If the variable already exists
 * and is of type \c LALINFERENCE_PARAM_FIXED, then it will first be removed and then re-added.
 */
void check_and_add_fixed_variable( LALInferenceVariables *vars, const char *name, void *value, LALInferenceVariableType type )
{
  if ( LALInferenceCheckVariable( vars, name ) ) {
    if ( LALInferenceGetVariableVaryType( vars, name ) == LALINFERENCE_PARAM_FIXED ) {
      LALInferenceRemoveVariable( vars, name );
    }
  }
  LALInferenceAddVariable( vars, name, value, type, LALINFERENCE_PARAM_FIXED );
}
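
/* Usage sketch (added; not part of the original file): unlike calling
 * LALInferenceAddVariable() directly, this safely overwrites an existing
 * LALINFERENCE_PARAM_FIXED entry such as the sample interval "dt". */
#if 0
REAL8 dt = 60.;
check_and_add_fixed_variable( ifo->params, "dt", &dt, LALINFERENCE_REAL8_t );
#endif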