LAL  7.5.0.1-b72065a
StreamSeriesInput.c
Go to the documentation of this file.
1 /**
2  * \defgroup StreamSeriesInput_c Module StreamSeriesInput.c
3  * \ingroup StreamInput_h
4  * \author Creighton, T. D.
5  *
6  * \brief Converts an input stream into a time or frequency series.
7  *
8  * ### Prototypes ###
9  *
10  * \code
11  * void
12  * LAL<typecode>ReadTSeries( LALStatus *stat,
13  * <datatype>TimeSeries *series,
14  * FILE *stream )
15  *
16  * void
17  * LAL<typecode>ReadTVectorSeries( LALStatus *stat,
18  * <datatype>TimeVectorSeries *series,
19  * FILE *stream )
20  *
21  * void
22  * LAL<typecode>ReadTArraySeries( LALStatus *stat,
23  * <datatype>TimeArraySeries *series,
24  * FILE *stream )
25  *
26  * void
27  * LAL<typecode>ReadFSeries( LALStatus *stat,
28  * <datatype>FrequencySeries *series,
29  * FILE *stream )
30  * \endcode
31  *
32  * ### Description ###
33  *
34  * These routines parse an input stream <tt>*stream</tt> to fill in the
35  * data and metadata fields of a time or frequency series <tt>*series</tt>.
36  * The field <tt>series->data</tt> must be \c NULL, so that it can be
37  * created and filled by the routine. The other fields may be
38  * initialized or not; they will be overwritten by metadata read from
39  * <tt>*stream</tt>. If an error occurs, <tt>*series</tt> will be left
40  * unchanged, but <tt>*stream</tt> will have been read up to the point
41  * where the error occurred.
42  *
43  * For each of these prototype templates there are in fact 10 separate
44  * routines corresponding to all the atomic datatypes <tt><datatype></tt>
45  * (except \c CHAR) referred to by <tt><typecode></tt>:
46  *
47  * <table>
48  * <tr><th><typecode></th><th><datatype></th><th><typecode></th><th><datatype></th></tr>
49  * <tr><td>I2</td><td> INT2</td><td> U2</td><td> UINT2</td></tr>
50  * <tr><td>I4</td><td> INT4</td><td> U4</td><td> UINT4</td></tr>
51  * <tr><td>I8</td><td> INT8</td><td> U8</td><td> UINT8</td></tr>
52  * <tr><td> S</td><td>REAL4</td><td> C</td><td>COMPLEX8</td></tr>
53  * <tr><td> D</td><td>REAL8</td><td> Z</td><td>COMPLEX16</td></tr>
54  * </table>
55  *
56  * \par Format for <tt>*stream</tt>:
57  * The input stream is assumed
58  * to be a text stream (ASCII) consisting of a header containing metadata
59  * followed by numerical data in standard integer or floating-point
60  * format, as recognized by the routines in \ref StringConvert.c. The
61  * header consists of zero or more lines beginning with a \c \#
62  * character, followed by a metadata field name and value in the format:
63  *
64  * \code
65  * # fieldname=value
66  * \endcode
67  *
68  * The <tt>=</tt> sign in this format is standard but optional;
69  * it may be replaced or surrounded with any amount of any whitespace
70  * except a newline <tt>\\n</tt>. If \e fieldname is unrecognized,
71  * it is ignored; if it is recognized, then \e value must be in a
72  * suitable format for the field type, as described below. Blank lines,
73  * or lines containing just a \c \# character, are skipped. Once a
74  * line is encountered that contains non-whitespace characters and does
75  * not start with \c \#, that line is assumed to be the beginning of
76  * the numerical data. From that point on, all non-whitespace characters
77  * must be part of parseable numbers; no more comments are permitted
78  * (although blank lines will still be skipped).
79  *
80  * If a metadata field appears twice in the header, the later one takes
81  * precedence. At present these routines do not track which fields have
82  * been previously assigned, so no warnings or errors are generated.
83  *
84  * How the data is packed into the <tt>series->data</tt> structure depends
85  * on what metadata has been provided, as described below.
86  *
87  * ### Required, conditional, and optional metadata: ###
88  *
89  * The input stream need not contain a complete set of metadata, allowing some
90  * metadata to be read from <tt>*stream</tt> and others to be set
91  * elsewhere. For each type of series, some metadata will be
92  * \e required, and the routine will abort if the metadata is not
93  * found. Other metadata are \e conditional, meaning that the
94  * routine will operate differently depending on whether or not these
95  * metadata were found. The remaining metadata are \e optional; if
96  * they are not found in <tt>*stream</tt>, they will be left unchanged.
97  * The recognized metadata fields are listed below.
98  *
99  * <tt><datatype>TimeSeries</tt>:
100  * <dl>
101  * <dt>Required fields:</dt><dd> none</dd>
102  * <dt>Conditional fields:</dt><dd> \c length</dd>
103  * <dt>Optional fields:</dt><dd> \c name, \c epoch, \c deltaT, \c f0, \c sampleUnits, \c datatype</dd>
104  * </dl>
105  *
106  * <tt><datatype>TimeVectorSeries</tt>:
107  * <dl>
108  * <dt>Required fields:</dt><dd> none</dd>
109  * <dt>Conditional fields:</dt><dd> \c length, \c vectorLength</dd>
110  * <dt>Optional fields:</dt><dd> \c name, \c epoch, \c deltaT, \c f0, \c sampleUnits, \c datatype</dd>
111  * </dl>
112  *
113  * <tt><datatype>TimeArraySeries</tt>:
114  * <dl>
115  * <dt>Required fields:</dt><dd> \c dimLength</dd>
116  * <dt>Conditional fields:</dt><dd> \c length, \c arrayDim</dd>
117  * <dt>Optional fields:</dt><dd> \c name, \c epoch, \c deltaT, \c f0, \c sampleUnits, \c datatype</dd>
118  * </dl>
119  *
120  * <tt><datatype>FrequencySeries</tt>:
121  * <dl>
122  * <dt>Required fields:</dt><dd> none</dd>
123  * <dt>Conditional fields:</dt><dd> \c length</dd>
124  * <dt>Optional fields:</dt><dd> \c name, \c epoch, \c deltaT, \c f0, \c deltaF, \c sampleUnits, \c datatype</dd>
125  * </dl>
126  *
127  * Below we describe the required format for the field values, as well as
128  * what occurs if a conditional field is or isn't present.
129  *
130  * ### Required fields: ###
131  *
132  * <dl>
133  * <dt>dimLength</dt><dd> (\c TimeArraySeries only):
134  * \e value consists of a sequence of \c UINT4s separated by
135  * whitespace (but \e not a newline <tt>'\\n'</tt>). These data are
136  * stored in <tt>series->data->dimLength</tt>: the number of integers gives
137  * the number of array indices, while the value of each integer gives
138  * the dimension of the corresponding array index.</dd>
139  * </dl>
140  *
141  * ### Conditional fields: ###
142  *
143  * <dl>
144  * <dt>arrayDim</dt><dd> (\c TimeArraySeries only): \e value
145  * is a single \c UINT4, to be stored in
146  * <tt>series->data->arrayDim</tt>. This must equal the product of the
147  * index ranges in \c dimLength, above, or an error is returned. If
148  * not given, the \c arrayDim field will be set equal to the product
149  * of the index ranges in \c dimLength. (The \c arrayDim and
150  * \c dimLength fields can appear in any order in <tt>*stream</tt>;
151  * checking is done only after all header lines have been read.)</dd>
152  *
153  * <dt>vectorLength</dt><dd> (\c TimeVectorSeries only):
154  * \e value is a single \c UINT4, to be stored in
155  * <tt>series->data->vectorLength</tt>. If not specified in the header
156  * portion of <tt>*stream</tt>, it will be taken to be the number of data
157  * on the \e first line of the data portion of <tt>*stream</tt>, or
158  * half the number of real data for a complex-valued
159  * \c TimeVectorSeries; if an odd number of real data are found on
160  * the first line of a complex \c TimeVectorSeries, then an error is
161  * returned.</dd>
162  *
163  * <dt>length:</dt><dd> \e value is a single \c UINT4, to be
164  * stored in <tt>series->data->length</tt>. If it is specified in the
165  * header portion of <tt>*stream</tt>, data will be read until
166  * \c length is reached. Otherwise, <tt>*stream</tt> will be read to
167  * its end or until an unparseable character is read, and \c length
168  * will then be set accordingly. (If parsing stops in the middle of
169  * filling a complex, vector, or array valued element, the partly-read
170  * element is discarded.)</dd>
171  * </dl>
172  *
173  * ### Optional fields: ###
174  *
175  * <dl>
176  * <dt>name:</dt><dd>\c value is a string surrounded by double-quotes,
177  * which is parsed in the manner of a string literal in C: it
178  * may contain ordinary printable characters (except double-quote and \\),
179  * escape sequences (such as \\t for tab, \\n for
180  * newline, or \\\\ and \\\" for literal backslash and quote
181  * characters), and octal or hexadecimal codes (\\\c ooo or
182  * \\x\c hh, respectively) for arbitrary bytes. Unlike in C,
183  * literals cannot be split between lines, adjacent literals are not
184  * concatenated, and converted strings longer than
185  * \c LALNameLength-1 will be truncated. The resulting string is
186  * stored in <tt>series->name</tt>, and will always contain a \c \\0
187  * terminator, beyond which the contents are unspecified.</dd>
188  *
189  * <dt>epoch:</dt><dd> \e value is a single \c INT8 number
190  * of GPS nanoseconds, or a pair of \c INT4s representing GPS seconds
191  * and nanoseconds separately, separated by non-newline whitespace.</dd>
192  *
193  * <dt>deltaT</dt><dd> (any time series): \e value is a single
194  * \c REAL8 number.</dd>
195  *
196  * <dt>f0:</dt><dd> \e value is a single \c REAL8 number.</dd>
197  *
198  * <dt>deltaF</dt><dd> (\c FrequencySeries only): \e value
199  * is a single \c REAL8 number.</dd>
200  *
201  * <dt>sampleUnits:</dt><dd> \e value is a string surrounded by
202  * double-quotes; the quotes are stripped and the string passed to
203  * <tt>XLALParseUnitString()</tt> to determine <tt>series->sampleUnits</tt>.
204  * Since <tt>XLALParseUnitString()</tt> is not very robust, it is
205  * recommended to use only unit strings that have been generated by
206  * <tt>XLALUnitAsString()</tt>, or to remove this metadata field and set
207  * <tt>series->sampleUnits</tt> within the code.</dd>
208  *
209  * <dt>datatype:</dt><dd> \e value is a string identifying the
210  * series type; e.g. \c REAL4TimeSeries (\e not surrounded by
211  * quotes). This should correspond to the type of <tt>*series</tt>, not to
212  * any field in <tt>*series</tt>. If there is a type mismatch, a warning
213  * is generated (and errors may occur later while parsing the data).</dd>
214  *
215  * </dl>
216  *
217  * \par Data format:
218  * The data portion of <tt>*stream</tt> consists
219  * of whitespace-separated integer or real numbers. For complex input
220  * routines, the real data are parsed as alternately the real and
221  * imaginary parts of successive complex numbers. By convention, each
222  * line should correspond to a single base, complex, vector, or array
223  * valued element of the <tt>series->data</tt> sequence. However, this is
224  * \e required only in the case of a \c TimeVectorSeries where
225  * the \c vectorLength metadata was not set in the header, since in
226  * this case the value of \c vectorLength will be taken from the
227  * number of elements read on the first data line. After this, and in
228  * all other cases, newlines are treated as any other whitespace.
229  *
230  * If a \c length value is specified in the header, then data are
231  * read until the required length is achieved; if <tt>fscanf()</tt> returns
232  * zero or negative before this (representing either the end-of-input or
233  * a character that cannot be interpreted as part of the numerical data),
234  * an error is returned. If a \c length value was not specified,
235  * data are read until <tt>fscanf()</tt> returns zero or negative: at this
236  * point any partially-completed complex, vector, or array valued element
237  * is discarded, and <tt>series->data->length</tt> set to the number of
238  * elements read.
239  *
240  * ### Algorithm ###
241  *
242  * These routines use <tt>LALCHARReadVector()</tt> to read the header lines
243  * and the first line of data. After this, data are parsed directly from
244  * <tt>*stream</tt> using <tt>fscanf()</tt>. This is done for efficiency:
245  * repeated calling of the LAL string parsing routines in
246  * \ref StringConvert.c involves far too much computational overhead.
247  *
248  * After the first data line has been read, the length of each sequence
249  * element will be known from the atomic type, as well as the specified
250  * \c dimLength (for arrays), \c vectorLength (for vectors), or
251  * number of elements on the first data line (for vectors without an
252  * explicitly specified \c vectorLength). If \c length is also
253  * specified, a sequence of the appropriate size is allocated, and all
254  * the data is copied or read directly into it. If \c length was not
255  * specified, the data read with <tt>fscanf()</tt> are stored in a linked
256  * list of buffers of size \c BUFFSIZE (a local <tt>\# define</tt>d
257  * constant) until parsing stops. Then a sequence of the appropriate
258  * size is allocated and the data copied into it.
259  *
260  */
261 
262 #include <complex.h>
263 #include <stdio.h>
264 #include <string.h>
265 #include <ctype.h>
266 #include <lal/LALStdlib.h>
267 #include <lal/AVFactories.h>
268 #include <lal/SeqFactories.h>
269 #include <lal/Units.h>
270 #include <lal/StringInput.h>
271 #include <lal/StreamInput.h>
272 
273 /* Define a message string for header parsing errors. */
274 #define LALREADSERIESC_HEADER "Skipping badly-formatted line for metadata field "
275 
276 /* Define linked-list of buffers for storing an arbitrary number of
277  arbitrary datatypes. BUFFSIZE should be a multiple of 16. */
278 #define BUFFSIZE 24
279 typedef union tagBuffer {
280  INT2 I2[BUFFSIZE/2];
281  INT4 I4[BUFFSIZE/4];
282  INT8 I8[BUFFSIZE/8];
283  UINT2 U2[BUFFSIZE/2];
284  UINT4 U4[BUFFSIZE/4];
285  UINT8 U8[BUFFSIZE/8];
286  REAL4 S[BUFFSIZE/4];
287  REAL8 D[BUFFSIZE/8];
288 } Buffer;
289 typedef struct tagBufferList {
290  Buffer buf;
291  struct tagBufferList *next;
292 } BufferList;
293 
/* Release every node of a BufferList chain with LALFree(), starting
   at headPtr and following the next pointers until NULL.  A NULL
   headPtr is a no-op.  The expansion is a single statement that
   expects a trailing semicolon at the call site.  The macro-local
   names herePtr and nextPtr must not be used as the argument. */
#define FREEBUFFERLIST( headPtr )                                    \
do {                                                                 \
  if ( headPtr ) {                                                   \
    BufferList *herePtr = ( headPtr );                               \
    BufferList *nextPtr;                                             \
    for ( ; herePtr; herePtr = nextPtr ) {                           \
      nextPtr = herePtr->next;                                       \
      LALFree( herePtr );                                            \
    }                                                                \
  }                                                                  \
} while ( 0 )
304 
305 /* Define a function for parsing a string literal. */
306 static void
308  CHAR *string,
309  const CHAR *literal,
310  UINT4 length )
311 {
312  CHAR c; /* Current character being considered. */
313  UINT4 n = 0; /* Counter of number of characters written. */
314 
315  INITSTATUS(stat);
316 
317  /* Find open quote. */
318  while ( ( c = *literal ) != '"' && c != '\n' && c != '\0' )
319  literal++;
320  if ( *literal != '"' ) {
321  LALWarning( stat, "No open quote found" );
322  RETURN( stat );
323  }
324  literal++;
325 
326  /* Start parsing. */
327  while ( n < length - 1 ) {
328 
329  /* End of literal, either implicit or explicit. */
330  if ( ( c = *(literal++) ) == '\0' || c == '\n' ) {
331  LALWarning( stat, "No close quote found" );
332  string[n] = '\0';
333  RETURN( stat );
334  } else if ( c == '"' ) {
335  string[n] = '\0';
336  RETURN( stat );
337  }
338 
339  /* Escape sequence. */
340  else if ( c == '\\' ) {
341 
342  /* Do not allow actual end-of-line or end-of-string to be
343  escaped. */
344  if ( ( c = *(literal++) ) == '\0' || c == '\n' ) {
345  LALWarning( stat, "No close quote found" );
346  string[n] = '\0';
347  RETURN( stat );
348  }
349 
350  /* Other special escape characters. */
351  else if ( c == 'a' || c == 'A' )
352  string[n++] = '\a';
353  else if ( c == 'b' || c == 'B' )
354  string[n++] = '\b';
355  else if ( c == 'f' || c == 'F' )
356  string[n++] = '\f';
357  else if ( c == 'n' || c == 'N' )
358  string[n++] = '\n';
359  else if ( c == 'r' || c == 'R' )
360  string[n++] = '\r';
361  else if ( c == 't' || c == 'T' )
362  string[n++] = '\t';
363  else if ( c == 'v' || c == 'V' )
364  string[n++] = '\v';
365 
366  /* Hexadecimal character code. */
367  else if ( c == 'x' || c == 'X' ) {
368  c = *(literal++); /* first digit */
369  if ( isxdigit( c ) ) {
370  UINT2 value;
371  if ( isdigit( c ) )
372  value = c - '0';
373  else
374  value = 10 + tolower( c ) - 'a';
375  c = *(literal++); /* second digit */
376  if ( isxdigit( c ) ) {
377  value *= 16;
378  if ( isdigit( c ) )
379  value += c - '0';
380  else
381  value += 10 + tolower( c ) - 'a';
382  } else /* no second digit */
383  literal--;
384  string[n++] = (CHAR)( value );
385  if ( value == 0 ) {
386  LALWarning( stat, "Found explicit end-of-string \\0" );
387  RETURN( stat );
388  }
389  } else { /* no first digit */
390  LALWarning( stat, "Treating empty hex cde as explicit"
391  " end-of-string \\0" );
392  string[n] = '\0';
393  RETURN( stat );
394  }
395  }
396 
397  /* Octal character code. */
398  else if ( c >= '0' && c < '8' ) {
399  UINT2 value = c - '0';
400  c = *(literal++); /* second digit */
401  if ( c >= '0' && c < '8' ) {
402  value *= 8;
403  value += c - '0';
404  c = *(literal++); /* third digit */
405  if ( c >= '0' && c < '8' ) {
406  value *= 8;
407  value += c - '0';
408  } else /* no third digit */
409  literal--;
410  } else /* no second digit */
411  literal--;
412  if ( value > 255 )
413  LALWarning( stat, "Ignoring octal character code >= '\\400'" );
414  else
415  string[n++] = (CHAR)( value );
416  if ( value == 0 ) {
417  LALWarning( stat, "Found explicit end-of-string \\0" );
418  RETURN( stat );
419  }
420  }
421 
422  /* Other escaped character. */
423  else {
424  if ( c != '\\' && c != '?' && c != '\'' && c != '"' )
425  LALWarning( stat, "Dropping \\ from unrecognized escape"
426  " sequence" );
427  string[n++] = c;
428  }
429  }
430 
431  /* Other character. */
432  else
433  string[n++] = c;
434  }
435 
436  if ( *literal != '"' )
437  LALWarning( stat, "Reached maximum length before reading close"
438  " quote" );
439  string[n] = '\0';
440  RETURN( stat );
441 }
442 
/* Tell the GNU compiler to ignore issues with the `ll' length modifier:
   when __GNUC__ is defined, every later use of fscanf in this
   translation unit (including the template code included below)
   expands to `__extension__ fscanf'. */
#ifdef __GNUC__
#define fscanf __extension__ fscanf
#endif
447 
/* Instantiate the template StreamSeriesInput_source.c once for each
   atomic datatype.  Before every inclusion the following macros are
   set, and afterwards they are all undefined again so that the next
   instantiation starts from a clean slate:
     TYPECODE  type code of the series element (the <typecode> in the
               routine names, e.g. I2, S, Z);
     DATACODE  type code of the numbers actually parsed -- identical to
               TYPECODE for real types, D or S for the complex types;
     TYPE      datatype of the series element;
     DATA      datatype of the numbers actually parsed -- identical to
               TYPE for real types, REAL8 or REAL4 for complex types;
     SIZE      NOTE(review): presumably the size of DATA in bytes --
               confirm against StreamSeriesInput_source.c;
     COMPLEX   1 for the complex types, 0 otherwise. */

/* INT2 */
#define TYPECODE I2
#define DATACODE TYPECODE
#define TYPE INT2
#define DATA TYPE
#define SIZE 2
#define COMPLEX 0
#include "StreamSeriesInput_source.c"
#undef TYPECODE
#undef DATACODE
#undef TYPE
#undef DATA
#undef SIZE
#undef COMPLEX

/* INT4 */
#define TYPECODE I4
#define DATACODE TYPECODE
#define TYPE INT4
#define DATA TYPE
#define SIZE 4
#define COMPLEX 0
#include "StreamSeriesInput_source.c"
#undef TYPECODE
#undef DATACODE
#undef TYPE
#undef DATA
#undef SIZE
#undef COMPLEX

/* INT8 */
#define TYPECODE I8
#define DATACODE TYPECODE
#define TYPE INT8
#define DATA TYPE
#define SIZE 8
#define COMPLEX 0
#include "StreamSeriesInput_source.c"
#undef TYPECODE
#undef DATACODE
#undef TYPE
#undef DATA
#undef SIZE
#undef COMPLEX

/* UINT2 */
#define TYPECODE U2
#define DATACODE TYPECODE
#define TYPE UINT2
#define DATA TYPE
#define SIZE 2
#define COMPLEX 0
#include "StreamSeriesInput_source.c"
#undef TYPECODE
#undef DATACODE
#undef TYPE
#undef DATA
#undef SIZE
#undef COMPLEX

/* UINT4 */
#define TYPECODE U4
#define DATACODE TYPECODE
#define TYPE UINT4
#define DATA TYPE
#define SIZE 4
#define COMPLEX 0
#include "StreamSeriesInput_source.c"
#undef TYPECODE
#undef DATACODE
#undef TYPE
#undef DATA
#undef SIZE
#undef COMPLEX

/* UINT8 */
#define TYPECODE U8
#define DATACODE TYPECODE
#define TYPE UINT8
#define DATA TYPE
#define SIZE 8
#define COMPLEX 0
#include "StreamSeriesInput_source.c"
#undef TYPECODE
#undef DATACODE
#undef TYPE
#undef DATA
#undef SIZE
#undef COMPLEX

/* REAL4 */
#define TYPECODE S
#define DATACODE TYPECODE
#define TYPE REAL4
#define DATA TYPE
#define SIZE 4
#define COMPLEX 0
#include "StreamSeriesInput_source.c"
#undef TYPECODE
#undef DATACODE
#undef TYPE
#undef DATA
#undef SIZE
#undef COMPLEX

/* REAL8 */
#define TYPECODE D
#define DATACODE TYPECODE
#define TYPE REAL8
#define DATA TYPE
#define SIZE 8
#define COMPLEX 0
#include "StreamSeriesInput_source.c"
#undef TYPECODE
#undef DATACODE
#undef TYPE
#undef DATA
#undef SIZE
#undef COMPLEX

/* COMPLEX16: parsed as REAL8 numbers. */
#define TYPECODE Z
#define DATACODE D
#define TYPE COMPLEX16
#define DATA REAL8
#define SIZE 8
#define COMPLEX 1
#include "StreamSeriesInput_source.c"
#undef TYPECODE
#undef DATACODE
#undef TYPE
#undef DATA
#undef SIZE
#undef COMPLEX

/* COMPLEX8: parsed as REAL4 numbers. */
#define TYPECODE C
#define DATACODE S
#define TYPE COMPLEX8
#define DATA REAL4
#define SIZE 4
#define COMPLEX 1
#include "StreamSeriesInput_source.c"
#undef TYPECODE
#undef DATACODE
#undef TYPE
#undef DATA
#undef SIZE
#undef COMPLEX
#define LALWarning(statusptr, warning)
Definition: LALError.h:103
#define INITSTATUS(statusptr)
#define RETURN(statusptr)
#define BUFFSIZE
static void LALLiteralToString(LALStatus *stat, CHAR *string, const CHAR *literal, UINT4 length)
uint64_t UINT8
Eight-byte unsigned integer; on some platforms this is equivalent to unsigned long int instead.
double REAL8
Double precision real floating-point number (8 bytes).
int16_t INT2
Two-byte signed integer.
int64_t INT8
Eight-byte signed integer; on some platforms this is equivalent to long int instead.
uint16_t UINT2
Two-byte unsigned integer.
char CHAR
One-byte signed integer, see Headers LAL(Atomic)Datatypes.h for more details.
uint32_t UINT4
Four-byte unsigned integer.
int32_t INT4
Four-byte signed integer.
float REAL4
Single precision real floating-point number (4 bytes).
LAL status structure, see The LALStatus structure for more details.
Definition: LALDatatypes.h:947