LALPulsar  6.1.0.1-b72065a
FindFiles.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2010, 2012, 2014, 2016, 2021, 2022 Karl Wette
3  * Copyright (C) 2010 Chris Messenger
4  * Copyright (C) 2009, 2011 Adam Mercer
5  * Copyright (C) 2004--2006, 2008, 2013 Reinhard Prix
6  * Copyright (C) 2004--2008, 2010 Bernd Machenschalk
7  * Copyright (C) 2004, 2005 Alicia Sintes
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; see the file COPYING. If not, write to the
21  * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
22  * MA 02110-1301 USA
23  */
24 
25 /*---------- includes ----------*/
26 
27 #include <string.h>
28 #include <strings.h>
29 #include <ctype.h>
30 
31 #ifndef _MSC_VER
32 #include <dirent.h>
33 #else
34 #include <io.h>
35 #endif
36 
37 #include <lal/ConfigFile.h>
38 
39 #include "SFTinternal.h"
40 
41 /*---------- internal prototypes ----------*/
42 
43 static BOOLEAN is_pattern( const char *c ); /* filename string is a glob-style pattern */
44 static int amatch( char *str, char *p ); /* glob pattern-matcher (public domain)*/
45 
46 /*========== function definitions ==========*/
47 
48 /**
49  * Returns a list of filenames matching the input argument, which may be one of
50  * the following:
51  * - <tt><file>[;<file>;...]</tt>: a list of filenames.
52  * - <tt><glob>[;<glob>;...]</tt>: a list of glob-like pattern(s) such
53  * as <tt>*.sft</tt>, <tt>./dir/\htmlonly\endhtmlonly*.sft</tt>, etc.
54  * - <tt>list:<filelist></tt>: a file containing a list of filenames.
55  * Prefixes of the form <tt>file:\htmlonly\endhtmlonly//localhost/</tt>
56  * or <tt>file:\htmlonly\endhtmlonly///</tt> are removed.
57  *
58  * Note: the list of filenames is returned sorted alphabetically.
59  */
61 XLALFindFiles( const CHAR *globstring )
62 {
63 #ifndef _MSC_VER
64  DIR *dir;
65  struct dirent *entry;
66 #else
67  intptr_t dir;
68  struct _finddata_t entry;
69  CHAR *ptr3;
70 #endif
71  CHAR *dname;
72  const CHAR *ptr1, *ptr2;
73  CHAR *fpattern;
74  size_t dirlen;
75  CHAR **filelist = NULL;
76  UINT4 numFiles = 0, newNumFiles = 0;
77  LALStringVector *ret = NULL;
78  UINT4 j;
79  UINT4 namelen;
80  CHAR *thisFname = NULL;
81 
82  XLAL_CHECK_NULL( globstring != NULL, XLAL_EINVAL );
83 
84 #define FILE_SEPARATOR ';'
85  if ( ( ptr2 = strchr( globstring, FILE_SEPARATOR ) ) ) {
86  /* globstring is multi-pattern ("pattern1;pattern2;pattern3") */
87  /* call XLALFindFiles() with every pattern found in globstring */
88 
89  ptr1 = ( const CHAR * )globstring;
90  while ( ( ptr2 = strchr( ptr1, FILE_SEPARATOR ) ) ) {
91  /* ptr1 points to the beginning of a pattern, ptr2 to the end */
92 
93  /* copy the current name to thisFname */
94  namelen = ptr2 - ptr1;
95  if ( ( thisFname = LALRealloc( thisFname, ( namelen + 1 ) * sizeof( CHAR ) ) ) == NULL ) {
96  for ( j = 0; j < numFiles; j++ ) {
97  LALFree( filelist[j] );
98  }
99  if ( filelist ) {
100  LALFree( filelist );
101  }
103  }
104  strncpy( thisFname, ptr1, namelen );
105  thisFname[namelen] = '\0';
106 
107  /* call XLALFindFiles(thisFname) */
108  ret = XLALFindFiles( thisFname );
109 
110  /* append the output (if any) to the existing filelist */
111  if ( ret ) {
112  newNumFiles = numFiles + ret->length;
113 
114  if ( ( filelist = LALRealloc( filelist, ( newNumFiles ) * sizeof( CHAR * ) ) ) == NULL ) {
116  LALFree( thisFname );
118  }
119 
120  for ( j = 0; j < ret->length; j++ ) {
121  filelist[numFiles + j] = ret->data[j];
122  }
123  LALFree( ret->data );
124  LALFree( ret );
125  numFiles = newNumFiles;
126  } else {
127  for ( j = 0; j < numFiles; j++ ) {
128  LALFree( filelist[j] );
129  }
130  if ( filelist ) {
131  LALFree( filelist );
132  }
133  LALFree( thisFname );
135  }
136 
137  /* skip the separator */
138  ptr1 = ptr2 + 1;
139  } /* while */
140 
141  LALFree( thisFname );
142 
143  ret = XLALFindFiles( ptr1 );
144  if ( ret ) {
145  newNumFiles = numFiles + ret->length;
146 
147  if ( ( filelist = LALRealloc( filelist, ( newNumFiles ) * sizeof( CHAR * ) ) ) == NULL ) {
150  }
151 
152  for ( j = 0; j < ret->length; j++ ) {
153  filelist[numFiles + j] = ret->data[j];
154  }
155  LALFree( ret->data );
156  LALFree( ret );
157  numFiles = newNumFiles;
158  }
159 
160  } /* if multi-pattern */
161 
162  /* read list of file names from a "list file" */
163 #define LIST_PREFIX "list:"
164  else if ( strncmp( globstring, LIST_PREFIX, strlen( LIST_PREFIX ) ) == 0 ) {
165  LALParsedDataFile *list = NULL;
166  CHAR *listfname = NULL;
167 
168  /* extract list file name */
169  if ( ( listfname = XLALStringDuplicate( globstring + strlen( LIST_PREFIX ) ) ) == NULL ) {
171  }
172 #undef LIST_PREFIX
173 
174  /* read list of file names from file */
175  if ( XLALParseDataFile( &list, listfname ) != XLAL_SUCCESS ) {
176  XLAL_ERROR_NULL( XLAL_EFUNC, "Could not parse list file '%s'\n", listfname );
177  }
178 
179  /* allocate "filelist" */
180  numFiles = list->lines->nTokens;
181  if ( numFiles == 0 ) {
182  XLALPrintWarning( "\n%s: List file '%s' contains no file names\n", __func__, listfname );
183  LALFree( listfname );
186  }
187  if ( ( filelist = LALRealloc( filelist, numFiles * sizeof( CHAR * ) ) ) == NULL ) {
188  LALFree( listfname );
191  }
192 
193  /* copy file names from "list" to "filelist" */
194  for ( j = 0; j < numFiles; ++j ) {
195  ptr1 = list->lines->tokens[j];
196 
197  /* these prefixes are added to file names by e.g. ligo_data_find */
198 #define FILE_PREFIX "file://localhost/"
199  if ( strncmp( ptr1, FILE_PREFIX, strlen( FILE_PREFIX ) ) == 0 ) {
200  ptr1 += strlen( FILE_PREFIX ) - 1;
201  }
202 #undef FILE_PREFIX
203  else
204 #define FILE_PREFIX "file:///"
205  if ( strncmp( ptr1, FILE_PREFIX, strlen( FILE_PREFIX ) ) == 0 ) {
206  ptr1 += strlen( FILE_PREFIX ) - 1;
207  }
208 #undef FILE_PREFIX
209 
210  /* allocate "filelist", and cleanup if it fails */
211  if ( ( filelist[j] = LALCalloc( 1, strlen( ptr1 ) + 1 ) ) == NULL ) {
212  while ( j-- > 0 ) {
213  LALFree( filelist[j] );
214  }
215  LALFree( filelist );
216  LALFree( listfname );
219  }
220 
221  /* copy string */
222  strcpy( filelist[j], ptr1 );
223 
224  }
225 
226  /* cleanup */
227  LALFree( listfname );
229 
230  } /* if list file */
231 
232  else if ( is_pattern( globstring ) )
233 
234  { /* globstring is a single glob-style pattern */
235 
236  /* First we separate the globstring into directory-path and file-pattern */
237 
238 #ifndef _WIN32
239 #define DIR_SEPARATOR '/'
240 #else
241 #define DIR_SEPARATOR '\\'
242 #endif
243 
244  /* any path specified or not ? */
245  ptr1 = strrchr( globstring, DIR_SEPARATOR );
246  if ( ptr1 ) {
247  /* yes, copy directory-path */
248  dirlen = ( size_t )( ptr1 - globstring ) + 1;
249  if ( ( dname = LALCalloc( 1, dirlen ) ) == NULL ) {
251  }
252  strncpy( dname, globstring, dirlen );
253  dname[dirlen - 1] = '\0';
254 
255  ptr1 ++; /* skip dir-separator */
256  /* copy the rest as a glob-pattern for matching */
257  if ( ( fpattern = LALCalloc( 1, strlen( ptr1 ) + 1 ) ) == NULL ) {
258  LALFree( dname );
260  }
261  strcpy( fpattern, ptr1 );
262 
263  } /* if ptr1 */
264  else { /* no pathname given, assume "." */
265  if ( ( dname = LALCalloc( 1, 2 ) ) == NULL ) {
267  }
268  strcpy( dname, "." );
269 
270  if ( ( fpattern = LALCalloc( 1, strlen( globstring ) + 1 ) ) == NULL ) {
271  LALFree( dname );
273  }
274  strcpy( fpattern, globstring ); /* just file-pattern given */
275  } /* if !ptr */
276 
277 
278 #ifndef _MSC_VER
279  /* now go through the file-list in this directory */
280  if ( ( dir = opendir( dname ) ) == NULL ) {
281  XLALPrintError( "Can't open data-directory `%s`\n", dname );
282  LALFree( dname );
284  }
285 #else
286  if ( ( ptr3 = ( CHAR * )LALMalloc( strlen( dname ) + 3 ) ) == NULL ) {
287  return ( NULL );
288  }
289  sprintf( ptr3, "%s\\*", dname );
290  dir = _findfirst( ptr3, &entry );
291  LALFree( ptr3 );
292  if ( dir == -1 ) {
293  XLALPrintError( "Can't find file for pattern `%s`\n", ptr3 );
294  LALFree( dname );
296  }
297 #endif
298 
299 #ifndef _MSC_VER
300  while ( ( entry = readdir( dir ) ) != NULL )
301 #else
302  do
303 #endif
304  {
305 #ifndef _MSC_VER
306  thisFname = entry->d_name;
307 #else
308  thisFname = entry.name;
309 #endif
310 
311  /* now check if glob-pattern fpattern matches the current filename */
312  if ( amatch( thisFname, fpattern )
313  /* and check if we didnt' match some obvious garbage like "." or ".." : */
314  && strcmp( thisFname, "." ) && strcmp( thisFname, ".." ) ) {
315 
316  numFiles ++;
317  if ( ( filelist = LALRealloc( filelist, numFiles * sizeof( CHAR * ) ) ) == NULL ) {
318  LALFree( dname );
319  LALFree( fpattern );
321  }
322 
323  namelen = strlen( thisFname ) + strlen( dname ) + 2 ;
324 
325  if ( ( filelist[ numFiles - 1 ] = LALCalloc( 1, namelen ) ) == NULL ) {
326  for ( j = 0; j < numFiles; j++ ) {
327  LALFree( filelist[j] );
328  }
329  LALFree( filelist );
330  LALFree( dname );
331  LALFree( fpattern );
333  }
334 
335  sprintf( filelist[numFiles - 1], "%s%c%s", dname, DIR_SEPARATOR, thisFname );
336 
337  } /* if filename matched pattern */
338 
339  } /* while more directory entries */
340 #ifdef _MSC_VER
341  while ( _findnext( dir, &entry ) == 0 );
342 #endif
343 
344 #ifndef _MSC_VER
345  closedir( dir );
346 #else
347  _findclose( dir );
348 #endif
349 
350  LALFree( dname );
351  LALFree( fpattern );
352 
353  } /* if is_pattern */
354 
355  else
356 
357  { /* globstring is a single simple filename */
358  /* add it to the list of filenames as it is */
359 
360  numFiles++;
361  if ( ( filelist = LALRealloc( filelist, numFiles * sizeof( CHAR * ) ) ) == NULL ) {
363  }
364  namelen = strlen( globstring ) + 1;
365  if ( ( filelist[ numFiles - 1 ] = LALCalloc( 1, namelen ) ) == NULL ) {
366  LALFree( filelist );
368  }
369  strcpy( filelist[numFiles - 1], globstring );
370  }
371 
372  /* ok, did we find anything? */
373  if ( numFiles == 0 ) {
375  }
376 
377 
378  /* make a LALStringVector from the list of filenames */
379  if ( ( ret = LALCalloc( 1, sizeof( LALStringVector ) ) ) == NULL ) {
380  for ( j = 0; j < numFiles; j++ ) {
381  LALFree( filelist[j] );
382  }
383  LALFree( filelist );
385  }
386  ret->length = numFiles;
387  ret->data = filelist;
388 
389  /* sort this alphabetically (in-place) */
390  if ( numFiles > 1 ) {
391  XLALSortStringVector( ret );
392  }
393 
394  return ( ret );
395 
396 } /* XLALFindFiles() */
397 
398 
399 /* filename string is a glob-style pattern, i.e. it contains '*' or '?' or '[' */
400 static BOOLEAN is_pattern( const char *c )
401 {
402  while ( ( *c != '\0' ) && ( *c != '*' ) && ( *c != '?' ) && ( *c != '[' ) ) {
403  c++;
404  }
405  return ( *c != '\0' );
406 }
407 
408 
/*======================================================================*/
/*
 * robust glob pattern matcher
 * ozan s. yigit/dec 1994
 * public domain
 *
 * glob patterns:
 *  *       matches zero or more characters
 *  ?       matches any single character
 *  [set]   matches any character in the set
 *  [^set]  matches any character NOT in the set
 *          where a set is a group of characters or ranges. a range
 *          is written as two characters separated by a hyphen: a-z denotes
 *          all characters from a to z inclusive.
 *  [-set]  matches a literal hyphen and any character in the set
 *  []set]  matches a literal close bracket and any character in the set
 *
 *  char    matches itself except where char is '*' or '?' or '['
 *  \char   matches char, including any pattern character
 *
 * examples:
 *  a*c       ac abc abbc ...
 *  a?c       acc abc aXc ...
 *  a[a-z]c   aac abc acc ...
 *  a[-a-z]c  a-c aac abc ...
 *
 * amatch( str, p ) returns non-zero if the whole of 'str' matches the
 * pattern 'p', and zero otherwise.  Neither string is modified.
 */

#ifndef NEGATE
#define NEGATE '^' /* std cset negation char */
#endif

static int
amatch( char *str, char *p )
{
  enum { GLOB_MISS = 0, GLOB_HIT = 1 };

  /* each pass consumes one pattern element and, unless it returns,
   * exactly one character of the subject string */
  for ( ; *p != '\0'; str++ ) {
    int tok;

    /* once the subject is used up, only a '*' can still succeed */
    if ( *str == '\0' && *p != '*' ) {
      return GLOB_MISS;
    }

    tok = *p++;

    if ( tok == '*' ) {
      /* a run of stars is equivalent to a single one */
      while ( *p == '*' ) {
        p++;
      }

      /* trailing star: whatever is left of the subject matches */
      if ( *p == '\0' ) {
        return GLOB_HIT;
      }

      /* if a literal follows the star, jump ahead to its first occurrence */
      if ( *p != '?' && *p != '[' && *p != '\\' ) {
        while ( *str != '\0' && *str != *p ) {
          str++;
        }
      }

      /* try to match the rest of the pattern at every remaining position */
      for ( ; *str != '\0'; str++ ) {
        if ( amatch( str, p ) ) {
          return GLOB_HIT;
        }
      }
      return GLOB_MISS;
    }

    if ( tok == '?' ) {
      if ( *str == '\0' ) {
        return GLOB_MISS;
      }
      continue;
    }

    if ( tok == '[' ) {
      /*
       * ranges are inclusive: [a-z] is a, z and everything in between,
       * so a reversed range such as [z-a] only contains z and a.
       */
      const int ch = *str;
      int inverted = GLOB_MISS;
      int found = GLOB_MISS;

      if ( *p == NEGATE ) {
        inverted = GLOB_HIT;
        p++;
      }

      while ( !found && ( tok = *p++ ) != '\0' ) {
        /* the set must not end before its closing bracket */
        if ( *p == '\0' ) {
          return GLOB_MISS;
        }

        if ( *p == '-' ) { /* tok-x */
          ++p;
          if ( *p == '\0' ) {
            return GLOB_MISS;
          }
          if ( *p == ']' ) { /* tok-] : anything from tok upwards */
            if ( ch >= tok ) {
              found = GLOB_HIT;
            }
            break;
          }
          if ( ch == tok || ch == *p || ( ch > tok && ch < *p ) ) {
            found = GLOB_HIT;
          }
        } else { /* two plain members, or the last member before ']' */
          if ( tok == ch ) {
            found = GLOB_HIT;
          }
          if ( *p == ']' ) {
            break;
          }
          if ( *p == ch ) {
            found = GLOB_HIT;
          }
        }
      }

      if ( inverted == found ) {
        return GLOB_MISS;
      }

      /* skip whatever is left of the set, including the closing bracket */
      while ( *p != '\0' && *p != ']' ) {
        p++;
      }
      if ( *p++ == '\0' ) { /* unterminated set */
        return GLOB_MISS;
      }
      continue;
    }

    /* an escaped character is taken literally; a backslash at the very
     * end of the pattern stands for itself */
    if ( tok == '\\' && *p != '\0' ) {
      tok = *p++;
    }
    if ( tok != *str ) {
      return GLOB_MISS;
    }
  }

  /* pattern exhausted: a match requires the subject to be exhausted too */
  return ( *str == '\0' );
}
#define __func__
log an I/O error, i.e.
#define DIR_SEPARATOR
#define NEGATE
Definition: FindFiles.c:438
static BOOLEAN is_pattern(const char *c)
Definition: FindFiles.c:400
#define FILE_SEPARATOR
static int amatch(char *str, char *p)
Definition: FindFiles.c:442
#define FILE_PREFIX
#define LIST_PREFIX
int j
#define LALRealloc(p, n)
#define LALCalloc(m, n)
#define LALMalloc(n)
#define LALFree(p)
#define c
Internal SFT types and functions.
#define __attribute__(x)
int XLALParseDataFile(LALParsedDataFile **cfgdata, const CHAR *fname)
void XLALDestroyParsedDataFile(LALParsedDataFile *cfgdata)
unsigned char BOOLEAN
char CHAR
uint32_t UINT4
char char * XLALStringDuplicate(const char *s)
LALStringVector * XLALFindFiles(const CHAR *globstring)
Returns a list of filenames matching the input argument, which may be one of the following:
Definition: FindFiles.c:61
void XLALDestroyStringVector(LALStringVector *vect)
int XLALSortStringVector(LALStringVector *strings)
#define XLAL_ERROR_NULL(...)
int XLALPrintError(const char *fmt,...) _LAL_GCC_PRINTF_FORMAT_(1
int int XLALPrintWarning(const char *fmt,...) _LAL_GCC_PRINTF_FORMAT_(1
#define XLAL_CHECK_NULL(assertion,...)
XLAL_ENOMEM
XLAL_SUCCESS
XLAL_EFUNC
XLAL_EIO
XLAL_EINVAL
#define TRUE
#define FALSE
TokenList * lines
CHAR ** tokens
UINT4 nTokens