LAL  7.5.0.1-08ee4f4
StringToken.c
Go to the documentation of this file.
1 /*
2 * Copyright (C) 2007 Jolien Creighton
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; see the file COPYING. If not, write to the
16 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17 * MA 02110-1301 USA
18 */
19 
20 /**
21  * \file
22  * \ingroup StringInput_h
23  * \author Creighton, T. D.
24  *
25  * \brief Converts a string into a series of tokens, for use by other routines.
26  *
27  * ### Description ###
28  *
29  * The routine <tt>XLALCreateTokenList()</tt> parses <tt>*string</tt> as a
30  * sequence of tokens (substrings of non-null characters that do not
31  * appear in \c delimiters), separated by delimiters (substrings
32  * consisting only of characters that appear in \c delimiters), and
33  * terminated by the null character <tt>'\0'</tt>. The structure
34  * <tt>**list</tt> is created, storing the sequence of tokens as a list
35  * of null-terminated character strings.
36  *
37  * The output \c list should be a non-\c NULL handle that points
38  * to the value \c NULL (i.e.\ \c list\f$\neq\f$\c NULL but
39  * <tt>*list</tt>=\c NULL). Even if no tokens were found, <tt>*list</tt>
40  * will be created, but will have <tt>(*list)->nTokens</tt>=0,
41  * <tt>(*list)->tokens[0]</tt>=\c NULL, and
42  * <tt>(*list)->list</tt>=\c NULL. Note that this is \e not an
43  * error, so the calling routine need not guarantee in advance that
44  * \c string contain any non-delimiter characters.
45  *
46  * The routine <tt>XLALDestroyTokenList()</tt> destroys a list of tokens as
47  * created by <tt>XLALCreateTokenList()</tt>, setting <tt>*list</tt> to
48  * \c NULL.
49  *
50  * ### Algorithm ###
51  *
52  * The <tt>XLALCreateTokenList()</tt> function is not particularly
53  * memory-efficient, requiring internal storage up to twice the length of
54  * <tt>*string</tt>. It first creates a working copy of
55  * <tt>string->data</tt>, and replaces all occurrences of characters
56  * appearing in <tt>*delimiters</tt> with <tt>'\0'</tt>, while at the same
57  * time keeping track of the number and total length of all tokens. It
58  * then allocates a contiguous block of memory to store all the tokens
59  * (separated by and terminated with single <tt>'\0'</tt> characters), and
60  * a set of <tt>CHAR *</tt> pointers to point to the individual tokens in
61  * this block. Then the routine proceeds through the working copy one
62  * last time, copying tokens into the token list and setting the token
63  * pointers accordingly, before destroying the working copy.
64  *
65  */
66 
67 #include <string.h>
68 #include <lal/LALStdlib.h>
69 #include <lal/AVFactories.h>
70 #include <lal/StringInput.h>
71 
72 /**
73  * \deprecated Use XLALCreateTokenList() instead
74  */
75 void
77  TokenList ** list,
78  const CHAR * string, const CHAR * delimiters)
79 {
80  BOOLEAN delimiter = 1; /* whether current character is a delimiter */
81  UINT4 i = 0, j = 0; /* indecies */
82  UINT4 nTokens = 0; /* number of tokens */
83  UINT4 sLength; /* length of string */
84  UINT4 tLength = 0; /* length of token list */
85  CHAR *copy; /* working copy of token list */
86 
87  INITSTATUS(stat);
88  ATTATCHSTATUSPTR(stat);
89 
90  /* Check for valid input arguments. */
91  ASSERT(list, stat, STRINGINPUTH_ENUL, STRINGINPUTH_MSGENUL);
92  ASSERT(string, stat, STRINGINPUTH_ENUL, STRINGINPUTH_MSGENUL);
93  ASSERT(delimiters, stat, STRINGINPUTH_ENUL, STRINGINPUTH_MSGENUL);
94  ASSERT(!*list, stat, STRINGINPUTH_EOUT, STRINGINPUTH_MSGEOUT);
95 
96  /* Create working copy of token list. */
97  sLength = strlen(string) + 1;
98  if (!(copy = (CHAR *) LALMalloc(sLength * sizeof(CHAR)))) {
99  ABORT(stat, STRINGINPUTH_ENUL, STRINGINPUTH_MSGENUL);
100  }
101  for (i = 0; i < sLength; i++) {
102  CHAR c = string[i];
103  if (strchr(delimiters, c)) {
104  copy[i] = '\0';
105  delimiter = 1;
106  } else {
107  copy[i] = c;
108  tLength++;
109  if (delimiter) {
110  delimiter = 0;
111  nTokens++;
112  }
113  }
114  }
115 
116  /* Create the token list. */
117  if (!(*list = (TokenList *) LALMalloc(sizeof(TokenList)))) {
118  LALFree(copy);
119  ABORT(stat, STRINGINPUTH_ENUL, STRINGINPUTH_MSGENUL);
120  }
121  if (!((*list)->tokens =
122  (CHAR **) LALMalloc((nTokens + 1) * sizeof(CHAR *)))) {
123  LALFree(*list);
124  *list = NULL;
125  LALFree(copy);
126  ABORT(stat, STRINGINPUTH_ENUL, STRINGINPUTH_MSGENUL);
127  }
128  (*list)->nTokens = nTokens;
129  (*list)->list = NULL;
130 
131 
132  /* If tokens were found, copy them over and set up pointers. */
133  if (nTokens) {
134  CHAR *listData; /* pointer to token list data */
135  LALCHARCreateVector(stat->statusPtr, &((*list)->list),
136  nTokens + tLength);
137  BEGINFAIL(stat) {
138  LALFree((*list)->tokens);
139  LALFree(*list);
140  *list = NULL;
141  LALFree(copy);
142  }
143  ENDFAIL(stat);
144  listData = (*list)->list->data;
145  i = 0;
146  while (i < sLength) {
147  if (copy[i]) {
148  tLength = strlen(copy + i) + 1;
149  memcpy(listData, copy + i, tLength * sizeof(CHAR));
150  (*list)->tokens[j++] = listData;
151  i += tLength;
152  listData += tLength;
153  } else
154  i++;
155  }
156  }
157  (*list)->tokens[j] = NULL;
158 
159  /* Clean up and exit. */
160  LALFree(copy);
161  DETATCHSTATUSPTR(stat);
162  RETURN(stat);
163 }
164 
165 /** Split given input string into a list of 'tokens' separated by any
166  * of the characters given in 'delimiters'
167  */
168 int XLALCreateTokenList(TokenList ** list, //!< [out] list of tokens
169  const CHAR * string, //!< [in] string to split into tokens
170  const CHAR * delimiters //!< [in] set of token-delimiter characters
171  )
172 {
173  XLAL_CHECK((list != NULL) && ((*list) == NULL), XLAL_EINVAL);
174  XLAL_CHECK(string != NULL, XLAL_EINVAL);
175  XLAL_CHECK(delimiters != NULL, XLAL_EINVAL);
176 
177  // prepare output TokenList structure
178  TokenList *ret;
179  XLAL_CHECK((ret = XLALCalloc(1, sizeof(*ret))) != NULL, XLAL_ENOMEM);
180 
181  size_t stringLen = strlen(string);
182  if ((ret->list = XLALCreateCHARVector(stringLen + 1)) == NULL) {
183  XLALFree(ret);
185  }
186  strcpy(ret->list->data, string);
187 
188  // initialize pointers to walk along local copy of input string
189  char *ptr = ret->list->data;
190  const char *endPtr = ptr + stringLen;
191 
192  UINT4 nTokens = 0;
193  UINT4 nTokensAlloc = 0;
194 
195  while ((ptr != NULL) && (ptr < endPtr)) {
196  // skip and nuke delimiter
197  size_t skip = strspn(ptr, delimiters);
198  memset(ptr, 0, skip); // fill with '0'
199  ptr += skip;
200 
201  if (ptr >= endPtr) {
202  break;
203  }
204  // 'ptr' points at next token
205  nTokens++;
206 
207  // allocate next batch of token-pointers if required
208  if (nTokens > nTokensAlloc) {
209  nTokensAlloc = 2 * nTokens; // proceed by doubling current space
210  if ((ret->tokens = XLALRealloc(ret->tokens, nTokensAlloc * sizeof(char *))) == NULL) {
213  }
214  } // if nTokens > nTokensAlloc
215 
216  // enter new token-pointer into list
217  ret->tokens[nTokens - 1] = ptr;
218 
219  // advance to next delimiter
220  ptr = strpbrk(ptr, delimiters);
221 
222  } // while ptr < endPtr
223 
224  // reduce tokens-array to actual size
225  if ((ret->tokens = XLALRealloc(ret->tokens, nTokens * sizeof(char *))) == NULL) {
228  }
229  ret->nTokens = nTokens;
230 
231  // return result
232  (*list) = ret;
233 
234  return XLAL_SUCCESS;
235 
236 } // XLALCreateTokenList()
237 
238 /**
239  * \deprecated Use XLALDestroyTokenList() instead
240  */
242 {
243  INITSTATUS(stat);
244  ATTATCHSTATUSPTR(stat);
245 
246  /* Check for valid input arguments. */
247  ASSERT(list, stat, STRINGINPUTH_ENUL, STRINGINPUTH_MSGENUL);
248  ASSERT(*list, stat, STRINGINPUTH_ENUL, STRINGINPUTH_MSGENUL);
249 
250  /* Free everything and exit. */
251  if ((*list)->list) {
252  TRY(LALCHARDestroyVector(stat->statusPtr, &((*list)->list)), stat);
253  }
254  LALFree((*list)->tokens);
255  LALFree(*list);
256  *list = NULL;
257  DETATCHSTATUSPTR(stat);
258  RETURN(stat);
259 }
260 
261 /** See StringToken.c for documentation */
263 {
264  /* Free everything and exit. */
265  if (list) {
266  if (list->list)
268  XLALFree(list->tokens);
269  XLALFree(list);
270  }
271 }
#define LALMalloc(n)
Definition: LALMalloc.h:93
#define LALFree(p)
Definition: LALMalloc.h:96
#define ABORT(statusptr, code, mesg)
#define ENDFAIL(statusptr)
#define TRY(func, statusptr)
#define ATTATCHSTATUSPTR(statusptr)
#define ASSERT(assertion, statusptr, code, mesg)
#define DETATCHSTATUSPTR(statusptr)
#define INITSTATUS(statusptr)
#define RETURN(statusptr)
#define BEGINFAIL(statusptr)
void XLALDestroyTokenList(TokenList *list)
See StringToken.c for documentation.
Definition: StringToken.c:262
void LALDestroyTokenList(LALStatus *stat, TokenList **list)
Definition: StringToken.c:241
void LALCreateTokenList(LALStatus *stat, TokenList **list, const CHAR *string, const CHAR *delimiters)
Definition: StringToken.c:76
int XLALCreateTokenList(TokenList **list, const CHAR *string, const CHAR *delimiters)
Split given input string into a list of 'tokens' separated by any of the characters given in 'delimit...
Definition: StringToken.c:168
unsigned char BOOLEAN
Boolean logical type, see Headers LAL(Atomic)Datatypes.h for more details.
char CHAR
One-byte signed integer, see Headers LAL(Atomic)Datatypes.h for more details.
uint32_t UINT4
Four-byte unsigned integer.
#define XLALCalloc(m, n)
Definition: LALMalloc.h:45
#define XLALFree(p)
Definition: LALMalloc.h:47
#define XLALRealloc(p, n)
Definition: LALMalloc.h:46
#define STRINGINPUTH_ENUL
Unexpected null pointer in arguments.
Definition: StringInput.h:120
#define STRINGINPUTH_EOUT
Output handle points to a non-null pointer.
Definition: StringInput.h:121
void LALCHARCreateVector(LALStatus *, CHARVector **, UINT4)
void LALCHARDestroyVector(LALStatus *, CHARVector **)
CHARVector * XLALCreateCHARVector(UINT4 length)
void XLALDestroyCHARVector(CHARVector *vector)
#define XLAL_ERROR(...)
Macro to invoke a failure from a XLAL routine returning an integer.
Definition: XLALError.h:700
#define XLAL_CHECK(assertion,...)
Macro to test an assertion and invoke a failure if it is not true in a function that returns an integ...
Definition: XLALError.h:810
@ XLAL_ENOMEM
Memory allocation error.
Definition: XLALError.h:407
@ XLAL_SUCCESS
Success return value (not an error number)
Definition: XLALError.h:401
@ XLAL_EINVAL
Invalid argument.
Definition: XLALError.h:409
CHAR * data
Pointer to the data array.
Definition: LALDatatypes.h:78
LAL status structure, see The LALStatus structure for more details.
Definition: LALDatatypes.h:947
struct tagLALStatus * statusPtr
Pointer to the next node in the list; NULL if this function is not reporting a subroutine error.
Definition: LALDatatypes.h:954
This structure stores a number of null-terminated strings of arbitrary length.
Definition: StringInput.h:135
CHAR ** tokens
A list of pointers to the individual tokens; the elements tokens[0..nTokens-1] point to tokens,...
Definition: StringInput.h:137
CHARVector * list
The flattened list of tokens, separated by (and terminated with) '\0' characters.
Definition: StringInput.h:141
UINT4 nTokens
The number of tokens in the list.
Definition: StringInput.h:136