LAL  7.5.0.1-b72065a
cache.py
Go to the documentation of this file.
1 # Copyright (C) 2013 Duncan Macleod
2 # Copyright (C) 2016 Kipp Cannon
3 #
4 # This program is free software; you can redistribute it and/or modify it
5 # under the terms of the GNU General Public License as published by the
6 # Free Software Foundation; either version 3 of the License, or (at your
7 # option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful, but
10 # WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
12 # Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License along
15 # with this program; if not, write to the Free Software Foundation, Inc.,
16 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Modules extending the Cache file functionality from LAL
"""

from __future__ import print_function
21 
22 import os
23 import re
24 import tempfile
25 from functools import total_ordering
26 from urllib.parse import (
27  urlparse,
28  urlunparse,
29 )
30 
31 from ligo import segments
32 
33 from .. import git_version
34 from ..lal import CacheImport
35 from ..lal import LIGOTimeGPS
36 
37 __author__ = "Duncan Macleod <duncan.macleod@ligo.org>"
38 __version__ = git_version.id
39 __date__ = git_version.date
40 
41 __all__ = ['CacheEntry', 'lalcache_from_gluecache']
42 
def lalcache_from_gluecache(cache):
    """Convert a glue.lal.Cache object to a lal.Cache object.
    Writes cache to temporary file and reads to Cache.

    NOTE: as a side effect, the segment of every entry in the input
    cache is replaced in place by a segment of the same type whose
    endpoints have been passed through int().

    @param cache
        LAL cache object from GLUE to convert
        type cache glue.lal.Cache

    @returns a lal.Cache object representing the same data
    """
    # delete=False because the file is re-opened by name after being
    # closed;  it is removed explicitly in the finally clause below.
    tmp = tempfile.NamedTemporaryFile(delete=False, mode="w")
    try:
        with tmp:
            for entry in cache:
                seg = entry.segment
                entry.segment = type(seg)(int(seg[0]), int(seg[1]))
            cache.tofile(tmp)
        # the file is closed (and therefore flushed) before it is read
        # back in by name
        return CacheImport(tmp.name)
    finally:
        # never leave the temporary file behind, even on failure
        os.remove(tmp.name)
61 
62 
63 #
64 # Representation of a line in a LAL cache file
65 #
66 
67 
@total_ordering
class CacheEntry(object):
    """
    A Python object representing one line in a LAL cache file.

    The LAL cache format is defined elsewhere, and what follows is meant
    only to be informative, not an official specification.  Each line in a
    LAL cache identifies a single file, and the line consists of five
    columns of white-space delimited text.

    The first column, "observatory", generally stores the name of an
    observatory site or one or more instruments (preferably delimited by
    ",", but often there is no delimiter between instrument names in which
    case they should be 2 characters each).

    The second column, "description", stores a short string tag that is
    usually all capitals with "_" separating components, in the style of
    the description part of the LIGO-Virgo frame filename format.

    The third and fourth columns store the start time and duration in GPS
    seconds of the interval spanned by the file identified by the cache
    line.  When the file does not start on an integer second or its
    duration is not an integer number of seconds, the conventions of the
    LIGO-Virgo frame filename format apply.

    The fifth (last) column stores the file's URL.

    The values for these columns are stored in the .observatory,
    .description, .segment and .url attributes of instances of this class,
    respectively.  The .segment attribute stores a ligo.segments.segment
    object describing the interval spanned by the file.  Any of these
    attributes except the URL is allowed to be None.

    Example (parse a string):

    >>> c = CacheEntry("H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
    >>> c.scheme
    'file'
    >>> c.host
    'localhost'

    Example (one-liners to read and write a cache file):

    >>> import os
    >>> filename = "874000000-20000.cache"
    >>> # adjustment for doctest in out-of-tree builds
    >>> inname = os.path.join(os.environ.get("LAL_TEST_SRCDIR", "."), filename)
    >>> # one-liner to read
    >>> cache = list(map(CacheEntry, open(inname)))
    >>> # one-liner to write
    >>> print(*cache, sep = "\\n", file = open(filename + ".new", "w"))

    Example (extract segmentlist dictionary from LAL cache):

    >>> from ligo import segments
    >>> seglists = segments.segmentlistdict()
    >>> for cacheentry in cache:
    ...     seglists |= cacheentry.segmentlistdict
    ...

    NOTE:  the CacheEntry type defines a comparison operation and a
    .__hash__() implementation, both of which disregard the URL.  That is,
    if two CacheEntry objects differ only by URL and otherwise have same
    metadata, they are considered to be redundant copies of the same data.
    For example, uniquification with a set() will retain only one redundant
    copy, selected at random.

    >>> x = CacheEntry("H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
    >>> y = CacheEntry("H1 S5 815901601 576.5 gsiftp://data.server.org/bigpileofdata/H1-815901601-576.xml")
    >>> x == y
    True
    >>> len(set((x, y)))
    1

    NOTE:  this is a pure Python object providing an alternative
    representation of the contents of a LAL cache file to the C
    implementation in the LAL library proper.  The two are not
    interchangeable.

    See also:

    ligo.segments.utils.fromlalcache()
    """
    # How to parse a line in a LAL cache file.  Five white-space
    # delimited columns.
    _regex = re.compile(r"\A\s*(?P<obs>\S+)\s+(?P<dsc>\S+)\s+(?P<strt>\S+)\s+(?P<dur>\S+)\s+(?P<url>\S+)\s*\Z")
    # How to parse a file name of the form
    # observatory-description-start-duration.extension, optionally
    # preceded by any number of "/"-terminated path components.
    _url_regex = re.compile(r"\A((.*/)*(?P<obs>[^/]+)-(?P<dsc>[^/]+)-(?P<strt>[^/]+)-(?P<dur>[^/\.]+)\.[^/]+)\Z")

    def __init__(self, *args, **kwargs):
        """
        Initialize a CacheEntry object.  The arguments can take two forms:
        a single string argument, which is interpreted and parsed as a line
        from a LAL cache file, or four arguments used to explicitly
        initialize the observatory, description, segment and URL in that
        order.  When parsing a single line of text from a LAL cache, an
        optional key-word argument "coltype" can be provided to set the
        type the start and durations are parsed as.  The default is
        lal.LIGOTimeGPS.

        Raises ValueError if a single string argument cannot be parsed as
        a cache line, and TypeError if the number of positional arguments
        is neither 1 nor 4 or if unexpected keyword arguments are given.

        Example:

        >>> c = CacheEntry("H1", "S5", segments.segment(815901601, 815902177.5), "file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
        >>> print(c.segment)
        [815901601 ... 815902177.5)
        >>> print(str(c))
        H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml
        >>> c = CacheEntry("H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
        >>> print(c.segment)
        [815901601 ... 815902177.5)
        >>> print(CacheEntry("H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml", coltype = float).segment)
        [815901601.0 ... 815902177.5)

        See also the .from_T050017() class method for an
        alternative initialization mechanism.
        """
        if len(args) == 1:
            # parse line of text as an entry in a cache file
            match = self._regex.search(args[0])
            if match is None:
                raise ValueError("could not convert %s to CacheEntry" % repr(args[0]))
            match = match.groupdict()
            self.observatory = match["obs"]
            self.description = match["dsc"]
            # FIXME:  remove typecasts when LIGOTimeGPS can be passed a unicode
            start = str(match["strt"])
            duration = str(match["dur"])
            coltype = kwargs.pop("coltype", LIGOTimeGPS)
            if start == "-" and duration == "-":
                # no segment information
                self.segment = None
            else:
                start = coltype(start)
                self.segment = segments.segment(start, start + coltype(duration))
            # assignment goes through the url property setter, which
            # populates .scheme, .host and .path
            self.url = match["url"]
            if kwargs:
                raise TypeError("unrecognized keyword arguments: %s" % ", ".join(kwargs))
        elif len(args) == 4:
            # parse arguments as observatory, description,
            # segment, url
            if kwargs:
                raise TypeError("invalid arguments: %s" % ", ".join(kwargs))
            self.observatory, self.description, self.segment, self.url = args
        else:
            # wrap args in a 1-tuple so that the whole tuple is
            # formatted, instead of being consumed as the list of
            # format arguments
            raise TypeError("invalid arguments: %s" % (args,))

        # "-" indicates an empty column
        if self.observatory == "-":
            self.observatory = None
        if self.description == "-":
            self.description = None

    def __str__(self):
        """
        Convert the CacheEntry to a string in the format of a line in a LAL
        cache.  Used to write the CacheEntry to a file.

        Example:

        >>> c = CacheEntry("H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
        >>> str(c)
        'H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml'
        """
        if self.segment is not None:
            start = str(self.segment[0])
            duration = str(abs(self.segment))
        else:
            start = "-"
            duration = "-"
        return "%s %s %s %s %s" % (self.observatory or "-", self.description or "-", start, duration, self.url)

    def __lt__(self, other):
        """
        Compare two CacheEntry objects by observatory, then description,
        then segment.  CacheEntry objects that have different URLs but for
        which all other metadata are the same are considered to be
        equivalent.  If two entries differ only by their URL, they are
        considered to be redundant copies of the same data, and by
        comparing them as equal the Python sort operation (which is a
        stable sort) will preserve their relative order.  By preserving the
        order of redundant copies, we allow the preference for the order in
        which redundant copies are to be attempted to be conveyed by their
        order in the list, and preserved.

        Raises TypeError if other is not a CacheEntry.
        """
        if not isinstance(other, CacheEntry):
            raise TypeError("can only compare CacheEntry to CacheEntry")
        return (self.observatory, self.description, self.segment) < (other.observatory, other.description, other.segment)

    def __eq__(self, other):
        """
        Compare two CacheEntry objects by observatory, then description,
        then segment.  CacheEntry objects that have different URLs but for
        which all other metadata are the same are considered to be
        equivalent.  If two entries differ only by their URL, they are
        considered to be redundant copies of the same data, and by
        comparing them as equal the Python sort operation (which is a
        stable sort) will preserve their relative order.  By preserving the
        order of redundant copies, we allow the preference for the order in
        which redundant copies are to be attempted to be conveyed by their
        order in the list, and preserved.

        Raises TypeError if other is not a CacheEntry.
        """
        if not isinstance(other, CacheEntry):
            raise TypeError("can only compare CacheEntry to CacheEntry")
        return (self.observatory, self.description, self.segment) == (other.observatory, other.description, other.segment)

    def __hash__(self):
        """
        CacheEntry objects are hashed by the tuple (observatory,
        description, segment), i.e., the URL is disregarded.
        """
        return hash((self.observatory, self.description, self.segment))

    @property
    def url(self):
        """
        The cache entry's URL.  The URL is constructed from the values of
        the scheme, host, and path attributes.  Assigning a value to the
        URL attribute causes the value to be parsed and the scheme, host
        and path attributes updated.
        """
        return urlunparse((self.scheme, self.host, self.path, None, None, None))

    @url.setter
    def url(self, url):
        # only the first three components of the parsed URL are retained
        self.scheme, self.host, self.path = urlparse(url)[:3]

    @property
    def segmentlistdict(self):
        """
        A segmentlistdict object describing the instruments and time
        spanned by this CacheEntry.  A new object is constructed each time
        this attribute is accessed (segments are immutable so there is no
        reason to try to share a reference to the CacheEntry's internal
        segment;  modifications of one would not be reflected in the other
        anyway).

        Example:

        >>> c = CacheEntry("H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
        >>> c.segmentlistdict['H1']
        [segment(LIGOTimeGPS(815901601, 0), LIGOTimeGPS(815902177, 500000000))]

        The \"observatory\" column of the cache entry, which is frequently
        used to store instrument names, is parsed into instrument names for
        the dictionary keys using the same rules as
        ligo.lw.lsctables.instrumentsproperty.get().

        Example:

        >>> c = CacheEntry("H1H2, S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1H2-815901601-576.xml")
        >>> c.segmentlistdict['H1H2']
        [segment(LIGOTimeGPS(815901601, 0), LIGOTimeGPS(815902177, 500000000))]
        """
        if self.observatory is None:
            instruments = (None,)
        else:
            # comma-delimited names, stripped of white space, with
            # empty names discarded
            instruments = {obs for obs in map(str.strip, self.observatory.split(",")) if obs}
        # an entry without segment information contributes empty lists
        contents = [self.segment] if self.segment is not None else []
        return segments.segmentlistdict((instrument, segments.segmentlist(contents)) for instrument in instruments)

    @classmethod
    def from_T050017(cls, url, coltype = LIGOTimeGPS):
        """
        Parse a URL in the style of T050017-00 into a CacheEntry.  The
        T050017-00 file name format is, essentially,

        observatory-description-start-duration.extension

        The start and duration are converted using coltype.  Raises
        ValueError if the URL does not match this format.

        Example:

        >>> c = CacheEntry.from_T050017("file://localhost/data/node144/frames/S5/strain-L2/LLO/L-L1_RDS_C03_L2-8365/L-L1_RDS_C03_L2-836562330-83.gwf")
        >>> c.observatory
        'L'
        >>> c.host
        'localhost'
        >>> os.path.basename(c.path)
        'L-L1_RDS_C03_L2-836562330-83.gwf'
        """
        match = cls._url_regex.search(url)
        if not match:
            raise ValueError("could not convert %s to CacheEntry" % repr(url))
        observatory = match.group("obs")
        description = match.group("dsc")
        # FIXME:  remove typecasts when LIGOTimeGPS can be passed a unicode
        start = str(match.group("strt"))
        duration = str(match.group("dur"))
        if start == "-" and duration == "-":
            # no segment information
            segment = None
        else:
            start = coltype(start)
            segment = segments.segment(start, start + coltype(duration))
        return cls(observatory, description, segment, url)
static size_t hash(const char *s)
Definition: LALDict.c:51
A Python object representing one line in a LAL cache file.
Definition: cache.py:150
observatory
Definition: cache.py:190
segment
Definition: cache.py:198
def __lt__(self, other)
Compare two CacheEntry objects by observatory, then description, then segment.
Definition: cache.py:252
def segmentlistdict(self)
A segmentlistdict object describing the instruments and time spanned by this CacheEntry.
Definition: cache.py:321
_regex
Definition: cache.py:153
path
Definition: cache.py:293
description
Definition: cache.py:191
url
Definition: cache.py:202
def __init__(self, *args, **kwargs)
Initialize a CacheEntry object.
Definition: cache.py:182
def url(self, url)
Definition: cache.py:292
def from_T050017(cls, url, coltype=LIGOTimeGPS)
Parse a URL in the style of T050017-00 into a CacheEntry.
Definition: cache.py:345
_url_regex
Definition: cache.py:154
def __str__(self)
Convert the CacheEntry to a string in the format of a line in a LAL cache.
Definition: cache.py:231
def __eq__(self, other)
Compare two CacheEntry objects by observatory, then description, then segment.
Definition: cache.py:269
def url(self)
The cache entry's URL.
Definition: cache.py:288
def __hash__(self)
CacheEntry objects are hashed by the tuple (observatory, description, segment), i....
Definition: cache.py:278
def lalcache_from_gluecache(cache)
Convert a glue.lal.Cache object to a lal.Cache object.
Definition: cache.py:52