Loading [MathJax]/extensions/TeX/AMSsymbols.js
LAL 7.7.0.1-00ddc7f
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Modules Pages
cache.py
Go to the documentation of this file.
1# Copyright (C) 2013 Duncan Macleod
2# Copyright (C) 2016 Kipp Cannon
3#
4# This program is free software; you can redistribute it and/or modify it
5# under the terms of the GNU General Public License as published by the
6# Free Software Foundation; either version 3 of the License, or (at your
7# option) any later version.
8#
9# This program is distributed in the hope that it will be useful, but
10# WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
12# Public License for more details.
13#
14# You should have received a copy of the GNU General Public License along
15# with this program; if not, write to the Free Software Foundation, Inc.,
16# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Modules extending the Cache file functionality from LAL
"""

# NOTE: the docstring must be the first statement of the module to be
# stored as __doc__; a __future__ import is still valid directly after it.
from __future__ import print_function
21
22import os
23import re
24import tempfile
25from functools import total_ordering
26from urllib.parse import (
27 urlparse,
28 urlunparse,
29)
30
31import igwn_segments as segments
32
33from .. import git_version
34from ..lal import CacheImport
35from ..lal import LIGOTimeGPS
36
# Module metadata.  The version and date are read from the package's
# git_version module rather than being hard-coded here.
__author__ = "Duncan Macleod <duncan.macleod@ligo.org>"
__version__ = git_version.id
__date__ = git_version.date

# Public names of this module (what "from <module> import *" exports).
__all__ = ['CacheEntry', 'lalcache_from_gluecache']
42
def lalcache_from_gluecache(cache):
    """Convert a glue.lal.Cache object to a lal.Cache object.

    Writes cache to temporary file and reads to Cache.  The temporary
    file is removed again before returning, also when the conversion
    fails part-way.

    NOTE: the segment of every entry of the input cache is replaced, in
    place, by a segment of the same type whose boundaries have been
    converted with int().

    @param cache
        LAL cache object from GLUE to convert
        type cache glue.lal.Cache

    @returns a lal.Cache object representing the same data
    """
    # delete=False because the file is re-opened by name by CacheImport();
    # we are therefore responsible for removing it ourselves.
    t = tempfile.NamedTemporaryFile(delete=False, mode="w")
    try:
        with t:
            for e in cache:
                e.segment = type(e.segment)(int(e.segment[0]), int(e.segment[1]))
            cache.tofile(t)
        # the file is closed (and so fully flushed to disk) at this point,
        # which guarantees CacheImport() sees everything that was written
        return CacheImport(t.name)
    finally:
        os.remove(t.name)
61
62
63#
64# Representation of a line in a LAL cache file
65#
66
67
@total_ordering
class CacheEntry(object):
    """
    A Python object representing one line in a LAL cache file.

    The LAL cache format is defined elsewhere, and what follows is meant
    only to be informative, not an official specification.  Each line in a
    LAL cache identifies a single file, and the line consists of five
    columns of white-space delimited text.

    The first column, "observatory", generally stores the name of an
    observatory site or one or more instruments (preferably delimited by
    ",", but often there is no delimiter between instrument names in which
    case they should be 2 characters each).

    The second column, "description", stores a short string tag that is
    usually all capitals with "_" separating components, in the style of
    the description part of the LIGO-Virgo frame filename format.

    The third and fourth columns store the start time and duration in GPS
    seconds of the interval spanned by the file identified by the cache
    line.  When the file does not start on an integer second or its
    duration is not an integer number of seconds, the conventions of the
    LIGO-Virgo frame filename format apply.

    The fifth (last) column stores the file's URL.

    The values for these columns are stored in the .observatory,
    .description, .segment and .url attributes of instances of this class,
    respectively.  The .segment attribute stores a igwn_segments.segment
    object describing the interval spanned by the file.  Any of these
    attributes except the URL is allowed to be None.

    Example (parse a string):

    >>> c = CacheEntry("H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
    >>> c.scheme
    'file'
    >>> c.host
    'localhost'

    Example (one-liners to read and write a cache file):

    >>> import os
    >>> filename = "874000000-20000.cache"
    >>> # adjustment for doctest in out-of-tree builds
    >>> inname = os.path.join(os.environ.get("LAL_TEST_SRCDIR", "."), filename)
    >>> # one-liner to read
    >>> cache = list(map(CacheEntry, open(inname)))
    >>> # one-liner to write
    >>> print(*cache, sep = "\\n", file = open(filename + ".new", "w"))

    Example (extract segmentlist dictionary from LAL cache):

    >>> import igwn_segments as segments
    >>> seglists = segments.segmentlistdict()
    >>> for cacheentry in cache:
    ...     seglists |= cacheentry.segmentlistdict
    ...

    NOTE:  the CacheEntry type defines a comparison operation and a
    .__hash__() implementation, both of which disregard the URL.  That is,
    if two CacheEntry objects differ only by URL and otherwise have same
    metadata, they are considered to be redundant copies of the same data.
    For example, uniquification with a set() will retain only one redundant
    copy, selected at random.

    >>> x = CacheEntry("H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
    >>> y = CacheEntry("H1 S5 815901601 576.5 gsiftp://data.server.org/bigpileofdata/H1-815901601-576.xml")
    >>> x == y
    True
    >>> len(set((x, y)))
    1

    NOTE:  this is a pure Python object providing an alternative
    representation of the contents of a LAL cache file to the C
    implementation in the LAL library proper.  The two are not
    interchangeable.

    See also:

    igwn_segments.utils.fromlalcache()
    """
    # How to parse a line in a LAL cache file.  Five white-space
    # delimited columns.
    _regex = re.compile(r"\A\s*(?P<obs>\S+)\s+(?P<dsc>\S+)\s+(?P<strt>\S+)\s+(?P<dur>\S+)\s+(?P<url>\S+)\s*\Z")
    # How to parse a T050017-style file name at the end of a URL:
    # observatory-description-start-duration.extension
    _url_regex = re.compile(r"\A((.*/)*(?P<obs>[^/]+)-(?P<dsc>[^/]+)-(?P<strt>[^/]+)-(?P<dur>[^/\.]+)\.[^/]+)\Z")

    def __init__(self, *args, **kwargs):
        """
        Initialize a CacheEntry object.  The arguments can take two forms:
        a single string argument, which is interpreted and parsed as a line
        from a LAL cache file, or four arguments used to explicitly
        initialize the observatory, description, segment and URL in that
        order.  When parsing a single line of text from a LAL cache, an
        optional key-word argument "coltype" can be provided to set the
        type the start and durations are parsed as.  The default is
        lal.LIGOTimeGPS.

        Raises ValueError if a single string argument cannot be parsed as
        a cache line, and TypeError if the number of positional arguments
        is neither 1 nor 4 or if unexpected key-word arguments are given.

        Example:

        >>> c = CacheEntry("H1", "S5", segments.segment(815901601, 815902177.5), "file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
        >>> print(c.segment)
        [815901601 ... 815902177.5)
        >>> print(str(c))
        H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml
        >>> c = CacheEntry("H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
        >>> print(c.segment)
        [815901601 ... 815902177.5)
        >>> print(CacheEntry("H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml", coltype = float).segment)
        [815901601.0 ... 815902177.5)

        See also the .from_T050017() class method for an
        alternative initialization mechanism.
        """
        if len(args) == 1:
            # parse line of text as an entry in a cache file
            match = self._regex.search(args[0])
            if match is None:
                raise ValueError("could not convert %s to CacheEntry" % repr(args[0]))
            columns = match.groupdict()
            self.observatory = columns["obs"]
            self.description = columns["dsc"]
            # FIXME:  remove typecasts when LIGOTimeGPS can be passed a unicode
            start = str(columns["strt"])
            duration = str(columns["dur"])
            coltype = kwargs.pop("coltype", LIGOTimeGPS)
            if start == "-" and duration == "-":
                # no segment information
                self.segment = None
            else:
                start = coltype(start)
                self.segment = segments.segment(start, start + coltype(duration))
            # assign through the .url property so that the scheme, host
            # and path attributes get populated
            self.url = columns["url"]
            if kwargs:
                raise TypeError("unrecognized keyword arguments: %s" % ", ".join(kwargs))
        elif len(args) == 4:
            # parse arguments as observatory, description,
            # segment, url
            if kwargs:
                raise TypeError("invalid arguments: %s" % ", ".join(kwargs))
            self.observatory, self.description, self.segment, self.url = args
        else:
            # wrap args in a 1-tuple:  formatting with the args tuple
            # itself would fail inside the % operator instead of
            # producing this message
            raise TypeError("invalid arguments: %s" % (args,))

        # "-" indicates an empty column
        if self.observatory == "-":
            self.observatory = None
        if self.description == "-":
            self.description = None

    def __str__(self):
        """
        Convert the CacheEntry to a string in the format of a line in a LAL
        cache.  Used to write the CacheEntry to a file.  Columns whose
        value is not set are written as "-".

        Example:

        >>> c = CacheEntry("H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
        >>> str(c)
        'H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml'
        """
        if self.segment is not None:
            start = str(self.segment[0])
            duration = str(abs(self.segment))
        else:
            start = "-"
            duration = "-"
        return "%s %s %s %s %s" % (self.observatory or "-", self.description or "-", start, duration, self.url)

    def __lt__(self, other):
        """
        Compare two CacheEntry objects by observatory, then description,
        then segment.  CacheEntry objects that have different URLs but for
        which all other metadata are the same are considered to be
        equivalent.  If two entries differ only by their URL, they are
        considered to be redundant copies of the same data, and by
        comparing them as equal the Python sort operation (which is a
        stable sort) will preserve their relative order.  By preserving the
        order of redundant copies, we allow the preference for the order in
        which redundant copies are to be attempted to be conveyed by their
        order in the list, and preserved.

        Raises TypeError if other is not a CacheEntry.
        """
        if not isinstance(other, CacheEntry):
            raise TypeError("can only compare CacheEntry to CacheEntry")
        return (self.observatory, self.description, self.segment) < (other.observatory, other.description, other.segment)

    def __eq__(self, other):
        """
        Test two CacheEntry objects for equality of observatory,
        description and segment.  The URL is disregarded:  entries that
        differ only by their URL are considered to be redundant copies of
        the same data and compare as equal.

        Raises TypeError if other is not a CacheEntry.
        """
        if not isinstance(other, CacheEntry):
            raise TypeError("can only compare CacheEntry to CacheEntry")
        return (self.observatory, self.description, self.segment) == (other.observatory, other.description, other.segment)

    def __hash__(self):
        """
        CacheEntry objects are hashed by the tuple (observatory,
        description, segment), i.e., the URL is disregarded.
        """
        return hash((self.observatory, self.description, self.segment))

    @property
    def url(self):
        """
        The cache entry's URL.  The URL is constructed from the values of
        the scheme, host, and path attributes.  Assigning a value to the
        URL attribute causes the value to be parsed and the scheme, host
        and path attributes updated.
        """
        return urlunparse((self.scheme, self.host, self.path, None, None, None))

    @url.setter
    def url(self, url):
        # only the first three components of the parsed URL are retained
        self.scheme, self.host, self.path = urlparse(url)[:3]

    @property
    def segmentlistdict(self):
        """
        A segmentlistdict object describing the instruments and time
        spanned by this CacheEntry.  A new object is constructed each time
        this attribute is accessed (segments are immutable so there is no
        reason to try to share a reference to the CacheEntry's internal
        segment;  modifications of one would not be reflected in the other
        anyway).

        Example:

        >>> c = CacheEntry("H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
        >>> c.segmentlistdict['H1']
        [segment(LIGOTimeGPS(815901601, 0), LIGOTimeGPS(815902177, 500000000))]

        The \"observatory\" column of the cache entry, which is frequently
        used to store instrument names, is parsed into instrument names for
        the dictionary keys using the same rules as
        igwn_ligolw.lsctables.instrumentsproperty.get().

        Example:

        >>> c = CacheEntry("H1H2, S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1H2-815901601-576.xml")
        >>> c.segmentlistdict['H1H2']
        [segment(LIGOTimeGPS(815901601, 0), LIGOTimeGPS(815902177, 500000000))]
        """
        if self.observatory is None:
            instruments = (None,)
        else:
            # comma-delimited names, surrounding white space and empty
            # names removed, duplicates collapsed
            instruments = {obs for obs in map(str.strip, self.observatory.split(",")) if obs}
        # every instrument gets its own, independent, segmentlist;  the
        # list is empty when the entry carries no segment information
        return segments.segmentlistdict((instrument, segments.segmentlist([self.segment] if self.segment is not None else [])) for instrument in instruments)

    @classmethod
    def from_T050017(cls, url, coltype = LIGOTimeGPS):
        """
        Parse a URL in the style of T050017-00 into a CacheEntry.  The
        T050017-00 file name format is, essentially,

        observatory-description-start-duration.extension

        The start and duration are converted with coltype.  Raises
        ValueError if the URL does not match that format.

        Example:

        >>> c = CacheEntry.from_T050017("file://localhost/data/node144/frames/S5/strain-L2/LLO/L-L1_RDS_C03_L2-8365/L-L1_RDS_C03_L2-836562330-83.gwf")
        >>> c.observatory
        'L'
        >>> c.host
        'localhost'
        >>> os.path.basename(c.path)
        'L-L1_RDS_C03_L2-836562330-83.gwf'
        """
        match = cls._url_regex.search(url)
        if not match:
            raise ValueError("could not convert %s to CacheEntry" % repr(url))
        observatory = match.group("obs")
        description = match.group("dsc")
        # FIXME:  remove typecasts when LIGOTimeGPS can be passed a unicode
        start = str(match.group("strt"))
        duration = str(match.group("dur"))
        if start == "-" and duration == "-":
            # no segment information
            segment = None
        else:
            # convert the start time once and re-use it for both boundaries
            start = coltype(start)
            segment = segments.segment(start, start + coltype(duration))
        return cls(observatory, description, segment, url)
static size_t hash(const char *s)
Definition: LALDict.c:51
A Python object representing one line in a LAL cache file.
Definition: cache.py:150
observatory
Definition: cache.py:190
segment
Definition: cache.py:198
def __lt__(self, other)
Compare two CacheEntry objects by observatory, then description, then segment.
Definition: cache.py:252
def segmentlistdict(self)
A segmentlistdict object describing the instruments and time spanned by this CacheEntry.
Definition: cache.py:321
_regex
Definition: cache.py:153
path
Definition: cache.py:293
description
Definition: cache.py:191
url
Definition: cache.py:202
def __init__(self, *args, **kwargs)
Initialize a CacheEntry object.
Definition: cache.py:182
def url(self, url)
Definition: cache.py:292
def from_T050017(cls, url, coltype=LIGOTimeGPS)
Parse a URL in the style of T050017-00 into a CacheEntry.
Definition: cache.py:345
_url_regex
Definition: cache.py:154
def __str__(self)
Convert the CacheEntry to a string in the format of a line in a LAL cache.
Definition: cache.py:231
def __eq__(self, other)
Compare two CacheEntry objects by observatory, then description, then segment.
Definition: cache.py:269
def url(self)
The cache entry's URL.
Definition: cache.py:288
def __hash__(self)
CacheEntry objects are hashed by the tuple (observatory, description, segment), i....
Definition: cache.py:278
def lalcache_from_gluecache(cache)
Convert a glue.lal.Cache object to a lal.Cache object.
Definition: cache.py:52