17from __future__
import print_function
19"""Modules extending the Cache file functionality from LAL
25from functools
import total_ordering
26from urllib.parse
import (
31import igwn_segments
as segments
33from ..
import git_version
34from ..lal
import CacheImport
35from ..lal
import LIGOTimeGPS
37__author__ =
"Duncan Macleod <duncan.macleod@ligo.org>"
38__version__ = git_version.id
39__date__ = git_version.date
41__all__ = [
'CacheEntry',
'lalcache_from_gluecache']
44 """Convert a glue.lal.Cache object to a lal.Cache object.
45 Writes cache to temporary file and reads to Cache.
48 LAL cache object from GLUE to convert
49 type cache glue.lal.Cache
51 @returns a lal.Cache object representing the same data
53 with tempfile.NamedTemporaryFile(delete=
False, mode=
"w")
as t:
56 e.segment = type(e.segment)(int(e.segment[0]), int(e.segment[1]))
58 frcache = CacheImport(t.name)
71 A Python object representing one line in a LAL cache file.
73 The LAL cache format is defined elsewhere, and what follows is meant
74 only to be informative, not an official specification. Each line in a
75 LAL cache identifies a single file, and the line consists of five
76 columns of white-space delimited text.
78 The first column, "observatory", generally stores the name of an
79 observatory site or one or more instruments (preferably delimited by
80 ",", but often there is no delimiter between instrument names in which
81 case they should be 2 characters each).
83 The second column, "description", stores a short string tag that is
84 usually all capitals with "_" separating components, in the style of
85 the description part of the LIGO-Virgo frame filename format.
87 The third and fourth columns store the start time and duration in GPS
88 seconds of the interval spanned by the file identified by the cache
89 line. When the file does not start on an integer second or its
90 duration is not an integer number of seconds, the conventions of the
91 LIGO-Virgo frame filename format apply.
93 The fifth (last) column stores the file's URL.
95 The values for these columns are stored in the .observatory,
96 .description, .segment and .url attributes of instances of this class,
97 respectively. The .segment attribute stores an igwn_segments.segment
98 object describing the interval spanned by the file. Any of these
99 attributes except the URL is allowed to be None.
101 Example (parse a string):
103 >>> c =
CacheEntry(
"H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
109 Example (one-liners to read and write a cache file):
112 >>> filename =
"874000000-20000.cache"
114 >>> inname = os.path.join(os.environ.get(
"LAL_TEST_SRCDIR",
"."), filename)
116 >>> cache = list(map(CacheEntry, open(inname)))
118 >>> print(*cache, sep =
"\\n", file = open(filename +
".new",
"w"))
120 Example (extract segmentlist dictionary from LAL cache):
122 >>>
import igwn_segments
as segments
123 >>> seglists = segments.segmentlistdict()
124 >>>
for cacheentry
in cache:
125 ... seglists |= cacheentry.segmentlistdict
128 NOTE: the CacheEntry type defines a comparison operation and a
129 .__hash__() implementation, both of which disregard the URL. That is,
130 if two CacheEntry objects differ only by URL and otherwise have the same
131 metadata, they are considered to be redundant copies of the same data.
132 For example, uniquification with a set() will retain only one redundant
133 copy, selected at random.
135 >>> x =
CacheEntry(
"H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
136 >>> y =
CacheEntry(
"H1 S5 815901601 576.5 gsiftp://data.server.org/bigpileofdata/H1-815901601-576.xml")
142 NOTE: this is a pure Python object providing an alternative
143 representation of the contents of a LAL cache file to the C
144 implementation in the LAL library proper. The two are not
149 igwn_segments.utils.fromlalcache()
153 _regex = re.compile(
r"\A\s*(?P<obs>\S+)\s+(?P<dsc>\S+)\s+(?P<strt>\S+)\s+(?P<dur>\S+)\s+(?P<url>\S+)\s*\Z")
154 _url_regex = re.compile(
r"\A((.*/)*(?P<obs>[^/]+)-(?P<dsc>[^/]+)-(?P<strt>[^/]+)-(?P<dur>[^/\.]+)\.[^/]+)\Z")
156 def __init__(self, *args, **kwargs):
158 Initialize a CacheEntry object. The arguments can take two forms:
159 a single string argument, which is interpreted and parsed as a line
160 from a LAL cache file, or four arguments used to explicitly
161 initialize the observatory, description, segment and URL in that
162 order. When parsing a single line of text from a LAL cache, an
163 optional key-word argument "coltype" can be provided to set the
164 type the start and durations are parsed as. The default is
169 >>> c =
CacheEntry(
"H1",
"S5", segments.segment(815901601, 815902177.5),
"file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
171 [815901601 ... 815902177.5)
173 H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml
174 >>> c =
CacheEntry(
"H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
176 [815901601 ... 815902177.5)
177 >>> print(
CacheEntry(
"H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml", coltype = float).segment)
178 [815901601.0 ... 815902177.5)
181 alternative initialization mechanism.
185 match = self.
_regex.search(args[0])
187 match = match.groupdict()
188 except AttributeError:
189 raise ValueError(
"could not convert %s to CacheEntry" % repr(args[0]))
193 start = str(match[
"strt"])
194 duration = str(match[
"dur"])
195 coltype = kwargs.pop(
"coltype", LIGOTimeGPS)
196 if start ==
"-" and duration ==
"-":
200 start = coltype(start)
201 self.
segment = segments.segment(start, start + coltype(duration))
204 raise TypeError(
"unrecognized keyword arguments: %s" %
", ".join(kwargs))
209 raise TypeError(
"invalid arguments: %s" %
", ".join(kwargs))
212 raise TypeError(
"invalid arguments: %s" % args)
223 Convert the CacheEntry to a string in the format of a line in a LAL
224 cache. Used to write the CacheEntry to a file.
228 >>> c =
CacheEntry(
"H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
230 'H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml'
234 duration = str(abs(self.
segment))
242 Compare two CacheEntry objects by observatory, then description,
243 then segment. CacheEntry objects that have different URLs but for
244 which all other metadata are the same are considered to be
245 equivalent. If two entries differ only by their URL, they are
246 considered to be redundant copies of the same data, and by
247 comparing them as equal the Python sort operation (which is a
248 stable sort) will preserve their relative order. By preserving the
249 order of redundant copies, we allow the preference for the order in
250 which redundant copies are to be attempted to be conveyed by their
251 order in the list, and preserved.
253 if not isinstance(other, CacheEntry):
254 raise TypeError(
"can only compare CacheEntry to CacheEntry")
259 Compare two CacheEntry objects by observatory, then description,
260 then segment. CacheEntry objects that have different URLs but for
261 which all other metadata are the same are considered to be
262 equivalent. If two entries differ only by their URL, they are
263 considered to be redundant copies of the same data, and by
264 comparing them as equal the Python sort operation (which is a
265 stable sort) will preserve their relative order. By preserving the
266 order of redundant copies, we allow the preference for the order in
267 which redundant copies are to be attempted to be conveyed by their
268 order in the list, and preserved.
270 if not isinstance(other, CacheEntry):
271 raise TypeError(
"can only compare CacheEntry to CacheEntry")
276 CacheEntry objects are hashed by the tuple (observatory,
277 description, segment), i.e., the URL is disregarded.
284 The cache entry's URL. The URL is constructed from the values of
285 the scheme, host, and path attributes. Assigning a value to the
286 URL attribute causes the value to be parsed and the scheme, host
287 and path attributes updated.
289 return urlunparse((self.scheme, self.host, self.
path,
None,
None,
None))
293 self.scheme, self.host, self.
path = urlparse(url)[:3]
298 A segmentlistdict object describing the instruments and time
299 spanned by this CacheEntry. A new object is constructed each time
300 this attribute is accessed (segments are immutable so there is no
301 reason to try to share a reference to the CacheEntry's internal
302 segment; modifications of one would not be reflected in the other
307 >>> c =
CacheEntry(
"H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
308 >>> c.segmentlistdict[
'H1']
309 [
segment(LIGOTimeGPS(815901601, 0), LIGOTimeGPS(815902177, 500000000))]
311 The \"observatory\" column of the cache entry, which is frequently
312 used to store instrument names, is parsed into instrument names for
313 the dictionary keys using the same rules as
314 igwn_ligolw.lsctables.instrumentsproperty.get().
318 >>> c =
CacheEntry(
"H1H2, S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1H2-815901601-576.xml")
319 >>> c.segmentlistdict[
'H1H2']
320 [
segment(LIGOTimeGPS(815901601, 0), LIGOTimeGPS(815902177, 500000000))]
323 instruments = (
None,)
325 instruments = {obs
for obs
in map(str.strip, self.
observatory.split(
","))
if obs}
326 return segments.segmentlistdict((instrument, segments.segmentlist(self.
segment is not None and [self.
segment]
or []))
for instrument
in instruments)
331 Parse a URL in the style of T050017-00 into a CacheEntry. The
332 T050017-00 file name format is, essentially,
334 observatory-description-start-duration.extension
338 >>> c = CacheEntry.from_T050017(
"file://localhost/data/node144/frames/S5/strain-L2/LLO/L-L1_RDS_C03_L2-8365/L-L1_RDS_C03_L2-836562330-83.gwf")
343 >>> os.path.basename(c.path)
344 'L-L1_RDS_C03_L2-836562330-83.gwf'
348 raise ValueError(
"could not convert %s to CacheEntry" % repr(url))
349 observatory = match.group(
"obs")
350 description = match.group(
"dsc")
352 start = str(match.group(
"strt"))
353 duration = str(match.group(
"dur"))
354 if start ==
"-" and duration ==
"-":
358 segment = segments.segment(coltype(start), coltype(start) + coltype(duration))
359 return cls(observatory, description, segment, url)
static size_t hash(const char *s)
A Python object representing one line in a LAL cache file.
def __lt__(self, other)
Compare two CacheEntry objects by observatory, then description, then segment.
def segmentlistdict(self)
A segmentlistdict object describing the instruments and time spanned by this CacheEntry.
def __init__(self, *args, **kwargs)
Initialize a CacheEntry object.
def from_T050017(cls, url, coltype=LIGOTimeGPS)
Parse a URL in the style of T050017-00 into a CacheEntry.
def __str__(self)
Convert the CacheEntry to a string in the format of a line in a LAL cache.
def __eq__(self, other)
Compare two CacheEntry objects by observatory, then description, then segment.
def url(self)
The cache entry's URL.
def __hash__(self)
CacheEntry objects are hashed by the tuple (observatory, description, segment), i....
def lalcache_from_gluecache(cache)
Convert a glue.lal.Cache object to a lal.Cache object.