17 from __future__
import print_function
19 """Modules extending the Cache file functionality from LAL
25 from functools
import total_ordering
26 from urllib.parse
import (
31 from ligo
import segments
33 from ..
import git_version
34 from ..lal
import CacheImport
35 from ..lal
import LIGOTimeGPS
37 __author__ =
"Duncan Macleod <duncan.macleod@ligo.org>"
38 __version__ = git_version.id
39 __date__ = git_version.date
41 __all__ = [
'CacheEntry',
'lalcache_from_gluecache']
44 """Convert a glue.lal.Cache object to a lal.Cache object.
45 Writes cache to temporary file and reads to Cache.
48 LAL cache object from GLUE to convert
49 type cache glue.lal.Cache
51 @returns a lal.Cache object representing the same data
53 with tempfile.NamedTemporaryFile(delete=
False, mode=
"w")
as t:
56 e.segment = type(e.segment)(int(e.segment[0]), int(e.segment[1]))
58 frcache = CacheImport(t.name)
71 A Python object representing one line in a LAL cache file.
73 The LAL cache format is defined elsewhere, and what follows is meant
74 only to be informative, not an official specification. Each line in a
75 LAL cache identifies a single file, and the line consists of five
76 columns of white-space delimited text.
78 The first column, "observatory", generally stores the name of an
79 observatory site or one or more instruments (preferably delimited by
80 ",", but often there is no delimiter between instrument names in which
81 case they should be 2 characters each).
83 The second column, "description", stores a short string tag that is
84 usually all capitals with "_" separating components, in the style of
85 the description part of the LIGO-Virgo frame filename format.
87 The third and fourth columns store the start time and duration in GPS
88 seconds of the interval spanned by the file identified by the cache
89 line. When the file does not start on an integer second or its
90 duration is not an integer number of seconds, the conventions of the
91 LIGO-Virgo frame filename format apply.
93 The fifth (last) column stores the file's URL.
95 The values for these columns are stored in the .observatory,
96 .description, .segment and .url attributes of instances of this class,
97 respectively. The .segment attribute stores a ligo.segments.segment
98 object describing the interval spanned by the file. Any of these
99 attributes except the URL is allowed to be None.
101 Example (parse a string):
103 >>> c = CacheEntry("H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
109 Example (one-liners to read and write a cache file):
112 >>> filename = "874000000-20000.cache"
113 >>> # adjustment for doctest in out-of-tree builds
114 >>> inname = os.path.join(os.environ.get("LAL_TEST_SRCDIR", "."), filename)
115 >>> # one-liner to read
116 >>> cache = list(map(CacheEntry, open(inname)))
117 >>> # one-liner to write
118 >>> print(*cache, sep = "\\n", file = open(filename + ".new", "w"))
120 Example (extract segmentlist dictionary from LAL cache):
122 >>> from ligo import segments
123 >>> seglists = segments.segmentlistdict()
124 >>> for cacheentry in cache:
125 ... seglists |= cacheentry.segmentlistdict
128 NOTE: the CacheEntry type defines a comparison operation and a
129 .__hash__() implementation, both of which disregard the URL. That is,
130 if two CacheEntry objects differ only by URL and otherwise have the same
131 metadata, they are considered to be redundant copies of the same data.
132 For example, uniquification with a set() will retain only one redundant
133 copy, selected at random.
135 >>> x = CacheEntry("H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
136 >>> y = CacheEntry("H1 S5 815901601 576.5 gsiftp://data.server.org/bigpileofdata/H1-815901601-576.xml")
142 NOTE: this is a pure Python object providing an alternative
143 representation of the contents of a LAL cache file to the C
144 implementation in the LAL library proper. The two are not
149 ligo.segments.utils.fromlalcache()
153 _regex = re.compile(
r"\A\s*(?P<obs>\S+)\s+(?P<dsc>\S+)\s+(?P<strt>\S+)\s+(?P<dur>\S+)\s+(?P<url>\S+)\s*\Z")
154 _url_regex = re.compile(
r"\A((.*/)*(?P<obs>[^/]+)-(?P<dsc>[^/]+)-(?P<strt>[^/]+)-(?P<dur>[^/\.]+)\.[^/]+)\Z")
156 def __init__(self, *args, **kwargs):
158 Initialize a CacheEntry object. The arguments can take two forms:
159 a single string argument, which is interpreted and parsed as a line
160 from a LAL cache file, or four arguments used to explicitly
161 initialize the observatory, description, segment and URL in that
162 order. When parsing a single line of text from a LAL cache, an
163 optional keyword argument "coltype" can be provided to set the
164 type that the start and duration are parsed as. The default is LIGOTimeGPS.
169 >>> c = CacheEntry("H1", "S5", segments.segment(815901601, 815902177.5), "file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
171 [815901601 ... 815902177.5)
173 H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml
174 >>> c = CacheEntry("H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
176 [815901601 ... 815902177.5)
177 >>> print(CacheEntry("H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml", coltype = float).segment)
178 [815901601.0 ... 815902177.5)
180 See also the .from_T050017() class method for an
181 alternative initialization mechanism.
185 match = self._regex.search(args[0])
187 match = match.groupdict()
188 except AttributeError:
189 raise ValueError(
"could not convert %s to CacheEntry" % repr(args[0]))
193 start = str(match[
"strt"])
194 duration = str(match[
"dur"])
195 coltype = kwargs.pop(
"coltype", LIGOTimeGPS)
196 if start ==
"-" and duration ==
"-":
200 start = coltype(start)
201 self.segment = segments.segment(start, start + coltype(duration))
204 raise TypeError(
"unrecognized keyword arguments: %s" %
", ".join(kwargs))
209 raise TypeError(
"invalid arguments: %s" %
", ".join(kwargs))
212 raise TypeError(
"invalid arguments: %s" % args)
223 Convert the CacheEntry to a string in the format of a line in a LAL
224 cache. Used to write the CacheEntry to a file.
228 >>> c = CacheEntry("H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
230 'H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml'
232 if self.segment is not None:
233 start = str(self.segment[0])
234 duration = str(abs(self.segment))
242 Compare two CacheEntry objects by observatory, then description,
243 then segment. CacheEntry objects that have different URLs but for
244 which all other metadata are the same are considered to be
245 equivalent. If two entries differ only by their URL, they are
246 considered to be redundant copies of the same data, and by
247 comparing them as equal the Python sort operation (which is a
248 stable sort) will preserve their relative order. By preserving the
249 order of redundant copies, we allow the preference for the order in
250 which redundant copies are to be attempted to be conveyed by their
251 order in the list, and preserved.
253 if not isinstance(other, CacheEntry):
254 raise TypeError(
"can only compare CacheEntry to CacheEntry")
255 return (self.observatory, self.description, self.segment) < (other.observatory, other.description, other.segment)
259 Compare two CacheEntry objects by observatory, then description,
260 then segment. CacheEntry objects that have different URLs but for
261 which all other metadata are the same are considered to be
262 equivalent. If two entries differ only by their URL, they are
263 considered to be redundant copies of the same data, and by
264 comparing them as equal the Python sort operation (which is a
265 stable sort) will preserve their relative order. By preserving the
266 order of redundant copies, we allow the preference for the order in
267 which redundant copies are to be attempted to be conveyed by their
268 order in the list, and preserved.
270 if not isinstance(other, CacheEntry):
271 raise TypeError(
"can only compare CacheEntry to CacheEntry")
272 return (self.observatory, self.description, self.segment) == (other.observatory, other.description, other.segment)
276 CacheEntry objects are hashed by the tuple (observatory,
277 description, segment), i.e., the URL is disregarded.
284 The cache entry's URL. The URL is constructed from the values of
285 the scheme, host, and path attributes. Assigning a value to the
286 URL attribute causes the value to be parsed and the scheme, host
287 and path attributes updated.
289 return urlunparse((self.scheme, self.host, self.path, None, None, None))
293 self.scheme, self.host, self.path = urlparse(url)[:3]
298 A segmentlistdict object describing the instruments and time
299 spanned by this CacheEntry. A new object is constructed each time
300 this attribute is accessed (segments are immutable so there is no
301 reason to try to share a reference to the CacheEntry's internal
302 segment; modifications of one would not be reflected in the other
307 >>> c = CacheEntry("H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
308 >>> c.segmentlistdict['H1']
309 [segment(LIGOTimeGPS(815901601, 0), LIGOTimeGPS(815902177, 500000000))]
311 The \"observatory\" column of the cache entry, which is frequently
312 used to store instrument names, is parsed into instrument names for
313 the dictionary keys using the same rules as
314 ligo.lw.lsctables.instrumentsproperty.get().
318 >>> c = CacheEntry("H1H2, S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1H2-815901601-576.xml")
319 >>> c.segmentlistdict['H1H2']
320 [segment(LIGOTimeGPS(815901601, 0), LIGOTimeGPS(815902177, 500000000))]
323 instruments = (
None,)
325 instruments = {obs for obs in map(str.strip, self.observatory.split(",")) if obs}
326 return segments.segmentlistdict((instrument, segments.segmentlist(self.segment is not None and [self.segment] or [])) for instrument in instruments)
331 Parse a URL in the style of T050017-00 into a CacheEntry. The
332 T050017-00 file name format is, essentially,
334 observatory-description-start-duration.extension
338 >>> c = CacheEntry.from_T050017("file://localhost/data/node144/frames/S5/strain-L2/LLO/L-L1_RDS_C03_L2-8365/L-L1_RDS_C03_L2-836562330-83.gwf")
343 >>> os.path.basename(c.path)
344 'L-L1_RDS_C03_L2-836562330-83.gwf'
348 raise ValueError(
"could not convert %s to CacheEntry" % repr(url))
349 observatory = match.group(
"obs")
350 description = match.group(
"dsc")
352 start = str(match.group(
"strt"))
353 duration = str(match.group(
"dur"))
354 if start ==
"-" and duration ==
"-":
358 segment = segments.segment(coltype(start), coltype(start) + coltype(duration))
359 return cls(observatory, description, segment, url)
static size_t hash(const char *s)
A Python object representing one line in a LAL cache file.
def __lt__(self, other)
Compare two CacheEntry objects by observatory, then description, then segment.
def segmentlistdict(self)
A segmentlistdict object describing the instruments and time spanned by this CacheEntry.
def __init__(self, *args, **kwargs)
Initialize a CacheEntry object.
def from_T050017(cls, url, coltype=LIGOTimeGPS)
Parse a URL in the style of T050017-00 into a CacheEntry.
def __str__(self)
Convert the CacheEntry to a string in the format of a line in a LAL cache.
def __eq__(self, other)
Compare two CacheEntry objects by observatory, then description, then segment.
def url(self)
The cache entry's URL.
def __hash__(self)
CacheEntry objects are hashed by the tuple (observatory, description, segment), i....
def lalcache_from_gluecache(cache)
Convert a glue.lal.Cache object to a lal.Cache object.