LAL  7.5.0.1-b72065a
lal_searchsum2cache.py
Go to the documentation of this file.
1 #
2 # Copyright (C) 2006 Kipp Cannon
3 #
4 # This program is free software; you can redistribute it and/or modify it
5 # under the terms of the GNU General Public License as published by the
6 # Free Software Foundation; either version 2 of the License, or (at your
7 # option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful, but
10 # WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
12 # Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License along
15 # with this program; if not, write to the Free Software Foundation, Inc.,
16 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 
18 
19 #
20 # =============================================================================
21 #
22 # Preamble
23 #
24 # =============================================================================
25 #
26 
27 
28 """
29 Build a LAL cache from a list of LIGO LW XML files containing search
30 summary tables.
31 """
32 
33 
34 import glob
35 from optparse import OptionParser
36 import os
37 import sys
38 
39 from lal.utils import CacheEntry
40 from ligo.lw import ligolw
41 from ligo.lw import lsctables
42 from ligo.lw import utils as ligolw_utils
43 from functools import reduce
44 from lal import git_version
45 
46 
47 __author__ = "Kipp Cannon <kipp.cannon@ligo.org>"
48 __version__ = "git id %s" % git_version.id
49 __date__ = git_version.date
50 
51 
52 #
53 # =============================================================================
54 #
55 # Command Line
56 #
57 # =============================================================================
58 #
59 
60 
def parse_command_line(args=None):
    """
    Parse the command line and return the tuple (options, filenames).

    args is an optional list of argument strings to parse.  When it is
    None (the default) OptionParser falls back to sys.argv[1:].

    On return:

    - options.output is a writable file object:  the file named by
      --output opened in text mode for writing, or sys.stdout if
      --output was not given.
    - options.program is a set of the names given with --program, or
      None if the option was not used.
    - filenames is the list of paths obtained by expanding every
      positional argument as a shell pattern with glob.glob().
      Patterns that match nothing contribute no entries.
    """
    parser = OptionParser(
        #version = "Name: %%prog\n%s" % git_version.verbose_msg,
        usage = "%prog [options] filenames ...",
        description = "Generates a LAL format cache file describing a collection of LIGO light-weight XML files. The cache is constructed by parsing the search_summary table in each file to extract the instruments and time each file spans. To allow long file lists to be processed, the filenames are interpreted as shell patterns (wildcard expansion is performed)."
    )
    parser.add_option("--description", metavar = "string", help = "Set all descriptions to this string. Use \"-\" for no description. If not given then the description will be extracted from the search summary rows, and if the search summary rows do not provide a unique description an error is raised.")
    parser.add_option("--observatory", metavar = "string", help = "Set all observatories to this string. Use \"-\" for no observatory. If not given then the union of the instruments from the search summary rows will be used to construct an \"observatory\" string.")
    parser.add_option("-v", "--verbose", action = "store_true", help = "Be verbose.")
    parser.add_option("-o", "--output", metavar = "filename", help = "Write output to this file (default = stdout).")
    parser.add_option("-p", "--program", metavar = "name", action = "append", help = "Obtain instruments, starts, durations, and descriptions from the search summary rows for this program (default = use all search summary rows). Can be given multiple times to select rows from more than one program.")
    options, filenames = parser.parse_args(args)

    # file() only exists in Python 2;  open() is the portable spelling
    # and is required here because the rest of the script relies on
    # the Python 3 print() function.
    if options.output:
        options.output = open(options.output, "w")
    else:
        options.output = sys.stdout

    # repeated --program options collapse into a set for fast lookup
    if options.program is not None:
        options.program = set(options.program)

    # wildcard expansion is done here so that very long file lists can
    # be passed as quoted patterns without hitting shell limits
    filenames = [filename for pattern in (filenames or []) for filename in glob.glob(pattern)]

    return options, filenames
88 
89 
90 #
91 # =============================================================================
92 #
93 # Input
94 #
95 # =============================================================================
96 #
97 
98 
def element_filter(name, attrs):
    """
    Element filter for partial document loading.

    Evaluates to a true value when the element described by name and
    attrs passes SearchSummaryTable.CheckProperties() or, failing
    that, ProcessTable.CheckProperties().
    """
    is_search_summary = lsctables.SearchSummaryTable.CheckProperties(name, attrs)
    # the process table is only tested when the first check fails
    return is_search_summary or lsctables.ProcessTable.CheckProperties(name, attrs)
105 
106 
@lsctables.use_in
class ContentHandler(ligolw.PartialLIGOLWContentHandler):
    """
    Partial content handler that passes element_filter() to the base
    class, so that element selection is delegated to that function.
    """

    def __init__(self, xmldoc):
        super().__init__(xmldoc, element_filter)
111 
112 
113 #
114 # =============================================================================
115 #
116 # Main
117 #
118 # =============================================================================
119 #
120 
121 
options, filenames = parse_command_line()


for n, filename in enumerate(filenames):
    # progress counter, printed on the same line as whatever
    # load_filename() reports when verbose
    if options.verbose:
        print("%d/%d:" % (n + 1, len(filenames)), end=' ', file=sys.stderr)

    # parse only the tables selected by ContentHandler
    xmldoc = ligolw_utils.load_filename(filename, verbose = options.verbose, contenthandler = ContentHandler)
    searchsumm = lsctables.SearchSummaryTable.get_table(xmldoc)

    # union of the process_ids of all requested programs;  None means
    # "use every search summary row"
    if options.program is None:
        process_ids = None
    else:
        process_table = lsctables.ProcessTable.get_table(xmldoc)
        process_ids = reduce(lambda a, b: a | b, (process_table.get_ids_by_program(program) for program in options.program))

    # coalesced output segment lists of the selected rows
    seglists = searchsumm.get_out_segmentlistdict(process_ids).coalesce()
    if not seglists:
        raise ValueError("%s: no matching rows found in search summary table" % filename)
    if None in seglists:
        # only suggest --program when it has not been tried already
        if options.program is None:
            raise ValueError("%s: null value in ifos column in search_summary table, try using --program" % filename)
        raise ValueError("%s: null value in ifos column in search_summary table" % filename)

    # observatory:  command line override if it is non-blank,
    # otherwise the sorted seglists keys joined with "+"
    observatory = options.observatory.strip() if options.observatory else options.observatory
    if not observatory:
        observatory = "+".join(sorted(seglists))

    # description:  command line override, otherwise the single
    # distinct comment shared by the selected rows
    if options.description:
        description = options.description
    else:
        if process_ids is None:
            comments = set(searchsumm.getColumnByName("comment"))
        else:
            comments = set(row.comment for row in searchsumm if row.process_id in process_ids)
        if not comments:
            raise ValueError("%s: no matching rows found in search summary table" % filename)
        if len(comments) > 1:
            raise ValueError("%s: comments in matching rows of search summary table are not identical" % filename)
        # a blank comment becomes None
        description = comments.pop().strip() or None

    # absolute local path as a file:// URL
    url = "file://localhost" + os.path.abspath(filename)

    # one cache line per input file, spanning the extent of all segments
    print(str(CacheEntry(observatory, description, seglists.extent_all(), url)), file=options.output)

    # release the document tree before loading the next file
    xmldoc.unlink()
A Python object representing one line in a LAL cache file.
Definition: cache.py:150
def element_filter(name, attrs)
Return True if name & attrs describe a search summary table or a process table.