Source code for rio

#!/usr/bin/env python
# Copyright (C) 2021  Chad Hanna (crh184@psu.edu)
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

import sqlite3
import sys
from collections import UserDict
from ligo.segments import segment
from ligo.lw.utils import segments as lwseg 
from ligo.lw import dbtables
from ligo.lw import lsctables

[docs]class load(UserDict): def __init__(self, fname, verbose = False, data_segments_name = "datasegments", vetoes_name = "vetoes"): """ Example: >>> from gstlal import rio; >>> D = rio.load('H1L1V1-ALL_LLOID_split_injections-1256655642-732465.sqlite') # iterate over zero-lag events >>> for event in D.zl_events(): ... print (event['coinc_event']['likelihood'], event['coinc_inspiral']['combined_far']); break ... -8.360537003030897 0.06110694885193467 # iterate over found injections >>> for inj in D.found_injections(): ... print (inj['sim_inspiral']['mass1'], inj['coinc_event']['likelihood'], inj['coinc_inspiral']['combined_far']); break ... 35.35828 -23.43134661622628 0.06110694885193467 # iterate over missed injections >>> for inj in D.missed_injections(): ... print (inj['sim_inspiral']['mass1']); break ... 1.283698 # iterate over injections not done (not in analyzed time) >>> for inj in D.not_analyzed_injections(): ... print (inj['sim_inspiral']['mass1']); break ... 2.621253 """ UserDict.__init__(self) self.fname = fname self.verbose = verbose self.con = sqlite3.connect(fname) self.con.row_factory = sqlite3.Row self.tables = {} self.__load_tables([ ('coinc_definer', ('coinc_def_id',)), ('coinc_inspiral', ('coinc_event_id',)), ('segment', ('segment_def_id',)), ('sngl_inspiral', ('event_id',)), ('coinc_event', ('coinc_event_id',)), ('process', ('process_id',)), ('segment_definer', ('segment_def_id',)), ('time_slide', ('time_slide_id',)), ('coinc_event_map', ('coinc_event_id',)), ('process_params', ('process_id',)), ('segment_summary', ('segment_def_id',)), ('sim_inspiral', ('simulation_id',)) ]) xmldoc = dbtables.get_xml(self.con) self.segs = lwseg.segmenttable_get_by_name(xmldoc, data_segments_name).coalesce() - lwseg.segmenttable_get_by_name(xmldoc, vetoes_name).coalesce() xmldoc.unlink() self.d = self.__to_dict() self.con.close() self.__process_injections() def __iter__(self): return iter(self.d)
[docs] def keys(self): return self.d.keys()
[docs] def items(self): return self.d.items()
[docs] def values(self): return self.d.values()
def __getitem__(self, key): return self.d[key] def __setitem__(self, key, item): self.d[key] = item
[docs] def has_key(self, k): return k in self.d
def __cmp__(self, dict_): return self.__cmp__(self.d, dict_) def __contains__(self, item): return item in self.d def __load_tables(self, names): for name, keys in names: try: if self.verbose: print ('loading ... %s' % name, end = ' ', file = sys.stderr) res = self.con.cursor().execute('SELECT * FROM %s' % name) out = {} for row in res.fetchall(): rd = dict(row) for key in keys: out.setdefault(row[key], []).append(rd) self.tables[name] = out if self.verbose: print('... %d' % len(self.tables[name]), file = sys.stderr) except sqlite3.OperationalError: if self.verbose: print('... NOT FOUND!', file = sys.stderr) def __process_injections(self): self.__not_analyzed_injections = set([]) self.__analyzed_ids = set([]) if "sim_inspiral" in self.tables: for sid, row in self.itertable('sim_inspiral'): t = lsctables.LIGOTimeGPS(row[0]['geocent_end_time'], row[0]['geocent_end_time_ns']) seg = segment([t, t]) if self.segs.intersects_segment(seg): self.__analyzed_ids.add(sid) else: self.__not_analyzed_injections.add(sid) self.__found_injections = {} if "sim_inspiral" in self.tables: for cid, event in tuple(self['sim_inspiral<-->coinc_event coincidences (exact)'].values())[0].items(): self.__found_injections.setdefault(event['sim_inspiral'][0]['simulation_id'], []).append(event) self.__missed_injections = self.__analyzed_ids - set(self.__found_injections)
[docs] def missed_injections(self): for sim in self.__missed_injections: yield {'sim_inspiral': self.tables['sim_inspiral'][sim][0]}
[docs] def not_analyzed_injections(self): for sim in self.__not_analyzed_injections: yield {'sim_inspiral': self.tables['sim_inspiral'][sim][0]}
[docs] def found_injections(self): for inj in self.__found_injections.values(): out = {} assert (len(inj) == 1) out['sim_inspiral'] = inj[0]['sim_inspiral'][0] assert (len(inj[0]['coinc_event']) == 1) out['coinc_event'] = inj[0]['coinc_event'][0].copy() out['sngl_inspiral'] = out['coinc_event']['sngl_inspiral'] del out['coinc_event']['sngl_inspiral'] out['coinc_inspiral'] = self.tables['coinc_inspiral'][out['coinc_event']['coinc_event_id']][0] yield out
[docs] def zl_events(self): for k in self['sngl_inspiral<-->sngl_inspiral coincidences']: if not any([x[1] for x in k]): for eid, event in self['sngl_inspiral<-->sngl_inspiral coincidences'][k].items(): out = {} out['coinc_event'] = event.copy() out['sngl_inspiral'] = out['coinc_event']['sngl_inspiral'] del out['coinc_event']['sngl_inspiral'] out['coinc_inspiral'] = self.tables['coinc_inspiral'][eid][0] yield out
[docs] def zl_event_tsv(self, sort_by = {"coinc_inspiral": "combined_far"}, sngl_inspiral_columns = ['snr', 'chisq'], coinc_event_columns = ['likelihood'], coinc_inspiral_columns = ["end_time", "end_time_ns", "mchirp", "mass"], limit = 10, ifos = ('H1', 'L1', 'V1', 'K1')): """ Iterate over strings representing rows in a tsv table with data given by the arguments to this function Example: >>> from gstlal import rio >>> D = rio.load('H1L1V1-ALL_LLOID-1256655642-732465.sqlite') >>> with open("blah.tsv", "w") as f: ... for row in D.zl_event_tsv(limit = 100000000): ... print (row, file = f) ... >>> """ data = [] skey, scol = tuple(sort_by.items())[0] for event in self.zl_events(): # first column will be the sort by row = [event[skey][scol]] row.extend([event['coinc_event'][c] for c in coinc_event_columns]) row.extend([event['coinc_inspiral'][c] for c in coinc_inspiral_columns]) sngls = {e['ifo']:e for e in event['sngl_inspiral']} for ifo in ifos: if ifo in sngls: row.extend([sngls[ifo][c] for c in sngl_inspiral_columns]) else: row.extend(['---' for c in sngl_inspiral_columns]) data.append(row) data.sort() columns = [scol] + coinc_event_columns + coinc_inspiral_columns + ["%s_%s" % (ifo, c) for ifo in ifos for c in sngl_inspiral_columns] yield "\t".join(columns) for row in data[:limit]: yield "\t".join([str(r) for r in row])
[docs] def itertable(self, table): for tid, row in self.tables[table].items(): yield tid, row
def __to_dict(self): if self.verbose: print ('computing time slides ...', file = sys.stderr) tids = {} for tid, rows in self.itertable('time_slide'): for row in rows: tids.setdefault(tid, []).append((row['instrument'], row['offset'])) tids = {tuple(sorted(v)):k for k,v in tids.items()} # reconstruct coincs [0] is okay because there will only be one coinc_types = {r[0]['description']: {} for r in self.tables['coinc_definer'].values()} defids = {r[0]['description']: r[0]['coinc_def_id'] for r in self.tables['coinc_definer'].values()} for description, coinc_type in coinc_types.items(): for offsets, tid in tids.items(): if self.verbose: print ('getting coinc events for %s and time slide %s ...' % (description, offsets), file = sys.stderr) # get all the coinc events for this timeslide # we can use v[0] because there will be one row per coinc_event_id in this case coinc_type[offsets] = {k:v[0] for k,v in self.tables['coinc_event'].items() if v[0]['time_slide_id'] == tid and v[0]['coinc_def_id'] == defids[description]} # get the coinc event maps for cid, coinc_event in coinc_type[offsets].items(): for row in self.tables['coinc_event_map'][cid]: # [0] should be okay here coinc_event.setdefault(row['table_name'], []).append(self.tables[row['table_name']][row['event_id']][0]) return coinc_types