Source code for rio

#!/usr/bin/env python
# Copyright (C) 2021  Chad Hanna (crh184@psu.edu)
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

import sqlite3
import sys
from collections import UserDict
from ligo.segments import segment
from ligo.lw.utils import segments as lwseg 
from ligo.lw import dbtables
from ligo.lw import lsctables

[docs]class load(UserDict):
	def __init__(self, fname, verbose = False, data_segments_name = "datasegments", vetoes_name = "vetoes"):
		"""
Example:

>>> from gstlal import rio;
>>> D = rio.load('H1L1V1-ALL_LLOID_split_injections-1256655642-732465.sqlite')
# iterate over zero-lag events
>>> for event in D.zl_events():
...     print (event['coinc_event']['likelihood'], event['coinc_inspiral']['combined_far']); break
... 
-8.360537003030897 0.06110694885193467
# iterate over found injections
>>> for inj in D.found_injections():
...     print (inj['sim_inspiral']['mass1'], inj['coinc_event']['likelihood'], inj['coinc_inspiral']['combined_far']); break
...
35.35828 -23.43134661622628 0.06110694885193467
# iterate over missed injections
>>> for inj in D.missed_injections():
...     print (inj['sim_inspiral']['mass1']); break
...
1.283698
# iterate over injections not done (not in analyzed time)
>>> for inj in D.not_analyzed_injections():
...     print (inj['sim_inspiral']['mass1']); break
... 
2.621253
		"""

		UserDict.__init__(self)
		self.fname = fname
		self.verbose = verbose
		self.con = sqlite3.connect(fname)
		self.con.row_factory = sqlite3.Row
		self.tables = {}
		self.__load_tables([	('coinc_definer', ('coinc_def_id',)),
					('coinc_inspiral', ('coinc_event_id',)),
					('segment', ('segment_def_id',)),
					('sngl_inspiral', ('event_id',)),
					('coinc_event', ('coinc_event_id',)),
					('process', ('process_id',)),
					('segment_definer', ('segment_def_id',)),
					('time_slide', ('time_slide_id',)),
					('coinc_event_map', ('coinc_event_id',)),
					('process_params', ('process_id',)),
					('segment_summary', ('segment_def_id',)),
					('sim_inspiral', ('simulation_id',))
					])
		xmldoc = dbtables.get_xml(self.con)
		self.segs = lwseg.segmenttable_get_by_name(xmldoc, data_segments_name).coalesce() - lwseg.segmenttable_get_by_name(xmldoc, vetoes_name).coalesce()
		xmldoc.unlink()
		self.d = self.__to_dict()
		self.con.close()
		self.__process_injections()

	def __iter__(self):
		return iter(self.d)

[docs]	def keys(self):
		return self.d.keys()

[docs]	def items(self):
		return self.d.items()

[docs]	def values(self):
		return self.d.values()

	def __getitem__(self, key):
		return self.d[key]
	
	def __setitem__(self, key, item):
		self.d[key] = item

[docs]	def has_key(self, k):
		return k in self.d

	def __cmp__(self, dict_):
		return self.__cmp__(self.d, dict_)

	def __contains__(self, item):
		return item in self.d

	def __load_tables(self, names):
		for name, keys in names:
			try:
				if self.verbose: print ('loading ... %s' % name, end = ' ', file = sys.stderr)
				res = self.con.cursor().execute('SELECT * FROM %s' % name)
				out = {}
				for row in res.fetchall():
					rd = dict(row)
					for key in keys:
						out.setdefault(row[key], []).append(rd)
				self.tables[name] = out
				if self.verbose: print('... %d' % len(self.tables[name]), file = sys.stderr)
			except sqlite3.OperationalError:
				if self.verbose: print('... NOT FOUND!', file = sys.stderr)

	def __process_injections(self):
		self.__not_analyzed_injections = set([])
		self.__analyzed_ids = set([])
		if "sim_inspiral" in self.tables:
			for sid, row in self.itertable('sim_inspiral'):
				t = lsctables.LIGOTimeGPS(row[0]['geocent_end_time'], row[0]['geocent_end_time_ns'])
				seg = segment([t, t])
				if self.segs.intersects_segment(seg):
					self.__analyzed_ids.add(sid)
				else:
					self.__not_analyzed_injections.add(sid)
		self.__found_injections = {}
		if "sim_inspiral" in self.tables:
			for cid, event in tuple(self['sim_inspiral<-->coinc_event coincidences (exact)'].values())[0].items():
				self.__found_injections.setdefault(event['sim_inspiral'][0]['simulation_id'], []).append(event)
		self.__missed_injections = self.__analyzed_ids - set(self.__found_injections)

[docs]	def missed_injections(self):
		for sim in self.__missed_injections:
			yield {'sim_inspiral': self.tables['sim_inspiral'][sim][0]}

[docs]	def not_analyzed_injections(self):
		for sim in self.__not_analyzed_injections:
			yield {'sim_inspiral': self.tables['sim_inspiral'][sim][0]}
			

[docs]	def found_injections(self):
		for inj in self.__found_injections.values():
			out = {}
			assert (len(inj) == 1)
			out['sim_inspiral'] = inj[0]['sim_inspiral'][0]
			assert (len(inj[0]['coinc_event']) == 1)
			out['coinc_event'] = inj[0]['coinc_event'][0].copy()
			out['sngl_inspiral'] = out['coinc_event']['sngl_inspiral']
			del out['coinc_event']['sngl_inspiral']
			out['coinc_inspiral'] = self.tables['coinc_inspiral'][out['coinc_event']['coinc_event_id']][0]
			yield out

[docs]	def zl_events(self):
		for k in self['sngl_inspiral<-->sngl_inspiral coincidences']:
			if not any([x[1] for x in k]):
				for eid, event in self['sngl_inspiral<-->sngl_inspiral coincidences'][k].items():
					out = {}
					out['coinc_event'] = event.copy()
					out['sngl_inspiral'] = out['coinc_event']['sngl_inspiral']
					del out['coinc_event']['sngl_inspiral']
					out['coinc_inspiral'] = self.tables['coinc_inspiral'][eid][0]
					yield out

[docs]	def zl_event_tsv(self, sort_by = {"coinc_inspiral": "combined_far"}, sngl_inspiral_columns = ['snr', 'chisq'], coinc_event_columns = ['likelihood'], coinc_inspiral_columns = ["end_time", "end_time_ns", "mchirp", "mass"], limit = 10, ifos = ('H1', 'L1', 'V1', 'K1')):
		"""
Iterate over strings representing rows in a tsv table with data given by the arguments to this function

Example:

>>> from gstlal import rio
>>> D = rio.load('H1L1V1-ALL_LLOID-1256655642-732465.sqlite')
>>> with open("blah.tsv", "w") as f:
...     for row in D.zl_event_tsv(limit = 100000000):
...         print (row, file = f)
... 
>>> 
		"""

		data = []
		skey, scol = tuple(sort_by.items())[0]
		for event in self.zl_events():
			# first column will be the sort by
			row = [event[skey][scol]]
			row.extend([event['coinc_event'][c] for c in coinc_event_columns])
			row.extend([event['coinc_inspiral'][c] for c in coinc_inspiral_columns])
			sngls = {e['ifo']:e for e in event['sngl_inspiral']}
			for ifo in ifos:
				if ifo in sngls:
					row.extend([sngls[ifo][c] for c in sngl_inspiral_columns])
				else:
					row.extend(['---' for c in sngl_inspiral_columns])

			data.append(row)
		data.sort()
		columns = [scol] + coinc_event_columns + coinc_inspiral_columns + ["%s_%s" % (ifo, c) for ifo in ifos for c in sngl_inspiral_columns]
		yield "\t".join(columns)
		
		for row in data[:limit]:
			yield "\t".join([str(r) for r in row])
				
		


[docs]	def itertable(self, table):
		for tid, row in self.tables[table].items():
			yield tid, row

	def __to_dict(self):
		if self.verbose: print ('computing time slides ...', file = sys.stderr)
		tids = {}
		for tid, rows in self.itertable('time_slide'):
			for row in rows:
				tids.setdefault(tid, []).append((row['instrument'], row['offset']))
		tids = {tuple(sorted(v)):k for k,v in tids.items()}

		# reconstruct coincs [0] is okay because there will only be one
		coinc_types = {r[0]['description']: {} for r in self.tables['coinc_definer'].values()}
		defids = {r[0]['description']: r[0]['coinc_def_id'] for r in self.tables['coinc_definer'].values()}

		for description, coinc_type in coinc_types.items():
			for offsets, tid in tids.items():
				if self.verbose: print ('getting coinc events for %s and time slide %s ...' % (description, offsets), file = sys.stderr)
				# get all the coinc events for this timeslide
				# we can use v[0] because there will be one row per coinc_event_id in this case
				coinc_type[offsets] = {k:v[0] for k,v in self.tables['coinc_event'].items() if v[0]['time_slide_id'] == tid and v[0]['coinc_def_id'] == defids[description]}
				# get the coinc event maps
				for cid, coinc_event in coinc_type[offsets].items():
					for row in self.tables['coinc_event_map'][cid]:
						# [0] should be okay here
						coinc_event.setdefault(row['table_name'], []).append(self.tables[row['table_name']][row['event_id']][0])

		return coinc_types