Loading [MathJax]/extensions/TeX/AMSsymbols.js
LALBurst 2.0.7.1-b246709
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Macros Modules Pages
cafe.py
Go to the documentation of this file.
1# Copyright (C) 2006-2010,2012--2021 Kipp Cannon
2#
3# This program is free software; you can redistribute it and/or modify it
4# under the terms of the GNU General Public License as published by the
5# Free Software Foundation; either version 2 of the License, or (at your
6# option) any later version.
7#
8# This program is distributed in the hope that it will be useful, but
9# WITHOUT ANY WARRANTY; without even the implied warranty of
10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
11# Public License for more details.
12#
13# You should have received a copy of the GNU General Public License along
14# with this program; if not, write to the Free Software Foundation, Inc.,
15# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16
17
18#
19# =============================================================================
20#
21# Preamble
22#
23# =============================================================================
24#
25
26
27"""
28LIGO Light-Weight XML coincidence analysis front end.
29"""
30
31
32import math
33import sys
34
35
36from lal import LIGOTimeGPS
37from lal.utils import CacheEntry
38import igwn_segments as segments
39
40
41from . import offsetvector
42from . import packing
43
44
45__author__ = "Kipp Cannon <kipp.cannon@ligo.org>"
46from .git_version import date as __date__
47from .git_version import version as __version__
48
49
50#
51# =============================================================================
52#
53# Input
54#
55# =============================================================================
56#
57
58
def load_cache(filename, verbose = False):
	"""
	Parse a LAL cache file named filename into a list of
	lal.utils.CacheEntry objects.  If filename is None then input is
	taken from stdin.

	Raises OSError if filename is given but cannot be opened, and
	whatever lal.utils.CacheEntry raises on a malformed line.
	"""
	if verbose:
		print("reading %s ..." % (filename or "stdin"), file=sys.stderr)
	if filename is not None:
		# use a context manager so the file is closed when done;
		# the previous implementation leaked the handle
		with open(filename) as f:
			return [CacheEntry(line) for line in f]
	# stdin is not ours to close
	return [CacheEntry(line) for line in sys.stdin]
72
73
def cache_to_seglistdict(cache):
	"""
	Construct a coalesced segmentlistdict object from a list of
	lal.utils.CacheEntry objects.
	"""
	seglists = segments.segmentlistdict()
	for cache_entry in cache:
		# the |= operator coalesces as it accumulates
		seglists |= cache_entry.segmentlistdict
	return seglists
83
84
85#
86# =============================================================================
87#
88# Performance Helpers
89#
90# =============================================================================
91#
92
93
def segmentlistdict_normalize(seglistdict, origin):
	"""
	Convert the times in a segmentlist dictionary to floats relative to
	origin.  The purpose is to allow segment lists stored as
	LIGOTimeGPS times to be manipulated more quickly without loss of
	precision.  The modification is done in place.
	"""
	# .itervalues() was removed in Python 3 (this module already uses
	# Python 3 print(..., file=...));  use .values()
	for seglist in seglistdict.values():
		seglist[:] = (segments.segment(float(seg[0] - origin), float(seg[1] - origin)) for seg in seglist)
103
104
def get_coincident_segmentlistdict(seglistdict, offset_vectors):
	"""
	Compute the segments for which data is required in order to perform
	a complete coincidence analysis given the segments for which data
	is available and the list of offset vectors to be applied to the
	data during the coincidence analysis.

	seglistdict is a segmentlistdict object defining the instruments
	and times for which data is available.  offset_vectors is a list
	of offset vectors to be applied to the data --- dictionaries of
	instrument/offset pairs.

	Each offset vector in offset_vectors is applied to the input
	segments in turn and the intersection of the shifted segments is
	computed.  The segments surviving the intersection are unshifted
	to their original positions and accumulated.  The return value is
	the union of the results of this operation over all offset
	vectors.

	In all cases all pair-wise intersections are computed, that is if
	an offset vector lists three instruments then this function
	returns the times when any two of those instruments are on,
	including times when all three are on.

	For example, let us say that "input" is a segmentlistdict object
	containing segment lists for three instruments, "H1", "H2" and
	"L1".  And let us say that "slides" is a list of dictionaries, and
	is equal to [{"H1":0, "H2":0, "L1":0}, {"H1":0, "H2":10}].  Then
	if

	output = get_coincident_segmentlistdict(input, slides)

	output will contain, for each of the three instruments, the
	segments (or parts thereof) from the original lists that are
	required in order to perform a triple- and double-coincident
	analyses at zero lag with the three instruments, *and* a
	double-coincident analysis between H1 and H2 with H2 offset by 10
	seconds.

	The segmentlistdict object returned by this function has its
	offsets set to those of the input segmentlistdict.
	"""
	# work on a copy so the caller's object is not modified
	working = seglistdict.copy()
	instruments = set(working)

	# remember the input's offsets so they can be restored on the
	# result
	saved_offsets = dict(working.offsets)

	# for each two-instrument component of each offset vector, shift
	# the segment lists, extract the coincident times, undo the shift
	# and accumulate the union
	result = segments.segmentlistdict()
	for offset_vector in offsetvector.component_offsetvectors(offset_vectors, 2):
		if not set(offset_vector).issubset(instruments):
			continue
		working.offsets.update(offset_vector)
		common = working.extract_common(offset_vector.keys())
		common.offsets.clear()
		result |= common

	# restore the input's offsets on the result
	result.offsets.update(saved_offsets)

	return result
166
167
def segmentlistdict_unnormalize(seglistdict, origin):
	"""
	The opposite of segmentlistdict_normalize(), restores the times in
	a segmentlist dictionary to absolute times.  The modification is
	done in place.
	"""
	# .itervalues() was removed in Python 3 (this module already uses
	# Python 3 print(..., file=...));  use .values()
	for seglist in seglistdict.values():
		seglist[:] = (segments.segment(origin + seg[0], origin + seg[1]) for seg in seglist)
177
178#
179# =============================================================================
180#
181# Output Cache Packing
182#
183# =============================================================================
184#
185
186
class LALCacheBin(packing.Bin):
	"""
	Subclass of the packing.Bin class representing a LAL file cache.
	The files contained in the bin are available in the .objects
	attribute, which is a list of lal.utils.CacheEntry objects.  The
	.size attribute holds a igwn_segments.segmentlistdict object giving
	the times spanned by the files in the bin.  The .extent attribute
	holds the result of running .extent_all() on the .size attribute.
	"""
	def __init__(self):
		packing.Bin.__init__(self)
		# times spanned by the files in this bin, keyed by instrument
		self.size = segments.segmentlistdict()
		# .extent_all() of .size;  None until the first file is added
		self.extent = None

	def add(self, cache_entry):
		"""
		Add cache_entry (a lal.utils.CacheEntry) to the bin,
		growing .size by its segmentlistdict and refreshing
		.extent.  Returns self.
		"""
		packing.Bin.add(self, cache_entry, cache_entry.segmentlistdict)
		self.extent = self.size.extent_all()
		return self

	def __iadd__(self, *args):
		"""
		Merge the contents of another LALCacheBin into this one
		and refresh .extent.  Returns self.
		"""
		packing.Bin.__iadd__(self, *args)
		self.extent = self.size.extent_all()
		return self

	# rich comparisons order bins by their extents

	def __lt__(self, other):
		return self.extent < other.extent

	def __le__(self, other):
		return self.extent <= other.extent

	def __eq__(self, other):
		return self.extent == other.extent

	def __ne__(self, other):
		return self.extent != other.extent

	def __ge__(self, other):
		return self.extent >= other.extent

	def __gt__(self, other):
		return self.extent > other.extent

	def __str__(self):
		# one LAL cache line per file
		return "\n".join(map(str, self.objects))
231
232
class CafePacker(packing.Packer):
	"""
	Packing algorithm implementing the ligolw_cafe file list packing
	algorithm.
	"""
	def set_offset_vectors(self, offset_vectors):
		"""
		Set the list of offset vectors to be considered when
		deciding the bins in which each file belongs.  Must be
		called before packing any files.  The input is a list of
		dictionaries, each mapping instruments to offsets.
		"""
		#
		# sort the offset vectors to reduce the number of
		# arithmetic operations performed while applying them
		#

		self.offset_vectors = list(offset_vectors)
		self.offset_vectors.sort(key = lambda offset_vector: sorted(offset_vector.items()))

		#
		# determine the largest gap that can conceivably be closed
		# by the time slides
		#

		min_offset = min(min(offset_vector.values()) for offset_vector in offset_vectors)
		max_offset = max(max(offset_vector.values()) for offset_vector in offset_vectors)
		self.max_gap = max_offset - min_offset
		assert self.max_gap >= 0

	def pack(self, cache_entry):
		"""
		Find all bins in which this lal.utils.CacheEntry instance
		belongs, merge them, and add this cache entry to the
		result.  Create a new bin for this cache entry if it does
		not belong in any of the existing bins.

		The cache entry "belongs" in a bin if after each of the
		preset offset vectors (see the .set_offset_vectors()
		method) is applied to both the contents of a bin and the
		cache entry, any of the segment lists of the bin and cache
		entry are found to intersect.  When checking for
		intersection, only the segment lists whose instrument names
		are listed in the offset vector are compared.
		"""
		#
		# add the cache entry to a new bin by itself
		#

		new = LALCacheBin()
		new.add(cache_entry)

		#
		# assemble a list of bins in which the cache entry belongs.
		# iterate over existing bins backwards so that we record
		# the indices of matching bins in descending order.  bail
		# out when we find a bin that precedes the new one
		#

		matching_bins = []
		for n in range(len(self.bins) - 1, -1, -1):
			bin = self.bins[n]
			if bin.extent[1] < new.extent[0] - self.max_gap:
				# no earlier bin can be within reach of any
				# offset vector (bins are time-ordered)
				break
			for offset_vector in self.offset_vectors:
				new.size.offsets.update(offset_vector)
				bin.size.offsets.update(offset_vector)
				if bin.size.is_coincident(new.size, keys = offset_vector.keys()):
					matching_bins.append(n)
					break
			# undo the last offset vector applied above
			bin.size.offsets.clear()
			new.size.offsets.clear()

		#
		# add new cache entry to bins
		#

		if not matching_bins:
			#
			# no existing bins match, add a new one
			#

			self.bins.append(new)
		else:
			#
			# put cache entry into earliest bin that was found
			# to match.  if cache entry belongs in more than
			# one bin, merge them.  note that the matching bin
			# indexes are given in descending order so the last
			# is the earliest bin, and after that popping them
			# in order does not affect the indexes of the
			# remaining, matching, bins.
			#

			dest = self.bins[matching_bins.pop(-1)]
			dest += new
			for n in matching_bins:
				dest += self.bins.pop(n)

		#
		# time-order the bins so the bail-out above works next time
		# this method is called
		#

		self.bins.sort()
339
def split_bins(cafepacker, extentlimit, verbose = False):
	"""
	Split bins in CafePacker so that each bin has an extent no longer
	than extentlimit.

	cafepacker is a CafePacker instance whose .bins list is modified
	in place;  extentlimit is the maximum allowed extent in seconds.
	"""

	#
	# loop over all bins in cafepacker.bins.  loop is backwards because
	# list grows in size as bins are split
	#

	for idx in range(len(cafepacker.bins) - 1, -1, -1):
		#
		# retrieve bin
		#

		origbin = cafepacker.bins[idx]

		#
		# how many pieces?  if bin doesn't need splitting move to
		# next
		#

		n = int(math.ceil(float(abs(origbin.extent)) / extentlimit))
		if n <= 1:
			continue

		#
		# calculate the times of the splits, and then build
		# segmentlistdicts for clipping.  the interior boundaries
		# are evenly spaced;  the outer boundaries are the
		# original extent's own ends
		#

		extents = [origbin.extent[0]] + [LIGOTimeGPS(origbin.extent[0] + i * float(abs(origbin.extent)) / n) for i in range(1, n)] + [origbin.extent[1]]
		if verbose:
			print("\tsplitting cache spanning %s at %s" % (str(origbin.extent), ", ".join(str(extent) for extent in extents[1:-1])), file=sys.stderr)
		extents = [segments.segment(*bounds) for bounds in zip(extents[:-1], extents[1:])]

		#
		# build new bins, pack objects from origbin into new bins.
		# a cache entry may appear in more than one of the new bins
		#

		newbins = []
		for extent in extents:
			#
			# append new bin
			#

			newbins.append(LALCacheBin())

			#
			# test each cache entry in original bin
			#

			extent_plus_max_gap = extent.protract(cafepacker.max_gap)
			for cache_entry in origbin.objects:
				#
				# quick check of gap:  skip files that no
				# offset vector could bring into reach
				#

				if cache_entry.segment.disjoint(extent_plus_max_gap):
					continue

				#
				# apply each offset vector
				#

				cache_entry_segs = cache_entry.segmentlistdict
				for offset_vector in cafepacker.offset_vectors:
					cache_entry_segs.offsets.update(offset_vector)

					#
					# test against bin
					#

					if cache_entry_segs.intersects_segment(extent):
						#
						# object is coincident with
						# bin
						#

						newbins[-1].add(cache_entry)
						break

			#
			# override the bin's extent:  use the nominal
			# clipping boundary, not the extent of the files
			# that landed in the bin
			#

			newbins[-1].extent = extent

		#
		# replace original bin with split bins.
		#

		cafepacker.bins[idx:idx+1] = newbins

	#
	# done
	#
439
440#
441# =============================================================================
442#
443# Output
444#
445# =============================================================================
446#
447
448
def write_caches(base, bins, instruments = None, verbose = False):
	"""
	Write one LAL cache file per bin.  The file names are base
	followed by a zero-padded integer and a ".cache" suffix;  the list
	of file names written is returned.  If instruments is not None,
	only cache entries whose segmentlistdict contains at least one of
	the named instruments are written.  Set verbose to True for
	progress reports on stderr.
	"""
	filenames = []
	if bins:
		# pad the file number to the number of digits required by
		# the largest index
		pattern = "%%s%%0%dd.cache" % int(math.log10(len(bins)) + 1)
		for n, bin in enumerate(bins):
			filename = pattern % (base, n)
			filenames.append(filename)
			if verbose:
				print("writing %s ..." % filename, file=sys.stderr)
			# use a context manager so the file is flushed and
			# closed;  the previous implementation leaked the
			# handle
			with open(filename, "w") as f:
				for cacheentry in bin.objects:
					if instruments is None or (instruments & set(cacheentry.segmentlistdict)):
						print(str(cacheentry), file=f)
	return filenames
463
464
def write_single_instrument_caches(base, bins, instruments, verbose = False):
	"""
	For each instrument in instruments, write the bins to a separate
	set of cache files restricted to that instrument's entries, using
	file names prefixed with base followed by the instrument name.

	Returns a dictionary mapping each instrument name to the list of
	file names written for it (previously the filenames computed by
	write_caches() were discarded).
	"""
	return dict((instrument, write_caches("%s%s_" % (base, instrument), bins, set([instrument]), verbose)) for instrument in instruments)
468
469
470#
471# =============================================================================
472#
473# Library API
474#
475# =============================================================================
476#
477
478
def ligolw_cafe(cache, offset_vectors, verbose = False, extentlimit = None):
	"""
	Transform a LAL cache into a list of caches each of whose contents
	can be subjected to a coincidence analysis independently of the
	contents of the other caches, assuming the coincidence analyses
	will involve the application of the given offset vectors.

	cache is a sequence (e.g., list, tuple, etc.) of
	lal.utils.CacheEntry objects.  offset_vectors is a sequence of
	instrument/offset dictionaries describing the offset vectors to
	consider.  Set verbose to True for verbosity.  If extentlimit is
	not None, bins are split so that no bin spans more than that many
	seconds.

	The output is a two-element tuple.  The first element is a
	igwn_segments.segmentlistdict object describing the times for which
	coincident data is available (derived from the segment metadata of
	the input cache).  The second element is a list of LALCacheBin
	objects, providing the file groups.
	"""
	#
	# Construct a segment list dictionary from the cache
	#

	if verbose:
		print("computing segment list ...", file=sys.stderr)
	seglists = cache_to_seglistdict(cache)

	#
	# For each instrument compute the times for which it will (could)
	# contribute to a coincidence analysis.  work in floats relative
	# to the earliest time in the cache for speed, then convert back
	# to absolute GPS times.  epoch is None only if every segment
	# list is empty, in which case nothing is iterated over below
	#

	epoch = min([min(seg[0] for seg in seglist) for seglist in seglists.values() if seglist] or [None])
	segmentlistdict_normalize(seglists, epoch)
	seglists = get_coincident_segmentlistdict(seglists, offset_vectors)
	segmentlistdict_unnormalize(seglists, epoch)

	#
	# Remove files that will not participate in a coincidence.  Take
	# care not to modify the calling code's data.  Note that because we
	# have established that this segment list describes exactly the
	# times spanned by the input files that are coincident under at
	# least one time slide, a file participates in a multi-instrument
	# coincidence if and only if it intersects these times.
	#

	if verbose:
		print("filtering input cache ...", file=sys.stderr)
	cache = [c for c in cache if seglists.intersects_all(c.segmentlistdict)]

	#
	# Optimization:  adding files to bins in time order keeps the number
	# of bins from growing larger than needed.
	#

	if verbose:
		print("sorting input cache ...", file=sys.stderr)
	cache.sort(key = lambda x: x.segment)

	#
	# Pack cache entries into output caches.  Having reduced the file
	# list to just those that participate in coincidences, it only
	# remains to determine which other files each must be grouped with.
	#

	outputcaches = []
	packer = CafePacker(outputcaches)
	packer.set_offset_vectors(offset_vectors)
	if verbose:
		print("packing files (considering %s offset vectors) ..." % len(offset_vectors), file=sys.stderr)
	for n, cacheentry in enumerate(cache):
		# report progress roughly every 13 files
		if verbose and not n % 13:
			print("\t%.1f%%\t(%d files, %d caches)\r" % (100.0 * n / len(cache), n + 1, len(outputcaches)), end=' ', file=sys.stderr)
		packer.pack(cacheentry)
	if verbose:
		print("\t100.0%%\t(%d files, %d caches)" % (len(cache), len(outputcaches)), file=sys.stderr)

	#
	# Split caches with extent more than extentlimit
	#

	if extentlimit is not None:
		if verbose:
			print("splitting caches with extent greater than %g s ..." % extentlimit, file=sys.stderr)
		split_bins(packer, extentlimit, verbose = verbose)
		if verbose:
			print("\t\t(%d files, %d caches)" % (len(cache), len(outputcaches)), file=sys.stderr)

	#
	# Sort output caches
	#

	if verbose:
		print("sorting output caches ...", file=sys.stderr)
	for cache in outputcaches:
		cache.objects.sort()

	#
	# Done.
	#

	return seglists, outputcaches
static double max(double a, double b)
Definition: EPFilters.c:43
static double min(double a, double b)
Definition: EPFilters.c:42
Packing algorithm implementing the ligolw_cafe file list packing algorithm.
Definition: cafe.py:237
def pack(self, cache_entry)
Find all bins in which this lal.utils.CacheEntry instance belongs, merge them, and add this cache ent...
Definition: cafe.py:277
def set_offset_vectors(self, offset_vectors)
Set the list of offset vectors to be considered when deciding the bins in which each file belongs.
Definition: cafe.py:244
Subclass of the packing.Bin class representing a LAL file cache.
Definition: cafe.py:195
def add(self, cache_entry)
Add the object, whose size is as given, to the bin.
Definition: cafe.py:201
def __eq__(self, other)
Definition: cafe.py:217
def __iadd__(self, *args)
Add the contents of another Bin object to this one.
Definition: cafe.py:206
def __ne__(self, other)
Definition: cafe.py:220
def __gt__(self, other)
Definition: cafe.py:226
def __ge__(self, other)
Definition: cafe.py:223
def __le__(self, other)
Definition: cafe.py:214
def __lt__(self, other)
Definition: cafe.py:211
Bin object for use in packing algorithm implementations.
Definition: packing.py:66
Parent class for packing algorithms.
Definition: packing.py:137
def split_bins(cafepacker, extentlimit, verbose=False)
Split bins in CafePacker so that each bin has an extent no longer than extentlimit.
Definition: cafe.py:344
def segmentlistdict_normalize(seglistdict, origin)
Convert the times in a segmentlist dictionary to floats relative to origin.
Definition: cafe.py:100
def load_cache(filename, verbose=False)
Parse a LAL cache file named filename into a list of lal.utils.CacheEntry objects.
Definition: cafe.py:64
def write_caches(base, bins, instruments=None, verbose=False)
Definition: cafe.py:449
def get_coincident_segmentlistdict(seglistdict, offset_vectors)
Compute the segments for which data is required in order to perform a complete coincidence analysis g...
Definition: cafe.py:144
def segmentlistdict_unnormalize(seglistdict, origin)
The opposite of segmentlistdict_normalize(), restores the times in a segmentlist dictionary to absolu...
Definition: cafe.py:173
def cache_to_seglistdict(cache)
Construct a coalesced segmentlistdict object from a list of lal.utils.CacheEntry objects.
Definition: cafe.py:78
def write_single_instrument_caches(base, bins, instruments, verbose=False)
Definition: cafe.py:465
def ligolw_cafe(cache, offset_vectors, verbose=False, extentlimit=None)
Transform a LAL cache into a list of caches each of whose contents can be subjected to a coincidence ...
Definition: cafe.py:496