Loading [MathJax]/extensions/TeX/AMSsymbols.js
LALPulsar 7.1.1.1-5e288d3
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Modules Pages
lineFileParser.py
Go to the documentation of this file.
1# Copyright (C) 2021 Rodrigo Tenorio
2#
3# This program is free software; you can redistribute it and/or modify it
4# under the terms of the GNU General Public License as published by the
5# Free Software Foundation; either version 2 of the License, or (at your
6# option) any later version.
7#
8# This program is distributed in the hope that it will be useful, but
9# WITHOUT ANY WARRANTY; without even the implied warranty of
10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
11# Public License for more details.
12#
13# You should have received a copy of the GNU General Public License along
14# with this program; if not, write to the Free Software Foundation, Inc.,
15# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16
17## \defgroup lalpulsar_py_lineFileParser LineFileParser
18## \ingroup lalpulsar_python
19"""
20Parse identified and unidentified line files following the O3a convention.
21
22The identified lines file contains detector spectral artifacts (lines and combs)
23verified to be non-astrophysical in origin.
24Each line of the file identifies an artifact with certain fundamental frequency,
25(possibly non-identical) frequency wings,
26the number of visible harmonics (if identified as a comb),
27the scaling of the affected band with harmonics (constant or linearly scaling),
28and a systematic off-set with respect to the specified frequency.
29
30The unidentified lines file contains spectral artifacts that cannot yet be
31convincingly associated with non-astrophysical detector noise.
32These files only list the frequency of the most prominent peak.
33
34This module provides a class, `LineFileParser`, to seamlessly read those files and return
35the list of lines as a list of [left_most_line_frequency, right_most_line_frequency]
36intervals.
37This format is easier to use for example when applying line vetoes on CW outliers.
38
39Input files are fed into the class using `LineFileParser.parse_identified_lines_csv` or
40`LineFileParser.parse_unidentified_lines_csv`.
41Intervals are accessible through attributes `LineFileParser.lines_left_side` and
42`LineFileParser.lines_right_side`.
43If several files are read, their resulting intervals are concatenated
44(but not sorted, and duplicate or overlapping entries are kept as they are).
45"""
46## @{
47
48import numpy as np
49
50from . import git_version
51
# Module metadata: the version and date strings are taken from the
# `id` and `date` attributes of the package-local `git_version` module.
52__author__ = "Rodrigo Tenorio <rodrigo.tenorio@ligo.org>"
53__version__ = git_version.id
54__date__ = git_version.date
55
56
class LineFileParser:
    """
    # LineFileParser
    This class is tailored to work using the O3-era line files.
    There are essentially two types of these files,
    their main difference being the number of columns.

    ## Identified lines file:

    - frequency: Column containing the central frequency of the line.
    - left_wing: Column containing the frequency limit towards
      lower frequencies of the line as a width (central frequency - low frequency limit) Hz.
    - right_wing: Column containing the frequency limit towards
      higher frequencies of the line as a width (high frequency limit - central frequency) Hz.
    - first_harmonic: Column with the index of the first visible harmonic of the line.
    - last_harmonic: Column with the index of the last visible harmonic of the line.
    - comb_type: Column containing line type
      (0=single line, 1=comb, or 2=comb whose width scales with the harmonic number).
    - offset: Column with the frequency line off-set.

    ## Unidentified lines file:

    - frequency: Column containing the central frequency of the line.
    """

    # Keyword arguments handed to numpy.genfromtxt when the caller gives none.
    default_genfromtxt_kwargs = {"delimiter": ",", "skip_header": 4}

    # Field name -> 0-based column index in the identified lines file.
    # The key order matters: it is the positional argument order of
    # `_expand_harmonics`.
    identified_lines_keys = {
        "frequency": 0,
        "left_wing": 5,
        "right_wing": 6,
        "first_harmonic": 3,
        "last_harmonic": 4,
        "comb_type": 1,
        "offset": 2,
    }

    # Field name -> 0-based column index in the unidentified lines file.
    unidentified_lines_keys = {"frequency": 0}

    def __init__(self):
        """
        Read and expand line files into frequency intervals.
        If multiple files are read, resulting intervals are concatenated.

        Intervals can be retrieved using the attributes `lines_left_side` and `lines_right_side`.

        """
        self.lines_left_side = None
        self.lines_right_side = None

        self._lines_are_set = False

    def _check_columns(self, columns, default_keys):
        """
        Return the column mapping to use, falling back to `default_keys`.

        @param columns: User-provided mapping of field name to column index, or None.
        @param default_keys: Default mapping; its keys are the required field names.
        @return The mapping to use (`default_keys` if `columns` is None or empty).
        @exception ValueError if `columns` lacks any key of `default_keys`.
        """
        columns = columns or default_keys
        if not all(key in columns for key in default_keys):
            raise ValueError(
                f"`columns` dictionary {columns} does not contain "
                f"all of the required keys: {list(default_keys.keys())}"
            )
        return columns

    def parse_identified_lines_csv(
        self, lines_file, columns=None, extra_wing_Hz=0.0, genfromtxt_kwargs=None
    ):
        """
        Parse a csv file containing lines of known origin (Advanced LIGO format).

        @param lines_file: Path to csv format lines file.
        @param columns: Dictionary with header fields as key
            and the corresponding (0-based) column index as value.
            If None, default ordering specified in the class attribute will be used.
        @param extra_wing_Hz: Extra wings to add at both sides of the resulting intervals, in Hz.
        @param genfromtxt_kwargs: kwargs to be passed to numpy.genfromtxt.
            Default is `delimiter=",", skip_header=4`.
        """

        columns = self._check_columns(columns, self.identified_lines_keys)

        expanded_lines_and_wings = self._get_identified_lines_center_left_right(
            lines_file, columns, genfromtxt_kwargs
        )

        lines_left_side, lines_right_side = self._add_frequency_wings(
            *expanded_lines_and_wings, extra_wing_Hz
        )

        self._set_lines(lines_left_side, lines_right_side)

    def parse_unidentified_lines_csv(
        self, lines_file, columns=None, extra_wing_Hz=0.0, genfromtxt_kwargs=None
    ):
        """
        Parse a csv file containing unidentified lines (Advanced LIGO format).

        @param lines_file: Path to csv format lines file.
        @param columns: Dictionary with header fields as key
            and the corresponding (0-based) column index as value.
            If None, default ordering specified in the class attribute will be used.
        @param extra_wing_Hz: Extra wings to add at both sides of the resulting intervals, in Hz.
        @param genfromtxt_kwargs: kwargs to be passed to numpy.genfromtxt.
            Default is `delimiter=",", skip_header=4`.
        """
        columns = self._check_columns(columns, self.unidentified_lines_keys)

        # genfromtxt returns a 0-d array for a file holding a single value;
        # force 1-d so that the intervals can always be concatenated later.
        unidentified_lines = np.atleast_1d(
            np.genfromtxt(
                lines_file,
                usecols=[columns[key] for key in self.unidentified_lines_keys],
                **(genfromtxt_kwargs or self.default_genfromtxt_kwargs),
            )
        )

        # Unidentified lines carry no wings of their own: only the extra wing applies.
        lines_left_side, lines_right_side = self._add_frequency_wings(
            unidentified_lines, 0.0, 0.0, extra_wing_Hz
        )
        self._set_lines(lines_left_side, lines_right_side)

    def _get_identified_lines_center_left_right(
        self, lines_file, columns, genfromtxt_kwargs=None
    ):
        """
        Read an identified lines file and expand every entry into its harmonics.

        @param lines_file: Path to csv format lines file.
        @param columns: Mapping of field name to (0-based) column index.
        @param genfromtxt_kwargs: kwargs to be passed to numpy.genfromtxt.
        @return Tuple (central frequencies, left wings, right wings) of 1-d arrays.
        """
        # Columns are requested in the key order of `identified_lines_keys`,
        # which is the argument order of `_expand_harmonics`.
        # genfromtxt collapses a single-row file to 1-d; force (rows, fields).
        lines_with_wings = np.atleast_2d(
            np.genfromtxt(
                lines_file,
                usecols=[columns[key] for key in self.identified_lines_keys],
                **(genfromtxt_kwargs or self.default_genfromtxt_kwargs),
            )
        )

        if lines_with_wings.size == 0:
            # No data rows: report no lines rather than failing on unpacking.
            return np.zeros(0), np.zeros(0), np.zeros(0)

        return self._expand_harmonics(*lines_with_wings.T)

    def _set_lines(self, lines_left_side, lines_right_side):
        """
        Properly add left and right boundaries to the class attributes.
        That means concatenating instead of overwriting if a list of lines
        has been already read. This may happen e.g. when reading identified
        and unidentified line file.
        """
        if not self._lines_are_set:
            self.lines_left_side = lines_left_side
            self.lines_right_side = lines_right_side
            self._lines_are_set = True
            return

        self.lines_left_side = np.concatenate((self.lines_left_side, lines_left_side))
        self.lines_right_side = np.concatenate(
            (self.lines_right_side, lines_right_side)
        )

    def _expand_harmonics(
        self,
        central_frequency,
        left_wing,
        right_wing,
        first_harmonic,
        last_harmonic,
        comb_type,
        offset,
    ):
        """
        Known line files contain only one of the harmonics of the lines. This method
        expands those harmonics, explicitly adding frequencies and wings to the line
        list in order to apply the veto.

        Comb type are used to properly re-scale harmonic wings. As of now (O3a),
        0 means line, 1 means non-scaling and 2 means scaling.

        Offsets shift the whole left-center-right structure as an overall adding term.

        All arguments are equal-length 1-d arrays, one entry per file row.

        @return Tuple (central frequencies, left wings, right wings) of 1-d arrays
            with one entry per expanded harmonic.
        """
        harmonics_per_line = (last_harmonic - first_harmonic + 1).astype(np.int32)
        total_number_of_lines = np.sum(harmonics_per_line, dtype=np.int32)

        expanded_central_frequency = np.zeros(total_number_of_lines)
        expanded_left_wing = np.zeros(total_number_of_lines)
        expanded_right_wing = np.zeros(total_number_of_lines)

        # Only scaling combs (comb type 2) have wings growing with the harmonic index.
        scale_wings = comb_type == 2

        start = 0
        for line, number_of_harmonics in enumerate(harmonics_per_line):
            stop = start + number_of_harmonics
            harmonic_index = np.arange(first_harmonic[line], last_harmonic[line] + 1)
            wing_scaling = harmonic_index if scale_wings[line] else 1.0

            expanded_central_frequency[start:stop] = (
                harmonic_index * central_frequency[line] + offset[line]
            )
            expanded_left_wing[start:stop] = wing_scaling * left_wing[line]
            expanded_right_wing[start:stop] = wing_scaling * right_wing[line]

            start = stop

        return expanded_central_frequency, expanded_left_wing, expanded_right_wing

    def _add_frequency_wings(self, central_frequency, left_wing, right_wing, extra_Hz):
        """
        Given a line frequency and its wings, convert to a range of frequencies
        occupied by the line. Extra bins are added according to the specified
        input.

        @return Tuple (left boundaries, right boundaries) of the line intervals.
        """
        lines_left_side = central_frequency - left_wing - extra_Hz
        lines_right_side = central_frequency + right_wing + extra_Hz
        return lines_left_side, lines_right_side
262
263
264## @}
def _add_frequency_wings(self, central_frequency, left_wing, right_wing, extra_Hz)
def _set_lines(self, lines_left_side, lines_right_side)
def __init__(self)
Read and expand line files into frequency intervals.
def parse_identified_lines_csv(self, lines_file, columns=None, extra_wing_Hz=0.0, genfromtxt_kwargs=None)
Parse a csv file containing lines of known origin (Advanced LIGO format).
def _get_identified_lines_center_left_right(self, lines_file, columns, genfromtxt_kwargs=None)
def _expand_harmonics(self, central_frequency, left_wing, right_wing, first_harmonic, last_harmonic, comb_type, offset)
def parse_unidentified_lines_csv(self, lines_file, columns=None, extra_wing_Hz=0.0, genfromtxt_kwargs=None)
Parse a csv file containing unidentified lines (Advanced LIGO format).
def _check_columns(self, columns, default_keys)