Loading [MathJax]/extensions/TeX/AMSsymbols.js
LAL 7.7.0.1-00ddc7f
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Modules Pages
VectorMath.c
Go to the documentation of this file.
1//
2// Copyright (C) 2015 Reinhard Prix, Karl Wette
3//
4// This program is free software; you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation; either version 2 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program; see the file COPYING. If not, write to the
16// Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17// MA 02110-1301 USA
18//
19
20// ---------- INCLUDES ----------
21#include <stdlib.h>
22#include <stdio.h>
23#include <stdbool.h>
24#include <math.h>
25
26#include <config.h>
27#include <simd_dispatch.h>
28
29#include <lal/LALString.h>
30#include <lal/LALConstants.h>
31#include <lal/VectorMath.h>
32
33#include "VectorMath_internal.h"
34
35//==================== FUNCTION DEFINITIONS ====================
36
37// -------------------- our own failsafe aligned memory handling --------------------
38
///
/// Define the aligned-vector API for element type \c TYPE, i.e. the three functions
/// XLALCreate<TYPE>VectorAligned(), XLALResize<TYPE>VectorAligned() and XLALDestroy<TYPE>VectorAligned().
///
/// A <TYPE>VectorAligned holds \c length elements in \c data, which is aligned to \c align bytes.
/// The raw memory block \c data0 is over-allocated by (\c align - 1) elements and \c data is set to
/// the first \c align-byte aligned address inside it. This does not rely on \c posix_memalign()
/// being available, and should compile+run everywhere.
///
/// - XLALCreate<TYPE>VectorAligned ( length, align ): returns a new vector, or NULL on failure
///   (a \c length of 0 is reported as a failure). Use XLALDestroy<TYPE>VectorAligned() to free the result.
/// - XLALResize<TYPE>VectorAligned ( in, length, align ): a NULL \c in creates a new vector, a \c length
///   of 0 destroys \c in and returns NULL. Otherwise \c in is resized and returned; \c align must be > 0.
///   If the arguments are invalid or memory cannot be obtained, NULL is returned and \c in keeps its
///   previous length and memory, and is still owned by the caller.
/// - XLALDestroy<TYPE>VectorAligned ( in ): frees the vector and its memory block; NULL is a no-op.
///
/// NOTE(review): the aligned offset is recomputed after every reallocation, and elements are not moved
/// if that offset differs from the previous one -- confirm that no caller relies on a resize
/// preserving the existing element values.
///
#define DEFINE_ALIGNED_VECT_API(TYPE) \
TYPE##VectorAligned *XLALCreate##TYPE##VectorAligned ( const UINT4 length, const UINT4 align ) \
{ \
  TYPE##VectorAligned *vect; \
  XLAL_CHECK_NULL ( (vect = XLALCalloc ( 1, sizeof(*vect) )) != NULL, XLAL_ENOMEM ); \
  \
  /* keep the header in its own variable so that it can still be freed if the resize fails */ \
  TYPE##VectorAligned *ret = XLALResize##TYPE##VectorAligned ( vect, length, align ); \
  if ( (ret == NULL) && (length > 0) ) { \
    /* for length == 0 the resize has already destroyed the header, otherwise it is still ours */ \
    XLALDestroy##TYPE##VectorAligned ( vect ); \
  } \
  XLAL_CHECK_NULL ( ret != NULL, XLAL_ENOMEM ); \
  \
  return ret; \
} /* XLALCreate<TYPE>VectorAligned() */ \
  \
TYPE##VectorAligned *XLALResize##TYPE##VectorAligned ( TYPE##VectorAligned *in, const UINT4 length, const UINT4 align ) \
{ \
  if ( in == NULL ) { \
    return XLALCreate##TYPE##VectorAligned ( length, align ); \
  } \
  if ( length == 0 ) { \
    XLALDestroy##TYPE##VectorAligned ( in ); \
    return NULL; \
  } \
  /* a zero alignment would lead to a modulo-by-zero below */ \
  XLAL_CHECK_NULL ( align > 0, XLAL_EINVAL, "Invalid alignment of 0 bytes\n" ); \
  \
  /* compute the padded size in size_t and refuse requests that would wrap around */ \
  const size_t elemSize = sizeof(in->data0[0]); \
  const size_t paddedLength = (size_t)length + (size_t)align - 1; \
  XLAL_CHECK_NULL ( (paddedLength >= (size_t)length) && (paddedLength <= ((size_t)-1) / elemSize), XLAL_ENOMEM, "Requested padded vector size overflows size_t\n" ); \
  \
  /* only touch 'in' once the reallocation has succeeded, so a failure neither leaks nor corrupts it */ \
  void *newData0 = XLALRealloc ( in->data0, paddedLength * elemSize ); \
  XLAL_CHECK_NULL ( newData0 != NULL, XLAL_ENOMEM ); \
  in->data0 = newData0; \
  in->length = length; \
  \
  /* advance to the first 'align'-byte aligned address inside the raw block */ \
  const size_t remBytes = ((size_t)in->data0) % align; \
  const size_t offsetBytes = (align - remBytes) % align; \
  in->data = (void*)(((char*)in->data0) + offsetBytes); \
  \
  XLAL_CHECK_NULL ( ((size_t)in->data) % align == 0, XLAL_EFAULT, "Failed to allocate %zu-byte aligned memory. Must be a coding error.\n", (size_t)align ); \
  \
  return in; \
} /* XLALResize<TYPE>VectorAligned() */ \
  \
void XLALDestroy##TYPE##VectorAligned ( TYPE##VectorAligned *in ) \
{ \
  if ( in == NULL ) { \
    return; \
  } \
  if ( in->data0 != NULL ) { \
    XLALFree ( in->data0 ); \
  } \
  XLALFree ( in ); \
  return; \
} /* XLALDestroy<TYPE>VectorAligned() */
88
94
95// -------------------- export vector-operation functions --------------------
96
/*
 * Generate everything needed to export one vector math function XLALVector<OP>():
 *  - the file-local function pointer XLALVector<OP>_ptr, which initially points at the dispatcher,
 *  - the global string XLALVector<OP>_name, which the dispatcher sets to the name of the chosen implementation,
 *  - the dispatcher XLALVector<OP>_DISPATCH(), which hands the candidate implementations for the
 *    instruction sets SIMD1..SIMD4 to the DISPATCH_SELECT_* macros (the _GEN implementation goes to
 *    DISPATCH_SELECT_END) and then forwards the call to XLALVector<OP>(),
 *  - the exported function XLALVector<OP>(), which simply calls through the function pointer.
 * PARAMS is the parenthesised parameter list of the function, ARGS the matching parenthesised argument list.
 */
#define EXPORT_VECTORMATH_ANY(OP, PARAMS, ARGS, SIMD1, SIMD2, SIMD3, SIMD4) \
  \
  static int XLALVector##OP##_DISPATCH PARAMS; \
  \
  static int (*XLALVector##OP##_ptr) PARAMS = XLALVector##OP##_DISPATCH; \
  const char* XLALVector##OP##_name = "\0"; \
  \
  int XLALVector##OP##_DISPATCH PARAMS \
  { \
    DISPATCH_SELECT_BEGIN(); \
    CONCAT2(DISPATCH_SELECT_,SIMD1)( \
      XLALVector##OP##_ptr = XLALVector##OP##_##SIMD1, \
      XLALVector##OP##_name = "XLALVector"#OP"_"#SIMD1 ); \
    CONCAT2(DISPATCH_SELECT_,SIMD2)( \
      XLALVector##OP##_ptr = XLALVector##OP##_##SIMD2, \
      XLALVector##OP##_name = "XLALVector"#OP"_"#SIMD2 ); \
    CONCAT2(DISPATCH_SELECT_,SIMD3)( \
      XLALVector##OP##_ptr = XLALVector##OP##_##SIMD3, \
      XLALVector##OP##_name = "XLALVector"#OP"_"#SIMD3 ); \
    CONCAT2(DISPATCH_SELECT_,SIMD4)( \
      XLALVector##OP##_ptr = XLALVector##OP##_##SIMD4, \
      XLALVector##OP##_name = "XLALVector"#OP"_"#SIMD4 ); \
    DISPATCH_SELECT_END( \
      XLALVector##OP##_ptr = XLALVector##OP##_GEN, \
      XLALVector##OP##_name = "XLALVector"#OP"_GEN" ); \
    \
    /* the pointer has been updated by the selection above: re-enter through the exported function */ \
    return XLALVector##OP ARGS; \
  } \
  \
  int XLALVector##OP PARAMS \
  { \
    return (XLALVector##OP##_ptr) ARGS; \
  }
123
124// ---------- define exported vector math functions with 1 REAL4 vector input to 1 INT4 vector output (S2I) ----------
125#define EXPORT_VECTORMATH_S2I(NAME, ...) \
126 EXPORT_VECTORMATH_ANY( NAME ## REAL4, (INT4 *out, const REAL4 *in, const UINT4 len), (out, in, len), __VA_ARGS__ )
127
128EXPORT_VECTORMATH_S2I(INT4From, SSE2, NONE, NONE, NONE)
129
130// ---------- define exported vector math functions with 1 REAL4 vector input to 1 REAL4 scalar output (S2s) ----------
131#define EXPORT_VECTORMATH_S2s(NAME, ...) \
132 EXPORT_VECTORMATH_ANY( NAME ## REAL4, (REAL4 *out, const REAL4 *in, const UINT4 len), (out, in, len), __VA_ARGS__ )
133
134EXPORT_VECTORMATH_S2s(ScalarMax, AVX2, AVX, SSE2, NONE)
135
136// ---------- define exported vector math functions with 1 REAL4 vector input to 1 REAL4 vector output (S2S) ----------
137#define EXPORT_VECTORMATH_S2S(NAME, ...) \
138 EXPORT_VECTORMATH_ANY( NAME ## REAL4, (REAL4 *out, const REAL4 *in, const UINT4 len), (out, in, len), __VA_ARGS__ )
139
140EXPORT_VECTORMATH_S2S(Sin, AVX2, AVX, SSE2, NONE)
141EXPORT_VECTORMATH_S2S(Cos, AVX2, AVX, SSE2, NONE)
142EXPORT_VECTORMATH_S2S(Exp, AVX2, AVX, SSE2, NONE)
143EXPORT_VECTORMATH_S2S(Log, AVX2, AVX, SSE2, NONE)
144EXPORT_VECTORMATH_S2S(Round, AVX2, AVX, NONE, NONE)
145
146// ---------- define exported vector math functions with 1 REAL4 vector input to 2 REAL4 vector outputs (S2SS) ----------
147#define EXPORT_VECTORMATH_S2SS(NAME, ...) \
148 EXPORT_VECTORMATH_ANY( NAME ## REAL4, (REAL4 *out1, REAL4 *out2, const REAL4 *in, const UINT4 len), (out1, out2, in, len), __VA_ARGS__ )
149
150EXPORT_VECTORMATH_S2SS(SinCos, AVX2, AVX, SSE2, NONE)
151EXPORT_VECTORMATH_S2SS(SinCos2Pi, AVX2, AVX, SSE2, NONE)
152
153// ---------- define exported vector math functions with 2 REAL4 vector inputs to 1 REAL4 vector output (SS2S) ----------
154#define EXPORT_VECTORMATH_SS2S(NAME, ...) \
155 EXPORT_VECTORMATH_ANY( NAME ## REAL4, (REAL4 *out, const REAL4 *in1, const REAL4 *in2, const UINT4 len), (out, in1, in2, len), __VA_ARGS__ )
156
157EXPORT_VECTORMATH_SS2S(Add, AVX2, AVX, SSE2, NONE)
158EXPORT_VECTORMATH_SS2S(Sub, AVX2, AVX, SSE2, NONE)
159EXPORT_VECTORMATH_SS2S(Multiply, AVX2, AVX, SSE2, NONE)
160EXPORT_VECTORMATH_SS2S(Max, AVX2, AVX, SSE2, NONE)
161
162// ---------- define exported vector math functions with 1 REAL4 scalar, 1 REAL4 vector inputs to 1 REAL4 vector output (sS2S) ----------
163#define EXPORT_VECTORMATH_sS2S(NAME, ...) \
164 EXPORT_VECTORMATH_ANY( NAME ## REAL4, (REAL4 *out, REAL4 scalar, const REAL4 *in, const UINT4 len), (out, scalar, in, len), __VA_ARGS__ )
165
166EXPORT_VECTORMATH_sS2S(Scale, AVX2, AVX, SSE2, NONE)
167EXPORT_VECTORMATH_sS2S(Shift, AVX2, AVX, SSE2, NONE)
168
169// ---------- define exported vector math functions with 1 REAL4 scalar, 2 REAL4 vector inputs to 1 REAL4 vector output (sSS2S) ----------
170#define EXPORT_VECTORMATH_sSS2S(NAME, ...) \
171 EXPORT_VECTORMATH_ANY( NAME ## REAL4, (REAL4 *out, REAL4 scalar, const REAL4 *in1, const REAL4 *in2, const UINT4 len), (out, scalar, in1, in2, len), __VA_ARGS__ )
172
173EXPORT_VECTORMATH_sSS2S(ScaleAdd, AVX2, AVX, SSE2, NONE)
174
175// ---------- define exported vector math functions with 2 REAL4 vector inputs to 1 UINT4 scalar and 1 UINT4 vector output (SS2uU) ----------
176#define EXPORT_VECTORMATH_SS2uU(NAME, ...) \
177 EXPORT_VECTORMATH_ANY( NAME ## REAL4, ( UINT4* count, UINT4 *out, const REAL4 *in1, const REAL4 *in2, const UINT4 len ), (count, out, in1, in2, len), __VA_ARGS__ )
178
179EXPORT_VECTORMATH_SS2uU(FindVectorLessEqual, AVX2, SSSE3, NONE, NONE)
180
181// ---------- define exported vector math functions with 1 REAL4 scalar and 1 REAL4 vector inputs to 1 UINT4 scalar and 1 UINT4 vector output (sS2uU) ----------
182#define EXPORT_VECTORMATH_sS2uU(NAME, ...) \
183 EXPORT_VECTORMATH_ANY( NAME ## REAL4, ( UINT4* count, UINT4 *out, REAL4 scalar, const REAL4 *in, const UINT4 len ), (count, out, scalar, in, len), __VA_ARGS__ )
184
185EXPORT_VECTORMATH_sS2uU(FindScalarLessEqual, AVX2, SSSE3, NONE, NONE)
186
187// ---------- define exported vector math functions with 1 REAL8 scalar, 1 REAL8 vector inputs to 1 REAL8 vector output (dD2D) ----------
188#define EXPORT_VECTORMATH_dD2D(NAME, ...) \
189 EXPORT_VECTORMATH_ANY( NAME ## REAL8, (REAL8 *out, REAL8 scalar, const REAL8 *in, const UINT4 len), (out, scalar, in, len), __VA_ARGS__ )
190
191EXPORT_VECTORMATH_dD2D(Scale, AVX2, AVX, SSE2, NONE)
192EXPORT_VECTORMATH_dD2D(Shift, AVX2, AVX, SSE2, NONE)
193
194// ---------- define exported vector math functions with 1 REAL8 scalar, 2 REAL8 vector inputs to 1 REAL8 vector output (dDD2D) ----------
195#define EXPORT_VECTORMATH_dDD2D(NAME, ...) \
196 EXPORT_VECTORMATH_ANY( NAME ## REAL8, (REAL8 *out, REAL8 scalar, const REAL8 *in1, const REAL8 *in2, const UINT4 len), (out, scalar, in1, in2, len), __VA_ARGS__ )
197
198EXPORT_VECTORMATH_dDD2D(ScaleAdd, AVX2, AVX, SSE2, NONE)
199
200// ---------- define exported vector math functions with 2 REAL8 vector inputs to 1 REAL8 vector output (DD2D) ----------
201#define EXPORT_VECTORMATH_DD2D(NAME, ...) \
202 EXPORT_VECTORMATH_ANY( NAME ## REAL8, (REAL8 *out, const REAL8 *in1, const REAL8 *in2, const UINT4 len), (out, in1, in2, len), __VA_ARGS__ )
203
204EXPORT_VECTORMATH_DD2D(Add, AVX2, AVX, SSE2, NONE)
205EXPORT_VECTORMATH_DD2D(Sub, AVX2, AVX, SSE2, NONE)
206EXPORT_VECTORMATH_DD2D(Multiply, AVX2, AVX, SSE2, NONE)
207EXPORT_VECTORMATH_DD2D(Max, AVX2, AVX, SSE2, NONE)
208
209// ---------- define exported vector math functions with 2 COMPLEX8 vector inputs to 1 COMPLEX8 vector output (CC2C) ----------
210#define EXPORT_VECTORMATH_CC2C(NAME, ...) \
211 EXPORT_VECTORMATH_ANY( NAME ## COMPLEX8, (COMPLEX8 *out, const COMPLEX8 *in1, const COMPLEX8 *in2, const UINT4 len), (out, in1, in2, len), __VA_ARGS__ )
212
213EXPORT_VECTORMATH_CC2C(Multiply, AVX2, AVX, SSE2, NONE)
214EXPORT_VECTORMATH_CC2C(Add, AVX2, AVX, SSE2, NONE)
215
216// ---------- define exported vector math functions with 1 COMPLEX8 scalar and 1 COMPLEX8 vector inputs to 1 COMPLEX8 vector output (cC2C) ----------
217#define EXPORT_VECTORMATH_cC2C(NAME, ...) \
218 EXPORT_VECTORMATH_ANY( NAME ## COMPLEX8, (COMPLEX8 *out, COMPLEX8 scalar, const COMPLEX8 *in, const UINT4 len), (out, scalar, in, len), __VA_ARGS__ )
219
220EXPORT_VECTORMATH_cC2C(Scale, AVX2, AVX, SSE2, NONE)
221EXPORT_VECTORMATH_cC2C(Shift, AVX2, AVX, SSE2, NONE)
222
223// ---------- define exported vector math functions with 1 REAL4 scalar, 2 COMPLEX8 vector inputs to 1 COMPLEX8 vector output (sCC2C) ----------
224#define EXPORT_VECTORMATH_sCC2C(NAME, ...) \
225 EXPORT_VECTORMATH_ANY( NAME ## COMPLEX8, (COMPLEX8 *out, REAL4 scalar, const COMPLEX8 *in1, const COMPLEX8 *in2, const UINT4 len), (out, scalar, in1, in2, len), __VA_ARGS__ )
226
227EXPORT_VECTORMATH_sCC2C(ScaleAdd, AVX2, AVX, SSE2, NONE)
228
229// ---------- define exported vector math functions with 1 REAL8 vector input to 1 REAL8 scalar output (D2d) ----------
230#define EXPORT_VECTORMATH_D2d(NAME, ...) \
231 EXPORT_VECTORMATH_ANY( NAME ## REAL8, (REAL8 *out, const REAL8 *in, const UINT4 len), (out, in, len), __VA_ARGS__ )
232
233EXPORT_VECTORMATH_D2d(ScalarMax, AVX2, AVX, SSE2, NONE)
234
235// ---------- define exported vector math functions with 1 REAL8 vector input to 1 REAL8 vector output (D2D) ----------
236#define EXPORT_VECTORMATH_D2D(NAME, ...) \
237 EXPORT_VECTORMATH_ANY( NAME ## REAL8, (REAL8 *out, const REAL8 *in, const UINT4 len), (out, in, len), __VA_ARGS__ )
238
239EXPORT_VECTORMATH_D2D(Round, AVX2, AVX, NONE, NONE)
#define EXPORT_VECTORMATH_S2S(NAME,...)
#define EXPORT_VECTORMATH_cC2C(NAME,...)
#define EXPORT_VECTORMATH_S2I(NAME,...)
Definition: VectorMath.c:125
#define EXPORT_VECTORMATH_sSS2S(NAME,...)
#define EXPORT_VECTORMATH_DD2D(NAME,...)
#define EXPORT_VECTORMATH_sCC2C(NAME,...)
#define EXPORT_VECTORMATH_D2d(NAME,...)
#define EXPORT_VECTORMATH_S2s(NAME,...)
Definition: VectorMath.c:131
#define EXPORT_VECTORMATH_dDD2D(NAME,...)
#define DEFINE_ALIGNED_VECT_API(TYPE)
Create a special <TYPE>Vector with n-byte aligned memory data array.
Definition: VectorMath.c:45
#define EXPORT_VECTORMATH_SS2uU(NAME,...)
#define EXPORT_VECTORMATH_D2D(NAME,...)
#define EXPORT_VECTORMATH_sS2uU(NAME,...)
#define EXPORT_VECTORMATH_SS2S(NAME,...)
#define EXPORT_VECTORMATH_S2SS(NAME,...)
#define EXPORT_VECTORMATH_dD2D(NAME,...)
#define EXPORT_VECTORMATH_CC2C(NAME,...)
#define EXPORT_VECTORMATH_sS2S(NAME,...)
double complex COMPLEX16
Double-precision floating-point complex number (16 bytes total)
double REAL8
Double precision real floating-point number (8 bytes).
uint32_t UINT4
Four-byte unsigned integer.
float complex COMPLEX8
Single-precision floating-point complex number (8 bytes total)
float REAL4
Single precision real floating-point number (4 bytes).