LAL  7.5.0.1-b72065a
VectorMath.c
Go to the documentation of this file.
1 //
2 // Copyright (C) 2015 Reinhard Prix, Karl Wette
3 //
4 // This program is free software; you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation; either version 2 of the License, or
7 // (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with this program; see the file COPYING. If not, write to the
16 // Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17 // MA 02110-1301 USA
18 //
19 
20 // ---------- INCLUDES ----------
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#include <math.h>

#include <config.h>
#include <simd_dispatch.h>

#include <lal/LALString.h>
#include <lal/LALConstants.h>
#include <lal/VectorMath.h>

#include "VectorMath_internal.h"
34 
35 //==================== FUNCTION DEFINITIONS ====================
36 
37 // -------------------- our own failsafe aligned memory handling --------------------
38 
///
/// Define the create / resize / destroy API of a <TYPE>VectorAligned: a vector whose
/// \c data array starts on an \c align-byte boundary.
///
/// Alignment is obtained by over-allocating the raw buffer \c data0 and pointing \c data at the
/// first suitably aligned byte inside it. This does not rely on \c posix_memalign() being
/// available, and should compile+run everywhere.
///
/// - XLALCreate<TYPE>VectorAligned(): allocate a new vector with \c length elements.
///   Returns NULL and raises XLAL_ENOMEM on failure; this includes \c length == 0.
/// - XLALResize<TYPE>VectorAligned(): change length and/or alignment of \c in.
///   A NULL \c in behaves like XLALCreate<TYPE>VectorAligned(); \c length == 0 destroys \c in
///   and returns NULL. The leading min(old length, new length) elements are carried over to the
///   newly aligned \c data array. If \c align is 0 or the allocation fails, NULL is returned
///   and \c in is left unmodified (and still owned by the caller).
/// - XLALDestroy<TYPE>VectorAligned(): free a vector obtained from the functions above;
///   a NULL argument is a no-op.
///
#define DEFINE_ALIGNED_VECT_API(TYPE) \
TYPE##VectorAligned *XLALCreate##TYPE##VectorAligned ( const UINT4 length, const UINT4 align ) \
{ \
  TYPE##VectorAligned *ret; \
  XLAL_CHECK_NULL ( (ret = XLALCalloc ( 1, sizeof(*ret) )) != NULL, XLAL_ENOMEM ); \
  \
  TYPE##VectorAligned *resized = XLALResize##TYPE##VectorAligned ( ret, length, align ); \
  if ( (resized == NULL) && (length != 0) ) { \
    /* resize failed without consuming 'ret'; for length == 0 it has already destroyed it */ \
    XLALDestroy##TYPE##VectorAligned ( ret ); \
  } \
  XLAL_CHECK_NULL ( resized != NULL, XLAL_ENOMEM ); \
  \
  return resized; \
} /* XLALCreate<TYPE>VectorAligned() */ \
  \
TYPE##VectorAligned *XLALResize##TYPE##VectorAligned ( TYPE##VectorAligned *in, const UINT4 length, const UINT4 align ) \
{ \
  if ( in == NULL ) { \
    return XLALCreate##TYPE##VectorAligned ( length, align ); \
  } \
  if ( length == 0 ) { \
    XLALDestroy##TYPE##VectorAligned ( in ); \
    return NULL; \
  } \
  /* align == 0 would otherwise end up as a modulo-by-zero below */ \
  XLAL_CHECK_NULL ( align > 0, XLAL_EINVAL, "Alignment must be a positive number of bytes.\n" ); \
  \
  const size_t elemSize = sizeof(in->data0[0]); \
  /* pad by (align - 1) elements; computed in size_t so the sum cannot wrap around in UINT4 */ \
  const size_t paddedLength = (size_t)length + align - 1; \
  XLAL_CHECK_NULL ( (paddedLength >= length) && (paddedLength <= ((size_t)-1) / elemSize), XLAL_ENOMEM ); \
  \
  /* locate the existing elements (if any) inside the current raw buffer */ \
  size_t oldOffset = 0; \
  size_t keepBytes = 0; \
  if ( in->data0 != NULL ) { \
    oldOffset = (size_t)( ((char*)in->data) - ((char*)in->data0) ); \
    keepBytes = ( (in->length < length) ? in->length : length ) * elemSize; \
  } \
  /* the new buffer must still contain the kept elements at their old offset right after realloc */ \
  size_t allocBytes = paddedLength * elemSize; \
  if ( allocBytes < oldOffset + keepBytes ) { \
    allocBytes = oldOffset + keepBytes; \
  } \
  \
  /* realloc into a temporary: on failure the original buffer stays valid and owned by 'in' */ \
  char *newData0 = XLALRealloc ( in->data0, allocBytes ); \
  XLAL_CHECK_NULL ( newData0 != NULL, XLAL_ENOMEM ); \
  \
  size_t remBytes = ((size_t)newData0) % align; \
  size_t newOffset = (align - remBytes) % align; \
  /* the aligned start may have moved relative to the raw buffer: shift kept elements along */ \
  if ( (keepBytes > 0) && (newOffset != oldOffset) ) { \
    memmove ( newData0 + newOffset, newData0 + oldOffset, keepBytes ); \
  } \
  \
  in->data0 = (void*)newData0; \
  in->data = (void*)(newData0 + newOffset); \
  in->length = length; \
  \
  XLAL_CHECK_NULL ( ((size_t)in->data) % align == 0, XLAL_EFAULT, "Failed to allocate %zu-byte aligned memory. Must be a coding error.\n", (size_t)align ); \
  \
  return in; \
} /* XLALResize<TYPE>VectorAligned() */ \
  \
void XLALDestroy##TYPE##VectorAligned ( TYPE##VectorAligned *in ) \
{ \
  if ( in == NULL ) { \
    return; \
  } \
  /* only the raw buffer is owned; 'data' merely points into it */ \
  if ( in->data0 != NULL ) { \
    XLALFree ( in->data0 ); \
  } \
  XLALFree ( in ); \
} /* XLALDestroy<TYPE>VectorAligned() */
88 
94 
95 // -------------------- export vector-operation functions --------------------
96 
/*
 * EXPORT_VECTORMATH_ANY(NAME, ARG_DEF, ARG_CALL, ISET1, ISET2, ISET3, ISET4)
 *
 * Generates, for one vector-math function XLALVector<NAME>:
 *  - a file-local function pointer XLALVector<NAME>_ptr, initially set to the dispatch function,
 *  - a global string XLALVector<NAME>_name, set to the name of the selected implementation,
 *  - the file-local dispatch function XLALVector<NAME>_DISPATCH, which runs the
 *    DISPATCH_SELECT_*() macros over ISET1..ISET4 (with XLALVector<NAME>_GEN passed to
 *    DISPATCH_SELECT_END()) and then calls XLALVector<NAME> again,
 *  - the exported function XLALVector<NAME>, which simply calls through the pointer.
 *
 * ARG_DEF is the parenthesised parameter list, ARG_CALL the matching parenthesised argument list.
 * NOTE(review): the DISPATCH_SELECT_*() macros are not defined in this file; presumably they
 * pick the first instruction set usable at run time and fall back to _GEN -- confirm in
 * simd_dispatch.h.
 */
#define EXPORT_VECTORMATH_ANY(NAME, ARG_DEF, ARG_CALL, ISET1, ISET2, ISET3, ISET4) \
  \
  static int XLALVector##NAME##_DISPATCH ARG_DEF; \
  \
  const char* XLALVector##NAME##_name = "\0"; \
  static int (*XLALVector##NAME##_ptr) ARG_DEF = XLALVector##NAME##_DISPATCH; \
  \
  int XLALVector##NAME##_DISPATCH ARG_DEF \
  { \
    DISPATCH_SELECT_BEGIN(); \
    CONCAT2(DISPATCH_SELECT_,ISET1)(XLALVector##NAME##_ptr = XLALVector##NAME##_##ISET1, XLALVector##NAME##_name = "XLALVector"#NAME"_"#ISET1); \
    CONCAT2(DISPATCH_SELECT_,ISET2)(XLALVector##NAME##_ptr = XLALVector##NAME##_##ISET2, XLALVector##NAME##_name = "XLALVector"#NAME"_"#ISET2); \
    CONCAT2(DISPATCH_SELECT_,ISET3)(XLALVector##NAME##_ptr = XLALVector##NAME##_##ISET3, XLALVector##NAME##_name = "XLALVector"#NAME"_"#ISET3); \
    CONCAT2(DISPATCH_SELECT_,ISET4)(XLALVector##NAME##_ptr = XLALVector##NAME##_##ISET4, XLALVector##NAME##_name = "XLALVector"#NAME"_"#ISET4); \
    DISPATCH_SELECT_END( XLALVector##NAME##_ptr = XLALVector##NAME##_GEN, XLALVector##NAME##_name = "XLALVector"#NAME"_GEN" ); \
    \
    /* the pointer has been updated above: go through the public entry point again */ \
    return XLALVector##NAME ARG_CALL; \
  } \
  \
  int XLALVector##NAME ARG_DEF \
  { \
    return (*XLALVector##NAME##_ptr) ARG_CALL; \
  }
123 
124 // ---------- define exported vector math functions with 1 REAL4 vector input to 1 INT4 vector output (S2I) ----------
125 #define EXPORT_VECTORMATH_S2I(NAME, ...) \
126  EXPORT_VECTORMATH_ANY( NAME ## REAL4, (INT4 *out, const REAL4 *in, const UINT4 len), (out, in, len), __VA_ARGS__ )
127 
128 EXPORT_VECTORMATH_S2I(INT4From, SSE2, NONE, NONE, NONE)
129 
130 // ---------- define exported vector math functions with 1 REAL4 vector input to 1 REAL4 vector output (S2S) ----------
131 #define EXPORT_VECTORMATH_S2S(NAME, ...) \
132  EXPORT_VECTORMATH_ANY( NAME ## REAL4, (REAL4 *out, const REAL4 *in, const UINT4 len), (out, in, len), __VA_ARGS__ )
133 
134 EXPORT_VECTORMATH_S2S(Sin, AVX2, AVX, SSE2, NONE)
135 EXPORT_VECTORMATH_S2S(Cos, AVX2, AVX, SSE2, NONE)
136 EXPORT_VECTORMATH_S2S(Exp, AVX2, AVX, SSE2, NONE)
137 EXPORT_VECTORMATH_S2S(Log, AVX2, AVX, SSE2, NONE)
138 EXPORT_VECTORMATH_S2S(Round, AVX2, AVX, NONE, NONE)
139 
140 // ---------- define exported vector math functions with 1 REAL4 vector input to 2 REAL4 vector outputs (S2SS) ----------
141 #define EXPORT_VECTORMATH_S2SS(NAME, ...) \
142  EXPORT_VECTORMATH_ANY( NAME ## REAL4, (REAL4 *out1, REAL4 *out2, const REAL4 *in, const UINT4 len), (out1, out2, in, len), __VA_ARGS__ )
143 
144 EXPORT_VECTORMATH_S2SS(SinCos, AVX2, AVX, SSE2, NONE)
145 EXPORT_VECTORMATH_S2SS(SinCos2Pi, AVX2, AVX, SSE2, NONE)
146 
147 // ---------- define exported vector math functions with 2 REAL4 vector inputs to 1 REAL4 vector output (SS2S) ----------
148 #define EXPORT_VECTORMATH_SS2S(NAME, ...) \
149  EXPORT_VECTORMATH_ANY( NAME ## REAL4, (REAL4 *out, const REAL4 *in1, const REAL4 *in2, const UINT4 len), (out, in1, in2, len), __VA_ARGS__ )
150 
151 EXPORT_VECTORMATH_SS2S(Add, AVX2, AVX, SSE2, NONE)
152 EXPORT_VECTORMATH_SS2S(Sub, AVX2, AVX, SSE2, NONE)
153 EXPORT_VECTORMATH_SS2S(Multiply, AVX2, AVX, SSE2, NONE)
154 EXPORT_VECTORMATH_SS2S(Max, AVX2, AVX, SSE2, NONE)
155 
156 // ---------- define exported vector math functions with 1 REAL4 scalar, 1 REAL4 vector inputs to 1 REAL4 vector output (sS2S) ----------
157 #define EXPORT_VECTORMATH_sS2S(NAME, ...) \
158  EXPORT_VECTORMATH_ANY( NAME ## REAL4, (REAL4 *out, REAL4 scalar, const REAL4 *in, const UINT4 len), (out, scalar, in, len), __VA_ARGS__ )
159 
160 EXPORT_VECTORMATH_sS2S(Scale, AVX2, AVX, SSE2, NONE)
161 EXPORT_VECTORMATH_sS2S(Shift, AVX2, AVX, SSE2, NONE)
162 
163 // ---------- define exported vector math functions with 2 REAL4 vector inputs to 1 UINT4 scalar and 1 UINT4 vector output (SS2uU) ----------
164 #define EXPORT_VECTORMATH_SS2uU(NAME, ...) \
165  EXPORT_VECTORMATH_ANY( NAME ## REAL4, ( UINT4* count, UINT4 *out, const REAL4 *in1, const REAL4 *in2, const UINT4 len ), (count, out, in1, in2, len), __VA_ARGS__ )
166 
167 EXPORT_VECTORMATH_SS2uU(FindVectorLessEqual, AVX2, SSSE3, NONE, NONE)
168 
169 // ---------- define exported vector math functions with 1 REAL4 scalar and 1 REAL4 vector inputs to 1 UINT4 scalar and 1 UINT4 vector output (sS2uU) ----------
170 #define EXPORT_VECTORMATH_sS2uU(NAME, ...) \
171  EXPORT_VECTORMATH_ANY( NAME ## REAL4, ( UINT4* count, UINT4 *out, REAL4 scalar, const REAL4 *in, const UINT4 len ), (count, out, scalar, in, len), __VA_ARGS__ )
172 
173 EXPORT_VECTORMATH_sS2uU(FindScalarLessEqual, AVX2, SSSE3, NONE, NONE)
174 
175 // ---------- define exported vector math functions with 1 REAL8 scalar, 1 REAL8 vector inputs to 1 REAL8 vector output (dD2D) ----------
176 #define EXPORT_VECTORMATH_dD2D(NAME, ...) \
177  EXPORT_VECTORMATH_ANY( NAME ## REAL8, (REAL8 *out, REAL8 scalar, const REAL8 *in, const UINT4 len), (out, scalar, in, len), __VA_ARGS__ )
178 
179 EXPORT_VECTORMATH_dD2D(Scale, AVX2, AVX, SSE2, NONE)
180 EXPORT_VECTORMATH_dD2D(Shift, AVX2, AVX, SSE2, NONE)
181 
182 // ---------- define exported vector math functions with 2 REAL8 vector inputs to 1 REAL8 vector output (DD2D) ----------
183 #define EXPORT_VECTORMATH_DD2D(NAME, ...) \
184  EXPORT_VECTORMATH_ANY( NAME ## REAL8, (REAL8 *out, const REAL8 *in1, const REAL8 *in2, const UINT4 len), (out, in1, in2, len), __VA_ARGS__ )
185 
186 EXPORT_VECTORMATH_DD2D(Add, AVX2, AVX, SSE2, NONE)
187 EXPORT_VECTORMATH_DD2D(Sub, AVX2, AVX, SSE2, NONE)
188 EXPORT_VECTORMATH_DD2D(Multiply, AVX2, AVX, SSE2, NONE)
189 EXPORT_VECTORMATH_DD2D(Max, AVX2, AVX, NONE, NONE)
190 
191 // ---------- define exported vector math functions with 2 COMPLEX8 vector inputs to 1 COMPLEX8 vector output (CC2C) ----------
192 #define EXPORT_VECTORMATH_CC2C(NAME, ...) \
193  EXPORT_VECTORMATH_ANY( NAME ## COMPLEX8, (COMPLEX8 *out, const COMPLEX8 *in1, const COMPLEX8 *in2, const UINT4 len), (out, in1, in2, len), __VA_ARGS__ )
194 
195 EXPORT_VECTORMATH_CC2C(Multiply, AVX2, AVX, SSE2, NONE)
196 EXPORT_VECTORMATH_CC2C(Add, AVX2, AVX, SSE2, NONE)
197 
198 // ---------- define exported vector math functions with 1 COMPLEX8 scalar and 1 COMPLEX8 vector inputs to 1 COMPLEX8 vector output (cC2C) ----------
199 #define EXPORT_VECTORMATH_cC2C(NAME, ...) \
200  EXPORT_VECTORMATH_ANY( NAME ## COMPLEX8, (COMPLEX8 *out, COMPLEX8 scalar, const COMPLEX8 *in, const UINT4 len), (out, scalar, in, len), __VA_ARGS__ )
201 
202 EXPORT_VECTORMATH_cC2C(Scale, AVX2, AVX, SSE2, NONE)
203 EXPORT_VECTORMATH_cC2C(Shift, AVX2, AVX, SSE2, NONE)
204 
205 // ---------- define exported vector math functions with 1 REAL8 vector input to 1 REAL8 vector output (D2D) ----------
206 #define EXPORT_VECTORMATH_D2D(NAME, ...) \
207  EXPORT_VECTORMATH_ANY( NAME ## REAL8, (REAL8 *out, const REAL8 *in, const UINT4 len), (out, in, len), __VA_ARGS__ )
208 
209 EXPORT_VECTORMATH_D2D(Round, AVX2, AVX, NONE, NONE)
210 
#define EXPORT_VECTORMATH_S2S(NAME,...)
Definition: VectorMath.c:131
#define EXPORT_VECTORMATH_cC2C(NAME,...)
#define EXPORT_VECTORMATH_S2I(NAME,...)
Definition: VectorMath.c:125
#define EXPORT_VECTORMATH_DD2D(NAME,...)
#define DEFINE_ALIGNED_VECT_API(TYPE)
Create a special <TYPE>Vector with n-byte aligned memory data array.
Definition: VectorMath.c:45
#define EXPORT_VECTORMATH_SS2uU(NAME,...)
#define EXPORT_VECTORMATH_D2D(NAME,...)
#define EXPORT_VECTORMATH_sS2uU(NAME,...)
#define EXPORT_VECTORMATH_SS2S(NAME,...)
Definition: VectorMath.c:148
#define EXPORT_VECTORMATH_S2SS(NAME,...)
Definition: VectorMath.c:141
#define EXPORT_VECTORMATH_dD2D(NAME,...)
#define EXPORT_VECTORMATH_CC2C(NAME,...)
#define EXPORT_VECTORMATH_sS2S(NAME,...)
Definition: VectorMath.c:157
double complex COMPLEX16
Double-precision floating-point complex number (16 bytes total)
double REAL8
Double precision real floating-point number (8 bytes).
uint32_t UINT4
Four-byte unsigned integer.
float complex COMPLEX8
Single-precision floating-point complex number (8 bytes total)
float REAL4
Single precision real floating-point number (4 bytes).