LAL 7.7.0.1-678514e
LALSIMD.c
Go to the documentation of this file.
1/*
2 * Copyright (C) 2015 Reinhard Prix, Karl Wette
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; see the file COPYING. If not, write to the
16 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17 * MA 02110-1301 USA
18 */
19
20/*
21 * SIMD extension detection and runtime selection for LALSuite
22 *
23 * SIMD detection based on the following files from:
24 * http://www.agner.org/optimize/#vectorclass
25 **************************** instrset.h **********************************
26 * Author: Agner Fog
27 * Date created: 2012-05-30
28 * Last modified: 2014-10-22
29 * Version: 1.16
30 * Project: vector classes
31 *
32 * (c) Copyright 2012 - 2014 GNU General Public License www.gnu.org/licenses
33 ************************** instrset_detect.cpp ****************************
34 * Author: Agner Fog
35 * Date created: 2012-05-30
36 * Last modified: 2014-07-23
37 * Version: 1.14
38 * Project: vector classes
39 *
40 * (c) Copyright 2012 - 2014 GNU General Public License http://www.gnu.org/licenses
41 ******************************************************************************
42 */
43
44#include <stdlib.h>
45#include <stdint.h>
46#include <string.h>
47
48#include <config.h>
49
50#include <lal/LALSIMD.h>
51#include <lal/LALConfig.h>
52#include <lal/LALError.h>
53#include <lal/XLALError.h>
54#include <lal/LALString.h>
55
/* Determine whether this file is being compiled for x86 (64-bit or 32-bit) */
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)
#define HAVE_X86 1
#else
#define HAVE_X86 0
#endif

/*
 * On x86 with a GNU-compatible compiler, use <cpuid.h> when it is available
 * (HAVE_CPUID_H is presumably provided by the build configuration).
 */
#if HAVE_X86 && ( defined(__GNUC__) || defined(__clang__) ) && defined(HAVE_CPUID_H)
#include <cpuid.h>
#define HAVE__GET_CPUID 1
#endif

/* Marks functions/parameters that may legitimately be unused on some platforms */
#if defined(__GNUC__)
#define UNUSED __attribute__ ((unused))
#else
#define UNUSED
#endif
75
76/* selected SIMD instruction set */
78
79/* array of instruction set names */
80static const char *const iset_names[LAL_SIMD_ISET_MAX] = {
81 [LAL_SIMD_ISET_GEN] = "GEN",
82 [LAL_SIMD_ISET_SSE] = "SSE",
83 [LAL_SIMD_ISET_SSE2] = "SSE2",
84 [LAL_SIMD_ISET_SSE3] = "SSE3",
85 [LAL_SIMD_ISET_SSSE3] = "SSSE3",
86 [LAL_SIMD_ISET_SSE4_1] = "SSE4.1",
87 [LAL_SIMD_ISET_SSE4_2] = "SSE4.2",
88 [LAL_SIMD_ISET_AVX] = "AVX",
89 [LAL_SIMD_ISET_AVX2] = "AVX2",
90};
91
/*
 * LAL_ONCE(init): run the function 'init' the first time it is invoked.
 * With pthread locking enabled this goes through pthread_once(), which
 * makes SIMD detection thread-safe.
 */
#if defined(LAL_PTHREAD_LOCK)
#include <pthread.h>
static pthread_once_t lalOnce = PTHREAD_ONCE_INIT;
#define LAL_ONCE(init) pthread_once(&lalOnce, (init))
#else
/* without pthread locking: a plain flag that is cleared once 'init' has been called */
static int lalOnce = 1;
#define LAL_ONCE(init) (lalOnce ? ((init)(), lalOnce = 0) : 0)
#endif
101
102/*
103 * Define interface to 'cpuid' instruction
104 * input: eax = functionnumber, ecx = 0
105 * output: eax = output[0], ebx = output[1], ecx = output[2], edx = output[3]
106 */
107static inline UNUSED void cpuid( uint32_t output[4], UNUSED int functionnumber ) {
108
109#if HAVE_X86
110
111#if HAVE__GET_CPUID
112
113 __get_cpuid(functionnumber, &output[0], &output[1], &output[2], &output[3]);
114
115#elif defined(__GNUC__) || defined(__clang__) // weird case: gcc|clang but NO cpuid.h file, can happen on Macs for old gcc's: give up here
116
117 output[0] = output[1] = output[2] = output[3] = 0;
118
119#else
120
121 /* Use MASM/Intel inline assembly */
122 __asm__ {
123 mov eax, functionnumber
124 xor ecx, ecx
125 cpuid
126 mov esi, output
127 mov [esi], eax
128 mov [esi+4], ebx
129 mov [esi+8], ecx
130 mov [esi+12], edx
131 }
132
133#endif
134
135#else /* for non-X86 platforms */
136
137 output[0] = output[1] = output[2] = output[3] = 0;
138
139#endif
140
141 return;
142
143} // cpuid()
144
145/*
146 * Define interface to 'xgetbv' instruction
147 */
148static inline UNUSED int64_t xgetbv( UNUSED int ctr ) {
149
150#if HAVE_X86
151
152#if defined(__GNUC__) || defined(__clang__)
153
154 /* Use GNU/AT&T inline assembly */
155 uint32_t a, d;
156 __asm__(".byte 0x0f,0x01,0xd0" \
157 : "=a"(a),"=d"(d) \
158 : "c"(ctr)
159 );
160 return a | (((uint64_t) d) << 32);
161
162#else
163
164 /* Use MASM/Intel inline assembly */
165 uint32_t a, d;
166 __asm__ {
167 mov ecx, ctr
168 _emit 0x0f
169 _emit 0x01
170 _emit 0xd0
171 mov a, eax
172 mov d, edx
173 }
174 return a | (((uint64_t) d) << 32);
175
176#endif /* inline assembly */
177
178#else /* !HAVE_X86 */
179
180 return 0;
181
182#endif /* HAVE_X86 */
183
184}
185
186/*
187 * Detect instruction set
188 */
190
191 /* cpuid results */
192 uint32_t abcd[4] = {0, 0, 0, 0};
193
195
196 cpuid(abcd, 0); /* call cpuid function 0 */
197 if (abcd[0] == 0) return iset; /* no further cpuid function supported */
198 cpuid(abcd, 1); /* call cpuid function 1 for feature flags */
199 if ((abcd[3] & (1 << 0)) == 0) return iset; /* no floating point */
200 if ((abcd[3] & (1 << 23)) == 0) return iset; /* no MMX */
201 if ((abcd[3] & (1 << 15)) == 0) return iset; /* no conditional move */
202 if ((abcd[3] & (1 << 24)) == 0) return iset; /* no FXSAVE */
203 if ((abcd[3] & (1 << 25)) == 0) return iset; /* no SSE */
204 iset = LAL_SIMD_ISET_SSE; /* SSE detected */
205
206 if ((abcd[3] & (1 << 26)) == 0) return iset; /* no SSE2 */
207 iset = LAL_SIMD_ISET_SSE2; /* SSE2 detected */
208
209 if ((abcd[2] & (1 << 0)) == 0) return iset; /* no SSE3 */
210 iset = LAL_SIMD_ISET_SSE3; /* SSE3 detected */
211
212 if ((abcd[2] & (1 << 9)) == 0) return iset; /* no SSSE3 */
213 iset = LAL_SIMD_ISET_SSSE3; /* SSSE3 detected */
214
215 if ((abcd[2] & (1 << 19)) == 0) return iset; /* no SSE4.1 */
216 iset = LAL_SIMD_ISET_SSE4_1; /* SSE4.1 detected */
217
218 if ((abcd[2] & (1 << 23)) == 0) return iset; /* no POPCNT */
219 if ((abcd[2] & (1 << 20)) == 0) return iset; /* no SSE4.2 */
220 iset = LAL_SIMD_ISET_SSE4_2; /* SSE4.2 detected */
221
222 if ((abcd[2] & (1 << 27)) == 0) return iset; /* XSAVE not enabled in O.S. */
223 if ((xgetbv(0) & 6) != 6) return iset; /* AVX not enabled in O.S. */
224 if ((abcd[2] & (1 << 28)) == 0) return iset; /* no AVX */
225 iset = LAL_SIMD_ISET_AVX; /* AVX detected */
226
227#if HAVE_X86 && defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
228 /* GCC's __get_cpuid() fails to detect AVX2, see bug report at https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77756 */
229 if (!__builtin_cpu_supports("avx2")) return iset; /* no AVX2 */
230#else
231 cpuid(abcd, 7); /* call cpuid function 7 for feature flags */
232 if ((abcd[1] & (1 << 5)) == 0) return iset; /* no AVX2 */
233#endif
234 iset = LAL_SIMD_ISET_AVX2; /* AVX2 detected */
235
236 return iset;
237
238}
239
240/*
241 * Select instruction set, allowing guru users to down-select
242 */
243static void select_instruction_set(void) {
244
245 /* Detect instruction set */
248 lalAbortHook("%s: SIMD instruction set detection failed!!\n", __func__);
249 return;
250 }
251
252 /* Check if user wants to down-select instruction set */
253 const char *env = getenv("LAL_SIMD_ISET");
254 if (env == NULL || *env == '\0') {
255 return;
256 }
257
258 /* Try to match LAL_SIMD_ISET to an instruction set name */
260 for (LAL_SIMD_ISET i = 0; i < LAL_SIMD_ISET_MAX; ++i) {
261 if (XLALStringCaseCompare(env, iset_names[i]) == 0) {
262 user_iset = i;
263 break;
264 }
265 }
266 if (user_iset == LAL_SIMD_ISET_MAX) {
267 lalAbortHook("%s: LAL_SIMD_ISET='%s' does not match a SIMD instruction set\n", __func__, env);
268 return;
269 }
270
271 /* Check user is not trying to select an unavailable instruction set */
272 if (user_iset > selected_iset) {
273 lalAbortHook("%s: LAL_SIMD_ISET='%s' is not available on this machine\n", __func__, env);
274 return;
275 }
276
277 /* select user-requested instruction set */
278 selected_iset = user_iset;
279
280 return;
281
282}
283
286 return (iset < LAL_SIMD_ISET_MAX) && (iset <= selected_iset);
287}
288
291 return iset_names[iset];
292}
void(* lalAbortHook)(const char *,...)
Definition: LALError.c:75
static UNUSED void cpuid(uint32_t output[4], UNUSED int functionnumber)
Definition: LALSIMD.c:107
static UNUSED int64_t xgetbv(UNUSED int ctr)
Definition: LALSIMD.c:148
#define LAL_ONCE(init)
Definition: LALSIMD.c:99
static LAL_SIMD_ISET detect_instruction_set(void)
Definition: LALSIMD.c:189
static void select_instruction_set(void)
Definition: LALSIMD.c:243
static int lalOnce
Definition: LALSIMD.c:98
static const char *const iset_names[LAL_SIMD_ISET_MAX]
Definition: LALSIMD.c:80
static LAL_SIMD_ISET selected_iset
Definition: LALSIMD.c:77
int XLALHaveSIMDInstructionSet(LAL_SIMD_ISET iset)
Return true if the executing machine supports the given instruction set.
Definition: LALSIMD.c:284
LAL_SIMD_ISET
SIMD instruction sets this module can detect.
Definition: LALSIMD.h:47
const char * XLALSIMDInstructionSetName(LAL_SIMD_ISET iset)
Return the name of a given instruction set as a string.
Definition: LALSIMD.c:289
@ LAL_SIMD_ISET_SSE4_1
SSE version 4.1.
Definition: LALSIMD.h:54
@ LAL_SIMD_ISET_SSE3
SSE version 3.
Definition: LALSIMD.h:52
@ LAL_SIMD_ISET_SSE
SSE (Streaming SIMD Extensions)
Definition: LALSIMD.h:50
@ LAL_SIMD_ISET_AVX2
AVX version 2.
Definition: LALSIMD.h:57
@ LAL_SIMD_ISET_SSE4_2
SSE version 4.2.
Definition: LALSIMD.h:55
@ LAL_SIMD_ISET_SSE2
SSE version 2.
Definition: LALSIMD.h:51
@ LAL_SIMD_ISET_GEN
GENeric floating-point unit.
Definition: LALSIMD.h:49
@ LAL_SIMD_ISET_AVX
AVX (Advanced Vector Extensions)
Definition: LALSIMD.h:56
@ LAL_SIMD_ISET_SSSE3
Supplemental SSE version 3.
Definition: LALSIMD.h:53
@ LAL_SIMD_ISET_MAX
Definition: LALSIMD.h:59
int XLALStringCaseCompare(const char *s1, const char *s2)
Compare two strings, ignoring case and without using locale-dependent functions.
Definition: LALString.c:210
static const INT4 a
Definition: Random.c:79
#define XLAL_CHECK_NULL(assertion,...)
Macro to test an assertion and invoke a failure if it is not true in a function that returns a pointe...
Definition: XLALError.h:825
@ XLAL_EINVAL
Invalid argument.
Definition: XLALError.h:409
void output(int gps_sec, int output_type)
Definition: tconvert.c:440