LAL  7.5.0.1-bede9b2
LALSIMD.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2015 Reinhard Prix, Karl Wette
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; see the file COPYING. If not, write to the
16  * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17  * MA 02110-1301 USA
18  */
19 
20 /*
21  * SIMD extension detection and runtime selection for LALSuite
22  *
23  * SIMD detection based on the following files from:
24  * http://www.agner.org/optimize/#vectorclass
25  **************************** instrset.h **********************************
26  * Author: Agner Fog
27  * Date created: 2012-05-30
28  * Last modified: 2014-10-22
29  * Version: 1.16
30  * Project: vector classes
31  *
32  * (c) Copyright 2012 - 2014 GNU General Public License www.gnu.org/licenses
33  ************************** instrset_detect.cpp ****************************
34  * Author: Agner Fog
35  * Date created: 2012-05-30
36  * Last modified: 2014-07-23
37  * Version: 1.14
38  * Project: vector classes
39  *
40  * (c) Copyright 2012 - 2014 GNU General Public License http://www.gnu.org/licenses
41  ******************************************************************************
42  */
43 
44 #include <stdlib.h>
45 #include <stdint.h>
46 #include <string.h>
47 
48 #include <config.h>
49 
50 #include <lal/LALSIMD.h>
51 #include <lal/LALConfig.h>
52 #include <lal/LALError.h>
53 #include <lal/XLALError.h>
54 #include <lal/LALString.h>
55 
/*
 * Platform detection: HAVE_X86 is 1 when any of the 64-bit or 32-bit x86
 * predefined macros is present, and 0 otherwise.
 */
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)
#define HAVE_X86 1
#else
#define HAVE_X86 0
#endif

/*
 * On x86 builds with a GNU-compatible compiler and a configured <cpuid.h>,
 * include that header and advertise it through HAVE__GET_CPUID.
 * HAVE__GET_CPUID is left undefined in every other configuration.
 */
#if HAVE_X86 && defined(HAVE_CPUID_H)
#if defined(__GNUC__) || defined(__clang__)
#include <cpuid.h>
#define HAVE__GET_CPUID 1
#endif
#endif

/* UNUSED silences unused-entity warnings where the compiler supports the attribute */
#if defined(__GNUC__)
#define UNUSED __attribute__ ((unused))
#else
#define UNUSED
#endif
75 
76 /* selected SIMD instruction set */
78 
79 /* array of instruction set names */
80 static const char *const iset_names[LAL_SIMD_ISET_MAX] = {
81  [LAL_SIMD_ISET_GEN] = "GEN",
82  [LAL_SIMD_ISET_SSE] = "SSE",
83  [LAL_SIMD_ISET_SSE2] = "SSE2",
84  [LAL_SIMD_ISET_SSE3] = "SSE3",
85  [LAL_SIMD_ISET_SSSE3] = "SSSE3",
86  [LAL_SIMD_ISET_SSE4_1] = "SSE4.1",
87  [LAL_SIMD_ISET_SSE4_2] = "SSE4.2",
88  [LAL_SIMD_ISET_AVX] = "AVX",
89  [LAL_SIMD_ISET_AVX2] = "AVX2",
90 };
91 
/*
 * One-time initialisation helper for SIMD detection.
 *
 * With LAL_PTHREAD_LOCK defined, LAL_ONCE(init) hands 'init' to
 * pthread_once(). Otherwise it falls back to a plain static flag with no
 * locking: 'init' is called while the flag is still set, the flag is then
 * cleared, and later invocations do nothing. Both forms yield an int.
 */
#if defined(LAL_PTHREAD_LOCK)
#include <pthread.h>
static pthread_once_t lalOnce = PTHREAD_ONCE_INIT;
#define LAL_ONCE(init) pthread_once(&lalOnce, (init))
#else
static int lalOnce = 1; /* non-zero until the initialiser has been run */
#define LAL_ONCE(init) (lalOnce ? ((init)(), lalOnce = 0) : 0)
#endif
101 
102 /*
103  * Define interface to 'cpuid' instruction
104  * input: eax = functionnumber, ecx = 0
105  * output: eax = output[0], ebx = output[1], ecx = output[2], edx = output[3]
106  */
107 static inline UNUSED void cpuid( uint32_t output[4], UNUSED int functionnumber ) {
108 
109 #if HAVE_X86
110 
111 #if HAVE__GET_CPUID
112 
113  __get_cpuid(functionnumber, &output[0], &output[1], &output[2], &output[3]);
114 
115 #elif defined(__GNUC__) || defined(__clang__) // weird case: gcc|clang but NO cpuid.h file, can happen on Macs for old gcc's: give up here
116 
117  output[0] = output[1] = output[2] = output[3] = 0;
118 
119 #else
120 
121  /* Use MASM/Intel inline assembly */
122  __asm__ {
123  mov eax, functionnumber
124  xor ecx, ecx
125  cpuid
126  mov esi, output
127  mov [esi], eax
128  mov [esi+4], ebx
129  mov [esi+8], ecx
130  mov [esi+12], edx
131  }
132 
133 #endif
134 
135 #else /* for non-X86 platforms */
136 
137  output[0] = output[1] = output[2] = output[3] = 0;
138 
139 #endif
140 
141  return;
142 
143 } // cpuid()
144 
145 /*
146  * Define interface to 'xgetbv' instruction
147  */
148 static inline UNUSED int64_t xgetbv( UNUSED int ctr ) {
149 
150 #if HAVE_X86
151 
152 #if defined(__GNUC__) || defined(__clang__)
153 
154  /* Use GNU/AT&T inline assembly */
155  uint32_t a, d;
156  __asm__(".byte 0x0f,0x01,0xd0" \
157  : "=a"(a),"=d"(d) \
158  : "c"(ctr)
159  );
160  return a | (((uint64_t) d) << 32);
161 
162 #else
163 
164  /* Use MASM/Intel inline assembly */
165  uint32_t a, d;
166  __asm__ {
167  mov ecx, ctr
168  _emit 0x0f
169  _emit 0x01
170  _emit 0xd0
171  mov a, eax
172  mov d, edx
173  }
174  return a | (((uint64_t) d) << 32);
175 
176 #endif /* inline assembly */
177 
178 #else /* !HAVE_X86 */
179 
180  return 0;
181 
182 #endif /* HAVE_X86 */
183 
184 }
185 
186 /*
187  * Detect instruction set
188  */
190 
191  /* cpuid results */
192  uint32_t abcd[4] = {0, 0, 0, 0};
193 
195 
196  cpuid(abcd, 0); /* call cpuid function 0 */
197  if (abcd[0] == 0) return iset; /* no further cpuid function supported */
198  cpuid(abcd, 1); /* call cpuid function 1 for feature flags */
199  if ((abcd[3] & (1 << 0)) == 0) return iset; /* no floating point */
200  if ((abcd[3] & (1 << 23)) == 0) return iset; /* no MMX */
201  if ((abcd[3] & (1 << 15)) == 0) return iset; /* no conditional move */
202  if ((abcd[3] & (1 << 24)) == 0) return iset; /* no FXSAVE */
203  if ((abcd[3] & (1 << 25)) == 0) return iset; /* no SSE */
204  iset = LAL_SIMD_ISET_SSE; /* SSE detected */
205 
206  if ((abcd[3] & (1 << 26)) == 0) return iset; /* no SSE2 */
207  iset = LAL_SIMD_ISET_SSE2; /* SSE2 detected */
208 
209  if ((abcd[2] & (1 << 0)) == 0) return iset; /* no SSE3 */
210  iset = LAL_SIMD_ISET_SSE3; /* SSE3 detected */
211 
212  if ((abcd[2] & (1 << 9)) == 0) return iset; /* no SSSE3 */
213  iset = LAL_SIMD_ISET_SSSE3; /* SSSE3 detected */
214 
215  if ((abcd[2] & (1 << 19)) == 0) return iset; /* no SSE4.1 */
216  iset = LAL_SIMD_ISET_SSE4_1; /* SSE4.1 detected */
217 
218  if ((abcd[2] & (1 << 23)) == 0) return iset; /* no POPCNT */
219  if ((abcd[2] & (1 << 20)) == 0) return iset; /* no SSE4.2 */
220  iset = LAL_SIMD_ISET_SSE4_2; /* SSE4.2 detected */
221 
222  if ((abcd[2] & (1 << 27)) == 0) return iset; /* XSAVE not enabled in O.S. */
223  if ((xgetbv(0) & 6) != 6) return iset; /* AVX not enabled in O.S. */
224  if ((abcd[2] & (1 << 28)) == 0) return iset; /* no AVX */
225  iset = LAL_SIMD_ISET_AVX; /* AVX detected */
226 
227 #if HAVE_X86 && defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
228  /* GCC's __get_cpuid() fails to detect AVX2, see bug report at https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77756 */
229  if (!__builtin_cpu_supports("avx2")) return iset; /* no AVX2 */
230 #else
231  cpuid(abcd, 7); /* call cpuid function 7 for feature flags */
232  if ((abcd[1] & (1 << 5)) == 0) return iset; /* no AVX2 */
233 #endif
234  iset = LAL_SIMD_ISET_AVX2; /* AVX2 detected */
235 
236  return iset;
237 
238 }
239 
240 /*
241  * Select instruction set, allowing guru users to down-select
242  */
243 static void select_instruction_set(void) {
244 
245  /* Detect instruction set */
248  lalAbortHook("%s: SIMD instruction set detection failed!!\n", __func__);
249  return;
250  }
251 
252  /* Check if user wants to down-select instruction set */
253  const char *env = getenv("LAL_SIMD_ISET");
254  if (env == NULL || *env == '\0') {
255  return;
256  }
257 
258  /* Try to match LAL_SIMD_ISET to an instruction set name */
259  LAL_SIMD_ISET user_iset = LAL_SIMD_ISET_MAX;
260  for (LAL_SIMD_ISET i = 0; i < LAL_SIMD_ISET_MAX; ++i) {
261  if (XLALStringCaseCompare(env, iset_names[i]) == 0) {
262  user_iset = i;
263  break;
264  }
265  }
266  if (user_iset == LAL_SIMD_ISET_MAX) {
267  lalAbortHook("%s: LAL_SIMD_ISET='%s' does not match a SIMD instruction set\n", __func__, env);
268  return;
269  }
270 
271  /* Check user is not trying to select an unavailable instruction set */
272  if (user_iset > selected_iset) {
273  lalAbortHook("%s: LAL_SIMD_ISET='%s' is not available on this machine\n", __func__, env);
274  return;
275  }
276 
277  /* select user-requested instruction set */
278  selected_iset = user_iset;
279 
280  return;
281 
282 }
283 
286  return (iset < LAL_SIMD_ISET_MAX) && (iset <= selected_iset);
287 }
288 
291  return iset_names[iset];
292 }
void(* lalAbortHook)(const char *,...)
Definition: LALError.c:75
static UNUSED void cpuid(uint32_t output[4], UNUSED int functionnumber)
Definition: LALSIMD.c:107
static UNUSED int64_t xgetbv(UNUSED int ctr)
Definition: LALSIMD.c:148
#define LAL_ONCE(init)
Definition: LALSIMD.c:99
static LAL_SIMD_ISET detect_instruction_set(void)
Definition: LALSIMD.c:189
static void select_instruction_set(void)
Definition: LALSIMD.c:243
static int lalOnce
Definition: LALSIMD.c:98
static const char *const iset_names[LAL_SIMD_ISET_MAX]
Definition: LALSIMD.c:80
static LAL_SIMD_ISET selected_iset
Definition: LALSIMD.c:77
const char * XLALSIMDInstructionSetName(LAL_SIMD_ISET iset)
Return the name of a given instruction set as a string.
Definition: LALSIMD.c:289
int XLALHaveSIMDInstructionSet(LAL_SIMD_ISET iset)
Return true if the executing machine supports the given instruction set.
Definition: LALSIMD.c:284
LAL_SIMD_ISET
SIMD instruction sets this module can detect.
Definition: LALSIMD.h:47
@ LAL_SIMD_ISET_SSE4_1
SSE version 4.1.
Definition: LALSIMD.h:54
@ LAL_SIMD_ISET_SSE3
SSE version 3.
Definition: LALSIMD.h:52
@ LAL_SIMD_ISET_SSE
SSE (Streaming SIMD Extensions)
Definition: LALSIMD.h:50
@ LAL_SIMD_ISET_AVX2
AVX version 2.
Definition: LALSIMD.h:57
@ LAL_SIMD_ISET_SSE4_2
SSE version 4.2.
Definition: LALSIMD.h:55
@ LAL_SIMD_ISET_SSE2
SSE version 2.
Definition: LALSIMD.h:51
@ LAL_SIMD_ISET_GEN
GENeric floating-point unit.
Definition: LALSIMD.h:49
@ LAL_SIMD_ISET_AVX
AVX (Advanced Vector Extensions)
Definition: LALSIMD.h:56
@ LAL_SIMD_ISET_SSSE3
Supplemental SSE version 3.
Definition: LALSIMD.h:53
@ LAL_SIMD_ISET_MAX
Definition: LALSIMD.h:59
int XLALStringCaseCompare(const char *s1, const char *s2)
Compare two strings, ignoring case and without using locale-dependent functions.
Definition: LALString.c:210
static const INT4 a
Definition: Random.c:79
#define XLAL_CHECK_NULL(assertion,...)
Macro to test an assertion and invoke a failure if it is not true in a function that returns a pointe...
Definition: XLALError.h:825
@ XLAL_EINVAL
Invalid argument.
Definition: XLALError.h:409
void output(int gps_sec, int output_type)
Definition: tconvert.c:440