LALPulsar  6.1.0.1-89842e6
SinCosLUT.i
Go to the documentation of this file.
1 //
2 // Copyright (C) 2009, 2010, 2011, 2012, 2013 Bernd Machenschalk, Reinhard Prix
3 //
4 // This program is free software; you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation; either version 2 of the License, or
7 // (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with with program; see the file COPYING. If not, write to the
16 // Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17 // MA 02110-1301 USA
18 //
19 
20 /*
21  This file defines:
22  - a macro SINCOS_TRIM_X(y,x) which trims the value x to interval [0..2)
23  - global REAL4 arrays sincosLUTbase[] and sincosLUTdiff[] as lookup tables
24  - macros SINCOS_STEP1..6 for the individual steps of
25  XLALSinCos2PiLUTtrimmed() (to be mixed into the hotloop code)
26  - a type ux_t for a variable ux to be used in these macros
27  - macros SINCOS_LUT_RES, SINCOS_ADDS, SINCOS_MASK1, SINCOS_MASK2, SINCOS_SHIFT
28 
29  The following macros determine the code that is actually built:
30  _MSC_VER : are we using the Microsoft compiler that doesn't know C99?
31  _ARCH_PPC : are we compiling for PowerPC?
32  __BIG_ENDIAN__ : has the architecture big-endian byt order?
33 */
34 
35 #include <lal/LALConstants.h>
36 #include <lal/XLALError.h>
37 
38 #ifdef __GNUC__
39 #define UNUSED __attribute__ ((unused))
40 #else
41 #define UNUSED
42 #endif
43 
44 /*
45  Trimming macro
46 */
47 
48 /* the way of trimming x to the interval [0..2) for the sin_cos_LUT functions
49  give significant differences in speed, so we provide various ways here. */
50 #ifdef SINCOS_REAL4_ARG
51 
52 #ifdef _MSC_VER /* no C99 rint() */
53 #define SINCOS_TRIM_X(y,x) \
54  { \
55  __asm FLD QWORD PTR x \
56  __asm FRNDINT \
57  __asm FSUBR QWORD PTR x \
58  __asm FLD1 \
59  __asm FADDP ST(1),ST \
60  __asm FSTP QWORD PTR y \
61  }
62 #elif _ARCH_PPC
63 /* floor() is actually faster here, as we don't have to set the rounding mode */
64 #define SINCOS_TRIM_X(y,x) y = x - floor(x);
65 #else
66 #define SINCOS_TRIM_X(y,x) y = x - rint(x) + 1.0;
67 #endif
68 
69 #else /* SINCOS_REAL4_ARG */
70 
71 #ifdef _MSC_VER /* no C99 rint() */
72 #define SINCOS_TRIM_X(y,x) \
73  { \
74  __asm FLD DWORD PTR x \
75  __asm FRNDINT \
76  __asm FSUBR DWORD PTR x \
77  __asm FLD1 \
78  __asm FADDP ST(1),ST \
79  __asm FSTP DWORD PTR y \
80  }
81 #elif _ARCH_PPC
82 /* floor() is actually faster here, as we don't have to set the rounding mode */
83 #define SINCOS_TRIM_X(y,x) y = x - floorf(x);
84 #else
85 #define SINCOS_TRIM_X(y,x) y = x - rintf(x) + 1.0;
86 #endif
87 
88 #endif /* SINCOS_REAL4_ARG */
89 
90 
91 
92 /*
93  Lookup tables (LUT) data
94  */
95 
96 /* Constants */
97 #define SINCOS_ADDS 402653184.0
98 #define SINCOS_MASK1 0xFFFFFF
99 #define SINCOS_MASK2 0x003FFF
100 #define SINCOS_SHIFT 14
101 #define SINCOS_LUT_RES 1024 /* should be multiple of 4 */
102 
103 /* global VARIABLES to be used in (global) macros */
106 /* shift cos tables 90 deg. to sin table */
107 static const UNUSED REAL4* cosLUTbase = sincosLUTbase + (SINCOS_LUT_RES/4);
108 static const UNUSED REAL4* cosLUTdiff = sincosLUTdiff + (SINCOS_LUT_RES/4);
109 
110 /* Variables */
111 
112 static UNUSED INT4 sincosI, sincosN;
113 
114 /* A REAL8 variable that allows to read its higher bits as an INT4 */
115 static UNUSED union {
117  struct {
118 #ifdef __BIG_ENDIAN__
119  INT4 dummy;
120  INT4 intval;
121 #else
124 #endif
127 
128 
129 
130 /* Macros to interleave linear sin/cos calculation
131  in other code (e.g. AltiVec) */
132 
133 /* x must already been trimmed to interval [0..2) */
134 /* - syntactic sugar -| |- here is the actual code - */
135 #define SINCOS_PROLOG
136 #define SINCOS_STEP1(x) sincosUX.asreal = x + SINCOS_ADDS;
137 #define SINCOS_STEP2 sincosI = sincosUX.as2int.intval & SINCOS_MASK1;
138 #define SINCOS_STEP3 sincosN = sincosUX.as2int.intval & SINCOS_MASK2;
139 #define SINCOS_STEP4 sincosI = sincosI >> SINCOS_SHIFT;
140 #define SINCOS_STEP5(s) *s = sincosLUTbase[sincosI] + sincosN * sincosLUTdiff[sincosI];
141 #define SINCOS_STEP6(c) *c = cosLUTbase[sincosI] + sincosN * cosLUTdiff[sincosI];
142 #define SINCOS_EPILOG(s,c,x)
143 
144 
145 
146 /* Macros to interleave linear sin/cos calculation
147  (in x87 opcodes) with SSE hotloop.*/
148 
149 /* these actually define only string constants, so they don't harm non-gcc code */
150 /* handle different register names in AMD64 */
151 #if defined(__x86_64__)
152 #define RAX "rax"
153 #define RDX "rdx"
154 #define RDI "rdi"
155 #else
156 #define RAX "eax"
157 #define RDX "edx"
158 #define RDI "edi"
159 #endif
160 #define PAX "%%"RAX
161 #define PDX "%%"RDX
162 #define PDI "%%"RDI
163 
164 #ifdef SINCOS_REAL4_ARG
165 #define SINCOS_FLD "fld"
166 #else
167 #define SINCOS_FLD "fldl"
168 #endif
169 
170 /* Version 1 : with trimming of input argument to [0,2) */
171 #define SINCOS_TRIM_P0A(alpha) \
172  SINCOS_FLD " %[" #alpha "] \n\t" /* st: alpha */ \
173  "fistpll %[tmp] \n\t" /* tmp=(INT8)(round((alpha)) */ \
174  "fld1 \n\t" /* st: 1.0 */ \
175  "fildll %[tmp] \n\t" /* st: 1.0;(round((alpha))*/
176 
177 #define SINCOS_TRIM_P0B(alpha) \
178  "fsubrp %%st,%%st(1) \n\t" /* st: 1.0 -round(alpha) */ \
179  "faddl %[" #alpha "] \n\t" /* st: alpha -round(alpha)+1.0*/ \
180  "faddl %[sincos_adds] \n\t" /* ..continue lin. sin/cos as lebow */ \
181  "fstpl %[tmp] \n\t"
182 
183 /* Version 2 : assumes input argument is already trimmed */
184 #define SINCOS_P0(alpha) \
185  SINCOS_FLD " %[" #alpha "] \n\t" /*st:alpha */ \
186  "faddl %[sincos_adds] \n\t" /*st:alpha+A */ \
187  "fstpl %[tmp] \n\t"
188 
189 #define SINCOS_P1 \
190  "mov %[tmp],"PAX" \n\t" /* alpha +A ->eax (ix)*/ \
191  "mov "PAX","PDX" \n\t" /* n = ix & SINCOS_MASK2 */ \
192  "and $0x3fff,"PAX" \n\t"
193 #define SINCOS_P2 \
194  "mov "PAX",%[tmp] \n\t" \
195  "mov %[scd], "PAX" \n\t" \
196  "and $0xffffff,"PDX" \n\t" /* i = ix & SINCOS_MASK1;*/
197 #define SINCOS_P3 \
198  "fildl %[tmp] \n\t" \
199  "sar $0xe,"PDX" \n\t" /* i = i >> SINCOS_SHIFT;*/
200 #define SINCOS_P4 \
201  "fld %%st \n\t" /* st: n; n; */ \
202  "fmuls ("PAX","PDX",4) \n\t" \
203  "mov %[scb], "PDI" \n\t"
204 #define SINCOS_P4A /* P4 w/o doubling, used when P6 is not */ \
205  "fmuls ("PAX","PDX",4) \n\t" \
206  "mov %[scb], "PDI" \n\t"
207 #define SINCOS_P5(sin) \
208  "fadds ("PDI","PDX",4) \n\t" /* st: sincosLUTbase[i]+n*sincosLUTdiff[i]; n*/ \
209  "add $0x100,"PDX" \n\t" /* edx+=SINCOS_LUT_RES/4*/ \
210  "fstps %[" #sin "] \n\t" /* (*sin)=sincosLUTbase[i]+n*sincosLUTdiff[i]*/
211 #define SINCOS_P6(cos) \
212  "fmuls ("PAX","PDX",4) \n\t" \
213  "fadds ("PDI","PDX",4) \n\t" \
214  "fstps %[" #cos "] \n\t" /* (*cos)=cosbase[i]+n*cosdiff[i];*/
215 /* list of clobbered registers */
216 #define SINCOS_REGISTERS RAX,RDX,RDI,"st","st(1)","st(2)","cc"
REAL8 asreal
Definition: SinCosLUT.i:116
#define SINCOS_LUT_RES
Definition: SinCosLUT.i:101
struct @7::@8 as2int
static const UNUSED REAL4 * cosLUTbase
Definition: SinCosLUT.i:107
static UNUSED union @7 sincosUX
UNUSED REAL4 sincosLUTbase[SINCOS_LUT_RES+SINCOS_LUT_RES/4]
Definition: SinCosLUT.c:40
static UNUSED INT4 sincosN
Definition: SinCosLUT.i:112
UNUSED REAL4 sincosLUTdiff[SINCOS_LUT_RES+SINCOS_LUT_RES/4]
Definition: SinCosLUT.c:41
INT4 dummy
Definition: SinCosLUT.i:123
INT4 intval
Definition: SinCosLUT.i:122
static const UNUSED REAL4 * cosLUTdiff
Definition: SinCosLUT.i:108
static UNUSED INT4 sincosI
Definition: SinCosLUT.i:112
double REAL8
int32_t INT4
float REAL4