Loading [MathJax]/extensions/TeX/AMSsymbols.js
LALPulsar 7.1.1.1-5e288d3
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Modules Pages
SinCosLUT.i
Go to the documentation of this file.
1//
2// Copyright (C) 2009, 2010, 2011, 2012, 2013 Bernd Machenschalk, Reinhard Prix
3//
4// This program is free software; you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation; either version 2 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with with program; see the file COPYING. If not, write to the
16// Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17// MA 02110-1301 USA
18//
19
20/*
21 This file defines:
22 - a macro SINCOS_TRIM_X(y,x) which trims the value x to interval [0..2)
23 - global REAL4 arrays sincosLUTbase[] and sincosLUTdiff[] as lookup tables
24 - macros SINCOS_STEP1..6 for the individual steps of
25 XLALSinCos2PiLUTtrimmed() (to be mixed into the hotloop code)
26 - a type ux_t for a variable ux to be used in these macros
27 - macros SINCOS_LUT_RES, SINCOS_ADDS, SINCOS_MASK1, SINCOS_MASK2, SINCOS_SHIFT
28
29 The following macros determine the code that is actually built:
30 _MSC_VER : are we using the Microsoft compiler that doesn't know C99?
31 _ARCH_PPC : are we compiling for PowerPC?
32 __BIG_ENDIAN__ : has the architecture big-endian byt order?
33*/
34
35#include <lal/LALConstants.h>
36#include <lal/XLALError.h>
37
38#ifdef __GNUC__
39#define UNUSED __attribute__ ((unused))
40#else
41#define UNUSED
42#endif
43
44/*
45 Trimming macro
46*/
47
48/* the way of trimming x to the interval [0..2) for the sin_cos_LUT functions
49 give significant differences in speed, so we provide various ways here. */
50#ifdef SINCOS_REAL4_ARG
51
52#ifdef _MSC_VER /* no C99 rint() */
53#define SINCOS_TRIM_X(y,x) \
54 { \
55 __asm FLD QWORD PTR x \
56 __asm FRNDINT \
57 __asm FSUBR QWORD PTR x \
58 __asm FLD1 \
59 __asm FADDP ST(1),ST \
60 __asm FSTP QWORD PTR y \
61 }
62#elif _ARCH_PPC
63/* floor() is actually faster here, as we don't have to set the rounding mode */
64#define SINCOS_TRIM_X(y,x) y = x - floor(x);
65#else
66#define SINCOS_TRIM_X(y,x) y = x - rint(x) + 1.0;
67#endif
68
69#else /* SINCOS_REAL4_ARG */
70
71#ifdef _MSC_VER /* no C99 rint() */
72#define SINCOS_TRIM_X(y,x) \
73 { \
74 __asm FLD DWORD PTR x \
75 __asm FRNDINT \
76 __asm FSUBR DWORD PTR x \
77 __asm FLD1 \
78 __asm FADDP ST(1),ST \
79 __asm FSTP DWORD PTR y \
80 }
81#elif _ARCH_PPC
82/* floor() is actually faster here, as we don't have to set the rounding mode */
83#define SINCOS_TRIM_X(y,x) y = x - floorf(x);
84#else
85#define SINCOS_TRIM_X(y,x) y = x - rintf(x) + 1.0;
86#endif
87
88#endif /* SINCOS_REAL4_ARG */
89
90
91
92/*
93 Lookup tables (LUT) data
94 */
95
96/* Constants */
97#define SINCOS_ADDS 402653184.0
98#define SINCOS_MASK1 0xFFFFFF
99#define SINCOS_MASK2 0x003FFF
100#define SINCOS_SHIFT 14
101#define SINCOS_LUT_RES 1024 /* should be multiple of 4 */
102
103/* global VARIABLES to be used in (global) macros */
106/* shift cos tables 90 deg. to sin table */
107static const UNUSED REAL4* cosLUTbase = sincosLUTbase + (SINCOS_LUT_RES/4);
108static const UNUSED REAL4* cosLUTdiff = sincosLUTdiff + (SINCOS_LUT_RES/4);
109
110/* Variables */
111
112static UNUSED INT4 sincosI, sincosN;
113
114/* A REAL8 variable that allows to read its higher bits as an INT4 */
115static UNUSED union {
117 struct {
118#ifdef __BIG_ENDIAN__
119 INT4 dummy;
120 INT4 intval;
121#else
124#endif
127
128
129
130/* Macros to interleave linear sin/cos calculation
131 in other code (e.g. AltiVec) */
132
133/* x must already been trimmed to interval [0..2) */
134/* - syntactic sugar -| |- here is the actual code - */
135#define SINCOS_PROLOG
136#define SINCOS_STEP1(x) sincosUX.asreal = x + SINCOS_ADDS;
137#define SINCOS_STEP2 sincosI = sincosUX.as2int.intval & SINCOS_MASK1;
138#define SINCOS_STEP3 sincosN = sincosUX.as2int.intval & SINCOS_MASK2;
139#define SINCOS_STEP4 sincosI = sincosI >> SINCOS_SHIFT;
140#define SINCOS_STEP5(s) *s = sincosLUTbase[sincosI] + sincosN * sincosLUTdiff[sincosI];
141#define SINCOS_STEP6(c) *c = cosLUTbase[sincosI] + sincosN * cosLUTdiff[sincosI];
142#define SINCOS_EPILOG(s,c,x)
143
144
145
146/* Macros to interleave linear sin/cos calculation
147 (in x87 opcodes) with SSE hotloop.*/
148
149/* these actually define only string constants, so they don't harm non-gcc code */
150/* handle different register names in AMD64 */
151#if defined(__x86_64__)
152#define RAX "rax"
153#define RDX "rdx"
154#define RDI "rdi"
155#else
156#define RAX "eax"
157#define RDX "edx"
158#define RDI "edi"
159#endif
160#define PAX "%%"RAX
161#define PDX "%%"RDX
162#define PDI "%%"RDI
163
164#ifdef SINCOS_REAL4_ARG
165#define SINCOS_FLD "fld"
166#else
167#define SINCOS_FLD "fldl"
168#endif
169
170/* Version 1 : with trimming of input argument to [0,2) */
171#define SINCOS_TRIM_P0A(alpha) \
172 SINCOS_FLD " %[" #alpha "] \n\t" /* st: alpha */ \
173 "fistpll %[tmp] \n\t" /* tmp=(INT8)(round((alpha)) */ \
174 "fld1 \n\t" /* st: 1.0 */ \
175 "fildll %[tmp] \n\t" /* st: 1.0;(round((alpha))*/
176
177#define SINCOS_TRIM_P0B(alpha) \
178 "fsubrp %%st,%%st(1) \n\t" /* st: 1.0 -round(alpha) */ \
179 "faddl %[" #alpha "] \n\t" /* st: alpha -round(alpha)+1.0*/ \
180 "faddl %[sincos_adds] \n\t" /* ..continue lin. sin/cos as lebow */ \
181 "fstpl %[tmp] \n\t"
182
183/* Version 2 : assumes input argument is already trimmed */
184#define SINCOS_P0(alpha) \
185 SINCOS_FLD " %[" #alpha "] \n\t" /*st:alpha */ \
186 "faddl %[sincos_adds] \n\t" /*st:alpha+A */ \
187 "fstpl %[tmp] \n\t"
188
189#define SINCOS_P1 \
190 "mov %[tmp],"PAX" \n\t" /* alpha +A ->eax (ix)*/ \
191 "mov "PAX","PDX" \n\t" /* n = ix & SINCOS_MASK2 */ \
192 "and $0x3fff,"PAX" \n\t"
193#define SINCOS_P2 \
194 "mov "PAX",%[tmp] \n\t" \
195 "mov %[scd], "PAX" \n\t" \
196 "and $0xffffff,"PDX" \n\t" /* i = ix & SINCOS_MASK1;*/
197#define SINCOS_P3 \
198 "fildl %[tmp] \n\t" \
199 "sar $0xe,"PDX" \n\t" /* i = i >> SINCOS_SHIFT;*/
200#define SINCOS_P4 \
201 "fld %%st \n\t" /* st: n; n; */ \
202 "fmuls ("PAX","PDX",4) \n\t" \
203 "mov %[scb], "PDI" \n\t"
204#define SINCOS_P4A /* P4 w/o doubling, used when P6 is not */ \
205 "fmuls ("PAX","PDX",4) \n\t" \
206 "mov %[scb], "PDI" \n\t"
207#define SINCOS_P5(sin) \
208 "fadds ("PDI","PDX",4) \n\t" /* st: sincosLUTbase[i]+n*sincosLUTdiff[i]; n*/ \
209 "add $0x100,"PDX" \n\t" /* edx+=SINCOS_LUT_RES/4*/ \
210 "fstps %[" #sin "] \n\t" /* (*sin)=sincosLUTbase[i]+n*sincosLUTdiff[i]*/
211#define SINCOS_P6(cos) \
212 "fmuls ("PAX","PDX",4) \n\t" \
213 "fadds ("PDI","PDX",4) \n\t" \
214 "fstps %[" #cos "] \n\t" /* (*cos)=cosbase[i]+n*cosdiff[i];*/
215/* list of clobbered registers */
216#define SINCOS_REGISTERS RAX,RDX,RDI,"st","st(1)","st(2)","cc"
REAL8 asreal
Definition: SinCosLUT.i:116
#define SINCOS_LUT_RES
Definition: SinCosLUT.i:101
struct @7::@8 as2int
static const UNUSED REAL4 * cosLUTbase
Definition: SinCosLUT.i:107
static UNUSED union @7 sincosUX
UNUSED REAL4 sincosLUTbase[SINCOS_LUT_RES+SINCOS_LUT_RES/4]
Definition: SinCosLUT.c:40
static UNUSED INT4 sincosN
Definition: SinCosLUT.i:112
UNUSED REAL4 sincosLUTdiff[SINCOS_LUT_RES+SINCOS_LUT_RES/4]
Definition: SinCosLUT.c:41
INT4 dummy
Definition: SinCosLUT.i:123
INT4 intval
Definition: SinCosLUT.i:122
static const UNUSED REAL4 * cosLUTdiff
Definition: SinCosLUT.i:108
static UNUSED INT4 sincosI
Definition: SinCosLUT.i:112
double REAL8
int32_t INT4
float REAL4