25#include <lal/LALConstants.h>
26#include <lal/SeqFactories.h>
27#include <lal/LALStdlib.h>
29#include "../vectormath.h"
/* Relative error of dx with respect to x, evaluated with the single-precision
 * fabsf(): |dx/x| when |x| > 0, otherwise the plain magnitude |dx|, so a zero
 * reference value never becomes a divisor. x is expanded twice, so do not pass
 * an argument with side effects. */
31#define Relfloaterr(dx,x) (fabsf(x)>0 ? fabsf((dx)/(x)) : fabsf(dx) )
/* Same as Relfloaterr but evaluated with the double-precision fabs(). */
32#define Relerr(dx,x) (fabs(x)>0 ? fabs((dx)/(x)) : fabs(dx) )
39 INT4 ii, length = 100000;
40 REAL4VectorAligned *floatvalues0 = NULL, *floatvalues1 = NULL, *floatvalues2 = NULL, *floatvalues3 = NULL;
41 alignedREAL8Vector *doublevalues1 = NULL, *doublevalues2 = NULL, *doublevalues3 = NULL;
42 alignedREAL4VectorArray *floatvalues = NULL;
50 XLAL_CHECK( ( floatvalues = createAlignedREAL4VectorArray( 2, length, 32 ) ) != NULL,
XLAL_EFUNC );
52 for ( ii = 0; ii < length; ii++ ) {
53 floatvalues1->data[ii] = (
REAL4 )( ii - length / 2 ) * 2.0e-3;
54 doublevalues1->
data[ii] = (
REAL8 )( ii - length / 2 ) * 2.0e-3;
55 floatvalues2->data[ii] = (
REAL4 )( ii ) * 1.0e-3;
56 doublevalues2->data[ii] = (
REAL8 )( ii ) * 1.0e-3;
57 floatvalues3->data[ii] = (
REAL4 )( ii - length / 2 ) * 2.0e-4;
58 doublevalues3->data[ii] = (
REAL8 )( ii - length / 2 ) * 2.0e-4;
60 memcpy( floatvalues->data[0]->data, floatvalues1->data,
sizeof(
REAL4 )*length );
61 memcpy( floatvalues->data[1]->data, floatvalues2->data,
sizeof(
REAL4 )*length );
62 memcpy( floatvalues0->
data, floatvalues1->data,
sizeof(
REAL4 )*length );
64 REAL4VectorAligned *floatresult_vecsum = NULL, *floatresult_vecmult = NULL, *floatresult_addscalar = NULL, *floatresult_scale = NULL;
65 alignedREAL8Vector *doubleresult_exp = NULL, *doubleresult_addscalar = NULL, *doubleresult_scale = NULL;
66 alignedREAL4VectorArray *arraysumresult = NULL;
74 XLAL_CHECK( ( arraysumresult = createAlignedREAL4VectorArray( 2, length, 32 ) ) != NULL,
XLAL_EFUNC );
75 memset( arraysumresult->data[0]->data, 0,
sizeof(
REAL4 )*length );
76 memset( arraysumresult->data[1]->data, 0,
sizeof(
REAL4 )*length );
91 REAL4 maxfloaterr_vecsum = 0.0, maxfloatrelerr_vecsum = 0.0, maxfloaterr_vecmult = 0.0, maxfloatrelerr_vecmult = 0.0, maxfloaterr_addscalar = 0.0, maxfloatrelerr_addscalar = 0.0, maxfloaterr_scale = 0.0, maxfloatrelerr_scale = 0.0, maxfloaterr_seqsum = 0.0, maxfloatrelerr_seqsum = 0.0;
92 REAL8 maxdoubleerr_exp = 0.0, maxdoublerelerr_exp = 0.0, maxdoubleerr_addscalar = 0.0, maxdoublerelerr_addscalar = 0.0, maxdoubleerr_scale = 0.0, maxdoublerelerr_scale = 0.0;
93 for ( ii = 0; ii < length; ii++ ) {
94 REAL8 exp_libm = exp( doublevalues3->data[ii] );
95 REAL8 doubleerr = fabs( doubleresult_exp->
data[ii] - exp_libm );
97 maxdoubleerr_exp =
fmax( doubleerr, maxdoubleerr_exp );
98 maxdoublerelerr_exp =
fmax( doublerelerr, maxdoublerelerr_exp );
101 REAL4 floaterr = fabsf( floatvalues1->data[ii] - sumval );
103 maxfloaterr_vecsum = fmaxf( floaterr, maxfloaterr_vecsum );
104 maxfloatrelerr_vecsum = fmaxf( floatrelerr, maxfloatrelerr_vecsum );
106 REAL4 multval = (
REAL4 )( floatvalues1->data[ii] * floatvalues2->data[ii] );
107 floaterr = fabsf( floatresult_vecmult->data[ii] - multval );
109 maxfloaterr_vecmult = fmaxf( floaterr, maxfloaterr_vecmult );
110 maxfloatrelerr_vecmult = fmaxf( floatrelerr, maxfloatrelerr_vecmult );
112 sumval = (
REAL4 )( floatvalues1->data[ii] + (
REAL4 )100.0 );
113 REAL8 sumvald = ( doublevalues1->
data[ii] + 100.0 );
114 floaterr = fabsf( floatresult_addscalar->data[ii] - sumval );
116 doubleerr = fabs( doubleresult_addscalar->data[ii] - sumvald );
117 doublerelerr =
Relerr( doubleerr, sumvald );
118 maxfloaterr_addscalar = fmaxf( floaterr, maxfloaterr_addscalar );
119 maxfloatrelerr_addscalar = fmaxf( floatrelerr, maxfloatrelerr_addscalar );
120 maxdoubleerr_addscalar =
fmax( doubleerr, maxdoubleerr_addscalar );
121 maxdoublerelerr_addscalar =
fmax( doublerelerr, maxdoublerelerr_addscalar );
123 multval = (
REAL4 )( floatvalues1->data[ii] * (
REAL4 )100.0 );
124 REAL8 multvald = ( doublevalues1->
data[ii] * 100.0 );
125 floaterr = fabsf( floatresult_scale->data[ii] - multval );
127 doubleerr = fabs( doubleresult_scale->data[ii] - multvald );
128 doublerelerr =
Relerr( doubleerr, multvald );
129 maxfloaterr_scale = fmaxf( floaterr, maxfloaterr_scale );
130 maxfloatrelerr_scale = fmaxf( floatrelerr, maxfloatrelerr_scale );
131 maxdoubleerr_scale =
fmax( doubleerr, maxdoubleerr_scale );
132 maxdoublerelerr_scale =
fmax( doublerelerr, maxdoublerelerr_scale );
134 floaterr = fabsf( arraysumresult->data[0]->data[ii] - (
REAL4 )( floatvalues0->
data[ii] + floatvalues2->data[ii] ) );
135 floatrelerr =
Relfloaterr( floaterr, (
REAL4 )( floatvalues0->
data[ii] + floatvalues2->data[ii] ) );
136 maxfloaterr_seqsum = fmaxf( floaterr, maxfloaterr_seqsum );
137 maxfloatrelerr_seqsum = fmaxf( floatrelerr, maxfloatrelerr_seqsum );
140 fprintf( stderr,
"Test results SSE:\n" );
141 fprintf( stderr,
"-----------------\n" );
142 fprintf( stderr,
"Add REAL4Vectors: max error = %g, max relative error = %g\n", maxfloaterr_vecsum, maxfloatrelerr_vecsum );
143 fprintf( stderr,
"Multiply REAL4Vectors: max error = %g, max relative error = %g\n", maxfloaterr_vecmult, maxfloatrelerr_vecmult );
144 fprintf( stderr,
"Add scalar to REAL4Vector: max error = %g, max relative error = %g\n", maxfloaterr_addscalar, maxfloatrelerr_addscalar );
145 fprintf( stderr,
"Add scalar to REAL8Vector: max error = %g, max relative error = %g\n", maxdoubleerr_addscalar, maxdoublerelerr_addscalar );
146 fprintf( stderr,
"Scale REAL4Vector: max error = %g, max relative error = %g\n", maxfloaterr_scale, maxfloatrelerr_scale );
147 fprintf( stderr,
"Scale REAL8Vector: max error = %g, max relative error = %g\n", maxdoubleerr_scale, maxdoublerelerr_scale );
148 fprintf( stderr,
"exp(REAL8Vector): max error = %g, max relative error = %g\n", maxdoubleerr_exp, maxdoublerelerr_exp );
149 fprintf( stderr,
"Sum vectors of vector array into vector array: max error = %g, max relative error = %g\n", maxfloaterr_seqsum, maxfloatrelerr_seqsum );
167 for ( ii = 0; ii < length; ii++ ) {
168 REAL4 floaterr = fabsf( floatresult_vecsum->
data[ii] - (
REAL4 )( floatvalues1->data[ii] + floatvalues2->data[ii] ) );
169 REAL4 floatrelerr = fabsf( (
REAL4 )( 1.0 - floatresult_vecsum->
data[ii] / (
REAL4 )( floatvalues1->data[ii] + floatvalues2->data[ii] ) ) );
170 if ( floaterr > maxfloaterr_vecsum ) {
171 maxfloaterr_vecsum = floaterr;
173 if ( floatrelerr > maxfloatrelerr_vecsum ) {
174 maxfloatrelerr_vecsum = floatrelerr;
177 floaterr = fabsf( floatresult_vecmult->data[ii] - (
REAL4 )( floatvalues1->data[ii] * floatvalues2->data[ii] ) );
178 floatrelerr = fabsf( (
REAL4 )( 1.0 - floatresult_vecmult->data[ii] / (
REAL4 )( floatvalues1->data[ii] * floatvalues2->data[ii] ) ) );
179 if ( floaterr > maxfloaterr_vecmult ) {
180 maxfloaterr_vecmult = floaterr;
182 if ( floatrelerr > maxfloatrelerr_vecmult ) {
183 maxfloatrelerr_vecmult = floatrelerr;
186 floaterr = fabsf( floatresult_addscalar->data[ii] - (
REAL4 )( floatvalues1->data[ii] + (
REAL4 )100.0 ) );
187 REAL8 doubleerr = fabs( doubleresult_addscalar->data[ii] - ( doublevalues1->
data[ii] + 100.0 ) );
188 floatrelerr = fabsf( (
REAL4 )( 1.0 - floatresult_addscalar->data[ii] / (
REAL4 )( floatvalues1->data[ii] + (
REAL4 )100.0 ) ) );
189 REAL8 doublerelerr = fabs( 1.0 - doubleresult_addscalar->data[ii] / ( doublevalues1->
data[ii] + 100.0 ) );
190 if ( floaterr > maxfloaterr_addscalar ) {
191 maxfloaterr_addscalar = floaterr;
193 if ( floatrelerr > maxfloatrelerr_addscalar ) {
194 maxfloatrelerr_addscalar = floatrelerr;
196 if ( doubleerr > maxdoubleerr_addscalar ) {
197 maxdoubleerr_addscalar = doubleerr;
199 if ( doublerelerr > maxdoublerelerr_addscalar ) {
200 maxdoublerelerr_addscalar = doublerelerr;
203 floaterr = fabsf( floatresult_scale->data[ii] - (
REAL4 )( floatvalues1->data[ii] * (
REAL4 )100.0 ) );
204 doubleerr = fabs( doubleresult_scale->data[ii] - ( doublevalues1->
data[ii] * 100.0 ) );
205 floatrelerr = fabsf( (
REAL4 )( 1.0 - floatresult_scale->data[ii] / (
REAL4 )( floatvalues1->data[ii] * (
REAL4 )100.0 ) ) );
206 doublerelerr = fabs( 1.0 - doubleresult_scale->data[ii] / ( doublevalues1->
data[ii] * 100.0 ) );
207 if ( floaterr > maxfloaterr_scale ) {
208 maxfloaterr_scale = floaterr;
210 if ( floatrelerr > maxfloatrelerr_scale ) {
211 maxfloatrelerr_scale = floatrelerr;
213 if ( doubleerr > maxdoubleerr_scale ) {
214 maxdoubleerr_scale = doubleerr;
216 if ( doublerelerr > maxdoublerelerr_scale ) {
217 maxdoublerelerr_scale = doublerelerr;
/* Compare the array-sum result with the scalar reference sum.
 * arraysumresult is an alignedREAL4VectorArray, so each data[] entry is a
 * vector rather than a sample: select row 0 first and then sample ii, exactly
 * as the SSE check of this same result does. Indexing data[ii] directly
 * would subtract a REAL4 from a vector pointer and step far beyond the two
 * rows the array was created with.
 * NOTE(review): the SSE check uses floatvalues0 as the first addend; confirm
 * that floatvalues1 still holds the unmodified inputs at this point. */
220 floaterr = fabsf( arraysumresult->data[0]->data[ii] - ( REAL4 )( floatvalues1->data[ii] + floatvalues2->data[ii] ) );
221 floatrelerr = fabsf( ( REAL4 )( 1.0 - arraysumresult->data[0]->data[ii] / ( REAL4 )( floatvalues1->data[ii] + floatvalues2->data[ii] ) ) );
222 if ( floaterr > maxfloaterr_seqsum ) {
223 maxfloaterr_seqsum = floaterr;
225 if ( floatrelerr > maxfloatrelerr_seqsum ) {
226 maxfloatrelerr_seqsum = floatrelerr;
230 fprintf( stderr,
"Test results AVX:\n" );
231 fprintf( stderr,
"-----------------\n" );
232 fprintf( stderr,
"Add REAL4Vectors: max error = %g, max relative error = %g\n", maxfloaterr_vecsum, maxfloatrelerr_vecsum );
233 fprintf( stderr,
"Multiply REAL4Vectors: max error = %g, max relative error = %g\n", maxfloaterr_vecmult, maxfloatrelerr_vecmult );
234 fprintf( stderr,
"Add scalar to REAL4Vector: max error = %g, max relative error = %g\n", maxfloaterr_addscalar, maxfloatrelerr_addscalar );
235 fprintf( stderr,
"Add scalar to REAL8Vector: max error = %g, max relative error = %g\n", maxdoubleerr_addscalar, maxdoublerelerr_addscalar );
236 fprintf( stderr,
"Scale REAL4Vector: max error = %g, max relative error = %g\n", maxfloaterr_scale, maxfloatrelerr_scale );
237 fprintf( stderr,
"Scale REAL8Vector: max error = %g, max relative error = %g\n", maxdoubleerr_scale, maxdoublerelerr_scale );
238 fprintf( stderr,
"Sum vectors of vector array into vector array: max error = %g, max relative error = %g\n", maxfloaterr_seqsum, maxfloatrelerr_seqsum );
254 destroyAlignedREAL4VectorArray( floatvalues );
255 destroyAlignedREAL4VectorArray( arraysumresult );
int XLALVectorScaleREAL4(REAL4 *out, REAL4 scalar, const REAL4 *in, const UINT4 len)
void XLALDestroyREAL4VectorAligned(REAL4VectorAligned *in)
REAL4VectorAligned * XLALCreateREAL4VectorAligned(const UINT4 length, const UINT4 align)
int XLALVectorAddREAL4(REAL4 *out, const REAL4 *in1, const REAL4 *in2, const UINT4 len)
int XLALVectorMultiplyREAL4(REAL4 *out, const REAL4 *in1, const REAL4 *in2, const UINT4 len)
int XLALVectorShiftREAL4(REAL4 *out, REAL4 scalar, const REAL4 *in, const UINT4 len)
#define XLAL_CHECK(assertion,...)
#define Relfloaterr(dx, x)
INT4 avxScaleREAL8Vector(alignedREAL8Vector *output, alignedREAL8Vector *input, REAL8 scale)
Scale the elements of an alignedREAL8Vector by a REAL8 value using AVX.
INT4 sseScaleREAL8Vector(alignedREAL8Vector *output, alignedREAL8Vector *input, REAL8 scale)
Scale the elements of an alignedREAL8Vector by a REAL8 value using SSE.
void destroyAlignedREAL8Vector(alignedREAL8Vector *vector)
INT4 sse_exp_REAL8Vector(alignedREAL8Vector *output, alignedREAL8Vector *input)
Exponential of input vector is computed using SSE, based on the Cephes library.
INT4 avxAddScalarToREAL8Vector(alignedREAL8Vector *output, alignedREAL8Vector *input, REAL8 scalar)
Add a REAL8 scalar value to the elements of an alignedREAL8Vector using AVX.
INT4 sseAddScalarToREAL8Vector(alignedREAL8Vector *output, alignedREAL8Vector *input, REAL8 scalar)
Add a REAL8 scalar value to the elements of an alignedREAL8Vector using SSE.
alignedREAL8Vector * createAlignedREAL8Vector(UINT4 length, const size_t align)