Loading [MathJax]/extensions/TeX/AMSsymbols.js
LALPulsar 7.1.1.1-b246709
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Modules Pages
ComputeResults_CUDA.cu
Go to the documentation of this file.
1//
2// Copyright (C) 2020 Karl Wette
3//
4// This program is free software; you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation; either version 2 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with with program; see the file COPYING. If not, write to the
16// Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17// MA 02110-1301 USA
18//
19
20///
21/// \file
22/// \ingroup lalpulsar_bin_Weave
23///
24
25#include "config.h"
26
27#include <cuda.h>
28#include <cuda_runtime_api.h>
29
30#include <lal/LALStdlib.h>
31
32#define CUDA_BLOCK_SIZE 512
33
34#define XLAL_CHECK_CUDA_CALL(...) do { \
35 cudaError_t retn; \
36 XLAL_CHECK ( ( retn = (__VA_ARGS__) ) == cudaSuccess, XLAL_EERR, "%s failed with return code %i", #__VA_ARGS__, retn ); \
37 } while(0)
38
39///
40/// CUDA kernel to find the maximum of \a nvec vectors
41///
42__global__ void VectorsMaxREAL4CUDA( REAL4 *max, const REAL4 **vec, const size_t nvec, const size_t nbin )
43{
44 int k = threadIdx.x + blockDim.x * blockIdx.x;
45 if ( k >= nbin ) {
46 return;
47 }
48 max[k] = vec[0][k];
49 for ( size_t j = 1; j < nvec; ++j ) {
50 if ( vec[j][k] > max[k] ) {
51 max[k] = vec[j][k];
52 }
53 }
54}
55
56///
57/// Find the maximum of \a nvec vectors in \a vec[], of length \a nbin, and return the result in \a max
58///
59extern "C" int XLALVectorsMaxREAL4CUDA( REAL4 *max, const REAL4 **vec, const size_t nvec, const size_t nbin )
60{
61 XLAL_CHECK( max != NULL, XLAL_EFAULT );
62 XLAL_CHECK( vec != NULL, XLAL_EFAULT );
63 XLAL_CHECK( vec[0] != NULL, XLAL_EFAULT );
64 XLAL_CHECK( nvec > 0, XLAL_EINVAL );
65 XLAL_CHECK( nbin > 0, XLAL_EINVAL );
66
67 VectorsMaxREAL4CUDA <<< ( nbin + CUDA_BLOCK_SIZE - 1 ) / CUDA_BLOCK_SIZE, CUDA_BLOCK_SIZE >>> ( max, vec, nvec, nbin );
68 XLAL_CHECK_CUDA_CALL( cudaGetLastError() );
69 XLAL_CHECK_CUDA_CALL( cudaDeviceSynchronize() );
70
71 return XLAL_SUCCESS;
72}
73
74///
75/// CUDA kernel to add \a nvec vectors
76///
77__global__ void VectorsAddREAL4CUDA( REAL4 *sum, const REAL4 **vec, const size_t nvec, const size_t nbin )
78{
79 int k = threadIdx.x + blockDim.x * blockIdx.x;
80 if ( k >= nbin ) {
81 return;
82 }
83 sum[k] = vec[0][k];
84 for ( size_t j = 1; j < nvec; ++j ) {
85 sum[k] += vec[j][k];
86 }
87}
88
89///
90/// Add \a nvec vectors in \a vec[], of length \a nbin, and return the result in \a sum
91///
92extern "C" int XLALVectorsAddREAL4CUDA( REAL4 *sum, const REAL4 **vec, const size_t nvec, const size_t nbin )
93{
94 XLAL_CHECK( sum != NULL, XLAL_EFAULT );
95 XLAL_CHECK( vec != NULL, XLAL_EFAULT );
96 XLAL_CHECK( vec[0] != NULL, XLAL_EFAULT );
97 XLAL_CHECK( nvec > 0, XLAL_EINVAL );
98 XLAL_CHECK( nbin > 0, XLAL_EINVAL );
99
100 VectorsAddREAL4CUDA <<< ( nbin + CUDA_BLOCK_SIZE - 1 ) / CUDA_BLOCK_SIZE, CUDA_BLOCK_SIZE >>> ( sum, vec, nvec, nbin );
101 XLAL_CHECK_CUDA_CALL( cudaGetLastError() );
102 XLAL_CHECK_CUDA_CALL( cudaDeviceSynchronize() );
103
104 return XLAL_SUCCESS;
105}
106
107// Local Variables:
108// mode: c
109// c-file-style: "linux"
110// c-basic-offset: 2
111// End:
__global__ void VectorsMaxREAL4CUDA(REAL4 *max, const REAL4 **vec, const size_t nvec, const size_t nbin)
CUDA kernel to find the maximum of nvec vectors.
int XLALVectorsAddREAL4CUDA(REAL4 *sum, const REAL4 **vec, const size_t nvec, const size_t nbin)
Add nvec vectors in vec[], of length nbin, and return the result in sum.
#define CUDA_BLOCK_SIZE
#define XLAL_CHECK_CUDA_CALL(...)
int XLALVectorsMaxREAL4CUDA(REAL4 *max, const REAL4 **vec, const size_t nvec, const size_t nbin)
Find the maximum of nvec vectors in vec[], of length nbin, and return the result in max.
__global__ void VectorsAddREAL4CUDA(REAL4 *sum, const REAL4 **vec, const size_t nvec, const size_t nbin)
CUDA kernel to add nvec vectors.
int j
int k
float REAL4
#define XLAL_CHECK(assertion,...)
XLAL_SUCCESS
XLAL_EFAULT
XLAL_EINVAL
#define max(a, b)