28#include <cuda_runtime_api.h>
30#include <lal/LALStdlib.h>
32#define CUDA_BLOCK_SIZE 512
34#define XLAL_CHECK_CUDA_CALL(...) do { \
36 XLAL_CHECK ( ( retn = (__VA_ARGS__) ) == cudaSuccess, XLAL_EERR, "%s failed with return code %i", #__VA_ARGS__, retn ); \
44 int k = threadIdx.x + blockDim.x * blockIdx.x;
49 for (
size_t j = 1;
j < nvec; ++
j ) {
50 if ( vec[
j][
k] >
max[
k] ) {
79 int k = threadIdx.x + blockDim.x * blockIdx.x;
84 for (
size_t j = 1;
j < nvec; ++
j ) {
__global__ void VectorsMaxREAL4CUDA(REAL4 *max, const REAL4 **vec, const size_t nvec, const size_t nbin)
CUDA kernel to find the element-wise maximum of nvec vectors.
int XLALVectorsAddREAL4CUDA(REAL4 *sum, const REAL4 **vec, const size_t nvec, const size_t nbin)
Add the nvec vectors in vec[], each of length nbin, and return the result in sum.
#define XLAL_CHECK_CUDA_CALL(...)
int XLALVectorsMaxREAL4CUDA(REAL4 *max, const REAL4 **vec, const size_t nvec, const size_t nbin)
Find the element-wise maximum of the nvec vectors in vec[], each of length nbin, and return the result in max.
__global__ void VectorsAddREAL4CUDA(REAL4 *sum, const REAL4 **vec, const size_t nvec, const size_t nbin)
CUDA kernel to add nvec vectors.
#define XLAL_CHECK(assertion,...)