#ifdef WITH_NVIDIA_GPU_VERSION
/*
 * Prototypes of the NVIDIA-specific entry points. They are visible only when
 * WITH_NVIDIA_GPU_VERSION is defined. This header carries declarations only;
 * the definitions are not part of this file.
 * NOTE(review): the names suggest thin wrappers around the CUDA runtime and
 * cuBLAS -- confirm against the implementation.
 */

/* Presumably yield the integer codes for the two copy directions that
 * cudaMemcpyFromC accepts as `dir` -- TODO confirm. */
int cudaMemcpyHostToDeviceFromC();
int cudaMemcpyDeviceToHostFromC();

/* Device enumeration / selection. Each returns an int whose meaning is set
 * by the implementation (presumably a success flag -- confirm). */
int cudaGetDeviceCountFromC(int *count);
int cudaSetDeviceFromC(int n);

/* Memory management. Buffers are referred to through intptr_t values.
 * NOTE(review): `width_height` and `count` are presumably sizes in bytes --
 * verify against the callers. */
int cudaMallocFromC(intptr_t *a, size_t width_height);
int cudaFreeFromC(intptr_t *a);
int cudaMemcpyFromC(intptr_t *dest, intptr_t *src, size_t count, int dir);
int cudaDeviceSynchronizeFromC();
int cudaMemsetFromC(intptr_t *a, int value, size_t count);

/*
 * GEMM wrappers in the four precisions (D, S, Z, C). `gpuHandle` is passed as
 * a pointer to intptr_t; transa/transb are single characters.
 * NOTE(review): the parameter list mirrors BLAS xGEMM
 * (C = alpha*op(A)*op(B) + beta*C) -- confirm the semantics in the wrapper
 * definition. The trailing `C`/`ldc` parameters of the D and S variants follow
 * the Z and C variants below and the gpublas?gemm signatures; confirm against
 * the definitions.
 */
void cublasDgemm_elpa_wrapper_intptr_handle(intptr_t *gpuHandle, char transa, char transb,
                                            int m, int n, int k,
                                            double alpha, const double *A, int lda,
                                            const double *B, int ldb, double beta,
                                            double *C, int ldc);
void cublasSgemm_elpa_wrapper_intptr_handle(intptr_t *gpuHandle, char transa, char transb,
                                            int m, int n, int k,
                                            float alpha, const float *A, int lda,
                                            const float *B, int ldb, float beta,
                                            float *C, int ldc);
void cublasZgemm_elpa_wrapper_intptr_handle(intptr_t *gpuHandle, char transa, char transb,
                                            int m, int n, int k,
                                            double _Complex alpha, const double _Complex *A, int lda,
                                            const double _Complex *B, int ldb, double _Complex beta,
                                            double _Complex *C, int ldc);
void cublasCgemm_elpa_wrapper_intptr_handle(intptr_t *gpuHandle, char transa, char transb,
                                            int m, int n, int k,
                                            float _Complex alpha, const float _Complex *A, int lda,
                                            const float _Complex *B, int ldb, float _Complex beta,
                                            float _Complex *C, int ldc);
#endif /* WITH_NVIDIA_GPU_VERSION */
#ifdef WITH_AMD_GPU_VERSION
/*
 * Prototypes of the AMD-specific entry points. They are visible only when
 * WITH_AMD_GPU_VERSION is defined. This header carries declarations only;
 * the definitions are not part of this file.
 * NOTE(review): the names suggest thin wrappers around the HIP runtime and
 * rocBLAS -- confirm against the implementation.
 */

/* Presumably yield the integer codes for the two copy directions that
 * hipMemcpyFromC accepts as `dir` -- TODO confirm. */
int hipMemcpyHostToDeviceFromC();
int hipMemcpyDeviceToHostFromC();

/* Device enumeration / selection. Each returns an int whose meaning is set
 * by the implementation (presumably a success flag -- confirm). */
int hipGetDeviceCountFromC(int *count);
int hipSetDeviceFromC(int n);

/* Memory management. Buffers are referred to through intptr_t values.
 * NOTE(review): `width_height` and `count` are presumably sizes in bytes --
 * verify against the callers. */
int hipMallocFromC(intptr_t *a, size_t width_height);
int hipFreeFromC(intptr_t *a);
int hipMemcpyFromC(intptr_t *dest, intptr_t *src, size_t count, int dir);
int hipDeviceSynchronizeFromC();
int hipMemsetFromC(intptr_t *a, int value, size_t count);

/*
 * GEMM wrappers in the four precisions (D, S, Z, C). `gpuHandle` is passed as
 * a pointer to intptr_t; transa/transb are single characters.
 * NOTE(review): the parameter list mirrors BLAS xGEMM
 * (C = alpha*op(A)*op(B) + beta*C) -- confirm the semantics in the wrapper
 * definition. The trailing `C`/`ldc` parameters of the D and S variants follow
 * the Z and C variants below and the gpublas?gemm signatures; confirm against
 * the definitions.
 */
void rocblasDgemm_elpa_wrapper_intptr_handle(intptr_t *gpuHandle, char transa, char transb,
                                             int m, int n, int k,
                                             double alpha, const double *A, int lda,
                                             const double *B, int ldb, double beta,
                                             double *C, int ldc);
void rocblasSgemm_elpa_wrapper_intptr_handle(intptr_t *gpuHandle, char transa, char transb,
                                             int m, int n, int k,
                                             float alpha, const float *A, int lda,
                                             const float *B, int ldb, float beta,
                                             float *C, int ldc);
void rocblasZgemm_elpa_wrapper_intptr_handle(intptr_t *gpuHandle, char transa, char transb,
                                             int m, int n, int k,
                                             double _Complex alpha, const double _Complex *A, int lda,
                                             const double _Complex *B, int ldb, double _Complex beta,
                                             double _Complex *C, int ldc);
void rocblasCgemm_elpa_wrapper_intptr_handle(intptr_t *gpuHandle, char transa, char transb,
                                             int m, int n, int k,
                                             float _Complex alpha, const float _Complex *A, int lda,
                                             const float _Complex *B, int ldb, float _Complex beta,
                                             float _Complex *C, int ldc);
#endif /* WITH_AMD_GPU_VERSION */
#ifdef WITH_SYCL_GPU_VERSION
/*
 * Prototypes of the SYCL-specific entry points. They are visible only when
 * WITH_SYCL_GPU_VERSION is defined. This header carries declarations only;
 * the definitions are not part of this file.
 * NOTE(review): the names suggest thin wrappers around a SYCL runtime and its
 * BLAS library -- confirm against the implementation.
 */

/* Presumably yield the integer codes for the two copy directions that
 * syclMemcpyFromC accepts as `dir` -- TODO confirm. */
int syclMemcpyHostToDeviceFromC();
int syclMemcpyDeviceToHostFromC();

/* Device enumeration / selection. Each returns an int whose meaning is set
 * by the implementation (presumably a success flag -- confirm). */
int syclGetDeviceCountFromC(int *count);
int syclSetDeviceFromC(int n);

/* Memory management. Buffers are referred to through intptr_t values.
 * NOTE(review): `width_height` and `count` are presumably sizes in bytes --
 * verify against the callers. */
int syclMallocFromC(intptr_t *a, size_t width_height);
int syclFreeFromC(intptr_t *a);
int syclMemcpyFromC(intptr_t *dest, intptr_t *src, size_t count, int dir);
int syclDeviceSynchronizeFromC();
int syclMemsetFromC(intptr_t *a, int value, size_t count);

/*
 * GEMM wrappers in the four precisions (D, S, Z, C). `gpuHandle` is passed as
 * a pointer to intptr_t; transa/transb are single characters.
 * NOTE(review): the parameter list mirrors BLAS xGEMM
 * (C = alpha*op(A)*op(B) + beta*C) -- confirm the semantics in the wrapper
 * definition. The trailing `C`/`ldc` parameters of the D and S variants follow
 * the Z and C variants below and the gpublas?gemm signatures; confirm against
 * the definitions.
 */
void syclblasDgemm_elpa_wrapper(intptr_t *gpuHandle, char transa, char transb,
                                int m, int n, int k,
                                double alpha, const double *A, int lda,
                                const double *B, int ldb, double beta,
                                double *C, int ldc);
void syclblasSgemm_elpa_wrapper(intptr_t *gpuHandle, char transa, char transb,
                                int m, int n, int k,
                                float alpha, const float *A, int lda,
                                const float *B, int ldb, float beta,
                                float *C, int ldc);
void syclblasZgemm_elpa_wrapper(intptr_t *gpuHandle, char transa, char transb,
                                int m, int n, int k,
                                double _Complex alpha, const double _Complex *A, int lda,
                                const double _Complex *B, int ldb, double _Complex beta,
                                double _Complex *C, int ldc);
void syclblasCgemm_elpa_wrapper(intptr_t *gpuHandle, char transa, char transb,
                                int m, int n, int k,
                                float _Complex alpha, const float _Complex *A, int lda,
                                const float _Complex *B, int ldb, float _Complex beta,
                                float _Complex *C, int ldc);
#endif /* WITH_SYCL_GPU_VERSION */
#ifdef WITH_OPENMP_OFFLOAD_GPU_VERSION
/* This header declares no OpenMP-offload entry points, so a build that
 * requests that backend is stopped here with an explicit diagnostic. The
 * guard is closed immediately so the declarations that follow stay visible
 * to every other configuration. */
#error "openmp_offload missing"
#endif /* WITH_OPENMP_OFFLOAD_GPU_VERSION */
/*
 * Vendor-neutral allocation entry point (definition: gpu_vendor_agnostic_layer.c).
 *   a            - pointer to an intptr_t; presumably receives the address of
 *                  the new buffer -- TODO confirm in the definition.
 *   width_height - presumably the allocation size in bytes -- TODO confirm.
 * Returns an int whose meaning is set by the implementation.
 */
int gpuMalloc(intptr_t *a, size_t width_height);
/*
 * Vendor-neutral copy entry point (definition: gpu_vendor_agnostic_layer.c).
 *   dest, src - buffers, referred to through intptr_t pointers.
 *   count     - presumably the number of bytes to copy -- TODO confirm.
 *   dir       - integer direction code; presumably one of the
 *               gpuMemcpyHostToDevice / gpuMemcpyDeviceToHost values --
 *               verify against the callers.
 * Returns an int whose meaning is set by the implementation.
 */
int gpuMemcpy(intptr_t *dest, intptr_t *src, size_t count, int dir);
/*
 * Vendor-neutral fill entry point (definition: gpu_vendor_agnostic_layer.c).
 *   a     - buffer, referred to through an intptr_t pointer.
 *   value - fill value; NOTE(review): presumably applied byte-wise like
 *           memset -- confirm in the definition.
 *   count - presumably the number of bytes to set -- TODO confirm.
 * Returns an int whose meaning is set by the implementation.
 */
int gpuMemset(intptr_t *a, int value, size_t count);
/*
 * Vendor-neutral double-precision GEMM entry point
 * (definition: gpu_vendor_agnostic_layer.c).
 * NOTE(review): the parameter list mirrors BLAS DGEMM
 * (C = alpha*op(A)*op(B) + beta*C with leading dimensions lda/ldb/ldc) --
 * confirm the exact semantics in the definition.
 *   gpuHandle      - library handle, passed as a pointer to intptr_t.
 *   transa, transb - single-character transpose selectors.
 *   m, n, k        - problem dimensions.
 */
void gpublasDgemm(intptr_t *gpuHandle, char transa, char transb,
                  int m, int n, int k,
                  double alpha, const double *A, int lda,
                  const double *B, int ldb, double beta,
                  double *C, int ldc);
/*
 * Vendor-neutral single-precision GEMM entry point
 * (definition: gpu_vendor_agnostic_layer.c).
 * NOTE(review): the parameter list mirrors BLAS SGEMM
 * (C = alpha*op(A)*op(B) + beta*C with leading dimensions lda/ldb/ldc) --
 * confirm the exact semantics in the definition.
 *   gpuHandle      - library handle, passed as a pointer to intptr_t.
 *   transa, transb - single-character transpose selectors.
 *   m, n, k        - problem dimensions.
 */
void gpublasSgemm(intptr_t *gpuHandle, char transa, char transb,
                  int m, int n, int k,
                  float alpha, const float *A, int lda,
                  const float *B, int ldb, float beta,
                  float *C, int ldc);
/*
 * Vendor-neutral double-precision complex GEMM entry point
 * (definition: gpu_vendor_agnostic_layer.c).
 * NOTE(review): the parameter list mirrors BLAS ZGEMM
 * (C = alpha*op(A)*op(B) + beta*C with leading dimensions lda/ldb/ldc) --
 * confirm the exact semantics in the definition.
 *   gpuHandle      - library handle, passed as a pointer to intptr_t.
 *   transa, transb - single-character transpose selectors.
 *   m, n, k        - problem dimensions.
 */
void gpublasZgemm(intptr_t *gpuHandle, char transa, char transb,
                  int m, int n, int k,
                  double _Complex alpha, const double _Complex *A, int lda,
                  const double _Complex *B, int ldb, double _Complex beta,
                  double _Complex *C, int ldc);
/*
 * Vendor-neutral single-precision complex GEMM entry point
 * (definition: gpu_vendor_agnostic_layer.c).
 * NOTE(review): the parameter list mirrors BLAS CGEMM
 * (C = alpha*op(A)*op(B) + beta*C with leading dimensions lda/ldb/ldc) --
 * confirm the exact semantics in the definition.
 *   gpuHandle      - library handle, passed as a pointer to intptr_t.
 *   transa, transb - single-character transpose selectors.
 *   m, n, k        - problem dimensions.
 */
void gpublasCgemm(intptr_t *gpuHandle, char transa, char transb,
                  int m, int n, int k,
                  float _Complex alpha, const float _Complex *A, int lda,
                  const float _Complex *B, int ldb, float _Complex beta,
                  float _Complex *C, int ldc);
/*
 * Cross-reference index carried over from the generated source listing
 * (symbol, followed by the location of its definition where one is listed):
 *
 *   int gpuMemcpyDeviceToHost
 *       Definition: cannon.c:104
 *   int gpuMemcpyHostToDevice
 *       Definition: cannon.c:103
 *   int syclGetCpuCount(int numberOfDevices)
 *       (no definition location listed)
 *   int gpuMemcpy(intptr_t *dest, intptr_t *src, size_t count, int dir)
 *       Definition: gpu_vendor_agnostic_layer.c:144
 *   void gpublasCgemm(intptr_t *gpuHandle, char transa, char transb, int m, int n, int k, float _Complex alpha, const float _Complex *A, int lda, const float _Complex *B, int ldb, float _Complex beta, float _Complex *C, int ldc)
 *       Definition: gpu_vendor_agnostic_layer.c:246
 *   int gpuGetDeviceCount(int *count)
 *       Definition: gpu_vendor_agnostic_layer.c:80
 *   int gpuDeviceSynchronize()
 *       Definition: gpu_vendor_agnostic_layer.c:160
 *   int gpuSetDevice(int n)
 *       Definition: gpu_vendor_agnostic_layer.c:96
 *   int gpuFree(intptr_t *a)
 *       Definition: gpu_vendor_agnostic_layer.c:128
 *   void gpublasSgemm(intptr_t *gpuHandle, char transa, char transb, int m, int n, int k, float alpha, const float *A, int lda, const float *B, int ldb, float beta, float *C, int ldc)
 *       Definition: gpu_vendor_agnostic_layer.c:210
 *   int gpuMalloc(intptr_t *a, size_t width_height)
 *       Definition: gpu_vendor_agnostic_layer.c:112
 *   int gpuMemset(intptr_t *a, int value, size_t count)
 *       Definition: gpu_vendor_agnostic_layer.c:176
 *   void gpublasZgemm(intptr_t *gpuHandle, char transa, char transb, int m, int n, int k, double _Complex alpha, const double _Complex *A, int lda, const double _Complex *B, int ldb, double _Complex beta, double _Complex *C, int ldc)
 *       Definition: gpu_vendor_agnostic_layer.c:228
 *   void gpublasDgemm(intptr_t *gpuHandle, char transa, char transb, int m, int n, int k, double alpha, const double *A, int lda, const double *B, int ldb, double beta, double *C, int ldc)
 *       Definition: gpu_vendor_agnostic_layer.c:192
 *   void set_gpu_parameters(int *gpuMemcpyHostToDevice, int *gpuMemcpyDeviceToHost)
 *       Definition: gpu_vendor_agnostic_layer.c:62
 */