Eigenvalue SoLvers for Petaflop-Applications (ELPA) 2024.05.001
Loading...
Searching...
No Matches
gpu_vendor_agnostic_layer.h
Go to the documentation of this file.
1//
2// Copyright 2022, P. Karpov, MPCDF
3//
4// This file is part of ELPA.
5//
6// The ELPA library was originally created by the ELPA consortium,
7// consisting of the following organizations:
8//
9// - Max Planck Computing and Data Facility (MPCDF), formerly known as
10// Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
11// - Bergische Universität Wuppertal, Lehrstuhl für angewandte
12// Informatik,
13// - Technische Universität München, Lehrstuhl für Informatik mit
14// Schwerpunkt Wissenschaftliches Rechnen,
15// - Fritz-Haber-Institut, Berlin, Abt. Theorie,
16// - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
17// Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
18// and
19// - IBM Deutschland GmbH
20//
21// This particular source code file contains additions, changes and
22// enhancements authored by Intel Corporation which is not part of
23// the ELPA consortium.
24//
25// More information can be found here:
26// http://elpa.mpcdf.mpg.de/
27//
28// ELPA is free software: you can redistribute it and/or modify
29// it under the terms of the version 3 of the license of the
30// GNU Lesser General Public License as published by the Free
31// Software Foundation.
32//
33// ELPA is distributed in the hope that it will be useful,
34// but WITHOUT ANY WARRANTY; without even the implied warranty of
35// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
36// GNU Lesser General Public License for more details.
37//
38// You should have received a copy of the GNU Lesser General Public License
39// along with ELPA. If not, see <http://www.gnu.org/licenses/>
40//
41// ELPA reflects a substantial effort on the part of the original
42// ELPA consortium, and we ask you to respect the spirit of the
43// license that we chose: i.e., please contribute any changes you
44// may have back to the original ELPA library distribution, and keep
45// any derivatives of ELPA under the same license that we chose for
46// the original distribution, the GNU Lesser General Public License.
47//
// Include-once guard for this header.
48#pragma once
// NOTE(review): the declarations in this header use size_t, but <stdint.h> is
// the only header included in this listing. The C standard guarantees size_t
// from <stddef.h> (among others), not from <stdint.h> -- confirm that every
// includer brings it in, or include <stddef.h> here.
49#include <stdint.h> // for intptr_t
50
// Give every declaration below C linkage when the header is compiled as C++;
// the matching closing brace sits at the very end of the header.
51#ifdef __cplusplus
52extern "C" {
53#endif
54
// Declarations that are visible only when WITH_NVIDIA_GPU_VERSION is defined.
// The definitions are not part of this header. Judging by the names, these are
// C-callable wrappers around the CUDA runtime and cuBLAS GEMM routines --
// TODO confirm against the implementation.
// NOTE(review): the empty parameter lists "()" below are not prototypes in C
// before C23, so calls are not argument-checked; "(void)" would state
// "takes no arguments" explicitly.
55#ifdef WITH_NVIDIA_GPU_VERSION
// Presumably return the direction constants accepted as `dir` by
// cudaMemcpyFromC -- TODO confirm.
56int cudaMemcpyHostToDeviceFromC();
57int cudaMemcpyDeviceToHostFromC();
// Device and memory management. Every function returns int; the meaning of the
// return value is not visible in this header.
58int cudaGetDeviceCountFromC(int *count);
59int cudaSetDeviceFromC(int n);
60int cudaMallocFromC(intptr_t *a, size_t width_height);
61int cudaFreeFromC(intptr_t *a);
62int cudaMemcpyFromC(intptr_t *dest, intptr_t *src, size_t count, int dir);
63int cudaDeviceSynchronizeFromC();
64int cudaMemsetFromC(intptr_t *a, int value, size_t count);
// GEMM wrappers. The four variants share one parameter list and differ only in
// element type: D = double, S = float, Z = double _Complex, C = float _Complex.
// The handle is passed as intptr_t*; transa/transb are plain chars whose
// accepted values are not visible here.
65void cublasDgemm_elpa_wrapper_intptr_handle(intptr_t* gpuHandle, char transa, char transb, int m, int n, int k,
66 double alpha, const double *A, int lda,
67 const double *B, int ldb, double beta,
68 double *C, int ldc);
69void cublasSgemm_elpa_wrapper_intptr_handle(intptr_t* gpuHandle, char transa, char transb, int m, int n, int k,
70 float alpha, const float *A, int lda,
71 const float *B, int ldb, float beta,
72 float *C, int ldc);
73void cublasZgemm_elpa_wrapper_intptr_handle(intptr_t* gpuHandle, char transa, char transb, int m, int n, int k,
74 double _Complex alpha, const double _Complex *A, int lda,
75 const double _Complex *B, int ldb, double _Complex beta,
76 double _Complex *C, int ldc);
77void cublasCgemm_elpa_wrapper_intptr_handle(intptr_t* gpuHandle, char transa, char transb, int m, int n, int k,
78 float _Complex alpha, const float _Complex *A, int lda,
79 const float _Complex *B, int ldb, float _Complex beta,
80 float _Complex *C, int ldc);
81#endif
// Declarations that are visible only when WITH_AMD_GPU_VERSION is defined.
// The definitions are not part of this header. Judging by the names, these are
// C-callable wrappers around the HIP runtime and rocBLAS GEMM routines --
// TODO confirm against the implementation.
// NOTE(review): the empty parameter lists "()" below are not prototypes in C
// before C23; "(void)" would state "takes no arguments" explicitly.
82#ifdef WITH_AMD_GPU_VERSION
// Presumably return the direction constants accepted as `dir` by
// hipMemcpyFromC -- TODO confirm.
83int hipMemcpyHostToDeviceFromC();
84int hipMemcpyDeviceToHostFromC();
// Device and memory management. Every function returns int; the meaning of the
// return value is not visible in this header.
85int hipGetDeviceCountFromC(int *count);
86int hipSetDeviceFromC(int n);
87int hipMallocFromC(intptr_t *a, size_t width_height);
88int hipFreeFromC(intptr_t *a);
89int hipMemcpyFromC (intptr_t *dest, intptr_t *src, size_t count, int dir);
90int hipDeviceSynchronizeFromC();
91int hipMemsetFromC(intptr_t *a, int value, size_t count);
// GEMM wrappers. The four variants share one parameter list and differ only in
// element type: D = double, S = float, Z = double _Complex, C = float _Complex.
92void rocblasDgemm_elpa_wrapper_intptr_handle(intptr_t* gpuHandle, char transa, char transb, int m, int n, int k,
93 double alpha, const double *A, int lda,
94 const double *B, int ldb, double beta,
95 double *C, int ldc);
96void rocblasSgemm_elpa_wrapper_intptr_handle(intptr_t* gpuHandle, char transa, char transb, int m, int n, int k,
97 float alpha, const float *A, int lda,
98 const float *B, int ldb, float beta,
99 float *C, int ldc);
100void rocblasZgemm_elpa_wrapper_intptr_handle(intptr_t* gpuHandle, char transa, char transb, int m, int n, int k,
101 double _Complex alpha, const double _Complex *A, int lda,
102 const double _Complex *B, int ldb, double _Complex beta,
103 double _Complex *C, int ldc);
104void rocblasCgemm_elpa_wrapper_intptr_handle(intptr_t* gpuHandle, char transa, char transb, int m, int n, int k,
105 float _Complex alpha, const float _Complex *A, int lda,
106 const float _Complex *B, int ldb, float _Complex beta,
107 float _Complex *C, int ldc);
108#endif
// Declarations that are visible only when WITH_SYCL_GPU_VERSION is defined.
// The definitions are not part of this header. Judging by the names, these are
// C-callable wrappers around a SYCL backend -- TODO confirm against the
// implementation.
// NOTE(review): the empty parameter lists "()" below are not prototypes in C
// before C23; "(void)" would state "takes no arguments" explicitly.
109#ifdef WITH_SYCL_GPU_VERSION
// Presumably return the direction constants accepted as `dir` by
// syclMemcpyFromC -- TODO confirm.
110int syclMemcpyHostToDeviceFromC();
111int syclMemcpyDeviceToHostFromC();
// Device and memory management. Every function returns int; the meaning of the
// return value is not visible in this header.
112int syclGetDeviceCountFromC(int *count);
113int syclSetDeviceFromC(int n);
114int syclMallocFromC(intptr_t *a, size_t width_height);
115int syclFreeFromC(intptr_t *a);
116int syclMemcpyFromC(intptr_t *dest, intptr_t *src, size_t count, int dir);
117int syclDeviceSynchronizeFromC();
118int syclMemsetFromC(intptr_t *a, int value, size_t count);
// GEMM wrappers. Unlike the cuBLAS/rocBLAS wrappers in this header, the names
// carry no "_intptr_handle" suffix, yet the first parameter is still an
// intptr_t* handle. Element types: D = double, S = float,
// Z = double _Complex, C = float _Complex.
119void syclblasDgemm_elpa_wrapper(intptr_t* gpuHandle, char transa, char transb, int m, int n, int k,
120 double alpha, const double *A, int lda,
121 const double *B, int ldb, double beta,
122 double *C, int ldc);
123void syclblasSgemm_elpa_wrapper(intptr_t* gpuHandle, char transa, char transb, int m, int n, int k,
124 float alpha, const float *A, int lda,
125 const float *B, int ldb, float beta,
126 float *C, int ldc);
127void syclblasZgemm_elpa_wrapper(intptr_t* gpuHandle, char transa, char transb, int m, int n, int k,
128 double _Complex alpha, const double _Complex *A, int lda,
129 const double _Complex *B, int ldb, double _Complex beta,
130 double _Complex *C, int ldc);
131void syclblasCgemm_elpa_wrapper(intptr_t* gpuHandle, char transa, char transb, int m, int n, int k,
132 float _Complex alpha, const float _Complex *A, int lda,
133 const float _Complex *B, int ldb, float _Complex beta,
134 float _Complex *C, int ldc);
135#endif
// This header declares nothing for an OpenMP-offload backend: defining
// WITH_OPENMP_OFFLOAD_GPU_VERSION stops compilation with the #error below.
136#ifdef WITH_OPENMP_OFFLOAD_GPU_VERSION
137#error "openmp_offload missing"
138#endif
139
140
142
// Vendor-neutral device and memory entry points. They are declared
// unconditionally, i.e. outside every WITH_*_GPU_VERSION guard. Each returns
// int; the meaning of the return value is not visible in this header.
// NOTE(review): the listing numbering skips lines 141 and 154, and the
// cross-reference index of this page names set_gpu_parameters() and
// gpuDeviceSynchronize(), neither of which has a declaration in the visible
// listing. Presumably those two lines were lost during extraction -- confirm
// against the original header.
143int gpuGetDeviceCount(int *count);
144int gpuSetDevice(int n);
145
146int gpuMalloc(intptr_t *a, size_t width_height);
147
148int gpuFree(intptr_t *a);
149
// `dir` presumably takes one of the host-to-device / device-to-host direction
// values -- TODO confirm against the implementation.
150int gpuMemcpy(intptr_t *dest, intptr_t *src, size_t count, int dir);
151
// SYCL-named, but declared regardless of WITH_SYCL_GPU_VERSION.
152int syclGetCpuCount(int numberOfDevices);
153
155
156int gpuMemset(intptr_t *a, int value, size_t count);
157
// Vendor-neutral GEMM entry points, declared unconditionally. The four variants
// share one parameter list and differ only in element type: D = double,
// S = float, Z = double _Complex, C = float _Complex. The parameter list
// matches the vendor-specific *gemm wrappers declared earlier in this header.
// They return void, so no status is reported through the return value.
// transa/transb are plain chars whose accepted values are not visible here.
158void gpublasDgemm(intptr_t* gpuHandle, char transa, char transb, int m, int n, int k,
159 double alpha, const double *A, int lda,
160 const double *B, int ldb, double beta,
161 double *C, int ldc);
162
163void gpublasSgemm (intptr_t* gpuHandle, char transa, char transb, int m, int n, int k,
164 float alpha, const float *A, int lda,
165 const float *B, int ldb, float beta,
166 float *C, int ldc);
167
168void gpublasZgemm (intptr_t* gpuHandle, char transa, char transb, int m, int n, int k,
169 double _Complex alpha, const double _Complex *A, int lda,
170 const double _Complex *B, int ldb, double _Complex beta,
171 double _Complex *C, int ldc);
172
173void gpublasCgemm (intptr_t* gpuHandle, char transa, char transb, int m, int n, int k,
174 float _Complex alpha, const float _Complex *A, int lda,
175 const float _Complex *B, int ldb, float _Complex beta,
176 float _Complex *C, int ldc);
177
// Closes the extern "C" block that is opened near the top of the header when
// compiling as C++.
178#ifdef __cplusplus
179}
180#endif
int gpuMemcpyDeviceToHost
Definition cannon.c:104
int gpuMemcpyHostToDevice
Definition cannon.c:103
int syclGetCpuCount(int numberOfDevices)
int gpuMemcpy(intptr_t *dest, intptr_t *src, size_t count, int dir)
Definition gpu_vendor_agnostic_layer.c:144
void gpublasCgemm(intptr_t *gpuHandle, char transa, char transb, int m, int n, int k, float _Complex alpha, const float _Complex *A, int lda, const float _Complex *B, int ldb, float _Complex beta, float _Complex *C, int ldc)
Definition gpu_vendor_agnostic_layer.c:246
int gpuGetDeviceCount(int *count)
Definition gpu_vendor_agnostic_layer.c:80
int gpuDeviceSynchronize()
Definition gpu_vendor_agnostic_layer.c:160
int gpuSetDevice(int n)
Definition gpu_vendor_agnostic_layer.c:96
int gpuFree(intptr_t *a)
Definition gpu_vendor_agnostic_layer.c:128
void gpublasSgemm(intptr_t *gpuHandle, char transa, char transb, int m, int n, int k, float alpha, const float *A, int lda, const float *B, int ldb, float beta, float *C, int ldc)
Definition gpu_vendor_agnostic_layer.c:210
int gpuMalloc(intptr_t *a, size_t width_height)
Definition gpu_vendor_agnostic_layer.c:112
int gpuMemset(intptr_t *a, int value, size_t count)
Definition gpu_vendor_agnostic_layer.c:176
void gpublasZgemm(intptr_t *gpuHandle, char transa, char transb, int m, int n, int k, double _Complex alpha, const double _Complex *A, int lda, const double _Complex *B, int ldb, double _Complex beta, double _Complex *C, int ldc)
Definition gpu_vendor_agnostic_layer.c:228
void gpublasDgemm(intptr_t *gpuHandle, char transa, char transb, int m, int n, int k, double alpha, const double *A, int lda, const double *B, int ldb, double beta, double *C, int ldc)
Definition gpu_vendor_agnostic_layer.c:192
void set_gpu_parameters(int *gpuMemcpyHostToDevice, int *gpuMemcpyDeviceToHost)
Definition gpu_vendor_agnostic_layer.c:62