Eigenvalue SoLvers for Petaflop-Applications (ELPA) 2024.05.001.rc1
|
#include "config-f90.h"
#include <CL/sycl.hpp>
#include <stdlib.h>
#include <stdio.h>
#include <complex>
#include <iostream>
#include <cstdint>
#include <vector>
#include <optional>
#include <type_traits>
#include "src/GPU/SYCL/syclCommon.hpp"
Classes | |
struct | extract_float_type< std::complex< X > > |
Macros | |
#define | GET_POINTER(x) x.template get_multi_ptr<sycl::access::decorated::yes>().get() |
Functions | |
template<typename T , int wg_size, int sg_size, int step> | |
void | reduction_step (T *local_mem, sycl::nd_item< 1 > &it) |
template<typename T , int wg_size, int sg_size> | |
T | parallel_sum_group (sycl::nd_item< 1 > &it, T *local_mem) |
template<typename T , int wg_size, int sg_size, int step> | |
void | reduction_step_complex (T *local_mem, sycl::nd_item< 1 > &it) |
template<typename T , int sg_size, int step> | |
void | sg_reduction_step_complex (T *local_mem, T &accu, sycl::nd_item< 1 > &it) |
template<typename T , int wg_size, int sg_size> | |
__attribute__ ((flatten)) std | |
template<typename T , int wg_size, int sg_size, bool is_using_custom_reduction = true> | |
void | compute_hh_trafo_c_sycl_kernel (T *q, T const *hh, T const *hh_tau, int const nev, int const nb, int const ldq, int const ncols) |
template<typename T > | |
void | launch_compute_hh_trafo_c_sycl_kernel (T *q, const T *hh, const T *hh_tau, const int nev, const int nb, const int ldq, const int ncols) |
void | launch_compute_hh_trafo_c_sycl_kernel_real_double (double *q, const double *hh, const double *hh_tau, const int nev, const int nb, const int ldq, const int ncols) |
void | launch_compute_hh_trafo_c_sycl_kernel_real_single (float *q, const float *hh, const float *hh_tau, const int nev, const int nb, const int ldq, const int ncols) |
void | launch_compute_hh_trafo_c_sycl_kernel_complex_double (std::complex< double > *q, const std::complex< double > *hh, const std::complex< double > *hh_tau, const int nev, const int nb, const int ldq, const int ncols) |
void | launch_compute_hh_trafo_c_sycl_kernel_complex_single (std::complex< float > *q, const std::complex< float > *hh, const std::complex< float > *hh_tau, const int nev, const int nb, const int ldq, const int ncols) |
#define GET_POINTER(x) x.template get_multi_ptr<sycl::access::decorated::yes>().get()
__attribute__ ((flatten))
template<typename T, int wg_size, int sg_size, bool is_using_custom_reduction = true>
void compute_hh_trafo_c_sycl_kernel(T *q, T const *hh, T const *hh_tau, int const nev, int const nb, int const ldq, int const ncols)
template<typename T>
void launch_compute_hh_trafo_c_sycl_kernel(T *q, const T *hh, const T *hh_tau, const int nev, const int nb, const int ldq, const int ncols)
void launch_compute_hh_trafo_c_sycl_kernel_complex_double(std::complex<double> *q, const std::complex<double> *hh, const std::complex<double> *hh_tau, const int nev, const int nb, const int ldq, const int ncols)
void launch_compute_hh_trafo_c_sycl_kernel_complex_single(std::complex<float> *q, const std::complex<float> *hh, const std::complex<float> *hh_tau, const int nev, const int nb, const int ldq, const int ncols)
void launch_compute_hh_trafo_c_sycl_kernel_real_double(double *q, const double *hh, const double *hh_tau, const int nev, const int nb, const int ldq, const int ncols)
void launch_compute_hh_trafo_c_sycl_kernel_real_single(float *q, const float *hh, const float *hh_tau, const int nev, const int nb, const int ldq, const int ncols)
template<typename T, int wg_size, int sg_size>
T parallel_sum_group(sycl::nd_item<1> &it, T *local_mem)
|
inline |
|
inline |
|
inline |