Eigenvalue SoLvers for Petaflop-Applications (ELPA) 2024.05.001.rc1
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Classes | Macros | Functions
compute_hh.cpp File Reference
#include "config-f90.h"
#include <CL/sycl.hpp>
#include <stdlib.h>
#include <stdio.h>
#include <complex>
#include <iostream>
#include <cstdint>
#include <vector>
#include <optional>
#include <type_traits>
#include "src/GPU/SYCL/syclCommon.hpp"

Classes

struct extract_float_type< std::complex< X > >
 

Macros

#define GET_POINTER(x)   x.template get_multi_ptr<sycl::access::decorated::yes>().get()
 

Functions

template<typename T , int wg_size, int sg_size, int step>
void reduction_step (T *local_mem, sycl::nd_item< 1 > &it)
 
template<typename T , int wg_size, int sg_size>
T parallel_sum_group (sycl::nd_item< 1 > &it, T *local_mem)
 
template<typename T , int wg_size, int sg_size, int step>
void reduction_step_complex (T *local_mem, sycl::nd_item< 1 > &it)
 
template<typename T , int sg_size, int step>
void sg_reduction_step_complex (T *local_mem, T &accu, sycl::nd_item< 1 > &it)
 
template<typename T , int wg_size, int sg_size>
 __attribute__ ((flatten)) std
 
template<typename T , int wg_size, int sg_size, bool is_using_custom_reduction = true>
void compute_hh_trafo_c_sycl_kernel (T *q, T const *hh, T const *hh_tau, int const nev, int const nb, int const ldq, int const ncols)
 
template<typename T >
void launch_compute_hh_trafo_c_sycl_kernel (T *q, const T *hh, const T *hh_tau, const int nev, const int nb, const int ldq, const int ncols)
 
void launch_compute_hh_trafo_c_sycl_kernel_real_double (double *q, const double *hh, const double *hh_tau, const int nev, const int nb, const int ldq, const int ncols)
 
void launch_compute_hh_trafo_c_sycl_kernel_real_single (float *q, const float *hh, const float *hh_tau, const int nev, const int nb, const int ldq, const int ncols)
 
void launch_compute_hh_trafo_c_sycl_kernel_complex_double (std::complex< double > *q, const std::complex< double > *hh, const std::complex< double > *hh_tau, const int nev, const int nb, const int ldq, const int ncols)
 
void launch_compute_hh_trafo_c_sycl_kernel_complex_single (std::complex< float > *q, const std::complex< float > *hh, const std::complex< float > *hh_tau, const int nev, const int nb, const int ldq, const int ncols)
 

Macro Definition Documentation

◆ GET_POINTER

#define GET_POINTER(x)   x.template get_multi_ptr<sycl::access::decorated::yes>().get()

Function Documentation

◆ __attribute__()

template<typename T , int wg_size, int sg_size>
__attribute__ ( (flatten) )

◆ compute_hh_trafo_c_sycl_kernel()

template<typename T , int wg_size, int sg_size, bool is_using_custom_reduction = true>
void compute_hh_trafo_c_sycl_kernel ( T * q,
T const * hh,
T const * hh_tau,
int const nev,
int const nb,
int const ldq,
int const ncols )

◆ launch_compute_hh_trafo_c_sycl_kernel()

template<typename T >
void launch_compute_hh_trafo_c_sycl_kernel ( T * q,
const T * hh,
const T * hh_tau,
const int nev,
const int nb,
const int ldq,
const int ncols )

◆ launch_compute_hh_trafo_c_sycl_kernel_complex_double()

void launch_compute_hh_trafo_c_sycl_kernel_complex_double ( std::complex< double > * q,
const std::complex< double > * hh,
const std::complex< double > * hh_tau,
const int nev,
const int nb,
const int ldq,
const int ncols )

◆ launch_compute_hh_trafo_c_sycl_kernel_complex_single()

void launch_compute_hh_trafo_c_sycl_kernel_complex_single ( std::complex< float > * q,
const std::complex< float > * hh,
const std::complex< float > * hh_tau,
const int nev,
const int nb,
const int ldq,
const int ncols )

◆ launch_compute_hh_trafo_c_sycl_kernel_real_double()

void launch_compute_hh_trafo_c_sycl_kernel_real_double ( double * q,
const double * hh,
const double * hh_tau,
const int nev,
const int nb,
const int ldq,
const int ncols )

◆ launch_compute_hh_trafo_c_sycl_kernel_real_single()

void launch_compute_hh_trafo_c_sycl_kernel_real_single ( float * q,
const float * hh,
const float * hh_tau,
const int nev,
const int nb,
const int ldq,
const int ncols )

◆ parallel_sum_group()

template<typename T , int wg_size, int sg_size>
T parallel_sum_group ( sycl::nd_item< 1 > & it,
T * local_mem )

◆ reduction_step()

template<typename T , int wg_size, int sg_size, int step>
void reduction_step ( T * local_mem,
sycl::nd_item< 1 > & it )
inline

◆ reduction_step_complex()

template<typename T , int wg_size, int sg_size, int step>
void reduction_step_complex ( T * local_mem,
sycl::nd_item< 1 > & it )
inline

◆ sg_reduction_step_complex()

template<typename T , int sg_size, int step>
void sg_reduction_step_complex ( T * local_mem,
T & accu,
sycl::nd_item< 1 > & it )
inline