#include "config-f90.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <complex.h>
#include <stdint.h>
#include "../helpers/scalapack_interfaces.h"
#include "./gpu_vendor_agnostic_layer.h"
#include <mpi.h>
#include "../general/precision_macros.h"
#include "cannon_forw_template.h"
#include "cannon_back_template.h"

Macros
#define	C_INT_TYPE_PTR int*

#define	C_INT_TYPE int

#define	BLAS_KIND c_int

#define	C_INT_MPI_TYPE_PTR int*

#define	C_INT_MPI_TYPE int

#define	MPI_KIND c_int

#define	NVTX_RANGE_PUSH(msg) ((void)0)

#define	NVTX_RANGE_POP() ((void)0)

#define	gpuErrCheck(ans) { gpuAssert((ans), __FILE__, __LINE__); }

#define	REALCASE 1

#define	DOUBLE_PRECISION 1

#define	cublasXgemm cublasDgemm

#define	gpublasXgemm gpublasDgemm

#define	REALCASE 1

#define	SINGLE_PRECISION 1

#define	cublasXgemm cublasSgemm

#define	gpublasXgemm gpublasSgemm

#define	COMPLEXCASE 1

#define	DOUBLE_PRECISION 1

#define	cublasXgemm cublasZgemm

#define	gpublasXgemm gpublasZgemm

#define	COMPLEXCASE 1

#define	SINGLE_PRECISION 1

#define	cublasXgemm cublasCgemm

#define	gpublasXgemm gpublasCgemm

Functions
static void	gpuAssert (int status, const char *file, int line)

void	cannons_reduction_c_d (double *A, double *U, int local_rowsCast, int local_colsCast, int *a_desc, double *Res, int ToStore, int row_comm, int col_comm, int wantDebug, int useGPU, intptr_t *gpublasHandle)

void	cannons_triang_rectangular_c_d (double *U, double *B, int local_rowsCast, int local_colsCast, int *u_desc, int *b_desc, double *Res, int row_comm, int col_comm, int wantDebug, int useGPU, intptr_t *gpublasHandle)

void	cannons_reduction_c_f (float *A, float *U, int local_rowsCast, int local_colsCast, int *a_desc, float *Res, int ToStore, int row_comm, int col_comm, int wantDebug, int useGPU, intptr_t *gpublasHandle)

void	cannons_triang_rectangular_c_f (float *U, float *B, int local_rowsCast, int local_colsCast, int *u_desc, int *b_desc, float *Res, int row_comm, int col_comm, int wantDebug, int useGPU, intptr_t *gpublasHandle)

void	cannons_reduction_c_dc (double complex *A, double complex *U, int local_rowsCast, int local_colsCasr, int *a_desc, double complex *Res, int ToStore, int row_comm, int col_comm, int wantDebug, int useGPU, intptr_t *gpublasHandle)

void	cannons_triang_rectangular_c_dc (double complex *U, double complex *B, int local_rowsCast, int local_colsCast, int *u_desc, int *b_desc, double complex *Res, int row_comm, int col_comm, int wantDebug, int useGPU, intptr_t *gpublasHandle)

void	cannons_reduction_c_fc (float complex *A, float complex *U, int local_rowsCast, int local_colsCast, int *a_desc, float complex *Res, int ToStore, int row_comm, int col_comm, int wantDebug, int useGPU, intptr_t *gpublasHandle)

void	cannons_triang_rectangular_c_fc (float complex *U, float complex *B, int local_rowsCast, int local_colsCast, int *u_desc, int *b_desc, float complex *Res, int row_comm, int col_comm, int wantDebug, int useGPU, intptr_t *gpublasHandle)

Variables
int	gpuMemcpyHostToDevice

int	gpuMemcpyDeviceToHost

Macro Definition Documentation

◆ BLAS_KIND

#define BLAS_KIND c_int

◆ C_INT_MPI_TYPE

#define C_INT_MPI_TYPE int

◆ C_INT_MPI_TYPE_PTR

#define C_INT_MPI_TYPE_PTR int*

◆ C_INT_TYPE

#define C_INT_TYPE int

◆ C_INT_TYPE_PTR

#define C_INT_TYPE_PTR int*

◆ COMPLEXCASE [1/2]

#define COMPLEXCASE 1

◆ COMPLEXCASE [2/2]

#define COMPLEXCASE 1

◆ cublasXgemm [1/4]

#define cublasXgemm cublasDgemm

◆ cublasXgemm [2/4]

#define cublasXgemm cublasSgemm

◆ cublasXgemm [3/4]

#define cublasXgemm cublasZgemm

◆ cublasXgemm [4/4]

#define cublasXgemm cublasCgemm

◆ DOUBLE_PRECISION [1/2]

#define DOUBLE_PRECISION 1

◆ DOUBLE_PRECISION [2/2]

#define DOUBLE_PRECISION 1

◆ gpublasXgemm [1/4]

#define gpublasXgemm gpublasDgemm

◆ gpublasXgemm [2/4]

#define gpublasXgemm gpublasSgemm

◆ gpublasXgemm [3/4]

#define gpublasXgemm gpublasZgemm

◆ gpublasXgemm [4/4]

#define gpublasXgemm gpublasCgemm

◆ gpuErrCheck

#define gpuErrCheck(ans) { gpuAssert((ans), __FILE__, __LINE__); }

◆ MPI_KIND

#define MPI_KIND c_int

◆ NVTX_RANGE_POP

#define NVTX_RANGE_POP() ((void)0)

◆ NVTX_RANGE_PUSH

#define NVTX_RANGE_PUSH(msg) ((void)0)

◆ REALCASE [1/2]

#define REALCASE 1

◆ REALCASE [2/2]

#define REALCASE 1

◆ SINGLE_PRECISION [1/2]

#define SINGLE_PRECISION 1

◆ SINGLE_PRECISION [2/2]

#define SINGLE_PRECISION 1

Function Documentation

◆ cannons_reduction_c_d()

void cannons_reduction_c_d	(	double *	A,
		double *	U,
		int	local_rowsCast,
		int	local_colsCast,
		int *	a_desc,
		double *	Res,
		int	ToStore,
		int	row_comm,
		int	col_comm,
		int	wantDebug,
		int	useGPU,
		intptr_t *	gpublasHandle )

◆ cannons_reduction_c_dc()

void cannons_reduction_c_dc	(	double complex *	A,
		double complex *	U,
		int	local_rowsCast,
		int	local_colsCasr,
		int *	a_desc,
		double complex *	Res,
		int	ToStore,
		int	row_comm,
		int	col_comm,
		int	wantDebug,
		int	useGPU,
		intptr_t *	gpublasHandle )

◆ cannons_reduction_c_f()

void cannons_reduction_c_f	(	float *	A,
		float *	U,
		int	local_rowsCast,
		int	local_colsCast,
		int *	a_desc,
		float *	Res,
		int	ToStore,
		int	row_comm,
		int	col_comm,
		int	wantDebug,
		int	useGPU,
		intptr_t *	gpublasHandle )

◆ cannons_reduction_c_fc()

void cannons_reduction_c_fc	(	float complex *	A,
		float complex *	U,
		int	local_rowsCast,
		int	local_colsCast,
		int *	a_desc,
		float complex *	Res,
		int	ToStore,
		int	row_comm,
		int	col_comm,
		int	wantDebug,
		int	useGPU,
		intptr_t *	gpublasHandle )

◆ cannons_triang_rectangular_c_d()

void cannons_triang_rectangular_c_d	(	double *	U,
		double *	B,
		int	local_rowsCast,
		int	local_colsCast,
		int *	u_desc,
		int *	b_desc,
		double *	Res,
		int	row_comm,
		int	col_comm,
		int	wantDebug,
		int	useGPU,
		intptr_t *	gpublasHandle )

◆ cannons_triang_rectangular_c_dc()

void cannons_triang_rectangular_c_dc	(	double complex *	U,
		double complex *	B,
		int	local_rowsCast,
		int	local_colsCast,
		int *	u_desc,
		int *	b_desc,
		double complex *	Res,
		int	row_comm,
		int	col_comm,
		int	wantDebug,
		int	useGPU,
		intptr_t *	gpublasHandle )

◆ cannons_triang_rectangular_c_f()

void cannons_triang_rectangular_c_f	(	float *	U,
		float *	B,
		int	local_rowsCast,
		int	local_colsCast,
		int *	u_desc,
		int *	b_desc,
		float *	Res,
		int	row_comm,
		int	col_comm,
		int	wantDebug,
		int	useGPU,
		intptr_t *	gpublasHandle )

◆ cannons_triang_rectangular_c_fc()

void cannons_triang_rectangular_c_fc	(	float complex *	U,
		float complex *	B,
		int	local_rowsCast,
		int	local_colsCast,
		int *	u_desc,
		int *	b_desc,
		float complex *	Res,
		int	row_comm,
		int	col_comm,
		int	wantDebug,
		int	useGPU,
		intptr_t *	gpublasHandle )

◆ gpuAssert()

static void gpuAssert	(	int	status,
		const char *	file,
		int	line )

inline static

Variable Documentation

◆ gpuMemcpyDeviceToHost

int gpuMemcpyDeviceToHost

◆ gpuMemcpyHostToDevice

int gpuMemcpyHostToDevice

Macros

Functions

Variables