Eigenvalue SoLvers for Petaflop-Applications (ELPA)  2019.05.002
Macros | Functions | Variables
real_fjsp_2hv_template.c File Reference
#include <x86intrin.h>
#include <stdio.h>
#include <stdlib.h>

Macros

#define __forceinline   __attribute__((always_inline)) static
 

Functions

 for (i=2;i< nb;i++)
 
 if (nq==i)
 
 _SSE_STORE (q, q1)
 

Variables

int nb = *pnb
 
int nq = *pldq
 
int ldq = *pldq
 
int ldh = *pldh
 
int worked_on = 0
 
__SSE_DATATYPE x1 = _SSE_LOAD(&q[ldq])
 
__SSE_DATATYPE x2 = _SSE_LOAD(&q[ldq+offset])
 
__SSE_DATATYPE x3 = _SSE_LOAD(&q[ldq+2*offset])
 
__SSE_DATATYPE x4 = _SSE_LOAD(&q[ldq+3*offset])
 
__SSE_DATATYPE x5 = _SSE_LOAD(&q[ldq+4*offset])
 
__SSE_DATATYPE x6 = _SSE_LOAD(&q[ldq+5*offset])
 
__SSE_DATATYPE h2 = _SSE_MUL(h1, vs)
 
__SSE_DATATYPE q1 = _SSE_LOAD(q)
 
__SSE_DATATYPE y1 = _SSE_ADD(q1, _SSE_MUL(x1, h1))
 
__SSE_DATATYPE q2 = _SSE_LOAD(&q[offset])
 
__SSE_DATATYPE y2 = _SSE_ADD(q2, _SSE_MUL(x2, h1))
 
__SSE_DATATYPE q3 = _SSE_LOAD(&q[2*offset])
 
__SSE_DATATYPE y3 = _SSE_ADD(q3, _SSE_MUL(x3, h1))
 
__SSE_DATATYPE q4 = _SSE_LOAD(&q[3*offset])
 
__SSE_DATATYPE y4 = _SSE_ADD(q4, _SSE_MUL(x4, h1))
 
__SSE_DATATYPE q5 = _SSE_LOAD(&q[4*offset])
 
__SSE_DATATYPE y5 = _SSE_ADD(q5, _SSE_MUL(x5, h1))
 
__SSE_DATATYPE q6 = _SSE_LOAD(&q[5*offset])
 
__SSE_DATATYPE y6 = _SSE_ADD(q6, _SSE_MUL(x6, h1))
 
 h1 = _SSE_XOR(tau1, sign)
 
_SSE_STORE &[offset] q
 

Macro Definition Documentation

◆ __forceinline

#define __forceinline   __attribute__((always_inline)) static

Function Documentation

◆ _SSE_STORE()

_SSE_STORE ( q  ,
q1   
)

◆ for()

for ( )

◆ if()

if ( nq == i )

Variable Documentation

◆ h1

h1 = _SSE_XOR(tau1, sign)

◆ h2

__SSE_DATATYPE h2 = _SSE_MUL(h1, vs)

◆ ldh

int ldh = *pldh

◆ ldq

int ldq = *pldq

◆ nb

int nb = *pnb

◆ nq

int nq = *pldq

◆ q

◆ q1

__SSE_DATATYPE q1 = _SSE_LOAD(q)

◆ q2

__SSE_DATATYPE q2 = _SSE_LOAD(&q[offset])

◆ q3

__SSE_DATATYPE q3 = _SSE_LOAD(&q[2*offset])

◆ q4

__SSE_DATATYPE q4 = _SSE_LOAD(&q[3*offset])

◆ q5

__SSE_DATATYPE q5 = _SSE_LOAD(&q[4*offset])

◆ q6

q6 = _SSE_LOAD(&q[5*offset])

◆ worked_on

worked_on = 0

◆ x1

__SSE_DATATYPE x1 = _SSE_LOAD(&q[ldq])

Unrolled kernel that computes 12 rows of Q simultaneously when DOUBLE_PRECISION_REAL is defined, or 24 rows of Q simultaneously when SINGLE_PRECISION_REAL is defined; a matrix-vector product with two Householder vectors plus a rank-2 update is performed.

Unrolled kernel that computes 10 rows of Q simultaneously when DOUBLE_PRECISION_REAL is defined, or 20 rows of Q simultaneously when SINGLE_PRECISION_REAL is defined; a matrix-vector product with two Householder vectors plus a rank-2 update is performed.

Unrolled kernel that computes 8 rows of Q simultaneously when DOUBLE_PRECISION_REAL is defined, or 16 rows of Q simultaneously when SINGLE_PRECISION_REAL is defined; a matrix-vector product with two Householder vectors plus a rank-2 update is performed.

Unrolled kernel that computes 6 rows of Q simultaneously when DOUBLE_PRECISION_REAL is defined, or 12 rows of Q simultaneously when SINGLE_PRECISION_REAL is defined; a matrix-vector product with two Householder vectors plus a rank-2 update is performed.

Unrolled kernel that computes 4 rows of Q simultaneously when DOUBLE_PRECISION_REAL is defined, or 8 rows of Q simultaneously when SINGLE_PRECISION_REAL is defined; a matrix-vector product with two Householder vectors plus a rank-2 update is performed.

Unrolled kernel that computes 2 rows of Q simultaneously when DOUBLE_PRECISION_REAL is defined, or 4 rows of Q simultaneously when SINGLE_PRECISION_REAL is defined; a matrix-vector product with two Householder vectors plus a rank-2 update is performed.

◆ x2

__SSE_DATATYPE x2 = _SSE_LOAD(&q[ldq+offset])

◆ x3

__SSE_DATATYPE x3 = _SSE_LOAD(&q[ldq+2*offset])

◆ x4

__SSE_DATATYPE x4 = _SSE_LOAD(&q[ldq+3*offset])

◆ x5

__SSE_DATATYPE x5 = _SSE_LOAD(&q[ldq+4*offset])

◆ x6

x6 = _SSE_LOAD(&q[ldq+5*offset])

◆ y1

__SSE_DATATYPE y1 = _SSE_ADD(q1, _SSE_MUL(x1, h1))

◆ y2

__SSE_DATATYPE y2 = _SSE_ADD(q2, _SSE_MUL(x2, h1))

◆ y3

__SSE_DATATYPE y3 = _SSE_ADD(q3, _SSE_MUL(x3, h1))

◆ y4

__SSE_DATATYPE y4 = _SSE_ADD(q4, _SSE_MUL(x4, h1))

◆ y5

__SSE_DATATYPE y5 = _SSE_ADD(q5, _SSE_MUL(x5, h1))

◆ y6

y6 = _SSE_ADD(q6, _SSE_MUL(x6, h1))