Eigenvalue SoLvers for Petaflop-Applications (ELPA) 2021.11.001
elpa_constants.h
Go to the documentation of this file.
1#pragma once
2
3/* This might seem over-engineered, but it lets this file be re-used on the
4 * Fortran side as well, so that the definitions are kept in this one place.
5 */
6
7/* Private helper macros */
/* Expands one (name, value) pair to the text `name = value,`; any further
 * arguments are accepted and discarded. The result has the shape of an
 * enumerator entry, which is presumably how the list macros in this file
 * are meant to be consumed. */
8#define ELPA_ENUM_ENTRY(name, value, ...) \
9 name = value,
/* Expands to `+1` regardless of its arguments. Passed to a list macro it
 * yields one `+1` per entry; the ELPA_NUMBER_OF_* macros in this file put a
 * leading `0` in front of that to form a constant counting the entries. */
10#define ELPA_ENUM_SUM(name, value, ...) +1
11
12/* MATRIX layout */
/* List of matrix layouts: invokes X(name, value) once per layout
 * (COLUMN_MAJOR_ORDER = 1, ROW_MAJOR_ORDER = 2). */
13#define ELPA_FOR_ALL_MATRIX_LAYOUTS(X) \
14 X(COLUMN_MAJOR_ORDER, 1) \
15 X(ROW_MAJOR_ORDER, 2)
16
19};
/* Number of matrix layouts: expands to (0 +1 +1), i.e. 2. */
21#define ELPA_NUMBER_OF_MATRIX_LAYOUTS (0 ELPA_FOR_ALL_MATRIX_LAYOUTS(ELPA_ENUM_SUM))
22
23/* Solver constants */
/* List of solvers: invokes X(name, value) once per solver
 * (ELPA_SOLVER_1STAGE = 1, ELPA_SOLVER_2STAGE = 2). */
24#define ELPA_FOR_ALL_SOLVERS(X) \
25 X(ELPA_SOLVER_1STAGE, 1) \
26 X(ELPA_SOLVER_2STAGE, 2)
30};
31
/* Number of solvers: expands to (0 +1 +1), i.e. 2. */
32#define ELPA_NUMBER_OF_SOLVERS (0 ELPA_FOR_ALL_SOLVERS(ELPA_ENUM_SUM))
34/* Kernel constants */
/* List of the 2-stage real kernels. For every kernel, X is invoked as
 * X(name, value, flag, __VA_ARGS__): `value` is the integer paired with the
 * name (1..41, each used once in this list), `flag` is 0 or 1, and any extra
 * arguments given to this macro are forwarded unchanged to every X call.
 * NOTE(review): the meaning of `flag` is not visible here; it looks like a
 * per-build "kernel available" switch - confirm against the build system. */
35#define ELPA_FOR_ALL_2STAGE_REAL_KERNELS(X, ...) \
36 X(ELPA_2STAGE_REAL_GENERIC, 1, 1, __VA_ARGS__) \
37 X(ELPA_2STAGE_REAL_GENERIC_SIMPLE, 2, 1, __VA_ARGS__) \
38 X(ELPA_2STAGE_REAL_BGP, 3, 0, __VA_ARGS__) \
39 X(ELPA_2STAGE_REAL_BGQ, 4, 0, __VA_ARGS__) \
40 X(ELPA_2STAGE_REAL_SSE_ASSEMBLY, 5, 1, __VA_ARGS__) \
41 X(ELPA_2STAGE_REAL_SSE_BLOCK2, 6, 1, __VA_ARGS__) \
42 X(ELPA_2STAGE_REAL_SSE_BLOCK4, 7, 1, __VA_ARGS__) \
43 X(ELPA_2STAGE_REAL_SSE_BLOCK6, 8, 1, __VA_ARGS__) \
44 X(ELPA_2STAGE_REAL_AVX_BLOCK2, 9, 0, __VA_ARGS__) \
45 X(ELPA_2STAGE_REAL_AVX_BLOCK4, 10, 0, __VA_ARGS__) \
46 X(ELPA_2STAGE_REAL_AVX_BLOCK6, 11, 0, __VA_ARGS__) \
47 X(ELPA_2STAGE_REAL_AVX2_BLOCK2, 12, 0, __VA_ARGS__) \
48 X(ELPA_2STAGE_REAL_AVX2_BLOCK4, 13, 0, __VA_ARGS__) \
49 X(ELPA_2STAGE_REAL_AVX2_BLOCK6, 14, 0, __VA_ARGS__) \
50 X(ELPA_2STAGE_REAL_AVX512_BLOCK2, 15, 0, __VA_ARGS__) \
51 X(ELPA_2STAGE_REAL_AVX512_BLOCK4, 16, 0, __VA_ARGS__) \
52 X(ELPA_2STAGE_REAL_AVX512_BLOCK6, 17, 0, __VA_ARGS__) \
53 X(ELPA_2STAGE_REAL_NVIDIA_GPU, 18, 0, __VA_ARGS__) \
54 X(ELPA_2STAGE_REAL_AMD_GPU, 19, 0, __VA_ARGS__) \
55 X(ELPA_2STAGE_REAL_INTEL_GPU, 20, 0, __VA_ARGS__) \
56 X(ELPA_2STAGE_REAL_SPARC64_BLOCK2, 21, 0, __VA_ARGS__) \
57 X(ELPA_2STAGE_REAL_SPARC64_BLOCK4, 22, 0, __VA_ARGS__) \
58 X(ELPA_2STAGE_REAL_SPARC64_BLOCK6, 23, 0, __VA_ARGS__) \
59 X(ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK2, 24, 0, __VA_ARGS__) \
60 X(ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK4, 25, 0, __VA_ARGS__) \
61 X(ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK6, 26, 0, __VA_ARGS__) \
62 X(ELPA_2STAGE_REAL_VSX_BLOCK2, 27, 0, __VA_ARGS__) \
63 X(ELPA_2STAGE_REAL_VSX_BLOCK4, 28, 0, __VA_ARGS__) \
64 X(ELPA_2STAGE_REAL_VSX_BLOCK6, 29, 0, __VA_ARGS__) \
65 X(ELPA_2STAGE_REAL_SVE128_BLOCK2, 30, 0, __VA_ARGS__) \
66 X(ELPA_2STAGE_REAL_SVE128_BLOCK4, 31, 0, __VA_ARGS__) \
67 X(ELPA_2STAGE_REAL_SVE128_BLOCK6, 32, 0, __VA_ARGS__) \
68 X(ELPA_2STAGE_REAL_SVE256_BLOCK2, 33, 0, __VA_ARGS__) \
69 X(ELPA_2STAGE_REAL_SVE256_BLOCK4, 34, 0, __VA_ARGS__) \
70 X(ELPA_2STAGE_REAL_SVE256_BLOCK6, 35, 0, __VA_ARGS__) \
71 X(ELPA_2STAGE_REAL_SVE512_BLOCK2, 36, 0, __VA_ARGS__) \
72 X(ELPA_2STAGE_REAL_SVE512_BLOCK4, 37, 0, __VA_ARGS__) \
73 X(ELPA_2STAGE_REAL_SVE512_BLOCK6, 38, 0, __VA_ARGS__) \
74 X(ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK4, 39, 1, __VA_ARGS__) \
75 X(ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK6, 40, 1, __VA_ARGS__) \
76 X(ELPA_2STAGE_REAL_NVIDIA_SM80_GPU, 41, 0, __VA_ARGS__)
77
/* The real-kernel list followed by two extra entries:
 * ELPA_2STAGE_REAL_INVALID (-1) and ELPA_2STAGE_REAL_DEFAULT (6, the same
 * value as ELPA_2STAGE_REAL_SSE_BLOCK2 in the kernel list).
 * The kernel list is expanded without extra arguments, so __VA_ARGS__ is
 * empty for those entries. The two extra entries pass the tokens `choke me`
 * as third argument; presumably this is deliberate so that an X which
 * actually uses the third argument fails to compile - TODO confirm. */
78#define ELPA_FOR_ALL_2STAGE_REAL_KERNELS_AND_DEFAULT(X) \
79 ELPA_FOR_ALL_2STAGE_REAL_KERNELS(X) \
80 X(ELPA_2STAGE_REAL_INVALID, -1, choke me) \
81 X(ELPA_2STAGE_REAL_DEFAULT, 6, choke me)
85};
86
87
/* List of the 2-stage complex kernels. For every kernel, X is invoked as
 * X(name, value, flag, __VA_ARGS__): `value` is the integer paired with the
 * name (1..25, each used once in this list), `flag` is 0 or 1, and any extra
 * arguments given to this macro are forwarded unchanged to every X call.
 * NOTE(review): the meaning of `flag` is not visible here; it looks like a
 * per-build "kernel available" switch - confirm against the build system. */
88#define ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(X, ...) \
89 X(ELPA_2STAGE_COMPLEX_GENERIC, 1, 1, __VA_ARGS__) \
90 X(ELPA_2STAGE_COMPLEX_GENERIC_SIMPLE, 2, 1, __VA_ARGS__) \
91 X(ELPA_2STAGE_COMPLEX_BGP, 3, 0, __VA_ARGS__) \
92 X(ELPA_2STAGE_COMPLEX_BGQ, 4, 0, __VA_ARGS__) \
93 X(ELPA_2STAGE_COMPLEX_SSE_ASSEMBLY, 5, 1, __VA_ARGS__) \
94 X(ELPA_2STAGE_COMPLEX_SSE_BLOCK1, 6, 1, __VA_ARGS__) \
95 X(ELPA_2STAGE_COMPLEX_SSE_BLOCK2, 7, 1, __VA_ARGS__) \
96 X(ELPA_2STAGE_COMPLEX_AVX_BLOCK1, 8, 0, __VA_ARGS__) \
97 X(ELPA_2STAGE_COMPLEX_AVX_BLOCK2, 9, 0, __VA_ARGS__) \
98 X(ELPA_2STAGE_COMPLEX_AVX2_BLOCK1, 10, 0, __VA_ARGS__) \
99 X(ELPA_2STAGE_COMPLEX_AVX2_BLOCK2, 11, 0, __VA_ARGS__) \
100 X(ELPA_2STAGE_COMPLEX_AVX512_BLOCK1, 12, 0, __VA_ARGS__) \
101 X(ELPA_2STAGE_COMPLEX_AVX512_BLOCK2, 13, 0, __VA_ARGS__) \
102 X(ELPA_2STAGE_COMPLEX_SVE128_BLOCK1, 14, 0, __VA_ARGS__) \
103 X(ELPA_2STAGE_COMPLEX_SVE128_BLOCK2, 15, 0, __VA_ARGS__) \
104 X(ELPA_2STAGE_COMPLEX_SVE256_BLOCK1, 16, 0, __VA_ARGS__) \
105 X(ELPA_2STAGE_COMPLEX_SVE256_BLOCK2, 17, 0, __VA_ARGS__) \
106 X(ELPA_2STAGE_COMPLEX_SVE512_BLOCK1, 18, 0, __VA_ARGS__) \
107 X(ELPA_2STAGE_COMPLEX_SVE512_BLOCK2, 19, 0, __VA_ARGS__) \
108 X(ELPA_2STAGE_COMPLEX_NEON_ARCH64_BLOCK1, 20, 0, __VA_ARGS__) \
109 X(ELPA_2STAGE_COMPLEX_NEON_ARCH64_BLOCK2, 21, 0, __VA_ARGS__) \
110 X(ELPA_2STAGE_COMPLEX_NVIDIA_GPU, 22, 0, __VA_ARGS__) \
111 X(ELPA_2STAGE_COMPLEX_AMD_GPU, 23, 0, __VA_ARGS__) \
112 X(ELPA_2STAGE_COMPLEX_INTEL_GPU, 24, 0, __VA_ARGS__) \
113 X(ELPA_2STAGE_COMPLEX_NVIDIA_SM80_GPU, 25, 0, __VA_ARGS__)
114
/* The complex-kernel list followed by two extra entries:
 * ELPA_2STAGE_COMPLEX_INVALID (-1) and ELPA_2STAGE_COMPLEX_DEFAULT (6, the
 * same value as ELPA_2STAGE_COMPLEX_SSE_BLOCK1 in the kernel list).
 * The kernel list is expanded without extra arguments, so __VA_ARGS__ is
 * empty for those entries. The two extra entries pass the tokens `choke me`
 * as third argument; presumably this is deliberate so that an X which
 * actually uses the third argument fails to compile - TODO confirm. */
115#define ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS_AND_DEFAULT(X) \
116 ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(X) \
117 X(ELPA_2STAGE_COMPLEX_INVALID, -1, choke me) \
118 X(ELPA_2STAGE_COMPLEX_DEFAULT, 6, choke me)
119
122};
124
125
126/* General constants */
/* List of status codes: invokes X(name, value) once per code. ELPA_OK is 0;
 * every ELPA_ERROR* entry has its own negative value, from -1 down to -12. */
127#define ELPA_FOR_ALL_ERRORS(X) \
128 X(ELPA_OK, 0) \
129 X(ELPA_ERROR, -1) \
130 X(ELPA_ERROR_ENTRY_NOT_FOUND, -2) \
131 X(ELPA_ERROR_ENTRY_INVALID_VALUE, -3) \
132 X(ELPA_ERROR_ENTRY_ALREADY_SET, -4) \
133 X(ELPA_ERROR_ENTRY_NO_STRING_REPRESENTATION, -5) \
134 X(ELPA_ERROR_SETUP, -6) \
135 X(ELPA_ERROR_CRITICAL, -7) \
136 X(ELPA_ERROR_API_VERSION, -8) \
137 X(ELPA_ERROR_AUTOTUNE_API_VERSION, -9) \
138 X(ELPA_ERROR_AUTOTUNE_OBJECT_CHANGED, -10) \
139 X(ELPA_ERROR_ENTRY_READONLY, -11) \
140 X(ELPA_ERROR_CANNOT_OPEN_FILE, -12)
144};
145
149};
150
/* List of autotune levels: invokes X(name, value) for 20 entries with the
 * consecutive values 0..19. The variadic parameter is accepted but does not
 * appear in the expansion, so extra arguments are dropped. */
151#define ELPA_FOR_ALL_AUTOTUNE_LEVELS(X, ...) \
152 X(ELPA_AUTOTUNE_NOT_TUNABLE, 0) \
153 X(ELPA_AUTOTUNE_GPU, 1) \
154 X(ELPA2_AUTOTUNE_KERNEL, 2) \
155 X(ELPA_AUTOTUNE_OPENMP, 3) \
156 X(ELPA_AUTOTUNE_TRANSPOSE_VECTORS, 4) \
157 X(ELPA2_AUTOTUNE_FULL_TO_BAND, 5) \
158 X(ELPA2_AUTOTUNE_BAND_TO_TRIDI, 6) \
159 X(ELPA_AUTOTUNE_SOLVE, 7) \
160 X(ELPA2_AUTOTUNE_TRIDI_TO_BAND, 8) \
161 X(ELPA2_AUTOTUNE_BAND_TO_FULL, 9) \
162 X(ELPA2_AUTOTUNE_MAIN, 10) \
163 X(ELPA1_AUTOTUNE_FULL_TO_TRIDI, 11) \
164 X(ELPA1_AUTOTUNE_TRIDI_TO_FULL, 12) \
165 X(ELPA_AUTOTUNE_MPI, 13) \
166 X(ELPA_AUTOTUNE_FAST, 14) \
167 X(ELPA_AUTOTUNE_MEDIUM, 15) \
168 X(ELPA2_AUTOTUNE_BAND_TO_FULL_BLOCKING, 16) \
169 X(ELPA1_AUTOTUNE_MAX_STORED_ROWS, 17) \
170 X(ELPA2_AUTOTUNE_TRIDI_TO_BAND_STRIPEWIDTH, 18) \
171 X(ELPA_AUTOTUNE_EXTENSIVE, 19)
172
173 //X(ELPA_AUTOTUNE_MEDIUM, 16)
174
177};
178
/* Number of autotune levels: a leading 0 plus one `+1` per list entry,
 * i.e. 20 for the list as defined in this file. */
179#define ELPA_NUMBER_OF_AUTOTUNE_LEVELS (0 ELPA_FOR_ALL_AUTOTUNE_LEVELS(ELPA_ENUM_SUM))
/* List of autotune domains: invokes X(name, value) for REAL = 1,
 * COMPLEX = 2 and ANY = 3. The variadic parameter is accepted but does not
 * appear in the expansion. */
181#define ELPA_FOR_ALL_AUTOTUNE_DOMAINS(X, ...) \
182 X(ELPA_AUTOTUNE_DOMAIN_REAL, 1) \
183 X(ELPA_AUTOTUNE_DOMAIN_COMPLEX, 2) \
184 X(ELPA_AUTOTUNE_DOMAIN_ANY, 3)
185
188};
189
190
/* List of autotune parts: invokes X(name, value) for five entries with the
 * consecutive values 0..4 (NONE, ANY, GENERALIZED, ELPA1, ELPA2). The
 * variadic parameter is accepted but does not appear in the expansion. */
191#define ELPA_FOR_ALL_AUTOTUNE_PARTS(X, ...) \
192 X(ELPA_AUTOTUNE_PART_NONE, 0) \
193 X(ELPA_AUTOTUNE_PART_ANY, 1) \
194 X(ELPA_AUTOTUNE_PART_GENERALIZED, 2) \
195 X(ELPA_AUTOTUNE_PART_ELPA1, 3) \
196 X(ELPA_AUTOTUNE_PART_ELPA2, 4)
197
200};
201
202
ELPA_SOLVERS
Definition: elpa_constants.h:27
ELPA_AUTOTUNE_DOMAINS
Definition: elpa_constants.h:180
#define ELPA_FOR_ALL_AUTOTUNE_DOMAINS(X,...)
Definition: elpa_constants.h:175
#define ELPA_FOR_ALL_2STAGE_REAL_KERNELS(X,...)
Definition: elpa_constants.h:33
#define ELPA_FOR_ALL_MATRIX_LAYOUTS(X)
Definition: elpa_constants.h:13
ELPA_CONSTANTS
Definition: elpa_constants.h:141
@ ELPA_2STAGE_NUMBER_OF_COMPLEX_KERNELS
Definition: elpa_constants.h:142
@ ELPA_2STAGE_NUMBER_OF_REAL_KERNELS
Definition: elpa_constants.h:143
#define ELPA_FOR_ALL_2STAGE_REAL_KERNELS_AND_DEFAULT(X)
Definition: elpa_constants.h:76
#define ELPA_FOR_ALL_AUTOTUNE_LEVELS(X,...)
Definition: elpa_constants.h:146
#define ELPA_ENUM_SUM(name, value,...)
Definition: elpa_constants.h:10
ELPA_REAL_KERNELS
Definition: elpa_constants.h:81
#define ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(X,...)
Definition: elpa_constants.h:85
ELPA_ERRORS
Definition: elpa_constants.h:138
#define ELPA_FOR_ALL_AUTOTUNE_PARTS(X,...)
Definition: elpa_constants.h:184
#define ELPA_ENUM_ENTRY(name, value,...)
Definition: elpa_constants.h:8
#define ELPA_FOR_ALL_SOLVERS(X)
Definition: elpa_constants.h:23
#define ELPA_FOR_ALL_ERRORS(X)
Definition: elpa_constants.h:123
MATRIX_LAYOUTS
Definition: elpa_constants.h:17
#define ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS_AND_DEFAULT(X)
Definition: elpa_constants.h:112
ELPA_COMPLEX_KERNELS
Definition: elpa_constants.h:117
ELPA_AUTOTUNE_LEVELS
Definition: elpa_constants.h:170
ELPA_AUTOTUNE_PARTS
Definition: elpa_constants.h:191