19#ifndef OPM_DILU_KERNELS_HPP
20#define OPM_DILU_KERNELS_HPP
24#include <cuda_runtime.h>
25#include <opm/simulators/linalg/gpuistl/detail/kernel_enums.hpp>
28namespace Opm::gpuistl::detail::DILU
// Declaration of solveLowerLevelSet, a function template parameterised on the
// scalar type T and the compile-time integer blocksize. Returns void.
//
// NOTE(review): only the first parameter (T* reorderedMat, a pointer to
// non-const T) is visible in this view; the remainder of the parameter list is
// missing, so it is deliberately left undocumented here.
// Presumably the lower-triangular solve step of the DILU preconditioner for a
// single level set of a reordered matrix -- inferred from the name and the
// enclosing namespace only; confirm against the definition.
47template <
class T,
int blocksize>
48void solveLowerLevelSet(T* reorderedMat,
// Declaration of solveLowerLevelSetNoReorder, a function template
// parameterised on the scalar type T and the compile-time integer blocksize.
// Returns void.
//
// Parameters visible in this view (all are pointers to const, i.e. not
// modified through these pointers):
//   mat             - const T*
//   rowIndices      - const int*
//   colIndices      - const int*
//   indexConversion - const size_t*
// NOTE(review): the parameter list is cut off after indexConversion in this
// view; any further parameters are not shown and are left undocumented.
// Presumably the same lower solve as solveLowerLevelSet but operating on a
// matrix kept in its original ordering, with rowIndices/colIndices describing
// a sparse row/column layout -- inferred from names only; confirm.
73template <
class T,
int blocksize>
74void solveLowerLevelSetNoReorder(
const T* mat,
75 const int* rowIndices,
76 const int* colIndices,
77 const size_t* indexConversion,
// Declaration of solveLowerLevelSetSplit, a function template parameterised on
// the compile-time integer blocksize and three independent scalar types:
// LinearSolverScalar (used for d and v), MatrixScalar (used for the matrix
// pointer) and DiagonalScalar (used for dInv). Returns void.
//
// Parameters visible in this view:
//   reorderedUpperMat - MatrixScalar*, pointer to non-const
//   indexConversion   - int*
//   dInv              - const DiagonalScalar*, read-only through this pointer
//   d                 - const LinearSolverScalar*, read-only through this pointer
//   v                 - LinearSolverScalar*, pointer to non-const
//                       (presumably the result vector -- confirm)
//   stream            - cudaStream_t (presumably the stream the work is
//                       enqueued on -- confirm against the definition)
// NOTE(review): the parameter list shown here has gaps (lines are missing
// between reorderedUpperMat/indexConversion, indexConversion/dInv and
// v/stream), so additional parameters exist that are not documented here.
// NOTE(review): the first parameter of this *Lower* solve is named
// reorderedUpperMat; this may be a naming slip for the lower factor -- confirm
// against the definition before relying on the name.
102template <
int blocksize,
class LinearSolverScalar,
class MatrixScalar,
class DiagonalScalar>
103void solveLowerLevelSetSplit(MatrixScalar* reorderedUpperMat,
106 int* indexConversion,
109 const DiagonalScalar* dInv,
110 const LinearSolverScalar* d,
111 LinearSolverScalar* v,
113 cudaStream_t stream);
// Declaration of solveUpperLevelSet, a function template parameterised on the
// scalar type T and the compile-time integer blocksize. Returns void.
//
// Parameters visible in this view:
//   reorderedMat    - T*, pointer to non-const
//   indexConversion - int*
//   stream          - cudaStream_t (presumably the stream the work is enqueued
//                     on -- confirm against the definition)
// NOTE(review): the parameter list shown here has gaps (lines are missing
// between reorderedMat/indexConversion and indexConversion/stream), so
// additional parameters exist that are not documented here.
// Presumably the upper-triangular counterpart of solveLowerLevelSet -- inferred
// from the name only; confirm.
130template <
class T,
int blocksize>
131void solveUpperLevelSet(T* reorderedMat,
134 int* indexConversion,
140 cudaStream_t stream);
// Declaration of solveUpperLevelSetNoReorder, a function template
// parameterised on the scalar type T and the compile-time integer blocksize.
// Returns void.
//
// Parameters visible in this view:
//   mat             - const T*, read-only through this pointer
//   rowIndices      - const int*, read-only through this pointer
//   colIndices      - const int*, read-only through this pointer
//   indexConversion - const size_t*, read-only through this pointer
//   stream          - cudaStream_t (presumably the stream the work is enqueued
//                     on -- confirm against the definition)
// NOTE(review): lines are missing between indexConversion and stream in this
// view, so additional parameters exist that are not documented here.
// Presumably the upper solve on a matrix kept in its original ordering --
// inferred from the name only; confirm.
155template <
class T,
int blocksize>
156void solveUpperLevelSetNoReorder(
const T* mat,
157 const int* rowIndices,
158 const int* colIndices,
159 const size_t* indexConversion,
165 cudaStream_t stream);
// Declaration of solveUpperLevelSetSplit, a function template parameterised on
// the compile-time integer blocksize and three independent scalar types:
// LinearSolverScalar (used for v), MatrixScalar (used for the matrix pointer)
// and DiagonalScalar (used for dInv). Returns void.
//
// Parameters visible in this view:
//   reorderedUpperMat - MatrixScalar*, pointer to non-const
//   indexConversion   - int*
//   dInv              - const DiagonalScalar*, read-only through this pointer
//   v                 - LinearSolverScalar*, pointer to non-const
//                       (presumably updated in place -- confirm)
//   stream            - cudaStream_t (presumably the stream the work is
//                       enqueued on -- confirm against the definition)
// NOTE(review): the parameter list shown here has gaps (lines are missing
// between reorderedUpperMat/indexConversion, indexConversion/dInv and
// v/stream), so additional parameters exist that are not documented here.
// Unlike solveLowerLevelSetSplit, no separate const input vector d is visible
// in this declaration.
182template <
int blocksize,
class LinearSolverScalar,
class MatrixScalar,
class DiagonalScalar>
183void solveUpperLevelSetSplit(MatrixScalar* reorderedUpperMat,
186 int* indexConversion,
189 const DiagonalScalar* dInv,
190 LinearSolverScalar* v,
192 cudaStream_t stream);
// Declaration of computeDiluDiagonal, a function template parameterised on the
// scalar type T and the compile-time integer blocksize. Returns void.
//
// Parameters visible in this view:
//   reorderedMat       - T*, pointer to non-const
//   reorderedToNatural - int* (presumably an index map from reordered to
//                        natural row numbering -- inferred from the name)
//   naturalToReordered - int* (presumably the inverse map -- inferred from the
//                        name)
//   threadBlockSize    - int, passed by value (presumably the CUDA thread-block
//                        size used for the launch -- confirm)
// NOTE(review): the parameter list shown here has gaps (lines are missing
// between reorderedMat/reorderedToNatural and naturalToReordered/
// threadBlockSize), so additional parameters exist that are not documented.
// Unlike the solve* declarations in this header, no cudaStream_t parameter is
// visible here.
210template <
class T,
int blocksize>
211void computeDiluDiagonal(T* reorderedMat,
214 int* reorderedToNatural,
215 int* naturalToReordered,
220 int threadBlockSize);
// Declaration of computeDiluDiagonalNoReorder, a function template
// parameterised on the scalar type T and the compile-time integer blocksize.
// Returns void.
//
// Parameters visible in this view:
//   mat             - const T*, read-only through this pointer
//   rowIndices      - const int*, read-only through this pointer
//   colIndices      - const int*, read-only through this pointer
//   indexConversion - const size_t*, read-only through this pointer
//   diagIndices     - const size_t*, read-only through this pointer
//                     (presumably positions of the diagonal blocks inside mat
//                     -- inferred from the name; confirm)
//   threadBlockSize - int, passed by value (presumably the CUDA thread-block
//                     size used for the launch -- confirm)
// NOTE(review): lines are missing between diagIndices and threadBlockSize in
// this view, so additional parameters (including whatever receives the result)
// are not documented here.
234template <
class T,
int blocksize>
235void computeDiluDiagonalNoReorder(
const T* mat,
236 const int* rowIndices,
237 const int* colIndices,
238 const size_t* indexConversion,
239 const size_t* diagIndices,
243 int threadBlockSize);
// Declaration of computeDiluDiagonalSplit, a function template parameterised
// on the compile-time integer blocksize, the scalar types InputScalar and
// OutputScalar, and a fourth, unnamed template parameter written as
// MatrixStorageMPScheme (appears to be a non-type parameter of a type
// presumably provided by kernel_enums.hpp -- confirm). Returns void.
//
// Parameters visible in this view:
//   srcReorderedLowerMat - const InputScalar*, read-only through this pointer
//   lowerRowIndices      - int*
//   lowerColIndices      - int*
//   srcReorderedUpperMat - const InputScalar*, read-only through this pointer
//   upperRowIndices      - int*
//   upperColIndices      - int*
//   srcDiagonal          - const InputScalar*, read-only through this pointer
//   reorderedToNatural   - int*
//   naturalToReordered   - int*
//   dstDiagonal          - OutputScalar*, pointer to non-const
//   dstLowerMat          - OutputScalar*, pointer to non-const
//   dstUpperMat          - OutputScalar*, pointer to non-const
//   threadBlockSize      - int, passed by value (presumably the CUDA
//                          thread-block size used for the launch -- confirm)
// The src* pointers use InputScalar and the dst* pointers use OutputScalar, so
// source and destination element types may differ; how the unnamed scheme
// parameter selects what is written to the dst* buffers is not visible here.
// NOTE(review): lines are missing between naturalToReordered and dstDiagonal
// in this view, so additional parameters exist that are not documented here.
266template <
int blocksize,
class InputScalar,
class OutputScalar, MatrixStorageMPScheme>
267void computeDiluDiagonalSplit(
const InputScalar* srcReorderedLowerMat,
268 int* lowerRowIndices,
269 int* lowerColIndices,
270 const InputScalar* srcReorderedUpperMat,
271 int* upperRowIndices,
272 int* upperColIndices,
273 const InputScalar* srcDiagonal,
274 int* reorderedToNatural,
275 int* naturalToReordered,
279 OutputScalar* dstDiagonal,
280 OutputScalar* dstLowerMat,
281 OutputScalar* dstUpperMat,
282 int threadBlockSize);