opm-simulators
Loading...
Searching...
No Matches
ISTLSolverGPUISTL.hpp
1/*
2 Copyright 2025 Equinor ASA
3
4 This file is part of the Open Porous Media project (OPM).
5 OPM is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
9 OPM is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13 You should have received a copy of the GNU General Public License
14 along with OPM. If not, see <http://www.gnu.org/licenses/>.
15*/
16
17#ifndef OPM_ISTLSOLVERGPUISTL_HEADER_INCLUDED
18#define OPM_ISTLSOLVERGPUISTL_HEADER_INCLUDED
19
#include <dune/istl/operators.hh>

#include <algorithm>
#include <any>
#include <cctype>
#include <cstddef>
#include <functional>
#include <memory>
#include <optional>
#include <string>
#include <vector>

#include <opm/grid/utility/ElementChunks.hpp>
#include <opm/simulators/linalg/AbstractISTLSolver.hpp>
#include <opm/simulators/linalg/getQuasiImpesWeights.hpp>
#include <opm/simulators/linalg/ISTLSolver.hpp>
27
28#if USE_HIP
29#include <opm/simulators/linalg/gpuistl_hip/GpuSparseMatrixWrapper.hpp>
30#include <opm/simulators/linalg/gpuistl_hip/GpuVector.hpp>
31#include <opm/simulators/linalg/gpuistl_hip/PinnedMemoryHolder.hpp>
32#else
33#include <opm/simulators/linalg/gpuistl/GpuSparseMatrixWrapper.hpp>
34#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
35#include <opm/simulators/linalg/gpuistl/PinnedMemoryHolder.hpp>
36#endif
37
38#include <opm/simulators/linalg/ExtractParallelGridInformationToISTL.hpp>
39#include <opm/simulators/linalg/ParallelIstlInformation.hpp>
40#include <opm/simulators/linalg/findOverlapRowsAndColumns.hpp>
41#include <opm/simulators/linalg/gpuistl/detail/FlexibleSolverWrapper.hpp>
42#include <opm/simulators/linalg/printlinearsolverparameter.hpp>
43
44namespace Opm::gpuistl
45{
46
59template <class TypeTag>
61{
62public:
67 using Matrix = typename SparseMatrixAdapter::IstlMatrix;
71 using ElementChunksType = Opm::ElementChunks<GridView, Dune::Partitions::All>;
72
73 using real_type = typename Vector::field_type;
74
76 using GPUVector = Opm::gpuistl::GpuVector<real_type>;
77 using GPUVectorInt = Opm::gpuistl::GpuVector<int>;
78
79 constexpr static std::size_t pressureIndex = GetPropType<TypeTag, Properties::Indices>::pressureSwitchIdx;
80
81
82#if HAVE_MPI
83 using CommunicationType = Dune::OwnerOverlapCopyCommunication<int, int>;
84#else
85 using CommunicationType = Dune::Communication<int>;
86#endif
87
89
97 ISTLSolverGPUISTL(const Simulator& simulator,
98 const FlowLinearSolverParameters& parameters,
99 bool forceSerial = false)
100 : m_parameters(parameters)
101 , m_simulator(simulator)
102 , m_forceSerial(forceSerial)
103 , m_element_chunks(simulator.vanguard().gridView(), Dune::Partitions::all, ThreadManager::maxThreads())
104 {
105#if HAVE_MPI
106 m_comm = std::make_shared<CommunicationType>(simulator.vanguard().grid().comm());
107 // Extract parallel grid information to populate index sets
108 extractParallelGridInformationToISTL(simulator.vanguard().grid(), m_parallelInformation);
109 // Set up element mapper manually
110 ElementMapper elemMapper(simulator.vanguard().gridView(), Dune::mcmgElementLayout());
111 // Overlap rows are needed for making overlap rows invalid in parallel mode
112 Opm::detail::findOverlapAndInterior(simulator.vanguard().grid(), elemMapper, m_overlapRows, m_interiorRows);
113 if (isParallel()) {
114 const std::size_t size = simulator.vanguard().grid().leafGridView().size(0);
115 // Copy parallel information to communication object (index sets and remote indices)
116 Opm::detail::copyParValues(m_parallelInformation, size, *m_comm);
117 }
118
119#else
120 m_comm = std::make_shared<CommunicationType>(simulator.gridView().comm());
121#endif
122 m_parameters.init(simulator.vanguard().eclState().getSimulationConfig().useCPR());
123 m_propertyTree = setupPropertyTree(m_parameters,
127 OPM_THROW(std::logic_error, "Well operators are currently not supported for the GPU backend. "
128 "Use --matrix-add-well-contributions=true to add well contributions to the matrix instead.");
129 }
130
131 Opm::detail::printLinearSolverParameters(m_parameters, m_propertyTree, simulator.gridView().comm());
132 }
133
136 explicit ISTLSolverGPUISTL(const Simulator& simulator)
137 : ISTLSolverGPUISTL(simulator, FlowLinearSolverParameters(), false)
138 {
139 }
140
146 void eraseMatrix() override
147 {
148 // Nothing, this is the same as the ISTLSolver
149 }
150
157 void setActiveSolver(int num) override
158 {
159 if (num != 0) {
160 OPM_THROW(std::logic_error, "Only one solver available for the GPU backend.");
161 }
162 }
163
169 int numAvailableSolvers() const override
170 {
171 return 1;
172 }
173
183 void prepare(const SparseMatrixAdapter& M, Vector& b) override
184 {
185 prepare(M.istlMatrix(), b);
186 }
187
197 void prepare(const Matrix& M, Vector& b) override
198 {
199 try {
200 if (isParallel() && !m_overlapRows.empty()) {
201 Opm::detail::makeOverlapRowsInvalid(const_cast<Matrix&>(M), m_overlapRows);
202 }
203 updateMatrix(M);
204 updateRhs(b);
205 }
206 OPM_CATCH_AND_RETHROW_AS_CRITICAL_ERROR("This is likely due to a faulty linear solver JSON specification. "
207 "Check for errors related to missing nodes.");
208 }
209
215 void setResidual(Vector&) override
216 {
217 // Should be handled in prepare() instead.
218 }
219
228 void getResidual(Vector& b) const override
229 {
230 if (!m_rhs) {
231 OPM_THROW(std::runtime_error, "m_rhs not initialized, prepare(matrix, rhs); needs to be called");
232 }
233 m_rhs->copyToHost(b);
234 }
235
241 void setMatrix(const SparseMatrixAdapter&) override
242 {
243 // Should be handled in prepare() instead.
244 }
245
257 bool solve(Vector& x) override
258 {
259 // TODO: Write matrix to disk if needed
260 Dune::InverseOperatorResult result;
261 if (!m_matrix) {
262 OPM_THROW(std::runtime_error, "m_matrix not initialized, prepare(matrix, rhs); needs to be called");
263 }
264 if (!m_rhs) {
265 OPM_THROW(std::runtime_error, "m_rhs not initialized, prepare(matrix, rhs); needs to be called");
266 }
267 if (!m_gpuSolver) {
268 OPM_THROW(std::runtime_error,
269 "m_gpuFlexibleSolver not initialized, prepare(matrix, rhs); needs to be called");
270 }
271
272 if (!m_x) {
273 m_x = std::make_unique<GPUVector>(x);
274 m_pinnedXMemory = std::make_unique<PinnedMemoryHolder<real_type>>(
275 const_cast<real_type*>(&x[0][0]),
276 x.dim()
277 );
278 } else {
279 // copy from host to device using main stream and asynchronous transfer
280 m_x->copyFromHostAsync(x);
281 }
282 m_gpuSolver->apply(*m_x, *m_rhs, result);
283
284 m_x->copyToHost(x);
285
286 ++m_solveCount;
287
288 m_lastSeenIterations = result.iterations;
289 return checkConvergence(result);
290 }
291
297 int iterations() const override
298 {
299 return m_lastSeenIterations;
300 }
301
305 const CommunicationType* comm() const override
306 {
307 return m_comm.get();
308 }
309
315 bool isParallel() const
316 {
317 #if HAVE_MPI
318 return !m_forceSerial && m_comm->communicator().size() > 1;
319 #else
320 return false;
321 #endif
322 }
323
327 int getSolveCount() const override
328 {
329 return m_solveCount;
330 }
331
332private:
333 bool checkConvergence(const Dune::InverseOperatorResult& result) const
334 {
335 return AbstractISTLSolver<TypeTag>::checkConvergence(result, m_parameters);
336 }
337
338 // Weights to make approximate pressure equations.
339 std::function<GPUVector&()> getWeightsCalculator()
340 {
341 std::function<GPUVector&()> weightsCalculator;
342
343 using namespace std::string_literals;
344
345 auto preconditionerType = m_propertyTree.get("preconditioner.type"s, "cpr"s);
346 // Make the preconditioner type lowercase for internal canonical representation
347 std::transform(preconditionerType.begin(), preconditionerType.end(), preconditionerType.begin(), ::tolower);
348 if (preconditionerType == "cpr" || preconditionerType == "cprt"
349 || preconditionerType == "cprw" || preconditionerType == "cprwt") {
350 const bool transpose = preconditionerType == "cprt" || preconditionerType == "cprwt";
351 const auto weightsType = m_propertyTree.get("preconditioner.weight_type"s, "quasiimpes"s);
352 if (weightsType == "quasiimpes") {
353 m_weights.emplace(m_matrix->N() * m_matrix->blockSize());
354 // Pre-compute diagonal indices once when setting up the calculator
355 auto diagonalIndices = Amg::precomputeDiagonalIndices(*m_matrix);
356 m_diagonalIndices.emplace(diagonalIndices);
357
358 if (transpose) {
359 weightsCalculator = [this]() -> GPUVector& {
360 Amg::getQuasiImpesWeights<real_type, true>(
361 *m_matrix, pressureIndex, *m_weights, *m_diagonalIndices);
362 return *m_weights;
363 };
364 } else {
365 weightsCalculator = [this]() -> GPUVector& {
366 Amg::getQuasiImpesWeights<real_type, false>(
367 *m_matrix, pressureIndex, *m_weights, *m_diagonalIndices);
368 return *m_weights;
369 };
370 }
371 } else if (weightsType == "trueimpes") {
372 // Create CPU vector for the weights and initialize GPU vector
373 m_cpuWeights.resize(m_matrix->N());
374 m_pinnedWeightsMemory = std::make_unique<PinnedMemoryHolder<real_type>>(
375 const_cast<real_type*>(&m_cpuWeights[0][0]), m_cpuWeights.dim());
376 m_weights.emplace(m_cpuWeights);
377
378 const bool enableThreadParallel = m_parameters.cpr_weights_thread_parallel_;
379 // CPU implementation wrapped for GPU
380 weightsCalculator = [this, enableThreadParallel]() -> GPUVector& {
381 // Use the CPU implementation to calculate the weights
382 ElementContext elemCtx(m_simulator);
383 Amg::getTrueImpesWeights(pressureIndex,
384 m_cpuWeights,
385 elemCtx, m_simulator.model(),
386 m_element_chunks,
387 enableThreadParallel);
388
389 // Copy CPU vector to GPU vector using main stream and asynchronous transfer
390 m_weights->copyFromHostAsync(m_cpuWeights);
391 return *m_weights;
392 };
393 } else if (weightsType == "trueimpesanalytic") {
394 // Create CPU vector for the weights and initialize GPU vector
395 m_cpuWeights.resize(m_matrix->N());
396 m_pinnedWeightsMemory = std::make_unique<PinnedMemoryHolder<real_type>>(
397 const_cast<real_type*>(&m_cpuWeights[0][0]), m_cpuWeights.dim());
398 m_weights.emplace(m_cpuWeights);
399 const bool enableThreadParallel = m_parameters.cpr_weights_thread_parallel_;
400 // CPU implementation wrapped for GPU
401 weightsCalculator = [this, enableThreadParallel]() -> GPUVector& {
402 // Use the CPU implementation to calculate the weights
403 ElementContext elemCtx(m_simulator);
404 Amg::getTrueImpesWeightsAnalytic(pressureIndex,
405 m_cpuWeights,
406 elemCtx, m_simulator.model(),
407 m_element_chunks,
408 enableThreadParallel);
409
410 // Copy CPU vector to GPU vector using main stream and asynchronous transfer
411 m_weights->copyFromHostAsync(m_cpuWeights);
412 return *m_weights;
413 };
414 } else {
415 OPM_THROW(std::invalid_argument,
416 "Weights type " + weightsType
417 + " not implemented for cpr."
418 " Please use quasiimpes, trueimpes or trueimpesanalytic.");
419 }
420 }
421 return weightsCalculator;
422 }
423
424 void updateMatrix(const Matrix& M)
425 {
426 if (!m_matrix) {
427 m_matrix.reset(new auto(GPUMatrix::fromMatrix(M)));
428 m_pinnedMatrixMemory = std::make_unique<PinnedMemoryHolder<real_type>>(
429 const_cast<real_type*>(&M[0][0][0][0]),
430 M.nonzeroes() * M[0][0].N() * M[0][0].M()
431 );
432 std::function<GPUVector&()> weightsCalculator = getWeightsCalculator();
433 m_gpuSolver = std::make_unique<SolverType>(
434 *m_matrix, isParallel(), m_propertyTree, pressureIndex, weightsCalculator, m_forceSerial, m_comm.get());
435 } else {
436 m_matrix->updateNonzeroValues(M, true);
437 m_gpuSolver->update();
438 }
439 }
440
441 void updateRhs(const Vector& b)
442 {
443 if (!m_rhs) {
444 m_rhs = std::make_unique<GPUVector>(b);
445 m_pinnedRhsMemory = std::make_unique<PinnedMemoryHolder<real_type>>(
446 const_cast<real_type*>(&b[0][0]),
447 b.dim()
448 );
449 } else {
450 // copy from host to device using main stream and asynchronous transfer
451 m_rhs->copyFromHostAsync(b);
452 }
453 }
454
455 FlowLinearSolverParameters m_parameters;
456 const Simulator& m_simulator;
457 const bool m_forceSerial;
458 PropertyTree m_propertyTree;
459 ElementChunksType m_element_chunks;
460
461 int m_lastSeenIterations = 0;
462 int m_solveCount = 0;
463
464 std::unique_ptr<GPUMatrix> m_matrix;
465
466 std::unique_ptr<SolverType> m_gpuSolver;
467
468 std::unique_ptr<GPUVector> m_rhs;
469 std::unique_ptr<GPUVector> m_x;
470
471 std::unique_ptr<PinnedMemoryHolder<real_type>> m_pinnedMatrixMemory;
472 std::unique_ptr<PinnedMemoryHolder<real_type>> m_pinnedRhsMemory;
473 std::unique_ptr<PinnedMemoryHolder<real_type>> m_pinnedXMemory;
474 std::unique_ptr<PinnedMemoryHolder<real_type>> m_pinnedWeightsMemory;
475
476 Vector m_cpuWeights;
477 std::optional<GPUVector> m_weights;
478 std::optional<GPUVectorInt> m_diagonalIndices;
479
480 std::shared_ptr<CommunicationType> m_comm;
481 std::vector<int> m_interiorRows;
482 std::vector<int> m_overlapRows;
483 std::any m_parallelInformation;
484
485};
486} // namespace Opm::gpuistl
487
488#endif
Abstract interface for ISTL solvers.
Definition AbstractISTLSolver.hpp:45
static bool checkConvergence(const Dune::InverseOperatorResult &result, const FlowLinearSolverParameters &parameters)
Check the convergence of the linear solver.
Definition AbstractISTLSolver.hpp:194
T get(const std::string &key) const
Retrieve property value given hierarchical property key.
Definition PropertyTree.cpp:59
The GpuSparseMatrixWrapper Checks CUDA/HIP version and dispatches a version either using the old or t...
Definition GpuSparseMatrixWrapper.hpp:61
static GpuSparseMatrixWrapper< real_type > fromMatrix(const MatrixType &matrix, bool copyNonZeroElementsDirectly=false)
Definition GpuSparseMatrixWrapper.hpp:179
Definition gpu_type_detection.hpp:30
bool isParallel() const
Check if we are running in parallel mode.
Definition ISTLSolverGPUISTL.hpp:315
ISTLSolverGPUISTL(const Simulator &simulator, const FlowLinearSolverParameters &parameters, bool forceSerial=false)
Construct a system solver.
Definition ISTLSolverGPUISTL.hpp:97
void setMatrix(const SparseMatrixAdapter &) override
Set the matrix for the solver.
Definition ISTLSolverGPUISTL.hpp:241
void prepare(const SparseMatrixAdapter &M, Vector &b) override
Prepare the solver with the given matrix and right-hand side vector.
Definition ISTLSolverGPUISTL.hpp:183
const CommunicationType * comm() const override
Get the communication object used by the solver.
Definition ISTLSolverGPUISTL.hpp:305
int getSolveCount() const override
Get the count of how many times the solver has been called.
Definition ISTLSolverGPUISTL.hpp:327
void setResidual(Vector &) override
Set the residual vector.
Definition ISTLSolverGPUISTL.hpp:215
void getResidual(Vector &b) const override
Get the residual vector.
Definition ISTLSolverGPUISTL.hpp:228
bool solve(Vector &x) override
Solve the system of linear equations Ax = b.
Definition ISTLSolverGPUISTL.hpp:257
ISTLSolverGPUISTL(const Simulator &simulator)
Construct a system solver.
Definition ISTLSolverGPUISTL.hpp:136
int iterations() const override
Definition ISTLSolverGPUISTL.hpp:297
void eraseMatrix() override
Signals that the memory for the matrix internally in the solver could be erased.
Definition ISTLSolverGPUISTL.hpp:146
int numAvailableSolvers() const override
Get the number of available solvers.
Definition ISTLSolverGPUISTL.hpp:169
void setActiveSolver(int num) override
Set the active solver by its index.
Definition ISTLSolverGPUISTL.hpp:157
void prepare(const Matrix &M, Vector &b) override
Prepare the solver with the given matrix and right-hand side vector.
Definition ISTLSolverGPUISTL.hpp:197
FlexibleSolverWrapper is compilational trick to reduce compile time overhead.
Definition FlexibleSolverWrapper.hpp:45
typename Properties::Detail::GetPropImpl< TypeTag, Property >::type::type GetPropType
get the type alias defined in the property (equivalent to old macro GET_PROP_TYPE(....
Definition propertysystem.hh:233
PropertyTree setupPropertyTree(FlowLinearSolverParameters p, bool linearSolverMaxIterSet, bool linearSolverReductionSet)
Set up a property tree intended for FlexibleSolver by either reading the tree from a JSON file or cre...
Definition setupPropertyTree.cpp:182
bool IsSet(bool errorIfNotRegistered=true)
Returns true if a parameter has been specified at runtime, false otherwise.
Definition parametersystem.hpp:270
auto Get(bool errorIfNotRegistered=true)
Retrieve a runtime parameter.
Definition parametersystem.hpp:187
This class carries all parameters for the NewtonIterationBlackoilInterleaved class.
Definition FlowLinearSolverParameters.hpp:98