123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645 |
- // Ceres Solver - A fast non-linear least squares minimizer
- // Copyright 2022 Google Inc. All rights reserved.
- // http://ceres-solver.org/
- //
- // Redistribution and use in source and binary forms, with or without
- // modification, are permitted provided that the following conditions are met:
- //
- // * Redistributions of source code must retain the above copyright notice,
- // this list of conditions and the following disclaimer.
- // * Redistributions in binary form must reproduce the above copyright notice,
- // this list of conditions and the following disclaimer in the documentation
- // and/or other materials provided with the distribution.
- // * Neither the name of Google Inc. nor the names of its contributors may be
- // used to endorse or promote products derived from this software without
- // specific prior written permission.
- //
- // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- // POSSIBILITY OF SUCH DAMAGE.
- //
- // Author: sameeragarwal@google.com (Sameer Agarwal)
- #include "ceres/dense_cholesky.h"
- #include <algorithm>
- #include <memory>
- #include <string>
- #include <utility>
- #include <vector>
- #include "ceres/internal/config.h"
- #include "ceres/iterative_refiner.h"
- #ifndef CERES_NO_CUDA
- #include "ceres/context_impl.h"
- #include "ceres/cuda_kernels_vector_ops.h"
- #include "cuda_runtime.h"
- #include "cusolverDn.h"
- #endif // CERES_NO_CUDA
- #ifndef CERES_NO_LAPACK
- // C interface to the LAPACK Cholesky factorization and triangular solve.
- extern "C" void dpotrf_(
- const char* uplo, const int* n, double* a, const int* lda, int* info);
- extern "C" void dpotrs_(const char* uplo,
- const int* n,
- const int* nrhs,
- const double* a,
- const int* lda,
- double* b,
- const int* ldb,
- int* info);
- extern "C" void spotrf_(
- const char* uplo, const int* n, float* a, const int* lda, int* info);
- extern "C" void spotrs_(const char* uplo,
- const int* n,
- const int* nrhs,
- const float* a,
- const int* lda,
- float* b,
- const int* ldb,
- int* info);
- #endif
- namespace ceres::internal {
- DenseCholesky::~DenseCholesky() = default;
- std::unique_ptr<DenseCholesky> DenseCholesky::Create(
- const LinearSolver::Options& options) {
- std::unique_ptr<DenseCholesky> dense_cholesky;
- switch (options.dense_linear_algebra_library_type) {
- case EIGEN:
- // Eigen mixed precision solver not yet implemented.
- if (options.use_mixed_precision_solves) {
- dense_cholesky = std::make_unique<FloatEigenDenseCholesky>();
- } else {
- dense_cholesky = std::make_unique<EigenDenseCholesky>();
- }
- break;
- case LAPACK:
- #ifndef CERES_NO_LAPACK
- // LAPACK mixed precision solver not yet implemented.
- if (options.use_mixed_precision_solves) {
- dense_cholesky = std::make_unique<FloatLAPACKDenseCholesky>();
- } else {
- dense_cholesky = std::make_unique<LAPACKDenseCholesky>();
- }
- break;
- #else
- LOG(FATAL) << "Ceres was compiled without support for LAPACK.";
- #endif
- case CUDA:
- #ifndef CERES_NO_CUDA
- if (options.use_mixed_precision_solves) {
- dense_cholesky = CUDADenseCholeskyMixedPrecision::Create(options);
- } else {
- dense_cholesky = CUDADenseCholesky::Create(options);
- }
- break;
- #else
- LOG(FATAL) << "Ceres was compiled without support for CUDA.";
- #endif
- default:
- LOG(FATAL) << "Unknown dense linear algebra library type : "
- << DenseLinearAlgebraLibraryTypeToString(
- options.dense_linear_algebra_library_type);
- }
- if (options.max_num_refinement_iterations > 0) {
- auto refiner = std::make_unique<DenseIterativeRefiner>(
- options.max_num_refinement_iterations);
- dense_cholesky = std::make_unique<RefinedDenseCholesky>(
- std::move(dense_cholesky), std::move(refiner));
- }
- return dense_cholesky;
- }
- LinearSolverTerminationType DenseCholesky::FactorAndSolve(
- int num_cols,
- double* lhs,
- const double* rhs,
- double* solution,
- std::string* message) {
- LinearSolverTerminationType termination_type =
- Factorize(num_cols, lhs, message);
- if (termination_type == LinearSolverTerminationType::SUCCESS) {
- termination_type = Solve(rhs, solution, message);
- }
- return termination_type;
- }
- LinearSolverTerminationType EigenDenseCholesky::Factorize(
- int num_cols, double* lhs, std::string* message) {
- Eigen::Map<Eigen::MatrixXd> m(lhs, num_cols, num_cols);
- llt_ = std::make_unique<LLTType>(m);
- if (llt_->info() != Eigen::Success) {
- *message = "Eigen failure. Unable to perform dense Cholesky factorization.";
- return LinearSolverTerminationType::FAILURE;
- }
- *message = "Success.";
- return LinearSolverTerminationType::SUCCESS;
- }
- LinearSolverTerminationType EigenDenseCholesky::Solve(const double* rhs,
- double* solution,
- std::string* message) {
- if (llt_->info() != Eigen::Success) {
- *message = "Eigen failure. Unable to perform dense Cholesky factorization.";
- return LinearSolverTerminationType::FAILURE;
- }
- VectorRef(solution, llt_->cols()) =
- llt_->solve(ConstVectorRef(rhs, llt_->cols()));
- *message = "Success.";
- return LinearSolverTerminationType::SUCCESS;
- }
- LinearSolverTerminationType FloatEigenDenseCholesky::Factorize(
- int num_cols, double* lhs, std::string* message) {
- // TODO(sameeragarwal): Check if this causes a double allocation.
- lhs_ = Eigen::Map<Eigen::MatrixXd>(lhs, num_cols, num_cols).cast<float>();
- llt_ = std::make_unique<LLTType>(lhs_);
- if (llt_->info() != Eigen::Success) {
- *message = "Eigen failure. Unable to perform dense Cholesky factorization.";
- return LinearSolverTerminationType::FAILURE;
- }
- *message = "Success.";
- return LinearSolverTerminationType::SUCCESS;
- }
- LinearSolverTerminationType FloatEigenDenseCholesky::Solve(
- const double* rhs, double* solution, std::string* message) {
- if (llt_->info() != Eigen::Success) {
- *message = "Eigen failure. Unable to perform dense Cholesky factorization.";
- return LinearSolverTerminationType::FAILURE;
- }
- rhs_ = ConstVectorRef(rhs, llt_->cols()).cast<float>();
- solution_ = llt_->solve(rhs_);
- VectorRef(solution, llt_->cols()) = solution_.cast<double>();
- *message = "Success.";
- return LinearSolverTerminationType::SUCCESS;
- }
- #ifndef CERES_NO_LAPACK
- LinearSolverTerminationType LAPACKDenseCholesky::Factorize(
- int num_cols, double* lhs, std::string* message) {
- lhs_ = lhs;
- num_cols_ = num_cols;
- const char uplo = 'L';
- int info = 0;
- dpotrf_(&uplo, &num_cols_, lhs_, &num_cols_, &info);
- if (info < 0) {
- termination_type_ = LinearSolverTerminationType::FATAL_ERROR;
- LOG(FATAL) << "Congratulations, you found a bug in Ceres. "
- << "Please report it. "
- << "LAPACK::dpotrf fatal error. "
- << "Argument: " << -info << " is invalid.";
- } else if (info > 0) {
- termination_type_ = LinearSolverTerminationType::FAILURE;
- *message = StringPrintf(
- "LAPACK::dpotrf numerical failure. "
- "The leading minor of order %d is not positive definite.",
- info);
- } else {
- termination_type_ = LinearSolverTerminationType::SUCCESS;
- *message = "Success.";
- }
- return termination_type_;
- }
- LinearSolverTerminationType LAPACKDenseCholesky::Solve(const double* rhs,
- double* solution,
- std::string* message) {
- const char uplo = 'L';
- const int nrhs = 1;
- int info = 0;
- VectorRef(solution, num_cols_) = ConstVectorRef(rhs, num_cols_);
- dpotrs_(
- &uplo, &num_cols_, &nrhs, lhs_, &num_cols_, solution, &num_cols_, &info);
- if (info < 0) {
- termination_type_ = LinearSolverTerminationType::FATAL_ERROR;
- LOG(FATAL) << "Congratulations, you found a bug in Ceres. "
- << "Please report it. "
- << "LAPACK::dpotrs fatal error. "
- << "Argument: " << -info << " is invalid.";
- }
- *message = "Success";
- termination_type_ = LinearSolverTerminationType::SUCCESS;
- return termination_type_;
- }
- LinearSolverTerminationType FloatLAPACKDenseCholesky::Factorize(
- int num_cols, double* lhs, std::string* message) {
- num_cols_ = num_cols;
- lhs_ = Eigen::Map<Eigen::MatrixXd>(lhs, num_cols, num_cols).cast<float>();
- const char uplo = 'L';
- int info = 0;
- spotrf_(&uplo, &num_cols_, lhs_.data(), &num_cols_, &info);
- if (info < 0) {
- termination_type_ = LinearSolverTerminationType::FATAL_ERROR;
- LOG(FATAL) << "Congratulations, you found a bug in Ceres. "
- << "Please report it. "
- << "LAPACK::spotrf fatal error. "
- << "Argument: " << -info << " is invalid.";
- } else if (info > 0) {
- termination_type_ = LinearSolverTerminationType::FAILURE;
- *message = StringPrintf(
- "LAPACK::spotrf numerical failure. "
- "The leading minor of order %d is not positive definite.",
- info);
- } else {
- termination_type_ = LinearSolverTerminationType::SUCCESS;
- *message = "Success.";
- }
- return termination_type_;
- }
- LinearSolverTerminationType FloatLAPACKDenseCholesky::Solve(
- const double* rhs, double* solution, std::string* message) {
- const char uplo = 'L';
- const int nrhs = 1;
- int info = 0;
- rhs_and_solution_ = ConstVectorRef(rhs, num_cols_).cast<float>();
- spotrs_(&uplo,
- &num_cols_,
- &nrhs,
- lhs_.data(),
- &num_cols_,
- rhs_and_solution_.data(),
- &num_cols_,
- &info);
- if (info < 0) {
- termination_type_ = LinearSolverTerminationType::FATAL_ERROR;
- LOG(FATAL) << "Congratulations, you found a bug in Ceres. "
- << "Please report it. "
- << "LAPACK::dpotrs fatal error. "
- << "Argument: " << -info << " is invalid.";
- }
- *message = "Success";
- termination_type_ = LinearSolverTerminationType::SUCCESS;
- VectorRef(solution, num_cols_) =
- rhs_and_solution_.head(num_cols_).cast<double>();
- return termination_type_;
- }
- #endif // CERES_NO_LAPACK
- RefinedDenseCholesky::RefinedDenseCholesky(
- std::unique_ptr<DenseCholesky> dense_cholesky,
- std::unique_ptr<DenseIterativeRefiner> iterative_refiner)
- : dense_cholesky_(std::move(dense_cholesky)),
- iterative_refiner_(std::move(iterative_refiner)) {}
- RefinedDenseCholesky::~RefinedDenseCholesky() = default;
- LinearSolverTerminationType RefinedDenseCholesky::Factorize(
- const int num_cols, double* lhs, std::string* message) {
- lhs_ = lhs;
- num_cols_ = num_cols;
- return dense_cholesky_->Factorize(num_cols, lhs, message);
- }
- LinearSolverTerminationType RefinedDenseCholesky::Solve(const double* rhs,
- double* solution,
- std::string* message) {
- CHECK(lhs_ != nullptr);
- auto termination_type = dense_cholesky_->Solve(rhs, solution, message);
- if (termination_type != LinearSolverTerminationType::SUCCESS) {
- return termination_type;
- }
- iterative_refiner_->Refine(
- num_cols_, lhs_, rhs, dense_cholesky_.get(), solution);
- return LinearSolverTerminationType::SUCCESS;
- }
- #ifndef CERES_NO_CUDA
- CUDADenseCholesky::CUDADenseCholesky(ContextImpl* context)
- : context_(context),
- lhs_{context},
- rhs_{context},
- device_workspace_{context},
- error_(context, 1) {}
- LinearSolverTerminationType CUDADenseCholesky::Factorize(int num_cols,
- double* lhs,
- std::string* message) {
- factorize_result_ = LinearSolverTerminationType::FATAL_ERROR;
- lhs_.Reserve(num_cols * num_cols);
- num_cols_ = num_cols;
- lhs_.CopyFromCpu(lhs, num_cols * num_cols);
- int device_workspace_size = 0;
- if (cusolverDnDpotrf_bufferSize(context_->cusolver_handle_,
- CUBLAS_FILL_MODE_LOWER,
- num_cols,
- lhs_.data(),
- num_cols,
- &device_workspace_size) !=
- CUSOLVER_STATUS_SUCCESS) {
- *message = "cuSolverDN::cusolverDnDpotrf_bufferSize failed.";
- return LinearSolverTerminationType::FATAL_ERROR;
- }
- device_workspace_.Reserve(device_workspace_size);
- if (cusolverDnDpotrf(context_->cusolver_handle_,
- CUBLAS_FILL_MODE_LOWER,
- num_cols,
- lhs_.data(),
- num_cols,
- reinterpret_cast<double*>(device_workspace_.data()),
- device_workspace_.size(),
- error_.data()) != CUSOLVER_STATUS_SUCCESS) {
- *message = "cuSolverDN::cusolverDnDpotrf failed.";
- return LinearSolverTerminationType::FATAL_ERROR;
- }
- int error = 0;
- error_.CopyToCpu(&error, 1);
- if (error < 0) {
- LOG(FATAL) << "Congratulations, you found a bug in Ceres - "
- << "please report it. "
- << "cuSolverDN::cusolverDnXpotrf fatal error. "
- << "Argument: " << -error << " is invalid.";
- // The following line is unreachable, but return failure just to be
- // pedantic, since the compiler does not know that.
- return LinearSolverTerminationType::FATAL_ERROR;
- } else if (error > 0) {
- *message = StringPrintf(
- "cuSolverDN::cusolverDnDpotrf numerical failure. "
- "The leading minor of order %d is not positive definite.",
- error);
- factorize_result_ = LinearSolverTerminationType::FAILURE;
- return LinearSolverTerminationType::FAILURE;
- }
- *message = "Success";
- factorize_result_ = LinearSolverTerminationType::SUCCESS;
- return LinearSolverTerminationType::SUCCESS;
- }
- LinearSolverTerminationType CUDADenseCholesky::Solve(const double* rhs,
- double* solution,
- std::string* message) {
- if (factorize_result_ != LinearSolverTerminationType::SUCCESS) {
- *message = "Factorize did not complete successfully previously.";
- return factorize_result_;
- }
- rhs_.CopyFromCpu(rhs, num_cols_);
- if (cusolverDnDpotrs(context_->cusolver_handle_,
- CUBLAS_FILL_MODE_LOWER,
- num_cols_,
- 1,
- lhs_.data(),
- num_cols_,
- rhs_.data(),
- num_cols_,
- error_.data()) != CUSOLVER_STATUS_SUCCESS) {
- *message = "cuSolverDN::cusolverDnDpotrs failed.";
- return LinearSolverTerminationType::FATAL_ERROR;
- }
- int error = 0;
- error_.CopyToCpu(&error, 1);
- if (error != 0) {
- LOG(FATAL) << "Congratulations, you found a bug in Ceres. "
- << "Please report it."
- << "cuSolverDN::cusolverDnDpotrs fatal error. "
- << "Argument: " << -error << " is invalid.";
- }
- rhs_.CopyToCpu(solution, num_cols_);
- *message = "Success";
- return LinearSolverTerminationType::SUCCESS;
- }
- std::unique_ptr<CUDADenseCholesky> CUDADenseCholesky::Create(
- const LinearSolver::Options& options) {
- if (options.dense_linear_algebra_library_type != CUDA ||
- options.context == nullptr || !options.context->IsCudaInitialized()) {
- return nullptr;
- }
- return std::unique_ptr<CUDADenseCholesky>(
- new CUDADenseCholesky(options.context));
- }
- std::unique_ptr<CUDADenseCholeskyMixedPrecision>
- CUDADenseCholeskyMixedPrecision::Create(const LinearSolver::Options& options) {
- if (options.dense_linear_algebra_library_type != CUDA ||
- !options.use_mixed_precision_solves || options.context == nullptr ||
- !options.context->IsCudaInitialized()) {
- return nullptr;
- }
- return std::unique_ptr<CUDADenseCholeskyMixedPrecision>(
- new CUDADenseCholeskyMixedPrecision(
- options.context, options.max_num_refinement_iterations));
- }
- LinearSolverTerminationType
- CUDADenseCholeskyMixedPrecision::CudaCholeskyFactorize(std::string* message) {
- int device_workspace_size = 0;
- if (cusolverDnSpotrf_bufferSize(context_->cusolver_handle_,
- CUBLAS_FILL_MODE_LOWER,
- num_cols_,
- lhs_fp32_.data(),
- num_cols_,
- &device_workspace_size) !=
- CUSOLVER_STATUS_SUCCESS) {
- *message = "cuSolverDN::cusolverDnSpotrf_bufferSize failed.";
- return LinearSolverTerminationType::FATAL_ERROR;
- }
- device_workspace_.Reserve(device_workspace_size);
- if (cusolverDnSpotrf(context_->cusolver_handle_,
- CUBLAS_FILL_MODE_LOWER,
- num_cols_,
- lhs_fp32_.data(),
- num_cols_,
- device_workspace_.data(),
- device_workspace_.size(),
- error_.data()) != CUSOLVER_STATUS_SUCCESS) {
- *message = "cuSolverDN::cusolverDnSpotrf failed.";
- return LinearSolverTerminationType::FATAL_ERROR;
- }
- int error = 0;
- error_.CopyToCpu(&error, 1);
- if (error < 0) {
- LOG(FATAL) << "Congratulations, you found a bug in Ceres - "
- << "please report it. "
- << "cuSolverDN::cusolverDnSpotrf fatal error. "
- << "Argument: " << -error << " is invalid.";
- // The following line is unreachable, but return failure just to be
- // pedantic, since the compiler does not know that.
- return LinearSolverTerminationType::FATAL_ERROR;
- }
- if (error > 0) {
- *message = StringPrintf(
- "cuSolverDN::cusolverDnSpotrf numerical failure. "
- "The leading minor of order %d is not positive definite.",
- error);
- factorize_result_ = LinearSolverTerminationType::FAILURE;
- return LinearSolverTerminationType::FAILURE;
- }
- *message = "Success";
- return LinearSolverTerminationType::SUCCESS;
- }
- LinearSolverTerminationType CUDADenseCholeskyMixedPrecision::CudaCholeskySolve(
- std::string* message) {
- CHECK_EQ(cudaMemcpyAsync(correction_fp32_.data(),
- residual_fp32_.data(),
- num_cols_ * sizeof(float),
- cudaMemcpyDeviceToDevice,
- context_->DefaultStream()),
- cudaSuccess);
- if (cusolverDnSpotrs(context_->cusolver_handle_,
- CUBLAS_FILL_MODE_LOWER,
- num_cols_,
- 1,
- lhs_fp32_.data(),
- num_cols_,
- correction_fp32_.data(),
- num_cols_,
- error_.data()) != CUSOLVER_STATUS_SUCCESS) {
- *message = "cuSolverDN::cusolverDnDpotrs failed.";
- return LinearSolverTerminationType::FATAL_ERROR;
- }
- int error = 0;
- error_.CopyToCpu(&error, 1);
- if (error != 0) {
- LOG(FATAL) << "Congratulations, you found a bug in Ceres. "
- << "Please report it."
- << "cuSolverDN::cusolverDnDpotrs fatal error. "
- << "Argument: " << -error << " is invalid.";
- }
- *message = "Success";
- return LinearSolverTerminationType::SUCCESS;
- }
- CUDADenseCholeskyMixedPrecision::CUDADenseCholeskyMixedPrecision(
- ContextImpl* context, int max_num_refinement_iterations)
- : context_(context),
- lhs_fp64_{context},
- rhs_fp64_{context},
- lhs_fp32_{context},
- device_workspace_{context},
- error_(context, 1),
- x_fp64_{context},
- correction_fp32_{context},
- residual_fp32_{context},
- residual_fp64_{context},
- max_num_refinement_iterations_(max_num_refinement_iterations) {}
- LinearSolverTerminationType CUDADenseCholeskyMixedPrecision::Factorize(
- int num_cols, double* lhs, std::string* message) {
- num_cols_ = num_cols;
- // Copy fp64 version of lhs to GPU.
- lhs_fp64_.Reserve(num_cols * num_cols);
- lhs_fp64_.CopyFromCpu(lhs, num_cols * num_cols);
- // Create an fp32 copy of lhs, lhs_fp32.
- lhs_fp32_.Reserve(num_cols * num_cols);
- CudaFP64ToFP32(lhs_fp64_.data(),
- lhs_fp32_.data(),
- num_cols * num_cols,
- context_->DefaultStream());
- // Factorize lhs_fp32.
- factorize_result_ = CudaCholeskyFactorize(message);
- return factorize_result_;
- }
- LinearSolverTerminationType CUDADenseCholeskyMixedPrecision::Solve(
- const double* rhs, double* solution, std::string* message) {
- // If factorization failed, return failure.
- if (factorize_result_ != LinearSolverTerminationType::SUCCESS) {
- *message = "Factorize did not complete successfully previously.";
- return factorize_result_;
- }
- // Reserve memory for all arrays.
- rhs_fp64_.Reserve(num_cols_);
- x_fp64_.Reserve(num_cols_);
- correction_fp32_.Reserve(num_cols_);
- residual_fp32_.Reserve(num_cols_);
- residual_fp64_.Reserve(num_cols_);
- // Initialize x = 0.
- CudaSetZeroFP64(x_fp64_.data(), num_cols_, context_->DefaultStream());
- // Initialize residual = rhs.
- rhs_fp64_.CopyFromCpu(rhs, num_cols_);
- residual_fp64_.CopyFromGPUArray(rhs_fp64_.data(), num_cols_);
- for (int i = 0; i <= max_num_refinement_iterations_; ++i) {
- // Cast residual from fp64 to fp32.
- CudaFP64ToFP32(residual_fp64_.data(),
- residual_fp32_.data(),
- num_cols_,
- context_->DefaultStream());
- // [fp32] c = lhs^-1 * residual.
- auto result = CudaCholeskySolve(message);
- if (result != LinearSolverTerminationType::SUCCESS) {
- return result;
- }
- // [fp64] x += c.
- CudaDsxpy(x_fp64_.data(),
- correction_fp32_.data(),
- num_cols_,
- context_->DefaultStream());
- if (i < max_num_refinement_iterations_) {
- // [fp64] residual = rhs - lhs * x
- // This is done in two steps:
- // 1. [fp64] residual = rhs
- residual_fp64_.CopyFromGPUArray(rhs_fp64_.data(), num_cols_);
- // 2. [fp64] residual = residual - lhs * x
- double alpha = -1.0;
- double beta = 1.0;
- cublasDsymv(context_->cublas_handle_,
- CUBLAS_FILL_MODE_LOWER,
- num_cols_,
- &alpha,
- lhs_fp64_.data(),
- num_cols_,
- x_fp64_.data(),
- 1,
- &beta,
- residual_fp64_.data(),
- 1);
- }
- }
- x_fp64_.CopyToCpu(solution, num_cols_);
- *message = "Success.";
- return LinearSolverTerminationType::SUCCESS;
- }
- #endif // CERES_NO_CUDA
- } // namespace ceres::internal
|