LiuZe
/
env


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645
							// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)

#include "ceres/dense_cholesky.h"

#include <algorithm>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "ceres/internal/config.h"
#include "ceres/iterative_refiner.h"

#ifndef CERES_NO_CUDA
#include "ceres/context_impl.h"
#include "ceres/cuda_kernels_vector_ops.h"
#include "cuda_runtime.h"
#include "cusolverDn.h"
#endif  // CERES_NO_CUDA

#ifndef CERES_NO_LAPACK

// C interface to the LAPACK Cholesky factorization and triangular solve.
extern "C" void dpotrf_(
    const char* uplo, const int* n, double* a, const int* lda, int* info);

extern "C" void dpotrs_(const char* uplo,
                        const int* n,
                        const int* nrhs,
                        const double* a,
                        const int* lda,
                        double* b,
                        const int* ldb,
                        int* info);

extern "C" void spotrf_(
    const char* uplo, const int* n, float* a, const int* lda, int* info);

extern "C" void spotrs_(const char* uplo,
                        const int* n,
                        const int* nrhs,
                        const float* a,
                        const int* lda,
                        float* b,
                        const int* ldb,
                        int* info);
#endif

namespace ceres::internal {

DenseCholesky::~DenseCholesky() = default;

std::unique_ptr<DenseCholesky> DenseCholesky::Create(
    const LinearSolver::Options& options) {
  std::unique_ptr<DenseCholesky> dense_cholesky;

  switch (options.dense_linear_algebra_library_type) {
    case EIGEN:
      // Eigen mixed precision solver not yet implemented.
      if (options.use_mixed_precision_solves) {
        dense_cholesky = std::make_unique<FloatEigenDenseCholesky>();
      } else {
        dense_cholesky = std::make_unique<EigenDenseCholesky>();
      }
      break;

    case LAPACK:
#ifndef CERES_NO_LAPACK
      // LAPACK mixed precision solver not yet implemented.
      if (options.use_mixed_precision_solves) {
        dense_cholesky = std::make_unique<FloatLAPACKDenseCholesky>();
      } else {
        dense_cholesky = std::make_unique<LAPACKDenseCholesky>();
      }
      break;
#else
      LOG(FATAL) << "Ceres was compiled without support for LAPACK.";
#endif

    case CUDA:
#ifndef CERES_NO_CUDA
      if (options.use_mixed_precision_solves) {
        dense_cholesky = CUDADenseCholeskyMixedPrecision::Create(options);
      } else {
        dense_cholesky = CUDADenseCholesky::Create(options);
      }
      break;
#else
      LOG(FATAL) << "Ceres was compiled without support for CUDA.";
#endif

    default:
      LOG(FATAL) << "Unknown dense linear algebra library type : "
                 << DenseLinearAlgebraLibraryTypeToString(
                        options.dense_linear_algebra_library_type);
  }

  if (options.max_num_refinement_iterations > 0) {
    auto refiner = std::make_unique<DenseIterativeRefiner>(
        options.max_num_refinement_iterations);
    dense_cholesky = std::make_unique<RefinedDenseCholesky>(
        std::move(dense_cholesky), std::move(refiner));
  }

  return dense_cholesky;
}

LinearSolverTerminationType DenseCholesky::FactorAndSolve(
    int num_cols,
    double* lhs,
    const double* rhs,
    double* solution,
    std::string* message) {
  LinearSolverTerminationType termination_type =
      Factorize(num_cols, lhs, message);
  if (termination_type == LinearSolverTerminationType::SUCCESS) {
    termination_type = Solve(rhs, solution, message);
  }
  return termination_type;
}

LinearSolverTerminationType EigenDenseCholesky::Factorize(
    int num_cols, double* lhs, std::string* message) {
  Eigen::Map<Eigen::MatrixXd> m(lhs, num_cols, num_cols);
  llt_ = std::make_unique<LLTType>(m);
  if (llt_->info() != Eigen::Success) {
    *message = "Eigen failure. Unable to perform dense Cholesky factorization.";
    return LinearSolverTerminationType::FAILURE;
  }

  *message = "Success.";
  return LinearSolverTerminationType::SUCCESS;
}

LinearSolverTerminationType EigenDenseCholesky::Solve(const double* rhs,
                                                      double* solution,
                                                      std::string* message) {
  if (llt_->info() != Eigen::Success) {
    *message = "Eigen failure. Unable to perform dense Cholesky factorization.";
    return LinearSolverTerminationType::FAILURE;
  }

  VectorRef(solution, llt_->cols()) =
      llt_->solve(ConstVectorRef(rhs, llt_->cols()));
  *message = "Success.";
  return LinearSolverTerminationType::SUCCESS;
}

LinearSolverTerminationType FloatEigenDenseCholesky::Factorize(
    int num_cols, double* lhs, std::string* message) {
  // TODO(sameeragarwal): Check if this causes a double allocation.
  lhs_ = Eigen::Map<Eigen::MatrixXd>(lhs, num_cols, num_cols).cast<float>();
  llt_ = std::make_unique<LLTType>(lhs_);
  if (llt_->info() != Eigen::Success) {
    *message = "Eigen failure. Unable to perform dense Cholesky factorization.";
    return LinearSolverTerminationType::FAILURE;
  }

  *message = "Success.";
  return LinearSolverTerminationType::SUCCESS;
}

LinearSolverTerminationType FloatEigenDenseCholesky::Solve(
    const double* rhs, double* solution, std::string* message) {
  if (llt_->info() != Eigen::Success) {
    *message = "Eigen failure. Unable to perform dense Cholesky factorization.";
    return LinearSolverTerminationType::FAILURE;
  }

  rhs_ = ConstVectorRef(rhs, llt_->cols()).cast<float>();
  solution_ = llt_->solve(rhs_);
  VectorRef(solution, llt_->cols()) = solution_.cast<double>();
  *message = "Success.";
  return LinearSolverTerminationType::SUCCESS;
}

#ifndef CERES_NO_LAPACK
LinearSolverTerminationType LAPACKDenseCholesky::Factorize(
    int num_cols, double* lhs, std::string* message) {
  lhs_ = lhs;
  num_cols_ = num_cols;

  const char uplo = 'L';
  int info = 0;
  dpotrf_(&uplo, &num_cols_, lhs_, &num_cols_, &info);

  if (info < 0) {
    termination_type_ = LinearSolverTerminationType::FATAL_ERROR;
    LOG(FATAL) << "Congratulations, you found a bug in Ceres. "
               << "Please report it. "
               << "LAPACK::dpotrf fatal error. "
               << "Argument: " << -info << " is invalid.";
  } else if (info > 0) {
    termination_type_ = LinearSolverTerminationType::FAILURE;
    *message = StringPrintf(
        "LAPACK::dpotrf numerical failure. "
        "The leading minor of order %d is not positive definite.",
        info);
  } else {
    termination_type_ = LinearSolverTerminationType::SUCCESS;
    *message = "Success.";
  }
  return termination_type_;
}

LinearSolverTerminationType LAPACKDenseCholesky::Solve(const double* rhs,
                                                       double* solution,
                                                       std::string* message) {
  const char uplo = 'L';
  const int nrhs = 1;
  int info = 0;

  VectorRef(solution, num_cols_) = ConstVectorRef(rhs, num_cols_);
  dpotrs_(
      &uplo, &num_cols_, &nrhs, lhs_, &num_cols_, solution, &num_cols_, &info);

  if (info < 0) {
    termination_type_ = LinearSolverTerminationType::FATAL_ERROR;
    LOG(FATAL) << "Congratulations, you found a bug in Ceres. "
               << "Please report it. "
               << "LAPACK::dpotrs fatal error. "
               << "Argument: " << -info << " is invalid.";
  }

  *message = "Success";
  termination_type_ = LinearSolverTerminationType::SUCCESS;

  return termination_type_;
}

LinearSolverTerminationType FloatLAPACKDenseCholesky::Factorize(
    int num_cols, double* lhs, std::string* message) {
  num_cols_ = num_cols;
  lhs_ = Eigen::Map<Eigen::MatrixXd>(lhs, num_cols, num_cols).cast<float>();

  const char uplo = 'L';
  int info = 0;
  spotrf_(&uplo, &num_cols_, lhs_.data(), &num_cols_, &info);

  if (info < 0) {
    termination_type_ = LinearSolverTerminationType::FATAL_ERROR;
    LOG(FATAL) << "Congratulations, you found a bug in Ceres. "
               << "Please report it. "
               << "LAPACK::spotrf fatal error. "
               << "Argument: " << -info << " is invalid.";
  } else if (info > 0) {
    termination_type_ = LinearSolverTerminationType::FAILURE;
    *message = StringPrintf(
        "LAPACK::spotrf numerical failure. "
        "The leading minor of order %d is not positive definite.",
        info);
  } else {
    termination_type_ = LinearSolverTerminationType::SUCCESS;
    *message = "Success.";
  }
  return termination_type_;
}

LinearSolverTerminationType FloatLAPACKDenseCholesky::Solve(
    const double* rhs, double* solution, std::string* message) {
  const char uplo = 'L';
  const int nrhs = 1;
  int info = 0;
  rhs_and_solution_ = ConstVectorRef(rhs, num_cols_).cast<float>();
  spotrs_(&uplo,
          &num_cols_,
          &nrhs,
          lhs_.data(),
          &num_cols_,
          rhs_and_solution_.data(),
          &num_cols_,
          &info);

  if (info < 0) {
    termination_type_ = LinearSolverTerminationType::FATAL_ERROR;
    LOG(FATAL) << "Congratulations, you found a bug in Ceres. "
               << "Please report it. "
               << "LAPACK::dpotrs fatal error. "
               << "Argument: " << -info << " is invalid.";
  }

  *message = "Success";
  termination_type_ = LinearSolverTerminationType::SUCCESS;
  VectorRef(solution, num_cols_) =
      rhs_and_solution_.head(num_cols_).cast<double>();
  return termination_type_;
}

#endif  // CERES_NO_LAPACK

RefinedDenseCholesky::RefinedDenseCholesky(
    std::unique_ptr<DenseCholesky> dense_cholesky,
    std::unique_ptr<DenseIterativeRefiner> iterative_refiner)
    : dense_cholesky_(std::move(dense_cholesky)),
      iterative_refiner_(std::move(iterative_refiner)) {}

RefinedDenseCholesky::~RefinedDenseCholesky() = default;

LinearSolverTerminationType RefinedDenseCholesky::Factorize(
    const int num_cols, double* lhs, std::string* message) {
  lhs_ = lhs;
  num_cols_ = num_cols;
  return dense_cholesky_->Factorize(num_cols, lhs, message);
}

LinearSolverTerminationType RefinedDenseCholesky::Solve(const double* rhs,
                                                        double* solution,
                                                        std::string* message) {
  CHECK(lhs_ != nullptr);
  auto termination_type = dense_cholesky_->Solve(rhs, solution, message);
  if (termination_type != LinearSolverTerminationType::SUCCESS) {
    return termination_type;
  }

  iterative_refiner_->Refine(
      num_cols_, lhs_, rhs, dense_cholesky_.get(), solution);
  return LinearSolverTerminationType::SUCCESS;
}

#ifndef CERES_NO_CUDA

CUDADenseCholesky::CUDADenseCholesky(ContextImpl* context)
    : context_(context),
      lhs_{context},
      rhs_{context},
      device_workspace_{context},
      error_(context, 1) {}

LinearSolverTerminationType CUDADenseCholesky::Factorize(int num_cols,
                                                         double* lhs,
                                                         std::string* message) {
  factorize_result_ = LinearSolverTerminationType::FATAL_ERROR;
  lhs_.Reserve(num_cols * num_cols);
  num_cols_ = num_cols;
  lhs_.CopyFromCpu(lhs, num_cols * num_cols);
  int device_workspace_size = 0;
  if (cusolverDnDpotrf_bufferSize(context_->cusolver_handle_,
                                  CUBLAS_FILL_MODE_LOWER,
                                  num_cols,
                                  lhs_.data(),
                                  num_cols,
                                  &device_workspace_size) !=
      CUSOLVER_STATUS_SUCCESS) {
    *message = "cuSolverDN::cusolverDnDpotrf_bufferSize failed.";
    return LinearSolverTerminationType::FATAL_ERROR;
  }
  device_workspace_.Reserve(device_workspace_size);
  if (cusolverDnDpotrf(context_->cusolver_handle_,
                       CUBLAS_FILL_MODE_LOWER,
                       num_cols,
                       lhs_.data(),
                       num_cols,
                       reinterpret_cast<double*>(device_workspace_.data()),
                       device_workspace_.size(),
                       error_.data()) != CUSOLVER_STATUS_SUCCESS) {
    *message = "cuSolverDN::cusolverDnDpotrf failed.";
    return LinearSolverTerminationType::FATAL_ERROR;
  }
  int error = 0;
  error_.CopyToCpu(&error, 1);
  if (error < 0) {
    LOG(FATAL) << "Congratulations, you found a bug in Ceres - "
               << "please report it. "
               << "cuSolverDN::cusolverDnXpotrf fatal error. "
               << "Argument: " << -error << " is invalid.";
    // The following line is unreachable, but return failure just to be
    // pedantic, since the compiler does not know that.
    return LinearSolverTerminationType::FATAL_ERROR;
  } else if (error > 0) {
    *message = StringPrintf(
        "cuSolverDN::cusolverDnDpotrf numerical failure. "
        "The leading minor of order %d is not positive definite.",
        error);
    factorize_result_ = LinearSolverTerminationType::FAILURE;
    return LinearSolverTerminationType::FAILURE;
  }
  *message = "Success";
  factorize_result_ = LinearSolverTerminationType::SUCCESS;
  return LinearSolverTerminationType::SUCCESS;
}

LinearSolverTerminationType CUDADenseCholesky::Solve(const double* rhs,
                                                     double* solution,
                                                     std::string* message) {
  if (factorize_result_ != LinearSolverTerminationType::SUCCESS) {
    *message = "Factorize did not complete successfully previously.";
    return factorize_result_;
  }
  rhs_.CopyFromCpu(rhs, num_cols_);
  if (cusolverDnDpotrs(context_->cusolver_handle_,
                       CUBLAS_FILL_MODE_LOWER,
                       num_cols_,
                       1,
                       lhs_.data(),
                       num_cols_,
                       rhs_.data(),
                       num_cols_,
                       error_.data()) != CUSOLVER_STATUS_SUCCESS) {
    *message = "cuSolverDN::cusolverDnDpotrs failed.";
    return LinearSolverTerminationType::FATAL_ERROR;
  }
  int error = 0;
  error_.CopyToCpu(&error, 1);
  if (error != 0) {
    LOG(FATAL) << "Congratulations, you found a bug in Ceres. "
               << "Please report it."
               << "cuSolverDN::cusolverDnDpotrs fatal error. "
               << "Argument: " << -error << " is invalid.";
  }
  rhs_.CopyToCpu(solution, num_cols_);
  *message = "Success";
  return LinearSolverTerminationType::SUCCESS;
}

std::unique_ptr<CUDADenseCholesky> CUDADenseCholesky::Create(
    const LinearSolver::Options& options) {
  if (options.dense_linear_algebra_library_type != CUDA ||
      options.context == nullptr || !options.context->IsCudaInitialized()) {
    return nullptr;
  }
  return std::unique_ptr<CUDADenseCholesky>(
      new CUDADenseCholesky(options.context));
}

std::unique_ptr<CUDADenseCholeskyMixedPrecision>
CUDADenseCholeskyMixedPrecision::Create(const LinearSolver::Options& options) {
  if (options.dense_linear_algebra_library_type != CUDA ||
      !options.use_mixed_precision_solves || options.context == nullptr ||
      !options.context->IsCudaInitialized()) {
    return nullptr;
  }
  return std::unique_ptr<CUDADenseCholeskyMixedPrecision>(
      new CUDADenseCholeskyMixedPrecision(
          options.context, options.max_num_refinement_iterations));
}

LinearSolverTerminationType
CUDADenseCholeskyMixedPrecision::CudaCholeskyFactorize(std::string* message) {
  int device_workspace_size = 0;
  if (cusolverDnSpotrf_bufferSize(context_->cusolver_handle_,
                                  CUBLAS_FILL_MODE_LOWER,
                                  num_cols_,
                                  lhs_fp32_.data(),
                                  num_cols_,
                                  &device_workspace_size) !=
      CUSOLVER_STATUS_SUCCESS) {
    *message = "cuSolverDN::cusolverDnSpotrf_bufferSize failed.";
    return LinearSolverTerminationType::FATAL_ERROR;
  }
  device_workspace_.Reserve(device_workspace_size);
  if (cusolverDnSpotrf(context_->cusolver_handle_,
                       CUBLAS_FILL_MODE_LOWER,
                       num_cols_,
                       lhs_fp32_.data(),
                       num_cols_,
                       device_workspace_.data(),
                       device_workspace_.size(),
                       error_.data()) != CUSOLVER_STATUS_SUCCESS) {
    *message = "cuSolverDN::cusolverDnSpotrf failed.";
    return LinearSolverTerminationType::FATAL_ERROR;
  }
  int error = 0;
  error_.CopyToCpu(&error, 1);
  if (error < 0) {
    LOG(FATAL) << "Congratulations, you found a bug in Ceres - "
               << "please report it. "
               << "cuSolverDN::cusolverDnSpotrf fatal error. "
               << "Argument: " << -error << " is invalid.";
    // The following line is unreachable, but return failure just to be
    // pedantic, since the compiler does not know that.
    return LinearSolverTerminationType::FATAL_ERROR;
  }
  if (error > 0) {
    *message = StringPrintf(
        "cuSolverDN::cusolverDnSpotrf numerical failure. "
        "The leading minor of order %d is not positive definite.",
        error);
    factorize_result_ = LinearSolverTerminationType::FAILURE;
    return LinearSolverTerminationType::FAILURE;
  }
  *message = "Success";
  return LinearSolverTerminationType::SUCCESS;
}

LinearSolverTerminationType CUDADenseCholeskyMixedPrecision::CudaCholeskySolve(
    std::string* message) {
  CHECK_EQ(cudaMemcpyAsync(correction_fp32_.data(),
                           residual_fp32_.data(),
                           num_cols_ * sizeof(float),
                           cudaMemcpyDeviceToDevice,
                           context_->DefaultStream()),
           cudaSuccess);
  if (cusolverDnSpotrs(context_->cusolver_handle_,
                       CUBLAS_FILL_MODE_LOWER,
                       num_cols_,
                       1,
                       lhs_fp32_.data(),
                       num_cols_,
                       correction_fp32_.data(),
                       num_cols_,
                       error_.data()) != CUSOLVER_STATUS_SUCCESS) {
    *message = "cuSolverDN::cusolverDnDpotrs failed.";
    return LinearSolverTerminationType::FATAL_ERROR;
  }
  int error = 0;
  error_.CopyToCpu(&error, 1);
  if (error != 0) {
    LOG(FATAL) << "Congratulations, you found a bug in Ceres. "
               << "Please report it."
               << "cuSolverDN::cusolverDnDpotrs fatal error. "
               << "Argument: " << -error << " is invalid.";
  }
  *message = "Success";
  return LinearSolverTerminationType::SUCCESS;
}

CUDADenseCholeskyMixedPrecision::CUDADenseCholeskyMixedPrecision(
    ContextImpl* context, int max_num_refinement_iterations)
    : context_(context),
      lhs_fp64_{context},
      rhs_fp64_{context},
      lhs_fp32_{context},
      device_workspace_{context},
      error_(context, 1),
      x_fp64_{context},
      correction_fp32_{context},
      residual_fp32_{context},
      residual_fp64_{context},
      max_num_refinement_iterations_(max_num_refinement_iterations) {}

LinearSolverTerminationType CUDADenseCholeskyMixedPrecision::Factorize(
    int num_cols, double* lhs, std::string* message) {
  num_cols_ = num_cols;

  // Copy fp64 version of lhs to GPU.
  lhs_fp64_.Reserve(num_cols * num_cols);
  lhs_fp64_.CopyFromCpu(lhs, num_cols * num_cols);

  // Create an fp32 copy of lhs, lhs_fp32.
  lhs_fp32_.Reserve(num_cols * num_cols);
  CudaFP64ToFP32(lhs_fp64_.data(),
                 lhs_fp32_.data(),
                 num_cols * num_cols,
                 context_->DefaultStream());

  // Factorize lhs_fp32.
  factorize_result_ = CudaCholeskyFactorize(message);
  return factorize_result_;
}

LinearSolverTerminationType CUDADenseCholeskyMixedPrecision::Solve(
    const double* rhs, double* solution, std::string* message) {
  // If factorization failed, return failure.
  if (factorize_result_ != LinearSolverTerminationType::SUCCESS) {
    *message = "Factorize did not complete successfully previously.";
    return factorize_result_;
  }

  // Reserve memory for all arrays.
  rhs_fp64_.Reserve(num_cols_);
  x_fp64_.Reserve(num_cols_);
  correction_fp32_.Reserve(num_cols_);
  residual_fp32_.Reserve(num_cols_);
  residual_fp64_.Reserve(num_cols_);

  // Initialize x = 0.
  CudaSetZeroFP64(x_fp64_.data(), num_cols_, context_->DefaultStream());

  // Initialize residual = rhs.
  rhs_fp64_.CopyFromCpu(rhs, num_cols_);
  residual_fp64_.CopyFromGPUArray(rhs_fp64_.data(), num_cols_);

  for (int i = 0; i <= max_num_refinement_iterations_; ++i) {
    // Cast residual from fp64 to fp32.
    CudaFP64ToFP32(residual_fp64_.data(),
                   residual_fp32_.data(),
                   num_cols_,
                   context_->DefaultStream());
    // [fp32] c = lhs^-1 * residual.
    auto result = CudaCholeskySolve(message);
    if (result != LinearSolverTerminationType::SUCCESS) {
      return result;
    }
    // [fp64] x += c.
    CudaDsxpy(x_fp64_.data(),
              correction_fp32_.data(),
              num_cols_,
              context_->DefaultStream());
    if (i < max_num_refinement_iterations_) {
      // [fp64] residual = rhs - lhs * x
      // This is done in two steps:
      // 1. [fp64] residual = rhs
      residual_fp64_.CopyFromGPUArray(rhs_fp64_.data(), num_cols_);
      // 2. [fp64] residual = residual - lhs * x
      double alpha = -1.0;
      double beta = 1.0;
      cublasDsymv(context_->cublas_handle_,
                  CUBLAS_FILL_MODE_LOWER,
                  num_cols_,
                  &alpha,
                  lhs_fp64_.data(),
                  num_cols_,
                  x_fp64_.data(),
                  1,
                  &beta,
                  residual_fp64_.data(),
                  1);
    }
  }
  x_fp64_.CopyToCpu(solution, num_cols_);
  *message = "Success.";
  return LinearSolverTerminationType::SUCCESS;
}

#endif  // CERES_NO_CUDA

}  // namespace ceres::internal