123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391 |
- // Ceres Solver - A fast non-linear least squares minimizer
- // Copyright 2015 Google Inc. All rights reserved.
- // http://ceres-solver.org/
- //
- // Redistribution and use in source and binary forms, with or without
- // modification, are permitted provided that the following conditions are met:
- //
- // * Redistributions of source code must retain the above copyright notice,
- // this list of conditions and the following disclaimer.
- // * Redistributions in binary form must reproduce the above copyright notice,
- // this list of conditions and the following disclaimer in the documentation
- // and/or other materials provided with the distribution.
- // * Neither the name of Google Inc. nor the names of its contributors may be
- // used to endorse or promote products derived from this software without
- // specific prior written permission.
- //
- // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- // POSSIBILITY OF SUCH DAMAGE.
- //
- // Author: keir@google.com (Keir Mierle)
- #include "ceres/cgnr_solver.h"
- #include <memory>
- #include <utility>
- #include "ceres/block_jacobi_preconditioner.h"
- #include "ceres/conjugate_gradients_solver.h"
- #include "ceres/cuda_sparse_matrix.h"
- #include "ceres/cuda_vector.h"
- #include "ceres/internal/eigen.h"
- #include "ceres/linear_solver.h"
- #include "ceres/subset_preconditioner.h"
- #include "ceres/wall_time.h"
- #include "glog/logging.h"
- namespace ceres::internal {
- // A linear operator which takes a matrix A and a diagonal vector D and
- // performs products of the form
- //
- // (A^T A + D^T D)x
- //
- // This is used to implement iterative general sparse linear solving with
- // conjugate gradients, where A is the Jacobian and D is a regularizing
- // parameter. A brief proof that D^T D is the correct regularizer:
- //
- // Given a regularized least squares problem:
- //
- // min ||Ax - b||^2 + ||Dx||^2
- // x
- //
- // First expand into matrix notation:
- //
- // (Ax - b)^T (Ax - b) + xD^TDx
- //
- // Then multiply out to get:
- //
- // = xA^TAx - 2b^T Ax + b^Tb + xD^TDx
- //
- // Take the derivative:
- //
- // 0 = 2A^TAx - 2A^T b + 2 D^TDx
- // 0 = A^TAx - A^T b + D^TDx
- // 0 = (A^TA + D^TD)x - A^T b
- //
- // Thus, the symmetric system we need to solve for CGNR is
- //
- // Sx = z
- //
- // with S = A^TA + D^TD
- // and z = A^T b
- //
- // Note: This class is not thread safe, since it uses some temporary storage.
- class CERES_NO_EXPORT CgnrLinearOperator final
- : public ConjugateGradientsLinearOperator<Vector> {
- public:
- CgnrLinearOperator(const LinearOperator& A,
- const double* D,
- ContextImpl* context,
- int num_threads)
- : A_(A),
- D_(D),
- z_(Vector::Zero(A.num_rows())),
- context_(context),
- num_threads_(num_threads) {}
- void RightMultiplyAndAccumulate(const Vector& x, Vector& y) final {
- // z = Ax
- // y = y + Atz
- z_.setZero();
- A_.RightMultiplyAndAccumulate(x, z_, context_, num_threads_);
- A_.LeftMultiplyAndAccumulate(z_, y, context_, num_threads_);
- // y = y + DtDx
- if (D_ != nullptr) {
- int n = A_.num_cols();
- ParallelAssign(
- context_,
- num_threads_,
- y,
- y.array() + ConstVectorRef(D_, n).array().square() * x.array());
- }
- }
- private:
- const LinearOperator& A_;
- const double* D_;
- Vector z_;
- ContextImpl* context_;
- int num_threads_;
- };
- CgnrSolver::CgnrSolver(LinearSolver::Options options)
- : options_(std::move(options)) {
- if (options_.preconditioner_type != JACOBI &&
- options_.preconditioner_type != IDENTITY &&
- options_.preconditioner_type != SUBSET) {
- LOG(FATAL)
- << "Preconditioner = "
- << PreconditionerTypeToString(options_.preconditioner_type) << ". "
- << "Congratulations, you found a bug in Ceres. Please report it.";
- }
- }
- CgnrSolver::~CgnrSolver() {
- for (int i = 0; i < 4; ++i) {
- if (scratch_[i]) {
- delete scratch_[i];
- scratch_[i] = nullptr;
- }
- }
- }
// Solves (A^T A + D^T D) x = A^T b via preconditioned conjugate gradients,
// where D is per_solve_options.D (may be null, meaning no regularization).
// The preconditioner and scratch vectors are created on the first call and
// reused across solves.
LinearSolver::Summary CgnrSolver::SolveImpl(
    BlockSparseMatrix* A,
    const double* b,
    const LinearSolver::PerSolveOptions& per_solve_options,
    double* x) {
  EventLogger event_logger("CgnrSolver::Solve");

  // Lazily create the preconditioner; its structure is cached and only its
  // values are refreshed by Update() below on subsequent solves.
  if (!preconditioner_) {
    Preconditioner::Options preconditioner_options;
    preconditioner_options.type = options_.preconditioner_type;
    preconditioner_options.subset_preconditioner_start_row_block =
        options_.subset_preconditioner_start_row_block;
    preconditioner_options.sparse_linear_algebra_library_type =
        options_.sparse_linear_algebra_library_type;
    preconditioner_options.ordering_type = options_.ordering_type;
    preconditioner_options.num_threads = options_.num_threads;
    preconditioner_options.context = options_.context;
    // The constructor LOG(FATAL)s for any type other than JACOBI, SUBSET or
    // IDENTITY, so these three cases are exhaustive.
    if (options_.preconditioner_type == JACOBI) {
      preconditioner_ = std::make_unique<BlockSparseJacobiPreconditioner>(
          preconditioner_options, *A);
    } else if (options_.preconditioner_type == SUBSET) {
      preconditioner_ =
          std::make_unique<SubsetPreconditioner>(preconditioner_options, *A);
    } else {
      preconditioner_ = std::make_unique<IdentityPreconditioner>(A->num_cols());
    }
  }
  preconditioner_->Update(*A, per_solve_options.D);

  ConjugateGradientsSolverOptions cg_options;
  cg_options.min_num_iterations = options_.min_num_iterations;
  cg_options.max_num_iterations = options_.max_num_iterations;
  cg_options.residual_reset_period = options_.residual_reset_period;
  cg_options.q_tolerance = per_solve_options.q_tolerance;
  cg_options.r_tolerance = per_solve_options.r_tolerance;
  cg_options.context = options_.context;
  cg_options.num_threads = options_.num_threads;

  // lhs = AtA + DtD
  CgnrLinearOperator lhs(
      *A, per_solve_options.D, options_.context, options_.num_threads);
  // rhs = Atb.
  Vector rhs(A->num_cols());
  rhs.setZero();
  A->LeftMultiplyAndAccumulate(
      b, rhs.data(), options_.context, options_.num_threads);

  // The solution and the four scratch vectors are cached across solves so
  // they are only allocated once.
  cg_solution_ = Vector::Zero(A->num_cols());
  for (int i = 0; i < 4; ++i) {
    if (scratch_[i] == nullptr) {
      scratch_[i] = new Vector(A->num_cols());
    }
  }
  event_logger.AddEvent("Setup");

  LinearOperatorAdapter preconditioner(*preconditioner_);
  auto summary = ConjugateGradientsSolver(
      cg_options, lhs, rhs, preconditioner, scratch_, cg_solution_);
  // Copy the solution back into the caller-provided buffer.
  VectorRef(x, A->num_cols()) = cg_solution_;
  event_logger.AddEvent("Solve");
  return summary;
}
- #ifndef CERES_NO_CUDA
- // A linear operator which takes a matrix A and a diagonal vector D and
- // performs products of the form
- //
- // (A^T A + D^T D)x
- //
- // This is used to implement iterative general sparse linear solving with
- // conjugate gradients, where A is the Jacobian and D is a regularizing
- // parameter. A brief proof is included in cgnr_linear_operator.h.
- class CERES_NO_EXPORT CudaCgnrLinearOperator final
- : public ConjugateGradientsLinearOperator<CudaVector> {
- public:
- CudaCgnrLinearOperator(CudaSparseMatrix& A,
- const CudaVector& D,
- CudaVector* z)
- : A_(A), D_(D), z_(z) {}
- void RightMultiplyAndAccumulate(const CudaVector& x, CudaVector& y) final {
- // z = Ax
- z_->SetZero();
- A_.RightMultiplyAndAccumulate(x, z_);
- // y = y + Atz
- // = y + AtAx
- A_.LeftMultiplyAndAccumulate(*z_, &y);
- // y = y + DtDx
- y.DtDxpy(D_, x);
- }
- private:
- CudaSparseMatrix& A_;
- const CudaVector& D_;
- CudaVector* z_ = nullptr;
- };
// Trivial preconditioner that applies the identity operator.
class CERES_NO_EXPORT CudaIdentityPreconditioner final
    : public CudaPreconditioner {
 public:
  // Nothing to precompute for the identity.
  void Update(const CompressedRowSparseMatrix& A, const double* D) final {}
  // y = 1.0 * x + 1.0 * y, i.e. accumulate x into y unchanged.
  void RightMultiplyAndAccumulate(const CudaVector& x, CudaVector& y) final {
    y.Axpby(1.0, x, 1.0);
  }
};
// This class wraps the existing CPU Jacobi preconditioner, caches the structure
// of the block diagonal, and for each CGNR solve updates the values on the CPU
// and then copies them over to the GPU.
class CERES_NO_EXPORT CudaJacobiPreconditioner final
    : public CudaPreconditioner {
 public:
  // NOTE: the initializer list relies on member declaration order:
  // cpu_preconditioner_ is built from options_, and m_ from the CPU
  // preconditioner's matrix. Do not reorder the members below.
  explicit CudaJacobiPreconditioner(Preconditioner::Options options,
                                    const CompressedRowSparseMatrix& A)
      : options_(std::move(options)),
        cpu_preconditioner_(options_, A),
        m_(options_.context, cpu_preconditioner_.matrix()) {}
  ~CudaJacobiPreconditioner() = default;
  // Recompute the block-diagonal values on the CPU, then copy just the
  // values (the structure is cached) into the GPU matrix m_.
  void Update(const CompressedRowSparseMatrix& A, const double* D) final {
    cpu_preconditioner_.Update(A, D);
    m_.CopyValuesFromCpu(cpu_preconditioner_.matrix());
  }
  // y += M x, where M is the block-diagonal preconditioner matrix.
  void RightMultiplyAndAccumulate(const CudaVector& x, CudaVector& y) final {
    m_.RightMultiplyAndAccumulate(x, &y);
  }

 private:
  Preconditioner::Options options_;
  BlockCRSJacobiPreconditioner cpu_preconditioner_;
  CudaSparseMatrix m_;
};
// Stores the solver options; validation happens in Create().
CudaCgnrSolver::CudaCgnrSolver(LinearSolver::Options options)
    : options_(std::move(options)) {}
- CudaCgnrSolver::~CudaCgnrSolver() {
- for (int i = 0; i < 4; ++i) {
- if (scratch_[i]) {
- delete scratch_[i];
- scratch_[i] = nullptr;
- }
- }
- }
- std::unique_ptr<CudaCgnrSolver> CudaCgnrSolver::Create(
- LinearSolver::Options options, std::string* error) {
- CHECK(error != nullptr);
- if (options.preconditioner_type != IDENTITY &&
- options.preconditioner_type != JACOBI) {
- *error =
- "CudaCgnrSolver does not support preconditioner type " +
- std::string(PreconditionerTypeToString(options.preconditioner_type)) +
- ". ";
- return nullptr;
- }
- CHECK(options.context->IsCudaInitialized())
- << "CudaCgnrSolver requires CUDA initialization.";
- auto solver = std::make_unique<CudaCgnrSolver>(options);
- return solver;
- }
// Copies the CPU-side problem data (A, b, D) to the GPU. On the first call
// the GPU matrix/vectors, the preconditioner and the scratch space are
// allocated and A's structure is copied; subsequent calls transfer values
// only, assuming A's sparsity structure is unchanged.
void CudaCgnrSolver::CpuToGpuTransfer(const CompressedRowSparseMatrix& A,
                                      const double* b,
                                      const double* D) {
  if (A_ == nullptr) {
    // Assume structure is not cached, do an initialization and structural copy.
    A_ = std::make_unique<CudaSparseMatrix>(options_.context, A);
    b_ = std::make_unique<CudaVector>(options_.context, A.num_rows());
    x_ = std::make_unique<CudaVector>(options_.context, A.num_cols());
    Atb_ = std::make_unique<CudaVector>(options_.context, A.num_cols());
    Ax_ = std::make_unique<CudaVector>(options_.context, A.num_rows());
    D_ = std::make_unique<CudaVector>(options_.context, A.num_cols());
    Preconditioner::Options preconditioner_options;
    preconditioner_options.type = options_.preconditioner_type;
    preconditioner_options.subset_preconditioner_start_row_block =
        options_.subset_preconditioner_start_row_block;
    preconditioner_options.sparse_linear_algebra_library_type =
        options_.sparse_linear_algebra_library_type;
    preconditioner_options.ordering_type = options_.ordering_type;
    preconditioner_options.num_threads = options_.num_threads;
    preconditioner_options.context = options_.context;
    // Create() rejects everything except JACOBI and IDENTITY, so only these
    // two cases need handling here.
    if (options_.preconditioner_type == JACOBI) {
      preconditioner_ =
          std::make_unique<CudaJacobiPreconditioner>(preconditioner_options, A);
    } else {
      preconditioner_ = std::make_unique<CudaIdentityPreconditioner>();
    }
    // Scratch vectors for the conjugate gradients solver.
    for (int i = 0; i < 4; ++i) {
      scratch_[i] = new CudaVector(options_.context, A.num_cols());
    }
  } else {
    // Assume structure is cached, do a value copy.
    A_->CopyValuesFromCpu(A);
  }
  // b and D change every solve, so they are always transferred.
  b_->CopyFromCpu(ConstVectorRef(b, A.num_rows()));
  D_->CopyFromCpu(ConstVectorRef(D, A.num_cols()));
}
// Solves (A^T A + D^T D) x = A^T b on the GPU with preconditioned conjugate
// gradients; the solution is copied back into the caller-provided CPU
// buffer x. The ordering below matters: data transfer, then preconditioner
// update, then rhs formation, then the CG solve.
LinearSolver::Summary CudaCgnrSolver::SolveImpl(
    CompressedRowSparseMatrix* A,
    const double* b,
    const LinearSolver::PerSolveOptions& per_solve_options,
    double* x) {
  EventLogger event_logger("CudaCgnrSolver::Solve");
  LinearSolver::Summary summary;
  summary.num_iterations = 0;
  summary.termination_type = LinearSolverTerminationType::FATAL_ERROR;
  CpuToGpuTransfer(*A, b, per_solve_options.D);
  event_logger.AddEvent("CPU to GPU Transfer");
  preconditioner_->Update(*A, per_solve_options.D);
  event_logger.AddEvent("Preconditioner Update");

  // Form z = Atb.
  Atb_->SetZero();
  A_->LeftMultiplyAndAccumulate(*b_, Atb_.get());

  // Solve (AtA + DtD)x = z (= Atb). Ax_ is scratch storage for the
  // operator's intermediate product A x.
  x_->SetZero();
  CudaCgnrLinearOperator lhs(*A_, *D_, Ax_.get());
  event_logger.AddEvent("Setup");

  ConjugateGradientsSolverOptions cg_options;
  cg_options.min_num_iterations = options_.min_num_iterations;
  cg_options.max_num_iterations = options_.max_num_iterations;
  cg_options.residual_reset_period = options_.residual_reset_period;
  cg_options.q_tolerance = per_solve_options.q_tolerance;
  cg_options.r_tolerance = per_solve_options.r_tolerance;
  summary = ConjugateGradientsSolver(
      cg_options, lhs, *Atb_, *preconditioner_, scratch_, *x_);
  x_->CopyTo(x);
  event_logger.AddEvent("Solve");
  return summary;
}
- #endif // CERES_NO_CUDA
- } // namespace ceres::internal
|