// cgnr_solver.cc
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: keir@google.com (Keir Mierle)

#include "ceres/cgnr_solver.h"

#include <memory>
#include <utility>

#include "ceres/block_jacobi_preconditioner.h"
#include "ceres/conjugate_gradients_solver.h"
#include "ceres/cuda_sparse_matrix.h"
#include "ceres/cuda_vector.h"
#include "ceres/internal/eigen.h"
#include "ceres/linear_solver.h"
#include "ceres/subset_preconditioner.h"
#include "ceres/wall_time.h"
#include "glog/logging.h"

namespace ceres::internal {
  43. // A linear operator which takes a matrix A and a diagonal vector D and
  44. // performs products of the form
  45. //
  46. // (A^T A + D^T D)x
  47. //
  48. // This is used to implement iterative general sparse linear solving with
  49. // conjugate gradients, where A is the Jacobian and D is a regularizing
  50. // parameter. A brief proof that D^T D is the correct regularizer:
  51. //
  52. // Given a regularized least squares problem:
  53. //
  54. // min ||Ax - b||^2 + ||Dx||^2
  55. // x
  56. //
  57. // First expand into matrix notation:
  58. //
  59. // (Ax - b)^T (Ax - b) + xD^TDx
  60. //
  61. // Then multiply out to get:
  62. //
  63. // = xA^TAx - 2b^T Ax + b^Tb + xD^TDx
  64. //
  65. // Take the derivative:
  66. //
  67. // 0 = 2A^TAx - 2A^T b + 2 D^TDx
  68. // 0 = A^TAx - A^T b + D^TDx
  69. // 0 = (A^TA + D^TD)x - A^T b
  70. //
  71. // Thus, the symmetric system we need to solve for CGNR is
  72. //
  73. // Sx = z
  74. //
  75. // with S = A^TA + D^TD
  76. // and z = A^T b
  77. //
  78. // Note: This class is not thread safe, since it uses some temporary storage.
  79. class CERES_NO_EXPORT CgnrLinearOperator final
  80. : public ConjugateGradientsLinearOperator<Vector> {
  81. public:
  82. CgnrLinearOperator(const LinearOperator& A,
  83. const double* D,
  84. ContextImpl* context,
  85. int num_threads)
  86. : A_(A),
  87. D_(D),
  88. z_(Vector::Zero(A.num_rows())),
  89. context_(context),
  90. num_threads_(num_threads) {}
  91. void RightMultiplyAndAccumulate(const Vector& x, Vector& y) final {
  92. // z = Ax
  93. // y = y + Atz
  94. z_.setZero();
  95. A_.RightMultiplyAndAccumulate(x, z_, context_, num_threads_);
  96. A_.LeftMultiplyAndAccumulate(z_, y, context_, num_threads_);
  97. // y = y + DtDx
  98. if (D_ != nullptr) {
  99. int n = A_.num_cols();
  100. ParallelAssign(
  101. context_,
  102. num_threads_,
  103. y,
  104. y.array() + ConstVectorRef(D_, n).array().square() * x.array());
  105. }
  106. }
  107. private:
  108. const LinearOperator& A_;
  109. const double* D_;
  110. Vector z_;
  111. ContextImpl* context_;
  112. int num_threads_;
  113. };
  114. CgnrSolver::CgnrSolver(LinearSolver::Options options)
  115. : options_(std::move(options)) {
  116. if (options_.preconditioner_type != JACOBI &&
  117. options_.preconditioner_type != IDENTITY &&
  118. options_.preconditioner_type != SUBSET) {
  119. LOG(FATAL)
  120. << "Preconditioner = "
  121. << PreconditionerTypeToString(options_.preconditioner_type) << ". "
  122. << "Congratulations, you found a bug in Ceres. Please report it.";
  123. }
  124. }
  125. CgnrSolver::~CgnrSolver() {
  126. for (int i = 0; i < 4; ++i) {
  127. if (scratch_[i]) {
  128. delete scratch_[i];
  129. scratch_[i] = nullptr;
  130. }
  131. }
  132. }
  133. LinearSolver::Summary CgnrSolver::SolveImpl(
  134. BlockSparseMatrix* A,
  135. const double* b,
  136. const LinearSolver::PerSolveOptions& per_solve_options,
  137. double* x) {
  138. EventLogger event_logger("CgnrSolver::Solve");
  139. if (!preconditioner_) {
  140. Preconditioner::Options preconditioner_options;
  141. preconditioner_options.type = options_.preconditioner_type;
  142. preconditioner_options.subset_preconditioner_start_row_block =
  143. options_.subset_preconditioner_start_row_block;
  144. preconditioner_options.sparse_linear_algebra_library_type =
  145. options_.sparse_linear_algebra_library_type;
  146. preconditioner_options.ordering_type = options_.ordering_type;
  147. preconditioner_options.num_threads = options_.num_threads;
  148. preconditioner_options.context = options_.context;
  149. if (options_.preconditioner_type == JACOBI) {
  150. preconditioner_ = std::make_unique<BlockSparseJacobiPreconditioner>(
  151. preconditioner_options, *A);
  152. } else if (options_.preconditioner_type == SUBSET) {
  153. preconditioner_ =
  154. std::make_unique<SubsetPreconditioner>(preconditioner_options, *A);
  155. } else {
  156. preconditioner_ = std::make_unique<IdentityPreconditioner>(A->num_cols());
  157. }
  158. }
  159. preconditioner_->Update(*A, per_solve_options.D);
  160. ConjugateGradientsSolverOptions cg_options;
  161. cg_options.min_num_iterations = options_.min_num_iterations;
  162. cg_options.max_num_iterations = options_.max_num_iterations;
  163. cg_options.residual_reset_period = options_.residual_reset_period;
  164. cg_options.q_tolerance = per_solve_options.q_tolerance;
  165. cg_options.r_tolerance = per_solve_options.r_tolerance;
  166. cg_options.context = options_.context;
  167. cg_options.num_threads = options_.num_threads;
  168. // lhs = AtA + DtD
  169. CgnrLinearOperator lhs(
  170. *A, per_solve_options.D, options_.context, options_.num_threads);
  171. // rhs = Atb.
  172. Vector rhs(A->num_cols());
  173. rhs.setZero();
  174. A->LeftMultiplyAndAccumulate(
  175. b, rhs.data(), options_.context, options_.num_threads);
  176. cg_solution_ = Vector::Zero(A->num_cols());
  177. for (int i = 0; i < 4; ++i) {
  178. if (scratch_[i] == nullptr) {
  179. scratch_[i] = new Vector(A->num_cols());
  180. }
  181. }
  182. event_logger.AddEvent("Setup");
  183. LinearOperatorAdapter preconditioner(*preconditioner_);
  184. auto summary = ConjugateGradientsSolver(
  185. cg_options, lhs, rhs, preconditioner, scratch_, cg_solution_);
  186. VectorRef(x, A->num_cols()) = cg_solution_;
  187. event_logger.AddEvent("Solve");
  188. return summary;
  189. }
#ifndef CERES_NO_CUDA
  191. // A linear operator which takes a matrix A and a diagonal vector D and
  192. // performs products of the form
  193. //
  194. // (A^T A + D^T D)x
  195. //
  196. // This is used to implement iterative general sparse linear solving with
  197. // conjugate gradients, where A is the Jacobian and D is a regularizing
  198. // parameter. A brief proof is included in cgnr_linear_operator.h.
  199. class CERES_NO_EXPORT CudaCgnrLinearOperator final
  200. : public ConjugateGradientsLinearOperator<CudaVector> {
  201. public:
  202. CudaCgnrLinearOperator(CudaSparseMatrix& A,
  203. const CudaVector& D,
  204. CudaVector* z)
  205. : A_(A), D_(D), z_(z) {}
  206. void RightMultiplyAndAccumulate(const CudaVector& x, CudaVector& y) final {
  207. // z = Ax
  208. z_->SetZero();
  209. A_.RightMultiplyAndAccumulate(x, z_);
  210. // y = y + Atz
  211. // = y + AtAx
  212. A_.LeftMultiplyAndAccumulate(*z_, &y);
  213. // y = y + DtDx
  214. y.DtDxpy(D_, x);
  215. }
  216. private:
  217. CudaSparseMatrix& A_;
  218. const CudaVector& D_;
  219. CudaVector* z_ = nullptr;
  220. };
  221. class CERES_NO_EXPORT CudaIdentityPreconditioner final
  222. : public CudaPreconditioner {
  223. public:
  224. void Update(const CompressedRowSparseMatrix& A, const double* D) final {}
  225. void RightMultiplyAndAccumulate(const CudaVector& x, CudaVector& y) final {
  226. y.Axpby(1.0, x, 1.0);
  227. }
  228. };
  229. // This class wraps the existing CPU Jacobi preconditioner, caches the structure
  230. // of the block diagonal, and for each CGNR solve updates the values on the CPU
  231. // and then copies them over to the GPU.
  232. class CERES_NO_EXPORT CudaJacobiPreconditioner final
  233. : public CudaPreconditioner {
  234. public:
  235. explicit CudaJacobiPreconditioner(Preconditioner::Options options,
  236. const CompressedRowSparseMatrix& A)
  237. : options_(std::move(options)),
  238. cpu_preconditioner_(options_, A),
  239. m_(options_.context, cpu_preconditioner_.matrix()) {}
  240. ~CudaJacobiPreconditioner() = default;
  241. void Update(const CompressedRowSparseMatrix& A, const double* D) final {
  242. cpu_preconditioner_.Update(A, D);
  243. m_.CopyValuesFromCpu(cpu_preconditioner_.matrix());
  244. }
  245. void RightMultiplyAndAccumulate(const CudaVector& x, CudaVector& y) final {
  246. m_.RightMultiplyAndAccumulate(x, &y);
  247. }
  248. private:
  249. Preconditioner::Options options_;
  250. BlockCRSJacobiPreconditioner cpu_preconditioner_;
  251. CudaSparseMatrix m_;
  252. };
  253. CudaCgnrSolver::CudaCgnrSolver(LinearSolver::Options options)
  254. : options_(std::move(options)) {}
  255. CudaCgnrSolver::~CudaCgnrSolver() {
  256. for (int i = 0; i < 4; ++i) {
  257. if (scratch_[i]) {
  258. delete scratch_[i];
  259. scratch_[i] = nullptr;
  260. }
  261. }
  262. }
  263. std::unique_ptr<CudaCgnrSolver> CudaCgnrSolver::Create(
  264. LinearSolver::Options options, std::string* error) {
  265. CHECK(error != nullptr);
  266. if (options.preconditioner_type != IDENTITY &&
  267. options.preconditioner_type != JACOBI) {
  268. *error =
  269. "CudaCgnrSolver does not support preconditioner type " +
  270. std::string(PreconditionerTypeToString(options.preconditioner_type)) +
  271. ". ";
  272. return nullptr;
  273. }
  274. CHECK(options.context->IsCudaInitialized())
  275. << "CudaCgnrSolver requires CUDA initialization.";
  276. auto solver = std::make_unique<CudaCgnrSolver>(options);
  277. return solver;
  278. }
  279. void CudaCgnrSolver::CpuToGpuTransfer(const CompressedRowSparseMatrix& A,
  280. const double* b,
  281. const double* D) {
  282. if (A_ == nullptr) {
  283. // Assume structure is not cached, do an initialization and structural copy.
  284. A_ = std::make_unique<CudaSparseMatrix>(options_.context, A);
  285. b_ = std::make_unique<CudaVector>(options_.context, A.num_rows());
  286. x_ = std::make_unique<CudaVector>(options_.context, A.num_cols());
  287. Atb_ = std::make_unique<CudaVector>(options_.context, A.num_cols());
  288. Ax_ = std::make_unique<CudaVector>(options_.context, A.num_rows());
  289. D_ = std::make_unique<CudaVector>(options_.context, A.num_cols());
  290. Preconditioner::Options preconditioner_options;
  291. preconditioner_options.type = options_.preconditioner_type;
  292. preconditioner_options.subset_preconditioner_start_row_block =
  293. options_.subset_preconditioner_start_row_block;
  294. preconditioner_options.sparse_linear_algebra_library_type =
  295. options_.sparse_linear_algebra_library_type;
  296. preconditioner_options.ordering_type = options_.ordering_type;
  297. preconditioner_options.num_threads = options_.num_threads;
  298. preconditioner_options.context = options_.context;
  299. if (options_.preconditioner_type == JACOBI) {
  300. preconditioner_ =
  301. std::make_unique<CudaJacobiPreconditioner>(preconditioner_options, A);
  302. } else {
  303. preconditioner_ = std::make_unique<CudaIdentityPreconditioner>();
  304. }
  305. for (int i = 0; i < 4; ++i) {
  306. scratch_[i] = new CudaVector(options_.context, A.num_cols());
  307. }
  308. } else {
  309. // Assume structure is cached, do a value copy.
  310. A_->CopyValuesFromCpu(A);
  311. }
  312. b_->CopyFromCpu(ConstVectorRef(b, A.num_rows()));
  313. D_->CopyFromCpu(ConstVectorRef(D, A.num_cols()));
  314. }
  315. LinearSolver::Summary CudaCgnrSolver::SolveImpl(
  316. CompressedRowSparseMatrix* A,
  317. const double* b,
  318. const LinearSolver::PerSolveOptions& per_solve_options,
  319. double* x) {
  320. EventLogger event_logger("CudaCgnrSolver::Solve");
  321. LinearSolver::Summary summary;
  322. summary.num_iterations = 0;
  323. summary.termination_type = LinearSolverTerminationType::FATAL_ERROR;
  324. CpuToGpuTransfer(*A, b, per_solve_options.D);
  325. event_logger.AddEvent("CPU to GPU Transfer");
  326. preconditioner_->Update(*A, per_solve_options.D);
  327. event_logger.AddEvent("Preconditioner Update");
  328. // Form z = Atb.
  329. Atb_->SetZero();
  330. A_->LeftMultiplyAndAccumulate(*b_, Atb_.get());
  331. // Solve (AtA + DtD)x = z (= Atb).
  332. x_->SetZero();
  333. CudaCgnrLinearOperator lhs(*A_, *D_, Ax_.get());
  334. event_logger.AddEvent("Setup");
  335. ConjugateGradientsSolverOptions cg_options;
  336. cg_options.min_num_iterations = options_.min_num_iterations;
  337. cg_options.max_num_iterations = options_.max_num_iterations;
  338. cg_options.residual_reset_period = options_.residual_reset_period;
  339. cg_options.q_tolerance = per_solve_options.q_tolerance;
  340. cg_options.r_tolerance = per_solve_options.r_tolerance;
  341. summary = ConjugateGradientsSolver(
  342. cg_options, lhs, *Atb_, *preconditioner_, scratch_, *x_);
  343. x_->CopyTo(x);
  344. event_logger.AddEvent("Solve");
  345. return summary;
  346. }
#endif  // CERES_NO_CUDA

}  // namespace ceres::internal