block_jacobi_preconditioner.cc 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. // Ceres Solver - A fast non-linear least squares minimizer
  2. // Copyright 2022 Google Inc. All rights reserved.
  3. // http://ceres-solver.org/
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are met:
  7. //
  8. // * Redistributions of source code must retain the above copyright notice,
  9. // this list of conditions and the following disclaimer.
  10. // * Redistributions in binary form must reproduce the above copyright notice,
  11. // this list of conditions and the following disclaimer in the documentation
  12. // and/or other materials provided with the distribution.
  13. // * Neither the name of Google Inc. nor the names of its contributors may be
  14. // used to endorse or promote products derived from this software without
  15. // specific prior written permission.
  16. //
  17. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  18. // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19. // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20. // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  21. // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  22. // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  23. // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  24. // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  25. // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  26. // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  27. // POSSIBILITY OF SUCH DAMAGE.
  28. //
  29. // Author: keir@google.com (Keir Mierle)
  30. #include "ceres/block_jacobi_preconditioner.h"
  31. #include <memory>
  32. #include <mutex>
  33. #include <utility>
  34. #include <vector>
  35. #include "Eigen/Dense"
  36. #include "ceres/block_random_access_diagonal_matrix.h"
  37. #include "ceres/block_sparse_matrix.h"
  38. #include "ceres/block_structure.h"
  39. #include "ceres/casts.h"
  40. #include "ceres/internal/eigen.h"
  41. #include "ceres/parallel_for.h"
  42. #include "ceres/small_blas.h"
  43. namespace ceres::internal {
  44. BlockSparseJacobiPreconditioner::BlockSparseJacobiPreconditioner(
  45. Preconditioner::Options options, const BlockSparseMatrix& A)
  46. : options_(std::move(options)) {
  47. m_ = std::make_unique<BlockRandomAccessDiagonalMatrix>(
  48. A.block_structure()->cols, options_.context, options_.num_threads);
  49. }
  50. BlockSparseJacobiPreconditioner::~BlockSparseJacobiPreconditioner() = default;
  51. bool BlockSparseJacobiPreconditioner::UpdateImpl(const BlockSparseMatrix& A,
  52. const double* D) {
  53. const CompressedRowBlockStructure* bs = A.block_structure();
  54. const double* values = A.values();
  55. m_->SetZero();
  56. ParallelFor(options_.context,
  57. 0,
  58. bs->rows.size(),
  59. options_.num_threads,
  60. [this, bs, values](int i) {
  61. const int row_block_size = bs->rows[i].block.size;
  62. const std::vector<Cell>& cells = bs->rows[i].cells;
  63. for (const auto& cell : cells) {
  64. const int block_id = cell.block_id;
  65. const int col_block_size = bs->cols[block_id].size;
  66. int r, c, row_stride, col_stride;
  67. CellInfo* cell_info = m_->GetCell(
  68. block_id, block_id, &r, &c, &row_stride, &col_stride);
  69. MatrixRef m(cell_info->values, row_stride, col_stride);
  70. ConstMatrixRef b(
  71. values + cell.position, row_block_size, col_block_size);
  72. auto lock =
  73. MakeConditionalLock(options_.num_threads, cell_info->m);
  74. // clang-format off
  75. MatrixTransposeMatrixMultiply<Eigen::Dynamic, Eigen::Dynamic,
  76. Eigen::Dynamic,Eigen::Dynamic, 1>(
  77. values + cell.position, row_block_size,col_block_size,
  78. values + cell.position, row_block_size,col_block_size,
  79. cell_info->values,r, c,row_stride,col_stride);
  80. // clang-format on
  81. }
  82. });
  83. if (D != nullptr) {
  84. // Add the diagonal.
  85. ParallelFor(options_.context,
  86. 0,
  87. bs->cols.size(),
  88. options_.num_threads,
  89. [this, bs, D](int i) {
  90. const int block_size = bs->cols[i].size;
  91. int r, c, row_stride, col_stride;
  92. CellInfo* cell_info =
  93. m_->GetCell(i, i, &r, &c, &row_stride, &col_stride);
  94. MatrixRef m(cell_info->values, row_stride, col_stride);
  95. m.block(r, c, block_size, block_size).diagonal() +=
  96. ConstVectorRef(D + bs->cols[i].position, block_size)
  97. .array()
  98. .square()
  99. .matrix();
  100. });
  101. }
  102. m_->Invert();
  103. return true;
  104. }
  105. BlockCRSJacobiPreconditioner::BlockCRSJacobiPreconditioner(
  106. Preconditioner::Options options, const CompressedRowSparseMatrix& A)
  107. : options_(std::move(options)), locks_(A.col_blocks().size()) {
  108. auto& col_blocks = A.col_blocks();
  109. // Compute the number of non-zeros in the preconditioner. This is needed so
  110. // that we can construct the CompressedRowSparseMatrix.
  111. const int m_nnz = SumSquaredSizes(col_blocks);
  112. m_ = std::make_unique<CompressedRowSparseMatrix>(
  113. A.num_cols(), A.num_cols(), m_nnz);
  114. const int num_col_blocks = col_blocks.size();
  115. // Populate the sparsity structure of the preconditioner matrix.
  116. int* m_cols = m_->mutable_cols();
  117. int* m_rows = m_->mutable_rows();
  118. m_rows[0] = 0;
  119. for (int i = 0, idx = 0; i < num_col_blocks; ++i) {
  120. // For each column block populate a diagonal block in the preconditioner.
  121. // Not that the because of the way the CompressedRowSparseMatrix format
  122. // works, the entire diagonal block is laid out contiguously in memory as a
  123. // row-major matrix. We will use this when updating the block.
  124. auto& block = col_blocks[i];
  125. for (int j = 0; j < block.size; ++j) {
  126. for (int k = 0; k < block.size; ++k, ++idx) {
  127. m_cols[idx] = block.position + k;
  128. }
  129. m_rows[block.position + j + 1] = idx;
  130. }
  131. }
  132. // In reality we only need num_col_blocks locks, however that would require
  133. // that in UpdateImpl we are able to look up the column block from the it
  134. // first column. To save ourselves this map we will instead spend a few extra
  135. // lock objects.
  136. std::vector<std::mutex> locks(A.num_cols());
  137. locks_.swap(locks);
  138. CHECK_EQ(m_rows[A.num_cols()], m_nnz);
  139. }
  140. BlockCRSJacobiPreconditioner::~BlockCRSJacobiPreconditioner() = default;
  141. bool BlockCRSJacobiPreconditioner::UpdateImpl(
  142. const CompressedRowSparseMatrix& A, const double* D) {
  143. const auto& col_blocks = A.col_blocks();
  144. const auto& row_blocks = A.row_blocks();
  145. const int num_col_blocks = col_blocks.size();
  146. const int num_row_blocks = row_blocks.size();
  147. const int* a_rows = A.rows();
  148. const int* a_cols = A.cols();
  149. const double* a_values = A.values();
  150. double* m_values = m_->mutable_values();
  151. const int* m_rows = m_->rows();
  152. m_->SetZero();
  153. ParallelFor(
  154. options_.context,
  155. 0,
  156. num_row_blocks,
  157. options_.num_threads,
  158. [this, row_blocks, a_rows, a_cols, a_values, m_values, m_rows](int i) {
  159. const int row = row_blocks[i].position;
  160. const int row_block_size = row_blocks[i].size;
  161. const int row_nnz = a_rows[row + 1] - a_rows[row];
  162. ConstMatrixRef row_block(
  163. a_values + a_rows[row], row_block_size, row_nnz);
  164. int c = 0;
  165. while (c < row_nnz) {
  166. const int idx = a_rows[row] + c;
  167. const int col = a_cols[idx];
  168. const int col_block_size = m_rows[col + 1] - m_rows[col];
  169. // We make use of the fact that the entire diagonal block is
  170. // stored contiguously in memory as a row-major matrix.
  171. MatrixRef m(m_values + m_rows[col], col_block_size, col_block_size);
  172. // We do not have a row_stride version of
  173. // MatrixTransposeMatrixMultiply, otherwise we could use it
  174. // here to further speed up the following expression.
  175. auto b = row_block.middleCols(c, col_block_size);
  176. auto lock = MakeConditionalLock(options_.num_threads, locks_[col]);
  177. m.noalias() += b.transpose() * b;
  178. c += col_block_size;
  179. }
  180. });
  181. ParallelFor(
  182. options_.context,
  183. 0,
  184. num_col_blocks,
  185. options_.num_threads,
  186. [col_blocks, m_rows, m_values, D](int i) {
  187. const int col = col_blocks[i].position;
  188. const int col_block_size = col_blocks[i].size;
  189. MatrixRef m(m_values + m_rows[col], col_block_size, col_block_size);
  190. if (D != nullptr) {
  191. m.diagonal() +=
  192. ConstVectorRef(D + col, col_block_size).array().square().matrix();
  193. }
  194. // TODO(sameeragarwal): Deal with Cholesky inversion failure here and
  195. // elsewhere.
  196. m = m.llt().solve(Matrix::Identity(col_block_size, col_block_size));
  197. });
  198. return true;
  199. }
  200. } // namespace ceres::internal