cuda_block_structure.cc 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128
  1. // Ceres Solver - A fast non-linear least squares minimizer
  2. // Copyright 2023 Google Inc. All rights reserved.
  3. // http://ceres-solver.org/
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are met:
  7. //
  8. // * Redistributions of source code must retain the above copyright notice,
  9. // this list of conditions and the following disclaimer.
  10. // * Redistributions in binary form must reproduce the above copyright notice,
  11. // this list of conditions and the following disclaimer in the documentation
  12. // and/or other materials provided with the distribution.
  13. // * Neither the name of Google Inc. nor the names of its contributors may be
  14. // used to endorse or promote products derived from this software without
  15. // specific prior written permission.
  16. //
  17. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  18. // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19. // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20. // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  21. // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  22. // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  23. // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  24. // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  25. // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  26. // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  27. // POSSIBILITY OF SUCH DAMAGE.
  28. //
  29. // Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
  30. #include "ceres/cuda_block_structure.h"
  31. #ifndef CERES_NO_CUDA
  32. namespace ceres::internal {
  33. namespace {
  34. // Dimension of a sorted array of blocks
  35. inline int Dimension(const std::vector<Block>& blocks) {
  36. if (blocks.empty()) {
  37. return 0;
  38. }
  39. const auto& last = blocks.back();
  40. return last.size + last.position;
  41. }
  42. } // namespace
  43. CudaBlockSparseStructure::CudaBlockSparseStructure(
  44. const CompressedRowBlockStructure& block_structure, ContextImpl* context)
  45. : first_cell_in_row_block_(context),
  46. cells_(context),
  47. row_blocks_(context),
  48. col_blocks_(context) {
  49. // Row blocks extracted from CompressedRowBlockStructure::rows
  50. std::vector<Block> row_blocks;
  51. // Column blocks can be reused as-is
  52. const auto& col_blocks = block_structure.cols;
  53. // Row block offset is an index of the first cell corresponding to row block
  54. std::vector<int> first_cell_in_row_block;
  55. // Flat array of all cells from all row-blocks
  56. std::vector<Cell> cells;
  57. int f_values_offset = 0;
  58. is_crs_compatible_ = true;
  59. num_row_blocks_ = block_structure.rows.size();
  60. num_col_blocks_ = col_blocks.size();
  61. row_blocks.reserve(num_row_blocks_);
  62. first_cell_in_row_block.reserve(num_row_blocks_ + 1);
  63. num_nonzeros_ = 0;
  64. sequential_layout_ = true;
  65. for (const auto& r : block_structure.rows) {
  66. const int row_block_size = r.block.size;
  67. if (r.cells.size() > 1 && row_block_size > 1) {
  68. is_crs_compatible_ = false;
  69. }
  70. row_blocks.emplace_back(r.block);
  71. first_cell_in_row_block.push_back(cells.size());
  72. for (const auto& c : r.cells) {
  73. const int col_block_size = col_blocks[c.block_id].size;
  74. const int cell_size = col_block_size * row_block_size;
  75. cells.push_back(c);
  76. sequential_layout_ &= c.position == num_nonzeros_;
  77. num_nonzeros_ += cell_size;
  78. }
  79. }
  80. first_cell_in_row_block.push_back(cells.size());
  81. num_cells_ = cells.size();
  82. num_rows_ = Dimension(row_blocks);
  83. num_cols_ = Dimension(col_blocks);
  84. is_crs_compatible_ &= sequential_layout_;
  85. if (VLOG_IS_ON(3)) {
  86. const size_t first_cell_in_row_block_size =
  87. first_cell_in_row_block.size() * sizeof(int);
  88. const size_t cells_size = cells.size() * sizeof(Cell);
  89. const size_t row_blocks_size = row_blocks.size() * sizeof(Block);
  90. const size_t col_blocks_size = col_blocks.size() * sizeof(Block);
  91. const size_t total_size = first_cell_in_row_block_size + cells_size +
  92. col_blocks_size + row_blocks_size;
  93. const double ratio =
  94. (100. * total_size) / (num_nonzeros_ * (sizeof(int) + sizeof(double)) +
  95. num_rows_ * sizeof(int));
  96. VLOG(3) << "\nCudaBlockSparseStructure:\n"
  97. "\tRow block offsets: "
  98. << first_cell_in_row_block_size
  99. << " bytes\n"
  100. "\tColumn blocks: "
  101. << col_blocks_size
  102. << " bytes\n"
  103. "\tRow blocks: "
  104. << row_blocks_size
  105. << " bytes\n"
  106. "\tCells: "
  107. << cells_size << " bytes\n\tTotal: " << total_size
  108. << " bytes of GPU memory (" << ratio << "% of CRS matrix size)";
  109. }
  110. first_cell_in_row_block_.CopyFromCpuVector(first_cell_in_row_block);
  111. cells_.CopyFromCpuVector(cells);
  112. row_blocks_.CopyFromCpuVector(row_blocks);
  113. col_blocks_.CopyFromCpuVector(col_blocks);
  114. }
  115. } // namespace ceres::internal
  116. #endif // CERES_NO_CUDA