TimeTBB.cpp 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. /* ----------------------------------------------------------------------------
  2. * GTSAM Copyright 2010, Georgia Tech Research Corporation,
  3. * Atlanta, Georgia 30332-0415
  4. * All Rights Reserved
  5. * Authors: Frank Dellaert, et al. (see THANKS for the full author list)
  6. * See LICENSE for the license information
  7. * -------------------------------------------------------------------------- */
  8. /**
  9. * @file TimeTBB.cpp
  10. * @brief Measure task scheduling overhead in TBB
  11. * @author Richard Roberts
  12. * @date November 6, 2013
  13. */
  14. #include <gtsam/global_includes.h>
  15. #include <gtsam/base/Matrix.h>
  16. #include <boost/assign/list_of.hpp>
  17. #include <map>
  18. #include <iostream>
  19. using namespace std;
  20. using namespace gtsam;
  21. using boost::assign::list_of;
  22. #ifdef GTSAM_USE_TBB
  23. #include <tbb/blocked_range.h> // tbb::blocked_range
  24. #include <tbb/tick_count.h> // tbb::tick_count
  25. #include <tbb/parallel_for.h> // tbb::parallel_for
  26. #include <tbb/cache_aligned_allocator.h> // tbb::cache_aligned_allocator
  27. #include <tbb/task_arena.h> // tbb::task_arena
  28. #include <tbb/task_group.h> // tbb::task_group
  29. static const DenseIndex numberOfProblems = 1000000;
  30. static const DenseIndex problemSize = 4;
  31. typedef Eigen::Matrix<double, problemSize, problemSize> FixedMatrix;
  32. /* ************************************************************************* */
  /// Timings collected for one thread count. Both maps are keyed by grain size
  /// and store the elapsed time in seconds that was measured for that grain size.
  struct ResultWithThreads
  {
    /// Element type of the two timing maps: (grain size, seconds).
    using value_type = std::map<int, double>::value_type;

    /// Timings of the benchmark that uses fixed-size matrices.
    std::map<int, double> grainSizesWithoutAllocation;

    /// Timings of the benchmark that allocates its matrices through an allocator.
    std::map<int, double> grainSizesWithAllocation;
  };

  /// All timings, keyed by the number of threads they were measured with.
  using Results = std::map<int, ResultWithThreads>;
  40. /* ************************************************************************* */
  41. struct WorkerWithoutAllocation
  42. {
  43. vector<double>& results;
  44. WorkerWithoutAllocation(vector<double>& results) : results(results) {}
  45. void operator()(const tbb::blocked_range<size_t>& r) const
  46. {
  47. for(size_t i = r.begin(); i != r.end(); ++i)
  48. {
  49. FixedMatrix m1 = FixedMatrix::Random();
  50. FixedMatrix m2 = FixedMatrix::Random();
  51. FixedMatrix prod = m1 * m2;
  52. results[i] = prod.norm();
  53. }
  54. }
  55. };
  56. /* ************************************************************************* */
  57. map<int, double> testWithoutMemoryAllocation(int num_threads)
  58. {
  59. // A function to do some matrix operations without allocating any memory
  60. // Create task_arena and task_group
  61. tbb::task_arena arena(num_threads);
  62. tbb::task_group tg;
  63. // Now call it
  64. vector<double> results(numberOfProblems);
  65. const vector<size_t> grainSizes = list_of(1)(10)(100)(1000);
  66. map<int, double> timingResults;
  67. for(size_t grainSize: grainSizes)
  68. {
  69. tbb::tick_count t0 = tbb::tick_count::now();
  70. // Run parallel code (as a task group) inside of task arena
  71. arena.execute([&]{
  72. tg.run_and_wait([&]{
  73. tbb::parallel_for(tbb::blocked_range<size_t>(0, numberOfProblems), WorkerWithoutAllocation(results));
  74. });
  75. });
  76. tbb::tick_count t1 = tbb::tick_count::now();
  77. cout << "Without memory allocation, grain size = " << grainSize << ", time = " << (t1 - t0).seconds() << endl;
  78. timingResults[(int)grainSize] = (t1 - t0).seconds();
  79. }
  80. return timingResults;
  81. }
  82. /* ************************************************************************* */
  83. struct WorkerWithAllocation
  84. {
  85. vector<double>& results;
  86. WorkerWithAllocation(vector<double>& results) : results(results) {}
  87. void operator()(const tbb::blocked_range<size_t>& r) const
  88. {
  89. tbb::cache_aligned_allocator<double> allocator;
  90. for(size_t i = r.begin(); i != r.end(); ++i)
  91. {
  92. double *m1data = allocator.allocate(problemSize * problemSize);
  93. Eigen::Map<Matrix> m1(m1data, problemSize, problemSize);
  94. double *m2data = allocator.allocate(problemSize * problemSize);
  95. Eigen::Map<Matrix> m2(m2data, problemSize, problemSize);
  96. double *proddata = allocator.allocate(problemSize * problemSize);
  97. Eigen::Map<Matrix> prod(proddata, problemSize, problemSize);
  98. m1 = Eigen::Matrix4d::Random(problemSize, problemSize);
  99. m2 = Eigen::Matrix4d::Random(problemSize, problemSize);
  100. prod = m1 * m2;
  101. results[i] = prod.norm();
  102. allocator.deallocate(m1data, problemSize * problemSize);
  103. allocator.deallocate(m2data, problemSize * problemSize);
  104. allocator.deallocate(proddata, problemSize * problemSize);
  105. }
  106. }
  107. };
  108. /* ************************************************************************* */
  109. map<int, double> testWithMemoryAllocation(int num_threads)
  110. {
  111. // A function to do some matrix operations with allocating memory
  112. // Create task_arena and task_group
  113. tbb::task_arena arena(num_threads);
  114. tbb::task_group tg;
  115. // Now call it
  116. vector<double> results(numberOfProblems);
  117. const vector<size_t> grainSizes = list_of(1)(10)(100)(1000);
  118. map<int, double> timingResults;
  119. for(size_t grainSize: grainSizes)
  120. {
  121. tbb::tick_count t0 = tbb::tick_count::now();
  122. // Run parallel code (as a task group) inside of task arena
  123. arena.execute([&]{
  124. tg.run_and_wait([&]{
  125. tbb::parallel_for(tbb::blocked_range<size_t>(0, numberOfProblems), WorkerWithAllocation(results));
  126. });
  127. });
  128. tbb::tick_count t1 = tbb::tick_count::now();
  129. cout << "With memory allocation, grain size = " << grainSize << ", time = " << (t1 - t0).seconds() << endl;
  130. timingResults[(int)grainSize] = (t1 - t0).seconds();
  131. }
  132. return timingResults;
  133. }
  134. /* ************************************************************************* */
  135. int main(int argc, char* argv[])
  136. {
  137. cout << "numberOfProblems = " << numberOfProblems << endl;
  138. cout << "problemSize = " << problemSize << endl;
  139. const vector<int> numThreads = list_of(1)(4)(8);
  140. Results results;
  141. for(size_t n: numThreads)
  142. {
  143. cout << "With " << n << " threads:" << endl;
  144. results[(int)n].grainSizesWithoutAllocation = testWithoutMemoryAllocation((int)n);
  145. results[(int)n].grainSizesWithAllocation = testWithMemoryAllocation((int)n);
  146. cout << endl;
  147. }
  148. cout << "Summary of results:" << endl;
  149. for(const Results::value_type& threads_result: results)
  150. {
  151. const int threads = threads_result.first;
  152. const ResultWithThreads& result = threads_result.second;
  153. if(threads != 1)
  154. {
  155. for(const ResultWithThreads::value_type& grainsize_time: result.grainSizesWithoutAllocation)
  156. {
  157. const int grainsize = grainsize_time.first;
  158. const double speedup = results[1].grainSizesWithoutAllocation[grainsize] / grainsize_time.second;
  159. cout << threads << " threads, without allocation, grain size = " << grainsize << ", speedup = " << speedup << endl;
  160. }
  161. for(const ResultWithThreads::value_type& grainsize_time: result.grainSizesWithAllocation)
  162. {
  163. const int grainsize = grainsize_time.first;
  164. const double speedup = results[1].grainSizesWithAllocation[grainsize] / grainsize_time.second;
  165. cout << threads << " threads, with allocation, grain size = " << grainsize << ", speedup = " << speedup << endl;
  166. }
  167. }
  168. }
  169. return 0;
  170. }
  171. #else
  172. /* ************************************************************************* */
  /// Fallback entry point used when GTSAM_USE_TBB is not defined: there is
  /// nothing to benchmark, so it only prints a notice and exits successfully.
  int main(int argc, char* argv [])
  {
    static const char* const notice =
        "GTSAM is compiled without TBB, please compile with TBB to use this program.";
    std::cout << notice << std::endl;
    return 0;
  }
  178. #endif