timeMatrix.cpp 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306
  1. /* ----------------------------------------------------------------------------
  2. * GTSAM Copyright 2010, Georgia Tech Research Corporation,
  3. * Atlanta, Georgia 30332-0415
  4. * All Rights Reserved
  5. * Authors: Frank Dellaert, et al. (see THANKS for the full author list)
  6. * See LICENSE for the license information
  7. * -------------------------------------------------------------------------- */
  8. /*
  9. * @file timeMatrix.cpp
  10. * @brief Performs timing and profiling for Matrix operations
  11. * @author Alex Cunningham
  12. */
  13. #include <iostream>
  14. #include <gtsam/base/timing.h>
  15. #include <gtsam/base/Matrix.h>
  16. using namespace std;
  17. using namespace gtsam;
  18. /*
  19. * Results:
  20. * Alex's Machine:
  21. * (using p = 100000 m = 10 n = 12 reps = 50) - Average times
  22. * - (1st pass of simple changes) no pass: 0.184 sec , pass: 0.181 sec
  23. * - (1st rev memcpy) no pass: 0.181 sec , pass: 0.180 sec
  24. * - (1st rev matrix_range) no pass: 0.186 sec , pass: 0.184 sec
  25. * (using p = 10 m = 10 n = 12 reps = 10000000)
  26. * - (matrix_range version) no pass: 24.21 sec , pass: 23.97 sec
  27. * - (memcpy version) no pass: 18.96 sec , pass: 18.39 sec
  28. * - (original version) no pass: 23.45 sec , pass: 22.80 sec
  29. * - rev 2100 no pass: 18.45 sec , pass: 18.35 sec
  30. */
  31. double timeCollect(size_t p, size_t m, size_t n, bool passDims, size_t reps) {
  32. // create a large number of matrices
  33. // p = number of matrices
  34. // m = rows per matrix
  35. // n = columns per matrix
  36. // reps = number of repetitions
  37. // fill the matrices with identities
  38. vector<const Matrix *> matrices;
  39. for (size_t i=0; i<p;++i) {
  40. Matrix * M = new Matrix;
  41. (*M) = Matrix::Identity(m,n);
  42. matrices.push_back(M);
  43. }
  44. // start timing
  45. Matrix result;
  46. double elapsed;
  47. {
  48. gttic_(elapsed);
  49. if (passDims)
  50. for (size_t i=0; i<reps; ++i)
  51. result = collect(matrices, m, n);
  52. else
  53. for (size_t i=0; i<reps; ++i)
  54. result = collect(matrices);
  55. gttoc_(elapsed);
  56. tictoc_getNode(elapsedNode, elapsed);
  57. elapsed = elapsedNode->secs();
  58. tictoc_reset_();
  59. }
  60. // delete the matrices
  61. for (size_t i=0; i<p;++i) {
  62. delete matrices[i];
  63. }
  64. return elapsed;
  65. //return elapsed/reps;
  66. }
  67. /*
  68. * Results:
  69. * Alex's Machine:
  70. * - Original : 0.60 sec (x1000)
  71. * - 1st Rev : 0.49 sec (x1000)
  72. * - rev 2100 : 0.52 sec (x1000)
  73. */
  74. double timeVScaleColumn(size_t m, size_t n, size_t reps) {
  75. // make a matrix to scale
  76. Matrix M(m, n);
  77. for (size_t i=0; i<m; ++i)
  78. for (size_t j=0; j<n; ++j)
  79. M(i,j) = 2*i+j;
  80. // make a vector to use for scaling
  81. Vector V(m);
  82. for (size_t i=0; i<m; ++i)
  83. V(i) = i*2;
  84. double elapsed;
  85. Matrix result;
  86. {
  87. gttic_(elapsed);
  88. for (size_t i=0; i<reps; ++i)
  89. Matrix result = vector_scale(M,V);
  90. gttoc_(elapsed);
  91. tictoc_getNode(elapsedNode, elapsed);
  92. elapsed = elapsedNode->secs();
  93. tictoc_reset_();
  94. }
  95. return elapsed;
  96. }
  97. /*
  98. * Results:
  99. * Alex's Machine:
  100. * - Original : 0.54 sec (x1000)
  101. * - 1st rev : 0.44 sec (x1000)
  102. * - rev 2100 : 1.69 sec (x1000)
  103. */
  104. double timeVScaleRow(size_t m, size_t n, size_t reps) {
  105. // make a matrix to scale
  106. Matrix M(m, n);
  107. for (size_t i=0; i<m; ++i)
  108. for (size_t j=0; j<n; ++j)
  109. M(i,j) = 2*i+j;
  110. // make a vector to use for scaling
  111. Vector V(n);
  112. for (size_t i=0; i<n; ++i)
  113. V(i) = i*2;
  114. double elapsed;
  115. Matrix result;
  116. {
  117. gttic_(elapsed);
  118. for (size_t i=0; i<reps; ++i)
  119. result = vector_scale(V,M);
  120. gttoc_(elapsed);
  121. tictoc_getNode(elapsedNode, elapsed);
  122. elapsed = elapsedNode->secs();
  123. tictoc_reset_();
  124. }
  125. return elapsed;
  126. }
  127. /**
  128. * Results:
  129. * Alex's Machine (reps = 200000)
  130. * - ublas matrix_column : 4.63 sec
  131. * - naive implementation : 4.70 sec
  132. *
  133. * reps = 2000000
  134. * - rev 2100 : 45.11 sec
  135. */
  136. double timeColumn(size_t reps) {
  137. // create a matrix
  138. size_t m = 100; size_t n = 100;
  139. Matrix M(m, n);
  140. for (size_t i=0; i<m; ++i)
  141. for (size_t j=0; j<n; ++j)
  142. M(i,j) = 2*i+j;
  143. // extract a column
  144. double elapsed;
  145. Vector result;
  146. {
  147. gttic_(elapsed);
  148. for (size_t i=0; i<reps; ++i)
  149. for (size_t j = 0; j<n; ++j)
  150. //result = ublas::matrix_column<Matrix>(M, j);
  151. result = column(M, j);
  152. gttoc_(elapsed);
  153. tictoc_getNode(elapsedNode, elapsed);
  154. elapsed = elapsedNode->secs();
  155. tictoc_reset_();
  156. }
  157. return elapsed;
  158. }
  159. /*
  160. * Results
  161. * Alex's machine
  162. *
  163. * Runs at reps = 500000
  164. * Baseline (no householder, just matrix copy) : 0.05 sec
  165. * Initial : 8.20 sec
  166. * All in one function : 7.89 sec
  167. * Replace householder update with GSL, ATLAS : 0.92 sec
  168. *
  169. * Runs at reps = 2000000
  170. * Baseline (GSL/ATLAS householder update) : 3.61 sec
  171. *
  172. * Runs at reps = 5000000
  173. * Baseline : 8.76 sec
  174. * GSL/Atlas version of updateAb : 9.03 sec // Why does this have an effect?
  175. * Inlining house() : 6.33 sec
  176. * Inlining householder_update [GSL] : 6.15 sec
  177. * Rev 2100 : 5.75 sec
  178. *
  179. */
  180. double timeHouseholder(size_t reps) {
  181. // create a matrix
  182. Matrix Abase = (Matrix(4, 7) <<
  183. -5, 0, 5, 0, 0, 0, -1,
  184. 00, -5, 0, 5, 0, 0, 1.5,
  185. 10, 0, 0, 0,-10, 0, 2,
  186. 00, 10, 0, 0, 0,-10, -1).finished();
  187. // perform timing
  188. double elapsed;
  189. {
  190. gttic_(elapsed);
  191. for (size_t i=0; i<reps; ++i) {
  192. Matrix A = Abase;
  193. householder_(A,3);
  194. }
  195. gttoc_(elapsed);
  196. tictoc_getNode(elapsedNode, elapsed);
  197. elapsed = elapsedNode->secs();
  198. tictoc_reset_();
  199. }
  200. return elapsed;
  201. }
  202. /**
  203. * Results: (Alex's machine)
  204. * reps: 200000
  205. *
  206. * Initial (boost matrix proxies) - 12.08
  207. * Direct pointer method - 4.62
  208. */
  209. double timeMatrixInsert(size_t reps) {
  210. // create a matrix
  211. Matrix bigBase = Matrix::Zero(100, 100);
  212. Matrix small = Matrix::Identity(5,5);
  213. // perform timing
  214. double elapsed;
  215. {
  216. gttic_(elapsed);
  217. Matrix big = bigBase;
  218. for (size_t rep=0; rep<reps; ++rep)
  219. for (size_t i=0; i<100; i += 5)
  220. for (size_t j=0; j<100; j += 5)
  221. insertSub(big, small, i,j);
  222. gttoc_(elapsed);
  223. tictoc_getNode(elapsedNode, elapsed);
  224. elapsed = elapsedNode->secs();
  225. tictoc_reset_();
  226. }
  227. return elapsed;
  228. }
  229. int main(int argc, char ** argv) {
  230. // Time collect()
  231. cout << "Starting Matrix::collect() Timing" << endl;
  232. //size_t p = 100000; size_t m = 10; size_t n = 12; size_t reps = 50;
  233. size_t p = 10; size_t m = 10; size_t n = 12; size_t reps = 10000000;
  234. double collect_time1 = timeCollect(p, m, n, false, reps);
  235. double collect_time2 = timeCollect(p, m, n, true, reps);
  236. cout << "Average Elapsed time for collect (no pass) [" << p << " (" << m << ", " << n << ") matrices] : " << collect_time1 << endl;
  237. cout << "Average Elapsed time for collect (pass) [" << p << " (" << m << ", " << n << ") matrices] : " << collect_time2 << endl;
  238. // Time vector_scale_column
  239. cout << "Starting Matrix::vector_scale(column) Timing" << endl;
  240. size_t m1 = 400; size_t n1 = 480; size_t reps1 = 1000;
  241. double vsColumn_time = timeVScaleColumn(m1, n1, reps1);
  242. cout << "Elapsed time for vector_scale(column) [(" << m1 << ", " << n1 << ") matrix] : " << vsColumn_time << endl;
  243. // Time vector_scale_row
  244. cout << "Starting Matrix::vector_scale(row) Timing" << endl;
  245. double vsRow_time = timeVScaleRow(m1, n1, reps1);
  246. cout << "Elapsed time for vector_scale(row) [(" << m1 << ", " << n1 << ") matrix] : " << vsRow_time << endl;
  247. // Time column() NOTE: using the ublas version
  248. cout << "Starting column() Timing" << endl;
  249. size_t reps2 = 2000000;
  250. double column_time = timeColumn(reps2);
  251. cout << "Time: " << column_time << " sec" << endl;
  252. // Time householder_ function
  253. cout << "Starting householder_() Timing" << endl;
  254. size_t reps_house = 5000000;
  255. double house_time = timeHouseholder(reps_house);
  256. cout << "Elapsed time for householder_() : " << house_time << " sec" << endl;
  257. // Time matrix insertion
  258. cout << "Starting insertSub() Timing" << endl;
  259. size_t reps_insert = 200000;
  260. double insert_time = timeMatrixInsert(reps_insert);
  261. cout << "Elapsed time for insertSub() : " << insert_time << " sec" << endl;
  262. return 0;
  263. }