123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220 |
- /* ----------------------------------------------------------------------------
- * GTSAM Copyright 2010, Georgia Tech Research Corporation,
- * Atlanta, Georgia 30332-0415
- * All Rights Reserved
- * Authors: Frank Dellaert, et al. (see THANKS for the full author list)
- * See LICENSE for the license information
- * -------------------------------------------------------------------------- */
- /**
- * @file TimeTBB.cpp
- * @brief Measure task scheduling overhead in TBB
- * @author Richard Roberts
- * @date November 6, 2013
- */
- #include <gtsam/global_includes.h>
- #include <gtsam/base/Matrix.h>
- #include <boost/assign/list_of.hpp>
- #include <map>
- #include <iostream>
- using namespace std;
- using namespace gtsam;
- using boost::assign::list_of;
- #ifdef GTSAM_USE_TBB
- #include <tbb/blocked_range.h> // tbb::blocked_range
- #include <tbb/tick_count.h> // tbb::tick_count
- #include <tbb/parallel_for.h> // tbb::parallel_for
- #include <tbb/cache_aligned_allocator.h> // tbb::cache_aligned_allocator
- #include <tbb/task_arena.h> // tbb::task_arena
- #include <tbb/task_group.h> // tbb::task_group
- static const DenseIndex numberOfProblems = 1000000;
- static const DenseIndex problemSize = 4;
- typedef Eigen::Matrix<double, problemSize, problemSize> FixedMatrix;
- /* ************************************************************************* */
/**
 * Timings gathered for a single thread count. Each map goes from grain size
 * to the elapsed time (in seconds) of one timing run.
 */
struct ResultWithThreads
{
  using TimingByGrainSize = std::map<int, double>;
  using value_type = TimingByGrainSize::value_type;

  // Timings returned by testWithoutMemoryAllocation
  TimingByGrainSize grainSizesWithoutAllocation;
  // Timings returned by testWithMemoryAllocation
  TimingByGrainSize grainSizesWithAllocation;
};

// All collected timings, keyed by the number of threads used for the run.
using Results = std::map<int, ResultWithThreads>;
- /* ************************************************************************* */
- struct WorkerWithoutAllocation
- {
- vector<double>& results;
- WorkerWithoutAllocation(vector<double>& results) : results(results) {}
- void operator()(const tbb::blocked_range<size_t>& r) const
- {
- for(size_t i = r.begin(); i != r.end(); ++i)
- {
- FixedMatrix m1 = FixedMatrix::Random();
- FixedMatrix m2 = FixedMatrix::Random();
- FixedMatrix prod = m1 * m2;
- results[i] = prod.norm();
- }
- }
- };
- /* ************************************************************************* */
- map<int, double> testWithoutMemoryAllocation(int num_threads)
- {
- // A function to do some matrix operations without allocating any memory
- // Create task_arena and task_group
- tbb::task_arena arena(num_threads);
- tbb::task_group tg;
- // Now call it
- vector<double> results(numberOfProblems);
- const vector<size_t> grainSizes = list_of(1)(10)(100)(1000);
- map<int, double> timingResults;
- for(size_t grainSize: grainSizes)
- {
- tbb::tick_count t0 = tbb::tick_count::now();
- // Run parallel code (as a task group) inside of task arena
- arena.execute([&]{
- tg.run_and_wait([&]{
- tbb::parallel_for(tbb::blocked_range<size_t>(0, numberOfProblems), WorkerWithoutAllocation(results));
- });
- });
- tbb::tick_count t1 = tbb::tick_count::now();
- cout << "Without memory allocation, grain size = " << grainSize << ", time = " << (t1 - t0).seconds() << endl;
- timingResults[(int)grainSize] = (t1 - t0).seconds();
- }
- return timingResults;
- }
- /* ************************************************************************* */
- struct WorkerWithAllocation
- {
- vector<double>& results;
- WorkerWithAllocation(vector<double>& results) : results(results) {}
- void operator()(const tbb::blocked_range<size_t>& r) const
- {
- tbb::cache_aligned_allocator<double> allocator;
- for(size_t i = r.begin(); i != r.end(); ++i)
- {
- double *m1data = allocator.allocate(problemSize * problemSize);
- Eigen::Map<Matrix> m1(m1data, problemSize, problemSize);
- double *m2data = allocator.allocate(problemSize * problemSize);
- Eigen::Map<Matrix> m2(m2data, problemSize, problemSize);
- double *proddata = allocator.allocate(problemSize * problemSize);
- Eigen::Map<Matrix> prod(proddata, problemSize, problemSize);
- m1 = Eigen::Matrix4d::Random(problemSize, problemSize);
- m2 = Eigen::Matrix4d::Random(problemSize, problemSize);
- prod = m1 * m2;
- results[i] = prod.norm();
- allocator.deallocate(m1data, problemSize * problemSize);
- allocator.deallocate(m2data, problemSize * problemSize);
- allocator.deallocate(proddata, problemSize * problemSize);
- }
- }
- };
- /* ************************************************************************* */
- map<int, double> testWithMemoryAllocation(int num_threads)
- {
- // A function to do some matrix operations with allocating memory
- // Create task_arena and task_group
- tbb::task_arena arena(num_threads);
- tbb::task_group tg;
- // Now call it
- vector<double> results(numberOfProblems);
- const vector<size_t> grainSizes = list_of(1)(10)(100)(1000);
- map<int, double> timingResults;
- for(size_t grainSize: grainSizes)
- {
- tbb::tick_count t0 = tbb::tick_count::now();
- // Run parallel code (as a task group) inside of task arena
- arena.execute([&]{
- tg.run_and_wait([&]{
- tbb::parallel_for(tbb::blocked_range<size_t>(0, numberOfProblems), WorkerWithAllocation(results));
- });
- });
- tbb::tick_count t1 = tbb::tick_count::now();
- cout << "With memory allocation, grain size = " << grainSize << ", time = " << (t1 - t0).seconds() << endl;
- timingResults[(int)grainSize] = (t1 - t0).seconds();
- }
- return timingResults;
- }
- /* ************************************************************************* */
- int main(int argc, char* argv[])
- {
- cout << "numberOfProblems = " << numberOfProblems << endl;
- cout << "problemSize = " << problemSize << endl;
- const vector<int> numThreads = list_of(1)(4)(8);
- Results results;
- for(size_t n: numThreads)
- {
- cout << "With " << n << " threads:" << endl;
- results[(int)n].grainSizesWithoutAllocation = testWithoutMemoryAllocation((int)n);
- results[(int)n].grainSizesWithAllocation = testWithMemoryAllocation((int)n);
- cout << endl;
- }
- cout << "Summary of results:" << endl;
- for(const Results::value_type& threads_result: results)
- {
- const int threads = threads_result.first;
- const ResultWithThreads& result = threads_result.second;
- if(threads != 1)
- {
- for(const ResultWithThreads::value_type& grainsize_time: result.grainSizesWithoutAllocation)
- {
- const int grainsize = grainsize_time.first;
- const double speedup = results[1].grainSizesWithoutAllocation[grainsize] / grainsize_time.second;
- cout << threads << " threads, without allocation, grain size = " << grainsize << ", speedup = " << speedup << endl;
- }
- for(const ResultWithThreads::value_type& grainsize_time: result.grainSizesWithAllocation)
- {
- const int grainsize = grainsize_time.first;
- const double speedup = results[1].grainSizesWithAllocation[grainsize] / grainsize_time.second;
- cout << threads << " threads, with allocation, grain size = " << grainsize << ", speedup = " << speedup << endl;
- }
- }
- }
- return 0;
- }
- #else
- /* ************************************************************************* */
/**
 * Fallback entry point for builds where GTSAM_USE_TBB is not defined: prints
 * a notice that TBB is required and exits successfully.
 */
int main(int argc, char* argv [])
{
  static const char* const notice = "GTSAM is compiled without TBB, please compile with TBB to use this program.";
  std::cout << notice << std::endl;
  return 0;
}
- #endif
|