diff options
Diffstat (limited to 'gnu/llvm/libcxx/utils/google-benchmark/test/benchmark_test.cc')
-rw-r--r-- | gnu/llvm/libcxx/utils/google-benchmark/test/benchmark_test.cc | 245 |
1 files changed, 245 insertions, 0 deletions
diff --git a/gnu/llvm/libcxx/utils/google-benchmark/test/benchmark_test.cc b/gnu/llvm/libcxx/utils/google-benchmark/test/benchmark_test.cc new file mode 100644 index 00000000000..3cd4f5565fa --- /dev/null +++ b/gnu/llvm/libcxx/utils/google-benchmark/test/benchmark_test.cc @@ -0,0 +1,245 @@ +#include "benchmark/benchmark.h" + +#include <assert.h> +#include <math.h> +#include <stdint.h> + +#include <chrono> +#include <cstdlib> +#include <iostream> +#include <limits> +#include <list> +#include <map> +#include <mutex> +#include <set> +#include <sstream> +#include <string> +#include <thread> +#include <utility> +#include <vector> + +#if defined(__GNUC__) +#define BENCHMARK_NOINLINE __attribute__((noinline)) +#else +#define BENCHMARK_NOINLINE +#endif + +namespace { + +int BENCHMARK_NOINLINE Factorial(uint32_t n) { + return (n == 1) ? 1 : n * Factorial(n - 1); +} + +double CalculatePi(int depth) { + double pi = 0.0; + for (int i = 0; i < depth; ++i) { + double numerator = static_cast<double>(((i % 2) * 2) - 1); + double denominator = static_cast<double>((2 * i) - 1); + pi += numerator / denominator; + } + return (pi - 1.0) * 4; +} + +std::set<int64_t> ConstructRandomSet(int64_t size) { + std::set<int64_t> s; + for (int i = 0; i < size; ++i) s.insert(s.end(), i); + return s; +} + +std::mutex test_vector_mu; +std::vector<int>* test_vector = nullptr; + +} // end namespace + +static void BM_Factorial(benchmark::State& state) { + int fac_42 = 0; + for (auto _ : state) fac_42 = Factorial(8); + // Prevent compiler optimizations + std::stringstream ss; + ss << fac_42; + state.SetLabel(ss.str()); +} +BENCHMARK(BM_Factorial); +BENCHMARK(BM_Factorial)->UseRealTime(); + +static void BM_CalculatePiRange(benchmark::State& state) { + double pi = 0.0; + for (auto _ : state) pi = CalculatePi(static_cast<int>(state.range(0))); + std::stringstream ss; + ss << pi; + state.SetLabel(ss.str()); +} +BENCHMARK_RANGE(BM_CalculatePiRange, 1, 1024 * 1024); + +static void BM_CalculatePi(benchmark::State& state) { + static const int depth = 1024; + for (auto _ : state) { + benchmark::DoNotOptimize(CalculatePi(static_cast<int>(depth))); + } +} +BENCHMARK(BM_CalculatePi)->Threads(8); +BENCHMARK(BM_CalculatePi)->ThreadRange(1, 32); +BENCHMARK(BM_CalculatePi)->ThreadPerCpu(); + +static void BM_SetInsert(benchmark::State& state) { + std::set<int64_t> data; + for (auto _ : state) { + state.PauseTiming(); + data = ConstructRandomSet(state.range(0)); + state.ResumeTiming(); + for (int j = 0; j < state.range(1); ++j) data.insert(rand()); + } + state.SetItemsProcessed(state.iterations() * state.range(1)); + state.SetBytesProcessed(state.iterations() * state.range(1) * sizeof(int)); +} + +// Test many inserts at once to reduce the total iterations needed. Otherwise, the slower, +// non-timed part of each iteration will make the benchmark take forever. +BENCHMARK(BM_SetInsert)->Ranges({{1 << 10, 8 << 10}, {128, 512}}); + +template <typename Container, + typename ValueType = typename Container::value_type> +static void BM_Sequential(benchmark::State& state) { + ValueType v = 42; + for (auto _ : state) { + Container c; + for (int64_t i = state.range(0); --i;) c.push_back(v); + } + const int64_t items_processed = state.iterations() * state.range(0); + state.SetItemsProcessed(items_processed); + state.SetBytesProcessed(items_processed * sizeof(v)); +} +BENCHMARK_TEMPLATE2(BM_Sequential, std::vector<int>, int) + ->Range(1 << 0, 1 << 10); +BENCHMARK_TEMPLATE(BM_Sequential, std::list<int>)->Range(1 << 0, 1 << 10); +// Test the variadic version of BENCHMARK_TEMPLATE in C++11 and beyond. +#ifdef BENCHMARK_HAS_CXX11 +BENCHMARK_TEMPLATE(BM_Sequential, std::vector<int>, int)->Arg(512); +#endif + +static void BM_StringCompare(benchmark::State& state) { + size_t len = static_cast<size_t>(state.range(0)); + std::string s1(len, '-'); + std::string s2(len, '-'); + for (auto _ : state) benchmark::DoNotOptimize(s1.compare(s2)); +} +BENCHMARK(BM_StringCompare)->Range(1, 1 << 20); + +static void BM_SetupTeardown(benchmark::State& state) { + if (state.thread_index == 0) { + // No need to lock test_vector_mu here as this is running single-threaded. + test_vector = new std::vector<int>(); + } + int i = 0; + for (auto _ : state) { + std::lock_guard<std::mutex> l(test_vector_mu); + if (i % 2 == 0) + test_vector->push_back(i); + else + test_vector->pop_back(); + ++i; + } + if (state.thread_index == 0) { + delete test_vector; + } +} +BENCHMARK(BM_SetupTeardown)->ThreadPerCpu(); + +static void BM_LongTest(benchmark::State& state) { + double tracker = 0.0; + for (auto _ : state) { + for (int i = 0; i < state.range(0); ++i) + benchmark::DoNotOptimize(tracker += i); + } +} +BENCHMARK(BM_LongTest)->Range(1 << 16, 1 << 28); + +static void BM_ParallelMemset(benchmark::State& state) { + int64_t size = state.range(0) / static_cast<int64_t>(sizeof(int)); + int thread_size = static_cast<int>(size) / state.threads; + int from = thread_size * state.thread_index; + int to = from + thread_size; + + if (state.thread_index == 0) { + test_vector = new std::vector<int>(static_cast<size_t>(size)); + } + + for (auto _ : state) { + for (int i = from; i < to; i++) { + // No need to lock test_vector_mu as ranges + // do not overlap between threads. + benchmark::DoNotOptimize(test_vector->at(i) = 1); + } + } + + if (state.thread_index == 0) { + delete test_vector; + } +} +BENCHMARK(BM_ParallelMemset)->Arg(10 << 20)->ThreadRange(1, 4); + +static void BM_ManualTiming(benchmark::State& state) { + int64_t slept_for = 0; + int64_t microseconds = state.range(0); + std::chrono::duration<double, std::micro> sleep_duration{ + static_cast<double>(microseconds)}; + + for (auto _ : state) { + auto start = std::chrono::high_resolution_clock::now(); + // Simulate some useful workload with a sleep + std::this_thread::sleep_for( + std::chrono::duration_cast<std::chrono::nanoseconds>(sleep_duration)); + auto end = std::chrono::high_resolution_clock::now(); + + auto elapsed = + std::chrono::duration_cast<std::chrono::duration<double>>(end - start); + + state.SetIterationTime(elapsed.count()); + slept_for += microseconds; + } + state.SetItemsProcessed(slept_for); +} +BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseRealTime(); +BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseManualTime(); + +#ifdef BENCHMARK_HAS_CXX11 + +template <class... Args> +void BM_with_args(benchmark::State& state, Args&&...) { + for (auto _ : state) { + } +} +BENCHMARK_CAPTURE(BM_with_args, int_test, 42, 43, 44); +BENCHMARK_CAPTURE(BM_with_args, string_and_pair_test, std::string("abc"), + std::pair<int, double>(42, 3.8)); + +void BM_non_template_args(benchmark::State& state, int, double) { + while(state.KeepRunning()) {} +} +BENCHMARK_CAPTURE(BM_non_template_args, basic_test, 0, 0); + +#endif // BENCHMARK_HAS_CXX11 + +static void BM_DenseThreadRanges(benchmark::State& st) { + switch (st.range(0)) { + case 1: + assert(st.threads == 1 || st.threads == 2 || st.threads == 3); + break; + case 2: + assert(st.threads == 1 || st.threads == 3 || st.threads == 4); + break; + case 3: + assert(st.threads == 5 || st.threads == 8 || st.threads == 11 || + st.threads == 14); + break; + default: + assert(false && "Invalid test case number"); + } + while (st.KeepRunning()) { + } +} +BENCHMARK(BM_DenseThreadRanges)->Arg(1)->DenseThreadRange(1, 3); +BENCHMARK(BM_DenseThreadRanges)->Arg(2)->DenseThreadRange(1, 4, 2); +BENCHMARK(BM_DenseThreadRanges)->Arg(3)->DenseThreadRange(5, 14, 3); + +BENCHMARK_MAIN(); |