author      2019-06-17 22:18:29 +0000
committer   2019-06-17 22:18:29 +0000
commit      504b10ec5101b237e4c07e1f2de4b6c48138181e (patch)
tree        979c9ce8ab11efd05e4413305758dc5d6bc76ab4 /lib/libcxx/utils/google-benchmark/src
parent      A bit more KNF no binary change
Import libc++ 8.0.0.
Diffstat (limited to 'lib/libcxx/utils/google-benchmark/src')
20 files changed, 740 insertions, 325 deletions
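The headline change in this import is the split of "aggregates only" reporting into two independent knobs: --benchmark_report_aggregates_only (affects all reporters) and the new --benchmark_display_aggregates_only (affects only the console/display reporter, leaving the file reporter complete). A minimal sketch of how a user would exercise this, assuming the public benchmark.h API; BM_MemCpy and its sizes are invented for illustration, not part of this commit:

#include <benchmark/benchmark.h>
#include <cstring>
#include <vector>

static void BM_MemCpy(benchmark::State& state) {
  std::vector<char> src(state.range(0), 'x');
  std::vector<char> dst(state.range(0));
  for (auto _ : state) {
    std::memcpy(dst.data(), src.data(), src.size());
    benchmark::ClobberMemory();  // keep the copy from being optimized away
  }
}
BENCHMARK(BM_MemCpy)->Arg(8 << 10)->Repetitions(10);

BENCHMARK_MAIN();

Run as, e.g., ./bm --benchmark_display_aggregates_only=true --benchmark_out=out.json: the console would show only the mean/median/stddev rows, while out.json would still contain all ten repetitions.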
diff --git a/lib/libcxx/utils/google-benchmark/src/benchmark.cc b/lib/libcxx/utils/google-benchmark/src/benchmark.cc
index b14bc629143..aab07500af4 100644
--- a/lib/libcxx/utils/google-benchmark/src/benchmark.cc
+++ b/lib/libcxx/utils/google-benchmark/src/benchmark.cc
@@ -14,6 +14,7 @@
 #include "benchmark/benchmark.h"
 #include "benchmark_api_internal.h"
+#include "benchmark_runner.h"
 #include "internal_macros.h"
 
 #ifndef BENCHMARK_OS_WINDOWS
@@ -34,6 +35,7 @@
 #include <memory>
 #include <string>
 #include <thread>
+#include <utility>
 
 #include "check.h"
 #include "colorprint.h"
@@ -55,9 +57,9 @@ DEFINE_bool(benchmark_list_tests, false,
 DEFINE_string(benchmark_filter, ".",
               "A regular expression that specifies the set of benchmarks "
-              "to execute. If this flag is empty, no benchmarks are run. "
-              "If this flag is the string \"all\", all benchmarks linked "
-              "into the process are run.");
+              "to execute. If this flag is empty, or if this flag is the "
+              "string \"all\", all benchmarks linked into the binary are "
+              "run.");
 
 DEFINE_double(benchmark_min_time, 0.5,
               "Minimum number of seconds we should run benchmark before "
@@ -72,10 +74,19 @@ DEFINE_int32(benchmark_repetitions, 1,
              "The number of runs of each benchmark. If greater than 1, the "
              "mean and standard deviation of the runs will be reported.");
 
-DEFINE_bool(benchmark_report_aggregates_only, false,
-            "Report the result of each benchmark repetitions. When 'true' is "
-            "specified only the mean, standard deviation, and other statistics "
-            "are reported for repeated benchmarks.");
+DEFINE_bool(
+    benchmark_report_aggregates_only, false,
+    "Report the result of each benchmark repetitions. When 'true' is specified "
+    "only the mean, standard deviation, and other statistics are reported for "
+    "repeated benchmarks. Affects all reporters.");
+
+DEFINE_bool(
+    benchmark_display_aggregates_only, false,
+    "Display the result of each benchmark repetitions. When 'true' is "
+    "specified only the mean, standard deviation, and other statistics are "
+    "displayed for repeated benchmarks. Unlike "
+    "benchmark_report_aggregates_only, only affects the display reporter, but "
+    "*NOT* file reporter, which will still contain all the output.");
 
 DEFINE_string(benchmark_format, "console",
               "The format to use for console output. Valid values are "
@@ -103,193 +114,11 @@ DEFINE_int32(v, 0, "The level of verbose logging to output");
 
 namespace benchmark {
 
-namespace {
-static const size_t kMaxIterations = 1000000000;
-}  // end namespace
-
 namespace internal {
 
+// FIXME: wouldn't LTO mess this up?
 void UseCharPointer(char const volatile*) {}
 
-namespace {
-
-BenchmarkReporter::Run CreateRunReport(
-    const benchmark::internal::Benchmark::Instance& b,
-    const internal::ThreadManager::Result& results, double seconds) {
-  // Create report about this benchmark run.
-  BenchmarkReporter::Run report;
-
-  report.benchmark_name = b.name;
-  report.error_occurred = results.has_error_;
-  report.error_message = results.error_message_;
-  report.report_label = results.report_label_;
-  // This is the total iterations across all threads.
-  report.iterations = results.iterations;
-  report.time_unit = b.time_unit;
-
-  if (!report.error_occurred) {
-    double bytes_per_second = 0;
-    if (results.bytes_processed > 0 && seconds > 0.0) {
-      bytes_per_second = (results.bytes_processed / seconds);
-    }
-    double items_per_second = 0;
-    if (results.items_processed > 0 && seconds > 0.0) {
-      items_per_second = (results.items_processed / seconds);
-    }
-
-    if (b.use_manual_time) {
-      report.real_accumulated_time = results.manual_time_used;
-    } else {
-      report.real_accumulated_time = results.real_time_used;
-    }
-    report.cpu_accumulated_time = results.cpu_time_used;
-    report.bytes_per_second = bytes_per_second;
-    report.items_per_second = items_per_second;
-    report.complexity_n = results.complexity_n;
-    report.complexity = b.complexity;
-    report.complexity_lambda = b.complexity_lambda;
-    report.statistics = b.statistics;
-    report.counters = results.counters;
-    internal::Finish(&report.counters, results.iterations, seconds, b.threads);
-  }
-  return report;
-}
-
-// Execute one thread of benchmark b for the specified number of iterations.
-// Adds the stats collected for the thread into *total.
-void RunInThread(const benchmark::internal::Benchmark::Instance* b,
-                 size_t iters, int thread_id,
-                 internal::ThreadManager* manager) {
-  internal::ThreadTimer timer;
-  State st(iters, b->arg, thread_id, b->threads, &timer, manager);
-  b->benchmark->Run(st);
-  CHECK(st.iterations() >= st.max_iterations)
-      << "Benchmark returned before State::KeepRunning() returned false!";
-  {
-    MutexLock l(manager->GetBenchmarkMutex());
-    internal::ThreadManager::Result& results = manager->results;
-    results.iterations += st.iterations();
-    results.cpu_time_used += timer.cpu_time_used();
-    results.real_time_used += timer.real_time_used();
-    results.manual_time_used += timer.manual_time_used();
-    results.bytes_processed += st.bytes_processed();
-    results.items_processed += st.items_processed();
-    results.complexity_n += st.complexity_length_n();
-    internal::Increment(&results.counters, st.counters);
-  }
-  manager->NotifyThreadComplete();
-}
-
-std::vector<BenchmarkReporter::Run> RunBenchmark(
-    const benchmark::internal::Benchmark::Instance& b,
-    std::vector<BenchmarkReporter::Run>* complexity_reports) {
-  std::vector<BenchmarkReporter::Run> reports;  // return value
-
-  const bool has_explicit_iteration_count = b.iterations != 0;
-  size_t iters = has_explicit_iteration_count ? b.iterations : 1;
-  std::unique_ptr<internal::ThreadManager> manager;
-  std::vector<std::thread> pool(b.threads - 1);
-  const int repeats =
-      b.repetitions != 0 ? b.repetitions : FLAGS_benchmark_repetitions;
-  const bool report_aggregates_only =
-      repeats != 1 &&
-      (b.report_mode == internal::RM_Unspecified
-           ? FLAGS_benchmark_report_aggregates_only
-           : b.report_mode == internal::RM_ReportAggregatesOnly);
-  for (int repetition_num = 0; repetition_num < repeats; repetition_num++) {
-    for (;;) {
-      // Try benchmark
-      VLOG(2) << "Running " << b.name << " for " << iters << "\n";
-
-      manager.reset(new internal::ThreadManager(b.threads));
-      for (std::size_t ti = 0; ti < pool.size(); ++ti) {
-        pool[ti] = std::thread(&RunInThread, &b, iters,
-                               static_cast<int>(ti + 1), manager.get());
-      }
-      RunInThread(&b, iters, 0, manager.get());
-      manager->WaitForAllThreads();
-      for (std::thread& thread : pool) thread.join();
-      internal::ThreadManager::Result results;
-      {
-        MutexLock l(manager->GetBenchmarkMutex());
-        results = manager->results;
-      }
-      manager.reset();
-      // Adjust real/manual time stats since they were reported per thread.
-      results.real_time_used /= b.threads;
-      results.manual_time_used /= b.threads;
-
-      VLOG(2) << "Ran in " << results.cpu_time_used << "/"
-              << results.real_time_used << "\n";
-
-      // Base decisions off of real time if requested by this benchmark.
-      double seconds = results.cpu_time_used;
-      if (b.use_manual_time) {
-        seconds = results.manual_time_used;
-      } else if (b.use_real_time) {
-        seconds = results.real_time_used;
-      }
-
-      const double min_time =
-          !IsZero(b.min_time) ? b.min_time : FLAGS_benchmark_min_time;
-
-      // clang-format off
-      // turn off clang-format since it mangles prettiness here
-      // Determine if this run should be reported; Either it has
-      // run for a sufficient amount of time or because an error was reported.
-      const bool should_report = repetition_num > 0
-        || has_explicit_iteration_count  // An exact iteration count was requested
-        || results.has_error_
-        || iters >= kMaxIterations  // No chance to try again, we hit the limit.
-        || seconds >= min_time  // the elapsed time is large enough
-        // CPU time is specified but the elapsed real time greatly exceeds the
-        // minimum time. Note that user provided timers are except from this
-        // sanity check.
-        || ((results.real_time_used >= 5 * min_time) && !b.use_manual_time);
-      // clang-format on
-
-      if (should_report) {
-        BenchmarkReporter::Run report = CreateRunReport(b, results, seconds);
-        if (!report.error_occurred && b.complexity != oNone)
-          complexity_reports->push_back(report);
-        reports.push_back(report);
-        break;
-      }
-
-      // See how much iterations should be increased by
-      // Note: Avoid division by zero with max(seconds, 1ns).
-      double multiplier = min_time * 1.4 / std::max(seconds, 1e-9);
-      // If our last run was at least 10% of FLAGS_benchmark_min_time then we
-      // use the multiplier directly. Otherwise we use at most 10 times
-      // expansion.
-      // NOTE: When the last run was at least 10% of the min time the max
-      // expansion should be 14x.
-      bool is_significant = (seconds / min_time) > 0.1;
-      multiplier = is_significant ?
-                       multiplier : std::min(10.0, multiplier);
-      if (multiplier <= 1.0) multiplier = 2.0;
-      double next_iters = std::max(multiplier * iters, iters + 1.0);
-      if (next_iters > kMaxIterations) {
-        next_iters = kMaxIterations;
-      }
-      VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
-      iters = static_cast<int>(next_iters + 0.5);
-    }
-  }
-  // Calculate additional statistics
-  auto stat_reports = ComputeStats(reports);
-  if ((b.complexity != oNone) && b.last_benchmark_instance) {
-    auto additional_run_stats = ComputeBigO(*complexity_reports);
-    stat_reports.insert(stat_reports.end(), additional_run_stats.begin(),
-                        additional_run_stats.end());
-    complexity_reports->clear();
-  }
-
-  if (report_aggregates_only) reports.clear();
-  reports.insert(reports.end(), stat_reports.begin(), stat_reports.end());
-  return reports;
-}
-
-}  // namespace
 }  // namespace internal
 
 State::State(size_t max_iters, const std::vector<int64_t>& ranges, int thread_i,
@@ -302,8 +131,6 @@ State::State(size_t max_iters, const std::vector<int64_t>& ranges, int thread_i,
       finished_(false),
       error_occurred_(false),
       range_(ranges),
-      bytes_processed_(0),
-      items_processed_(0),
       complexity_n_(0),
      counters(),
      thread_index(thread_i),
@@ -394,25 +221,25 @@ void State::FinishKeepRunning() {
 namespace internal {
 namespace {
 
-void RunBenchmarks(const std::vector<Benchmark::Instance>& benchmarks,
-                   BenchmarkReporter* console_reporter,
+void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
+                   BenchmarkReporter* display_reporter,
                    BenchmarkReporter* file_reporter) {
   // Note the file_reporter can be null.
-  CHECK(console_reporter != nullptr);
+  CHECK(display_reporter != nullptr);
 
   // Determine the width of the name field using a minimum width of 10.
-  bool has_repetitions = FLAGS_benchmark_repetitions > 1;
+  bool might_have_aggregates = FLAGS_benchmark_repetitions > 1;
   size_t name_field_width = 10;
   size_t stat_field_width = 0;
-  for (const Benchmark::Instance& benchmark : benchmarks) {
+  for (const BenchmarkInstance& benchmark : benchmarks) {
     name_field_width =
         std::max<size_t>(name_field_width, benchmark.name.size());
-    has_repetitions |= benchmark.repetitions > 1;
+    might_have_aggregates |= benchmark.repetitions > 1;
 
     for (const auto& Stat : *benchmark.statistics)
       stat_field_width = std::max<size_t>(stat_field_width, Stat.name_.size());
   }
-  if (has_repetitions) name_field_width += 1 + stat_field_width;
+  if (might_have_aggregates) name_field_width += 1 + stat_field_width;
 
   // Print header here
   BenchmarkReporter::Context context;
@@ -429,22 +256,36 @@ void RunBenchmarks(const std::vector<Benchmark::Instance>& benchmarks,
     std::flush(reporter->GetErrorStream());
   };
 
-  if (console_reporter->ReportContext(context) &&
+  if (display_reporter->ReportContext(context) &&
       (!file_reporter || file_reporter->ReportContext(context))) {
-    flushStreams(console_reporter);
+    flushStreams(display_reporter);
     flushStreams(file_reporter);
+
     for (const auto& benchmark : benchmarks) {
-      std::vector<BenchmarkReporter::Run> reports =
-          RunBenchmark(benchmark, &complexity_reports);
-      console_reporter->ReportRuns(reports);
-      if (file_reporter) file_reporter->ReportRuns(reports);
-      flushStreams(console_reporter);
+      RunResults run_results = RunBenchmark(benchmark, &complexity_reports);
+
+      auto report = [&run_results](BenchmarkReporter* reporter,
+                                   bool report_aggregates_only) {
+        assert(reporter);
+        // If there are no aggregates, do output non-aggregates.
+        report_aggregates_only &= !run_results.aggregates_only.empty();
+        if (!report_aggregates_only)
+          reporter->ReportRuns(run_results.non_aggregates);
+        if (!run_results.aggregates_only.empty())
+          reporter->ReportRuns(run_results.aggregates_only);
+      };
+
+      report(display_reporter, run_results.display_report_aggregates_only);
+      if (file_reporter)
+        report(file_reporter, run_results.file_report_aggregates_only);
+
+      flushStreams(display_reporter);
       flushStreams(file_reporter);
     }
   }
-  console_reporter->Finalize();
+  display_reporter->Finalize();
   if (file_reporter) file_reporter->Finalize();
-  flushStreams(console_reporter);
+  flushStreams(display_reporter);
   flushStreams(file_reporter);
 }
 
@@ -471,15 +312,20 @@ bool IsZero(double n) {
 
 ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color) {
   int output_opts = ConsoleReporter::OO_Defaults;
-  if ((FLAGS_benchmark_color == "auto" && IsColorTerminal()) ||
-      IsTruthyFlagValue(FLAGS_benchmark_color)) {
+  auto is_benchmark_color = [force_no_color] () -> bool {
+    if (force_no_color) {
+      return false;
+    }
+    if (FLAGS_benchmark_color == "auto") {
+      return IsColorTerminal();
+    }
+    return IsTruthyFlagValue(FLAGS_benchmark_color);
+  };
+  if (is_benchmark_color()) {
     output_opts |= ConsoleReporter::OO_Color;
   } else {
     output_opts &= ~ConsoleReporter::OO_Color;
   }
-  if (force_no_color) {
-    output_opts &= ~ConsoleReporter::OO_Color;
-  }
   if (FLAGS_benchmark_counters_tabular) {
     output_opts |= ConsoleReporter::OO_Tabular;
   } else {
@@ -494,11 +340,11 @@ size_t RunSpecifiedBenchmarks() {
   return RunSpecifiedBenchmarks(nullptr, nullptr);
 }
 
-size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter) {
-  return RunSpecifiedBenchmarks(console_reporter, nullptr);
+size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter) {
+  return RunSpecifiedBenchmarks(display_reporter, nullptr);
 }
 
-size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter,
+size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
                               BenchmarkReporter* file_reporter) {
   std::string spec = FLAGS_benchmark_filter;
   if (spec.empty() || spec == "all")
@@ -506,15 +352,15 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter,
 
   // Setup the reporters
   std::ofstream output_file;
-  std::unique_ptr<BenchmarkReporter> default_console_reporter;
+  std::unique_ptr<BenchmarkReporter> default_display_reporter;
   std::unique_ptr<BenchmarkReporter> default_file_reporter;
-  if (!console_reporter) {
-    default_console_reporter = internal::CreateReporter(
+  if (!display_reporter) {
+    default_display_reporter = internal::CreateReporter(
         FLAGS_benchmark_format, internal::GetOutputOptions());
-    console_reporter = default_console_reporter.get();
+    display_reporter = default_display_reporter.get();
   }
-  auto& Out = console_reporter->GetOutputStream();
-  auto& Err = console_reporter->GetErrorStream();
+  auto& Out = display_reporter->GetOutputStream();
+  auto& Err = display_reporter->GetErrorStream();
 
   std::string const& fname = FLAGS_benchmark_out;
   if (fname.empty() && file_reporter) {
@@ -538,7 +384,7 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter,
     file_reporter->SetErrorStream(&output_file);
   }
 
-  std::vector<internal::Benchmark::Instance> benchmarks;
+  std::vector<internal::BenchmarkInstance> benchmarks;
   if (!FindBenchmarksInternal(spec, &benchmarks, &Err)) return 0;
 
   if (benchmarks.empty()) {
@@ -549,12 +395,16 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter,
   if (FLAGS_benchmark_list_tests) {
     for (auto const& benchmark : benchmarks)
      Out <<
          benchmark.name << "\n";
   } else {
-    internal::RunBenchmarks(benchmarks, console_reporter, file_reporter);
+    internal::RunBenchmarks(benchmarks, display_reporter, file_reporter);
   }
 
   return benchmarks.size();
 }
 
+void RegisterMemoryManager(MemoryManager* manager) {
+  internal::memory_manager = manager;
+}
+
 namespace internal {
 
 void PrintUsageAndExit() {
@@ -564,7 +414,8 @@ void PrintUsageAndExit() {
           " [--benchmark_filter=<regex>]\n"
           " [--benchmark_min_time=<min_time>]\n"
          " [--benchmark_repetitions=<num_repetitions>]\n"
-          " [--benchmark_report_aggregates_only={true|false}\n"
+          " [--benchmark_report_aggregates_only={true|false}]\n"
+          " [--benchmark_display_aggregates_only={true|false}]\n"
          " [--benchmark_format=<console|json|csv>]\n"
          " [--benchmark_out=<filename>]\n"
          " [--benchmark_out_format=<json|console|csv>]\n"
@@ -588,6 +439,8 @@ void ParseCommandLineFlags(int* argc, char** argv) {
                        &FLAGS_benchmark_repetitions) ||
         ParseBoolFlag(argv[i], "benchmark_report_aggregates_only",
                       &FLAGS_benchmark_report_aggregates_only) ||
+        ParseBoolFlag(argv[i], "benchmark_display_aggregates_only",
+                      &FLAGS_benchmark_display_aggregates_only) ||
         ParseStringFlag(argv[i], "benchmark_format", &FLAGS_benchmark_format) ||
         ParseStringFlag(argv[i], "benchmark_out", &FLAGS_benchmark_out) ||
         ParseStringFlag(argv[i], "benchmark_out_format",
diff --git a/lib/libcxx/utils/google-benchmark/src/benchmark_api_internal.cc b/lib/libcxx/utils/google-benchmark/src/benchmark_api_internal.cc
new file mode 100644
index 00000000000..8d3108363b8
--- /dev/null
+++ b/lib/libcxx/utils/google-benchmark/src/benchmark_api_internal.cc
@@ -0,0 +1,15 @@
+#include "benchmark_api_internal.h"
+
+namespace benchmark {
+namespace internal {
+
+State BenchmarkInstance::Run(
+    size_t iters, int thread_id, internal::ThreadTimer* timer,
+    internal::ThreadManager* manager) const {
+  State st(iters, arg, thread_id, threads, timer, manager);
+  benchmark->Run(st);
+  return st;
+}
+
+}  // internal
+}  // benchmark
diff --git a/lib/libcxx/utils/google-benchmark/src/benchmark_api_internal.h b/lib/libcxx/utils/google-benchmark/src/benchmark_api_internal.h
index dd7a3ffe8cb..0524a85c01d 100644
--- a/lib/libcxx/utils/google-benchmark/src/benchmark_api_internal.h
+++ b/lib/libcxx/utils/google-benchmark/src/benchmark_api_internal.h
@@ -2,10 +2,12 @@
 #define BENCHMARK_API_INTERNAL_H
 
 #include "benchmark/benchmark.h"
+#include "commandlineflags.h"
 
 #include <cmath>
 #include <iosfwd>
 #include <limits>
+#include <memory>
 #include <string>
 #include <vector>
 
@@ -13,10 +15,10 @@ namespace benchmark {
 namespace internal {
 
 // Information kept per benchmark we may want to run
-struct Benchmark::Instance {
+struct BenchmarkInstance {
   std::string name;
   Benchmark* benchmark;
-  ReportMode report_mode;
+  AggregationReportMode aggregation_report_mode;
   std::vector<int64_t> arg;
   TimeUnit time_unit;
   int range_multiplier;
@@ -31,10 +33,13 @@ struct Benchmark::Instance {
   double min_time;
   size_t iterations;
   int threads;  // Number of concurrent threads to us
+
+  State Run(size_t iters, int thread_id, internal::ThreadTimer* timer,
+            internal::ThreadManager* manager) const;
 };
 
 bool FindBenchmarksInternal(const std::string& re,
-                            std::vector<Benchmark::Instance>* benchmarks,
+                            std::vector<BenchmarkInstance>* benchmarks,
                             std::ostream* Err);
 
 bool IsZero(double n);
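The new RegisterMemoryManager() hook above pairs with the memory_manager global that benchmark_runner.cc (further down) consults around a dedicated measurement pass. A minimal sketch of wiring one in, assuming the MemoryManager interface this version exports from benchmark.h (Start(), Stop(Result*), with num_allocs and max_bytes_used fields); the global-new/delete counting strategy is our own illustration, not part of this commit:

#include <benchmark/benchmark.h>
#include <atomic>
#include <cstdlib>
#include <new>

static std::atomic<int64_t> g_allocs{0};
static std::atomic<int64_t> g_bytes{0};

// Count every allocation in the process (this also counts the library's own
// allocations, which is why real trackers are more careful than this sketch).
void* operator new(std::size_t size) {
  g_allocs.fetch_add(1, std::memory_order_relaxed);
  g_bytes.fetch_add(static_cast<int64_t>(size), std::memory_order_relaxed);
  if (void* p = std::malloc(size)) return p;
  throw std::bad_alloc();
}
void operator delete(void* p) noexcept { std::free(p); }

class NewDeleteMemoryManager : public benchmark::MemoryManager {
 public:
  void Start() override { g_allocs = 0; g_bytes = 0; }
  void Stop(Result* result) override {
    result->num_allocs = g_allocs.load();
    result->max_bytes_used = g_bytes.load();
  }
};

int main(int argc, char** argv) {
  static NewDeleteMemoryManager mm;
  benchmark::RegisterMemoryManager(&mm);
  benchmark::Initialize(&argc, argv);
  benchmark::RunSpecifiedBenchmarks();
}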
diff --git a/lib/libcxx/utils/google-benchmark/src/benchmark_register.cc b/lib/libcxx/utils/google-benchmark/src/benchmark_register.cc
index 26a89721c78..f17f5b223ce 100644
--- a/lib/libcxx/utils/google-benchmark/src/benchmark_register.cc
+++ b/lib/libcxx/utils/google-benchmark/src/benchmark_register.cc
@@ -78,7 +78,7 @@ class BenchmarkFamilies {
   // Extract the list of benchmark instances that match the specified
   // regular expression.
   bool FindBenchmarks(std::string re,
-                      std::vector<Benchmark::Instance>* benchmarks,
+                      std::vector<BenchmarkInstance>* benchmarks,
                       std::ostream* Err);
 
  private:
@@ -107,7 +107,7 @@ void BenchmarkFamilies::ClearBenchmarks() {
 }
 
 bool BenchmarkFamilies::FindBenchmarks(
-    std::string spec, std::vector<Benchmark::Instance>* benchmarks,
+    std::string spec, std::vector<BenchmarkInstance>* benchmarks,
     std::ostream* ErrStream) {
   CHECK(ErrStream);
   auto& Err = *ErrStream;
@@ -152,10 +152,10 @@ bool BenchmarkFamilies::FindBenchmarks(
 
     for (auto const& args : family->args_) {
       for (int num_threads : *thread_counts) {
-        Benchmark::Instance instance;
+        BenchmarkInstance instance;
         instance.name = family->name_;
         instance.benchmark = family.get();
-        instance.report_mode = family->report_mode_;
+        instance.aggregation_report_mode = family->aggregation_report_mode_;
         instance.arg = args;
         instance.time_unit = family->time_unit_;
         instance.range_multiplier = family->range_multiplier_;
@@ -182,14 +182,19 @@ bool BenchmarkFamilies::FindBenchmarks(
            }
          }
 
-          instance.name += StrFormat("%d", arg);
+          // we know that the args are always non-negative (see 'AddRange()'),
+          // thus print as 'unsigned'. BUT, do a cast due to the 32-bit builds.
+          instance.name += StrFormat("%lu", static_cast<unsigned long>(arg));
           ++arg_i;
         }
 
         if (!IsZero(family->min_time_))
           instance.name += StrFormat("/min_time:%0.3f", family->min_time_);
-        if (family->iterations_ != 0)
-          instance.name += StrFormat("/iterations:%d", family->iterations_);
+        if (family->iterations_ != 0) {
+          instance.name +=
+              StrFormat("/iterations:%lu",
+                        static_cast<unsigned long>(family->iterations_));
+        }
         if (family->repetitions_ != 0)
           instance.name += StrFormat("/repeats:%d", family->repetitions_);
 
@@ -225,7 +230,7 @@ Benchmark* RegisterBenchmarkInternal(Benchmark* bench) {
 // FIXME: This function is a hack so that benchmark.cc can access
 // `BenchmarkFamilies`
 bool FindBenchmarksInternal(const std::string& re,
-                            std::vector<Benchmark::Instance>* benchmarks,
+                            std::vector<BenchmarkInstance>* benchmarks,
                             std::ostream* Err) {
   return BenchmarkFamilies::GetInstance()->FindBenchmarks(re, benchmarks, Err);
 }
@@ -236,7 +241,7 @@ bool FindBenchmarksInternal(const std::string& re,
 
 Benchmark::Benchmark(const char* name)
     : name_(name),
-      report_mode_(RM_Unspecified),
+      aggregation_report_mode_(ARM_Unspecified),
       time_unit_(kNanosecond),
       range_multiplier_(kRangeMultiplier),
       min_time_(0),
@@ -369,7 +374,23 @@ Benchmark* Benchmark::Repetitions(int n) {
 }
 
 Benchmark* Benchmark::ReportAggregatesOnly(bool value) {
-  report_mode_ = value ? RM_ReportAggregatesOnly : RM_Default;
+  aggregation_report_mode_ = value ? ARM_ReportAggregatesOnly : ARM_Default;
+  return this;
+}
+
+Benchmark* Benchmark::DisplayAggregatesOnly(bool value) {
+  // If we were called, the report mode is no longer 'unspecified', in any case.
+  aggregation_report_mode_ = static_cast<AggregationReportMode>(
+      aggregation_report_mode_ | ARM_Default);
+
+  if (value) {
+    aggregation_report_mode_ = static_cast<AggregationReportMode>(
+        aggregation_report_mode_ | ARM_DisplayReportAggregatesOnly);
+  } else {
+    aggregation_report_mode_ = static_cast<AggregationReportMode>(
+        aggregation_report_mode_ & ~ARM_DisplayReportAggregatesOnly);
+  }
+
   return this;
 }
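The per-benchmark counterpart to the command-line flags: ReportAggregatesOnly() trims both reporters, while the new DisplayAggregatesOnly() trims only the console. A short sketch of how these builder calls compose (BM_Sort and its setup are invented for illustration):

#include <benchmark/benchmark.h>
#include <algorithm>
#include <random>
#include <vector>

static void BM_Sort(benchmark::State& state) {
  std::mt19937 rng(42);
  std::vector<int> v(1 << 12);
  for (auto _ : state) {
    state.PauseTiming();           // refill outside the timed region
    std::generate(v.begin(), v.end(), rng);
    state.ResumeTiming();
    std::sort(v.begin(), v.end());
  }
}

// Console: only the _mean/_median/_stddev rows; file output: all 20 runs.
BENCHMARK(BM_Sort)->Repetitions(20)->DisplayAggregatesOnly(true);

BENCHMARK_MAIN();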
diff --git a/lib/libcxx/utils/google-benchmark/src/benchmark_runner.cc b/lib/libcxx/utils/google-benchmark/src/benchmark_runner.cc
new file mode 100644
index 00000000000..38faeec8e3e
--- /dev/null
+++ b/lib/libcxx/utils/google-benchmark/src/benchmark_runner.cc
@@ -0,0 +1,350 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "benchmark_runner.h"
+#include "benchmark/benchmark.h"
+#include "benchmark_api_internal.h"
+#include "internal_macros.h"
+
+#ifndef BENCHMARK_OS_WINDOWS
+#ifndef BENCHMARK_OS_FUCHSIA
+#include <sys/resource.h>
+#endif
+#include <sys/time.h>
+#include <unistd.h>
+#endif
+
+#include <algorithm>
+#include <atomic>
+#include <condition_variable>
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <thread>
+#include <utility>
+
+#include "check.h"
+#include "colorprint.h"
+#include "commandlineflags.h"
+#include "complexity.h"
+#include "counter.h"
+#include "internal_macros.h"
+#include "log.h"
+#include "mutex.h"
+#include "re.h"
+#include "statistics.h"
+#include "string_util.h"
+#include "thread_manager.h"
+#include "thread_timer.h"
+
+namespace benchmark {
+
+namespace internal {
+
+MemoryManager* memory_manager = nullptr;
+
+namespace {
+
+static const size_t kMaxIterations = 1000000000;
+
+BenchmarkReporter::Run CreateRunReport(
+    const benchmark::internal::BenchmarkInstance& b,
+    const internal::ThreadManager::Result& results, size_t memory_iterations,
+    const MemoryManager::Result& memory_result, double seconds) {
+  // Create report about this benchmark run.
+  BenchmarkReporter::Run report;
+
+  report.run_name = b.name;
+  report.error_occurred = results.has_error_;
+  report.error_message = results.error_message_;
+  report.report_label = results.report_label_;
+  // This is the total iterations across all threads.
+  report.iterations = results.iterations;
+  report.time_unit = b.time_unit;
+
+  if (!report.error_occurred) {
+    if (b.use_manual_time) {
+      report.real_accumulated_time = results.manual_time_used;
+    } else {
+      report.real_accumulated_time = results.real_time_used;
+    }
+    report.cpu_accumulated_time = results.cpu_time_used;
+    report.complexity_n = results.complexity_n;
+    report.complexity = b.complexity;
+    report.complexity_lambda = b.complexity_lambda;
+    report.statistics = b.statistics;
+    report.counters = results.counters;
+
+    if (memory_iterations > 0) {
+      report.has_memory_result = true;
+      report.allocs_per_iter =
+          memory_iterations ? static_cast<double>(memory_result.num_allocs) /
+                                  memory_iterations
+                            : 0;
+      report.max_bytes_used = memory_result.max_bytes_used;
+    }
+
+    internal::Finish(&report.counters, results.iterations, seconds, b.threads);
+  }
+  return report;
+}
+
+// Execute one thread of benchmark b for the specified number of iterations.
+// Adds the stats collected for the thread into *total.
+void RunInThread(const BenchmarkInstance* b, size_t iters, int thread_id,
+                 ThreadManager* manager) {
+  internal::ThreadTimer timer;
+  State st = b->Run(iters, thread_id, &timer, manager);
+  CHECK(st.iterations() >= st.max_iterations)
+      << "Benchmark returned before State::KeepRunning() returned false!";
+  {
+    MutexLock l(manager->GetBenchmarkMutex());
+    internal::ThreadManager::Result& results = manager->results;
+    results.iterations += st.iterations();
+    results.cpu_time_used += timer.cpu_time_used();
+    results.real_time_used += timer.real_time_used();
+    results.manual_time_used += timer.manual_time_used();
+    results.complexity_n += st.complexity_length_n();
+    internal::Increment(&results.counters, st.counters);
+  }
+  manager->NotifyThreadComplete();
+}
+
+class BenchmarkRunner {
+ public:
+  BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_,
+                  std::vector<BenchmarkReporter::Run>* complexity_reports_)
+      : b(b_),
+        complexity_reports(*complexity_reports_),
+        min_time(!IsZero(b.min_time) ? b.min_time : FLAGS_benchmark_min_time),
+        repeats(b.repetitions != 0 ? b.repetitions
+                                   : FLAGS_benchmark_repetitions),
+        has_explicit_iteration_count(b.iterations != 0),
+        pool(b.threads - 1),
+        iters(has_explicit_iteration_count ? b.iterations : 1) {
+    run_results.display_report_aggregates_only =
+        (FLAGS_benchmark_report_aggregates_only ||
+         FLAGS_benchmark_display_aggregates_only);
+    run_results.file_report_aggregates_only =
+        FLAGS_benchmark_report_aggregates_only;
+    if (b.aggregation_report_mode != internal::ARM_Unspecified) {
+      run_results.display_report_aggregates_only =
+          (b.aggregation_report_mode &
+           internal::ARM_DisplayReportAggregatesOnly);
+      run_results.file_report_aggregates_only =
+          (b.aggregation_report_mode & internal::ARM_FileReportAggregatesOnly);
+    }
+
+    for (int repetition_num = 0; repetition_num < repeats; repetition_num++) {
+      const bool is_the_first_repetition = repetition_num == 0;
+      DoOneRepetition(is_the_first_repetition);
+    }
+
+    // Calculate additional statistics
+    run_results.aggregates_only = ComputeStats(run_results.non_aggregates);
+
+    // Maybe calculate complexity report
+    if ((b.complexity != oNone) && b.last_benchmark_instance) {
+      auto additional_run_stats = ComputeBigO(complexity_reports);
+      run_results.aggregates_only.insert(run_results.aggregates_only.end(),
+                                         additional_run_stats.begin(),
+                                         additional_run_stats.end());
+      complexity_reports.clear();
+    }
+  }
+
+  RunResults&& get_results() { return std::move(run_results); }
+
+ private:
+  RunResults run_results;
+
+  const benchmark::internal::BenchmarkInstance& b;
+  std::vector<BenchmarkReporter::Run>& complexity_reports;
+
+  const double min_time;
+  const int repeats;
+  const bool has_explicit_iteration_count;
+
+  std::vector<std::thread> pool;
+
+  size_t iters;  // preserved between repetitions!
+  // So only the first repetition has to find/calculate it,
+  // the other repetitions will just use that precomputed iteration count.
+
+  struct IterationResults {
+    internal::ThreadManager::Result results;
+    size_t iters;
+    double seconds;
+  };
+  IterationResults DoNIterations() {
+    VLOG(2) << "Running " << b.name << " for " << iters << "\n";
+
+    std::unique_ptr<internal::ThreadManager> manager;
+    manager.reset(new internal::ThreadManager(b.threads));
+
+    // Run all but one thread in separate threads
+    for (std::size_t ti = 0; ti < pool.size(); ++ti) {
+      pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti + 1),
+                             manager.get());
+    }
+    // And run one thread here directly.
+    // (If we were asked to run just one thread, we don't create new threads.)
+    // Yes, we need to do this here *after* we start the separate threads.
+    RunInThread(&b, iters, 0, manager.get());
+
+    // The main thread has finished. Now let's wait for the other threads.
+    manager->WaitForAllThreads();
+    for (std::thread& thread : pool) thread.join();
+
+    IterationResults i;
+    // Acquire the measurements/counters from the manager, UNDER THE LOCK!
+    {
+      MutexLock l(manager->GetBenchmarkMutex());
+      i.results = manager->results;
+    }
+
+    // And get rid of the manager.
+    manager.reset();
+
+    // Adjust real/manual time stats since they were reported per thread.
+    i.results.real_time_used /= b.threads;
+    i.results.manual_time_used /= b.threads;
+
+    VLOG(2) << "Ran in " << i.results.cpu_time_used << "/"
+            << i.results.real_time_used << "\n";
+
+    // So for how long were we running?
+    i.iters = iters;
+    // Base decisions off of real time if requested by this benchmark.
+    i.seconds = i.results.cpu_time_used;
+    if (b.use_manual_time) {
+      i.seconds = i.results.manual_time_used;
+    } else if (b.use_real_time) {
+      i.seconds = i.results.real_time_used;
+    }
+
+    return i;
+  }
+
+  size_t PredictNumItersNeeded(const IterationResults& i) const {
+    // See how many iterations should be run next.
+    // Note: Avoid division by zero with max(seconds, 1ns).
+    double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9);
+    // If our last run was at least 10% of FLAGS_benchmark_min_time then we
+    // use the multiplier directly.
+    // Otherwise we use at most 10 times expansion.
+    // NOTE: When the last run was at least 10% of the min time the max
+    // expansion should be 14x.
+    bool is_significant = (i.seconds / min_time) > 0.1;
+    multiplier = is_significant ? multiplier : std::min(10.0, multiplier);
+    if (multiplier <= 1.0) multiplier = 2.0;
+
+    // So what seems to be the sufficiently-large iteration count? Round up.
+    const size_t max_next_iters =
+        0.5 + std::max(multiplier * i.iters, i.iters + 1.0);
+    // But we do have *some* sanity limits though..
+    const size_t next_iters = std::min(max_next_iters, kMaxIterations);
+
+    VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
+    return next_iters;  // round up before conversion to integer.
+  }
+
+  bool ShouldReportIterationResults(const IterationResults& i) const {
+    // Determine if this run should be reported;
+    // Either it has run for a sufficient amount of time
+    // or because an error was reported.
+    return i.results.has_error_ ||
+           i.iters >= kMaxIterations ||  // Too many iterations already.
+           i.seconds >= min_time ||      // The elapsed time is large enough.
+           // CPU time is specified but the elapsed real time greatly exceeds
+           // the minimum time.
+           // Note that user provided timers are exempt from this sanity check.
+           ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time);
+  }
+
+  void DoOneRepetition(bool is_the_first_repetition) {
+    IterationResults i;
+
+    // We *may* be gradually increasing the length (iteration count)
+    // of the benchmark until we decide the results are significant.
+    // And once we do, we report those last results and exit.
+    // Please do note that if there are repetitions, the iteration count
+    // is *only* calculated for the *first* repetition, and other repetitions
+    // simply use that precomputed iteration count.
+    for (;;) {
+      i = DoNIterations();
+
+      // Do we consider the results to be significant?
+      // If we are doing repetitions, and the first repetition was already done,
+      // it has calculated the correct iteration time, so we have run that very
+      // iteration count just now. No need to calculate anything. Just report.
+      // Else, the normal rules apply.
+      const bool results_are_significant = !is_the_first_repetition ||
+                                           has_explicit_iteration_count ||
+                                           ShouldReportIterationResults(i);
+
+      if (results_are_significant) break;  // Good, let's report them!
+
+      // Nope, bad iteration. Let's re-estimate the hopefully-sufficient
+      // iteration count, and run the benchmark again...
+
+      iters = PredictNumItersNeeded(i);
+      assert(iters > i.iters &&
+             "if we did more iterations than we want to do the next time, "
+             "then we should have accepted the current iteration run.");
+    }
+
+    // Oh, one last thing, we need to also produce the 'memory measurements'..
+    MemoryManager::Result memory_result;
+    size_t memory_iterations = 0;
+    if (memory_manager != nullptr) {
+      // Only run a few iterations to reduce the impact of one-time
+      // allocations in benchmarks that are not properly managed.
+      memory_iterations = std::min<size_t>(16, iters);
+      memory_manager->Start();
+      std::unique_ptr<internal::ThreadManager> manager;
+      manager.reset(new internal::ThreadManager(1));
+      RunInThread(&b, memory_iterations, 0, manager.get());
+      manager->WaitForAllThreads();
+      manager.reset();
+
+      memory_manager->Stop(&memory_result);
+    }
+
+    // Ok, now actually report.
+    BenchmarkReporter::Run report = CreateRunReport(
+        b, i.results, memory_iterations, memory_result, i.seconds);
+
+    if (!report.error_occurred && b.complexity != oNone)
+      complexity_reports.push_back(report);
+
+    run_results.non_aggregates.push_back(report);
+  }
+};
+
+}  // end namespace
+
+RunResults RunBenchmark(
+    const benchmark::internal::BenchmarkInstance& b,
+    std::vector<BenchmarkReporter::Run>* complexity_reports) {
+  internal::BenchmarkRunner r(b, complexity_reports);
+  return r.get_results();
+}
+
+}  // end namespace internal
+
+}  // end namespace benchmark
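A standalone illustration of PredictNumItersNeeded()'s arithmetic (our own sketch, not library code): with min_time = 0.5s, a pass of 1000 iterations that took 0.1s is "significant" (0.1/0.5 = 20% > 10%), so the multiplier 0.5 * 1.4 / 0.1 = 7 is used directly and the next pass tries 7000 iterations.

#include <algorithm>
#include <cstdio>

size_t PredictNextIters(double min_time, double seconds, size_t iters) {
  const size_t kMaxIterations = 1000000000;
  // Target 1.4x the minimum time to leave headroom for noise.
  double multiplier = min_time * 1.4 / std::max(seconds, 1e-9);
  const bool is_significant = (seconds / min_time) > 0.1;
  if (!is_significant) multiplier = std::min(10.0, multiplier);
  if (multiplier <= 1.0) multiplier = 2.0;
  const size_t next =
      static_cast<size_t>(0.5 + std::max(multiplier * iters, iters + 1.0));
  return std::min(next, kMaxIterations);  // clamp to the sanity limit
}

int main() {
  std::printf("%zu\n", PredictNextIters(0.5, 0.1, 1000));  // prints 7000
}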
diff --git a/lib/libcxx/utils/google-benchmark/src/benchmark_runner.h b/lib/libcxx/utils/google-benchmark/src/benchmark_runner.h
new file mode 100644
index 00000000000..96e8282a11a
--- /dev/null
+++ b/lib/libcxx/utils/google-benchmark/src/benchmark_runner.h
@@ -0,0 +1,51 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef BENCHMARK_RUNNER_H_
+#define BENCHMARK_RUNNER_H_
+
+#include "benchmark_api_internal.h"
+#include "internal_macros.h"
+
+DECLARE_double(benchmark_min_time);
+
+DECLARE_int32(benchmark_repetitions);
+
+DECLARE_bool(benchmark_report_aggregates_only);
+
+DECLARE_bool(benchmark_display_aggregates_only);
+
+namespace benchmark {
+
+namespace internal {
+
+extern MemoryManager* memory_manager;
+
+struct RunResults {
+  std::vector<BenchmarkReporter::Run> non_aggregates;
+  std::vector<BenchmarkReporter::Run> aggregates_only;
+
+  bool display_report_aggregates_only = false;
+  bool file_report_aggregates_only = false;
+};
+
+RunResults RunBenchmark(
+    const benchmark::internal::BenchmarkInstance& b,
+    std::vector<BenchmarkReporter::Run>* complexity_reports);
+
+}  // namespace internal
+
+}  // end namespace benchmark
+
+#endif  // BENCHMARK_RUNNER_H_
diff --git a/lib/libcxx/utils/google-benchmark/src/colorprint.cc b/lib/libcxx/utils/google-benchmark/src/colorprint.cc
index 2dec4a8b28b..fff6a98818b 100644
--- a/lib/libcxx/utils/google-benchmark/src/colorprint.cc
+++ b/lib/libcxx/utils/google-benchmark/src/colorprint.cc
@@ -25,7 +25,7 @@
 #include "internal_macros.h"
 
 #ifdef BENCHMARK_OS_WINDOWS
-#include <Windows.h>
+#include <windows.h>
 #include <io.h>
 #else
 #include <unistd.h>
diff --git a/lib/libcxx/utils/google-benchmark/src/complexity.cc b/lib/libcxx/utils/google-benchmark/src/complexity.cc
index aafd538df21..6ef17660c95 100644
--- a/lib/libcxx/utils/google-benchmark/src/complexity.cc
+++ b/lib/libcxx/utils/google-benchmark/src/complexity.cc
@@ -73,8 +73,8 @@ std::string GetBigOString(BigO complexity) {
 //   - time          : Vector containing the times for the benchmark tests.
 //   - fitting_curve : lambda expression (e.g. [](int64_t n) {return n; };).
 
-// For a deeper explanation on the algorithm logic, look the README file at
-// http://github.com/ismaelJimenez/Minimal-Cpp-Least-Squared-Fit
+// For a deeper explanation on the algorithm logic, please refer to
+// https://en.wikipedia.org/wiki/Least_squares#Least_squares,_regression_analysis_and_statistics
 
 LeastSq MinimalLeastSq(const std::vector<int64_t>& n,
                        const std::vector<double>& time,
@@ -182,12 +182,15 @@ std::vector<BenchmarkReporter::Run> ComputeBigO(
     result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity);
     result_real = MinimalLeastSq(n, real_time, result_cpu.complexity);
   }
-  std::string benchmark_name =
-      reports[0].benchmark_name.substr(0, reports[0].benchmark_name.find('/'));
+
+  std::string run_name = reports[0].benchmark_name().substr(
+      0, reports[0].benchmark_name().find('/'));
 
   // Get the data from the accumulator to BenchmarkReporter::Run's.
   Run big_o;
-  big_o.benchmark_name = benchmark_name + "_BigO";
+  big_o.run_name = run_name;
+  big_o.run_type = BenchmarkReporter::Run::RT_Aggregate;
+  big_o.aggregate_name = "BigO";
   big_o.iterations = 0;
   big_o.real_accumulated_time = result_real.coef;
   big_o.cpu_accumulated_time = result_cpu.coef;
@@ -203,8 +206,10 @@ std::vector<BenchmarkReporter::Run> ComputeBigO(
 
   // Only add label to mean/stddev if it is same for all runs
   Run rms;
+  rms.run_name = run_name;
   big_o.report_label = reports[0].report_label;
-  rms.benchmark_name = benchmark_name + "_RMS";
+  rms.run_type = BenchmarkReporter::Run::RT_Aggregate;
+  rms.aggregate_name = "RMS";
   rms.report_label = big_o.report_label;
   rms.iterations = 0;
   rms.real_accumulated_time = result_real.rms / multiplier;
diff --git a/lib/libcxx/utils/google-benchmark/src/console_reporter.cc b/lib/libcxx/utils/google-benchmark/src/console_reporter.cc
index 48920ca7829..ca364727cb4 100644
--- a/lib/libcxx/utils/google-benchmark/src/console_reporter.cc
+++ b/lib/libcxx/utils/google-benchmark/src/console_reporter.cc
@@ -53,7 +53,7 @@ bool ConsoleReporter::ReportContext(const Context& context) {
 }
 
 void ConsoleReporter::PrintHeader(const Run& run) {
-  std::string str = FormatString("%-*s %13s %13s %10s", static_cast<int>(name_field_width_),
+  std::string str = FormatString("%-*s %13s %15s %12s", static_cast<int>(name_field_width_),
                                  "Benchmark", "Time", "CPU", "Iterations");
   if(!run.counters.empty()) {
     if(output_options_ & OO_Tabular) {
@@ -98,6 +98,21 @@ static void IgnoreColorPrint(std::ostream& out, LogColor, const char* fmt,
   va_end(args);
 }
 
+
+static std::string FormatTime(double time) {
+  // Align decimal places...
+  if (time < 1.0) {
+    return FormatString("%10.3f", time);
+  }
+  if (time < 10.0) {
+    return FormatString("%10.2f", time);
+  }
+  if (time < 100.0) {
+    return FormatString("%10.1f", time);
+  }
+  return FormatString("%10.0f", time);
+}
+
 void ConsoleReporter::PrintRunData(const Run& result) {
   typedef void(PrinterFn)(std::ostream&, LogColor, const char*, ...);
   auto& Out = GetOutputStream();
@@ -106,7 +121,7 @@ void ConsoleReporter::PrintRunData(const Run& result) {
   auto name_color =
       (result.report_big_o || result.report_rms) ? COLOR_BLUE : COLOR_GREEN;
   printer(Out, name_color, "%-*s ", name_field_width_,
-          result.benchmark_name.c_str());
+          result.benchmark_name().c_str());
 
   if (result.error_occurred) {
     printer(Out, COLOR_RED, "ERROR OCCURRED: \'%s\'",
             result.error_message.c_str());
     printer(Out, COLOR_DEFAULT, "\n");
     return;
   }
-  // Format bytes per second
-  std::string rate;
-  if (result.bytes_per_second > 0) {
-    rate = StrCat(" ", HumanReadableNumber(result.bytes_per_second), "B/s");
-  }
-
-  // Format items per second
-  std::string items;
-  if (result.items_per_second > 0) {
-    items =
-        StrCat(" ", HumanReadableNumber(result.items_per_second), " items/s");
-  }
 
   const double real_time = result.GetAdjustedRealTime();
   const double cpu_time = result.GetAdjustedCPUTime();
+  const std::string real_time_str = FormatTime(real_time);
+  const std::string cpu_time_str = FormatTime(cpu_time);
+
   if (result.report_big_o) {
     std::string big_o = GetBigOString(result.complexity);
-    printer(Out, COLOR_YELLOW, "%10.2f %s %10.2f %s ", real_time, big_o.c_str(),
+    printer(Out, COLOR_YELLOW, "%10.2f %-4s %10.2f %-4s ", real_time, big_o.c_str(),
             cpu_time, big_o.c_str());
   } else if (result.report_rms) {
-    printer(Out, COLOR_YELLOW, "%10.0f %% %10.0f %% ", real_time * 100,
-            cpu_time * 100);
+    printer(Out, COLOR_YELLOW, "%10.0f %-4s %10.0f %-4s ", real_time * 100, "%",
+            cpu_time * 100, "%");
   } else {
     const char* timeLabel = GetTimeUnitString(result.time_unit);
-    printer(Out, COLOR_YELLOW, "%10.0f %s %10.0f %s ", real_time, timeLabel,
-            cpu_time, timeLabel);
+    printer(Out, COLOR_YELLOW, "%s %-4s %s %-4s ", real_time_str.c_str(), timeLabel,
+            cpu_time_str.c_str(), timeLabel);
   }
 
   if (!result.report_big_o && !result.report_rms) {
@@ -150,7 +156,7 @@ void ConsoleReporter::PrintRunData(const Run& result) {
   for (auto& c : result.counters) {
     const std::size_t cNameLen = std::max(std::string::size_type(10),
                                           c.first.length());
-    auto const& s = HumanReadableNumber(c.second.value, 1000);
+    auto const& s = HumanReadableNumber(c.second.value, c.second.oneK);
     if (output_options_ & OO_Tabular) {
       if (c.second.flags & Counter::kIsRate) {
         printer(Out, COLOR_DEFAULT, " %*s/s", cNameLen - 2, s.c_str());
       } else {
@@ -164,14 +170,6 @@ void ConsoleReporter::PrintRunData(const Run& result) {
     }
   }
 
-  if (!rate.empty()) {
-    printer(Out, COLOR_DEFAULT, " %*s", 13, rate.c_str());
-  }
-
-  if (!items.empty()) {
-    printer(Out, COLOR_DEFAULT, " %*s", 18, items.c_str());
-  }
-
   if (!result.report_label.empty()) {
     printer(Out, COLOR_DEFAULT, " %s", result.report_label.c_str());
   }
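As the reporter hunks above and the csv_reporter.cc hunks below show, bytes_per_second and items_per_second are no longer dedicated Run fields: they now travel through the generic Run::counters map and the reporters fish them back out by name. From user code nothing changes; a minimal sketch (BM_Hash and its xor loop are invented for illustration):

#include <benchmark/benchmark.h>
#include <cstdint>
#include <vector>

static void BM_Hash(benchmark::State& state) {
  std::vector<uint8_t> buf(state.range(0), 0xab);
  for (auto _ : state) {
    uint64_t h = 1469598103934665603ull;  // FNV-1a-style mix, for show
    for (uint8_t b : buf) h = (h ^ b) * 1099511628211ull;
    benchmark::DoNotOptimize(h);
  }
  // Still reported as a rate, but internally stored as the
  // "bytes_per_second" entry of the counter map.
  state.SetBytesProcessed(int64_t(state.iterations()) * state.range(0));
}
BENCHMARK(BM_Hash)->Arg(4096);

BENCHMARK_MAIN();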
diff --git a/lib/libcxx/utils/google-benchmark/src/csv_reporter.cc b/lib/libcxx/utils/google-benchmark/src/csv_reporter.cc
index 4a641909d80..d2f1d27eb62 100644
--- a/lib/libcxx/utils/google-benchmark/src/csv_reporter.cc
+++ b/lib/libcxx/utils/google-benchmark/src/csv_reporter.cc
@@ -49,6 +49,8 @@ void CSVReporter::ReportRuns(const std::vector<Run>& reports) {
     // save the names of all the user counters
     for (const auto& run : reports) {
       for (const auto& cnt : run.counters) {
+        if (cnt.first == "bytes_per_second" || cnt.first == "items_per_second")
+          continue;
         user_counter_names_.insert(cnt.first);
       }
     }
@@ -69,6 +71,8 @@ void CSVReporter::ReportRuns(const std::vector<Run>& reports) {
     // check that all the current counters are saved in the name set
     for (const auto& run : reports) {
       for (const auto& cnt : run.counters) {
+        if (cnt.first == "bytes_per_second" || cnt.first == "items_per_second")
+          continue;
         CHECK(user_counter_names_.find(cnt.first) !=
               user_counter_names_.end())
            << "All counters must be present in each run. "
            << "Counter named \"" << cnt.first
@@ -88,7 +92,7 @@ void CSVReporter::PrintRunData(const Run& run) {
 
   // Field with embedded double-quote characters must be doubled and the field
   // delimited with double-quotes.
-  std::string name = run.benchmark_name;
+  std::string name = run.benchmark_name();
   ReplaceAll(&name, "\"", "\"\"");
   Out << '"' << name << "\",";
 
   if (run.error_occurred) {
@@ -117,12 +121,12 @@ void CSVReporter::PrintRunData(const Run& run) {
   }
   Out << ",";
 
-  if (run.bytes_per_second > 0.0) {
-    Out << run.bytes_per_second;
+  if (run.counters.find("bytes_per_second") != run.counters.end()) {
+    Out << run.counters.at("bytes_per_second");
   }
   Out << ",";
-  if (run.items_per_second > 0.0) {
-    Out << run.items_per_second;
+  if (run.counters.find("items_per_second") != run.counters.end()) {
+    Out << run.counters.at("items_per_second");
   }
   Out << ",";
   if (!run.report_label.empty()) {
diff --git a/lib/libcxx/utils/google-benchmark/src/cycleclock.h b/lib/libcxx/utils/google-benchmark/src/cycleclock.h
index 00d57641676..f5e37b011b9 100644
--- a/lib/libcxx/utils/google-benchmark/src/cycleclock.h
+++ b/lib/libcxx/utils/google-benchmark/src/cycleclock.h
@@ -41,7 +41,7 @@ extern "C" uint64_t __rdtsc();
 #pragma intrinsic(__rdtsc)
 #endif
 
-#ifndef BENCHMARK_OS_WINDOWS
+#if !defined(BENCHMARK_OS_WINDOWS) || defined(BENCHMARK_OS_MINGW)
 #include <sys/time.h>
 #include <time.h>
 #endif
diff --git a/lib/libcxx/utils/google-benchmark/src/internal_macros.h b/lib/libcxx/utils/google-benchmark/src/internal_macros.h
index b7e9203ff60..5dbf4fd2752 100644
--- a/lib/libcxx/utils/google-benchmark/src/internal_macros.h
+++ b/lib/libcxx/utils/google-benchmark/src/internal_macros.h
@@ -11,9 +11,6 @@
 #ifndef __has_feature
 #define __has_feature(x) 0
 #endif
-#ifndef __has_builtin
-#define __has_builtin(x) 0
-#endif
 
 #if defined(__clang__)
   #if !defined(COMPILER_CLANG)
@@ -43,6 +40,9 @@
   #define BENCHMARK_OS_CYGWIN 1
 #elif defined(_WIN32)
   #define BENCHMARK_OS_WINDOWS 1
+  #if defined(__MINGW32__)
+    #define BENCHMARK_OS_MINGW 1
+  #endif
 #elif defined(__APPLE__)
   #define BENCHMARK_OS_APPLE 1
   #include "TargetConditionals.h"
@@ -87,14 +87,6 @@
   #define BENCHMARK_MAYBE_UNUSED
 #endif
 
-#if defined(COMPILER_GCC) || __has_builtin(__builtin_unreachable)
-  #define BENCHMARK_UNREACHABLE() __builtin_unreachable()
-#elif defined(COMPILER_MSVC)
-  #define BENCHMARK_UNREACHABLE() __assume(false)
-#else
-  #define BENCHMARK_UNREACHABLE() ((void)0)
-#endif
-
 // clang-format on
 
 #endif  // BENCHMARK_INTERNAL_MACROS_H_
diff --git a/lib/libcxx/utils/google-benchmark/src/json_reporter.cc b/lib/libcxx/utils/google-benchmark/src/json_reporter.cc
index 611605af6b5..7d01e8e4e31 100644
--- a/lib/libcxx/utils/google-benchmark/src/json_reporter.cc
+++ b/lib/libcxx/utils/google-benchmark/src/json_reporter.cc
@@ -77,8 +77,15 @@ bool JSONReporter::ReportContext(const Context& context) {
   std::string walltime_value = LocalDateTimeString();
   out << indent << FormatKV("date", walltime_value) << ",\n";
 
+  out << indent << FormatKV("host_name", context.sys_info.name) << ",\n";
+
   if (Context::executable_name) {
-    out << indent << FormatKV("executable", Context::executable_name) << ",\n";
+    // windows uses backslash for its path separator,
+    // which must be escaped in JSON otherwise it blows up conforming JSON
+    // decoders
+    std::string executable_name = Context::executable_name;
+    ReplaceAll(&executable_name, "\\", "\\\\");
+    out << indent << FormatKV("executable", executable_name) << ",\n";
   }
 
   CPUInfo const& info = context.cpu_info;
@@ -111,6 +118,12 @@ bool JSONReporter::ReportContext(const Context& context) {
   }
   indent = std::string(4, ' ');
   out << indent << "],\n";
+  out << indent << "\"load_avg\": [";
+  for (auto it = info.load_avg.begin(); it != info.load_avg.end();) {
+    out << *it++;
+    if (it != info.load_avg.end()) out << ",";
+  }
+  out << "],\n";
 
 #if defined(NDEBUG)
   const char build_type[] = "release";
@@ -154,7 +167,20 @@ void JSONReporter::Finalize() {
 void JSONReporter::PrintRunData(Run const& run) {
   std::string indent(6, ' ');
   std::ostream& out = GetOutputStream();
-  out << indent << FormatKV("name", run.benchmark_name) << ",\n";
+  out << indent << FormatKV("name", run.benchmark_name()) << ",\n";
+  out << indent << FormatKV("run_name", run.run_name) << ",\n";
+  out << indent << FormatKV("run_type", [&run]() -> const char* {
+    switch (run.run_type) {
+      case BenchmarkReporter::Run::RT_Iteration:
+        return "iteration";
+      case BenchmarkReporter::Run::RT_Aggregate:
+        return "aggregate";
+    }
+    BENCHMARK_UNREACHABLE();
+  }()) << ",\n";
+  if (run.run_type == BenchmarkReporter::Run::RT_Aggregate) {
+    out << indent << FormatKV("aggregate_name", run.aggregate_name) << ",\n";
+  }
   if (run.error_occurred) {
     out << indent << FormatKV("error_occurred", run.error_occurred) << ",\n";
     out << indent << FormatKV("error_message", run.error_message) << ",\n";
@@ -175,17 +201,16 @@ void JSONReporter::PrintRunData(Run const& run) {
   } else if (run.report_rms) {
     out << indent << FormatKV("rms", run.GetAdjustedCPUTime());
   }
-  if (run.bytes_per_second > 0.0) {
-    out << ",\n"
-        << indent << FormatKV("bytes_per_second", run.bytes_per_second);
-  }
-  if (run.items_per_second > 0.0) {
-    out << ",\n"
-        << indent << FormatKV("items_per_second", run.items_per_second);
-  }
+
   for (auto& c : run.counters) {
     out << ",\n" << indent << FormatKV(c.first, c.second);
   }
+
+  if (run.has_memory_result) {
+    out << ",\n" << indent << FormatKV("allocs_per_iter", run.allocs_per_iter);
+    out << ",\n" << indent << FormatKV("max_bytes_used", run.max_bytes_used);
+  }
+
   if (!run.report_label.empty()) {
     out << ",\n" << indent << FormatKV("label", run.report_label);
   }
diff --git a/lib/libcxx/utils/google-benchmark/src/reporter.cc b/lib/libcxx/utils/google-benchmark/src/reporter.cc
index 541661a25f0..59bc5f71023 100644
--- a/lib/libcxx/utils/google-benchmark/src/reporter.cc
+++ b/lib/libcxx/utils/google-benchmark/src/reporter.cc
@@ -22,6 +22,7 @@
 #include <vector>
 
 #include "check.h"
+#include "string_util.h"
 
 namespace benchmark {
 
@@ -54,6 +55,14 @@ void BenchmarkReporter::PrintBasicContext(std::ostream *out,
       Out << "\n";
     }
   }
+  if (!info.load_avg.empty()) {
+    Out << "Load Average: ";
+    for (auto It = info.load_avg.begin(); It != info.load_avg.end();) {
+      Out << StrFormat("%.2f", *It++);
+      if (It != info.load_avg.end()) Out << ", ";
+    }
+    Out << "\n";
+  }
 
   if (info.scaling_enabled) {
     Out << "***WARNING*** CPU scaling is enabled, the benchmark "
@@ -70,7 +79,16 @@ void BenchmarkReporter::PrintBasicContext(std::ostream *out,
 
 // No initializer because it's already initialized to NULL.
 const char *BenchmarkReporter::Context::executable_name;
 
-BenchmarkReporter::Context::Context() : cpu_info(CPUInfo::Get()) {}
+BenchmarkReporter::Context::Context()
+    : cpu_info(CPUInfo::Get()), sys_info(SystemInfo::Get()) {}
+
+std::string BenchmarkReporter::Run::benchmark_name() const {
+  std::string name = run_name;
+  if (run_type == RT_Aggregate) {
+    name += "_" + aggregate_name;
+  }
+  return name;
+}
 
 double BenchmarkReporter::Run::GetAdjustedRealTime() const {
   double new_time = real_accumulated_time * GetTimeUnitMultiplier(time_unit);
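The benchmark_name() accessor above centralizes the naming rule that reporters previously built by string concatenation: the displayed name is run_name, with "_<aggregate_name>" appended only for aggregate rows, so a repeated "BM_Sort/4096" yields "BM_Sort/4096_mean", "BM_Sort/4096_median", and so on. A standalone re-statement of that rule (our sketch, not library code):

#include <iostream>
#include <string>

std::string DisplayName(const std::string& run_name, bool is_aggregate,
                        const std::string& aggregate_name) {
  return is_aggregate ? run_name + "_" + aggregate_name : run_name;
}

int main() {
  std::cout << DisplayName("BM_Sort/4096", false, "") << "\n";     // BM_Sort/4096
  std::cout << DisplayName("BM_Sort/4096", true, "mean") << "\n";  // BM_Sort/4096_mean
}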
diff --git a/lib/libcxx/utils/google-benchmark/src/sleep.cc b/lib/libcxx/utils/google-benchmark/src/sleep.cc
index 54aa04a4224..1512ac90f7e 100644
--- a/lib/libcxx/utils/google-benchmark/src/sleep.cc
+++ b/lib/libcxx/utils/google-benchmark/src/sleep.cc
@@ -21,7 +21,7 @@
 #include "internal_macros.h"
 
 #ifdef BENCHMARK_OS_WINDOWS
-#include <Windows.h>
+#include <windows.h>
 #endif
 
 namespace benchmark {
diff --git a/lib/libcxx/utils/google-benchmark/src/statistics.cc b/lib/libcxx/utils/google-benchmark/src/statistics.cc
index 612dda2d1a7..e821aec18b7 100644
--- a/lib/libcxx/utils/google-benchmark/src/statistics.cc
+++ b/lib/libcxx/utils/google-benchmark/src/statistics.cc
@@ -91,13 +91,9 @@ std::vector<BenchmarkReporter::Run> ComputeStats(
   // Accumulators.
   std::vector<double> real_accumulated_time_stat;
   std::vector<double> cpu_accumulated_time_stat;
-  std::vector<double> bytes_per_second_stat;
-  std::vector<double> items_per_second_stat;
 
   real_accumulated_time_stat.reserve(reports.size());
   cpu_accumulated_time_stat.reserve(reports.size());
-  bytes_per_second_stat.reserve(reports.size());
-  items_per_second_stat.reserve(reports.size());
 
   // All repetitions should be run with the same number of iterations so we
   // can take this information from the first benchmark.
@@ -123,13 +119,11 @@ std::vector<BenchmarkReporter::Run> ComputeStats(
 
   // Populate the accumulators.
   for (Run const& run : reports) {
-    CHECK_EQ(reports[0].benchmark_name, run.benchmark_name);
+    CHECK_EQ(reports[0].benchmark_name(), run.benchmark_name());
     CHECK_EQ(run_iterations, run.iterations);
     if (run.error_occurred) continue;
     real_accumulated_time_stat.emplace_back(run.real_accumulated_time);
     cpu_accumulated_time_stat.emplace_back(run.cpu_accumulated_time);
-    items_per_second_stat.emplace_back(run.items_per_second);
-    bytes_per_second_stat.emplace_back(run.bytes_per_second);
     // user counters
     for (auto const& cnt : run.counters) {
       auto it = counter_stats.find(cnt.first);
@@ -147,24 +141,43 @@ std::vector<BenchmarkReporter::Run> ComputeStats(
     }
   }
 
+  const double iteration_rescale_factor =
+      double(reports.size()) / double(run_iterations);
+
   for (const auto& Stat : *reports[0].statistics) {
     // Get the data from the accumulator to BenchmarkReporter::Run's.
     Run data;
-    data.benchmark_name = reports[0].benchmark_name + "_" + Stat.name_;
+    data.run_name = reports[0].benchmark_name();
+    data.run_type = BenchmarkReporter::Run::RT_Aggregate;
+    data.aggregate_name = Stat.name_;
     data.report_label = report_label;
-    data.iterations = run_iterations;
+
+    // It is incorrect to say that an aggregate is computed over
+    // run's iterations, because those iterations already got averaged.
+    // Similarly, if there are N repetitions with 1 iterations each,
+    // an aggregate will be computed over N measurements, not 1.
+    // Thus it is best to simply use the count of separate reports.
+    data.iterations = reports.size();
 
     data.real_accumulated_time = Stat.compute_(real_accumulated_time_stat);
     data.cpu_accumulated_time = Stat.compute_(cpu_accumulated_time_stat);
-    data.bytes_per_second = Stat.compute_(bytes_per_second_stat);
-    data.items_per_second = Stat.compute_(items_per_second_stat);
+
+    // We will divide these times by data.iterations when reporting, but the
+    // data.iterations is not necessarily the scale of these measurements,
+    // because in each repetition, these timers are a sum over all the
+    // iterations. And if we want to say that the stats are over N repetitions
+    // and not M iterations, we need to multiply these by (N/M).
+    data.real_accumulated_time *= iteration_rescale_factor;
+    data.cpu_accumulated_time *= iteration_rescale_factor;
 
     data.time_unit = reports[0].time_unit;
 
     // user counters
     for (auto const& kv : counter_stats) {
+      // Do *NOT* rescale the custom counters. They are already properly scaled.
       const auto uc_stat = Stat.compute_(kv.second.s);
-      auto c = Counter(uc_stat, counter_stats[kv.first].c.flags);
+      auto c = Counter(uc_stat, counter_stats[kv.first].c.flags,
+                       counter_stats[kv.first].c.oneK);
      data.counters[kv.first] = c;
    }
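Worked numbers for the iteration_rescale_factor above (values invented for illustration): with N = 3 repetitions of M = 1000 iterations each, where each repetition's real_accumulated_time sums to 2.0s, an aggregate row claims data.iterations = 3, so the accumulated times are scaled by N/M = 3/1000 to keep "time per iteration" = total/iterations honest:

#include <cassert>
#include <cmath>

int main() {
  const double reps = 3, run_iterations = 1000;
  const double mean_accumulated = 2.0;                // seconds over 1000 iters
  const double factor = reps / run_iterations;        // 0.003
  const double rescaled = mean_accumulated * factor;  // 0.006s over "3 iters"
  // Per-iteration time is preserved: 2.0/1000 == 0.006/3 == 2ms.
  assert(std::fabs(rescaled / reps - mean_accumulated / run_iterations) < 1e-12);
}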
diff --git a/lib/libcxx/utils/google-benchmark/src/string_util.h b/lib/libcxx/utils/google-benchmark/src/string_util.h
index 4a5501273cf..fc5f8b0304b 100644
--- a/lib/libcxx/utils/google-benchmark/src/string_util.h
+++ b/lib/libcxx/utils/google-benchmark/src/string_util.h
@@ -12,7 +12,11 @@ void AppendHumanReadable(int n, std::string* str);
 
 std::string HumanReadableNumber(double n, double one_k = 1024.0);
 
-std::string StrFormat(const char* format, ...);
+#ifdef __GNUC__
+__attribute__((format(printf, 1, 2)))
+#endif
+std::string
+StrFormat(const char* format, ...);
 
 inline std::ostream& StrCatImp(std::ostream& out) BENCHMARK_NOEXCEPT {
   return out;
diff --git a/lib/libcxx/utils/google-benchmark/src/sysinfo.cc b/lib/libcxx/utils/google-benchmark/src/sysinfo.cc
index 73064b97ba2..c0c07e5e62a 100644
--- a/lib/libcxx/utils/google-benchmark/src/sysinfo.cc
+++ b/lib/libcxx/utils/google-benchmark/src/sysinfo.cc
@@ -15,10 +15,11 @@
 #include "internal_macros.h"
 
 #ifdef BENCHMARK_OS_WINDOWS
-#include <Shlwapi.h>
+#include <shlwapi.h>
 #undef StrCat  // Don't let StrCat in string_util.h be renamed to lstrcatA
-#include <VersionHelpers.h>
-#include <Windows.h>
+#include <versionhelpers.h>
+#include <windows.h>
+#include <codecvt>
 #else
 #include <fcntl.h>
 #ifndef BENCHMARK_OS_FUCHSIA
@@ -52,6 +53,7 @@
 #include <limits>
 #include <memory>
 #include <sstream>
 #include <locale>
 
 #include "check.h"
 #include "cycleclock.h"
@@ -288,7 +290,7 @@ std::vector<CPUInfo::CacheInfo> GetCacheSizesMacOSX() {
     std::string name;
     std::string type;
     int level;
-    size_t num_sharing;
+    uint64_t num_sharing;
   } Cases[] = {{"hw.l1dcachesize", "Data", 1, CacheCounts[1]},
                {"hw.l1icachesize", "Instruction", 1, CacheCounts[1]},
                {"hw.l2cachesize", "Unified", 2, CacheCounts[2]},
@@ -366,6 +368,35 @@ std::vector<CPUInfo::CacheInfo> GetCacheSizes() {
 #endif
 }
 
+std::string GetSystemName() {
+#if defined(BENCHMARK_OS_WINDOWS)
+  std::string str;
+  const unsigned COUNT = MAX_COMPUTERNAME_LENGTH+1;
+  TCHAR  hostname[COUNT] = {'\0'};
+  DWORD DWCOUNT = COUNT;
+  if (!GetComputerName(hostname, &DWCOUNT))
+    return std::string("");
+#ifndef UNICODE
+  str = std::string(hostname, DWCOUNT);
+#else
+  //Using wstring_convert, Is deprecated in C++17
+  using convert_type = std::codecvt_utf8<wchar_t>;
+  std::wstring_convert<convert_type, wchar_t> converter;
+  std::wstring wStr(hostname, DWCOUNT);
+
+  str = converter.to_bytes(wStr);
+#endif
+  return str;
+#else  // defined(BENCHMARK_OS_WINDOWS)
+#ifdef BENCHMARK_OS_MACOSX  //Mac Doesnt have HOST_NAME_MAX defined
+#define HOST_NAME_MAX 64
+#endif
+  char hostname[HOST_NAME_MAX];
+  int retVal = gethostname(hostname, HOST_NAME_MAX);
+  if (retVal != 0) return std::string("");
+  return std::string(hostname);
+#endif  // Catch-all POSIX block.
+}
+
 int GetNumCPUs() {
 #ifdef BENCHMARK_HAS_SYSCTL
   int NumCPU = -1;
@@ -404,7 +435,13 @@ int GetNumCPUs() {
       if (ln.empty()) continue;
       size_t SplitIdx = ln.find(':');
       std::string value;
+#if defined(__s390__)
+      // s390 has another format in /proc/cpuinfo
+      // it needs to be parsed differently
+      if (SplitIdx != std::string::npos) value = ln.substr(Key.size()+1,SplitIdx-Key.size()-1);
+#else
       if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1);
+#endif
       if (ln.size() >= Key.size() && ln.compare(0, Key.size(), Key) == 0) {
         NumCPUs++;
         if (!value.empty()) {
@@ -571,6 +608,24 @@ double GetCPUCyclesPerSecond() {
   return static_cast<double>(cycleclock::Now() - start_ticks);
 }
 
+std::vector<double> GetLoadAvg() {
+#if defined BENCHMARK_OS_FREEBSD || defined(BENCHMARK_OS_LINUX) || \
+    defined BENCHMARK_OS_MACOSX || defined BENCHMARK_OS_NETBSD || \
+    defined BENCHMARK_OS_OPENBSD
+  constexpr int kMaxSamples = 3;
+  std::vector<double> res(kMaxSamples, 0.0);
+  const int nelem = getloadavg(res.data(), kMaxSamples);
+  if (nelem < 1) {
+    res.clear();
+  } else {
+    res.resize(nelem);
+  }
+  return res;
+#else
+  return {};
+#endif
+}
+
 }  // end namespace
 
 const CPUInfo& CPUInfo::Get() {
@@ -582,6 +637,14 @@ CPUInfo::CPUInfo()
     : num_cpus(GetNumCPUs()),
       cycles_per_second(GetCPUCyclesPerSecond()),
       caches(GetCacheSizes()),
-      scaling_enabled(CpuScalingEnabled(num_cpus)) {}
+      scaling_enabled(CpuScalingEnabled(num_cpus)),
+      load_avg(GetLoadAvg()) {}
+
+
+const SystemInfo& SystemInfo::Get() {
+  static const SystemInfo* info = new SystemInfo();
+  return *info;
+}
+
+SystemInfo::SystemInfo() : name(GetSystemName()) {}
 
 }  // end namespace benchmark
diff --git a/lib/libcxx/utils/google-benchmark/src/thread_manager.h b/lib/libcxx/utils/google-benchmark/src/thread_manager.h
index 82b4d72b62f..6e274c7ea6b 100644
--- a/lib/libcxx/utils/google-benchmark/src/thread_manager.h
+++ b/lib/libcxx/utils/google-benchmark/src/thread_manager.h
@@ -42,8 +42,6 @@ class ThreadManager {
     double real_time_used = 0;
     double cpu_time_used = 0;
     double manual_time_used = 0;
-    int64_t bytes_processed = 0;
-    int64_t items_processed = 0;
     int64_t complexity_n = 0;
     std::string report_label_;
     std::string error_message_;
diff --git a/lib/libcxx/utils/google-benchmark/src/timers.cc b/lib/libcxx/utils/google-benchmark/src/timers.cc
index 2010e2450b4..7613ff92c6e 100644
--- a/lib/libcxx/utils/google-benchmark/src/timers.cc
+++ b/lib/libcxx/utils/google-benchmark/src/timers.cc
@@ -16,10 +16,10 @@
 #include "internal_macros.h"
 
 #ifdef BENCHMARK_OS_WINDOWS
-#include <Shlwapi.h>
+#include <shlwapi.h>
 #undef StrCat  // Don't let StrCat in string_util.h be renamed to lstrcatA
-#include <VersionHelpers.h>
-#include <Windows.h>
+#include <versionhelpers.h>
+#include <windows.h>
 #else
 #include <fcntl.h>
 #ifndef BENCHMARK_OS_FUCHSIA