This is the code in Python that generates log-spaces values at a very quick time:
import numpy
print(numpy.logspace(0,1,num=10000000))
My try to simulate its output in C++, is the following:
#include <iostream>
#include <cmath>
#include <vector>
std::vector<double> logspace (const double &a, const double &b, const int &k)
{
std::vector<double> logspace;
for (int i = 0; i < k; i++)
{
logspace.push_back(pow(10, i * (b - a) / (k - 1)));
}
return logspace;
}
void logspace_print (std::vector<double> logspace)
{
for (auto ls : logspace)
{
std::cout << ls << "\n";
}
std::cout << "\n";
}
int main ()
{
std::vector<double> my_ls = logspace(0, 1, 10000000);
logspace_print(my_ls);
}
Waiver of floating-points arithmetic, using the function pow(., .) and a for-loop (and maybe lots of other reasons), makes my code as a naive one such as its run-time is hugely faint with respect to the Python's one. I saw recommendations at Is there something like numpy.logspace in C++? also. But, there is not mentionable significant difference. So, how can I modify my code or write a new one comparable with python's version?
Interesting question! My answer has the different versions of the functions at the top. Below is only the benchmarking code. Use google-benchmark as the library.
std::endl's flush, which is good! Furthermore, printf might be faster than std::cout. Also take a look at fmtlib 2. It is fast and easy to use.logspace_v3 in my version.) It consists of first running linspace and then taking to the power of 10 in-place. -march=native -mtune=native, and fast-math) vectorization should kick in. But I don't believe it does. Here is some Godbolt with vectorization (Line 590) 3.pow call. Note that this accumulates floating point error and leads to inaccurate results.
#include <algorithm>
#include <benchmark/benchmark.h>
#include <cmath>
#include <iostream>
#include <numeric>
#include <vector>
#include <gtest/gtest.h>
std::vector<double> logspace(double a, double b, int k) {
std::vector<double> logspace;
for (int i = 0; i < k; i++) {
logspace.push_back(pow(10, i * (b - a) / (k - 1)));
}
return logspace;
}
// Pre-allocate the correct size using .reserve()
std::vector<double> logspace_v1(double a, double b, int k) {
std::vector<double> logspace;
logspace.reserve(k);
for (int i = 0; i < k; i++) {
logspace.push_back(pow(10, i * (b - a) / (k - 1)));
}
return logspace;
}
/// Manually extract the constant factor.
std::vector<double> logspace_v2(double a, double b, int k) {
std::vector<double> logspace;
logspace.reserve(k);
const auto exp_scale = (b - a) / (k - 1);
for (int i = 0; i < k; i++) {
logspace.push_back(pow(10, i * exp_scale));
}
return logspace;
}
/// Copy the impl behavior of numpy.linspace: First linspace then power.
std::vector<double> logspace_v3(double a, double b, int k) {
/*
y = linspace(start, stop, num=num, endpoint=endpoint, axis=axis)
if dtype is None:
return _nx.power(base, y)
return _nx.power(base, y).astype(dtype, copy=False)
*/
const auto exp_scale = (b - a) / (k - 1);
std::vector<double> logspace;
logspace.reserve(k);
for (int i = 0; i < k; i++) {
logspace.push_back(i * exp_scale);
}
std::for_each(logspace.begin(), logspace.end(),
[](double &x) { x = pow(10, x); });
return logspace;
}
/// Improve on v3 by applying pow directly
std::vector<double> logspace_v4(double a, double b, int k) {
const auto exp_scale = (b - a) / (k - 1);
std::vector<double> logspace(k, 0.);
std::generate(logspace.begin(), logspace.end(),
[n = -1, exp_scale]() mutable {
n++;
return pow(10, n * exp_scale);
});
return logspace;
}
/// Use generate_n : First linspace then power.
std::vector<double> logspace_v5(double a, double b, int k) {
const auto exp_scale = (b - a) / (k - 1);
std::vector<double> logspace(k, 0.);
std::iota(logspace.begin(), logspace.end(), 0);
std::for_each(logspace.begin(), logspace.end(),
[exp_scale](double &x) { x *= exp_scale; });
std::for_each(logspace.begin(), logspace.end(),
[](double &x) { x = pow(10, x); });
return logspace;
}
std::vector<double> logspace_v6(double a, double b, int k) {
const auto exp_scale = (b - a) / (k - 1);
const auto factor = pow(10, exp_scale);
std::vector<double> logspace;
logspace.reserve(k);
// val = pow(b, i * exp_scale);
// = pow(pow(b, exp_scale), i);
// = pow(f, i); with f := pow(b, exp_scale);
// next = cur * f;
// first = pow(b, a);
double val = pow(10, a);
for (int i = 0; i < k; i++) {
logspace.push_back(val);
val *= factor;
}
return logspace;
}
template <std::vector<double> (*F)(double, double, int)>
static void LogspaceBench(benchmark::State &state) {
for (auto _ : state) {
benchmark::DoNotOptimize(F(0, 1, state.range(0)));
}
}
BENCHMARK_TEMPLATE(LogspaceBench, logspace)->Arg(1000);
BENCHMARK_TEMPLATE(LogspaceBench, logspace_v1)->Arg(1000);
BENCHMARK_TEMPLATE(LogspaceBench, logspace_v2)->Arg(1000);
BENCHMARK_TEMPLATE(LogspaceBench, logspace_v3)->Arg(1000)->Arg(10000000);
BENCHMARK_TEMPLATE(LogspaceBench, logspace_v4)->Arg(1000);
BENCHMARK_TEMPLATE(LogspaceBench, logspace_v5)->Arg(1000);
BENCHMARK_TEMPLATE(LogspaceBench, logspace_v6)->Arg(1000)->Arg(10000000);
class LogspaceTest
: public testing::TestWithParam<
std::function<std::vector<double>(double, double, int)>> {};
TEST_P(LogspaceTest, IsSame) {
auto func = GetParam();
const auto actual = func(0, 1., 1000);
const auto expected = logspace(0., 1., 1000);
// TODO: Buggy with (3, 70, 1000) and (0, 1, 1000)
ASSERT_EQ(expected.size(), actual.size());
for (int i = 0; i < expected.size(); i++) {
ASSERT_DOUBLE_EQ(actual[i], expected[i]) << i;
}
}
INSTANTIATE_TEST_SUITE_P(InstantiationName, LogspaceTest,
testing::Values(logspace, logspace_v1, logspace_v2,
logspace_v3, logspace_v4, logspace_v5,
logspace_v6));
int main(int argc, char **argv) {
::benchmark::Initialize(&argc, argv);
::benchmark::RunSpecifiedBenchmarks();
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
There are at least three obvious optimizations that can be easily made to the shown code.
1) Compile in C++17 mode to get guaranteed copy elision when returning from logspace.
2)
std::vector<double> logspace;
for (int i = 0; i < k; i++)
Use logspace.reserve() to preallocate the vector, to avoid useless repeated reallocations, while this vector gets populated.
3)
void logspace_print (std::vector<double> logspace)
Passing by value here creates an entire duplicate copy of the vector, for no useful purpose whatsoever. Change this function so that it takes the logspace parameter by reference.
There's one possible micro-optimization that may or may not make any difference:
logspace.push_back(pow(10, i * (b - a) / (k - 1)));
The "(b-a)/(k-1)" part of this formula is constant and can be unrolled out of the loop. I would, though, expect the compiler to do it on its own, it's a fairly basic optimization.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With