Suppose I have <pre class="prettyprint"><code>std::vector<T1> vec1 {/* filled with T1's */}; std::vector<T2> vec2 {/* filled with T2's */}; </code></pre> and some function <code>T1 f(T2)</code> which could of course be a lambda. What is the optimal way to concatenate <code>vec1</code> and <code>vec2</code> whilst applying <code>f</code> to each <code>T2</code> in <code>vec2</code>? The apparently obvious solution is <code>std::transform</code>, i.e. <pre class="prettyprint"><code>vec1.reserve(vec1.size() + vec2.size()); std::transform(vec2.begin(), vec2.end(), std::back_inserter(vec1), f); </code></pre> but I say this is not optimal as <code>std::back_inserter</code> must make an unnecessary capacity check on each inserted element. What would be optimal is something like <pre class="prettyprint"><code>vec1.insert(vec1.end(), vec2.begin(), vec2.end(), f); </code></pre> which could get away with a single capacity check. Sadly this is not valid C++. Essentially this is the same reason why <code>std::vector::insert</code> is optimal for vector concatenation, see this question and the comments in this question for further discussion on this point. So: <ol> <li>Is <code>std::transform</code> the optimal method using the STL?</li> <li>If so, can we do better?</li> <li>Is there a good reason why the <code>insert</code> function described above was left out of the STL?</li> </ol> UPDATE I've had a go at verifying if the multiple capacity checks do have any noticeable cost. To do this I basically just pass the id function (<code>f(x) = x</code>) to the <code>std::transform</code> and <code>push_back</code> methods discussed in the answers. 
The full code is: <pre class="prettyprint"><code>#include <iostream> #include <vector> #include <iterator> #include <algorithm> #include <cstdint> #include <chrono> #include <numeric> #include <random> using std::size_t; std::vector<int> generate_random_ints(size_t n) { std::default_random_engine generator; auto seed1 = std::chrono::system_clock::now().time_since_epoch().count(); generator.seed((unsigned) seed1); std::uniform_int_distribution<int> uniform {}; std::vector<int> v(n); std::generate_n(v.begin(), n, [&] () { return uniform(generator); }); return v; } template <typename D=std::chrono::nanoseconds, typename F> D benchmark(F f, unsigned num_tests) { D total {0}; for (unsigned i = 0; i < num_tests; ++i) { auto start = std::chrono::system_clock::now(); f(); auto end = std::chrono::system_clock::now(); total += std::chrono::duration_cast<D>(end - start); } return D {total / num_tests}; } template <typename T> void std_insert(std::vector<T> vec1, const std::vector<T> &vec2) { vec1.insert(vec1.end(), vec2.begin(), vec2.end()); } template <typename T1, typename T2, typename UnaryOperation> void push_back_concat(std::vector<T1> vec1, const std::vector<T2> &vec2, UnaryOperation op) { vec1.reserve(vec1.size() + vec2.size()); for (const auto& x : vec2) { vec1.push_back(op(x)); } } template <typename T1, typename T2, typename UnaryOperation> void transform_concat(std::vector<T1> vec1, const std::vector<T2> &vec2, UnaryOperation op) { vec1.reserve(vec1.size() + vec2.size()); std::transform(vec2.begin(), vec2.end(), std::back_inserter(vec1), op); } int main(int argc, char **argv) { unsigned num_tests {1000}; size_t vec1_size {10000000}; size_t vec2_size {10000000}; auto vec1 = generate_random_ints(vec1_size); auto vec2 = generate_random_ints(vec1_size); auto f_std_insert = [&vec1, &vec2] () { std_insert(vec1, vec2); }; auto f_push_back_id = [&vec1, &vec2] () { push_back_concat(vec1, vec2, [] (int i) { return i; }); }; auto f_transform_id = [&vec1, &vec2] () { 
transform_concat(vec1, vec2, [] (int i) { return i; }); }; auto std_insert_time = benchmark<std::chrono::milliseconds>(f_std_insert, num_tests).count(); auto push_back_id_time = benchmark<std::chrono::milliseconds>(f_push_back_id, num_tests).count(); auto transform_id_time = benchmark<std::chrono::milliseconds>(f_transform_id, num_tests).count(); std::cout << "std_insert: " << std_insert_time << "ms" << std::endl; std::cout << "push_back_id: " << push_back_id_time << "ms" << std::endl; std::cout << "transform_id: " << transform_id_time << "ms" << std::endl; return 0; } </code></pre> Compiled with: <pre class="prettyprint"><code>g++ vector_insert_demo.cpp -std=c++11 -O3 -o vector_insert_demo </code></pre> Output: <pre class="prettyprint"><code>std_insert: 44ms push_back_id: 61ms transform_id: 61ms </code></pre> The compiler will have inlined the lambda, so that cost can be safely be discounted. Unless anyone else has a viable explanation for these results (or is willing to check the assembly), I think it's reasonable to conclude there is a noticeable cost of the multiple capacity checks.

UPDATE: The performance difference is due to the <code>reserve()</code> calls, which, in libstdc++ at least, make the capacity be exactly what you request instead of using the exponential growth factor. <hr> I did some timing tests, with interesting results. Using <code>std::vector::insert</code> along with <code>boost::transform_iterator</code> was the fastest way I found by a large margin: Version 1: <pre class="prettyprint"><code>void appendTransformed1( std::vector<int> &vec1, const std::vector<float> &vec2 ) { auto v2begin = boost::make_transform_iterator(vec2.begin(),f); auto v2end = boost::make_transform_iterator(vec2.end(),f); vec1.insert(vec1.end(),v2begin,v2end); } </code></pre> Version 2: <pre class="prettyprint"><code>void appendTransformed2( std::vector<int> &vec1, const std::vector<float> &vec2 ) { vec1.reserve(vec1.size()+vec2.size()); for (auto x : vec2) { vec1.push_back(f(x)); } } </code></pre> Version 3: <pre class="prettyprint"><code>void appendTransformed3( std::vector<int> &vec1, const std::vector<float> &vec2 ) { vec1.reserve(vec1.size()+vec2.size()); std::transform(vec2.begin(),vec2.end(),std::inserter(vec1,vec1.end()),f); } </code></pre> Timing: <pre class="prettyprint"> Version 1: 0.59s Version 2: 8.22s Version 3: 8.42s </pre> main.cpp: <pre class="prettyprint"><code>#include <algorithm> #include <cassert> #include <chrono> #include <iterator> #include <iostream> #include <random> #include <vector> #include "appendtransformed.hpp" using std::cerr; template <typename Engine> static std::vector<int> randomInts(Engine &engine,size_t n) { auto distribution = std::uniform_int_distribution<int>(0,999); auto generator = [&]{return distribution(engine);}; auto vec = std::vector<int>(); std::generate_n(std::inserter(vec,vec.end()),n,generator); return vec; } template <typename Engine> static std::vector<float> randomFloats(Engine &engine,size_t n) { auto distribution = std::uniform_real_distribution<float>(0,1000); auto generator = [&]{return 
distribution(engine);}; auto vec = std::vector<float>(); std::generate_n(std::inserter(vec,vec.end()),n,generator); return vec; } static auto appendTransformedFunction(int version) -> void(*)(std::vector<int>&,const std::vector<float> &) { switch (version) { case 1: return appendTransformed1; case 2: return appendTransformed2; case 3: return appendTransformed3; default: cerr << "Unknown version: " << version << "\n"; exit(EXIT_FAILURE); } return 0; } int main(int argc,char **argv) { if (argc!=2) { cerr << "Usage: appendtest (1|2|3)\n"; exit(EXIT_FAILURE); } auto version = atoi(argv[1]); auto engine = std::default_random_engine(); auto vec1_size = 1000000u; auto vec2_size = 1000000u; auto count = 100; auto vec1 = randomInts(engine,vec1_size); auto vec2 = randomFloats(engine,vec2_size); namespace chrono = std::chrono; using chrono::system_clock; auto appendTransformed = appendTransformedFunction(version); auto start_time = system_clock::now(); for (auto i=0; i!=count; ++i) { appendTransformed(vec1,vec2); } auto end_time = system_clock::now(); assert(vec1.size() == vec1_size+count*vec2_size); auto sum = std::accumulate(vec1.begin(),vec1.end(),0u); auto elapsed_seconds = chrono::duration<float>(end_time-start_time).count(); cerr << "Using version " << version << ":\n"; cerr << " sum=" << sum << "\n"; cerr << " elapsed: " << elapsed_seconds << "s\n"; } </code></pre> Compiler: g++ 4.9.1 Options: -std=c++11 -O2

What is the optimal way to concatenate two vectors whilst transforming elements of one vector?

Suppose I have

std::vector<T1> vec1 {/* filled with T1's */};
std::vector<T2> vec2 {/* filled with T2's */};

and some function T1 f(T2) which could of course be a lambda. What is the optimal way to concatenate vec1 and vec2 whilst applying f to each T2 in vec2?

The apparently obvious solution is std::transform, i.e.

vec1.reserve(vec1.size() + vec2.size());
std::transform(vec2.begin(), vec2.end(), std::back_inserter(vec1), f);

but I say this is not optimal as std::back_inserter must make an unnecessary capacity check on each inserted element. What would be optimal is something like

vec1.insert(vec1.end(), vec2.begin(), vec2.end(), f);

which could get away with a single capacity check. Sadly this is not valid C++. Essentially this is the same reason why std::vector::insert is optimal for vector concatenation, see this question and the comments in this question for further discussion on this point.

So:

Is std::transform the optimal method using the STL?
If so, can we do better?
Is there a good reason why the insert function described above was left out of the STL?

UPDATE

I've had a go at verifying if the multiple capacity checks do have any noticeable cost. To do this I basically just pass the id function (f(x) = x) to the std::transform and push_back methods discussed in the answers. The full code is:

#include <iostream>
#include <vector>
#include <iterator>
#include <algorithm>
#include <cstdint>
#include <chrono>
#include <numeric>
#include <random>

using std::size_t;

// Builds a vector holding `n` pseudo-random ints.
//
// The engine is seeded from the current system_clock tick count, so every
// call (and every run of the program) produces different contents. Values
// come from a default-constructed std::uniform_int_distribution<int>.
std::vector<int> generate_random_ints(size_t n)
{
    const auto now_ticks = std::chrono::system_clock::now().time_since_epoch().count();

    std::default_random_engine engine;
    // The tick count is wider than the seed type; keeping only the low bits
    // is fine for a seed.
    engine.seed(static_cast<unsigned>(now_ticks));

    std::uniform_int_distribution<int> distribution {};
    std::vector<int> values(n);
    // Fill front to back, one draw per element.
    for (int& value : values) {
        value = distribution(engine);
    }
    return values;
}

// Calls `f` exactly `num_tests` times and returns the mean duration of a
// single call, converted to the duration type D (nanoseconds by default).
//
// - Timing uses std::chrono::steady_clock, which is monotonic, so wall-clock
//   adjustments during the run cannot distort or negate a sample.
// - Samples are accumulated at the clock's native resolution and converted
//   to D once at the end, so a coarse D (e.g. milliseconds) does not throw
//   away the sub-unit part of every individual sample.
// - With num_tests == 0 nothing is run and a zero duration is returned
//   (instead of dividing by zero).
template <typename D=std::chrono::nanoseconds, typename F>
D benchmark(F f, unsigned num_tests)
{
    using Clock = std::chrono::steady_clock;

    if (num_tests == 0) {
        return D {0};
    }

    Clock::duration total {0};
    for (unsigned i = 0; i < num_tests; ++i) {
        const auto start = Clock::now();
        f();
        total += Clock::now() - start;
    }
    // Average first, then convert: only one truncation happens.
    return std::chrono::duration_cast<D>(total / num_tests);
}

// Benchmark case: range-insert all of `vec2` at the end of `vec1`.
//
// `vec1` is taken by value on purpose, so the caller's vector is never
// modified and every timed call starts from the same input. The combined
// vector is discarded when the function returns.
template <typename T>
void std_insert(std::vector<T> vec1, const std::vector<T> &vec2)
{
    const auto tail = vec1.cend();
    vec1.insert(tail, vec2.cbegin(), vec2.cend());
}

// Benchmark case: reserve room for the combined size, then push_back
// `op(x)` for every element `x` of `vec2`, in order.
//
// `vec1` is taken by value on purpose, so the caller's vector is never
// modified; the combined vector is discarded on return.
template <typename T1, typename T2, typename UnaryOperation>
void push_back_concat(std::vector<T1> vec1, const std::vector<T2> &vec2, UnaryOperation op)
{
    const auto combined_size = vec1.size() + vec2.size();
    vec1.reserve(combined_size);

    for (auto it = vec2.begin(), last = vec2.end(); it != last; ++it) {
        vec1.push_back(op(*it));
    }
}

// Benchmark case: reserve room for the combined size, then append
// `op(x)` for every element of `vec2` using std::transform writing through
// a std::back_inserter.
//
// `vec1` is taken by value on purpose, so the caller's vector is never
// modified; the combined vector is discarded on return.
template <typename T1, typename T2, typename UnaryOperation>
void transform_concat(std::vector<T1> vec1, const std::vector<T2> &vec2, UnaryOperation op)
{
    const auto combined_size = vec1.size() + vec2.size();
    vec1.reserve(combined_size);

    auto sink = std::back_inserter(vec1);
    std::transform(vec2.begin(), vec2.end(), sink, op);
}

int main(int argc, char **argv)
{
    unsigned num_tests {1000};
    size_t vec1_size {10000000};
    size_t vec2_size {10000000};

    auto vec1 = generate_random_ints(vec1_size);
    auto vec2 = generate_random_ints(vec1_size);

    auto f_std_insert = [&vec1, &vec2] () {
        std_insert(vec1, vec2);
    };
    auto f_push_back_id = [&vec1, &vec2] () {
        push_back_concat(vec1, vec2, [] (int i) { return i; });
    };
    auto f_transform_id = [&vec1, &vec2] () {
        transform_concat(vec1, vec2, [] (int i) { return i; });
    };

    auto std_insert_time   = benchmark<std::chrono::milliseconds>(f_std_insert, num_tests).count();
    auto push_back_id_time = benchmark<std::chrono::milliseconds>(f_push_back_id, num_tests).count();
    auto transform_id_time = benchmark<std::chrono::milliseconds>(f_transform_id, num_tests).count();

    std::cout << "std_insert: " << std_insert_time << "ms" << std::endl;
    std::cout << "push_back_id: " << push_back_id_time << "ms" << std::endl;
    std::cout << "transform_id: " << transform_id_time << "ms" << std::endl;

    return 0;
}

Compiled with:

g++ vector_insert_demo.cpp -std=c++11 -O3 -o vector_insert_demo

Output:

std_insert: 44ms
push_back_id: 61ms
transform_id: 61ms

The compiler will have inlined the lambda, so that cost can safely be discounted. Unless anyone else has a viable explanation for these results (or is willing to check the assembly), I think it's reasonable to conclude there is a noticeable cost of the multiple capacity checks.

How do you concatenate vectors?

In R, vectors can be concatenated with the combine function c. For example, if we have three vectors x, y, z, then they can be concatenated as c(x,y,z). We can also concatenate vectors of different types at the same time using the same function.

What effect does the concatenate function have on vectors? Explain with an example.

Concatenate function: This is a function that combines its arguments. This method combines the arguments and results in a vector. All the arguments are converted to a common type that is the return value type. This function is also used to convert the array into vectors.

How do you concatenate a vector in C++?

C++ std::vector: Concatenating Vectors. One std::vector can be appended to another by using the member function insert(): std::vector<int> a = {0, 1, 2, 3, 4}; std::vector<int> b = {5, 6, 7, 8, 9}; a.insert(a.end(), b.begin(), b.end());

How do you add elements from one vector to another?

Appending a vector's elements to another vector: To insert/append a vector's elements to another vector, we use the vector::insert() function. Syntax: //inserting elements from other containers vector::insert(iterator position, iterator start_position, iterator end_position);

UPDATE: The performance difference is due to the reserve() calls, which, in libstdc++ at least, make the capacity be exactly what you request instead of using the exponential growth factor.

I did some timing tests, with interesting results. Using std::vector::insert along with boost::transform_iterator was the fastest way I found by a large margin:

Version 1:

// Version 1: append f(x) for every x in vec2 with a single range insert.
// The source range is vec2 viewed through boost transform iterators, so
// vector::insert receives the already-transformed values.
void
  appendTransformed1(
    std::vector<int> &vec1,
    const std::vector<float> &vec2
  )
{
  vec1.insert(
    vec1.end(),
    boost::make_transform_iterator(vec2.begin(),f),
    boost::make_transform_iterator(vec2.end(),f)
  );
}

Version 2:

// Version 2: reserve exactly the combined size, then push_back f(x) for
// every x in vec2, in order.
void
  appendTransformed2(
    std::vector<int> &vec1,
    const std::vector<float> &vec2
  )
{
  const auto combined_size = vec1.size()+vec2.size();
  vec1.reserve(combined_size);

  for (auto it = vec2.begin(); it != vec2.end(); ++it) {
    auto x = *it;  // work on a copy of the element
    vec1.push_back(f(x));
  }
}

Version 3:

// Version 3: reserve exactly the combined size, then let std::transform
// write f(x) for every x in vec2 through an insert iterator positioned at
// the end of vec1.
void
  appendTransformed3(
    std::vector<int> &vec1,
    const std::vector<float> &vec2
  )
{
  const auto combined_size = vec1.size()+vec2.size();
  vec1.reserve(combined_size);

  // The end iterator is taken after reserve(), so it refers to the current
  // storage.
  auto out = std::inserter(vec1,vec1.end());
  std::transform(vec2.begin(),vec2.end(),out,f);
}

Timing:

    Version 1: 0.59s
    Version 2: 8.22s
    Version 3: 8.42s

main.cpp:

#include <algorithm>
#include <cassert>
#include <chrono>
#include <cstdlib>
#include <iostream>
#include <iterator>
#include <numeric>
#include <random>
#include <vector>
#include "appendtransformed.hpp"

using std::cerr;

// Returns `n` ints drawn uniformly from [0, 999] using the caller's engine.
// The engine is advanced by the draws, so successive calls continue the
// same random sequence.
template <typename Engine>
static std::vector<int> randomInts(Engine &engine,size_t n)
{
  std::uniform_int_distribution<int> distribution(0,999);
  std::vector<int> values;
  for (size_t drawn = 0; drawn != n; ++drawn) {
    values.push_back(distribution(engine));
  }
  return values;
}

// Returns `n` floats drawn from std::uniform_real_distribution<float>(0,1000)
// using the caller's engine. The engine is advanced by the draws, so
// successive calls continue the same random sequence.
template <typename Engine>
static std::vector<float> randomFloats(Engine &engine,size_t n)
{
  std::uniform_real_distribution<float> distribution(0,1000);
  std::vector<float> values;
  for (size_t drawn = 0; drawn != n; ++drawn) {
    values.push_back(distribution(engine));
  }
  return values;
}

// Maps the command-line version number (1, 2 or 3) to the matching
// appendTransformedN function. Any other value prints an error to stderr
// and terminates the program with EXIT_FAILURE.
static auto
  appendTransformedFunction(int version) ->
    void(*)(std::vector<int>&,const std::vector<float> &)
{
  if (version == 1) {
    return appendTransformed1;
  }
  if (version == 2) {
    return appendTransformed2;
  }
  if (version == 3) {
    return appendTransformed3;
  }

  cerr << "Unknown version: " << version << "\n";
  exit(EXIT_FAILURE);

  // Not reached: exit() does not return. Kept so every path has a return.
  return nullptr;
}

// appendtest driver: runs the selected appendTransformed version `count`
// times, each time appending the transformed vec2 onto the same growing
// vec1, and reports the total elapsed time on stderr.
//
// Usage: appendtest (1|2|3)
int main(int argc,char **argv)
{
  if (argc!=2) {
    cerr << "Usage: appendtest (1|2|3)\n";
    std::exit(EXIT_FAILURE);
  }
  // A non-numeric argument parses as 0 and is rejected below as an
  // unknown version.
  const auto version = std::atoi(argv[1]);
  auto engine = std::default_random_engine();
  const auto vec1_size = 1000000u;
  const auto vec2_size = 1000000u;
  const auto count = 100;
  auto vec1 = randomInts(engine,vec1_size);
  const auto vec2 = randomFloats(engine,vec2_size);
  namespace chrono = std::chrono;
  // steady_clock is monotonic, so the measured interval cannot be skewed
  // by wall-clock adjustments while the test runs.
  using chrono::steady_clock;
  const auto appendTransformed = appendTransformedFunction(version);
  const auto start_time = steady_clock::now();
  for (auto i=0; i!=count; ++i) {
    appendTransformed(vec1,vec2);
  }
  const auto end_time = steady_clock::now();
  assert(vec1.size() == vec1_size+count*vec2_size);
  // The sum is printed so the appended data is actually used; it is
  // accumulated in an unsigned int and may wrap around.
  const auto sum = std::accumulate(vec1.begin(),vec1.end(),0u);
  const auto elapsed_seconds = chrono::duration<float>(end_time-start_time).count();

  cerr << "Using version " << version << ":\n";
  cerr << "  sum=" << sum << "\n";
  cerr << "  elapsed: " << elapsed_seconds << "s\n";
}

Compiler: g++ 4.9.1

Options: -std=c++11 -O2

What is the optimal way to concatenate two vectors whilst transforming elements of one vector?

Tags:

c++

stl

vector

Daniel

People also ask

1 Answers

Vaughn Cato

Recent Activity

Donate For Us

What is the optimal way to concatenate two vectors whilst transforming elements of one vector?

Tags:

c++

stl

vector

Daniel

People also ask

1 Answers

Vaughn Cato

Related questions

Recent Activity

Donate For Us