Static Polymorphism with boost variant single visitor vs multi visitor vs dynamic polymorphism

Tags:

I am comparing the performance of the following methods of C++ Polymorphism:

Method [1]. static polymorphism using boost variants with a separate visitor for each method Method [2]. static polymorphism using boost variants with a single visitor that calls different method using method overloading Method [3]. Plain old dynamic polymorphism

Platform: - Intel x86 64 bit Red Hat modern multi-core processor, 32 GB RAM - gcc (GCC) 4.8.1 with -O2 optimization - Boost 1.6.0

Some findings:

Method [1] seems to have significantly outperform Methods [2] and [3]
Method [3] outperforms Method [2] most of the time

My question is, why does Method [2] where I am using a visitor but using method overloading to call the correct method give worse performance than virtual methods. I would expect static polymorphism to fare better than dynamic polymorphism. I understand there is some cost of the extra parameter that is being passed in method [2] to figure which visit() method of the class to call and possibly some more branching due to method overloading? But shouldn"t this still outperform virtual methods?

Code is below:

// qcpptest.hpp

#ifndef INCLUDED_QCPPTEST_H
#define INCLUDED_QCPPTEST_H

#include <boost/variant.hpp>

class IShape {
 public:
  virtual void rotate() = 0;
  virtual void spin() = 0;
};

class Square : public IShape {
 public:
  void rotate() {
   // std::cout << "Square:I am rotating" << std::endl;
    }
  void spin() { 
    // std::cout << "Square:I am spinning" << std::endl; 
  }
};

class Circle : public IShape {
 public:
  void rotate() { 
    // std::cout << "Circle:I am rotating" << std::endl; 
  }
  void spin() {
   // std::cout << "Circle:I am spinning" << std::endl; 
}
};

// template variation

// enum class M {ADD, DEL};
struct ADD {};
struct DEL {};

class TSquare {
    int i;
 public:
    void visit(const ADD& add) {
        this->i++;
    // std::cout << "TSquare:I am rotating" << std::endl;
  }
    void visit(const DEL& del) {
        this->i++;
    // std::cout << "TSquare:I am spinning" << std::endl;
  }

    void spin() {
        this->i++;
     // std::cout << "TSquare:I am rotating" << std::endl; 
 }
    void rotate() {
        this->i++;
     // std::cout << "TSquare:I am spinning" << std::endl; 
 }
};

class TCircle {
    int i;
 public:
    void visit(const ADD& add) {
        this->i++;
    // std::cout << "TCircle:I am rotating" << std::endl;
  }
    void visit(const DEL& del) {
        this->i++;
    // std::cout << "TCircle:I am spinning" << std::endl;
  }
    void spin() { 
        this->i++;
        // std::cout << "TSquare:I am rotating" << std::endl; 
    }
    void rotate() {
    this->i++; 
        // std::cout << "TSquare:I am spinning" << std::endl; 
    }
};

class MultiVisitor : public boost::static_visitor<void> {
 public:
  template <typename T, typename U>

    void operator()(T& t, const U& u) {
    // std::cout << "visit" << std::endl;
    t.visit(u);
  }
};

// separate visitors, single dispatch

class RotateVisitor : public boost::static_visitor<void> {
 public:
  template <class T>
  void operator()(T& x) {
    x.rotate();
  }
};

class SpinVisitor : public boost::static_visitor<void> {
 public:
  template <class T>
  void operator()(T& x) {
    x.spin();
  }
};

#endif

// qcpptest.cpp

#include <iostream>
#include "qcpptest.hpp"
#include <vector>
#include <boost/chrono.hpp>

using MV = boost::variant<ADD, DEL>;
// MV const add = M::ADD;
// MV const del = M::DEL;
static MV const add = ADD();
static MV const del = DEL();

void make_virtual_shapes(int iters) {
  // std::cout << "make_virtual_shapes" << std::endl;
  std::vector<IShape*> shapes;
  shapes.push_back(new Square());
  shapes.push_back(new Circle());

  boost::chrono::high_resolution_clock::time_point start =
      boost::chrono::high_resolution_clock::now();

  for (int i = 0; i < iters; i++) {
    for (IShape* shape : shapes) {
      shape->rotate();
      shape->spin();
    }
  }

  boost::chrono::nanoseconds nanos =
      boost::chrono::high_resolution_clock::now() - start;
  std::cout << "make_virtual_shapes took " << nanos.count() * 1e-6
            << " millis\n";
}

void make_template_shapes(int iters) {
  // std::cout << "make_template_shapes" << std::endl;
  using TShapes = boost::variant<TSquare, TCircle>;
  // using MV = boost::variant< M >;

  // xyz
  std::vector<TShapes> tshapes;
  tshapes.push_back(TSquare());
  tshapes.push_back(TCircle());
  MultiVisitor mv;

  boost::chrono::high_resolution_clock::time_point start =
      boost::chrono::high_resolution_clock::now();

  for (int i = 0; i < iters; i++) {
    for (TShapes& shape : tshapes) {
      boost::apply_visitor(mv, shape, add);
      boost::apply_visitor(mv, shape, del);
      // boost::apply_visitor(sv, shape);
    }
  }
  boost::chrono::nanoseconds nanos =
      boost::chrono::high_resolution_clock::now() - start;
  std::cout << "make_template_shapes took " << nanos.count() * 1e-6
            << " millis\n";
}

void make_template_shapes_single(int iters) {
  // std::cout << "make_template_shapes_single" << std::endl;
  using TShapes = boost::variant<TSquare, TCircle>;
  // xyz
  std::vector<TShapes> tshapes;
  tshapes.push_back(TSquare());
  tshapes.push_back(TCircle());
  SpinVisitor sv;
  RotateVisitor rv;

  boost::chrono::high_resolution_clock::time_point start =
      boost::chrono::high_resolution_clock::now();

  for (int i = 0; i < iters; i++) {
    for (TShapes& shape : tshapes) {
      boost::apply_visitor(rv, shape);
      boost::apply_visitor(sv, shape);
    }
  }
  boost::chrono::nanoseconds nanos =
      boost::chrono::high_resolution_clock::now() - start;
  std::cout << "make_template_shapes_single took " << nanos.count() * 1e-6
            << " millis\n";
}

int main(int argc, const char* argv[]) {
  std::cout << "Hello, cmake" << std::endl;

  int iters = atoi(argv[1]);

  make_virtual_shapes(iters);
  make_template_shapes(iters);
  make_template_shapes_single(iters);

  return 0;
}

416

asked May 13 '16 18:05

Sid

1 Answers

Method 2 is basically reimplementing dynamic dispatch inefficiently. When you have:

shape->rotate();
shape->spin();

That's involves looking up the right function in the vtable and calling it. The inefficiency from that lookup. But when you have:

boost::apply_visitor(mv, shape, add);

That explodes roughly into (assuming an add<> member function template which is just a reinterpret_cast without checking):

if (shape.which() == 0) {
    if (add.which() == 0) {
        mv(shape.as<TSquare&>(), add.as<ADD&>());
    }
    else if (add.which() == 1) {
        mv(shape.as<TSquare&>(), add.as<DEL&>());
    }
    else {
        // ???
    }
}
else if (shape.which() == 1) {
    if (add.which() == 0) {
        mv(shape.as<TCircle&>(), add.as<ADD&>());
    }
    else if (add.which() == 1) {
        mv(shape.as<TCircle&>(), add.as<DEL&>());
    }
    else {
        // ???
    }
}
else {
   // ???
}

Here, we have a combinatorial explosion of branches (which we didn't have to do in Method 1) but we actually have to check every possible static type of every variant to figure out what we had to do (which we didn't have to do in Method 3). And those branches won't be able to be predicted since you're taking a different one every time, so you can't pipeline any sort of code without coming to a screeching halt.

The overloading on mv() is free - it's the figuring out what we're calling mv with that isn't. Note also the delta time that would happen based on changing either of the two axes:

+---------------+----------------+----------------+----------+
|               |    Method 1    |    Method 2    | Method 3 |
+---------------+----------------+----------------+----------+
|    New Type   | More Expensive | More Expensive |   Free   |
| New Operation |      Free      | More Expensive |   Free*  |
+---------------+----------------+----------------+----------+

Method 1 becomes more expensive on adding new types because we have to explicitly iterate over all of our types. Adding new operations is free since it doesn't matter what the operation is.

Method 3 is free to add new types and freeish to add new operations - the only change is increase in vtable. That will have some effects due to object size but will generally be smaller than the increased iteration over types.

107

answered Oct 26 '22 15:10

Barry

Related questions
                            
                                Can a class hierarchy be safe and trivially copyable?
                            
                                Limitations of Common Subexpression Elimination in C++
                            
                                Linker error with variable templates
                            
                                Initialize member before base constructor. Possible? [duplicate]
                            
                                In C++11, how can I implement an arithmetic type that fits into the hierarchy of builtin types?
                            
                                decltype() of captured variable in lambda: GCC bug and/or Clang bug?
                            
                                Does the C++ standard force capture-by-reference of local variables to be inefficient? [duplicate]
                            
                                Safely convert int to enum
                            
                                Is there a C++11 CSPRNG?
                            
                                How to apply SWIG OUTPUT typemaps for class types in Python?
                            
                                C++: need warning for: unsigned int i = -1;
                            
                                How to forbid assignment
                            
                                What is the scope of a namespace identifier?
                            
                                Using t( *this ) results RuntimeError, while t( std::ref( *this ) does not
                            
                                How to turn off exception handling?
                            
                                What's wrong with -O3 (optimization level 3)? [duplicate]
                            
                                C++ Type and Value Category for Expression and Variable
                            
                                Why does repeated memory allocation in windows slow down?
                            
                                Android ndk nested modules
                            
                                May a random access iterator to end be incremented by zero?

Donate For Us

If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!

Donate Us With

Static Polymorphism with boost variant single visitor vs multi visitor vs dynamic polymorphism

Tags:

c++

performance

templates

polymorphism

boost

Sid

People also ask

1 Answers

Barry

Recent Activity

Donate For Us