I am trying to build a Python module in C++ using pybind11. I have the following code:
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <pybind11/numpy.h>
namespace py = pybind11;
/// A single record: one 8-bit integer plus two doubles.
/// Container::addElement() splits a record into three separate vectors.
struct ContainerElement
{
    uint8_t i;  ///< 8-bit integer field
    double d;   ///< first double field
    double d2;  ///< second double field
};
/// Stores records column-wise: one vector per ContainerElement field.
/// addElement() appends to all three vectors together.
class Container
{
public:
    /// Appends one record by pushing each field onto its own vector.
    void addElement(ContainerElement element)
    {
        ints.push_back(element.i);
        doubles.push_back(element.d);
        doubles2.push_back(element.d2);
    }

    /// Mutable access to the stored `i` fields.
    std::vector<uint8_t>& getInts()
    {
        return ints;
    }

    /// Mutable access to the stored `d` fields.
    std::vector<double>& getDoubles()
    {
        return doubles;
    }

    /// Mutable access to the stored `d2` fields.
    std::vector<double>& getDoubles2()
    {
        return doubles2;
    }

private:
    std::vector<uint8_t> ints;      // `i` of every record, in insertion order
    std::vector<double> doubles;    // `d` of every record, in insertion order
    std::vector<double> doubles2;   // `d2` of every record, in insertion order
};
/// Appends one million records to `container`.
/// Record k carries k truncated to 8 bits in the integer field and k as a
/// double in both double fields.
void fillContainer(Container& container)
{
    constexpr int kElementCount = 1000000;

    int index = 0;
    while (index < kElementCount)
    {
        const ContainerElement record{
            static_cast<uint8_t>(index),
            static_cast<double>(index),
            static_cast<double>(index) };
        container.addElement(record);
        ++index;
    }
}
PYBIND11_MODULE(containerInterface, m) {
py::class_<Container>(m, "Container")
.def(py::init<>())
.def("getInts", [](Container& container)
{
return py::array_t<uint8_t>(
{ container.getInts().size() },
{ sizeof(uint8_t) },
container.getInts().data());
})
.def("getDoubles", [](Container& container)
{
return py::array_t<double>(
{ container.getDoubles().size() },
{ sizeof(double) },
container.getDoubles().data());
})
.def("getDoubles2", [](Container& container)
{
return py::array_t<double>(
{ container.getDoubles2().size() },
{ sizeof(double) },
container.getDoubles2().data());
});
m.def("fillContainer", &fillContainer);
}
When I call this code in Python:
# Create a Container through the extension module and let C++ fill it.
import containerInterface
container = containerInterface.Container()
containerInterface.fillContainer(container)
# Fetch each of the three columns through its getter.
i = container.getInts()
d = container.getDoubles()
d2 = container.getDoubles2()
This works; however, when I check the memory usage of the program (using `psutil.Process(os.getpid()).memory_info().rss`), it seems to make a copy when I call the functions `getInts`, `getDoubles` and `getDoubles2`. Is there a way to avoid this?
I have tried using `np.array(container.getInts(), copy=False)`, but it still makes a copy. I also tried using `py::buffer_protocol()` on the Container class as mentioned here: https://pybind11.readthedocs.io/en/stable/advanced/pycpp/numpy.html . However, I can only make that work for either the Ints vector or the Doubles vectors, not for all of them at the same time.
/// Python module `containerInterface`, buffer-protocol variant.
/// A class can expose only one buffer, so Container itself presents its
/// integer vector as a 1-D uint8 buffer.
PYBIND11_MODULE(containerInterface, m) {
    py::class_<Container>(m, "Container", py::buffer_protocol())
        .def(py::init<>())
        // NOTE: with pybind11/stl.h included, returning a std::vector converts
        // it into a new Python list, so these two getters copy the data.
        .def("getInts", &Container::getInts)
        .def("getDoubles", &Container::getDoubles)
        .def_buffer([](Container& container) -> py::buffer_info {
            return py::buffer_info(
                container.getInts().data(),               // pointer to the first element
                sizeof(uint8_t),                          // size of one element
                py::format_descriptor<uint8_t>::format(), // struct-style format code
                1,                                        // number of dimensions
                { container.getInts().size() },           // shape: element count
                // Stride is the byte distance between consecutive elements,
                // i.e. one element, not the total length of the buffer.
                { sizeof(uint8_t) }
            );
        });
    m.def("fillContainer", &fillContainer);
}
Then I can use `i = np.array(container, copy=False)` without a copy being made. However, as I said, it only works for the `Ints` vector now.
I have found a solution that works, though it might not be the most elegant. I have created three new classes, `Ints`, `Doubles` and `Doubles2`, that take the original container and expose the respective vectors through a function call `getValues()`. With these three classes I can specify the buffer protocol three times, once for each class.
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <pybind11/numpy.h>
#include <pybind11/buffer_info.h>
namespace py = pybind11;
/// A single record: one 8-bit integer plus two doubles.
/// Container::addElement() splits a record into three separate vectors.
struct ContainerElement
{
    uint8_t i;  ///< 8-bit integer field
    double d;   ///< first double field
    double d2;  ///< second double field
};
/// Stores records column-wise: one vector per ContainerElement field.
/// addElement() appends to all three vectors together.
class Container
{
public:
    /// Appends one record by pushing each field onto its own vector.
    void addElement(ContainerElement element)
    {
        ints.push_back(element.i);
        doubles.push_back(element.d);
        doubles2.push_back(element.d2);
    }

    /// Mutable access to the stored `i` fields.
    std::vector<uint8_t>& getInts()
    {
        return ints;
    }

    /// Mutable access to the stored `d` fields.
    std::vector<double>& getDoubles()
    {
        return doubles;
    }

    /// Mutable access to the stored `d2` fields.
    std::vector<double>& getDoubles2()
    {
        return doubles2;
    }

private:
    std::vector<uint8_t> ints;      // `i` of every record, in insertion order
    std::vector<double> doubles;    // `d` of every record, in insertion order
    std::vector<double> doubles2;   // `d2` of every record, in insertion order
};
/// Appends one million records to `container`.
/// Record k carries k truncated to 8 bits in the integer field and k as a
/// double in both double fields.
void fillContainer(Container& container)
{
    constexpr int kElementCount = 1000000;

    int index = 0;
    while (index < kElementCount)
    {
        const ContainerElement record{
            static_cast<uint8_t>(index),
            static_cast<double>(index),
            static_cast<double>(index) };
        container.addElement(record);
        ++index;
    }
}
class Ints
{
private:
Container& cont;
public:
Ints(Container& cont) : cont(cont) {}
std::vector<uint8_t>& getValues() { return cont.getInts(); }
};
class Doubles
{
private:
Container& cont;
public:
Doubles(Container& cont) : cont(cont) {}
std::vector<double>& getValues() { return cont.getDoubles(); }
};
class Doubles2
{
private:
Container& cont;
public:
Doubles2(Container& cont) : cont(cont) {}
std::vector<double>& getValues() { return cont.getDoubles2(); }
};
PYBIND11_MODULE(newInterface, m) {
py::class_<Container>(m, "Container")
.def(py::init<>());
py::class_<Ints>(m, "Ints", py::buffer_protocol())
.def(py::init<Container&>(), py::keep_alive<1, 2>())
.def_buffer([](Ints& ints) -> py::buffer_info {
return py::buffer_info(
ints.getValues().data(),
sizeof(uint8_t),
py::format_descriptor<uint8_t>::format(),
ints.getValues().size()
);
});
py::class_<Doubles>(m, "Doubles", py::buffer_protocol())
.def(py::init<Container&>(), py::keep_alive<1, 2>())
.def_buffer([](Doubles& doubles) -> py::buffer_info {
return py::buffer_info(
doubles.getValues().data(),
sizeof(double),
py::format_descriptor<double>::format(),
doubles.getValues().size()
);
});
py::class_<Doubles2>(m, "Doubles2", py::buffer_protocol())
.def(py::init<Container&>(), py::keep_alive<1, 2>())
.def_buffer([](Doubles2& doubles2) -> py::buffer_info {
return py::buffer_info(
doubles2.getValues().data(),
sizeof(double),
py::format_descriptor<double>::format(),
doubles2.getValues().size()
);
});
m.def("fillContainer", &fillContainer);
}
This way I can use the code in the following way in Python:
# Create a Container through the extension module and let C++ fill it.
import newInterface as ci
import numpy as np
container = ci.Container()
ci.fillContainer(container)
# Wrap each column in its adaptor class and build a NumPy array from the
# adaptor's buffer; copy=False asks NumPy not to duplicate the data.
i = np.array(ci.Ints(container), copy=False)
d = np.array(ci.Doubles(container), copy=False)
d2 = np.array(ci.Doubles2(container), copy=False)
Once `fillContainer` has filled the container, the construction of the NumPy arrays does not copy the values from this container.
I'm guessing that you have to specify that the access functions return references instead of copies, since copying is probably the default. I don't know how to do this with pybind11, but I've done it with boost::python and Ponder.
I.e., you need to specify the return policy.
This doesn't directly answer the question, but it still allows returning an array buffer without making a copy. Inspiration was taken from this thread: https://github.com/pybind/pybind11/issues/1042
Basically, just supply a py::capsule to the py::array() constructor. With this, the py::array() constructor does not allocate a separate buffer and copy the data into it. E.g.:
// Fragment: data_buffer, element_size, data_type, dims_length, dims and
// strides are assumed to be defined by the enclosing function (not shown).
//
// The capsule is passed to py::array as the array's base object.
//
// Use this if the C++ buffer should NOT be deallocated
// once Python no longer has a reference to it
// (the capsule's destructor is an empty lambda)
py::capsule buffer_handle([](){});
// Use this if the C++ buffer SHOULD be deallocated
// once Python no longer has a reference to it
// (the capsule's destructor calls free() on the buffer)
// py::capsule buffer_handle(data_buffer, [](void* p){ free(p); });
return py::array(py::buffer_info(
data_buffer,
element_size,
data_type,
dims_length,
dims,
strides
), buffer_handle);
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With