Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

returning multiple py::array without copying in pybind11

I am trying to build a python module in C++ using pybind11. I have the following code:

#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <pybind11/numpy.h>

namespace py = pybind11;

/// One record made of an 8-bit unsigned integer and two doubles.
/// It is a plain aggregate, so it can be brace-initialised as {i, d, d2}.
struct ContainerElement
{
    std::uint8_t i;  ///< 8-bit unsigned integer field
    double d;        ///< first double field
    double d2;       ///< second double field
};

/// Column-wise storage for ContainerElement records: each field is kept in its
/// own vector, and every addElement() call appends one entry to all three.
class Container
{
public:
    /// Appends the three fields of `element` to their respective vectors.
    void addElement(ContainerElement element)
    {
        ints_.push_back(element.i);
        doubles_.push_back(element.d);
        doubles2_.push_back(element.d2);
    }

    /// Mutable reference to the vector of 8-bit integers.
    std::vector<uint8_t>& getInts() { return ints_; }

    /// Mutable reference to the first vector of doubles.
    std::vector<double>& getDoubles() { return doubles_; }

    /// Mutable reference to the second vector of doubles.
    std::vector<double>& getDoubles2() { return doubles2_; }

private:
    std::vector<uint8_t> ints_;
    std::vector<double> doubles_;
    std::vector<double> doubles2_;
};

/// Appends one million elements to `container`. For loop index k the element
/// is {k converted to uint8_t, k as double, k as double}.
void fillContainer(Container& container)
{
    constexpr int kElementCount = 1000000;

    for (int index = 0; index < kElementCount; ++index)
    {
        const ContainerElement element{
            static_cast<uint8_t>(index),  // wraps modulo 256
            static_cast<double>(index),
            static_cast<double>(index)};
        container.addElement(element);
    }
}

// Python module `containerInterface`: binds Container with three getters that
// each build a 1-D NumPy array from one of its vectors, plus fillContainer.
//
// NOTE: every py::array_t below is built from (shape, strides, data pointer)
// only, with no `base` handle. pybind11 copies the data in that case, which
// matches the extra memory use observed when calling these getters.
PYBIND11_MODULE(containerInterface, m) {
    py::class_<Container> container_class(m, "Container");
    container_class.def(py::init<>());

    container_class.def("getInts", [](Container& container)
    {
        auto& values = container.getInts();
        return py::array_t<uint8_t>(
                { values.size() },      // shape: one dimension
                { sizeof(uint8_t) },    // stride in bytes
                values.data());
    });

    container_class.def("getDoubles", [](Container& container)
    {
        auto& values = container.getDoubles();
        return py::array_t<double>(
                { values.size() },
                { sizeof(double) },
                values.data());
    });

    container_class.def("getDoubles2", [](Container& container)
    {
        auto& values = container.getDoubles2();
        return py::array_t<double>(
                { values.size() },
                { sizeof(double) },
                values.data());
    });

    m.def("fillContainer", &fillContainer);
}

When I call this code in python:

import containerInterface

# Create an empty container and let the C++ side populate it.
container = containerInterface.Container()
containerInterface.fillContainer(container)

# Fetch the three columns through the bound getters.
i, d, d2 = (
    container.getInts(),
    container.getDoubles(),
    container.getDoubles2(),
)

This works; however, when I check the memory usage of the program (using psutil.Process(os.getpid()).memory_info().rss), it seems to make a copy whenever I call the functions getInts, getDoubles and getDoubles2. Is there a way to avoid this?

I have tried using np.array(container.getInts(), copy=False), but it still makes a copy. I also tried using py::buffer_protocol() on the Container class, as described here: https://pybind11.readthedocs.io/en/stable/advanced/pycpp/numpy.html . However, I can only make that work for either the Ints vector or the Doubles vectors, not for all of them at the same time.

// Python module `containerInterface`, buffer-protocol variant: Container itself
// exports a buffer over its `ints` vector, so np.array(container, copy=False)
// can view that vector. A class exports only one buffer, which is why this
// approach covers a single vector.
PYBIND11_MODULE(containerInterface, m) {
    py::class_<Container>(m, "Container", py::buffer_protocol())
        .def(py::init<>())
        .def("getInts", &Container::getInts)
        .def("getDoubles", &Container::getDoubles)
        .def_buffer([](Container& container) -> py::buffer_info {
            auto& ints = container.getInts();
            return py::buffer_info(
                ints.data(),                               // pointer to the first element
                sizeof(uint8_t),                           // size of one element in bytes
                py::format_descriptor<uint8_t>::format(),  // element format descriptor
                1,                                         // number of dimensions
                { ints.size() },                           // shape
                // Stride is the byte distance between consecutive elements,
                // not the total size of the buffer.
                { sizeof(uint8_t) }
            );
        });

    m.def("fillContainer", &fillContainer);
}

Then I can use i = np.array(container, copy=False) without a copy being made. However, as I said, this now only works for the Ints vector.

like image 351
Frank Avatar asked Sep 26 '19 09:09

Frank


3 Answers

I have found a solution that works, though it might not be the most elegant. I have created three new classes, Ints, Doubles and Doubles2, that each take the original container and expose the respective vector through a getValues() member function. With these three classes I can specify the buffer protocol three times, once for each class.

#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <pybind11/numpy.h>
#include <pybind11/buffer_info.h>

namespace py = pybind11;

/// One record made of an 8-bit unsigned integer and two doubles.
/// It is a plain aggregate, so it can be brace-initialised as {i, d, d2}.
struct ContainerElement
{
    std::uint8_t i;  ///< 8-bit unsigned integer field
    double d;        ///< first double field
    double d2;       ///< second double field
};

/// Column-wise storage for ContainerElement records: each field is kept in its
/// own vector, and every addElement() call appends one entry to all three.
class Container
{
public:
    /// Appends the three fields of `element` to their respective vectors.
    void addElement(ContainerElement element)
    {
        ints_.push_back(element.i);
        doubles_.push_back(element.d);
        doubles2_.push_back(element.d2);
    }

    /// Mutable reference to the vector of 8-bit integers.
    std::vector<uint8_t>& getInts() { return ints_; }

    /// Mutable reference to the first vector of doubles.
    std::vector<double>& getDoubles() { return doubles_; }

    /// Mutable reference to the second vector of doubles.
    std::vector<double>& getDoubles2() { return doubles2_; }

private:
    std::vector<uint8_t> ints_;
    std::vector<double> doubles_;
    std::vector<double> doubles2_;
};

/// Appends one million elements to `container`. For loop index k the element
/// is {k converted to uint8_t, k as double, k as double}.
void fillContainer(Container& container)
{
    constexpr int kElementCount = 1000000;

    for (int index = 0; index < kElementCount; ++index)
    {
        const ContainerElement element{
            static_cast<uint8_t>(index),  // wraps modulo 256
            static_cast<double>(index),
            static_cast<double>(index)};
        container.addElement(element);
    }
}

class Ints
{
private:
    Container& cont;
public:
    Ints(Container& cont) : cont(cont) {}
    std::vector<uint8_t>& getValues() { return cont.getInts(); }
};

class Doubles
{
private:
    Container& cont;
public:
    Doubles(Container& cont) : cont(cont) {}
    std::vector<double>& getValues() { return cont.getDoubles(); }
};

class Doubles2
{
private:
    Container& cont;
public:
    Doubles2(Container& cont) : cont(cont) {}
    std::vector<double>& getValues() { return cont.getDoubles2(); }
};

// Python module `newInterface`: binds Container, the three view classes
// (Ints, Doubles, Doubles2) that each export one vector through the buffer
// protocol, and the free function fillContainer.
//
// Each view constructor uses keep_alive<1, 2>: the Container passed in
// (argument 2) is kept alive for as long as the new view object (argument 1).
PYBIND11_MODULE(newInterface, m) {
    py::class_<Container> container_class(m, "Container");
    container_class.def(py::init<>());

    py::class_<Ints> ints_class(m, "Ints", py::buffer_protocol());
    ints_class.def(py::init<Container&>(), py::keep_alive<1, 2>());
    ints_class.def_buffer([](Ints& view) -> py::buffer_info {
        auto& values = view.getValues();
        return py::buffer_info(
            values.data(),                             // pointer to the first element
            sizeof(uint8_t),                           // size of one element in bytes
            py::format_descriptor<uint8_t>::format(),  // element format descriptor
            values.size());                            // number of elements (1-D)
    });

    py::class_<Doubles> doubles_class(m, "Doubles", py::buffer_protocol());
    doubles_class.def(py::init<Container&>(), py::keep_alive<1, 2>());
    doubles_class.def_buffer([](Doubles& view) -> py::buffer_info {
        auto& values = view.getValues();
        return py::buffer_info(
            values.data(),
            sizeof(double),
            py::format_descriptor<double>::format(),
            values.size());
    });

    py::class_<Doubles2> doubles2_class(m, "Doubles2", py::buffer_protocol());
    doubles2_class.def(py::init<Container&>(), py::keep_alive<1, 2>());
    doubles2_class.def_buffer([](Doubles2& view) -> py::buffer_info {
        auto& values = view.getValues();
        return py::buffer_info(
            values.data(),
            sizeof(double),
            py::format_descriptor<double>::format(),
            values.size());
    });

    m.def("fillContainer", &fillContainer);
}

This way I can use the code in the following way in Python:

import newInterface as ci
import numpy as np

# Create a container and let the C++ side populate it.
container = ci.Container()
ci.fillContainer(container)

# Wrap each column in its buffer-exporting view class.
ints_view = ci.Ints(container)
doubles_view = ci.Doubles(container)
doubles2_view = ci.Doubles2(container)

# Build NumPy arrays over the exported buffers; copy=False asks NumPy not to
# duplicate the data.
i = np.array(ints_view, copy=False)
d = np.array(doubles_view, copy=False)
d2 = np.array(doubles2_view, copy=False)

Once fillContainer has filled the container, constructing the NumPy arrays does not copy the values from the container.

like image 115
Frank Avatar answered Nov 12 '22 15:11

Frank


I'm guessing that you have to specify that the access functions return references instead of a copy, which is probably the default. I don't know how you do this with pybind but I've done this with boost::python and Ponder.

I.e. you need to specify the return policy.

like image 1
Nick Avatar answered Nov 12 '22 14:11

Nick


This doesn't directly solve the question, but still allows for returning an array buffer without doing a copy. Inspiration was taken from this thread: https://github.com/pybind/pybind11/issues/1042

Basically, just supply a py::capsule to the py::array() constructor. With this, the py::array() constructor does not allocate a separate buffer and copy. e.g.:

// Illustrative fragment: data_buffer, element_size, data_type, dims_length,
// dims and strides are placeholders that the surrounding code must provide.

// Use this if the C++ buffer should NOT be deallocated
// once Python no longer has a reference to it
// (the capsule's destructor is an empty lambda, so it does nothing).
py::capsule buffer_handle([](){});

// Use this if the C++ buffer SHOULD be deallocated
// once Python no longer has a reference to it
// (this variant's destructor calls free() on the pointer stored in the capsule).
// py::capsule buffer_handle(data_buffer, [](void* p){ free(p); });

// Construct the array from a buffer_info describing the existing memory and
// pass the capsule as the second constructor argument.
return py::array(py::buffer_info(
        data_buffer,
        element_size,
        data_type,
        dims_length,
        dims,
        strides
), buffer_handle);
like image 1
driedler Avatar answered Nov 12 '22 14:11

driedler