Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Memory mapped file storage in stl vector

I'm trying to implement a custom allocator for storing memory-mapped files in a std::vector. The file mapping is performed by boost::iostreams::mapped_file.

Allocator type for file memory mapping:

template<typename T>
class mmap_allocator 
{
public:
  typedef T value_type;

  mmap_allocator(const std::string& filename) 
  : _mmfile(filename) {  } 

  T* allocate (size_t n) 
  { 
     return reinterpret_cast<T*>(_mmfile.data());
  }
  void deallocate (T* p, size_t n) 
  { 
     p = nullptr;
     _mmfile.close();
  }

private:
  boost::iostreams::mapped_file _mmfile;
};

Container for memory mapped file, based on std::vector:

/// Returns the size in bytes of the file at `filename`.
///
/// The file is opened in binary mode and its size is taken as the offset of
/// the end of the stream.
///
/// @param filename path of the file to measure.
/// @return the size in bytes, or -1 if the file cannot be opened or its end
///         position cannot be determined.
long GetFileSize(const std::string& filename)
{
    FILE *p_file = fopen(filename.c_str(), "rb");
    if (p_file == nullptr)
        return -1L;  // nothing to seek in; report failure instead of using a null FILE*

    // ftell() returns long; keep the value in a long so it is not narrowed.
    long size = -1L;
    if (fseek(p_file, 0, SEEK_END) == 0)
        size = ftell(p_file);

    fclose(p_file);
    return size;
}

/// std::vector whose storage is supplied by an mmap_allocator bound to a file.
///
/// The vector is created with one element for every complete T that fits in
/// the file size reported by GetFileSize().
///
/// NOTE: the sized std::vector constructor default-inserts its elements, so
/// for arithmetic T the elements read back as 0 rather than as the bytes
/// that were in the mapping.
template<typename T>
class mm_vector : public std::vector<T, mmap_allocator<T> >
{
public:
  using allocator_type = mmap_allocator<T>;
  using b_vector = std::vector<T, allocator_type>;

  /// @param filename path of the file to map and expose as a sequence of T.
  mm_vector(const std::string& filename)
    : b_vector(GetFileSize(filename)/sizeof(T), allocator_type(filename))
  {
    // No reserve() call is needed: after the sized constructor
    // capacity() >= size(), so reserving that same count cannot change
    // anything and would only open the file a second time to measure it.
  }
};

Test code:

int main()
{
  mm_vector<int> v("test.f");//test.f - binary file contain several integers
  for(auto x : v) std::cout<<x<<"  ";
}

This code doesn't work properly: the output is always zero, even though the file contains the correct content (several integers). The following code, by contrast, works well:

boost::iostreams::mapped_file _mmfile("test.f");
int* p = (int*)(_mmfile.data());
std::cout<<p[0];

What am I doing wrong?

like image 411
gorill Avatar asked Jul 23 '14 02:07

gorill


2 Answers

The problem is zero-initialization: calling the constructor that receives the size and the allocator initializes the vector elements to the default value of the element type (in this case 0). This is mandated by the standard.

23.3.7.2 vector constructors, copy, and assignment [vector.cons] § 23.3.7.2 789

explicit vector(size_type n, const Allocator& = Allocator());

-Effects: Constructs a vector with n default-inserted elements using the specified allocator.
-Requires: T shall be DefaultInsertable into *this.
-Complexity: Linear in n.

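In my case the file that was used ended up filled with zeros too (tested with GCC 4.9.0). This makes sense, because the default mapmode of mapped_file is readwrite, so the zeros the vector writes into its elements go through to the file.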

In the sample code I added a print of the mapped memory content at the point where the allocation happens (in the custom allocator) and another in the constructor of the vector, in addition to the existing print in main. The first print outputs the correct data of the file and the second outputs the zeroed version.

#include <vector>
#include <iostream>
#include <chrono>
#include <iomanip>
#include <boost/iostreams/device/mapped_file.hpp>

/// Diagnostic allocator that gives std::vector the bytes of a memory-mapped
/// file as its storage and prints the mapped content on every allocation.
///
/// The file named at construction is mapped through
/// boost::iostreams::mapped_file.
template <typename T>
class mmap_allocator {
public:
    typedef T value_type;

    /// Maps the file at `filename`; the mapping is stored in this allocator.
    mmap_allocator(const std::string& filename) : _mmfile(filename) {}

    /// Prints "OUTPUT 1:" followed by every complete T in the mapping, then
    /// returns the start of the mapping viewed as an array of T.
    /// The requested element count `n` is not consulted.
    T* allocate(size_t n) {
        (void)n;
        std::cout << "OUTPUT 1:" << std::endl;
        T* const v = reinterpret_cast<T*>(_mmfile.data());
        // Count elements in units of T (not int) so the dump stays inside
        // the mapping for any element type.
        const size_t count = _mmfile.size() / sizeof(T);
        for (size_t idx = 0; idx < count; ++idx)
            std::cout << v[idx] << " ";
        return v;
    }

    /// Closes the mapped file. Neither the pointer nor the count is used.
    void deallocate(T* p, size_t n) {
        (void)p;
        (void)n;
        _mmfile.close();
    }

private:
    boost::iostreams::mapped_file _mmfile;
};

/// Returns the size in bytes of the file at `filename`.
///
/// The file is opened in binary mode and its size is taken as the offset of
/// the end of the stream.
///
/// @param filename path of the file to measure.
/// @return the size in bytes, or -1 if the file cannot be opened or its end
///         position cannot be determined.
long GetFileSize(const std::string& filename) {
    FILE* p_file = fopen(filename.c_str(), "rb");
    if (p_file == nullptr)
        return -1L;  // nothing to seek in; report failure instead of using a null FILE*

    // ftell() returns long; keep the value in a long so it is not narrowed.
    long size = -1L;
    if (fseek(p_file, 0, SEEK_END) == 0)
        size = ftell(p_file);

    fclose(p_file);
    return size;
}

/// std::vector whose storage is supplied by an mmap_allocator bound to a file.
///
/// The vector is created with one element for every complete T that fits in
/// the file size reported by GetFileSize(). After construction the elements
/// are printed under the "OUTPUT 2:" label.
template <typename T>
class mm_vector : public std::vector<T, mmap_allocator<T>> {
public:
    typedef mmap_allocator<T> allocator_type;
    typedef std::vector<T, allocator_type> b_vector;

    /// @param filename path of the file to map and expose as a sequence of T.
    mm_vector(const std::string& filename)
        : b_vector(GetFileSize(filename) / sizeof(T),
                   allocator_type(filename)) {
        std::cout << std::endl << std::endl << "OUTPUT 2:" << std::endl;
        for (const auto& x : *this)
            std::cout << x << "  ";
        // No reserve() call is needed: after the sized constructor
        // capacity() >= size(), so reserving that same count cannot change
        // anything and would only open the file again to measure it.
    }
};

// Builds an mm_vector<int> over H:\save.txt, prints its elements under the
// "OUTPUT 3:" label, and reports how long the whole run took.
int main(int argc, char* argv[]) {
    (void)argc;
    (void)argv;

    // steady_clock is monotonic, so the measured interval cannot be skewed
    // by adjustments to the wall clock.
    const std::chrono::steady_clock::time_point begin_time =
        std::chrono::steady_clock::now();

    mm_vector<int> v("H:\\save.txt");
    // Third dump of the data, after the allocator's and the constructor's.
    std::cout << std::endl << std::endl << "OUTPUT 3:" << std::endl;
    for (auto x : v)
        std::cout << x << "  ";

    const std::chrono::steady_clock::time_point end_time =
        std::chrono::steady_clock::now();
    const long long elapsed_miliseconds =
        std::chrono::duration_cast<std::chrono::milliseconds>(
            end_time - begin_time).count();

    // Minutes and seconds are padded to two digits; the millisecond field
    // ranges over 0..999 and is therefore padded to three.
    std::cout << "Duration (min:seg:mili): " << std::setw(2)
              << std::setfill('0') << (elapsed_miliseconds / 60000) << ":"
              << std::setw(2) << std::setfill('0')
              << ((elapsed_miliseconds / 1000) % 60) << ":" << std::setw(3)
              << std::setfill('0') << (elapsed_miliseconds % 1000) << std::endl;
    std::cout << "Total milliseconds: " << elapsed_miliseconds << std::endl;

    return 0;
}
like image 114
NetVipeC Avatar answered Nov 07 '22 23:11

NetVipeC


You might want to give

https://github.com/johannesthoma/mmap_allocator

a try. It uses contents of an mmap'ed file as backing storage for a vector and is LGPL so you should be able to use it in your projects. Note that currently, gcc is a requirement but it can be easily extended.

like image 21
Johannes Thoma Avatar answered Nov 07 '22 23:11

Johannes Thoma