Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Why segmentation fault with multithreading? [closed]

I am working on a project about multithreading. Here Operation is a class which contains a type, a key, a time and an answer. Here is my code:

#include <cstdlib>
#include <fstream>
#include <string>
#include <iomanip>
#include <pthread.h>
#include <vector>
#include "block.h"

using namespace std;

// Per-input-file operation lists. main() allocates one vector per input file
// and worker thread i iterates over data[i].
std::vector<Operation> *data;
// Filter shared by every worker thread, constructed with n = 10000000
// expected elements and a target false positive rate of 0.01.
block_bloom_filter filter(10000000, 0.01);
// Answers indexed by (operation time - 1); main() copies each operation's
// `ans` here and prints the entries that are >= 0.
int ans[30000000];
void *test(void *arg)
{
    int thread_id = *((int *)arg);
    for (auto &op : data[thread_id])
    {
        if (op.type == 1)
        {
            filter.insert(op);
        }
        else
        {
            filter.query(op);
        }
    }
    return 0;
}
int main(int argc, char **argv)
{
    int k = atoi(argv[1]);
    int *op_num = new int[k];
    data = new vector<Operation>[k];
    for (int i = 0; i < k; i++)
    {
        string tmp = "data" + to_string(i + 1) + ".in";
        const char *s = tmp.c_str();
        ifstream fin;
        fin.open(s);
        fin >> op_num[i];
        //data[i] = new Operation[op_num[i]];
        for (int j = 0; j < op_num[i]; j++)
        {
            string tmp1;
            fin >> tmp1;
            if (tmp1 == "insert")
            {
                Operation tmp2;
                tmp2.type = 1;
                fin >> tmp2.key >> tmp2.time;
                tmp2.ans = -1;
                data[i].push_back(tmp2);
            }
            else
            {
                Operation tmp2;
                tmp2.type = 2;
                fin >> tmp2.key >> tmp2.time;
                tmp2.ans = -1;
                data[i].push_back(tmp2);
            }
        }
        fin.close();
    }
    auto start = std::chrono::high_resolution_clock::now();
    int num_threads = k;
    pthread_t *threads = new pthread_t[num_threads];
    //auto **threads = new thread *[num_threads];
    //pthread_t *threads = new pthread_t[k];
    /*for (int i = 0; i < num_threads; i++)
    {
        threads[i] = new thread(test, i);
    }
    for (int i = 0; i < num_threads; i++)
    {
        threads[i]->join();
    }*/
    for (int i = 0; i < k; i++)
    {
        pthread_create(&threads[i], NULL, test, (void *)&(i));
    }
    auto stop = std::chrono::high_resolution_clock::now();

    auto duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
    //std::cerr << "duration = " << duration.count() << "us" << std::endl;

    double time_used = duration.count() / 1e3;
    std::ofstream f_time("time.out");
    f_time << std::fixed << std::setprecision(3) << time_used << std::endl;
    f_time.close();
    for (int i = 0; i < k; i++)
    {
        for (int j = 0; j < op_num[i]; j++)
        {
            ans[data[i][j].time - 1] = data[i][j].ans;
        }
    }
    ofstream fout;
    fout.open("result.out");
    for (int i = 0; i < 30000000; i++)
    {
        if (ans[i] >= 0)
            fout << ans[i] << endl;
    }
    fout.close();

    delete[] data;
    delete[] threads;
    delete[] op_num;
    //pthread_exit(NULL);
}

My code compiles, but when I run it I get a segmentation fault, and it generates only time.out, not result.out. I've been working on it for a long time but still do not know why. Hope someone can help me.

Below is block.h

#include <unistd.h>

#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <ctime>
#include <fstream>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>

#include "Headers/MurmurHash3.h"
#include "xxHash/xxhash.c"
#define M_LN2 0.69314718055994530942
using namespace std;

// Bit storage used by the filters below.
using bit_vector = std::vector<bool>;

// One line of an input file: an insert or a query of a key, stamped with a
// time. Every member has a default so that a default-constructed Operation
// never holds indeterminate values.
class Operation
{
public:
    int type = 0;      // 1: insert, 2: query (0 until assigned)
    char key[17] = {}; // NUL-terminated key of up to 16 characters, zero-filled
    int time = 0;      // time stamp read from the input
    int ans = -1;      // query result: 0 or 1 once answered, -1 otherwise
};

// Tuning parameters.
// NOTE(review): none of these are referenced by the code visible in this
// listing; confirm whether other translation units use them. They are
// non-inline definitions in a header, so including this header from more than
// one translation unit would produce duplicate definitions.
int str_len = 16;             // presumably the key length in characters -- TODO confirm
int cache_size = 64;          // presumably the cache line size in bytes -- TODO confirm
int block_size = 512;         // slots per block, as used by block_num below
int key_num = 10000000;       // key count used to derive hash_num
int slot_num = 1 << 27;       // 134217728 slots
// slot_num / key_num * ln 2 = 13.42... * 0.693... = 9.30..., truncated to 9.
int hash_num = int((double)slot_num / key_num * M_LN2);
// slot_num divided by block_size, rounded up: 262144.
int block_num = (slot_num + block_size - 1) / block_size;

// Bloom filter over strings. Bit positions come from MurmurHash3_x86_32 run
// with seeds 1..k.
class bloom_filter
{
    uint32_t size; // Expected number of elements
    double fpr;    // Target false positive rate
    int m;         // Number of bits in the filter (always >= 1)
    int k;         // Number of hash functions (always >= 1)
    bit_vector bloom;

    // Returns the bit position selected for S by the hash with seed `seed`.
    // The hash is written to a local variable, so nothing is heap-allocated.
    uint32_t bit_index(const std::string &S, int seed) const
    {
        uint32_t hash_value = 0;
        MurmurHash3_x86_32(S.c_str(), S.length(), seed, &hash_value);
        return hash_value % static_cast<uint32_t>(m);
    }

public:
    int get_size() const { return size; }

    double get_fpr() const { return fpr; }

    // n:   expected number of elements.
    // fpr: target false positive rate, expected to lie strictly between 0 and 1.
    bloom_filter(int n, double fpr)
    {
        this->size = n;
        this->fpr = fpr;
        // Optimal number of bits: m = -n * ln(p) / (ln 2)^2.
        this->m = ceil(-((n * log(fpr)) / pow(log(2), 2.0)));
        if (this->m < 1)
            this->m = 1; // keeps `% m` well defined for degenerate inputs
        // Optimal number of hashes: k = (m / n) * ln 2. The division has to be
        // done in floating point; integer division would drop the fraction
        // before the multiplication.
        this->k = (n > 0) ? static_cast<int>(ceil((static_cast<double>(m) / n) * log(2))) : 1;
        if (this->k < 1)
            this->k = 1;
        bloom.resize(m, false);
    }

    // Sets the k bits selected by S.
    void insert(const std::string &S)
    {
        for (int i = 0; i < k; i++)
        {
            bloom[bit_index(S, i + 1)] = true;
        }
    }

    // Returns 'N' if S was definitely never inserted, 'Y' if it may have been.
    char query(const std::string &S) const
    {
        for (int i = 0; i < k; i++)
        {
            if (bloom[bit_index(S, i + 1)] == false)
                return 'N';
        }
        return 'Y';
    }
};

// Bloom filter keyed by Operation::key. Bit positions are derived from one
// 64-bit XXH3 hash by double hashing: pos_i = (hash1 + i * hash2) % m.
//
// Not thread-safe: the bits live in a std::vector<bool>, which packs several
// bits into one word, so concurrent insert() calls (or insert() concurrent
// with query()) are a data race. Callers must synchronise access.
class block_bloom_filter
{
    int size;   // Expected number of elements
    double fpr; // Target false positive rate
    int m;      // Number of addressable bits (always >= 1)
    int k;      // Number of hash functions (always >= 1)
    int s;      // Number of cache-line-sized blocks
    bit_vector block_bloom;
    int cache_line_size; // Block size in bits

public:
    int get_size() { return size; }

    double get_fpr() { return fpr; }

    // n:   expected number of elements.
    // fpr: target false positive rate, expected to lie strictly between 0 and 1.
    block_bloom_filter(int n, double fpr)
    {
        this->size = n;
        this->fpr = fpr;
        // Optimal number of bits: m = -n * ln(p) / (ln 2)^2.
        this->m = ceil(-((n * log(fpr)) / pow(log(2), 2.0)));
        if (this->m < 1)
            this->m = 1; // keeps `% m` well defined for degenerate inputs
        // Optimal number of hashes: k = (m / n) * ln 2, computed in floating
        // point so the fraction of m / n is not truncated away.
        this->k = (n > 0) ? static_cast<int>(ceil((static_cast<double>(m) / n) * log(2))) : 1;
        if (this->k < 1)
            this->k = 1;

        // sysconf may report 0 or -1 when the cache geometry is unknown. Using
        // that value would divide by zero below and leave the bit vector
        // empty, so fall back to a 64-byte line.
        long line_bytes = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
        if (line_bytes <= 0)
            line_bytes = 64;
        this->cache_line_size = static_cast<int>(line_bytes) * 8;

        // Round the storage up to a whole number of blocks.
        this->s = ceil((double)m / cache_line_size);
        block_bloom.resize(static_cast<std::size_t>(cache_line_size) * s, false);
    }

    // Hashes exactly 16 bytes starting at `str`; the buffer must be at least
    // that long, and any bytes after a shorter key take part in the hash.
    XXH64_hash_t GetHash(const char *str)
    {
        return XXH3_64bits_withSeed(str, 16, /* Seed */ 123976235672331983ll);
    }

    // Sets the k bits selected by s.key.
    void insert(Operation &s)
    {
        XXH64_hash_t hash = GetHash(s.key);
        XXH64_hash_t hash1 = hash % m;
        XXH64_hash_t hash2 = (hash / m) % m;
        for (int i = 0; i < k; i++)
        {
            int pos = (hash1 + i * hash2) % m;
            block_bloom[pos] = 1;
        }
    }

    // Stores the lookup result in s.ans: 0 if s.key was definitely never
    // inserted, 1 if it may have been.
    void query(Operation &s)
    {
        XXH64_hash_t hash = GetHash(s.key);
        XXH64_hash_t hash1 = hash % m;
        XXH64_hash_t hash2 = (hash / m) % m;
        for (int i = 0; i < k; i++)
        {
            int pos = (hash1 + i * hash2) % m;
            if (!block_bloom[pos])
            {
                s.ans = 0;
                return;
            }
        }
        s.ans = 1;
    }
};
like image 656
dubugger Avatar asked Mar 14 '26 11:03

dubugger


1 Answers

for (int i = 0; i < k; i++)
{
    pthread_create(&threads[i], NULL, test, (void *)&(i));

The fourth parameter to pthread_create(), the thread function's argument, is a pointer to the loop variable. The thread function reads it, as follows:

void *test(void *arg)
{
    int thread_id = *((int *)arg);

There are no guarantees whatsoever that this gets executed by the new execution thread before the parent execution thread increments i. When it comes to multiple execution threads, neither POSIX nor the C++ library gives you any guarantees as to the relative execution order of multiple threads.

All that pthread_create() guarantees you is that at some later point in time, which can be before or after pthread_create() returns, the new execution thread pops into existence and begins executing the thread function.

And it may very well be that one or more (if not all) execution threads finally begin executing, for real, after the for loop terminates and i gets destroyed. At which point, when they do start executing, they will discover a pointer to a destroyed variable as their argument, and dereferencing it becomes undefined behavior.

Or, some of those execution threads get their gear running at some point after they get created. By this time i has already been incremented a couple of times. So they both read *(int *)arg, whose value is now -- who knows? And, just to make things interesting, both execution threads do this at the same time, and read the same value. At this point, the end result is already going to be garbage. It is clear that the intent here is for each execution thread to get a unique value for its parameter, but this is very unlikely to happen here. There's nothing in the shown code that ensures that each execution thread actually gets its own unique thread_id.

Additionally, the original parent execution thread seems to assume that all the execution threads will all finish their job before the parent execution thread reads their results, and writes them out to a file.

Unfortunately, there's no code in the parent execution thread that appears to actually wait for all execution threads to finish. As soon as they're all started, it takes it on faith that they complete instantly, and it reads the partial results, and writes it out to a file:

auto stop = std::chrono::high_resolution_clock::now();

Well, the bad news here is that there's nothing that actually waits for all execution threads to actually stop, at this point. They're still running here. Even if the program manages to avoid crashing, the output results will be incomplete, and mostly junk.

ans[data[i][j].time - 1]

It appears that the value of .time here was originally read from the input file. There does not appear to be any bounds checking here. It's possible for this vector/array access to be out of bounds, resulting in undefined behavior and a likely crash.

Also, another problem with the shown code: There are plenty of calls to new, but only some of those get deleted, resulting in multiple memory leaks. Inspecting the shown code, there is no clear reason to new anything, in the first place.

In conclusion, there are multiple problems with the shown code that result in undefined behavior, and any of them could be the reason for the observed crash. The shown approach is very error-prone, and will require much more substantial work, proper multi-threading support, and inter-thread sequencing in order to make all events happen in the correct order across all the execution threads.

like image 123
Sam Varshavchik Avatar answered Mar 16 '26 02:03

Sam Varshavchik



Donate For Us

If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!