How can multiply vector by a matrix using CUDA c++ [closed]

Question

Can any one help me, how can I multiply vector(1*N) and matrix(N*M) and store the result on new vector(1*M) using CUDA c++.

lashgar · Accepted Answer

I guess StackOverflow is the place to ask a question and discuss solutions. While this question has been downvoted by many people, I've answered this question. Maybe the asker need some discussions on the available solutions. Here is the code intended for large M:

#include <stdio.h>
#include <cuda.h>
#include <time.h>

__global__
void kernel(float *vec, float *mat, float *out, const int N, const int M){
    int tid=threadIdx.x+blockIdx.x*blockDim.x;
        float sum=0;
    if(tid<M){
        for(int i=0; i<N; i++)
            sum += vec[i]*mat[(i*M)+tid];
        out[tid]=sum;
    }
}

// debuging functions
void init_array(float *a, const int N);
void init_mat(float *a, const int N, const int M);
void print_array(float *a, const int N, char *d);
void print_mat(float *a, const int N, const int M, char *d);

int main (void) {
        srand( time(NULL) );

    float *a, *b, *c;
        float *dev_a, *dev_b, *dev_c;

    int N=3;
    int M=4;
    a=(float*)malloc(sizeof(float)*N);
    b=(float*)malloc(sizeof(float)*N*M);
    c=(float*)malloc(sizeof(float)*M);
        init_array(a, N);
        init_mat(b, N, M);
        init_array(c, M);

    printf("<<<<<<<<<< initial data:
");
        print_array(a, N, "in-vector");
        print_mat(b, N, M, "matrix");
        print_array(c, M, "out-vector");

        cudaMalloc((void**)&dev_a, sizeof(float)*N);
        cudaMalloc((void**)&dev_b, sizeof(float)*N*M);
        cudaMalloc((void**)&dev_c, sizeof(float)*M);

        cudaMemcpy(dev_a, a, sizeof(float)*N, cudaMemcpyHostToDevice);
        cudaMemcpy(dev_b, b, sizeof(float)*N*M, cudaMemcpyHostToDevice);

    printf("

Running Kernel...

");
        kernel<<<M/256+1, 256>>>(dev_a, dev_b, dev_c, N, M);
        //printf("error code: %s
",cudaGetErrorString(cudaGetLastError()));

        cudaMemcpy(c, dev_c, sizeof(float)*M, cudaMemcpyDeviceToHost);

        cudaFree(dev_a);
        cudaFree(dev_b);
        cudaFree(dev_c);

    printf(">>>>>>>>>> final data:
");
        print_array(c, M, "out-vector");

        return 0;
};

void init_array(float *a, const int N) {
        int i;
        for(i=0; i<N; i++)
                a[i] = rand() % 4 + 1;
}
void init_mat(float *a, const int N, const int M) {
        int i, j;
        for(i=0; i<N; i++)
            for(j=0; j<M; j++)
                    a[i*M+j] = rand() % 4 + 1;
}
void print_array(float *a, const int N, char *d) {
        int i;
        for(i=0; i<N; i++)
                printf("
%s[%d]: %f",d, i, a[i]);
    printf("
");
}
void print_mat(float *a, const int N, const int M, char *d) {
        int i, j;
        for(i=0; i<N; i++){
        printf("
%s[%d]:", d, i);
        for (j=0; j<M; j++)
                    printf("	%6.4f", a[i*M+j]);
    }
    printf("
");
}

It needs minor modifications to suit for large N.

How can multiply vector by a matrix using CUDA c++ [closed]

Tags:

c++

cuda

abdo.eng 2006210

1 Answers

lashgar

Recent Activity

Donate For Us

How can multiply vector by a matrix using CUDA c++ [closed]

Tags:

c++

cuda

abdo.eng 2006210

1 Answers

lashgar

Related questions

Recent Activity

Donate For Us