Cuda

Question

I'm trying to use nvcc with the most simple example, but it doesn't work correctly. I'm compiling and execute the example from https://devblogs.nvidia.com/easy-introduction-cuda-c-and-c/, however my server can't execute the global function. I rewrite the code to get some error message and I receive the following message: "no kernel image is available for execution on the device"

My GPU is a Quadro 6000 and the cuda version is 9.0.

#include <stdio.h>
#include <cuda_runtime.h>

// Debug stand-in for the SAXPY kernel (y = a*x + y): the real update is
// commented out and each thread instead stores the constant 10.0f into its
// own element of y.
//
// Launch layout: 1-D grid of 1-D blocks. Thread i, with
//   i = blockIdx.x * blockDim.x + threadIdx.x,
// handles element i; threads with i >= n do nothing, so the launch may
// contain more threads than there are elements.
//
//   n  number of elements in y
//   a  scale factor (unused while the real computation is commented out)
//   x  input vector (unused while the real computation is commented out)
//   y  output vector of at least n floats, device-accessible
__global__ void saxpy(int n, float a, float *x, float *y)
{
  int i = blockIdx.x*blockDim.x + threadIdx.x;
  // Guard the grid tail: without it any thread with i >= n writes past y.
  if (i < n)
    y[i] = 10.0f; // a*x[i] + y[i];
}

// Host driver for the saxpy kernel.
//
// Reports the number of CUDA devices, allocates two N-element float vectors
// on the host and on the device, copies them to the device, launches saxpy
// over all N elements, copies y back, and prints the status of the copy
// followed by any launch ("Sync") or execution ("Async") error of the kernel.
//
// Returns EXIT_SUCCESS only if every step succeeded, EXIT_FAILURE otherwise.
// argc/argv are unused.
int main(int argc, char *argv[])
{
  const int N = 120;
  const size_t bytes = N * sizeof(float);
  int nDevices = 0;
  int status = EXIT_FAILURE;
  float *x = NULL, *y = NULL, *d_x = NULL, *d_y = NULL;
  cudaError_t err, errSync, errAsync;

  (void)argc;
  (void)argv;

  err = cudaGetDeviceCount(&nDevices);
  if (err != cudaSuccess) {
    printf("%s\n", cudaGetErrorString(err));
    return EXIT_FAILURE;
  }
  printf("Number of devices %d\n", nDevices);

  x = (float*)malloc(bytes);
  y = (float*)malloc(bytes);
  if (x == NULL || y == NULL) {
    fprintf(stderr, "host allocation of %d floats failed\n", N);
    goto cleanup;
  }

  err = cudaMalloc(&d_x, bytes);
  if (err == cudaSuccess)
    err = cudaMalloc(&d_y, bytes);
  if (err != cudaSuccess) {
    fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(err));
    goto cleanup;
  }

  for (int i = 0; i < N; i++) {
    x[i] = 1.0f;
    y[i] = 2.0f;
  }

  err = cudaMemcpy(d_x, x, bytes, cudaMemcpyHostToDevice);
  if (err == cudaSuccess)
    err = cudaMemcpy(d_y, y, bytes, cudaMemcpyHostToDevice);
  if (err != cudaSuccess) {
    fprintf(stderr, "host-to-device copy failed: %s\n", cudaGetErrorString(err));
    goto cleanup;
  }

  // Run saxpy on all N elements: one block of exactly N threads, i.e. one
  // thread per element, so no thread index falls outside the arrays.
  // N (120) is below the 1024 threads-per-block limit.
  saxpy<<<1, N>>>(N, 2.0f, d_x, d_y);

  // A kernel launch returns nothing itself: launch failures are read back
  // with cudaGetLastError(), execution failures surface at the next
  // synchronizing call. Capture both right after the launch, before any
  // other runtime call can muddy which call the error belongs to.
  errSync  = cudaGetLastError();
  errAsync = cudaDeviceSynchronize();

  err = cudaMemcpy(y, d_y, bytes, cudaMemcpyDeviceToHost);

  // Report the copy status first, then the kernel errors.
  printf("%s\n", cudaGetErrorString(err));
  if (errSync != cudaSuccess)
    printf("Sync kernel error: %s\n", cudaGetErrorString(errSync));
  if (errAsync != cudaSuccess)
    printf("Async kernel error: %s\n", cudaGetErrorString(errAsync));

  if (err == cudaSuccess && errSync == cudaSuccess && errAsync == cudaSuccess)
    status = EXIT_SUCCESS;

cleanup:
  // Both cudaFree and free accept null pointers, so this is safe on every
  // path that reaches the label.
  cudaFree(d_x);
  cudaFree(d_y);
  free(x);
  free(y);
  return status;
}

Execution command

bash-4.1$ nvcc  -o sapx simples_cuda.cu
bash-4.1$ ./sapx
Number of devices 1
no error
Sync kernel error: no kernel image is available for execution on the device

Robert Crovella · Accepted Answer

GPUs of compute capability less than 2.0 are only supported by CUDA toolkits of version 6.5 and older.

GPUs of compute capability less than 3.0 (but greater than or equal to 2.0) are only supported by CUDA toolkits of version 8.0 and older.

Your Quadro 6000 is a compute capability 2.0 GPU. This can be determined programmatically with the deviceQuery CUDA sample code, or via a google search. It is not supported by CUDA 9.0

Cuda - nvcc - No kernel image is available for execution on the device. What is the problem?

Tags:

nvcc

ACC_80

1 Answers

Robert Crovella

Recent Activity

Donate For Us