OpenCL problem with double type

Question

I'm trying to use the double type in OpenCL, but it doesn't work. I want to use double for more precision; if there is any other type that provides this, please tell me.

If you don't have time to read my code, the summary is: I want to use double (or another type) in OpenCL for more precision in the calculation of pi.

My code:

 #pragma OPENCL EXTENSION cl_amd_fp64 : enable

 #include <fcntl.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <math.h>
 #include <unistd.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <OpenCL/opencl.h>

 ////////////////////////////////////////////////////////////////////////////////

 // Use a static data size for simplicity: the number of series terms the
 // device computes (one work-item per term).
 //
 #define DATA_SIZE (1000000)
 ////////////////////////////////////////////////////////////////////////////////
 // Host-side element type; it must match the `double` buffers the kernel uses.
 #define TIPO double
 // OpenCL C source of the `calcpi` kernel.
 //
 // Each work-item i < count stores 4 / (2*i + 1) in output[i]; the host then
 // adds the terms with alternating sign. `input` is part of the kernel
 // signature but is not read.
 //
 // Double precision is an optional OpenCL feature, so the source enables
 // cl_khr_fp64 when the implementation defines it, falls back to cl_amd_fp64,
 // and otherwise fails the build with an explicit message instead of silently
 // producing garbage. Every line ends in "\n" because the preprocessor
 // directives inside the kernel source must each sit on their own line.
 //
 const char *KernelSource =
     "\n"
     "#ifdef cl_khr_fp64\n"
     "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
     "#elif defined(cl_amd_fp64)\n"
     "#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n"
     "#else\n"
     "#error \"Double precision floating point not supported by OpenCL implementation.\"\n"
     "#endif\n"
     "__kernel void calcpi(\n"
     "    __global double* input,\n"
     "    __global double* output,\n"
     "    const unsigned int count)\n"
     "{\n"
     "    const size_t i = get_global_id(0);\n"
     "    if (i < count) {\n"
     "        output[i] = 4.0 / (2.0 * (double)i + 1.0);\n"
     "    }\n"
     "}\n"
     "\n";

 ////////////////////////////////////////////////////////////////////////////////

 int main(int argc, char** argv)
 {
 int err; // error code returned from api calls
 //printf("%d",sizeof(TIPO));
 //scanf("%d",&err);
 TIPO data[2]; // original data set given to device
 TIPO *results = malloc(sizeof(TIPO)*DATA_SIZE); // results returned from device
 //unsigned int correct; // number of correct results returned
 //printf("TESTE");

 size_t global; // global domain size for our calculation
 size_t local; // local domain size for our calculation

 cl_device_id device_id; // device ID
 cl_context context; // context
 cl_command_queue queue; // command queue
 cl_program program; // program
 cl_kernel kernel; // kernel

 cl_mem input; // device memory used for the input array
 cl_mem output; // device memory used for the output array

 // Get data on which to operate
 //

 //int i = 0;
 //int n = 3;
 unsigned int count = DATA_SIZE;
 //for(i = 0; i < count; i+=2) {
 //data[i] = n;
 //n += 2;
 //}
 //printf("TESTE");
 // Get an ID for the device [2]
 int gpu = 1;
 err = clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1,&device_id,      NULL);
 if (err != CL_SUCCESS)
      printf("ERROR CLGETDEVICEIDS!
");     // [3]

 // Create a context [4]
 //
 context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
 if (!context) {
      printf("ERROR CONTEXT
");
 }

 // Create a command queue [5]
 //
 queue = clCreateCommandQueue(context, device_id, 0, &err);
 if (!queue) {
      printf("ERROR QUEUE
");
 }

 // Create the compute program from the source buffer [6]
 //
 program = clCreateProgramWithSource(context, 1,(const char **) & KernelSource, NULL, &err);
 if ( !program) {
      printf("ERROR PROGRAM
");
 }

 // Build the program executable [7]
 //
 err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
 if (err != CL_SUCCESS)
 {
      size_t len;
      char buffer[2048];

      printf("Error: Failed to build program executable
"); //[8]
      clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG,sizeof(buffer), buffer, &len);
      printf("%s
", buffer);
      exit(1);
 }

 // Create the compute kernel in the program we wish to run [9]
 //
 kernel = clCreateKernel(program, "calcpi", &err);
 if (!kernel || err != CL_SUCCESS) {
      printf("ERROR KERNEL OR CL_SUCESS
");
 }

 // Create the input and output arrays in device memory for our calculation
 // [10]
 input = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(TIPO) *count,NULL, NULL);
 output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(TIPO) *count,NULL, NULL);
 if (!input || !output) {
      printf("ERROR !INPUT OR !OUTPUT
");
 }

 // Write our data set into the input array in device memory [11]
 //
 err = clEnqueueWriteBuffer(queue, input, CL_TRUE, 0,sizeof(TIPO) *2, data, 0, NULL, NULL);
 if (err != CL_SUCCESS) {
      printf("ERROR WRITE OUR DATA
");
 }

 // Set the arguments to our compute kernel [12]
 //
 err = 0;
 err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input);
 err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output);
 err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &count);
 if (err != CL_SUCCESS) {
      printf("ERROR ARGUMENTS COMPUTE KERNEL - ERROR NUMBER: %d
",err);
      exit(1);
 }

 // Get the maximum work-group size for executing the kernel on the device
 // [13]
 err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE,sizeof(size_t), &local, NULL);
 if (err != CL_SUCCESS) {
      printf("ERROR MAXIMUM WORK-GROUP - ERROR NUMBER: %d
",err);
      exit(1);
 }

 // Execute the kernel over the entire range of the data set [14]
 //
 global = count;
 //printf("TESTE");
 err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL,0, NULL, NULL);
 if (err) {
      printf("ERROR EXECUTE KERNEL - ERROR NUMBER: %d
",err);
      exit(1);
 }

 // Wait for the command queue to get serviced before reading back results
 // [15]
 clFinish(queue);

 // Read the results from the device [16]
 //
 err = clEnqueueReadBuffer(queue, output, CL_TRUE, 0,sizeof(TIPO) *count, results, 0, NULL, NULL );
 if (err != CL_SUCCESS) {
      printf("ERROR READ RESULTS - ERROR NUMBER: %d
",err);
 }
 //printf("TESTE");
 TIPO pi = 0.0;
 int i;
 for (i=0;i<count-1;i++) {
      //printf("%f",results[i]);
      pi += (pow(-1.0,i)) * (TIPO) results[i];
      //pi = (TIPO) results[i];
      //printf("casa %d deu: %1.50f
",i,pi);
      //printf("%f",(pow(-1,i)));
      //pi += (pow(-1.0,i));
 }
 printf("PI: %1.50f",pi);

 // Shut down and clean up
 //
 clReleaseMemObject(input);
 clReleaseMemObject(output);
 clReleaseProgram(program);
 clReleaseKernel(kernel);
 clReleaseCommandQueue(queue);
 clReleaseContext(context);

 scanf("%d",&i);
 return 0;
 }

When I put only this in the kernel source:

output = 4.0;

I get 512.000123023986816406250000000000000000000000000

in the results,

and with 1.0 I get 0.00781250184809323400259017944335937500000000000

prunge · Accepted Answer

Are you running under an AMD OpenCL provider? If not, maybe the double-precision OpenCL extension isn't recognized.

If you can/want to support both extensions, you can do the following:

// Enable double precision in the kernel source with whichever extension the
// OpenCL implementation defines: cl_khr_fp64 is tried first, then cl_amd_fp64.
#ifdef cl_khr_fp64
    #pragma OPENCL EXTENSION cl_khr_fp64 : enable
#elif defined(cl_amd_fp64)
    #pragma OPENCL EXTENSION cl_amd_fp64 : enable
#else
    // Neither extension macro is defined: stop the build with an explicit message.
    #error "Double precision floating point not supported by OpenCL implementation."
#endif

But be aware that some functions are not supported under cl_amd_fp64 that are supported under cl_khr_fp64.

OpenCL problem with double type

Tags:

double

precision

opencl

kavain

1 Answers

prunge

Recent Activity

Donate For Us

OpenCL problem with double type

Tags:

double

precision

opencl

kavain

1 Answers

prunge

Related questions

Recent Activity

Donate For Us