I have used atomicMax() to find the maximum value in the following CUDA kernel:
__global__ void global_max(float* values, float* gl_max)
{
    int i = threadIdx.x + blockDim.x * blockIdx.x;
    float val = values[i];
    atomicMax(gl_max, val);
}
It fails to compile with the following error:
error: no instance of overloaded function "atomicMax" matches the argument list
The argument types are: (float *, float).
Based on the CUDA Toolkit Documentation v9.2.148, atomicMax and atomicMin have no float overloads. However, you can implement them by combining the integer atomicMax and atomicMin with signed and unsigned reinterpretations of the float bits: non-negative IEEE-754 floats compare in the same order as their bit patterns read as signed ints, while negative floats compare in reverse order when read as unsigned ints.
This is a float atomic min:
__device__ __forceinline__ float atomicMinFloat(float* addr, float value) {
    float old;
    // Non-negative floats order the same as their bit patterns read as
    // signed ints; negative floats order in reverse as unsigned ints,
    // so a float min becomes an unsigned-int max there.
    old = (value >= 0) ? __int_as_float(atomicMin((int*)addr, __float_as_int(value))) :
                         __uint_as_float(atomicMax((unsigned int*)addr, __float_as_uint(value)));
    return old;
}
This is a float atomic max:
__device__ __forceinline__ float atomicMaxFloat(float* addr, float value) {
    float old;
    // Mirror image of atomicMinFloat: for negative values a float max
    // becomes an unsigned-int min.
    old = (value >= 0) ? __int_as_float(atomicMax((int*)addr, __float_as_int(value))) :
                         __uint_as_float(atomicMin((unsigned int*)addr, __float_as_uint(value)));
    return old;
}
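For reference, here is how the question's kernel could use atomicMaxFloat. This is only a sketch under my own assumptions: the n parameter, the bounds check, and the -FLT_MAX initialization are additions, not part of the answer above.
#include <cfloat>  // FLT_MAX

// Sketch: the question's kernel rewritten to call atomicMaxFloat.
// The bounds check lets the grid safely over-cover the array.
__global__ void global_max(const float* values, float* gl_max, int n)
{
    int i = threadIdx.x + blockDim.x * blockIdx.x;
    if (i < n)
        atomicMaxFloat(gl_max, values[i]);
}
Before the launch, initialize *gl_max to -FLT_MAX (the identity for max), e.g. with cudaMemcpy; otherwise the result depends on whatever the buffer happened to contain.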
atomicMax is not available for float types, but you can implement it via atomicCAS:
__device__ static float atomicMax(float* address, float val)
{
    int* address_as_i = (int*)address;
    int old = *address_as_i, assumed;
    do {
        assumed = old;
        // Try to install max(val, current); retry if another thread
        // changed the value in the meantime.
        old = ::atomicCAS(address_as_i, assumed,
                          __float_as_int(::fmaxf(val, __int_as_float(assumed))));
    } while (assumed != old);
    return __int_as_float(old);
}
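A common refinement, sketched below under my own assumptions (the name atomicMaxFloatCAS is hypothetical), is to skip the CAS entirely when the stored value already wins, which reduces contention when most candidate values lose. One behavioral difference: unlike the fmaxf version above, a NaN already stored at address is never replaced.
// Sketch: atomicCAS max with an early exit when no update is needed.
__device__ static float atomicMaxFloatCAS(float* address, float val)
{
    int* address_as_i = (int*)address;
    int old = *address_as_i;
    while (__int_as_float(old) < val) {
        int assumed = old;
        old = atomicCAS(address_as_i, assumed, __float_as_int(val));
        if (old == assumed)
            break;  // our value was installed
    }
    return __int_as_float(old);
}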
A third option is to map each float to an order-preserving int and then use the integer atomicMax directly:
__device__ __forceinline__ int floatToOrderedInt(float floatVal) {
    int intVal = __float_as_int(floatVal);
    // Non-negative floats already order like signed ints; for negative
    // floats, flipping the non-sign bits reverses their order so the
    // mapping preserves float ordering overall.
    return (intVal >= 0) ? intVal : intVal ^ 0x7FFFFFFF;
}
__device__ __forceinline__ float orderedIntToFloat(int intVal) {
    return __int_as_float((intVal >= 0) ? intVal : intVal ^ 0x7FFFFFFF);
}
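With this mapping, the running maximum lives in an int buffer and the plain integer atomicMax applies directly. Below is a minimal sketch under my own assumptions (the kernel name, the n bounds check, and the initialization scheme are mine, not part of the answer above):
// Sketch: accumulate the maximum as an order-preserving int.
// gl_max_as_int must first be set to floatToOrderedInt(-FLT_MAX),
// e.g. via a tiny init kernel, since the helpers above are
// __device__ functions (or mark them __host__ __device__).
__global__ void global_max_ordered(const float* values, int* gl_max_as_int, int n)
{
    int i = threadIdx.x + blockDim.x * blockIdx.x;
    if (i < n)
        atomicMax(gl_max_as_int, floatToOrderedInt(values[i]));
}
Afterwards, copy the int back and convert it with orderedIntToFloat to recover the float maximum.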