I want to compute $\lVert \vec{a} - \vec{b} \rVert$ for vectors $\vec{a}$ and $\vec{b}$, where $\lVert \vec{v} \rVert$ denotes the magnitude of the vector $\vec{v}$. Since this involves taking the square root of the sum of the squares of the differences between each pair of corresponding components of the two vectors, it should be a highly parallelizable task. I am using CUDA and Thrust, through Cygwin, on Windows 10. Both CUDA and Thrust are working in general.
The code below compiles and runs (with nvcc), but only because I have commented out three lines toward the bottom of `main`, each of which I think should work but does not. `func::operator()(tup t)` thinks that the arguments I'm passing it are not in fact of type `tup`.
I have also commented out the actual body of the operator, in the interest of making it more likely to at least compile. The operator is supposed to find the squared difference between the elements of the input `tup`. It is the `unary_op` argument of `transform_reduce` (which in this case is `func()`); the reduction (here `thrust::plus<float>()`) would then add these squared differences, giving me the norm squared of the difference of the vectors.
#include <iostream>
#include <stdlib.h>
#include <thrust/device_vector.h>
#include <thrust/transform.h>
#include <thrust/tuple.h>
#include <thrust/transform_reduce.h>
#include <thrust/iterator/zip_iterator.h>
// Shorthand for the float vector type used for both operands.
typedef thrust::device_vector<float> dvec;
// Iterator type over a dvec.
typedef dvec::iterator iter;
// Argument type of func: a pair of *iterators*.
// NOTE(review): the compiler output quoted further down reports a failed
// conversion from thrust::device_reference<float> to iter while constructing
// this tuple, i.e. the values handed to func are element references rather
// than iterators.
typedef thrust::tuple<iter, iter> tup;
// Unary functor (tup -> float) passed as the transform step of
// thrust::transform_reduce / thrust::transform in main. It is meant to return
// the squared difference of the two tuple elements ("difsq").
struct func: public thrust::unary_function<tup, float>
{
// Currently a stub: ignores t and returns a constant so that the rest of the
// file has a chance of compiling.
__device__ float operator()(tup t) //difsq
{
// I've commented out these two lines for testing purposes:
// float f = thrust::get<0>(t) - thrust::get<1>(t);
// return f*f;
return 3.14;
}
};
// Builds two 40-element vectors, zips them together, and probes which uses of
// the zipped range compile. Prints the difference of the first pair of
// elements followed by "done".
int main()
{
dvec a(40, 4.f);
dvec b(40, 3.f);
// Zip iterators that walk a and b in lockstep.
auto begin = thrust::make_zip_iterator(thrust::make_tuple(a.begin(), b.begin()));
auto end = thrust::make_zip_iterator(thrust::make_tuple(a.end(), b.end()));
//these two lines work
// (they read the individual elements of the first zipped pair directly)
thrust::get<0>(begin[0]);
std::cout << thrust::get<0>(begin[0]) - thrust::get<1>(begin[0]);
//these three lines do not
// (each one hands an element of the zipped range to func::operator())
//thrust::transform_reduce(begin, end, func(), 0.0f, thrust::plus<float>());
//func()(begin[0]);
//thrust::transform(begin, end, begin, func());
std::cout << "done" << std::endl;
return 0;
}
I get this error (my program is called `sandbox.cu`):
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\bin/../include\thrust/detail/tuple.inl(310): error: no instance of constructor "thrust::detail::normal_iterator<Pointer>::normal_iterator [with Pointer=thrust::device_ptr<float>]" matches the argument list
argument types are: (const thrust::device_reference<float>)
detected during:
instantiation of "thrust::detail::cons<HT, TT>::cons(const thrust::detail::cons<HT2, TT2> &) [with HT=iter, TT=thrust::detail::cons<iter, thrust::null_type>, HT2=thrust::device_reference<float>, TT2=thrust::detail::cons<thrust::device_reference<float>, thrust::null_type>]"
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\bin/../include\thrust/tuple.h(361): here
instantiation of "thrust::tuple<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9>::tuple(const thrust::detail::cons<U1, U2> &) [with T0=iter, T1=iter, T2=thrust::null_type, T3=thrust::null_type, T4=thrust::null_type, T5=thrust::null_type, T6=thrust::null_type, T7=thrust::null_type, T8=thrust::null_type, T9=thrust::null_type, U1=thrust::device_reference<float>, U2=thrust::detail::cons<thrust::device_reference<float>, thrust::null_type>]"
sandbox.cu(37): here
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\bin/../include\thrust/detail/tuple.inl(411): error: no instance of constructor "thrust::detail::normal_iterator<Pointer>::normal_iterator [with Pointer=thrust::device_ptr<float>]" matches the argument list
argument types are: (const thrust::device_reference<float>)
detected during:
instantiation of "thrust::detail::cons<HT, thrust::null_type>::cons(const thrust::detail::cons<HT2, thrust::null_type> &) [with HT=iter, HT2=thrust::device_reference<float>]"
(310): here
instantiation of "thrust::detail::cons<HT, TT>::cons(const thrust::detail::cons<HT2, TT2> &) [with HT=iter, TT=thrust::detail::cons<iter, thrust::null_type>, HT2=thrust::device_reference<float>, TT2=thrust::detail::cons<thrust::device_reference<float>, thrust::null_type>]"
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\bin/../include\thrust/tuple.h(361): here
instantiation of "thrust::tuple<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9>::tuple(const thrust::detail::cons<U1, U2> &) [with T0=iter, T1=iter, T2=thrust::null_type, T3=thrust::null_type, T4=thrust::null_type, T5=thrust::null_type, T6=thrust::null_type, T7=thrust::null_type, T8=thrust::null_type, T9=thrust::null_type, U1=thrust::device_reference<float>, U2=thrust::detail::cons<thrust::device_reference<float>, thrust::null_type>]"
sandbox.cu(37): here
2 errors detected in the compilation of "C:/cygwin64/tmp/tmpxft_00001a90_00000000-10_sandbox.cpp1.ii".
Solved! `tup` should have been `thrust::tuple<float, float>`, not `thrust::tuple<iter, iter>`. Full solution:
#include <iostream>
#include <stdlib.h>
#include <thrust/device_vector.h>
#include <thrust/transform.h>
#include <thrust/tuple.h>
#include <thrust/transform_reduce.h>
#include <thrust/iterator/zip_iterator.h>
typedef thrust::device_vector<float> dvec;
typedef thrust::tuple<float, float> tup;
struct func
{
__device__ float operator()(tup t) //difsq
{
float f = thrust::get<0>(t) - thrust::get<1>(t);
return f*f;
}
};
int main()
{
dvec a(4, 3.f);
dvec b(4, 2.f);
auto begin = thrust::make_zip_iterator(thrust::make_tuple(a.begin(), b.begin()));
auto end = thrust::make_zip_iterator(thrust::make_tuple(a.end(), b.end()));
std::cout << thrust::transform_reduce(begin, end, func(), 0.0f, thrust::plus<float>()) << std::endl;
std::cout << "done" << std::endl;
return 0;
}
If you love us, you can donate to us via PayPal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With