Hi everyone, I'm trying to use the Intel MMX intrinsics like so:
/*
 * Question's snippet: builds two MMX vectors holding four copies of 5 and
 * four copies of 2 as 16-bit lanes, adds them lane-wise, and tries to print
 * the lanes of each vector by subscripting the __m64 values directly.
 *
 * NOTE(review): this is the code that produces the unexpected output quoted
 * right after it; it is kept as posted.
 */
void test()
{
uint16_t n1 = 5;
uint16_t n2 = 2;
__m64 vec1, vec2, res;
/* Broadcast each scalar into all four 16-bit lanes. */
vec1 = _mm_set_pi16(n1, n1, n1, n1);
vec2 = _mm_set_pi16(n2, n2, n2, n2);
/* Lane-wise 16-bit addition. */
res = _mm_add_pi16(vec1, vec2);
/*
 * NOTE(review): subscripting an __m64 is not standard C. Whether it compiles,
 * and what element type and element count vec[i] refers to, is
 * compiler-specific -- presumably it does not yield one 16-bit lane per index
 * here, given the reported output. Confirm against the compiler's definition
 * of __m64 before relying on indices 2 and 3 or on the %u conversions.
 */
printf("%u %u %u %u \n", vec1[0], vec1[1], vec1[2], vec1[3]);
printf("%u %u %u %u \n", vec2[0], vec2[1], vec2[2], vec2[3]);
printf("%u %u %u %u \n", res[0], res[1], res[2], res[3]);
/* NOTE(review): no _mm_empty() is called before returning from MMX code. */
}
But the weird thing is that I'm getting these results:
327685 327685 131074 131074
131074 131074 458759 458759
458759 458759 327685 327685
I'm using Eclipse Mars, and I'm including mmintrin.h, xmmintrin.h, and emmintrin.h.
Can someone please explain what's wrong with this?
Referencing an __m64 value as an array is non-standard.
I don't know how your compiler handles it.
I am using Intel Compiler in Visual Studio, and get a compilation error.
You should extract the uint16 elements from the MMX register into an ALU register before printing.
Use the _mm_extract_pi16 intrinsic to extract each value.
Don't forget to call the _mm_empty() intrinsic before exiting the function.
See the following code sample:
#include <stdint.h>
#include <stdio.h>
#include <mmintrin.h>
#include <xmmintrin.h>
#include <emmintrin.h>
/*
 * Print the four 16-bit lanes of an MMX vector, lowest lane first, as
 * unsigned decimals followed by a newline.
 */
static void print_pi16(__m64 v)
{
    /*
     * _mm_extract_pi16 takes a compile-time constant lane index and returns
     * an int; the casts make each argument match the %u conversion exactly.
     */
    printf("%u %u %u %u \n",
           (unsigned)_mm_extract_pi16(v, 0),
           (unsigned)_mm_extract_pi16(v, 1),
           (unsigned)_mm_extract_pi16(v, 2),
           (unsigned)_mm_extract_pi16(v, 3));
}

/*
 * Demonstrates packed 16-bit MMX addition: broadcasts 5 and 2 into two
 * vectors, adds them lane-wise, and prints both operands and the sum, one
 * vector per line.
 */
static void Test(void)
{
    const uint16_t n1 = 5;
    const uint16_t n2 = 2;

    /* Broadcast each scalar into all four 16-bit lanes. */
    const __m64 vec1 = _mm_set_pi16(n1, n1, n1, n1);
    const __m64 vec2 = _mm_set_pi16(n2, n2, n2, n2);
    const __m64 res = _mm_add_pi16(vec1, vec2);

    print_pi16(vec1);
    print_pi16(vec2);
    print_pi16(res);

    /* Leave MMX state before returning, as required after MMX intrinsics. */
    _mm_empty();
}
/* Program entry point: runs the MMX demo once and exits successfully. */
int main(void)
{
    Test();
    return 0;
}
Output:
5 5 5 5
2 2 2 2
7 7 7 7
Let's convert those values to hexadecimal strings:
0x00050005 0x00050005 0x00020002 0x00020002
0x00020002 0x00020002 0x00070007 0x00070007
0x00070007 0x00070007 0x00050005 0x00050005
It appears that the compiler is not applying the usual integral promotions to a subscripted __m64 variable, so each value you pass consumes only 16 bits of parameter space (probably on the stack), and printf then decodes 32 bits for each %u.
You should be able to fix this with explicit casts, like:
/*
 * Explicit casts so every argument has the unsigned int type that %u expects.
 * NOTE(review): still relies on the compiler accepting subscripts on __m64;
 * confirm the element width and that indices 2 and 3 are in range for it.
 */
printf("%u %u %u %u \n", (unsigned int)vec1[0], (unsigned int)vec1[1],
(unsigned int)vec1[2], (unsigned int)vec1[3]);
Integral promotions are supposed to be applied to parameters of variadic functions... but if the result of subscripting here is not exactly one of the integral types, then that rule no longer applies.
If you love us, you can donate via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With