Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How can I use SIMD to accelerate XORing two blocks of memory?

Tags:

I want to XOR two blocks of memory as quickly as possible. How can I use SIMD to accelerate it?

My original code is below:

/*
 * XOR region r1 into region r2 in place: r2[i] ^= r1[i] for 0 <= i < nbytes.
 *
 * r1 is only read; r2 receives the result. A non-positive nbytes is a no-op.
 *
 * Whole 64-bit words are XORed at once when both pointers are 8-byte
 * aligned. Any remaining bytes (nbytes not a multiple of 8), or the entire
 * region when either pointer is misaligned, are handled one byte at a time,
 * so the function never touches memory outside [r, r + nbytes).
 */
void region_xor_w64(   unsigned char *r1,         /* Region 1 */
                       unsigned char *r2,         /* Region 2 */
                       int nbytes)       /* Number of bytes in region */
{
    const int word_size = (int) sizeof(uint64_t);
    int done = 0;
    int i;

    if (nbytes <= 0) {
        return;
    }

    /* Dereferencing as uint64_t is only valid on suitably aligned addresses. */
    if ((((uintptr_t) r1 | (uintptr_t) r2) % sizeof(uint64_t)) == 0) {
        const uint64_t *l1 = (const uint64_t *) r1;
        uint64_t *l2 = (uint64_t *) r2;
        int nwords = nbytes / word_size;   /* full words only, never past the end */

        for (i = 0; i < nwords; i++) {
            l2[i] ^= l1[i];
        }
        done = nwords * word_size;
    }

    /* Tail bytes, or everything when the word path was skipped. */
    for (i = done; i < nbytes; i++) {
        r2[i] ^= r1[i];
    }
}

I wrote an SSE version myself, but it gave only a small speedup.

/*
 * XOR src into dst in place using SSE2: dst[i] ^= src[i] for
 * 0 <= i < block_size.
 *
 * src is only read; dst receives the result. A non-positive block_size is a
 * no-op. Full 16-byte chunks go through unaligned load/store intrinsics, so
 * neither pointer needs to be 16-byte aligned; any remaining bytes
 * (block_size not a multiple of 16) are XORed one at a time, so nothing
 * outside the two regions is read or written.
 */
void region_xor_sse(   unsigned char* dst,
                       unsigned char* src,
                       int block_size){
  const int vec_size = (int)sizeof(__m128i);
  int off = 0;

  /* "block_size - off" rather than "off + vec_size" to avoid signed overflow. */
  while (block_size - off >= vec_size) {
    __m128i s = _mm_loadu_si128((const __m128i*)(src + off));
    __m128i d = _mm_loadu_si128((const __m128i*)(dst + off));

    _mm_storeu_si128((__m128i*)(dst + off), _mm_xor_si128(s, d));
    off += vec_size;
  }

  /* Tail shorter than one vector. */
  for (; off < block_size; ++off) {
    dst[off] ^= src[off];
  }
}