I've had to do this many times in the past, and I've never been satisfied with the results.
Can anyone suggest a fast way of copying a contiguous bit array from source to destination where both the source and destination's may not be aligned (right shifted) on convenient processor boundaries?
If both the source and destination's aren't aligned , the problem can quickly be changed into one where only either of them aren't aligned (after the first copy say).
As a starting point, my code inevitably ends up looking something like the following (untested, ignore side effects this is just an off the cuff example):
const char mask[8] = { 1, 3, 7, 15, 31, 63, 127, 255 };
/* Assume:
* - destination is already zeroed,
* - offsets are right shifts
* - bits to copy is big (> 32 say)
*/
int bitarray_copy(char * src, int src_bit_offset, int src_bit_len,
char * dst, int dst_bit_offset) {
if (src_bit_offset == dst_bit_offset) { /* Not very interesting */
} else {
int bit_diff_offset = src_bit_offset - dst_bit_offset; /* assume positive */
int loop_count;
char c;
char mask_val = mask[bit_diff_offset];
/* Get started, line up the destination. */
c = (*src++ << bit_diff_offset) | ((*src >> (8 - bit_diff_offset)) & mask_val);
c &= mask[8-dst_bit_offset];
*dst++ |= c;
src_bit_len -= 8 - dst_bit_offset;
loop_count = src_bit_len >> 3;
while (--loop_count >= 0)
* dst ++ = (*src++ << bit_diff_offset) | ((*src >> (8 - bit_diff_offset)) & mask_val);
/* Trailing tail copy etc ... */
if (src_bit_len % 8) /* ... */
}
}
(actually this is better than I've done before. It doesn't look too bad)
This is what I ended up doing. (EDIT Changed on 8/21/2014 for a single bit copy bug.)
#include <limits.h>
#include <string.h>
#include <stddef.h>
#define PREPARE_FIRST_COPY() \
do { \
if (src_len >= (CHAR_BIT - dst_offset_modulo)) { \
*dst &= reverse_mask[dst_offset_modulo]; \
src_len -= CHAR_BIT - dst_offset_modulo; \
} else { \
*dst &= reverse_mask[dst_offset_modulo] \
| reverse_mask_xor[dst_offset_modulo + src_len]; \
c &= reverse_mask[dst_offset_modulo + src_len]; \
src_len = 0; \
} } while (0)
static void
bitarray_copy(const unsigned char *src_org, int src_offset, int src_len,
unsigned char *dst_org, int dst_offset)
{
static const unsigned char mask[] =
{ 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff };
static const unsigned char reverse_mask[] =
{ 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff };
static const unsigned char reverse_mask_xor[] =
{ 0xff, 0x7f, 0x3f, 0x1f, 0x0f, 0x07, 0x03, 0x01, 0x00 };
if (src_len) {
const unsigned char *src;
unsigned char *dst;
int src_offset_modulo,
dst_offset_modulo;
src = src_org + (src_offset / CHAR_BIT);
dst = dst_org + (dst_offset / CHAR_BIT);
src_offset_modulo = src_offset % CHAR_BIT;
dst_offset_modulo = dst_offset % CHAR_BIT;
if (src_offset_modulo == dst_offset_modulo) {
int byte_len;
int src_len_modulo;
if (src_offset_modulo) {
unsigned char c;
c = reverse_mask_xor[dst_offset_modulo] & *src++;
PREPARE_FIRST_COPY();
*dst++ |= c;
}
byte_len = src_len / CHAR_BIT;
src_len_modulo = src_len % CHAR_BIT;
if (byte_len) {
memcpy(dst, src, byte_len);
src += byte_len;
dst += byte_len;
}
if (src_len_modulo) {
*dst &= reverse_mask_xor[src_len_modulo];
*dst |= reverse_mask[src_len_modulo] & *src;
}
} else {
int bit_diff_ls,
bit_diff_rs;
int byte_len;
int src_len_modulo;
unsigned char c;
/*
* Begin: Line things up on destination.
*/
if (src_offset_modulo > dst_offset_modulo) {
bit_diff_ls = src_offset_modulo - dst_offset_modulo;
bit_diff_rs = CHAR_BIT - bit_diff_ls;
c = *src++ << bit_diff_ls;
c |= *src >> bit_diff_rs;
c &= reverse_mask_xor[dst_offset_modulo];
} else {
bit_diff_rs = dst_offset_modulo - src_offset_modulo;
bit_diff_ls = CHAR_BIT - bit_diff_rs;
c = *src >> bit_diff_rs &
reverse_mask_xor[dst_offset_modulo];
}
PREPARE_FIRST_COPY();
*dst++ |= c;
/*
* Middle: copy with only shifting the source.
*/
byte_len = src_len / CHAR_BIT;
while (--byte_len >= 0) {
c = *src++ << bit_diff_ls;
c |= *src >> bit_diff_rs;
*dst++ = c;
}
/*
* End: copy the remaing bits;
*/
src_len_modulo = src_len % CHAR_BIT;
if (src_len_modulo) {
c = *src++ << bit_diff_ls;
c |= *src >> bit_diff_rs;
c &= reverse_mask[src_len_modulo];
*dst &= reverse_mask_xor[src_len_modulo];
*dst |= c;
}
}
}
}
Your inner loop takes pieces of two bytes and moves them to a destination byte. That's almost optimal. Here are a few more hints in no particular order:
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With