This is what I'm currently doing:
int dataLen = 500;
char data[dataLen];
int desired = 1; // between 1 and 6, inclusive
...
char bits[dataLen * 8];
for (int32 j = 0; j < dataLen; j++) {
for (int i = 0; i < 8; i++) {
bits[j*8+i] = ( (data[j] & (1 << (7-i))) ? '1' : '0' );
}
}
int offset = std::search_n(bits, bits + dataLen*8, desired, '0') - bits;
Really nasty, I know, and it's killing performance.
What's the fastest way to find the bit offset of the first set of x
consecutive 0 bits in a char array, where 0 < x < 7
? I'm on GCC with SSE 4.2 so builtins like __builtin_ctz, __builtin_popcountl are an option, I just can't figure out the best way to use them.
Here's a function that matches the output of the one provided in the question (at least under limited testing). It uses a table lookup , with the table having been generated by a one-off script. I'm honestly not sure if its performance is competitive with the suggestions that use bit testing hackery or GCC builtins, but I'll bet it's not too far off.
struct zeros {
unsigned char leading;
unsigned char internal;
unsigned char trailing;
};
// forward declaration so the long, non-interesting table is at the
// end of this
static struct zeros const zero_table[256];
int find_zero_bits_offset( char const* data, int datalen, int desired)
{
int offset = -1;
int found = 0;
char const* dataptr = &data[0];
char const* endptr = &data[datalen];
// first, find which byte the sequence of zeros begins
while (!found && (dataptr != endptr)) {
unsigned char ch1 = *dataptr++;
unsigned char ch2 = (dataptr != endptr) ? *dataptr : 0xff;
int internal = zero_table[ch1].internal;
int trailing = zero_table[ch1].trailing;
int leading = zero_table[ch2].leading;
found = (desired <= internal) ||
(trailing && (desired <= (trailing + leading)));
}
// now zero in on where the sequence starts within the byte
if (found) {
char ch = 0;
unsigned int mask = 0;
--dataptr;
// dataptr points to the byte where the sequence of zeros starts.
// figure out exactly where the sequence is...
// there's possibly some opportunity for optimization, if neccesary,
// by testing if the sequence was found in the "leading", "internal", or
// "trailing" cases. But the explicit loop will only iterate at most
// 8 times (and will early-out on the first iteration if the match is
// for the "leading" case) that it didn't seem too important
ch = *dataptr;
offset = (dataptr - data) * 8;
// figure out where the appropriate internal run starts
// note that the offset we need to return isn't necessarily the
// offset for the run of zeros counted by zero_table[tmp].internal_offset
// since a sufficient shorter run might come first
// there may be a more efficient bithack for this, but the
// loop will iterate at most 8 times...
mask = ((1 << desired) - 1);
mask <<= (8 - desired);
while ((ch & mask) != 0) {
++offset;
mask >>= 1;
}
}
else {
// however you want to handle the "not found" situation.
// This is equivalent to what was in the question:
offset = (endptr - data) * 8;
}
return offset;
}
static struct zeros const zero_table[256] = {
{ 8, 8, 8 }, // 0000 0000
{ 7, 7, 0 }, // 0000 0001
{ 6, 6, 1 }, // 0000 0010
{ 6, 6, 0 }, // 0000 0011
{ 5, 5, 2 }, // 0000 0100
{ 5, 5, 0 }, // 0000 0101
{ 5, 5, 1 }, // 0000 0110
{ 5, 5, 0 }, // 0000 0111
{ 4, 4, 3 }, // 0000 1000
{ 4, 4, 0 }, // 0000 1001
{ 4, 4, 1 }, // 0000 1010
{ 4, 4, 0 }, // 0000 1011
{ 4, 4, 2 }, // 0000 1100
{ 4, 4, 0 }, // 0000 1101
{ 4, 4, 1 }, // 0000 1110
{ 4, 4, 0 }, // 0000 1111
{ 3, 4, 4 }, // 0001 0000
{ 3, 3, 0 }, // 0001 0001
{ 3, 3, 1 }, // 0001 0010
{ 3, 3, 0 }, // 0001 0011
{ 3, 3, 2 }, // 0001 0100
{ 3, 3, 0 }, // 0001 0101
{ 3, 3, 1 }, // 0001 0110
{ 3, 3, 0 }, // 0001 0111
{ 3, 3, 3 }, // 0001 1000
{ 3, 3, 0 }, // 0001 1001
{ 3, 3, 1 }, // 0001 1010
{ 3, 3, 0 }, // 0001 1011
{ 3, 3, 2 }, // 0001 1100
{ 3, 3, 0 }, // 0001 1101
{ 3, 3, 1 }, // 0001 1110
{ 3, 3, 0 }, // 0001 1111
{ 2, 5, 5 }, // 0010 0000
{ 2, 4, 0 }, // 0010 0001
{ 2, 3, 1 }, // 0010 0010
{ 2, 3, 0 }, // 0010 0011
{ 2, 2, 2 }, // 0010 0100
{ 2, 2, 0 }, // 0010 0101
{ 2, 2, 1 }, // 0010 0110
{ 2, 2, 0 }, // 0010 0111
{ 2, 3, 3 }, // 0010 1000
{ 2, 2, 0 }, // 0010 1001
{ 2, 2, 1 }, // 0010 1010
{ 2, 2, 0 }, // 0010 1011
{ 2, 2, 2 }, // 0010 1100
{ 2, 2, 0 }, // 0010 1101
{ 2, 2, 1 }, // 0010 1110
{ 2, 2, 0 }, // 0010 1111
{ 2, 4, 4 }, // 0011 0000
{ 2, 3, 0 }, // 0011 0001
{ 2, 2, 1 }, // 0011 0010
{ 2, 2, 0 }, // 0011 0011
{ 2, 2, 2 }, // 0011 0100
{ 2, 2, 0 }, // 0011 0101
{ 2, 2, 1 }, // 0011 0110
{ 2, 2, 0 }, // 0011 0111
{ 2, 3, 3 }, // 0011 1000
{ 2, 2, 0 }, // 0011 1001
{ 2, 2, 1 }, // 0011 1010
{ 2, 2, 0 }, // 0011 1011
{ 2, 2, 2 }, // 0011 1100
{ 2, 2, 0 }, // 0011 1101
{ 2, 2, 1 }, // 0011 1110
{ 2, 2, 0 }, // 0011 1111
{ 1, 6, 6 }, // 0100 0000
{ 1, 5, 0 }, // 0100 0001
{ 1, 4, 1 }, // 0100 0010
{ 1, 4, 0 }, // 0100 0011
{ 1, 3, 2 }, // 0100 0100
{ 1, 3, 0 }, // 0100 0101
{ 1, 3, 1 }, // 0100 0110
{ 1, 3, 0 }, // 0100 0111
{ 1, 3, 3 }, // 0100 1000
{ 1, 2, 0 }, // 0100 1001
{ 1, 2, 1 }, // 0100 1010
{ 1, 2, 0 }, // 0100 1011
{ 1, 2, 2 }, // 0100 1100
{ 1, 2, 0 }, // 0100 1101
{ 1, 2, 1 }, // 0100 1110
{ 1, 2, 0 }, // 0100 1111
{ 1, 4, 4 }, // 0101 0000
{ 1, 3, 0 }, // 0101 0001
{ 1, 2, 1 }, // 0101 0010
{ 1, 2, 0 }, // 0101 0011
{ 1, 2, 2 }, // 0101 0100
{ 1, 1, 0 }, // 0101 0101
{ 1, 1, 1 }, // 0101 0110
{ 1, 1, 0 }, // 0101 0111
{ 1, 3, 3 }, // 0101 1000
{ 1, 2, 0 }, // 0101 1001
{ 1, 1, 1 }, // 0101 1010
{ 1, 1, 0 }, // 0101 1011
{ 1, 2, 2 }, // 0101 1100
{ 1, 1, 0 }, // 0101 1101
{ 1, 1, 1 }, // 0101 1110
{ 1, 1, 0 }, // 0101 1111
{ 1, 5, 5 }, // 0110 0000
{ 1, 4, 0 }, // 0110 0001
{ 1, 3, 1 }, // 0110 0010
{ 1, 3, 0 }, // 0110 0011
{ 1, 2, 2 }, // 0110 0100
{ 1, 2, 0 }, // 0110 0101
{ 1, 2, 1 }, // 0110 0110
{ 1, 2, 0 }, // 0110 0111
{ 1, 3, 3 }, // 0110 1000
{ 1, 2, 0 }, // 0110 1001
{ 1, 1, 1 }, // 0110 1010
{ 1, 1, 0 }, // 0110 1011
{ 1, 2, 2 }, // 0110 1100
{ 1, 1, 0 }, // 0110 1101
{ 1, 1, 1 }, // 0110 1110
{ 1, 1, 0 }, // 0110 1111
{ 1, 4, 4 }, // 0111 0000
{ 1, 3, 0 }, // 0111 0001
{ 1, 2, 1 }, // 0111 0010
{ 1, 2, 0 }, // 0111 0011
{ 1, 2, 2 }, // 0111 0100
{ 1, 1, 0 }, // 0111 0101
{ 1, 1, 1 }, // 0111 0110
{ 1, 1, 0 }, // 0111 0111
{ 1, 3, 3 }, // 0111 1000
{ 1, 2, 0 }, // 0111 1001
{ 1, 1, 1 }, // 0111 1010
{ 1, 1, 0 }, // 0111 1011
{ 1, 2, 2 }, // 0111 1100
{ 1, 1, 0 }, // 0111 1101
{ 1, 1, 1 }, // 0111 1110
{ 1, 1, 0 }, // 0111 1111
{ 0, 7, 7 }, // 1000 0000
{ 0, 6, 0 }, // 1000 0001
{ 0, 5, 1 }, // 1000 0010
{ 0, 5, 0 }, // 1000 0011
{ 0, 4, 2 }, // 1000 0100
{ 0, 4, 0 }, // 1000 0101
{ 0, 4, 1 }, // 1000 0110
{ 0, 4, 0 }, // 1000 0111
{ 0, 3, 3 }, // 1000 1000
{ 0, 3, 0 }, // 1000 1001
{ 0, 3, 1 }, // 1000 1010
{ 0, 3, 0 }, // 1000 1011
{ 0, 3, 2 }, // 1000 1100
{ 0, 3, 0 }, // 1000 1101
{ 0, 3, 1 }, // 1000 1110
{ 0, 3, 0 }, // 1000 1111
{ 0, 4, 4 }, // 1001 0000
{ 0, 3, 0 }, // 1001 0001
{ 0, 2, 1 }, // 1001 0010
{ 0, 2, 0 }, // 1001 0011
{ 0, 2, 2 }, // 1001 0100
{ 0, 2, 0 }, // 1001 0101
{ 0, 2, 1 }, // 1001 0110
{ 0, 2, 0 }, // 1001 0111
{ 0, 3, 3 }, // 1001 1000
{ 0, 2, 0 }, // 1001 1001
{ 0, 2, 1 }, // 1001 1010
{ 0, 2, 0 }, // 1001 1011
{ 0, 2, 2 }, // 1001 1100
{ 0, 2, 0 }, // 1001 1101
{ 0, 2, 1 }, // 1001 1110
{ 0, 2, 0 }, // 1001 1111
{ 0, 5, 5 }, // 1010 0000
{ 0, 4, 0 }, // 1010 0001
{ 0, 3, 1 }, // 1010 0010
{ 0, 3, 0 }, // 1010 0011
{ 0, 2, 2 }, // 1010 0100
{ 0, 2, 0 }, // 1010 0101
{ 0, 2, 1 }, // 1010 0110
{ 0, 2, 0 }, // 1010 0111
{ 0, 3, 3 }, // 1010 1000
{ 0, 2, 0 }, // 1010 1001
{ 0, 1, 1 }, // 1010 1010
{ 0, 1, 0 }, // 1010 1011
{ 0, 2, 2 }, // 1010 1100
{ 0, 1, 0 }, // 1010 1101
{ 0, 1, 1 }, // 1010 1110
{ 0, 1, 0 }, // 1010 1111
{ 0, 4, 4 }, // 1011 0000
{ 0, 3, 0 }, // 1011 0001
{ 0, 2, 1 }, // 1011 0010
{ 0, 2, 0 }, // 1011 0011
{ 0, 2, 2 }, // 1011 0100
{ 0, 1, 0 }, // 1011 0101
{ 0, 1, 1 }, // 1011 0110
{ 0, 1, 0 }, // 1011 0111
{ 0, 3, 3 }, // 1011 1000
{ 0, 2, 0 }, // 1011 1001
{ 0, 1, 1 }, // 1011 1010
{ 0, 1, 0 }, // 1011 1011
{ 0, 2, 2 }, // 1011 1100
{ 0, 1, 0 }, // 1011 1101
{ 0, 1, 1 }, // 1011 1110
{ 0, 1, 0 }, // 1011 1111
{ 0, 6, 6 }, // 1100 0000
{ 0, 5, 0 }, // 1100 0001
{ 0, 4, 1 }, // 1100 0010
{ 0, 4, 0 }, // 1100 0011
{ 0, 3, 2 }, // 1100 0100
{ 0, 3, 0 }, // 1100 0101
{ 0, 3, 1 }, // 1100 0110
{ 0, 3, 0 }, // 1100 0111
{ 0, 3, 3 }, // 1100 1000
{ 0, 2, 0 }, // 1100 1001
{ 0, 2, 1 }, // 1100 1010
{ 0, 2, 0 }, // 1100 1011
{ 0, 2, 2 }, // 1100 1100
{ 0, 2, 0 }, // 1100 1101
{ 0, 2, 1 }, // 1100 1110
{ 0, 2, 0 }, // 1100 1111
{ 0, 4, 4 }, // 1101 0000
{ 0, 3, 0 }, // 1101 0001
{ 0, 2, 1 }, // 1101 0010
{ 0, 2, 0 }, // 1101 0011
{ 0, 2, 2 }, // 1101 0100
{ 0, 1, 0 }, // 1101 0101
{ 0, 1, 1 }, // 1101 0110
{ 0, 1, 0 }, // 1101 0111
{ 0, 3, 3 }, // 1101 1000
{ 0, 2, 0 }, // 1101 1001
{ 0, 1, 1 }, // 1101 1010
{ 0, 1, 0 }, // 1101 1011
{ 0, 2, 2 }, // 1101 1100
{ 0, 1, 0 }, // 1101 1101
{ 0, 1, 1 }, // 1101 1110
{ 0, 1, 0 }, // 1101 1111
{ 0, 5, 5 }, // 1110 0000
{ 0, 4, 0 }, // 1110 0001
{ 0, 3, 1 }, // 1110 0010
{ 0, 3, 0 }, // 1110 0011
{ 0, 2, 2 }, // 1110 0100
{ 0, 2, 0 }, // 1110 0101
{ 0, 2, 1 }, // 1110 0110
{ 0, 2, 0 }, // 1110 0111
{ 0, 3, 3 }, // 1110 1000
{ 0, 2, 0 }, // 1110 1001
{ 0, 1, 1 }, // 1110 1010
{ 0, 1, 0 }, // 1110 1011
{ 0, 2, 2 }, // 1110 1100
{ 0, 1, 0 }, // 1110 1101
{ 0, 1, 1 }, // 1110 1110
{ 0, 1, 0 }, // 1110 1111
{ 0, 4, 4 }, // 1111 0000
{ 0, 3, 0 }, // 1111 0001
{ 0, 2, 1 }, // 1111 0010
{ 0, 2, 0 }, // 1111 0011
{ 0, 2, 2 }, // 1111 0100
{ 0, 1, 0 }, // 1111 0101
{ 0, 1, 1 }, // 1111 0110
{ 0, 1, 0 }, // 1111 0111
{ 0, 3, 3 }, // 1111 1000
{ 0, 2, 0 }, // 1111 1001
{ 0, 1, 1 }, // 1111 1010
{ 0, 1, 0 }, // 1111 1011
{ 0, 2, 2 }, // 1111 1100
{ 0, 1, 0 }, // 1111 1101
{ 0, 1, 1 }, // 1111 1110
{ 0, 0, 0 }, // 1111 1111
};
How many numbers have 6 consecutive 0 bits (even when considering 2 consecutive bytes)?
Byte 1
XXXXXXXX
000000?? 0/1/2/3
?000000? 0/1/128/129
??000000 0/64/128/192
So if we consider 1 byte at a time then 0/1/2/3/64/128/192
Byte a Byte b
XXXXXXXX XXXXXXXX
??100000 0??????? (a & 31 == 0) && (b & 128 == 0)
???10000 00?????? (a & 15 == 0) && (b & 192 == 0)
????1000 000????? (a & 7 == 0) && (b & 224 == 0)
?????100 0000???? (a & 3 == 0) && (b & 240 == 0)
??????10 00000??? (a & 1 == 0) && (b & 248 == 0)
So an absolute maximum of 12 tests gives you all combinations.
I am sure if you do the comparisons smartly you can reduce that.
If we steel @Michael Burr solution below for using a table driven approach. Then we can organize it so that you can do one or two comparisons per byte.
struct TestStruct
{
bool is6Consecutive;
bool hasTrailing;
int maskNextByte;
int offset;
};
TestStruct testData[256];
std::size_t findOffsetOf_6ConsecutiveZero(char const* data, size_t size)
{
for(int loop = 0;loop < (size-1); ++loop)
{
char const& val = data[loop];
TestStructure const& test = testData[val];
if (test.is6Consecutive)
{ return loop*8 + test.offset;
}
if (test.hasTrailing)
{
if ((data[loop + 1] & test.maskNextByte) == 0)
{ return loop*8 + test.offset;
}
}
}
// Test last byte
TestStructure const& test = testData[data[size-1]];
if (test.is6Consecutive)
{ return (size-1)*8 + test.offset;
}
return -1;
}
First few table entries:
TestStruct testData[256] =
{
{true, false, 0x00, 0},
{true, false, 0x00, 0},
{true, false, 0x00, 0},
{true, false, 0x00, 0},
{false, true, 0xf0, 6}, // 4 => 00000100 <Next 4 bytes are zero we hit>
{false, false, 0x00, 0}, // 5 => 00000101 <Ignore and move on>
{false, true, 0xf8, 7}, // 6 => 00000110 <Next 5 bytes are zero we hit>
etc...
};
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With