Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to quickly pack a float to 4 bytes?

I've been looking for a way to store floats in WebGL textures. I've found some solutions on the internet, but those only deal with floats in the [0..1) range. I'd like to be able to store arbitrary floats, and for that such a function would need to be extended to also store the exponent (in the first byte, say). I don't quite understand how those solutions work, though, so it is not obvious to me how to extend them. In short:

What is an efficient algorithm to pack a float into 4 bytes?

like image 717
MaiaVictor Avatar asked Aug 19 '16 14:08

MaiaVictor


3 Answers

It's not fast, but it is doable. (Note that compilers for GLSL 1.00 can have bugs in the conversion of floating-point literals.)

// Bit planes of four bytes handled in parallel: component c of bitN is
// bit N (weight 2^N) of byte c, stored as 0.0 or 1.0.
struct Bitset8Bits {
    mediump vec4 bit0, bit1, bit2, bit3;
    mediump vec4 bit4, bit5, bit6, bit7;
};


// Branch-free component-wise "l > r": 1.0 where l exceeds r, 0.0 elsewhere.
vec4 when_gt (vec4 l, vec4 r) {
  // sign() yields -1.0, 0.0 or +1.0 per component; only +1.0 survives the clamp.
  vec4 direction = sign(l - r);
  return clamp(direction, 0.0, 1.0);
}


// Splits four byte values (one per component, each compared here against
// thresholds up to 127.5, i.e. expected in 0.0 .. 255.0) into eight bit planes.
//
// The parameter is mediump rather than lowp: lowp is only guaranteed to cover
// the range (-2, 2), which cannot hold a byte value, while mediump represents
// every whole number in 0 .. 255 exactly.
//
// Returns a Bitset8Bits in which component c of bitN is 1.0 when bit N of
// byte c is set and 0.0 otherwise.
Bitset8Bits unpack_4_bytes (mediump vec4 byte) {
    Bitset8Bits result;

    // Work from the most significant bit down: test the bit against the
    // midpoint below its weight, then remove its weight from the remainder.
    result.bit7 = when_gt(byte, vec4(127.5));
    vec4 bits0to6 = byte - 128.0 * result.bit7;

    result.bit6 = when_gt(bits0to6, vec4(63.5));
    vec4 bits0to5 = bits0to6 - 64.0 * result.bit6;

    result.bit5 = when_gt(bits0to5, vec4(31.5));
    vec4 bits0to4 = bits0to5 - 32.0 * result.bit5;

    result.bit4 = when_gt(bits0to4, vec4(15.5));
    vec4 bits0to3 = bits0to4 - 16.0 * result.bit4;

    result.bit3 = when_gt(bits0to3, vec4(7.5));
    vec4 bits0to2 = bits0to3 - 8.0 * result.bit3;

    result.bit2 = when_gt(bits0to2, vec4(3.5));
    vec4 bits0to1 = bits0to2 - 4.0 * result.bit2;

    result.bit1 = when_gt(bits0to1, vec4(1.5));
    vec4 bit0 = bits0to1 - 2.0 * result.bit1;

    // What is left is the lowest bit itself (0.0 or 1.0 for whole-number input).
    result.bit0 = when_gt(bit0, vec4(0.5));

    return result;
}

// Scalar branch-free "l > r": returns 1.0 when l exceeds r, otherwise 0.0.
float when_gt (float l, float r) {
  // sign() is -1.0, 0.0 or +1.0; clamping to [0, 1] keeps only the +1.0 case.
  float direction = sign(l - r);
  return clamp(direction, 0.0, 1.0);
}




// Recombines eight bit planes into four byte values: each component of the
// result is the sum of bitN * 2^N over N = 0 .. 7 for that component.
vec4 pack_4_bytes (Bitset8Bits state) {
  // Accumulate from the least significant plane upwards.
  vec4 bytes = state.bit0;
  bytes += 2.0 * state.bit1;
  bytes += 4.0 * state.bit2;
  bytes += 8.0 * state.bit3;
  bytes += 16.0 * state.bit4;
  bytes += 32.0 * state.bit5;
  bytes += 64.0 * state.bit6;
  bytes += 128.0 * state.bit7;
  return bytes;
}

// Peels one binary digit off `remainder`: yields 1.0 and subtracts `weight`
// from `remainder` when `remainder` is greater than `weight - slack`,
// otherwise yields 0.0 and leaves `remainder` unchanged.
float take_bit (inout float remainder, float weight, float slack) {
    float bit = when_gt(remainder, weight - slack);
    remainder = remainder - weight * bit;
    return bit;
}

// Packs `original_value` into four byte values (each component is a sum of
// 0.0/1.0 bits weighted 1 .. 128, as produced by pack_4_bytes), laid out as:
//   .a  bit 7: sign (1 = negative), bits 6..0: biased exponent bits 7..1
//   .b  bit 7: biased exponent bit 0, bits 6..0: significand bits 22..16
//   .g  significand bits 15..8
//   .r  significand bits 7..0
//
// Zero is encoded as all-zero bytes.
// NOTE(review): infinities, NaN and values whose biased exponent falls
// below 1 (denormals) are still not given special treatment.
vec4 brians_float_pack (
    float original_value) {

    // log2() is undefined for 0.0 and the normalisation below would divide
    // 0.0 by 0.0, so zero is answered directly with the all-zero pattern
    // (the value pack_4_bytes yields when every bit is 0.0).
    if (original_value == 0.0) {
        return vec4(0.0);
    }

    // Work on the magnitude; the sign is stored separately below.
    float pos_value = abs(original_value);

    // Split into power-of-two exponent and fractional part in [0, 1).
    float exp_real = floor(log2(pos_value));
    float multiplier = pow(2.0, exp_real);
    float normalized = pos_value / multiplier - 1.0;

    // Exponent with a bias of 127 added.
    float exp_v = exp_real + 127.0;

    Bitset8Bits packed_v;

    // Sign bit: sign() - 1.0 is -2.0 only for negative input, which is the
    // only case where the edge does not exceed -1.5, so step() gives 1.0.
    packed_v.bit7.a =
        step(sign(original_value) - 1.0, -1.5);

    // Exponent, 8 bits, most significant first. Each bit is tested against
    // the midpoint just below its weight (127.5, 63.5, ...).
    packed_v.bit6.a = take_bit(exp_v, 128.0, 0.5);
    packed_v.bit5.a = take_bit(exp_v, 64.0, 0.5);
    packed_v.bit4.a = take_bit(exp_v, 32.0, 0.5);
    packed_v.bit3.a = take_bit(exp_v, 16.0, 0.5);
    packed_v.bit2.a = take_bit(exp_v, 8.0, 0.5);
    packed_v.bit1.a = take_bit(exp_v, 4.0, 0.5);
    packed_v.bit0.a = take_bit(exp_v, 2.0, 0.5);
    packed_v.bit7.b = take_bit(exp_v, 1.0, 0.5);

    // Significand, 23 bits, most significant first. when_gt() is a strict
    // comparison, so the threshold is lowered by a small slack to let a
    // remainder equal to `factor` count as a set bit.
    const float slack = 0.00000005;
    float factor = 0.5;

    // Significand bits 22..16
    packed_v.bit6.b = take_bit(normalized, factor, slack);
    factor = 0.5 * factor;
    packed_v.bit5.b = take_bit(normalized, factor, slack);
    factor = 0.5 * factor;
    packed_v.bit4.b = take_bit(normalized, factor, slack);
    factor = 0.5 * factor;
    packed_v.bit3.b = take_bit(normalized, factor, slack);
    factor = 0.5 * factor;
    packed_v.bit2.b = take_bit(normalized, factor, slack);
    factor = 0.5 * factor;
    packed_v.bit1.b = take_bit(normalized, factor, slack);
    factor = 0.5 * factor;
    packed_v.bit0.b = take_bit(normalized, factor, slack);
    factor = 0.5 * factor;

    // Significand bits 15..8
    packed_v.bit7.g = take_bit(normalized, factor, slack);
    factor = 0.5 * factor;
    packed_v.bit6.g = take_bit(normalized, factor, slack);
    factor = 0.5 * factor;
    packed_v.bit5.g = take_bit(normalized, factor, slack);
    factor = 0.5 * factor;
    packed_v.bit4.g = take_bit(normalized, factor, slack);
    factor = 0.5 * factor;
    packed_v.bit3.g = take_bit(normalized, factor, slack);
    factor = 0.5 * factor;
    packed_v.bit2.g = take_bit(normalized, factor, slack);
    factor = 0.5 * factor;
    packed_v.bit1.g = take_bit(normalized, factor, slack);
    factor = 0.5 * factor;
    packed_v.bit0.g = take_bit(normalized, factor, slack);
    factor = 0.5 * factor;

    // Significand bits 7..0 (the last one is the LSB)
    packed_v.bit7.r = take_bit(normalized, factor, slack);
    factor = 0.5 * factor;
    packed_v.bit6.r = take_bit(normalized, factor, slack);
    factor = 0.5 * factor;
    packed_v.bit5.r = take_bit(normalized, factor, slack);
    factor = 0.5 * factor;
    packed_v.bit4.r = take_bit(normalized, factor, slack);
    factor = 0.5 * factor;
    packed_v.bit3.r = take_bit(normalized, factor, slack);
    factor = 0.5 * factor;
    packed_v.bit2.r = take_bit(normalized, factor, slack);
    factor = 0.5 * factor;
    packed_v.bit1.r = take_bit(normalized, factor, slack);
    factor = 0.5 * factor;
    packed_v.bit0.r = take_bit(normalized, factor, slack);

    // Collapse the 32 individual bits into four byte values.
    vec4 result = pack_4_bytes(packed_v);

    return result;
}
like image 148
Brian Cannard Avatar answered Nov 18 '22 09:11

Brian Cannard


An easy way to do it is to first agree on the range of floats you need to support and remap that range to [0..1) before packing; unpacking then applies the inverse mapping.

// Bounds of the value range that gets remapped for packing.
const MIN = -100, MAX = 100;

/**
 * Linearly remaps a value from [min, max] onto [0, 1].
 *
 * @param {number} v   Value to remap; min maps to 0 and max maps to 1.
 * @param {number} [min=MIN] Lower bound of the source range.
 * @param {number} [max=MAX] Upper bound of the source range; must differ
 *                           from min, otherwise the division is by zero.
 * @returns {number} (v - min) / (max - min).
 */
function packRemap(v, min = MIN, max = MAX){
    return (v - min) / (max - min);
}

/**
 * Inverse of the linear remap: takes a value from [0, 1] back to [min, max].
 *
 * @param {number} p   Remapped value; 0 maps to min and 1 maps to max.
 * @param {number} [min=MIN] Lower bound of the target range.
 * @param {number} [max=MAX] Upper bound of the target range.
 * @returns {number} min + p * (max - min).
 */
function unpackRemap(p, min = MIN, max = MAX){
    return min + p * (max - min);
}
like image 34
WacławJasper Avatar answered Nov 18 '22 08:11

WacławJasper


Well, a float is a 32-bit number (23 stored bits of mantissa plus 1 implicit bit, 8 bits for the exponent and 1 bit for the sign), and a texel of an RGBA8 texture is also 32 bits. Thus we only need an encoding scheme that can be packed in JS (or any other language, for that matter) and unpacked in GLSL (given the restrictions of GLSL ES 1.0, e.g. the lack of bitwise ops). Here's my suggestion (in C++):

#include <cmath>
#include <cstdint>
#include <cstring>
#include <iostream>

// One RGBA8 texel: four unsigned 8-bit channels holding the encoded float.
struct rgba {
    uint8_t r;
    uint8_t g;
    uint8_t b;
    uint8_t a;
};

// Encodes a 32-bit float into four bytes:
//   r, g, b - a 24-bit payload, most significant byte first, made of the 23
//             mantissa bits shifted left by one with the sign in bit 0
//   a       - the 8-bit biased exponent
rgba float2rgba(float x) {
    static_assert(sizeof(float) == sizeof(uint32_t), "float must be 32 bits wide");

    // Copy the object representation instead of reading an inactive union
    // member, which is undefined behaviour in C++.
    uint32_t bits;
    std::memcpy(&bits, &x, sizeof bits);

    const uint32_t sign = bits >> 31;
    const uint32_t exponent = (bits >> 23) & 0xffu;
    const uint32_t mantissa = bits & 0x7fffffu;

    // Sign goes to the least significant bit so the mantissa stays contiguous.
    const uint32_t payload = (mantissa << 1) | sign;

    rgba out;
    out.r = static_cast<uint8_t>(payload >> 16);
    out.g = static_cast<uint8_t>((payload >> 8) & 0xffu);
    out.b = static_cast<uint8_t>(payload & 0xffu);
    out.a = static_cast<uint8_t>(exponent);
    return out;
}

// Decodes four bytes laid out as: r, g, b = 24-bit payload (mantissa shifted
// left by one, sign in the lowest bit of b), a = biased exponent.
//
// The exponent is handled as a signed int: subtracting the bias in unsigned
// arithmetic wraps around for every exponent byte below 127 (all |x| < 1).
// Exponent bytes 0 and 255 are decoded as zero/subnormal and infinity/NaN.
float rgba2float(rgba r) {
    // Drop the sign bit from b and reassemble the 23 mantissa bits.
    const uint32_t mantissa = (r.b / 2u) + r.g * 0x80u + r.r * 0x8000u;
    const float sign = (r.b % 2) ? -1.f : 1.f;
    const int exponent = static_cast<int>(r.a);

    if (exponent == 0xff) {
        // All-ones exponent: infinity when the mantissa is empty, NaN otherwise.
        return mantissa == 0 ? sign * INFINITY : NAN;
    }
    if (exponent == 0) {
        // Zero or subnormal: no implicit leading 1, exponent fixed at -126.
        return sign * std::ldexp(static_cast<float>(mantissa), -126 - 23);
    }

    // Normal number: (1 + mantissa / 2^23) * 2^(exponent - 127).
    const float significand = 1.f + std::ldexp(static_cast<float>(mantissa), -23);
    return sign * std::ldexp(significand, exponent - 127);
}

int main() {
    const float a = -1.34320e32f;
    rgba r = float2rgba(a);
    std::cout <<
        a << '\n' <<
        static_cast<unsigned>(r.r) << ',' <<
        static_cast<unsigned>(r.g) << ',' <<
        static_cast<unsigned>(r.b) << ',' <<
        static_cast<unsigned>(r.a) << '\n' <<
        rgba2float(r) << std::endl;
}

Output:

-1.3432e+32
167,214,213,233
-1.3432e+32
like image 1
Kirill Dmitrenko Avatar answered Nov 18 '22 08:11

Kirill Dmitrenko