I'm trying to convert a python module to cython, it does a lot of serialize and deserialize work.
Currently I have to do this:
import struct
from libc.stdint cimport (
int32_t,
int64_t,
)
cpdef bytes write_int(int32_t i):
return struct.pack("!i", i)
cpdef bytes write_long(int64_t i):
return struct.pack("!q", i)
cdef bytes write_double(double val):
return struct.pack("!d", val)
cdef bytes write_string(bytes val):
cdef int32_t length = len(val)
cdef str fmt
fmt = "!i%ds" % length
return struct.pack(fmt, length, val)
Is there an equal in c lib to struct.pack and struct.unpack? What's the best way to do things like this in cython?
I looked at the modules (this and this) and just translated the code to Cython and removed the PyObject
parts. In theory this should work, but some parts (like the float
parts) I have no way of rigorously testing:
Some imports:
from cpython.array cimport array, clone
from libc.string cimport memcmp, memcpy
from libc.math cimport frexp, ldexp
from libc.stdint cimport int32_t, int64_t
Save some code with a fused type. It's technically not a stable feature, but it works flawlessly for me:
ctypedef fused integer:
int32_t
int64_t
This part tests the machine's endianness. It works for me, but that's hardly a complete suite. OTOH, it looks about right
cdef enum float_format_type:
unknown_format,
ieee_big_endian_format,
ieee_little_endian_format
# Set-up
cdef array stringtemplate = array('B')
cdef float_format_type double_format
cdef double x = 9006104071832581.0
if sizeof(double) == 8:
if memcmp(&x, b"\x43\x3f\xff\x01\x02\x03\x04\x05", 8) == 0:
double_format = ieee_big_endian_format
elif memcmp(&x, b"\x05\x04\x03\x02\x01\xff\x3f\x43", 8) == 0:
double_format = ieee_little_endian_format
else:
double_format = unknown_format
else:
double_format = unknown_format;
(The stringtemplate
is used to be able to make bytes
objects quickly)
This part's simple:
cdef void _write_integer(integer x, char* output):
cdef int i
for i in range(sizeof(integer)-1, -1, -1):
output[i] = <char>x
x >>= 8
cpdef bytes write_int(int32_t i):
cdef array output = clone(stringtemplate, sizeof(int32_t), False)
_write_integer(i, output.data.as_chars)
return output.data.as_chars[:sizeof(int32_t)]
cpdef bytes write_long(int64_t i):
cdef array output = clone(stringtemplate, sizeof(int64_t), False)
_write_integer(i, output.data.as_chars)
return output.data.as_chars[:sizeof(int64_t)]
The array
is similar to malloc
but it's garbage collected :).
This part I mostly have no idea about. My "tests" passed, but it's mostly hope:
cdef void _write_double(double x, char* output):
cdef:
unsigned char sign
int e
double f
unsigned int fhi, flo, i
char *s
if double_format == unknown_format or True:
if x < 0:
sign = 1
x = -x
else:
sign = 0
f = frexp(x, &e)
# Normalize f to be in the range [1.0, 2.0)
if 0.5 <= f < 1.0:
f *= 2.0
e -= 1
elif f == 0.0:
e = 0
else:
raise SystemError("frexp() result out of range")
if e >= 1024:
raise OverflowError("float too large to pack with d format")
elif e < -1022:
# Gradual underflow
f = ldexp(f, 1022 + e)
e = 0;
elif not (e == 0 and f == 0.0):
e += 1023
f -= 1.0 # Get rid of leading 1
# fhi receives the high 28 bits; flo the low 24 bits (== 52 bits)
f *= 2.0 ** 28
fhi = <unsigned int>f # Truncate
assert fhi < 268435456
f -= <double>fhi
f *= 2.0 ** 24
flo = <unsigned int>(f + 0.5) # Round
assert(flo <= 16777216);
if flo >> 24:
# The carry propagated out of a string of 24 1 bits.
flo = 0
fhi += 1
if fhi >> 28:
# And it also progagated out of the next 28 bits.
fhi = 0
e += 1
if e >= 2047:
raise OverflowError("float too large to pack with d format")
output[0] = (sign << 7) | (e >> 4)
output[1] = <unsigned char> (((e & 0xF) << 4) | (fhi >> 24))
output[2] = 0xFF & (fhi >> 16)
output[3] = 0xFF & (fhi >> 8)
output[4] = 0xFF & fhi
output[5] = 0xFF & (flo >> 16)
output[6] = 0xFF & (flo >> 8)
output[7] = 0xFF & flo
else:
s = <char*>&x;
if double_format == ieee_little_endian_format:
for i in range(8):
output[i] = s[7-i]
else:
for i in range(8):
output[i] = s[i]
If you can understand how it works, be sure to check it yourself.
Then we wrap it as before:
cdef bytes write_double(double x):
cdef array output = clone(stringtemplate, sizeof(double), False)
_write_double(x, output.data.as_chars)
return output.data.as_chars[:sizeof(double)]
The string one is actually really simple, and explains why I set it up as I did above:
cdef bytes write_string(bytes val):
cdef:
int32_t int_length = sizeof(int32_t)
int32_t input_length = len(val)
array output = clone(stringtemplate, int_length + input_length, True)
_write_integer(input_length, output.data.as_chars)
memcpy(output.data.as_chars + int_length, <char*>val, input_length)
return output.data.as_chars[:int_length + input_length]
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With