Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

python read from fd directly into bytearray

Is there a means to read from a file descriptor (not an IO-like object) directly into a bytearray?

Right now I use a temporary FileIO object to mediate, something like:

def fd_readinto(fd, ba):
    """Read from the raw file descriptor *fd* directly into the buffer *ba*.

    A short-lived ``io.FileIO`` wrapper is placed around *fd* with
    ``closefd=False`` so the descriptor itself stays open after the call.

    Returns the value of ``FileIO.readinto`` (the number of bytes stored
    in *ba*).
    """
    with io.FileIO(fd, closefd=False) as raw:
        count = raw.readinto(ba)
    return count
like image 285
pilcrow Avatar asked Nov 13 '22 15:11

pilcrow


1 Answer

There is no function that does this, and your method is already the fastest approach.

I was going to suggest bytearray(mmap), array.fromfile, and even a homebrew os.read() loop using bytearray and memoryview, but FileIO.readinto is screaming fast. (That makes sense, because it performs only one system call.)

import os
import mmap, io, array
import timeit

# Path of the file that the timeit setup code opens for every benchmark case;
# replace it with the path of a real, reasonably large file before running.
fn = 'path-to-largeish-file'

def fd_readinto_mmap(fd, ba):
    """Append the whole file behind *fd* to *ba* by memory-mapping it.

    The mapping is created with length 0, i.e. it covers the entire file
    as it currently exists.  *fd* is left open.

    An empty file appends nothing (mapping a zero-length file is an error,
    so that case is short-circuited).
    """
    if os.fstat(fd).st_size == 0:
        # mmap cannot map an empty file; there is nothing to append anyway.
        return
    m = mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
    try:
        ba.extend(m)
    finally:
        # Release the mapping even if extend() fails (e.g. MemoryError).
        m.close()

def fd_readinto_fio(fd, ba):
    """Append the data readable from *fd* to *ba* using ``FileIO.readinto``.

    A scratch buffer of ``st_size`` bytes is filled through a temporary
    ``io.FileIO`` wrapper (``closefd=False``, so *fd* stays open) and only
    the bytes that were actually read are appended to *ba*.

    In the common case this is a single ``readinto`` call; the loop only
    continues after a short read and stops at end of file.
    """
    sz = os.fstat(fd).st_size
    ba2 = bytearray(sz)
    view = memoryview(ba2)
    filled = 0
    with io.FileIO(fd, closefd=False) as fio:
        while filled < sz:
            n = fio.readinto(view[filled:])
            if not n:
                # 0 -> end of file, None -> no data available right now.
                break
            filled += n
    # Do not append the unused (zero-filled) tail of the scratch buffer.
    ba.extend(view[:filled])

def fd_readinto_array(fd, ba):
    """Append ``st_size`` bytes read from *fd* to *ba* via ``array.fromfile``.

    NOTE: *fd* is wrapped with ``os.fdopen`` and is therefore CLOSED by the
    time this function returns; callers must not close it again.

    Raises ``EOFError`` (from ``array.fromfile``) if fewer than ``st_size``
    bytes can be read.
    """
    sz = os.fstat(fd).st_size
    # 'B' (unsigned byte) is valid on both Python 2 and 3; the old 'c'
    # typecode no longer exists on Python 3.
    ar = array.array('B')
    # Close the wrapper (and with it fd) deterministically rather than
    # relying on garbage collection.
    with os.fdopen(fd, 'rb') as fp:
        ar.fromfile(fp, sz)
    ba.extend(ar)

def fd_readinto_mv(fd, ba):
    """Append everything readable from *fd* to *ba* using ``os.read`` chunks.

    A buffer of ``st_size`` bytes is preallocated and filled in place
    through a ``memoryview``; data is read in ``st_blksize``-sized chunks
    (4096 if the platform does not report a block size) until end of file.
    Any data beyond the preallocated size (``st_size`` understated what the
    descriptor delivers, e.g. for a pipe or a growing file) is collected
    separately and appended afterwards.  *fd* is left open.
    """
    stat = os.fstat(fd)
    # Guard against a reported block size of 0, which would make os.read()
    # return b'' immediately and look like end of file.
    blksize = getattr(stat, 'st_blksize', 4096) or 4096
    bufsize = stat.st_size
    buf = bytearray(bufsize)
    view = memoryview(buf)
    filled = 0
    overflow = bytearray()
    while True:
        chunk = os.read(fd, blksize)
        if not chunk:
            break
        room = bufsize - filled
        if len(chunk) <= room:
            view[filled:filled + len(chunk)] = chunk
            filled += len(chunk)
        else:
            # More data than fstat() announced: top up the preallocated
            # buffer and keep the remainder in the overflow tail.
            if room:
                view[filled:bufsize] = chunk[:room]
                filled = bufsize
            overflow.extend(chunk[room:])
    # A memoryview slice replaces the Python 2-only buffer() builtin and
    # avoids appending the unused tail of the preallocated buffer.
    ba.extend(view[:filled])
    ba.extend(overflow)

# Code timeit runs once before each measurement: it pulls the functions under
# test out of __main__ and defines helpers to open/close a raw descriptor.
setup = """
from __main__ import fn, fd_readinto_mmap, fd_readinto_fio, fd_readinto_array, fd_readinto_mv
import os
openfd = lambda : os.open(fn, os.O_RDONLY)
closefd = lambda fd: os.close(fd)
"""


# Number of times each statement is executed per measurement.
reps = 2
# Statement timed for each approach.  The 'array' case does not call
# closefd(): fd_readinto_array hands the descriptor to os.fdopen().
tests = {
    'fio' : "fd=openfd(); fd_readinto_fio(fd, bytearray()); closefd(fd)",
    'mmap': "fd=openfd(); fd_readinto_mmap(fd, bytearray()); closefd(fd)",
    'array': "fd=openfd(); fd_readinto_array(fd, bytearray());",
    'mv' : "fd=openfd(); fd_readinto_mv(fd, bytearray()); closefd(fd)",
}

# Pad the labels to the longest test name so the timings line up.
width = max(map(len, tests))
# items() and a fully parenthesised print() work on Python 2 and Python 3
# alike (iteritems() and the print statement are Python 2 only).
for n, t in tests.items():
    time = timeit.timeit(t, setup, number=reps)
    print(("{:%s} {}" % width).format(n, time))

On my system (OS X 10.14.6, Python 2.7.10), FileIO is the fastest option:

mmap  7.19839119911
array 5.72453403473
mv    0.49933886528
fio   0.299485206604
like image 199
Francis Avila Avatar answered Nov 15 '22 04:11

Francis Avila