I'm trying to parse an HTTP request line (e.g. `GET / HTTP/1.1\r\n`), which is easy with `socket.makefile().readline()` (`BaseHTTPRequestHandler` uses it), like:
# Read one line through the socket's file-object wrapper and print it.
# The call form of print works on both Python 2 and Python 3.
print(sock.makefile().readline())
Unfortunately, as the documentation says, when using `makefile()` the socket must be in blocking mode (it cannot have a timeout). How can I implement a `readline()`-like function that does the same thing without using the `makefile()` file object interface, and without reading more than needed (any extra data read would be discarded, and I will need it afterwards)?
A pretty inefficient example:
# Read the request line one byte at a time so that nothing past the newline
# is consumed from the socket (deliberately simple, and inefficient).
# Bytes literals keep this valid on both Python 2.6+ and Python 3, where
# recv() returns bytes.
request_line = b""
while not request_line.endswith(b'\n'):
    byte = sock.recv(1)
    if not byte:
        # recv() returns an empty string once the peer has closed the
        # connection; without this check the loop would never terminate.
        break
    request_line += byte
print(request_line)
Four and a half years later, I would suggest asyncio's Streams for this, but here's how you might do it properly using `BytesIO`. Note that this implementation "shrinks" the in-memory `BytesIO` object each time a line is detected. If you didn't care about that, this could be a lot fewer lines.
import socket
import time
from io import BytesIO
# Create a TCP (IPv4, stream) socket and connect it to localhost:1234.
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.connect(('localhost', 1234))
# Switch to non-blocking mode only after connecting; from here on recv()
# raises BlockingIOError when no data is available instead of waiting.
sock.setblocking(False)
def handle_line(line):
    """Print one received line with trailing whitespace stripped.

    Args:
        line: The raw bytes of a single line, normally still carrying its
            line terminator (e.g. ``b"\\r\\n"``).

    The bytes are decoded as UTF-8 with ``errors="replace"`` so that a
    malformed byte coming off the network is shown as U+FFFD instead of
    raising ``UnicodeDecodeError`` and aborting the caller's receive loop.
    """
    text = line.decode(errors="replace").rstrip()
    # Equivalent without the repr quoting: print("Line Received:", text)
    print(f"Line Received: {text!r}")
# Receive loop: accumulate bytes from the non-blocking socket in a BytesIO
# and hand every complete, newline-terminated line to handle_line().
with BytesIO() as buffer:
    while True:
        try:
            resp = sock.recv(100)  # Read in some number of bytes -- balance this
        except BlockingIOError:
            print("sleeping")  # Do whatever you want here, this just
            time.sleep(2)      # illustrates that it's nonblocking
            continue

        if not resp:
            # recv() returning b"" means the peer closed the connection; no
            # BlockingIOError will ever be raised again, so looping on would
            # spin forever. Deliver any unterminated trailing data as the
            # final line, then stop.
            leftover = buffer.getvalue()
            if leftover:
                handle_line(leftover)
            break

        # The file pointer is always at the end of the buffered data here,
        # so this appends.
        buffer.write(resp)
        buffer.seek(0)   # Rewind to the start of the buffer
        start_index = 0  # Number of bytes consumed as complete lines
        for line in buffer:
            if not line.endswith(b"\n"):
                # Iterating a BytesIO also yields the trailing fragment that
                # has no newline yet; keep it buffered until the rest of the
                # line arrives instead of handling half a line.
                break
            start_index += len(line)
            handle_line(line)  # Do something with your line

        # If at least one newline-terminated line was handled, start_index is
        # nonzero: read the unconsumed tail into memory, truncate the BytesIO,
        # rewind, and write the tail back (which leaves the file pointer at
        # the end). Otherwise nothing was consumed, so just move the file
        # pointer to the end so the next write does not overwrite data.
        if start_index:
            buffer.seek(start_index)
            remaining = buffer.read()
            buffer.truncate(0)
            buffer.seek(0)
            buffer.write(remaining)
        else:
            buffer.seek(0, 2)
(The original answer was so bad that it wasn't worth keeping (I promise), but should be available in the edit history).
Here is a (buffered) line reader that does not use `asyncio`. It can be used as a "synchronous" socket-based replacement for `asyncio.StreamReader`.
import socket
from asyncio import IncompleteReadError # only import the exception class
class SocketStreamReader:
    """Buffered, synchronous reader over a socket.

    Offers ``readexactly`` / ``readline`` / ``readuntil`` in the style of
    ``asyncio.StreamReader``. Bytes received past a separator are kept in an
    internal buffer and served first by subsequent reads, so nothing read
    from the socket is lost.
    """

    def __init__(self, sock: socket.socket):
        """Wrap *sock*; the reader starts with an empty receive buffer."""
        self._sock = sock
        # Bytes already received from the socket but not yet returned.
        self._recv_buffer = bytearray()

    def read(self, num_bytes: int = -1) -> bytes:
        """Not implemented; always raises ``NotImplementedError``."""
        raise NotImplementedError

    def readexactly(self, num_bytes: int) -> bytes:
        """Return exactly *num_bytes* bytes.

        Raises:
            IncompleteReadError: if the stream ends first. ``partial`` holds
                the bytes read so far and ``expected`` is *num_bytes*.
        """
        buf = bytearray(num_bytes)
        pos = 0
        while pos < num_bytes:
            n = self._recv_into(memoryview(buf)[pos:])
            if n == 0:
                raise IncompleteReadError(bytes(buf[:pos]), num_bytes)
            pos += n
        return bytes(buf)

    def readline(self) -> bytes:
        """Return the next line, including its trailing ``b"\\n"``."""
        return self.readuntil(b"\n")

    def readuntil(self, separator: bytes = b"\n") -> bytes:
        """Return data up to and including the first *separator*.

        Args:
            separator: A single-byte separator.

        Raises:
            ValueError: if *separator* is not exactly one byte long.
            IncompleteReadError: if the stream ends before *separator* is
                seen. ``partial`` holds the bytes consumed so far (they are
                removed from the internal buffer) and ``expected`` is
                ``None``.
        """
        if len(separator) != 1:
            raise ValueError("Only separators of length 1 are supported.")

        # Take ownership of everything buffered so far.
        buf = bytearray(self._recv_buffer)
        self._recv_buffer = bytearray()

        chunk = bytearray(4096)
        start = 0
        while True:
            idx = buf.find(separator, start)
            if idx != -1:
                break
            # Everything currently in buf has been searched; resume the
            # search at the newly received bytes only.
            start = len(buf)
            bytes_read = self._recv_into(memoryview(chunk))
            if bytes_read == 0:
                # End of stream: without this check the loop would spin
                # forever waiting for a separator that cannot arrive.
                raise IncompleteReadError(bytes(buf), None)
            buf += chunk[:bytes_read]

        result = bytes(buf[: idx + 1])
        # Keep whatever followed the separator for the next read.
        self._recv_buffer = buf[idx + 1 :]
        return result

    def _recv_into(self, view: memoryview) -> int:
        """Fill *view* from the internal buffer, then from the socket.

        Buffered bytes are copied first. If they do not fill *view*, a single
        ``recv_into`` call on the socket is made for the remaining space.

        Returns:
            The number of bytes written into *view*; 0 means the buffer was
            empty and the socket returned no data.
        """
        bytes_read = min(len(view), len(self._recv_buffer))
        view[:bytes_read] = self._recv_buffer[:bytes_read]
        self._recv_buffer = self._recv_buffer[bytes_read:]
        if bytes_read == len(view):
            return bytes_read
        bytes_read += self._sock.recv_into(view[bytes_read:])
        return bytes_read
Usage:
# Wrap a socket (`sock` is assumed to be created and connected elsewhere).
reader = SocketStreamReader(sock)
# readline() returns the data up to and including the next b"\n".
line = reader.readline()
If you love us, you can donate to us via PayPal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With