Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

tail -f in python with no time.sleep

Tags:

python

(Update) Either use filesystem-monitoring tools:

  • For linux
  • For Windows
  • For Mac

Or use a single sleep call (which I would consider much more elegant).

import time
def follow(thefile):
    """Yield lines appended to *thefile*, starting from its current end.

    The generator never terminates on its own: whenever ``readline()``
    returns an empty result it sleeps for 0.1 seconds and tries again.
    """
    thefile.seek(0, 2)  # whence=2: position relative to the end of the file
    while True:
        new_line = thefile.readline()
        if new_line:
            yield new_line
        else:
            # Nothing new yet; pause briefly before polling again.
            time.sleep(0.1)

# Follow "access-log" and echo every appended line (Python 3 syntax).
# The with-block makes sure the file is closed when the loop is interrupted.
with open("access-log") as logfile:
    loglines = follow(logfile)
    for line in loglines:
        # Each line already ends with its own newline, so suppress print's
        # newline to avoid double-spaced output.
        print(line, end="")

To minimize the sleep issues I modified Tzury Bar Yochay's solution and now it polls quickly if there is activity and after a few seconds of no activity it only polls every second.

import time

def follow(thefile, min_sleep=0.00001, max_sleep=1.0):
    """Yield lines appended to *thefile*, polling with an adaptive delay.

    Starts at the current end of the file. While new lines keep arriving
    the file is polled every ``min_sleep`` seconds; during idle periods the
    delay doubles after every empty read until it reaches ``max_sleep``.
    With the defaults the cap is reached after roughly two seconds of
    inactivity (a fixed 10 microsecond increment would need 100,000 idle
    polls to get there).

    Args:
        thefile: open file object supporting ``seek`` and ``readline``.
        min_sleep: initial delay in seconds; must be > 0 for the
            back-off to grow.
        max_sleep: upper bound for the delay in seconds.

    Yields:
        Each line returned by ``thefile.readline()``. The generator never
        terminates on its own.
    """
    thefile.seek(0, 2)      # Go to the end of the file
    delay = min_sleep
    while True:
        line = thefile.readline()
        if not line:
            time.sleep(delay)
            # Exponential back-off, capped at max_sleep.
            delay = min(delay * 2, max_sleep)
            continue
        # Activity seen: go back to fast polling.
        delay = min_sleep
        yield line

# Follow the system log and echo every appended line (Python 3 syntax).
# The with-block makes sure the file is closed when the loop is interrupted.
with open("/var/log/system.log") as logfile:
    loglines = follow(logfile)
    for line in loglines:
        # Lines keep their own trailing newline; do not add another one.
        print(line, end="")

When reading from a file, your only choice is sleep (see the source code). If you read from a pipe, you can simply read since the read will block until there is data ready.

The reason for this is that the OS doesn't support the notion of "wait for someone to write to a file". Only recently have some filesystems added an API that lets you listen for changes made to a file, but tail is too old to use this API, and it is also not available everywhere.


The simplest C implementation of tail -f for Linux is this:

#include <unistd.h>
#include <sys/inotify.h>

/*
 * Minimal tail -f skeleton: watch "/tmp/f" through inotify and react each
 * time an event is read. No return value is checked anywhere.
 */
int main() {
    /* Create the inotify instance; events are read from this descriptor. */
    int inotify_fd = inotify_init();
    /* Request IN_MODIFY events for /tmp/f (the returned watch id is ignored). */
    inotify_add_watch(inotify_fd, "/tmp/f", IN_MODIFY);
    struct inotify_event event;
    while (1) {
        /* Wait for the next event; the result of read() is not checked. */
        read(inotify_fd, &event, sizeof(event));
        /* The next line is a pseudo-code placeholder, not valid C. */
        [file has changed; open, stat, read new data]
    }
}

This is just a minimal example that's obviously lacking error checking and won't notice when the file is deleted/moved, but it should give a good idea about what the Python implementation should look like.

Here's a proper Python implementation that uses the built-in ctypes to talk to inotify in the way outlined above.

""" simple python implementation of tail -f, utilizing inotify. """

import ctypes
from errno import errorcode
import os
from struct import Struct

# Event-mask bits, written in hex as in <sys/inotify.h>.
IN_MODIFY = 0x00000002       # watched file was modified
IN_DELETE_SELF = 0x00000400  # watched file itself was deleted
IN_MOVE_SELF = 0x00000800    # watched file itself was moved

def follow(filename, blocksize=8192):
    """
    Monitors the file, and yields bytes objects.

    The existing content is yielded first; afterwards the file is re-read
    from the last position each time inotify reports an event for it. If
    the file has become shorter than the amount already returned (it was
    truncated), reading restarts from the beginning.

    filename must be bytes; blocksize is the maximum size of each yielded
    chunk.

    Terminates when the file is deleted or moved.
    """
    with INotify() as inotify:
        # return when we encounter one of these events.
        stop_mask = IN_DELETE_SELF | IN_MOVE_SELF

        inotify.add_watch(filename, IN_MODIFY | stop_mask)

        # we have returned this many bytes from the file.
        filepos = 0
        while True:
            with open(filename, "rb") as fileobj:
                # A file shorter than our offset has been truncated; without
                # this reset nothing would be yielded until it grew past the
                # old offset again.
                if os.fstat(fileobj.fileno()).st_size < filepos:
                    filepos = 0
                fileobj.seek(filepos)
                while True:
                    data = fileobj.read(blocksize)
                    if not data:
                        break
                    filepos += len(data)
                    yield data

            # wait for next inotify event
            _, mask, _, _ = inotify.next_event()
            if mask & stop_mask:
                break

# use_errno=True makes ctypes keep a copy of errno after every call, which
# ctypes.get_errno() returns. The inotify functions signal failure by
# returning -1 and setting errno; the return value is not the error code.
LIBC = ctypes.CDLL("libc.so.6", use_errno=True)


class INotify:
    """
    Ultra-lightweight inotify class.

    Owns one inotify file descriptor (self.fd). Usable as a context
    manager; the descriptor is closed on exit.
    """

    # Size of each os.read() on the inotify descriptor. It must be able to
    # hold at least one complete event (header plus name), otherwise the
    # kernel rejects the read.
    _READ_SIZE = 4096

    def __init__(self):
        self.fd = LIBC.inotify_init()
        if self.fd < 0:
            raise self._oserror("could not init inotify")
        # struct inotify_event header: int wd; uint32 mask, cookie, len.
        self.event_struct = Struct("iIII")
        # Events already read from the descriptor but not yet returned.
        self._pending = []

    @staticmethod
    def _oserror(message):
        """ Builds an OSError from the errno of the last libc call. """
        err = ctypes.get_errno()
        return OSError(err, message + ": " + errorcode.get(err, "errno %d" % err))

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, exc_tb):
        self.close()

    def close(self):
        """ Frees the associated resources. Safe to call more than once. """
        if self.fd >= 0:
            os.close(self.fd)
            self.fd = -1

    def next_event(self):
        """
        Waits for the next event, and returns a tuple of
        watch id, mask, cookie, name (bytes).

        name has its trailing NUL padding removed and is empty for events
        that carry no name.
        """
        header_size = self.event_struct.size
        while not self._pending:
            # One read may return several complete events; queue them all.
            raw = os.read(self.fd, self._READ_SIZE)
            if not raw:
                raise OSError("inotify read returned no data")
            offset = 0
            while offset < len(raw):
                watch_id, mask, cookie, name_size = \
                    self.event_struct.unpack_from(raw, offset)
                offset += header_size
                name = raw[offset:offset + name_size].rstrip(b"\0")
                offset += name_size
                self._pending.append((watch_id, mask, cookie, name))

        return self._pending.pop(0)

    def add_watch(self, filename, mask):
        """
        Adds a watch for filename, with the given mask.
        Returns the watch id.

        Raises TypeError if filename is not bytes, and OSError (with errno
        set) if the kernel rejects the watch.
        """
        if not isinstance(filename, bytes):
            raise TypeError("filename must be bytes")
        watch_id = LIBC.inotify_add_watch(self.fd, filename, mask)
        if watch_id < 0:
            raise self._oserror("could not add watch")
        return watch_id


def main():
    """ CLI: follows the file named on the command line, copying it to stdout. """
    from argparse import ArgumentParser
    cli = ArgumentParser()
    cli.add_argument("filename")
    args = cli.parse_args()
    import sys
    # os.fsencode converts the argv string back to the bytes of the file
    # name; a plain .encode() raises UnicodeEncodeError for names that are
    # not valid UTF-8.
    for data in follow(os.fsencode(args.filename)):
        sys.stdout.buffer.write(data)
        # Flush after every chunk so output appears as soon as it is read.
        sys.stdout.buffer.flush()

# Script entry point: run the CLI and exit quietly on Ctrl-C.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # Print a bare newline instead of a traceback (presumably so the
        # shell prompt starts on a fresh line).
        print("")

Note that there are various inotify adapters for Python, such as inotify, pyinotify and python-inotify. Those would basically do the work of the INotify class.