Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Simple log analysis with Zig

Tags:

zig

Motivated by https://benhoyt.com/writings/count-words/, I have played a bit with rewriting an internal log analysis script in several languages (though I will not go as far as the article does!).

After Go (by myself) and Rust (with some help from SO), I am currently stuck with Zig. I have more or less understood https://github.com/benhoyt/countwords/blob/master/simple.zig, but I am still having a hard time translating my original script along these lines. In particular, I am struggling with using a hash map with tuple keys and with handling month names when parsing and printing.

Original script in Python:

import sys

# Month abbreviation (first field of a log line) -> month number.
months = {
    "Jan": 1, "Feb": 2, "Mar": 3, "Apr": 4,
    "May": 5, "Jun": 6, "Jul": 7, "Aug": 8,
    "Sep": 9, "Oct": 10, "Nov": 11, "Dec": 12,
}

# Reverse lookup used when printing: month number -> abbreviation.
months_r = dict((number, name) for name, number in months.items())

# (month number, day) -> number of matching log lines seen for that date.
totals = {}

for line in sys.stdin:
    # Only lines mentioning both "redis" and "Partial" are counted.
    if "redis" not in line or "Partial" not in line:
        continue
    # The first two whitespace-separated fields are the month name and the day.
    f1, f2 = line.split()[:2]
    w = (months[f1], int(f2))
    if w in totals:
        totals[w] += 1
    else:
        totals[w] = 1

# Tuples sort by month number first, then by day.
for month, day in sorted(totals):
    print(months_r[month], day, totals[(month, day)])

Could someone fluent with recent Zig give a hand?

Thanks a lot!

like image 452
user15795022 Avatar asked Apr 02 '26 11:04

user15795022


1 Answers

Solution from the Zig Forum

const std = @import("std");

/// Date bucket used to group matching log lines.
const Key = struct {
    /// Month number as stored in the `months` table (1 = Jan ... 12 = Dec).
    month: u4,
    /// Day of the month, parsed from the second field of the log line.
    day: u5,
};

/// Packs a `Key` into a single 64-bit value: the month occupies the bits
/// starting at position 32 and the day occupies the low bits.
/// Because the month sits above the day, comparing two packed values orders
/// keys by month first and by day second.
fn keyHash(key: Key) u64 {
    const month_bits: u64 = key.month;
    const day_bits: u64 = key.day;
    return (month_bits << 32) | day_bits;
}

/// Hash map type from a `Key` (month, day) to a `usize` occurrence count.
const Totals = std.HashMap(
    // Key type: the (month, day) pair.
    Key,
    // Value type: how many matching lines were seen for that key.
    usize,
    // Hash function: the packed month/day value produced by `keyHash`.
    keyHash,
    // Equality function generated by the standard library for `Key`.
    std.hash_map.getAutoEqlFn(Key),
    // Load factor: the standard library's default.
    std.hash_map.default_max_load_percentage,
);

/// One (key, count) pair copied out of the totals map so the results can be
/// sorted before printing.
const Item = struct {
    /// The (month, day) bucket.
    key: Key,
    /// Number of matching log lines counted for `key`.
    count: usize,
};

/// "Less than" predicate for sorting `Item`s chronologically: items are
/// ordered by month, and by day within the same month.
/// `context` is unused; it is present because the sort API passes one.
fn itemSort(context: void, lhs: Item, rhs: Item) bool {
    if (lhs.key.month != rhs.key.month) {
        return lhs.key.month < rhs.key.month;
    }
    return lhs.key.day < rhs.key.day;
}

// zig fmt: off
/// Compile-time lookup table from a three-letter month abbreviation to its
/// month number (1 = Jan ... 12 = Dec).
const months = std.ComptimeStringMap(u4, .{
    .{ "Jan", 1 },
    .{ "Feb", 2 },
    .{ "Mar", 3 },
    .{ "Apr", 4 },
    .{ "May", 5 },
    .{ "Jun", 6 },
    .{ "Jul", 7 },
    .{ "Aug", 8 },
    .{ "Sep", 9 },
    .{ "Oct", 10 },
    .{ "Nov", 11 },
    .{ "Dec", 12 },
});

/// Reverse lookup: month number -> three-letter abbreviation.
/// Index 0 holds a placeholder so that month numbers (which start at 1) can
/// be used directly as indices.
const months_r = [_][]const u8{
    "(padding)", // 0: not a month
    "Jan", // 1
    "Feb", // 2
    "Mar", // 3
    "Apr", // 4
    "May", // 5
    "Jun", // 6
    "Jul", // 7
    "Aug", // 8
    "Sep", // 9
    "Oct", // 10
    "Nov", // 11
    "Dec", // 12
};
// zig fmt: on

/// Reads log lines from stdin, counts the lines that contain both "redis"
/// and "Partial" per (month, day) taken from the first two
/// whitespace-separated fields, and prints `<Mon> <day> <count>` for each
/// date to stdout, ordered by month and then by day.
///
/// Errors:
/// - `error.MissingMonth` / `error.MissingDay` when a matching line has
///   fewer than two fields.
/// - `error.UnknownMonth` when the first field is not in `months`.
/// - Any error from reading stdin, parsing the day as a `u5`, allocating
///   memory, or writing to stdout is propagated.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer if (gpa.deinit()) std.log.err("memory leak detected", .{});
    const allocator = &gpa.allocator;

    var totals = Totals.init(allocator);
    defer totals.deinit();

    // Keep the buffered reader in a named variable: `reader()` hands out a
    // handle that refers back to it, so it must not be a temporary.
    var buffered_stdin = std.io.bufferedReader(std.io.getStdIn().reader());
    const stdin = buffered_stdin.reader();

    // Each line is read into this fixed buffer; a read failure (for example
    // a line that does not fit) is propagated by the `try` below.
    var buf: [4096]u8 = undefined;
    while (try stdin.readUntilDelimiterOrEof(&buf, '\n')) |line| {
        // Only lines mentioning both markers are counted.
        if (std.mem.indexOf(u8, line, "redis") == null or std.mem.indexOf(u8, line, "Partial") == null)
            continue;

        // Report malformed lines as errors instead of unwrapping optionals,
        // which would panic (or be undefined behaviour in unchecked builds).
        var fields = std.mem.tokenize(line, &std.ascii.spaces);
        const month_name = fields.next() orelse return error.MissingMonth;
        const month = months.get(month_name) orelse return error.UnknownMonth;
        const day_text = fields.next() orelse return error.MissingDay;
        const day = try std.fmt.parseUnsigned(u5, day_text, 10);

        const res = try totals.getOrPut(.{ .month = month, .day = day });
        if (res.found_existing)
            res.entry.value += 1
        else
            res.entry.value = 1;
    }

    var stdout = std.io.bufferedWriter(std.io.getStdOut().writer());
    defer stdout.flush() catch std.log.err("stdout flushing failed", .{});
    const out = stdout.writer();

    // Copy the map entries into a slice so they can be sorted.
    var items = try allocator.alloc(Item, totals.count());
    defer allocator.free(items);

    {
        var it = totals.iterator();
        var i: usize = 0;
        while (it.next()) |kv| : (i += 1) {
            items[i] = .{ .key = kv.key, .count = kv.value };
        }
    }

    std.sort.sort(Item, items, {}, itemSort);

    for (items) |item| {
        try out.print("{s} {d} {d}\n", .{ months_r[item.key.month], item.key.day, item.count });
    }
}
like image 98
user15795022 Avatar answered Apr 08 '26 07:04

user15795022



Donate For Us

If you love us, you can donate via PayPal or buy us a coffee so we can maintain and grow the site. Thank you!