diff --git a/build.zig b/build.zig
index 73e957b..7b17d02 100644
--- a/build.zig
+++ b/build.zig
@@ -19,6 +19,9 @@ pub fn build(b: *std.Build) void {
 
     exe.pie = pie;
     exe.root_module.linkSystemLibrary("ncursesw", .{});
+    exe.root_module.linkSystemLibrary("zlib", .{});
+    exe.root_module.linkSystemLibrary("libzstd", .{});
+    exe.root_module.linkSystemLibrary("lz4", .{});
     // https://github.com/ziglang/zig/blob/b52be973dfb7d1408218b8e75800a2da3dc69108/build.zig#L551-L554
     if (target.result.isDarwin()) {
         // useful for package maintainers
diff --git a/src/bin_export.zig b/src/bin_export.zig
index 54904f3..de5f153 100644
--- a/src/bin_export.zig
+++ b/src/bin_export.zig
@@ -6,6 +6,11 @@ const main = @import("main.zig");
 const sink = @import("sink.zig");
 const util = @import("util.zig");
 const ui = @import("ui.zig");
+const c = @cImport({
+    @cInclude("zlib.h");
+    @cInclude("zstd.h");
+    @cInclude("lz4.h");
+});
 
 pub const global = struct {
     var fd: std.fs.File = undefined;
@@ -17,7 +22,7 @@ pub const global = struct {
     // var links: Map dev -> ino -> (last_offset, size, blocks, nlink)
 };
 
-const BLOCK_SIZE: usize = 64*1024;
+const BLOCK_SIZE: usize = 512*1024; // XXX: Current maximum for benchmarking, should just stick with a fixed block size.
 
 const ItemType = enum(i3) {
     dir = 0,
@@ -59,7 +64,7 @@ const ItemKey = enum(u5) {
 };
 
 // Pessimistic upper bound on the encoded size of an item, excluding the name field.
-// 2 bytes for map start/end, 10 per field (2 for the key, 9 for a full u64).
+// 2 bytes for map start/end, 11 per field (2 for the key, 9 for a full u64).
 const MAX_ITEM_LEN = 2 + 11 * @typeInfo(ItemKey).Enum.fields.len;
 
 const CborMajor = enum(u3) { pos, neg, bytes, text, array, map, tag, simple };
@@ -79,19 +84,53 @@ pub const Thread = struct {
     block_num: u32 = std.math.maxInt(u32),
     itemref: u64 = 0, // ref of item currently being written
 
-    // Temporary buffer for headers and compression
-    tmp: [BLOCK_SIZE+128]u8 = undefined,
+    // Temporary buffer for headers and compression: a 12-byte block header, the
+    // block body and a 4-byte trailer. The body part is large enough for the
+    // worst case of compressBound(), ZSTD_compressBound() and LZ4_compressBound()
+    // on BLOCK_SIZE bytes of input (assumes buf never holds more than that).
+    tmp: [12 + BLOCK_SIZE + BLOCK_SIZE/255 + 64 + 4]u8 = undefined,
 
+    // Each compress*() function writes the encoded form of `in` to the start of
+    // `out` and returns the number of bytes written. Failure is fatal.
+    fn compressNone(in: []const u8, out: []u8) usize {
+        @memcpy(out[0..in.len], in);
+        return in.len;
+    }
+
+    fn compressZlib(in: []const u8, out: []u8) usize {
+        var outlen: c.uLongf = @intCast(out.len);
+        const r = c.compress2(out.ptr, &outlen, in.ptr, @intCast(in.len), main.config.complevel);
+        if (r != c.Z_OK) ui.die("zlib compression failed (error {d}).\n", .{r});
+        return @intCast(outlen);
+    }
+
+    fn compressZstd(in: []const u8, out: []u8) usize {
+        const r = c.ZSTD_compress(out.ptr, out.len, in.ptr, in.len, main.config.complevel);
+        if (c.ZSTD_isError(r) != 0) ui.die("zstd compression failed: {s}.\n", .{std.mem.span(c.ZSTD_getErrorName(r))});
+        return r;
+    }
+
+    fn compressLZ4(in: []const u8, out: []u8) usize {
+        const r = c.LZ4_compress_default(in.ptr, out.ptr, @intCast(in.len), @intCast(out.len));
+        if (r <= 0) ui.die("lz4 compression failed.\n", .{});
+        return @intCast(r);
+    }
+
     fn createBlock(t: *Thread) []const u8 {
         if (t.block_num == std.math.maxInt(u32) or t.off <= 1) return "";
 
-        // TODO: Compression
-        const blocklen: u32 = @intCast(t.off + 16);
+        // Compress into the space between the 12-byte header and the 4-byte trailer.
+        const in = t.buf[0..t.off];
+        const out = t.tmp[12..t.tmp.len-4];
+        const bodylen = switch (main.config.compression) {
+            .none => compressNone(in, out),
+            .zlib => compressZlib(in, out),
+            .zstd => compressZstd(in, out),
+            .lz4 => compressLZ4(in, out),
+        };
+        const blocklen: u32 = @intCast(bodylen + 16);
         t.tmp[0..4].* = blockHeader(1, blocklen);
         t.tmp[4..8].* = bigu32(t.block_num);
         t.tmp[8..12].* = bigu32(@intCast(t.off));
-        @memcpy(t.tmp[12..][0..t.off], t.buf[0..t.off]);
-        t.tmp[12+t.off..][0..4].* = blockHeader(1, blocklen);
+        t.tmp[12+bodylen..][0..4].* = blockHeader(1, blocklen);
         return t.tmp[0..blocklen];
     }
 
@@ -164,7 +203,9 @@ pub const Thread = struct {
     // Reserve space for a new item, write out the type, prev and name fields and return the itemref.
     fn itemStart(t: *Thread, itype: ItemType, prev_item: u64, name: []const u8) u64 {
         const min_len = name.len + MAX_ITEM_LEN;
-        if (t.off + min_len > t.buf.len) t.flush(min_len);
+        // Never let the configured block size exceed the capacity of buf.
+        const limit = @min(main.config.blocksize, t.buf.len);
+        if (t.off + min_len > limit) t.flush(min_len);
 
         t.itemref = (@as(u64, t.block_num) << 24) | t.off;
         t.cborIndef(.map);
diff --git a/src/main.zig b/src/main.zig
index 73fdce0..0fc9f7c 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -70,6 +70,9 @@ pub const config = struct {
     pub var exclude_kernfs: bool = false;
     pub var exclude_patterns: std.ArrayList([:0]const u8) = std.ArrayList([:0]const u8).init(allocator);
     pub var threads: usize = 1;
+    pub var compression: enum { none, zlib, zstd, lz4 } = .none;
+    pub var complevel: u8 = 5;
+    pub var blocksize: usize = 64*1024;
 
     pub var update_delay: u64 = 100*std.time.ns_per_ms;
     pub var scan_ui: ?enum { none, line, full } = null;
@@ -502,7 +505,20 @@ pub fn main() void {
             else if (opt.is("-f")) import_file = allocator.dupeZ(u8, args.arg()) catch unreachable
             else if (opt.is("--ignore-config")) {}
             else if (opt.is("--quit-after-scan")) quit_after_scan = true // undocumented feature to help with benchmarking scan/import
-            else if (argConfig(&args, opt)) {}
+            else if (opt.is("--binfmt")) { // Experimental, for benchmarking
+                // Argument: algorithm letter, level digit, block size digit. Reject anything malformed up front.
+                const a = args.arg();
+                if (a.len < 3 or a[1] < '0' or a[1] > '9' or a[2] < '0' or a[2] > '6')
+                    ui.die("Invalid argument to --binfmt: '{s}'.\n", .{a});
+                config.compression = switch (a[0]) {
+                    'z' => .zlib,
+                    's','S' => .zstd,
+                    'l' => .lz4,
+                    else => .none,
+                };
+                config.complevel = (a[1] - '0') + (if (a[0] == 'S') @as(u8, 10) else 0);
+                config.blocksize = @as(usize, 8*1024) << @intCast(a[2] - '0'); // 0 = 8k, 1 16k, 2 32k, 3 64k, 4 128k, 5 256k, 6 512k
+            } else if (argConfig(&args, opt)) {}
             else ui.die("Unrecognized option '{s}'.\n", .{opt.val});
         }
     }