Stick with zstd-4 + 64k block, add --compress-level, fix 32bit build

And do dynamic buffer allocation for bin_export, removing 128k of
.rodata that I accidentally introduced earlier and reducing memory use
for parallel scans.

Static binaries now also include the minimal version of zstd, current
sizes for x86_64 are:

  582k ncdu-2.5
  601k ncdu-new-nocompress
  765k ncdu-new-zstd

That's not great, but also not awful. Even zlib or LZ4 would've resulted
in a 700k binary.
This commit is contained in:
Yorhel 2024-08-03 13:16:44 +02:00
parent 85e12beb1c
commit 8ad61e87c1
9 changed files with 75 additions and 71 deletions

1
.gitignore vendored
View file

@@ -4,6 +4,7 @@
 *.swp
 *~
 ncurses
+zstd
 static-*/
 zig-cache/
 zig-out/

View file

@@ -52,9 +52,21 @@ dist:
 	rm -rf ncdu-${NCDU_VERSION}
 
-# ASSUMPTION: the ncurses source tree has been extracted into ncurses/
+# ASSUMPTION:
+# - the ncurses source tree has been extracted into ncurses/
+# - the zstd source tree has been extracted into zstd/
+# Would be nicer to do all this with the Zig build system, but no way am I
+# going to write build.zig's for these projects.
 static-%.tar.gz:
 	mkdir -p static-$*/nc static-$*/inst/pkg
+	cp -R zstd/lib static-$*/zstd
+	make -C static-$*/zstd -j8 libzstd.a V=1\
+		ZSTD_LIB_DICTBUILDER=0\
+		ZSTD_LIB_MINIFY=1\
+		ZSTD_LIB_EXCLUDE_COMPRESSORS_DFAST_AND_UP=1\
+		CC="${ZIG} cc --target=$*"\
+		LD="${ZIG} cc --target=$*"\
+		AR="${ZIG} ar" RANLIB="${ZIG} ranlib"
 	cd static-$*/nc && ../../ncurses/configure --prefix="`pwd`/../inst"\
 		--with-pkg-config-libdir="`pwd`/../inst/pkg"\
 		--without-cxx --without-cxx-binding --without-ada --without-manpages --without-progs\
@@ -71,7 +83,7 @@ static-%.tar.gz:
 	@# --build-file ../build.zig --search-prefix inst/ --cache-dir zig -Drelease-fast=true
 	@# Alternative approach, bypassing zig-build
 	cd static-$* && ${ZIG} build-exe -target $*\
-		-Iinst/include -Iinst/include/ncursesw -lc inst/lib/libncursesw.a\
+		-Iinst/include -Iinst/include/ncursesw -Izstd -lc inst/lib/libncursesw.a zstd/libzstd.a\
 		--cache-dir zig-cache -static -fstrip -O ReleaseFast ../src/main.zig
 	cd static-$* && mv main ncdu && tar -czf ../static-$*.tar.gz ncdu
 	rm -rf static-$*

View file

@@ -21,7 +21,8 @@ C version (1.x).
 - Zig 0.12 or 0.13.
 - Some sort of POSIX-like OS
-- ncurses libraries and header files
+- ncurses
+- libzstd
 
 ## Install

View file

@@ -19,9 +19,7 @@ pub fn build(b: *std.Build) void {
     exe.pie = pie;
     exe.root_module.linkSystemLibrary("ncursesw", .{});
-    exe.root_module.linkSystemLibrary("zlib", .{});
     exe.root_module.linkSystemLibrary("libzstd", .{});
-    exe.root_module.linkSystemLibrary("lz4", .{});
 
     // https://github.com/ziglang/zig/blob/b52be973dfb7d1408218b8e75800a2da3dc69108/build.zig#L551-L554
     if (target.result.isDarwin()) {
         // useful for package maintainers

View file

@@ -27,6 +27,7 @@ use bytes;
 no warnings 'portable';
 use List::Util 'min', 'max';
 use CBOR::XS; # Does not officially support recent perl versions, but it's the only CPAN module that supports streaming.
+use Compress::Zstd;
 
 my $printblocks = grep $_ eq 'blocks', @ARGV;
 my $printitems = grep $_ eq 'items', @ARGV;
@@ -76,13 +77,17 @@ sub datablock($prefix, $off, $blklen, $content) {
     die sprintf "%s: Duplicate block id %d (first at %010x)", $prefix, $num, $datablocks{$num}>>24 if $datablocks{$num};
     $datablocks{$num} = ($off << 24) | $blklen;
-    $printblocks && printf "%s: data block %d rawlen %d (%.2f)\n", $prefix, $num, $rawlen, $rawlen/(length($content)-8)*100;
-    $datablock_len += length($content)-8;
+    my $compressed = substr $content, 8;
+    $printblocks && printf "%s: data block %d rawlen %d (%.2f)\n", $prefix, $num, $rawlen, $rawlen/(length($compressed))*100;
+    $datablock_len += length($compressed);
     $rawdata_len += $rawlen;
-    # TODO: Decompress
-    cbordata($num, substr $content, 8);
+    my $rawdata = decompress($compressed);
+    die "$prefix: Block id $num failed decompression\n" if !defined $rawdata;
+    die sprintf "%s: Block id %d decompressed to %d bytes but expected %d\n",
+        $prefix, $num, length($rawdata), $rawlen if $rawlen != length $rawdata;
+    cbordata($num, $rawdata);
 }

View file

@@ -7,11 +7,9 @@ const model = @import("model.zig");
 const sink = @import("sink.zig");
 const util = @import("util.zig");
 const ui = @import("ui.zig");
-const c = @cImport({
-    @cInclude("zlib.h");
-    @cInclude("zstd.h");
-    @cInclude("lz4.h");
-});
+
+extern fn ZSTD_compress(dst: ?*anyopaque, dstCapacity: usize, src: ?*const anyopaque, srcSize: usize, compressionLevel: c_int) usize;
+extern fn ZSTD_isError(code: usize) c_uint;
 
 pub const global = struct {
     var fd: std.fs.File = undefined;
@@ -21,7 +19,8 @@ pub const global = struct {
     var root_itemref: u64 = 0;
 };
 
-const BLOCK_SIZE: usize = 512*1024; // XXX: Current maximum for benchmarking, should just stick with a fixed block size.
+const BLOCK_SIZE: usize = 64*1024;
+const COMPRESSED_SIZE: usize = 65824; // ZSTD_COMPRESSBOUND(BLOCK_SIZE)
 
 pub const SIGNATURE = "\xbfncduEX1";
@@ -69,48 +68,30 @@ inline fn cborByte(major: CborMajor, arg: u5) u8 { return (@as(u8, @intFromEnum(
 pub const Thread = struct {
-    buf: [BLOCK_SIZE]u8 = undefined,
+    buf: []u8 = undefined,
     off: usize = BLOCK_SIZE,
     block_num: u32 = std.math.maxInt(u32),
     itemref: u64 = 0, // ref of item currently being written
+    tmp: []u8 = undefined, // Temporary buffer for headers and compression.
 
-    // Temporary buffer for headers and compression.
-    // TODO: check with compressBound()/ZSTD_compressBound()
-    tmp: [BLOCK_SIZE+128]u8 = undefined,
-
+    // unused, but kept around for easy debugging
     fn compressNone(in: []const u8, out: []u8) usize {
         @memcpy(out[0..in.len], in);
         return in.len;
     }
 
-    fn compressZlib(in: []const u8, out: []u8) usize {
-        var outlen: c.uLongf = out.len;
-        const r = c.compress2(out.ptr, &outlen, in.ptr, in.len, main.config.complevel);
-        std.debug.assert(r == c.Z_OK);
-        return outlen;
-    }
-
     fn compressZstd(in: []const u8, out: []u8) usize {
-        const r = c.ZSTD_compress(out.ptr, out.len, in.ptr, in.len, main.config.complevel);
-        std.debug.assert(c.ZSTD_isError(r) == 0);
-        return r;
-    }
-
-    fn compressLZ4(in: []const u8, out: []u8) usize {
-        const r = c.LZ4_compress_default(in.ptr, out.ptr, @intCast(in.len), @intCast(out.len));
-        std.debug.assert(r > 0);
-        return @intCast(r);
+        while (true) {
+            const r = ZSTD_compress(out.ptr, out.len, in.ptr, in.len, main.config.complevel);
+            if (ZSTD_isError(r) == 0) return r;
+            ui.oom(); // That *ought* to be the only reason the above call can fail.
+        }
     }
 
     fn createBlock(t: *Thread) []const u8 {
         if (t.block_num == std.math.maxInt(u32) or t.off <= 1) return "";
-        const bodylen = switch (main.config.compression) {
-            .none => compressNone(t.buf[0..t.off], t.tmp[12..]),
-            .zlib => compressZlib(t.buf[0..t.off], t.tmp[12..]),
-            .zstd => compressZstd(t.buf[0..t.off], t.tmp[12..]),
-            .lz4 => compressLZ4(t.buf[0..t.off], t.tmp[12..]),
-        };
+        const bodylen = compressZstd(t.buf[0..t.off], t.tmp[12..]);
         const blocklen: u32 = @intCast(bodylen + 16);
         t.tmp[0..4].* = blockHeader(1, blocklen);
         t.tmp[4..8].* = bigu32(t.block_num);
@@ -188,7 +169,7 @@ pub const Thread = struct {
     // Reserve space for a new item, write out the type, prev and name fields and return the itemref.
     fn itemStart(t: *Thread, itype: model.EType, prev_item: u64, name: []const u8) u64 {
         const min_len = name.len + MAX_ITEM_LEN;
-        if (t.off + min_len > main.config.blocksize) t.flush(min_len);
+        if (t.off + min_len > t.buf.len) t.flush(min_len);
         t.itemref = (@as(u64, t.block_num) << 24) | t.off;
 
         t.cborIndef(.map);
@@ -418,12 +399,21 @@ pub const Dir = struct {
 };
 
-pub fn createRoot(stat: *const sink.Stat) Dir {
+pub fn createRoot(stat: *const sink.Stat, threads: []sink.Thread) Dir {
+    for (threads) |*t| {
+        t.sink.bin.buf = main.allocator.alloc(u8, BLOCK_SIZE) catch unreachable;
+        t.sink.bin.tmp = main.allocator.alloc(u8, COMPRESSED_SIZE) catch unreachable;
+    }
     return .{ .stat = stat.* };
 }
 
 pub fn done(threads: []sink.Thread) void {
-    for (threads) |*t| t.sink.bin.flush(0);
+    for (threads) |*t| {
+        t.sink.bin.flush(0);
+        main.allocator.free(t.sink.bin.buf);
+        main.allocator.free(t.sink.bin.tmp);
+    }
 
     while (std.mem.endsWith(u8, global.index.items, &[1]u8{0}**8))
         global.index.shrinkRetainingCapacity(global.index.items.len - 8);

View file

@@ -9,6 +9,8 @@ const sink = @import("sink.zig");
 const ui = @import("ui.zig");
 const bin_export = @import("bin_export.zig");
 
+extern fn ZSTD_decompress(dst: ?*anyopaque, dstCapacity: usize, src: ?*const anyopaque, compressedSize: usize) usize;
+
 const CborMajor = bin_export.CborMajor;
 const ItemKey = bin_export.ItemKey;
@@ -90,7 +92,7 @@ fn readBlock(num: u32) []const u8 {
     const offlen = bigu64(global.index[num*8..][0..8].*);
     if ((offlen & 0xffffff) < 16) die();
-    const buf = main.allocator.alloc(u8, (offlen & 0xffffff) - 12) catch unreachable;
+    const buf = main.allocator.alloc(u8, @intCast((offlen & 0xffffff) - 12)) catch unreachable;
     defer main.allocator.free(buf);
     const rdlen = global.fd.preadAll(buf, (offlen >> 24) + 8)
         catch |e| ui.die("Error reading from file: {s}\n", .{ui.errorString(e)});
@@ -100,8 +102,9 @@ fn readBlock(num: u32) []const u8 {
     if (rawlen >= (1<<24)) die();
     block.data = main.allocator.alloc(u8, rawlen) catch unreachable;
-    // TODO: decompress
-    @memcpy(block.data, buf[4..][0..rawlen]);
+    const res = ZSTD_decompress(block.data.ptr, block.data.len, buf[4..].ptr, buf.len - 4);
+    if (res != block.data.len) ui.die("Error decompressing block {} (expected {} got {})\n", .{ num, block.data.len, res });
+
     return block.data;
 }
@@ -190,8 +193,8 @@ const CborVal = struct {
     fn bytes(v: *const CborVal) []const u8 {
         if (v.indef or (v.major != .bytes and v.major != .text)) die();
         if (v.rd.buf.len < v.arg) die();
-        defer v.rd.buf = v.rd.buf[v.arg..];
-        return v.rd.buf[0..v.arg];
+        defer v.rd.buf = v.rd.buf[@intCast(v.arg)..];
+        return v.rd.buf[0..@intCast(v.arg)];
     }
 
     // Skip current value.
@@ -207,13 +210,15 @@ const CborVal = struct {
         switch (v.major) {
             .bytes, .text => {
                 if (v.rd.buf.len < v.arg) die();
-                v.rd.buf = v.rd.buf[v.arg..];
+                v.rd.buf = v.rd.buf[@intCast(v.arg)..];
             },
             .array => {
-                for (0..v.arg) |_| v.rd.next().skip();
+                if (v.arg > (1<<24)) die();
+                for (0..@intCast(v.arg)) |_| v.rd.next().skip();
             },
             .map => {
-                for (0..v.arg*|2) |_| v.rd.next().skip();
+                if (v.arg > (1<<24)) die();
+                for (0..@intCast(v.arg*|2)) |_| v.rd.next().skip();
             },
             else => {},
         }
@@ -297,7 +302,7 @@ test "CBOR skip parsing" {
 const ItemParser = struct {
     r: CborReader,
-    len: ?usize = null,
+    len: ?u64 = null,
 
     const Field = struct {
         key: ItemKey,
@@ -344,7 +349,7 @@ fn readItem(ref: u64) ItemParser {
     if (ref >= (1 << (24 + 32))) die();
     const block = readBlock(@intCast(ref >> 24));
     if ((ref & 0xffffff) > block.len) die();
-    return ItemParser.init(block[(ref & 0xffffff)..]);
+    return ItemParser.init(block[@intCast(ref & 0xffffff)..]);
 }
 
 const Import = struct {
const Import = struct { const Import = struct {

View file

@@ -72,9 +72,7 @@ pub const config = struct {
     pub var exclude_kernfs: bool = false;
     pub var exclude_patterns: std.ArrayList([:0]const u8) = std.ArrayList([:0]const u8).init(allocator);
     pub var threads: usize = 1;
-    pub var compression: enum { none, zlib, zstd, lz4 } = .none;
-    pub var complevel: u8 = 5;
-    pub var blocksize: usize = 64*1024;
+    pub var complevel: u8 = 4;
 
     pub var update_delay: u64 = 100*std.time.ns_per_ms;
     pub var scan_ui: ?enum { none, line, full } = null;
@@ -269,7 +267,11 @@ fn argConfig(args: *Args, opt: Args.Option) bool {
     else if (opt.is("--include-caches")) config.exclude_caches = false
     else if (opt.is("--exclude-kernfs")) config.exclude_kernfs = true
     else if (opt.is("--include-kernfs")) config.exclude_kernfs = false
-    else if (opt.is("--confirm-quit")) config.confirm_quit = true
+    else if (opt.is("--compress-level")) {
+        const val = args.arg();
+        config.complevel = std.fmt.parseInt(u8, val, 10) catch ui.die("Invalid number for --compress-level: {s}.\n", .{val});
+        if (config.complevel <= 0 or config.complevel > 20) ui.die("Invalid number for --compress-level: {s}.\n", .{val});
+    } else if (opt.is("--confirm-quit")) config.confirm_quit = true
     else if (opt.is("--no-confirm-quit")) config.confirm_quit = false
     else if (opt.is("--confirm-delete")) config.confirm_delete = true
     else if (opt.is("--no-confirm-delete")) config.confirm_delete = false
@@ -523,17 +525,7 @@ pub fn main() void {
         else if (opt.is("-f")) import_file = allocator.dupeZ(u8, args.arg()) catch unreachable
        else if (opt.is("--ignore-config")) {}
         else if (opt.is("--quit-after-scan")) quit_after_scan = true // undocumented feature to help with benchmarking scan/import
-        else if (opt.is("--binfmt")) { // Experimental, for benchmarking
-            const a = args.arg();
-            config.compression = switch (a[0]) {
-                'z' => .zlib,
-                's','S' => .zstd,
-                'l' => .lz4,
-                else => .none,
-            };
-            config.complevel = (a[1] - '0') + (if (a[0] == 'S') @as(u8, 10) else 0);
-            config.blocksize = @as(usize, 8*1024) << @intCast(a[2] - '0'); // 0 = 8k, 1 16k, 2 32k, 3 64k, 4 128k, 5 256k, 6 512k
-        } else if (argConfig(&args, opt)) {}
+        else if (argConfig(&args, opt)) {}
         else ui.die("Unrecognized option '{s}'.\n", .{opt.val});
     }
 }

View file

@@ -279,7 +279,7 @@ pub fn createRoot(path: []const u8, stat: *const Stat) *Dir {
         .out = switch (global.sink) {
             .mem => .{ .mem = mem_sink.createRoot(path, stat) },
             .json => .{ .json = json_export.createRoot(path, stat) },
-            .bin => .{ .bin = bin_export.createRoot(stat) },
+            .bin => .{ .bin = bin_export.createRoot(stat, global.threads) },
        },
     };
     return d;