From 8ad61e87c112c0ed8e12af7b81af1ca3aa56b37d Mon Sep 17 00:00:00 2001
From: Yorhel
Date: Sat, 3 Aug 2024 13:16:44 +0200
Subject: [PATCH] Stick with zstd-4 + 64k block, add --compress-level, fix 32bit build

And do dynamic buffer allocation for bin_export, removing 128k of
.rodata that I accidentally introduced earlier and reducing memory use
for parallel scans.

Static binaries now also include the minimal version of zstd, current
sizes for x86_64 are:

  582k ncdu-2.5
  601k ncdu-new-nocompress
  765k ncdu-new-zstd

That's not great, but also not awful. Even zlib or LZ4 would've
resulted in a 700k binary.
---
 .gitignore         |  1 +
 Makefile           | 16 ++++++++++--
 README.md          |  3 ++-
 build.zig          |  2 --
 ncdubinexp.pl      | 13 +++++++---
 src/bin_export.zig | 62 +++++++++++++++++++---------------------
 src/bin_reader.zig | 25 +++++++++++--------
 src/main.zig       | 22 ++++++----------
 src/sink.zig       |  2 +-
 9 files changed, 75 insertions(+), 71 deletions(-)

diff --git a/.gitignore b/.gitignore
index 5087b6c..1f1cd99 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@
 *.swp
 *~
 ncurses
+zstd
 static-*/
 zig-cache/
 zig-out/
diff --git a/Makefile b/Makefile
index 14e0c18..9dce609 100644
--- a/Makefile
+++ b/Makefile
@@ -52,9 +52,21 @@ dist:
 	rm -rf ncdu-${NCDU_VERSION}


-# ASSUMPTION: the ncurses source tree has been extracted into ncurses/
+# ASSUMPTION:
+# - the ncurses source tree has been extracted into ncurses/
+# - the zstd source tree has been extracted into zstd/
+# Would be nicer to do all this with the Zig build system, but no way am I
+# going to write build.zig's for these projects.
 static-%.tar.gz:
 	mkdir -p static-$*/nc static-$*/inst/pkg
+	cp -R zstd/lib static-$*/zstd
+	make -C static-$*/zstd -j8 libzstd.a V=1\
+		ZSTD_LIB_DICTBUILDER=0\
+		ZSTD_LIB_MINIFY=1\
+		ZSTD_LIB_EXCLUDE_COMPRESSORS_DFAST_AND_UP=1\
+		CC="${ZIG} cc --target=$*"\
+		LD="${ZIG} cc --target=$*"\
+		AR="${ZIG} ar" RANLIB="${ZIG} ranlib"
 	cd static-$*/nc && ../../ncurses/configure --prefix="`pwd`/../inst"\
 		--with-pkg-config-libdir="`pwd`/../inst/pkg"\
 		--without-cxx --without-cxx-binding --without-ada --without-manpages --without-progs\
@@ -71,7 +83,7 @@ static-%.tar.gz:
 	@# --build-file ../build.zig --search-prefix inst/ --cache-dir zig -Drelease-fast=true
 	@# Alternative approach, bypassing zig-build
 	cd static-$* && ${ZIG} build-exe -target $*\
-		-Iinst/include -Iinst/include/ncursesw -lc inst/lib/libncursesw.a\
+		-Iinst/include -Iinst/include/ncursesw -Izstd -lc inst/lib/libncursesw.a zstd/libzstd.a\
		--cache-dir zig-cache -static -fstrip -O ReleaseFast ../src/main.zig
 	cd static-$* && mv main ncdu && tar -czf ../static-$*.tar.gz ncdu
 	rm -rf static-$*
diff --git a/README.md b/README.md
index 54516fe..03ed71d 100644
--- a/README.md
+++ b/README.md
@@ -21,7 +21,8 @@ C version (1.x).

 - Zig 0.12 or 0.13.
 - Some sort of POSIX-like OS
-- ncurses libraries and header files
+- ncurses
+- libzstd

 ## Install
diff --git a/build.zig b/build.zig
index 7b17d02..15e5a6c 100644
--- a/build.zig
+++ b/build.zig
@@ -19,9 +19,7 @@ pub fn build(b: *std.Build) void {
     exe.pie = pie;

     exe.root_module.linkSystemLibrary("ncursesw", .{});
-    exe.root_module.linkSystemLibrary("zlib", .{});
     exe.root_module.linkSystemLibrary("libzstd", .{});
-    exe.root_module.linkSystemLibrary("lz4", .{});

     // https://github.com/ziglang/zig/blob/b52be973dfb7d1408218b8e75800a2da3dc69108/build.zig#L551-L554
     if (target.result.isDarwin()) { // useful for package maintainers
diff --git a/ncdubinexp.pl b/ncdubinexp.pl
index ec48d20..e4ad98d 100755
--- a/ncdubinexp.pl
+++ b/ncdubinexp.pl
@@ -27,6 +27,7 @@ use bytes;
 no warnings 'portable';
 use List::Util 'min', 'max';
 use CBOR::XS; # Does not officially support recent perl versions, but it's the only CPAN module that supports streaming.
+use Compress::Zstd;

 my $printblocks = grep $_ eq 'blocks', @ARGV;
 my $printitems = grep $_ eq 'items', @ARGV;
@@ -76,13 +77,17 @@ sub datablock($prefix, $off, $blklen, $content) {
     die sprintf "%s: Duplicate block id %d (first at %010x)", $prefix, $num, $datablocks{$num}>>24 if $datablocks{$num};
     $datablocks{$num} = ($off << 24) | $blklen;

-    $printblocks && printf "%s: data block %d rawlen %d (%.2f)\n", $prefix, $num, $rawlen, $rawlen/(length($content)-8)*100;
+    my $compressed = substr $content, 8;
+    $printblocks && printf "%s: data block %d rawlen %d (%.2f)\n", $prefix, $num, $rawlen, $rawlen/(length($compressed))*100;

-    $datablock_len += length($content)-8;
+    $datablock_len += length($compressed);
     $rawdata_len += $rawlen;

-    # TODO: Decompress
-    cbordata($num, substr $content, 8);
+    my $rawdata = decompress($compressed);
+    die "$prefix: Block id $num failed decompression\n" if !defined $rawdata;
+    die sprintf "%s: Block id %d decompressed to %d bytes but expected %d\n",
+        $prefix, $num, length($rawdata), $rawlen if $rawlen != length $rawdata;
+    cbordata($num, $rawdata);
 }

diff --git a/src/bin_export.zig b/src/bin_export.zig
index 4aef619..d7ccb41 100644
--- a/src/bin_export.zig
+++ b/src/bin_export.zig
@@ -7,11 +7,9 @@ const model = @import("model.zig");
 const sink = @import("sink.zig");
 const util = @import("util.zig");
 const ui = @import("ui.zig");
-const c = @cImport({
-    @cInclude("zlib.h");
-    @cInclude("zstd.h");
-    @cInclude("lz4.h");
-});
+
+extern fn ZSTD_compress(dst: ?*anyopaque, dstCapacity: usize, src: ?*const anyopaque, srcSize: usize, compressionLevel: c_int) usize;
+extern fn ZSTD_isError(code: usize) c_uint;

 pub const global = struct {
     var fd: std.fs.File = undefined;
@@ -21,7 +19,8 @@ pub const global = struct {
     var root_itemref: u64 = 0;
 };

-const BLOCK_SIZE: usize = 512*1024; // XXX: Current maximum for benchmarking, should just stick with a fixed block size.
+const BLOCK_SIZE: usize = 64*1024;
+const COMPRESSED_SIZE: usize = 65824; // ZSTD_COMPRESSBOUND(BLOCK_SIZE)

 pub const SIGNATURE = "\xbfncduEX1";

@@ -69,48 +68,30 @@ inline fn cborByte(major: CborMajor, arg: u5) u8 { return (@as(u8, @intFromEnum(

 pub const Thread = struct {
-    buf: [BLOCK_SIZE]u8 = undefined,
+    buf: []u8 = undefined,
     off: usize = BLOCK_SIZE,
     block_num: u32 = std.math.maxInt(u32),
     itemref: u64 = 0, // ref of item currently being written
+    tmp: []u8 = undefined, // Temporary buffer for headers and compression.

-    // Temporary buffer for headers and compression.
-    // TODO: check with compressBound()/ZSTD_compressBound()
-    tmp: [BLOCK_SIZE+128]u8 = undefined,
-
+    // unused, but kept around for easy debugging
     fn compressNone(in: []const u8, out: []u8) usize {
         @memcpy(out[0..in.len], in);
         return in.len;
     }

-    fn compressZlib(in: []const u8, out: []u8) usize {
-        var outlen: c.uLongf = out.len;
-        const r = c.compress2(out.ptr, &outlen, in.ptr, in.len, main.config.complevel);
-        std.debug.assert(r == c.Z_OK);
-        return outlen;
-    }
-
     fn compressZstd(in: []const u8, out: []u8) usize {
-        const r = c.ZSTD_compress(out.ptr, out.len, in.ptr, in.len, main.config.complevel);
-        std.debug.assert(c.ZSTD_isError(r) == 0);
-        return r;
-    }
-
-    fn compressLZ4(in: []const u8, out: []u8) usize {
-        const r = c.LZ4_compress_default(in.ptr, out.ptr, @intCast(in.len), @intCast(out.len));
-        std.debug.assert(r > 0);
-        return @intCast(r);
+        while (true) {
+            const r = ZSTD_compress(out.ptr, out.len, in.ptr, in.len, main.config.complevel);
+            if (ZSTD_isError(r) == 0) return r;
+            ui.oom(); // That *ought* to be the only reason the above call can fail.
+        }
     }

     fn createBlock(t: *Thread) []const u8 {
         if (t.block_num == std.math.maxInt(u32) or t.off <= 1) return "";
-        const bodylen = switch (main.config.compression) {
-            .none => compressNone(t.buf[0..t.off], t.tmp[12..]),
-            .zlib => compressZlib(t.buf[0..t.off], t.tmp[12..]),
-            .zstd => compressZstd(t.buf[0..t.off], t.tmp[12..]),
-            .lz4 => compressLZ4(t.buf[0..t.off], t.tmp[12..]),
-        };
+        const bodylen = compressZstd(t.buf[0..t.off], t.tmp[12..]);
         const blocklen: u32 = @intCast(bodylen + 16);
         t.tmp[0..4].* = blockHeader(1, blocklen);
         t.tmp[4..8].* = bigu32(t.block_num);
@@ -188,7 +169,7 @@ pub const Thread = struct {
     // Reserve space for a new item, write out the type, prev and name fields and return the itemref.
     fn itemStart(t: *Thread, itype: model.EType, prev_item: u64, name: []const u8) u64 {
         const min_len = name.len + MAX_ITEM_LEN;
-        if (t.off + min_len > main.config.blocksize) t.flush(min_len);
+        if (t.off + min_len > t.buf.len) t.flush(min_len);

         t.itemref = (@as(u64, t.block_num) << 24) | t.off;
         t.cborIndef(.map);
@@ -418,12 +399,21 @@ pub const Dir = struct {
 };


-pub fn createRoot(stat: *const sink.Stat) Dir {
+pub fn createRoot(stat: *const sink.Stat, threads: []sink.Thread) Dir {
+    for (threads) |*t| {
+        t.sink.bin.buf = main.allocator.alloc(u8, BLOCK_SIZE) catch unreachable;
+        t.sink.bin.tmp = main.allocator.alloc(u8, COMPRESSED_SIZE) catch unreachable;
+    }
+
     return .{ .stat = stat.* };
 }

 pub fn done(threads: []sink.Thread) void {
-    for (threads) |*t| t.sink.bin.flush(0);
+    for (threads) |*t| {
+        t.sink.bin.flush(0);
+        main.allocator.free(t.sink.bin.buf);
+        main.allocator.free(t.sink.bin.tmp);
+    }

     while (std.mem.endsWith(u8, global.index.items, &[1]u8{0}**8))
         global.index.shrinkRetainingCapacity(global.index.items.len - 8);
diff --git a/src/bin_reader.zig b/src/bin_reader.zig
index 9cc415f..1dbf8ba 100644
--- a/src/bin_reader.zig
+++ b/src/bin_reader.zig
@@ -9,6 +9,8 @@ const sink = @import("sink.zig");
 const ui = @import("ui.zig");
 const bin_export = @import("bin_export.zig");

+extern fn ZSTD_decompress(dst: ?*anyopaque, dstCapacity: usize, src: ?*const anyopaque, compressedSize: usize) usize;
+
 const CborMajor = bin_export.CborMajor;
 const ItemKey = bin_export.ItemKey;

@@ -90,7 +92,7 @@ fn readBlock(num: u32) []const u8 {
     const offlen = bigu64(global.index[num*8..][0..8].*);
     if ((offlen & 0xffffff) < 16) die();

-    const buf = main.allocator.alloc(u8, (offlen & 0xffffff) - 12) catch unreachable;
+    const buf = main.allocator.alloc(u8, @intCast((offlen & 0xffffff) - 12)) catch unreachable;
     defer main.allocator.free(buf);
     const rdlen = global.fd.preadAll(buf, (offlen >> 24) + 8) catch |e|
         ui.die("Error reading from file: {s}\n", .{ui.errorString(e)});
@@ -100,8 +102,9 @@ fn readBlock(num: u32) []const u8 {
     if (rawlen >= (1<<24)) die();

     block.data = main.allocator.alloc(u8, rawlen) catch unreachable;
-    // TODO: decompress
-    @memcpy(block.data, buf[4..][0..rawlen]);
+    const res = ZSTD_decompress(block.data.ptr, block.data.len, buf[4..].ptr, buf.len - 4);
+    if (res != block.data.len) ui.die("Error decompressing block {} (expected {} got {})\n", .{ num, block.data.len, res });
+
     return block.data;
 }

@@ -190,8 +193,8 @@ const CborVal = struct {
     fn bytes(v: *const CborVal) []const u8 {
         if (v.indef or (v.major != .bytes and v.major != .text)) die();
         if (v.rd.buf.len < v.arg) die();
-        defer v.rd.buf = v.rd.buf[v.arg..];
-        return v.rd.buf[0..v.arg];
+        defer v.rd.buf = v.rd.buf[@intCast(v.arg)..];
+        return v.rd.buf[0..@intCast(v.arg)];
     }

     // Skip current value.
@@ -207,13 +210,15 @@ const CborVal = struct {
         switch (v.major) {
             .bytes, .text => {
                 if (v.rd.buf.len < v.arg) die();
-                v.rd.buf = v.rd.buf[v.arg..];
+                v.rd.buf = v.rd.buf[@intCast(v.arg)..];
             },
             .array => {
-                for (0..v.arg) |_| v.rd.next().skip();
+                if (v.arg > (1<<24)) die();
+                for (0..@intCast(v.arg)) |_| v.rd.next().skip();
             },
             .map => {
-                for (0..v.arg*|2) |_| v.rd.next().skip();
+                if (v.arg > (1<<24)) die();
+                for (0..@intCast(v.arg*|2)) |_| v.rd.next().skip();
             },
             else => {},
         }
@@ -297,7 +302,7 @@ test "CBOR skip parsing" {

 const ItemParser = struct {
     r: CborReader,
-    len: ?usize = null,
+    len: ?u64 = null,

     const Field = struct {
         key: ItemKey,
@@ -344,7 +349,7 @@ fn readItem(ref: u64) ItemParser {
     if (ref >= (1 << (24 + 32))) die();
     const block = readBlock(@intCast(ref >> 24));
     if ((ref & 0xffffff) > block.len) die();
-    return ItemParser.init(block[(ref & 0xffffff)..]);
+    return ItemParser.init(block[@intCast(ref & 0xffffff)..]);
 }

 const Import = struct {
diff --git a/src/main.zig b/src/main.zig
index bfca8ae..cc3bdf4 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -72,9 +72,7 @@ pub const config = struct {
     pub var exclude_kernfs: bool = false;
     pub var exclude_patterns: std.ArrayList([:0]const u8) = std.ArrayList([:0]const u8).init(allocator);
     pub var threads: usize = 1;
-    pub var compression: enum { none, zlib, zstd, lz4 } = .none;
-    pub var complevel: u8 = 5;
-    pub var blocksize: usize = 64*1024;
+    pub var complevel: u8 = 4;
     pub var update_delay: u64 = 100*std.time.ns_per_ms;

     pub var scan_ui: ?enum { none, line, full } = null;
@@ -269,7 +267,11 @@ fn argConfig(args: *Args, opt: Args.Option) bool {
     else if (opt.is("--include-caches")) config.exclude_caches = false
     else if (opt.is("--exclude-kernfs")) config.exclude_kernfs = true
     else if (opt.is("--include-kernfs")) config.exclude_kernfs = false
-    else if (opt.is("--confirm-quit")) config.confirm_quit = true
+    else if (opt.is("--compress-level")) {
+        const val = args.arg();
+        config.complevel = std.fmt.parseInt(u8, val, 10) catch ui.die("Invalid number for --compress-level: {s}.\n", .{val});
+        if (config.complevel <= 0 or config.complevel > 20) ui.die("Invalid number for --compress-level: {s}.\n", .{val});
+    } else if (opt.is("--confirm-quit")) config.confirm_quit = true
     else if (opt.is("--no-confirm-quit")) config.confirm_quit = false
     else if (opt.is("--confirm-delete")) config.confirm_delete = true
     else if (opt.is("--no-confirm-delete")) config.confirm_delete = false
@@ -523,17 +525,7 @@ pub fn main() void {
             else if (opt.is("-f")) import_file = allocator.dupeZ(u8, args.arg()) catch unreachable
             else if (opt.is("--ignore-config")) {}
             else if (opt.is("--quit-after-scan")) quit_after_scan = true // undocumented feature to help with benchmarking scan/import
-            else if (opt.is("--binfmt")) { // Experimental, for benchmarking
-                const a = args.arg();
-                config.compression = switch (a[0]) {
-                    'z' => .zlib,
-                    's','S' => .zstd,
-                    'l' => .lz4,
-                    else => .none,
-                };
-                config.complevel = (a[1] - '0') + (if (a[0] == 'S') @as(u8, 10) else 0);
-                config.blocksize = @as(usize, 8*1024) << @intCast(a[2] - '0'); // 0 = 8k, 1 16k, 2 32k, 3 64k, 4 128k, 5 256k, 6 512k
-            } else if (argConfig(&args, opt)) {}
+            else if (argConfig(&args, opt)) {}
             else ui.die("Unrecognized option '{s}'.\n", .{opt.val});
         }
     }
diff --git a/src/sink.zig b/src/sink.zig
index 8a51a80..e04dd7f 100644
--- a/src/sink.zig
+++ b/src/sink.zig
@@ -279,7 +279,7 @@ pub fn createRoot(path: []const u8, stat: *const Stat) *Dir {
         .out = switch (global.sink) {
             .mem => .{ .mem = mem_sink.createRoot(path, stat) },
             .json => .{ .json = json_export.createRoot(path, stat) },
-            .bin => .{ .bin = bin_export.createRoot(stat) },
+            .bin => .{ .bin = bin_export.createRoot(stat, global.threads) },
         },
     };
     return d;
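
A few notes on the pieces this patch touches, for anyone following along.

The patch declares the libzstd entry points it needs by hand instead of pulling zstd.h through @cImport, which keeps the dependency surface to exactly the functions used. The one-shot API is small enough to exercise in isolation; the sketch below round-trips a buffer through the same three entry points the patch declares. Signatures match zstd's public header; the buffer sizes and level 4 are arbitrary demo values.

```zig
const std = @import("std");

// The same one-shot libzstd entry points the patch declares by hand instead
// of running zstd.h through @cImport. Build with: zig build-exe demo.zig -lc -lzstd
extern fn ZSTD_compress(dst: ?*anyopaque, dstCapacity: usize, src: ?*const anyopaque, srcSize: usize, compressionLevel: c_int) usize;
extern fn ZSTD_decompress(dst: ?*anyopaque, dstCapacity: usize, src: ?*const anyopaque, compressedSize: usize) usize;
extern fn ZSTD_isError(code: usize) c_uint;

pub fn main() !void {
    const raw: []const u8 = "hello hello hello hello hello hello hello";
    var comp: [128]u8 = undefined;
    var back: [128]u8 = undefined;

    // On success the return value is the compressed size; on failure it is
    // an error code that ZSTD_isError() recognizes.
    const clen = ZSTD_compress(&comp, comp.len, raw.ptr, raw.len, 4);
    if (ZSTD_isError(clen) != 0) return error.CompressFailed;

    // A successful decompression reproduces exactly the original length.
    const rlen = ZSTD_decompress(&back, back.len, &comp, clen);
    if (rlen != raw.len or !std.mem.eql(u8, raw, back[0..rlen])) return error.RoundTripFailed;

    std.debug.print("{d} raw -> {d} compressed\n", .{ raw.len, clen });
}
```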
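COMPRESSED_SIZE is worth cross-checking, since it's a bare constant with only a comment tying it to zstd: the worst-case output for a 64 KiB input is ZSTD_COMPRESSBOUND(65536) = 65536 + (65536 >> 8) + ((128 KiB - 65536) >> 11) = 65536 + 256 + 32 = 65824, so the constant is right. Sizing tmp at this bound is also what makes the retry loop in compressZstd sound: with a destination buffer that can never be too small, the patch's comment argues that an allocation failure inside libzstd is the only error left, hence the ui.oom() call. If you'd rather have the compiler enforce the relationship than trust the comment, something along these lines would do it (compressBound here is a hand-mirrored copy of the zstd.h macro, not an existing function in the tree):

```zig
const std = @import("std");

const BLOCK_SIZE: usize = 64*1024;
const COMPRESSED_SIZE: usize = 65824; // ZSTD_COMPRESSBOUND(BLOCK_SIZE)

// Hand-mirrored copy of the ZSTD_COMPRESSBOUND() macro from zstd.h: input
// size, plus 1/256th of it, plus a 0..64 byte margin for inputs under 128 KiB.
fn compressBound(src_size: usize) usize {
    const margin: usize = if (src_size < (128 << 10)) ((128 << 10) - src_size) >> 11 else 0;
    return src_size + (src_size >> 8) + margin;
}

comptime {
    if (compressBound(BLOCK_SIZE) != COMPRESSED_SIZE)
        @compileError("COMPRESSED_SIZE is out of sync with BLOCK_SIZE");
}
```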
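Most of the @intCast additions in bin_reader.zig are fallout from the reference packing rather than new logic: an itemref is (block_num << 24) | offset_in_block, and an index entry packs (file_offset << 24) | block_length, which is why 0xffffff masks and >> 24 shifts show up throughout. On 32-bit targets usize is u32, so slicing by a CBOR-decoded u64 now range-checks against 1 << 24 first and then casts, which is the actual 32-bit build fix. A toy version of the itemref half (helper names invented for illustration, not from the codebase):

```zig
const std = @import("std");

// Invented helper names; the packing itself matches the patch:
// itemref = (block_num << 24) | offset_in_block.
fn packRef(block_num: u32, off: u24) u64 {
    return (@as(u64, block_num) << 24) | off;
}

fn refBlock(ref: u64) u32 {
    // Callers validate ref < 1 << (24 + 32) first, as readItem() does.
    return @intCast(ref >> 24);
}

fn refOffset(ref: u64) u24 {
    return @truncate(ref); // low 24 bits
}

test "itemref round-trips" {
    const ref = packRef(7, 0x123456);
    try std.testing.expectEqual(@as(u32, 7), refBlock(ref));
    try std.testing.expectEqual(@as(u24, 0x123456), refOffset(ref));
}
```

The 24-bit offset field is also why blocks have to stay well under 16 MiB, and why readBlock rejects any raw length >= 1 << 24.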
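Finally, the payload inside those blocks is CBOR, which is what cborByte, cborIndef and the reader's CborMajor/CborVal machinery implement: per RFC 8949, every data item starts with a head byte holding a 3-bit major type and a 5-bit argument. That may also be the story behind the file signature: 0xbf, the first byte of "\xbfncduEX1", happens to be the head byte of an indefinite-length map. A standalone sketch (the major-type names here are my own shorthand, not necessarily the enum in bin_export.zig):

```zig
const std = @import("std");

// RFC 8949 head byte: 3-bit major type in the high bits, 5-bit argument below.
// Arguments 0..23 are immediate; 24..27 mean "length follows"; 31 is indefinite.
const CborMajor = enum(u3) { uint, nint, bytes, text, array, map, tag, simple };

fn cborByte(major: CborMajor, arg: u5) u8 {
    return (@as(u8, @intFromEnum(major)) << 5) | arg;
}

test "head bytes" {
    try std.testing.expectEqual(@as(u8, 0xbf), cborByte(.map, 31)); // indefinite-length map
    try std.testing.expectEqual(@as(u8, 0x82), cborByte(.array, 2)); // 2-element array
    try std.testing.expectEqual(@as(u8, 0x18), cborByte(.uint, 24)); // u8 value follows
}
```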