Stick with zstd-4 + 64k block, add --compress-level, fix 32bit build

And do dynamic buffer allocation for bin_export, removing 128k of
.rodata that I accidentally introduced earlier and reducing memory use
for parallel scans.

Static binaries now also include the minimal version of zstd, current
sizes for x86_64 are:

  582k ncdu-2.5
  601k ncdu-new-nocompress
  765k ncdu-new-zstd

That's not great, but also not awful. Even zlib or LZ4 would've resulted
in a 700k binary.
This commit is contained in:
Yorhel 2024-08-03 13:16:44 +02:00
parent 85e12beb1c
commit 8ad61e87c1
9 changed files with 75 additions and 71 deletions

1
.gitignore vendored
View file

@@ -4,6 +4,7 @@
 *.swp
 *~
 ncurses
+zstd
 static-*/
 zig-cache/
 zig-out/

View file

@@ -52,9 +52,21 @@ dist:
 	rm -rf ncdu-${NCDU_VERSION}
 
-# ASSUMPTION: the ncurses source tree has been extracted into ncurses/
+# ASSUMPTION:
+# - the ncurses source tree has been extracted into ncurses/
+# - the zstd source tree has been extracted into zstd/
+# Would be nicer to do all this with the Zig build system, but no way am I
+# going to write build.zig's for these projects.
 static-%.tar.gz:
 	mkdir -p static-$*/nc static-$*/inst/pkg
+	cp -R zstd/lib static-$*/zstd
+	make -C static-$*/zstd -j8 libzstd.a V=1\
+		ZSTD_LIB_DICTBUILDER=0\
+		ZSTD_LIB_MINIFY=1\
+		ZSTD_LIB_EXCLUDE_COMPRESSORS_DFAST_AND_UP=1\
+		CC="${ZIG} cc --target=$*"\
+		LD="${ZIG} cc --target=$*"\
+		AR="${ZIG} ar" RANLIB="${ZIG} ranlib"
 	cd static-$*/nc && ../../ncurses/configure --prefix="`pwd`/../inst"\
 		--with-pkg-config-libdir="`pwd`/../inst/pkg"\
 		--without-cxx --without-cxx-binding --without-ada --without-manpages --without-progs\
@@ -71,7 +83,7 @@ static-%.tar.gz:
 	@# --build-file ../build.zig --search-prefix inst/ --cache-dir zig -Drelease-fast=true
 	@# Alternative approach, bypassing zig-build
 	cd static-$* && ${ZIG} build-exe -target $*\
-		-Iinst/include -Iinst/include/ncursesw -lc inst/lib/libncursesw.a\
+		-Iinst/include -Iinst/include/ncursesw -Izstd -lc inst/lib/libncursesw.a zstd/libzstd.a\
 		--cache-dir zig-cache -static -fstrip -O ReleaseFast ../src/main.zig
 	cd static-$* && mv main ncdu && tar -czf ../static-$*.tar.gz ncdu
 	rm -rf static-$*

View file

@@ -21,7 +21,8 @@ C version (1.x).
 - Zig 0.12 or 0.13.
 - Some sort of POSIX-like OS
-- ncurses libraries and header files
+- ncurses
+- libzstd
 
 ## Install

View file

@@ -19,9 +19,7 @@ pub fn build(b: *std.Build) void {
     exe.pie = pie;
     exe.root_module.linkSystemLibrary("ncursesw", .{});
-    exe.root_module.linkSystemLibrary("zlib", .{});
     exe.root_module.linkSystemLibrary("libzstd", .{});
-    exe.root_module.linkSystemLibrary("lz4", .{});
 
     // https://github.com/ziglang/zig/blob/b52be973dfb7d1408218b8e75800a2da3dc69108/build.zig#L551-L554
     if (target.result.isDarwin()) {
         // useful for package maintainers

View file

@@ -27,6 +27,7 @@ use bytes;
 no warnings 'portable';
 use List::Util 'min', 'max';
 use CBOR::XS; # Does not officially support recent perl versions, but it's the only CPAN module that supports streaming.
+use Compress::Zstd;
 
 my $printblocks = grep $_ eq 'blocks', @ARGV;
 my $printitems = grep $_ eq 'items', @ARGV;
@@ -76,13 +77,17 @@ sub datablock($prefix, $off, $blklen, $content) {
     die sprintf "%s: Duplicate block id %d (first at %010x)", $prefix, $num, $datablocks{$num}>>24 if $datablocks{$num};
     $datablocks{$num} = ($off << 24) | $blklen;
-    $printblocks && printf "%s: data block %d rawlen %d (%.2f)\n", $prefix, $num, $rawlen, $rawlen/(length($content)-8)*100;
-    $datablock_len += length($content)-8;
+    my $compressed = substr $content, 8;
+    $printblocks && printf "%s: data block %d rawlen %d (%.2f)\n", $prefix, $num, $rawlen, $rawlen/(length($compressed))*100;
+    $datablock_len += length($compressed);
     $rawdata_len += $rawlen;
-    # TODO: Decompress
-    cbordata($num, substr $content, 8);
+    my $rawdata = decompress($compressed);
+    die "$prefix: Block id $num failed decompression\n" if !defined $rawdata;
+    die sprintf "%s: Block id %d decompressed to %d bytes but expected %d\n",
+        $prefix, $num, length($rawdata), $rawlen if $rawlen != length $rawdata;
+    cbordata($num, $rawdata);
 }

View file

@@ -7,11 +7,9 @@ const model = @import("model.zig");
 const sink = @import("sink.zig");
 const util = @import("util.zig");
 const ui = @import("ui.zig");
-const c = @cImport({
-    @cInclude("zlib.h");
-    @cInclude("zstd.h");
-    @cInclude("lz4.h");
-});
+
+extern fn ZSTD_compress(dst: ?*anyopaque, dstCapacity: usize, src: ?*const anyopaque, srcSize: usize, compressionLevel: c_int) usize;
+extern fn ZSTD_isError(code: usize) c_uint;
 
 pub const global = struct {
     var fd: std.fs.File = undefined;
@@ -21,7 +19,8 @@ pub const global = struct {
     var root_itemref: u64 = 0;
 };
 
-const BLOCK_SIZE: usize = 512*1024; // XXX: Current maximum for benchmarking, should just stick with a fixed block size.
+const BLOCK_SIZE: usize = 64*1024;
+const COMPRESSED_SIZE: usize = 65824; // ZSTD_COMPRESSBOUND(BLOCK_SIZE)
 
 pub const SIGNATURE = "\xbfncduEX1";
@@ -69,48 +68,30 @@ inline fn cborByte(major: CborMajor, arg: u5) u8 { return (@as(u8, @intFromEnum(
 pub const Thread = struct {
-    buf: [BLOCK_SIZE]u8 = undefined,
+    buf: []u8 = undefined,
     off: usize = BLOCK_SIZE,
     block_num: u32 = std.math.maxInt(u32),
     itemref: u64 = 0, // ref of item currently being written
+    tmp: []u8 = undefined, // Temporary buffer for headers and compression.
 
-    // Temporary buffer for headers and compression.
-    // TODO: check with compressBound()/ZSTD_compressBound()
-    tmp: [BLOCK_SIZE+128]u8 = undefined,
-
+    // unused, but kept around for easy debugging
     fn compressNone(in: []const u8, out: []u8) usize {
         @memcpy(out[0..in.len], in);
         return in.len;
     }
 
-    fn compressZlib(in: []const u8, out: []u8) usize {
-        var outlen: c.uLongf = out.len;
-        const r = c.compress2(out.ptr, &outlen, in.ptr, in.len, main.config.complevel);
-        std.debug.assert(r == c.Z_OK);
-        return outlen;
-    }
-
     fn compressZstd(in: []const u8, out: []u8) usize {
-        const r = c.ZSTD_compress(out.ptr, out.len, in.ptr, in.len, main.config.complevel);
-        std.debug.assert(c.ZSTD_isError(r) == 0);
-        return r;
-    }
-
-    fn compressLZ4(in: []const u8, out: []u8) usize {
-        const r = c.LZ4_compress_default(in.ptr, out.ptr, @intCast(in.len), @intCast(out.len));
-        std.debug.assert(r > 0);
-        return @intCast(r);
+        while (true) {
+            const r = ZSTD_compress(out.ptr, out.len, in.ptr, in.len, main.config.complevel);
+            if (ZSTD_isError(r) == 0) return r;
+            ui.oom(); // That *ought* to be the only reason the above call can fail.
+        }
     }
 
     fn createBlock(t: *Thread) []const u8 {
         if (t.block_num == std.math.maxInt(u32) or t.off <= 1) return "";
-        const bodylen = switch (main.config.compression) {
-            .none => compressNone(t.buf[0..t.off], t.tmp[12..]),
-            .zlib => compressZlib(t.buf[0..t.off], t.tmp[12..]),
-            .zstd => compressZstd(t.buf[0..t.off], t.tmp[12..]),
-            .lz4 => compressLZ4(t.buf[0..t.off], t.tmp[12..]),
-        };
+        const bodylen = compressZstd(t.buf[0..t.off], t.tmp[12..]);
         const blocklen: u32 = @intCast(bodylen + 16);
         t.tmp[0..4].* = blockHeader(1, blocklen);
         t.tmp[4..8].* = bigu32(t.block_num);
@@ -188,7 +169,7 @@ pub const Thread = struct {
     // Reserve space for a new item, write out the type, prev and name fields and return the itemref.
     fn itemStart(t: *Thread, itype: model.EType, prev_item: u64, name: []const u8) u64 {
         const min_len = name.len + MAX_ITEM_LEN;
-        if (t.off + min_len > main.config.blocksize) t.flush(min_len);
+        if (t.off + min_len > t.buf.len) t.flush(min_len);
         t.itemref = (@as(u64, t.block_num) << 24) | t.off;
 
         t.cborIndef(.map);
@@ -418,12 +399,21 @@ pub const Dir = struct {
 };
 
-pub fn createRoot(stat: *const sink.Stat) Dir {
+pub fn createRoot(stat: *const sink.Stat, threads: []sink.Thread) Dir {
+    for (threads) |*t| {
+        t.sink.bin.buf = main.allocator.alloc(u8, BLOCK_SIZE) catch unreachable;
+        t.sink.bin.tmp = main.allocator.alloc(u8, COMPRESSED_SIZE) catch unreachable;
+    }
     return .{ .stat = stat.* };
 }
 
 pub fn done(threads: []sink.Thread) void {
-    for (threads) |*t| t.sink.bin.flush(0);
+    for (threads) |*t| {
+        t.sink.bin.flush(0);
+        main.allocator.free(t.sink.bin.buf);
+        main.allocator.free(t.sink.bin.tmp);
+    }
 
     while (std.mem.endsWith(u8, global.index.items, &[1]u8{0}**8))
         global.index.shrinkRetainingCapacity(global.index.items.len - 8);

View file

@@ -9,6 +9,8 @@ const sink = @import("sink.zig");
 const ui = @import("ui.zig");
 const bin_export = @import("bin_export.zig");
 
+extern fn ZSTD_decompress(dst: ?*anyopaque, dstCapacity: usize, src: ?*const anyopaque, compressedSize: usize) usize;
+
 const CborMajor = bin_export.CborMajor;
 const ItemKey = bin_export.ItemKey;
@@ -90,7 +92,7 @@ fn readBlock(num: u32) []const u8 {
     const offlen = bigu64(global.index[num*8..][0..8].*);
     if ((offlen & 0xffffff) < 16) die();
-    const buf = main.allocator.alloc(u8, (offlen & 0xffffff) - 12) catch unreachable;
+    const buf = main.allocator.alloc(u8, @intCast((offlen & 0xffffff) - 12)) catch unreachable;
     defer main.allocator.free(buf);
     const rdlen = global.fd.preadAll(buf, (offlen >> 24) + 8)
         catch |e| ui.die("Error reading from file: {s}\n", .{ui.errorString(e)});
@@ -100,8 +102,9 @@ fn readBlock(num: u32) []const u8 {
     if (rawlen >= (1<<24)) die();
     block.data = main.allocator.alloc(u8, rawlen) catch unreachable;
-    // TODO: decompress
-    @memcpy(block.data, buf[4..][0..rawlen]);
+    const res = ZSTD_decompress(block.data.ptr, block.data.len, buf[4..].ptr, buf.len - 4);
+    if (res != block.data.len) ui.die("Error decompressing block {} (expected {} got {})\n", .{ num, block.data.len, res });
+
     return block.data;
 }
@@ -190,8 +193,8 @@ const CborVal = struct {
     fn bytes(v: *const CborVal) []const u8 {
         if (v.indef or (v.major != .bytes and v.major != .text)) die();
         if (v.rd.buf.len < v.arg) die();
-        defer v.rd.buf = v.rd.buf[v.arg..];
-        return v.rd.buf[0..v.arg];
+        defer v.rd.buf = v.rd.buf[@intCast(v.arg)..];
+        return v.rd.buf[0..@intCast(v.arg)];
     }
 
     // Skip current value.
@@ -207,13 +210,15 @@ const CborVal = struct {
         switch (v.major) {
             .bytes, .text => {
                 if (v.rd.buf.len < v.arg) die();
-                v.rd.buf = v.rd.buf[v.arg..];
+                v.rd.buf = v.rd.buf[@intCast(v.arg)..];
             },
             .array => {
-                for (0..v.arg) |_| v.rd.next().skip();
+                if (v.arg > (1<<24)) die();
+                for (0..@intCast(v.arg)) |_| v.rd.next().skip();
             },
             .map => {
-                for (0..v.arg*|2) |_| v.rd.next().skip();
+                if (v.arg > (1<<24)) die();
+                for (0..@intCast(v.arg*|2)) |_| v.rd.next().skip();
             },
             else => {},
         }
@@ -297,7 +302,7 @@ test "CBOR skip parsing" {
 const ItemParser = struct {
     r: CborReader,
-    len: ?usize = null,
+    len: ?u64 = null,
 
     const Field = struct {
         key: ItemKey,
@@ -344,7 +349,7 @@ fn readItem(ref: u64) ItemParser {
     if (ref >= (1 << (24 + 32))) die();
     const block = readBlock(@intCast(ref >> 24));
     if ((ref & 0xffffff) > block.len) die();
-    return ItemParser.init(block[(ref & 0xffffff)..]);
+    return ItemParser.init(block[@intCast(ref & 0xffffff)..]);
 }
 
 const Import = struct {
const Import = struct { const Import = struct {

View file

@@ -72,9 +72,7 @@ pub const config = struct {
     pub var exclude_kernfs: bool = false;
     pub var exclude_patterns: std.ArrayList([:0]const u8) = std.ArrayList([:0]const u8).init(allocator);
     pub var threads: usize = 1;
-    pub var compression: enum { none, zlib, zstd, lz4 } = .none;
-    pub var complevel: u8 = 5;
-    pub var blocksize: usize = 64*1024;
+    pub var complevel: u8 = 4;
 
     pub var update_delay: u64 = 100*std.time.ns_per_ms;
     pub var scan_ui: ?enum { none, line, full } = null;
@@ -269,7 +267,11 @@ fn argConfig(args: *Args, opt: Args.Option) bool {
     else if (opt.is("--include-caches")) config.exclude_caches = false
     else if (opt.is("--exclude-kernfs")) config.exclude_kernfs = true
     else if (opt.is("--include-kernfs")) config.exclude_kernfs = false
-    else if (opt.is("--confirm-quit")) config.confirm_quit = true
+    else if (opt.is("--compress-level")) {
+        const val = args.arg();
+        config.complevel = std.fmt.parseInt(u8, val, 10) catch ui.die("Invalid number for --compress-level: {s}.\n", .{val});
+        if (config.complevel <= 0 or config.complevel > 20) ui.die("Invalid number for --compress-level: {s}.\n", .{val});
+    } else if (opt.is("--confirm-quit")) config.confirm_quit = true
     else if (opt.is("--no-confirm-quit")) config.confirm_quit = false
     else if (opt.is("--confirm-delete")) config.confirm_delete = true
     else if (opt.is("--no-confirm-delete")) config.confirm_delete = false
@@ -523,17 +525,7 @@ pub fn main() void {
         else if (opt.is("-f")) import_file = allocator.dupeZ(u8, args.arg()) catch unreachable
        else if (opt.is("--ignore-config")) {}
         else if (opt.is("--quit-after-scan")) quit_after_scan = true // undocumented feature to help with benchmarking scan/import
-        else if (opt.is("--binfmt")) { // Experimental, for benchmarking
-            const a = args.arg();
-            config.compression = switch (a[0]) {
-                'z' => .zlib,
-                's','S' => .zstd,
-                'l' => .lz4,
-                else => .none,
-            };
-            config.complevel = (a[1] - '0') + (if (a[0] == 'S') @as(u8, 10) else 0);
-            config.blocksize = @as(usize, 8*1024) << @intCast(a[2] - '0'); // 0 = 8k, 1 16k, 2 32k, 3 64k, 4 128k, 5 256k, 6 512k
-        } else if (argConfig(&args, opt)) {}
+        else if (argConfig(&args, opt)) {}
         else ui.die("Unrecognized option '{s}'.\n", .{opt.val});
     }
 }

View file

@@ -279,7 +279,7 @@ pub fn createRoot(path: []const u8, stat: *const Stat) *Dir {
         .out = switch (global.sink) {
             .mem => .{ .mem = mem_sink.createRoot(path, stat) },
             .json => .{ .json = json_export.createRoot(path, stat) },
-            .bin => .{ .bin = bin_export.createRoot(stat) },
+            .bin => .{ .bin = bin_export.createRoot(stat, global.threads) },
        },
     };
     return d;