Stick with zstd-4 + 64k block, add --compress-level, fix 32-bit build

And do dynamic buffer allocation for bin_export, removing 128k of
.rodata that I accidentally introduced earlier and reducing memory use
for parallel scans.

Static binaries now also include the minimal version of zstd; current
sizes for x86_64 are:

  582k ncdu-2.5
  601k ncdu-new-nocompress
  765k ncdu-new-zstd

That's not great, but also not awful. Even zlib or LZ4 would've resulted
in a 700k binary.
This commit is contained in:
Yorhel 2024-08-03 13:16:44 +02:00
parent 85e12beb1c
commit 8ad61e87c1
9 changed files with 75 additions and 71 deletions

1
.gitignore vendored
View file

@ -4,6 +4,7 @@
*.swp
*~
ncurses
zstd
static-*/
zig-cache/
zig-out/

View file

@ -52,9 +52,21 @@ dist:
rm -rf ncdu-${NCDU_VERSION}
# ASSUMPTION: the ncurses source tree has been extracted into ncurses/
# ASSUMPTION:
# - the ncurses source tree has been extracted into ncurses/
# - the zstd source tree has been extracted into zstd/
# Would be nicer to do all this with the Zig build system, but no way am I
# going to write build.zig's for these projects.
static-%.tar.gz:
mkdir -p static-$*/nc static-$*/inst/pkg
cp -R zstd/lib static-$*/zstd
make -C static-$*/zstd -j8 libzstd.a V=1\
ZSTD_LIB_DICTBUILDER=0\
ZSTD_LIB_MINIFY=1\
ZSTD_LIB_EXCLUDE_COMPRESSORS_DFAST_AND_UP=1\
CC="${ZIG} cc --target=$*"\
LD="${ZIG} cc --target=$*"\
AR="${ZIG} ar" RANLIB="${ZIG} ranlib"
cd static-$*/nc && ../../ncurses/configure --prefix="`pwd`/../inst"\
--with-pkg-config-libdir="`pwd`/../inst/pkg"\
--without-cxx --without-cxx-binding --without-ada --without-manpages --without-progs\
@ -71,7 +83,7 @@ static-%.tar.gz:
@# --build-file ../build.zig --search-prefix inst/ --cache-dir zig -Drelease-fast=true
@# Alternative approach, bypassing zig-build
cd static-$* && ${ZIG} build-exe -target $*\
-Iinst/include -Iinst/include/ncursesw -lc inst/lib/libncursesw.a\
-Iinst/include -Iinst/include/ncursesw -Izstd -lc inst/lib/libncursesw.a zstd/libzstd.a\
--cache-dir zig-cache -static -fstrip -O ReleaseFast ../src/main.zig
cd static-$* && mv main ncdu && tar -czf ../static-$*.tar.gz ncdu
rm -rf static-$*

View file

@ -21,7 +21,8 @@ C version (1.x).
- Zig 0.12 or 0.13.
- Some sort of POSIX-like OS
- ncurses libraries and header files
- ncurses
- libzstd
## Install

View file

@ -19,9 +19,7 @@ pub fn build(b: *std.Build) void {
exe.pie = pie;
exe.root_module.linkSystemLibrary("ncursesw", .{});
exe.root_module.linkSystemLibrary("zlib", .{});
exe.root_module.linkSystemLibrary("libzstd", .{});
exe.root_module.linkSystemLibrary("lz4", .{});
// https://github.com/ziglang/zig/blob/b52be973dfb7d1408218b8e75800a2da3dc69108/build.zig#L551-L554
if (target.result.isDarwin()) {
// useful for package maintainers

View file

@ -27,6 +27,7 @@ use bytes;
no warnings 'portable';
use List::Util 'min', 'max';
use CBOR::XS; # Does not officially support recent perl versions, but it's the only CPAN module that supports streaming.
use Compress::Zstd;
my $printblocks = grep $_ eq 'blocks', @ARGV;
my $printitems = grep $_ eq 'items', @ARGV;
@ -76,13 +77,17 @@ sub datablock($prefix, $off, $blklen, $content) {
die sprintf "%s: Duplicate block id %d (first at %010x)", $prefix, $num, $datablocks{$num}>>24 if $datablocks{$num};
$datablocks{$num} = ($off << 24) | $blklen;
$printblocks && printf "%s: data block %d rawlen %d (%.2f)\n", $prefix, $num, $rawlen, $rawlen/(length($content)-8)*100;
my $compressed = substr $content, 8;
$printblocks && printf "%s: data block %d rawlen %d (%.2f)\n", $prefix, $num, $rawlen, $rawlen/(length($compressed))*100;
$datablock_len += length($content)-8;
$datablock_len += length($compressed);
$rawdata_len += $rawlen;
# TODO: Decompress
cbordata($num, substr $content, 8);
my $rawdata = decompress($compressed);
die "$prefix: Block id $num failed decompression\n" if !defined $rawdata;
die sprintf "%s: Block id %d decompressed to %d bytes but expected %d\n",
$prefix, $num, length($rawdata), $rawlen if $rawlen != length $rawdata;
cbordata($num, $rawdata);
}

View file

@ -7,11 +7,9 @@ const model = @import("model.zig");
const sink = @import("sink.zig");
const util = @import("util.zig");
const ui = @import("ui.zig");
const c = @cImport({
@cInclude("zlib.h");
@cInclude("zstd.h");
@cInclude("lz4.h");
});
extern fn ZSTD_compress(dst: ?*anyopaque, dstCapacity: usize, src: ?*const anyopaque, srcSize: usize, compressionLevel: c_int) usize;
extern fn ZSTD_isError(code: usize) c_uint;
pub const global = struct {
var fd: std.fs.File = undefined;
@ -21,7 +19,8 @@ pub const global = struct {
var root_itemref: u64 = 0;
};
const BLOCK_SIZE: usize = 512*1024; // XXX: Current maximum for benchmarking, should just stick with a fixed block size.
const BLOCK_SIZE: usize = 64*1024;
const COMPRESSED_SIZE: usize = 65824; // ZSTD_COMPRESSBOUND(BLOCK_SIZE)
pub const SIGNATURE = "\xbfncduEX1";
@ -69,48 +68,30 @@ inline fn cborByte(major: CborMajor, arg: u5) u8 { return (@as(u8, @intFromEnum(
pub const Thread = struct {
buf: [BLOCK_SIZE]u8 = undefined,
buf: []u8 = undefined,
off: usize = BLOCK_SIZE,
block_num: u32 = std.math.maxInt(u32),
itemref: u64 = 0, // ref of item currently being written
tmp: []u8 = undefined, // Temporary buffer for headers and compression.
// Temporary buffer for headers and compression.
// TODO: check with compressBound()/ZSTD_compressBound()
tmp: [BLOCK_SIZE+128]u8 = undefined,
// unused, but kept around for easy debugging
fn compressNone(in: []const u8, out: []u8) usize {
@memcpy(out[0..in.len], in);
return in.len;
}
fn compressZlib(in: []const u8, out: []u8) usize {
var outlen: c.uLongf = out.len;
const r = c.compress2(out.ptr, &outlen, in.ptr, in.len, main.config.complevel);
std.debug.assert(r == c.Z_OK);
return outlen;
}
fn compressZstd(in: []const u8, out: []u8) usize {
const r = c.ZSTD_compress(out.ptr, out.len, in.ptr, in.len, main.config.complevel);
std.debug.assert(c.ZSTD_isError(r) == 0);
return r;
while (true) {
const r = ZSTD_compress(out.ptr, out.len, in.ptr, in.len, main.config.complevel);
if (ZSTD_isError(r) == 0) return r;
ui.oom(); // That *ought* to be the only reason the above call can fail.
}
fn compressLZ4(in: []const u8, out: []u8) usize {
const r = c.LZ4_compress_default(in.ptr, out.ptr, @intCast(in.len), @intCast(out.len));
std.debug.assert(r > 0);
return @intCast(r);
}
fn createBlock(t: *Thread) []const u8 {
if (t.block_num == std.math.maxInt(u32) or t.off <= 1) return "";
const bodylen = switch (main.config.compression) {
.none => compressNone(t.buf[0..t.off], t.tmp[12..]),
.zlib => compressZlib(t.buf[0..t.off], t.tmp[12..]),
.zstd => compressZstd(t.buf[0..t.off], t.tmp[12..]),
.lz4 => compressLZ4(t.buf[0..t.off], t.tmp[12..]),
};
const bodylen = compressZstd(t.buf[0..t.off], t.tmp[12..]);
const blocklen: u32 = @intCast(bodylen + 16);
t.tmp[0..4].* = blockHeader(1, blocklen);
t.tmp[4..8].* = bigu32(t.block_num);
@ -188,7 +169,7 @@ pub const Thread = struct {
// Reserve space for a new item, write out the type, prev and name fields and return the itemref.
fn itemStart(t: *Thread, itype: model.EType, prev_item: u64, name: []const u8) u64 {
const min_len = name.len + MAX_ITEM_LEN;
if (t.off + min_len > main.config.blocksize) t.flush(min_len);
if (t.off + min_len > t.buf.len) t.flush(min_len);
t.itemref = (@as(u64, t.block_num) << 24) | t.off;
t.cborIndef(.map);
@ -418,12 +399,21 @@ pub const Dir = struct {
};
pub fn createRoot(stat: *const sink.Stat) Dir {
pub fn createRoot(stat: *const sink.Stat, threads: []sink.Thread) Dir {
for (threads) |*t| {
t.sink.bin.buf = main.allocator.alloc(u8, BLOCK_SIZE) catch unreachable;
t.sink.bin.tmp = main.allocator.alloc(u8, COMPRESSED_SIZE) catch unreachable;
}
return .{ .stat = stat.* };
}
pub fn done(threads: []sink.Thread) void {
for (threads) |*t| t.sink.bin.flush(0);
for (threads) |*t| {
t.sink.bin.flush(0);
main.allocator.free(t.sink.bin.buf);
main.allocator.free(t.sink.bin.tmp);
}
while (std.mem.endsWith(u8, global.index.items, &[1]u8{0}**8))
global.index.shrinkRetainingCapacity(global.index.items.len - 8);

View file

@ -9,6 +9,8 @@ const sink = @import("sink.zig");
const ui = @import("ui.zig");
const bin_export = @import("bin_export.zig");
extern fn ZSTD_decompress(dst: ?*anyopaque, dstCapacity: usize, src: ?*const anyopaque, compressedSize: usize) usize;
const CborMajor = bin_export.CborMajor;
const ItemKey = bin_export.ItemKey;
@ -90,7 +92,7 @@ fn readBlock(num: u32) []const u8 {
const offlen = bigu64(global.index[num*8..][0..8].*);
if ((offlen & 0xffffff) < 16) die();
const buf = main.allocator.alloc(u8, (offlen & 0xffffff) - 12) catch unreachable;
const buf = main.allocator.alloc(u8, @intCast((offlen & 0xffffff) - 12)) catch unreachable;
defer main.allocator.free(buf);
const rdlen = global.fd.preadAll(buf, (offlen >> 24) + 8)
catch |e| ui.die("Error reading from file: {s}\n", .{ui.errorString(e)});
@ -100,8 +102,9 @@ fn readBlock(num: u32) []const u8 {
if (rawlen >= (1<<24)) die();
block.data = main.allocator.alloc(u8, rawlen) catch unreachable;
// TODO: decompress
@memcpy(block.data, buf[4..][0..rawlen]);
const res = ZSTD_decompress(block.data.ptr, block.data.len, buf[4..].ptr, buf.len - 4);
if (res != block.data.len) ui.die("Error decompressing block {} (expected {} got {})\n", .{ num, block.data.len, res });
return block.data;
}
@ -190,8 +193,8 @@ const CborVal = struct {
fn bytes(v: *const CborVal) []const u8 {
if (v.indef or (v.major != .bytes and v.major != .text)) die();
if (v.rd.buf.len < v.arg) die();
defer v.rd.buf = v.rd.buf[v.arg..];
return v.rd.buf[0..v.arg];
defer v.rd.buf = v.rd.buf[@intCast(v.arg)..];
return v.rd.buf[0..@intCast(v.arg)];
}
// Skip current value.
@ -207,13 +210,15 @@ const CborVal = struct {
switch (v.major) {
.bytes, .text => {
if (v.rd.buf.len < v.arg) die();
v.rd.buf = v.rd.buf[v.arg..];
v.rd.buf = v.rd.buf[@intCast(v.arg)..];
},
.array => {
for (0..v.arg) |_| v.rd.next().skip();
if (v.arg > (1<<24)) die();
for (0..@intCast(v.arg)) |_| v.rd.next().skip();
},
.map => {
for (0..v.arg*|2) |_| v.rd.next().skip();
if (v.arg > (1<<24)) die();
for (0..@intCast(v.arg*|2)) |_| v.rd.next().skip();
},
else => {},
}
@ -297,7 +302,7 @@ test "CBOR skip parsing" {
const ItemParser = struct {
r: CborReader,
len: ?usize = null,
len: ?u64 = null,
const Field = struct {
key: ItemKey,
@ -344,7 +349,7 @@ fn readItem(ref: u64) ItemParser {
if (ref >= (1 << (24 + 32))) die();
const block = readBlock(@intCast(ref >> 24));
if ((ref & 0xffffff) > block.len) die();
return ItemParser.init(block[(ref & 0xffffff)..]);
return ItemParser.init(block[@intCast(ref & 0xffffff)..]);
}
const Import = struct {

View file

@ -72,9 +72,7 @@ pub const config = struct {
pub var exclude_kernfs: bool = false;
pub var exclude_patterns: std.ArrayList([:0]const u8) = std.ArrayList([:0]const u8).init(allocator);
pub var threads: usize = 1;
pub var compression: enum { none, zlib, zstd, lz4 } = .none;
pub var complevel: u8 = 5;
pub var blocksize: usize = 64*1024;
pub var complevel: u8 = 4;
pub var update_delay: u64 = 100*std.time.ns_per_ms;
pub var scan_ui: ?enum { none, line, full } = null;
@ -269,7 +267,11 @@ fn argConfig(args: *Args, opt: Args.Option) bool {
else if (opt.is("--include-caches")) config.exclude_caches = false
else if (opt.is("--exclude-kernfs")) config.exclude_kernfs = true
else if (opt.is("--include-kernfs")) config.exclude_kernfs = false
else if (opt.is("--confirm-quit")) config.confirm_quit = true
else if (opt.is("--compress-level")) {
const val = args.arg();
config.complevel = std.fmt.parseInt(u8, val, 10) catch ui.die("Invalid number for --compress-level: {s}.\n", .{val});
if (config.complevel <= 0 or config.complevel > 20) ui.die("Invalid number for --compress-level: {s}.\n", .{val});
} else if (opt.is("--confirm-quit")) config.confirm_quit = true
else if (opt.is("--no-confirm-quit")) config.confirm_quit = false
else if (opt.is("--confirm-delete")) config.confirm_delete = true
else if (opt.is("--no-confirm-delete")) config.confirm_delete = false
@ -523,17 +525,7 @@ pub fn main() void {
else if (opt.is("-f")) import_file = allocator.dupeZ(u8, args.arg()) catch unreachable
else if (opt.is("--ignore-config")) {}
else if (opt.is("--quit-after-scan")) quit_after_scan = true // undocumented feature to help with benchmarking scan/import
else if (opt.is("--binfmt")) { // Experimental, for benchmarking
const a = args.arg();
config.compression = switch (a[0]) {
'z' => .zlib,
's','S' => .zstd,
'l' => .lz4,
else => .none,
};
config.complevel = (a[1] - '0') + (if (a[0] == 'S') @as(u8, 10) else 0);
config.blocksize = @as(usize, 8*1024) << @intCast(a[2] - '0'); // 0 = 8k, 1 16k, 2 32k, 3 64k, 4 128k, 5 256k, 6 512k
} else if (argConfig(&args, opt)) {}
else if (argConfig(&args, opt)) {}
else ui.die("Unrecognized option '{s}'.\n", .{opt.val});
}
}

View file

@ -279,7 +279,7 @@ pub fn createRoot(path: []const u8, stat: *const Stat) *Dir {
.out = switch (global.sink) {
.mem => .{ .mem = mem_sink.createRoot(path, stat) },
.json => .{ .json = json_export.createRoot(path, stat) },
.bin => .{ .bin = bin_export.createRoot(stat) },
.bin => .{ .bin = bin_export.createRoot(stat, global.threads) },
},
};
return d;