mirror of https://code.blicky.net/yorhel/ncdu.git
synced 2026-01-12 17:08:39 -09:00
Stick with zstd-4 + 64k block, add --compress-level, fix 32bit build
And do dynamic buffer allocation for bin_export, removing 128k of .rodata that I accidentally introduced earlier and reducing memory use for parallel scans.

Static binaries now also include the minimal version of zstd; current sizes for x86_64 are:

  582k  ncdu-2.5
  601k  ncdu-new-nocompress
  765k  ncdu-new-zstd

That's not great, but also not awful. Even zlib or LZ4 would've resulted in a 700k binary.
parent 85e12beb1c
commit 8ad61e87c1

9 changed files with 75 additions and 71 deletions
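Every export block is now unconditionally a zstd frame: each 64k block is compressed with ZSTD_compress() at the configured level (default 4) and decompressed again on import with ZSTD_decompress(). A minimal standalone round-trip sketch using the same externs as the diff below (not ncdu code; the file name and test data are made up; build with something like: zig run roundtrip.zig -lzstd -lc):

    const std = @import("std");

    // Same C signatures that the export and import code below declare.
    extern fn ZSTD_compress(dst: ?*anyopaque, dstCapacity: usize, src: ?*const anyopaque, srcSize: usize, compressionLevel: c_int) usize;
    extern fn ZSTD_decompress(dst: ?*anyopaque, dstCapacity: usize, src: ?*const anyopaque, compressedSize: usize) usize;
    extern fn ZSTD_isError(code: usize) c_uint;

    pub fn main() !void {
        var raw: [64 * 1024]u8 = undefined; // one export block
        for (&raw, 0..) |*b, i| b.* = @truncate(i); // mildly compressible test data
        var comp: [65824]u8 = undefined; // ZSTD_COMPRESSBOUND(64*1024)

        const clen = ZSTD_compress(&comp, comp.len, &raw, raw.len, 4); // level 4, the new default
        if (ZSTD_isError(clen) != 0) return error.CompressFailed;

        var back: [64 * 1024]u8 = undefined;
        const dlen = ZSTD_decompress(&back, back.len, &comp, clen);
        if (dlen != raw.len or !std.mem.eql(u8, &raw, &back)) return error.RoundTripFailed;

        std.debug.print("64 KiB -> {} bytes at zstd level 4\n", .{clen});
    }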
.gitignore (vendored): 1 line changed

@@ -4,6 +4,7 @@
 *.swp
 *~
 ncurses
+zstd
 static-*/
 zig-cache/
 zig-out/

Makefile: 16 lines changed

@@ -52,9 +52,21 @@ dist:
 	rm -rf ncdu-${NCDU_VERSION}
 
-# ASSUMPTION: the ncurses source tree has been extracted into ncurses/
+# ASSUMPTION:
+# - the ncurses source tree has been extracted into ncurses/
+# - the zstd source tree has been extracted into zstd/
 # Would be nicer to do all this with the Zig build system, but no way am I
 # going to write build.zig's for these projects.
 static-%.tar.gz:
 	mkdir -p static-$*/nc static-$*/inst/pkg
+	cp -R zstd/lib static-$*/zstd
+	make -C static-$*/zstd -j8 libzstd.a V=1\
+		ZSTD_LIB_DICTBUILDER=0\
+		ZSTD_LIB_MINIFY=1\
+		ZSTD_LIB_EXCLUDE_COMPRESSORS_DFAST_AND_UP=1\
+		CC="${ZIG} cc --target=$*"\
+		LD="${ZIG} cc --target=$*"\
+		AR="${ZIG} ar" RANLIB="${ZIG} ranlib"
 	cd static-$*/nc && ../../ncurses/configure --prefix="`pwd`/../inst"\
 		--with-pkg-config-libdir="`pwd`/../inst/pkg"\
 		--without-cxx --without-cxx-binding --without-ada --without-manpages --without-progs\
@@ -71,7 +83,7 @@ static-%.tar.gz:
 	@# --build-file ../build.zig --search-prefix inst/ --cache-dir zig -Drelease-fast=true
 	@# Alternative approach, bypassing zig-build
 	cd static-$* && ${ZIG} build-exe -target $*\
-		-Iinst/include -Iinst/include/ncursesw -lc inst/lib/libncursesw.a\
+		-Iinst/include -Iinst/include/ncursesw -Izstd -lc inst/lib/libncursesw.a zstd/libzstd.a\
 		--cache-dir zig-cache -static -fstrip -O ReleaseFast ../src/main.zig
 	cd static-$* && mv main ncdu && tar -czf ../static-$*.tar.gz ncdu
 	rm -rf static-$*

@@ -21,7 +21,8 @@ C version (1.x).
 
 - Zig 0.12 or 0.13.
 - Some sort of POSIX-like OS
-- ncurses libraries and header files
+- ncurses
+- libzstd
 
 ## Install
 
@@ -19,9 +19,7 @@ pub fn build(b: *std.Build) void {
 
     exe.pie = pie;
     exe.root_module.linkSystemLibrary("ncursesw", .{});
-    exe.root_module.linkSystemLibrary("zlib", .{});
     exe.root_module.linkSystemLibrary("libzstd", .{});
-    exe.root_module.linkSystemLibrary("lz4", .{});
     // https://github.com/ziglang/zig/blob/b52be973dfb7d1408218b8e75800a2da3dc69108/build.zig#L551-L554
     if (target.result.isDarwin()) {
         // useful for package maintainers
@@ -27,6 +27,7 @@ use bytes;
 no warnings 'portable';
 use List::Util 'min', 'max';
 use CBOR::XS; # Does not officially support recent perl versions, but it's the only CPAN module that supports streaming.
+use Compress::Zstd;
 
 my $printblocks = grep $_ eq 'blocks', @ARGV;
 my $printitems = grep $_ eq 'items', @ARGV;
@@ -76,13 +77,17 @@ sub datablock($prefix, $off, $blklen, $content) {
     die sprintf "%s: Duplicate block id %d (first at %010x)", $prefix, $num, $datablocks{$num}>>24 if $datablocks{$num};
     $datablocks{$num} = ($off << 24) | $blklen;
 
-    $printblocks && printf "%s: data block %d rawlen %d (%.2f)\n", $prefix, $num, $rawlen, $rawlen/(length($content)-8)*100;
+    my $compressed = substr $content, 8;
+    $printblocks && printf "%s: data block %d rawlen %d (%.2f)\n", $prefix, $num, $rawlen, $rawlen/(length($compressed))*100;
 
-    $datablock_len += length($content)-8;
+    $datablock_len += length($compressed);
     $rawdata_len += $rawlen;
 
-    # TODO: Decompress
-    cbordata($num, substr $content, 8);
+    my $rawdata = decompress($compressed);
+    die "$prefix: Block id $num failed decompression\n" if !defined $rawdata;
+    die sprintf "%s: Block id %d decompressed to %d bytes but expected %d\n",
+        $prefix, $num, length($rawdata), $rawlen if $rawlen != length $rawdata;
+    cbordata($num, $rawdata);
 }
 

@@ -7,11 +7,9 @@ const model = @import("model.zig");
 const sink = @import("sink.zig");
 const util = @import("util.zig");
 const ui = @import("ui.zig");
-const c = @cImport({
-    @cInclude("zlib.h");
-    @cInclude("zstd.h");
-    @cInclude("lz4.h");
-});
+
+extern fn ZSTD_compress(dst: ?*anyopaque, dstCapacity: usize, src: ?*const anyopaque, srcSize: usize, compressionLevel: c_int) usize;
+extern fn ZSTD_isError(code: usize) c_uint;
 
 pub const global = struct {
     var fd: std.fs.File = undefined;
@@ -21,7 +19,8 @@ pub const global = struct {
     var root_itemref: u64 = 0;
 };
 
-const BLOCK_SIZE: usize = 512*1024; // XXX: Current maximum for benchmarking, should just stick with a fixed block size.
+const BLOCK_SIZE: usize = 64*1024;
+const COMPRESSED_SIZE: usize = 65824; // ZSTD_COMPRESSBOUND(BLOCK_SIZE)
 
 pub const SIGNATURE = "\xbfncduEX1";
 
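As a sanity check, the hard-coded 65824 agrees with the bound formula documented in zstd.h for inputs below 128 KiB, namely srcSize + (srcSize >> 8) + ((128 KiB - srcSize) >> 11). A tiny standalone test of that arithmetic (not part of this commit; worth re-checking against the vendored zstd.h):

    const std = @import("std");

    // ZSTD_COMPRESSBOUND for srcSize < 128 KiB, per zstd.h:
    //   srcSize + (srcSize >> 8) + (((128 << 10) - srcSize) >> 11)
    fn compressBound64k() usize {
        const s: usize = 64 * 1024;
        return s + (s >> 8) + (((128 << 10) - s) >> 11);
    }

    test "COMPRESSED_SIZE matches ZSTD_COMPRESSBOUND(64*1024)" {
        // 65536 + 256 + 32 == 65824
        try std.testing.expectEqual(@as(usize, 65824), compressBound64k());
    }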
@@ -69,48 +68,30 @@ inline fn cborByte(major: CborMajor, arg: u5) u8 { return (@as(u8, @intFromEnum(
 
 
 pub const Thread = struct {
-    buf: [BLOCK_SIZE]u8 = undefined,
+    buf: []u8 = undefined,
     off: usize = BLOCK_SIZE,
     block_num: u32 = std.math.maxInt(u32),
     itemref: u64 = 0, // ref of item currently being written
-
-    // Temporary buffer for headers and compression.
-    // TODO: check with compressBound()/ZSTD_compressBound()
-    tmp: [BLOCK_SIZE+128]u8 = undefined,
+    tmp: []u8 = undefined, // Temporary buffer for headers and compression.
 
+    // unused, but kept around for easy debugging
     fn compressNone(in: []const u8, out: []u8) usize {
         @memcpy(out[0..in.len], in);
         return in.len;
     }
 
-    fn compressZlib(in: []const u8, out: []u8) usize {
-        var outlen: c.uLongf = out.len;
-        const r = c.compress2(out.ptr, &outlen, in.ptr, in.len, main.config.complevel);
-        std.debug.assert(r == c.Z_OK);
-        return outlen;
-    }
-
     fn compressZstd(in: []const u8, out: []u8) usize {
-        const r = c.ZSTD_compress(out.ptr, out.len, in.ptr, in.len, main.config.complevel);
-        std.debug.assert(c.ZSTD_isError(r) == 0);
-        return r;
-    }
-
-    fn compressLZ4(in: []const u8, out: []u8) usize {
-        const r = c.LZ4_compress_default(in.ptr, out.ptr, @intCast(in.len), @intCast(out.len));
-        std.debug.assert(r > 0);
-        return @intCast(r);
+        while (true) {
+            const r = ZSTD_compress(out.ptr, out.len, in.ptr, in.len, main.config.complevel);
+            if (ZSTD_isError(r) == 0) return r;
+            ui.oom(); // That *ought* to be the only reason the above call can fail.
+        }
     }
 
     fn createBlock(t: *Thread) []const u8 {
         if (t.block_num == std.math.maxInt(u32) or t.off <= 1) return "";
 
-        const bodylen = switch (main.config.compression) {
-            .none => compressNone(t.buf[0..t.off], t.tmp[12..]),
-            .zlib => compressZlib(t.buf[0..t.off], t.tmp[12..]),
-            .zstd => compressZstd(t.buf[0..t.off], t.tmp[12..]),
-            .lz4 => compressLZ4(t.buf[0..t.off], t.tmp[12..]),
-        };
+        const bodylen = compressZstd(t.buf[0..t.off], t.tmp[12..]);
         const blocklen: u32 = @intCast(bodylen + 16);
         t.tmp[0..4].* = blockHeader(1, blocklen);
         t.tmp[4..8].* = bigu32(t.block_num);
@@ -188,7 +169,7 @@ pub const Thread = struct {
     // Reserve space for a new item, write out the type, prev and name fields and return the itemref.
     fn itemStart(t: *Thread, itype: model.EType, prev_item: u64, name: []const u8) u64 {
         const min_len = name.len + MAX_ITEM_LEN;
-        if (t.off + min_len > main.config.blocksize) t.flush(min_len);
+        if (t.off + min_len > t.buf.len) t.flush(min_len);
 
         t.itemref = (@as(u64, t.block_num) << 24) | t.off;
         t.cborIndef(.map);
@@ -418,12 +399,21 @@ pub const Dir = struct {
 };
 
 
-pub fn createRoot(stat: *const sink.Stat) Dir {
+pub fn createRoot(stat: *const sink.Stat, threads: []sink.Thread) Dir {
+    for (threads) |*t| {
+        t.sink.bin.buf = main.allocator.alloc(u8, BLOCK_SIZE) catch unreachable;
+        t.sink.bin.tmp = main.allocator.alloc(u8, COMPRESSED_SIZE) catch unreachable;
+    }
+
     return .{ .stat = stat.* };
 }
 
 pub fn done(threads: []sink.Thread) void {
-    for (threads) |*t| t.sink.bin.flush(0);
+    for (threads) |*t| {
+        t.sink.bin.flush(0);
+        main.allocator.free(t.sink.bin.buf);
+        main.allocator.free(t.sink.bin.tmp);
+    }
 
     while (std.mem.endsWith(u8, global.index.items, &[1]u8{0}**8))
         global.index.shrinkRetainingCapacity(global.index.items.len - 8);
@@ -9,6 +9,8 @@ const sink = @import("sink.zig");
 const ui = @import("ui.zig");
 const bin_export = @import("bin_export.zig");
 
+extern fn ZSTD_decompress(dst: ?*anyopaque, dstCapacity: usize, src: ?*const anyopaque, compressedSize: usize) usize;
+
 const CborMajor = bin_export.CborMajor;
 const ItemKey = bin_export.ItemKey;
 
@@ -90,7 +92,7 @@ fn readBlock(num: u32) []const u8 {
     const offlen = bigu64(global.index[num*8..][0..8].*);
     if ((offlen & 0xffffff) < 16) die();
 
-    const buf = main.allocator.alloc(u8, (offlen & 0xffffff) - 12) catch unreachable;
+    const buf = main.allocator.alloc(u8, @intCast((offlen & 0xffffff) - 12)) catch unreachable;
     defer main.allocator.free(buf);
     const rdlen = global.fd.preadAll(buf, (offlen >> 24) + 8)
         catch |e| ui.die("Error reading from file: {s}\n", .{ui.errorString(e)});
@@ -100,8 +102,9 @@ fn readBlock(num: u32) []const u8 {
     if (rawlen >= (1<<24)) die();
     block.data = main.allocator.alloc(u8, rawlen) catch unreachable;
 
-    // TODO: decompress
-    @memcpy(block.data, buf[4..][0..rawlen]);
+    const res = ZSTD_decompress(block.data.ptr, block.data.len, buf[4..].ptr, buf.len - 4);
+    if (res != block.data.len) ui.die("Error decompressing block {} (expected {} got {})\n", .{ num, block.data.len, res });
 
     return block.data;
 }
@@ -190,8 +193,8 @@ const CborVal = struct {
     fn bytes(v: *const CborVal) []const u8 {
         if (v.indef or (v.major != .bytes and v.major != .text)) die();
         if (v.rd.buf.len < v.arg) die();
-        defer v.rd.buf = v.rd.buf[v.arg..];
-        return v.rd.buf[0..v.arg];
+        defer v.rd.buf = v.rd.buf[@intCast(v.arg)..];
+        return v.rd.buf[0..@intCast(v.arg)];
     }
 
     // Skip current value.
@@ -207,13 +210,15 @@ const CborVal = struct {
         switch (v.major) {
             .bytes, .text => {
                 if (v.rd.buf.len < v.arg) die();
-                v.rd.buf = v.rd.buf[v.arg..];
+                v.rd.buf = v.rd.buf[@intCast(v.arg)..];
             },
             .array => {
-                for (0..v.arg) |_| v.rd.next().skip();
+                if (v.arg > (1<<24)) die();
+                for (0..@intCast(v.arg)) |_| v.rd.next().skip();
             },
             .map => {
-                for (0..v.arg*|2) |_| v.rd.next().skip();
+                if (v.arg > (1<<24)) die();
+                for (0..@intCast(v.arg*|2)) |_| v.rd.next().skip();
             },
             else => {},
         }
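These casts are the 32-bit part of the fix: v.arg is a u64 read from the file, while slice indexing and for (0..n) ranges want a usize, which is only 32 bits on 32-bit targets, so the value is range-checked before it is cast. A small illustrative test of the pattern (hypothetical helper, not ncdu code):

    const std = @import("std");

    // Range-check a u64 length read from an untrusted file before casting it to
    // usize, so the cast cannot truncate on 32-bit targets.
    fn checkedLen(arg: u64) !usize {
        if (arg > (1 << 24)) return error.CorruptBlock; // block offsets and lengths stay below 1<<24
        const len: usize = @intCast(arg);
        return len;
    }

    test "u64 length fits usize after range check" {
        try std.testing.expectEqual(@as(usize, 1 << 20), try checkedLen(1 << 20));
        try std.testing.expectError(error.CorruptBlock, checkedLen(1 << 32));
    }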
@@ -297,7 +302,7 @@ test "CBOR skip parsing" {
 
 const ItemParser = struct {
     r: CborReader,
-    len: ?usize = null,
+    len: ?u64 = null,
 
     const Field = struct {
         key: ItemKey,
@@ -344,7 +349,7 @@ fn readItem(ref: u64) ItemParser {
     if (ref >= (1 << (24 + 32))) die();
     const block = readBlock(@intCast(ref >> 24));
     if ((ref & 0xffffff) > block.len) die();
-    return ItemParser.init(block[(ref & 0xffffff)..]);
+    return ItemParser.init(block[@intCast(ref & 0xffffff)..]);
 }
 
 const Import = struct {

src/main.zig: 22 lines changed

@@ -72,9 +72,7 @@ pub const config = struct {
     pub var exclude_kernfs: bool = false;
     pub var exclude_patterns: std.ArrayList([:0]const u8) = std.ArrayList([:0]const u8).init(allocator);
     pub var threads: usize = 1;
-    pub var compression: enum { none, zlib, zstd, lz4 } = .none;
-    pub var complevel: u8 = 5;
-    pub var blocksize: usize = 64*1024;
+    pub var complevel: u8 = 4;
 
     pub var update_delay: u64 = 100*std.time.ns_per_ms;
     pub var scan_ui: ?enum { none, line, full } = null;
@@ -269,7 +267,11 @@ fn argConfig(args: *Args, opt: Args.Option) bool {
     else if (opt.is("--include-caches")) config.exclude_caches = false
     else if (opt.is("--exclude-kernfs")) config.exclude_kernfs = true
     else if (opt.is("--include-kernfs")) config.exclude_kernfs = false
-    else if (opt.is("--confirm-quit")) config.confirm_quit = true
+    else if (opt.is("--compress-level")) {
+        const val = args.arg();
+        config.complevel = std.fmt.parseInt(u8, val, 10) catch ui.die("Invalid number for --compress-level: {s}.\n", .{val});
+        if (config.complevel <= 0 or config.complevel > 20) ui.die("Invalid number for --compress-level: {s}.\n", .{val});
+    } else if (opt.is("--confirm-quit")) config.confirm_quit = true
     else if (opt.is("--no-confirm-quit")) config.confirm_quit = false
     else if (opt.is("--confirm-delete")) config.confirm_delete = true
     else if (opt.is("--no-confirm-delete")) config.confirm_delete = false
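The accepted range lines up with zstd's compression levels: 1 through 19 are the regular levels and the zstd tooling treats 20 and above as "ultra" levels, so the option caps out at 20 here. A standalone sketch of the same validation (hypothetical helper, not the ncdu code):

    const std = @import("std");

    // Parse and bounds-check a --compress-level style argument.
    fn parseCompressLevel(val: []const u8) !u8 {
        const lvl = std.fmt.parseInt(u8, val, 10) catch return error.InvalidLevel;
        if (lvl == 0 or lvl > 20) return error.InvalidLevel;
        return lvl;
    }

    test "--compress-level bounds" {
        try std.testing.expectEqual(@as(u8, 4), try parseCompressLevel("4"));
        try std.testing.expectError(error.InvalidLevel, parseCompressLevel("0"));
        try std.testing.expectError(error.InvalidLevel, parseCompressLevel("21"));
    }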
@@ -523,17 +525,7 @@ pub fn main() void {
     else if (opt.is("-f")) import_file = allocator.dupeZ(u8, args.arg()) catch unreachable
     else if (opt.is("--ignore-config")) {}
     else if (opt.is("--quit-after-scan")) quit_after_scan = true // undocumented feature to help with benchmarking scan/import
-    else if (opt.is("--binfmt")) { // Experimental, for benchmarking
-        const a = args.arg();
-        config.compression = switch (a[0]) {
-            'z' => .zlib,
-            's','S' => .zstd,
-            'l' => .lz4,
-            else => .none,
-        };
-        config.complevel = (a[1] - '0') + (if (a[0] == 'S') @as(u8, 10) else 0);
-        config.blocksize = @as(usize, 8*1024) << @intCast(a[2] - '0'); // 0 = 8k, 1 16k, 2 32k, 3 64k, 4 128k, 5 256k, 6 512k
-    } else if (argConfig(&args, opt)) {}
+    else if (argConfig(&args, opt)) {}
     else ui.die("Unrecognized option '{s}'.\n", .{opt.val});
     }
 }
@@ -279,7 +279,7 @@ pub fn createRoot(path: []const u8, stat: *const Stat) *Dir {
         .out = switch (global.sink) {
             .mem => .{ .mem = mem_sink.createRoot(path, stat) },
             .json => .{ .json = json_export.createRoot(path, stat) },
-            .bin => .{ .bin = bin_export.createRoot(stat) },
+            .bin => .{ .bin = bin_export.createRoot(stat, global.threads) },
         },
     };
     return d;