Support writing zstd-compressed JSON, add --compress option

Yorhel 2024-10-26 19:30:09 +02:00
parent 0e6967498f
commit df5845baad
3 changed files with 71 additions and 12 deletions

ncdu.1 (27 lines changed)

@@ -1,6 +1,6 @@
 .\" SPDX-FileCopyrightText: Yorhel <projects@yorhel.nl>
 .\" SPDX-License-Identifier: MIT
-.Dd September 27, 2024
+.Dd October 26, 2024
 .Dt NCDU 1
 .Os
 .Sh NAME
@@ -21,6 +21,7 @@
 .Op Fl L , \-follow\-symlinks , \-no\-follow\-symlinks
 .Op Fl \-include\-kernfs , \-exclude\-kernfs
 .Op Fl t , \-threads Ar num
+.Op Fl c , \-compress , \-no\-compress
 .Op Fl \-compress\-level Ar num
 .Op Fl 0 , 1 , 2
 .Op Fl q , \-slow\-ui\-updates , \-fast\-ui\-updates
@@ -97,6 +98,11 @@ uncompressed, or a little over 100 KiB when compressed with gzip.
 This scales linearly, so be prepared to handle a few tens of megabytes when
 dealing with millions of files.
 .Pp
+Consider enabling
+.Fl c
+to output Zstandard-compressed JSON, which can significantly reduce the size
+of the exported data.
+.Pp
 When running a multi-threaded scan or when scanning a directory tree that may
 not fit in memory, consider using
 .Fl O
@@ -187,10 +193,14 @@ The binary format (see
 .Fl O )
 does not have this problem and supports efficient exporting with any number of
 threads.
+.It Fl c , \-compress , \-no\-compress
+Enable or disable Zstandard compression when exporting to JSON (see
+.Fl o )
 .It Fl \-compress\-level Ar num
 Set the Zstandard compression level when using
 .Fl O
-to create a binary export.
+or
+.Fl c .
 Valid values are 1 (fastest) to 19 (slowest).
 Defaults to 4.
 .El
@@ -495,9 +505,9 @@ you'll want to use
 Since scanning a large directory may take a while, you can scan a directory and
 export the results for later viewing:
 .Bd -literal -offset indent
-ncdu \-1xo\- / | gzip >export.gz
+ncdu \-1cxo export.json.zst /
 # ...some time later:
-zcat export.gz | ncdu \-f\-
+ncdu \-f export.json.zst
 .Ed
 To export from a cron job, make sure to replace
 .Fl 1
@@ -506,15 +516,10 @@ with
 to suppress any unnecessary output.
 .Pp
 You can also export a directory and browse it once scanning is done:
-.Dl ncdu \-o\- | tee export.file | ./ncdu \-f\-
-The same is possible with gzip compression, but is a bit kludgey:
-.Dl ncdu \-o\- | gzip | tee export.gz | gunzip | ./ncdu \-f\-
+.Dl ncdu \-co\- | tee export.json.zst | ./ncdu \-f\-
 .Pp
 To scan a system remotely, but browse through the files locally:
-.Dl ssh \-C user@system ncdu \-o\- / | ./ncdu \-f\-
-The
-.Fl C
-option to ssh enables compression, which will be very useful over slow links.
+.Dl ssh user@system ncdu \-co\- / | ./ncdu \-cf\-
 Remote scanning and local viewing has two major advantages when
 compared to running
 .Nm
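
The new man page examples rely on the import side transparently decompressing zstd input (ncdu \-f export.json.zst). A minimal standalone Zig sketch of that reading side, using the same zstd streaming API as the writer below; this is not the commit's code, the file names and buffer sizes are illustrative, and it assumes linking against libzstd:

const std = @import("std");
const c = @cImport(@cInclude("zstd.h"));

pub fn main() !void {
    const dctx = c.ZSTD_createDStream() orelse return error.OutOfMemory;
    defer _ = c.ZSTD_freeDStream(dctx);

    const src = try std.fs.cwd().openFile("export.json.zst", .{});
    defer src.close();
    const dst = try std.fs.cwd().createFile("export.json", .{});
    defer dst.close();

    var inbuf: [4096]u8 = undefined;
    var outbuf: [4096]u8 = undefined;
    while (true) {
        const n = try src.read(&inbuf);
        if (n == 0) break; // end of input
        var in = c.ZSTD_inBuffer{ .src = &inbuf, .size = n, .pos = 0 };
        // Drain until this chunk is fully consumed and zstd reports no
        // pending output (i.e. the output buffer was not filled completely).
        while (true) {
            var out = c.ZSTD_outBuffer{ .dst = &outbuf, .size = outbuf.len, .pos = 0 };
            const ret = c.ZSTD_decompressStream(dctx, &out, &in);
            if (c.ZSTD_isError(ret) != 0) return error.ZstdDecompressError;
            try dst.writeAll(outbuf[0..out.pos]);
            if (in.pos == in.size and out.pos < out.size) break;
        }
    }
}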

src/json_export.zig

@@ -7,6 +7,7 @@ const model = @import("model.zig");
 const sink = @import("sink.zig");
 const util = @import("util.zig");
 const ui = @import("ui.zig");
+const c = @import("c.zig").c;

 // JSON output is necessarily single-threaded and items MUST be added depth-first.
@@ -14,8 +15,55 @@ pub const global = struct {
     var writer: *Writer = undefined;
 };

+const ZstdWriter = struct {
+    ctx: ?*c.ZSTD_CStream,
+    out: c.ZSTD_outBuffer,
+    outbuf: [c.ZSTD_BLOCKSIZE_MAX + 64]u8,
+
+    fn create() *ZstdWriter {
+        const w = main.allocator.create(ZstdWriter) catch unreachable;
+        w.out = .{
+            .dst = &w.outbuf,
+            .size = w.outbuf.len,
+            .pos = 0,
+        };
+        while (true) {
+            w.ctx = c.ZSTD_createCStream();
+            if (w.ctx != null) break;
+            ui.oom();
+        }
+        _ = c.ZSTD_CCtx_setParameter(w.ctx, c.ZSTD_c_compressionLevel, main.config.complevel);
+        return w;
+    }
+
+    fn destroy(w: *ZstdWriter) void {
+        _ = c.ZSTD_freeCStream(w.ctx);
+        main.allocator.destroy(w);
+    }
+
+    fn write(w: *ZstdWriter, f: std.fs.File, in: []const u8, flush: bool) !void {
+        var arg = c.ZSTD_inBuffer{
+            .src = in.ptr,
+            .size = in.len,
+            .pos = 0,
+        };
+        while (true) {
+            const v = c.ZSTD_compressStream2(w.ctx, &w.out, &arg, if (flush) c.ZSTD_e_end else c.ZSTD_e_continue);
+            if (c.ZSTD_isError(v) != 0) return error.ZstdCompressError;
+            if (flush or w.out.pos > w.outbuf.len / 2) {
+                try f.writeAll(w.outbuf[0..w.out.pos]);
+                w.out.pos = 0;
+            }
+            if (!flush and arg.pos == arg.size) break;
+            if (flush and v == 0) break;
+        }
+    }
+};
+
 pub const Writer = struct {
     fd: std.fs.File,
+    zstd: ?*ZstdWriter = null,
+
     // Must be large enough to hold PATH_MAX*6 plus some overhead.
     // (The 6 is because, in the worst case, every byte expands to a "\u####"
     // escape, and we do pessimistic estimates here in order to avoid checking
@@ -29,7 +77,8 @@ pub const Writer = struct {
         // This can only really happen when the root path exceeds PATH_MAX,
         // in which case we would probably have error'ed out earlier anyway.
         if (bytes > ctx.buf.len) ui.die("Error writing JSON export: path too long.\n", .{});
-        ctx.fd.writeAll(ctx.buf[0..ctx.off]) catch |e|
+        const buf = ctx.buf[0..ctx.off];
+        (if (ctx.zstd) |z| z.write(ctx.fd, buf, bytes == 0) else ctx.fd.writeAll(buf)) catch |e|
            ui.die("Error writing to file: {s}.\n", .{ ui.errorString(e) });
         ctx.off = 0;
     }
@@ -92,6 +141,7 @@ pub const Writer = struct {
     fn init(out: std.fs.File) *Writer {
         var ctx = main.allocator.create(Writer) catch unreachable;
         ctx.* = .{ .fd = out };
+        if (main.config.compress) ctx.zstd = ZstdWriter.create();
         ctx.write("[1,2,{\"progname\":\"ncdu\",\"progver\":\"" ++ main.program_version ++ "\",\"timestamp\":");
         ctx.writeUint(@intCast(@max(0, std.time.timestamp())));
         ctx.writeByte('}');
@@ -210,6 +260,7 @@ pub fn createRoot(path: []const u8, stat: *const sink.Stat) Dir {
 pub fn done() void {
     global.writer.write("]\n");
     global.writer.flush(0);
+    if (global.writer.zstd) |z| z.destroy();
     global.writer.fd.close();
     main.allocator.destroy(global.writer);
 }
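
The ZstdWriter above follows zstd's standard streaming-compression protocol: input is fed with ZSTD_e_continue, and the final call passes ZSTD_e_end, whose return value is the number of bytes zstd still has to flush, so 0 means the frame is complete. A minimal standalone sketch of the same loop (not the commit's code; the output name, buffer size, and compression level are illustrative, and it assumes linking against libzstd):

const std = @import("std");
const c = @cImport(@cInclude("zstd.h"));

pub fn main() !void {
    const cctx = c.ZSTD_createCStream() orelse return error.OutOfMemory;
    defer _ = c.ZSTD_freeCStream(cctx);
    _ = c.ZSTD_CCtx_setParameter(cctx, c.ZSTD_c_compressionLevel, 4);

    const file = try std.fs.cwd().createFile("hello.zst", .{});
    defer file.close();

    var outbuf: [4096]u8 = undefined;
    var out = c.ZSTD_outBuffer{ .dst = &outbuf, .size = outbuf.len, .pos = 0 };

    const msg: []const u8 = "hello, zstd streaming\n";
    var in = c.ZSTD_inBuffer{ .src = msg.ptr, .size = msg.len, .pos = 0 };
    while (true) {
        // ZSTD_e_end compresses any remaining input and ends the frame;
        // the return value is the number of bytes not yet flushed.
        const remaining = c.ZSTD_compressStream2(cctx, &out, &in, c.ZSTD_e_end);
        if (c.ZSTD_isError(remaining) != 0) return error.ZstdCompressError;
        try file.writeAll(outbuf[0..out.pos]);
        out.pos = 0;
        if (remaining == 0) break;
    }
}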

src/main.zig

@@ -81,6 +81,7 @@ pub const config = struct {
     pub var exclude_patterns: std.ArrayList([:0]const u8) = std.ArrayList([:0]const u8).init(allocator);
     pub var threads: usize = 1;
     pub var complevel: u8 = 4;
+    pub var compress: bool = false;

     pub var update_delay: u64 = 100*std.time.ns_per_ms;
     pub var scan_ui: ?enum { none, line, full } = null;
@@ -276,6 +277,8 @@ fn argConfig(args: *Args, opt: Args.Option) bool {
     else if (opt.is("--include-caches")) config.exclude_caches = false
     else if (opt.is("--exclude-kernfs")) config.exclude_kernfs = true
     else if (opt.is("--include-kernfs")) config.exclude_kernfs = false
+    else if (opt.is("-c") or opt.is("--compress")) config.compress = true
+    else if (opt.is("--no-compress")) config.compress = false
     else if (opt.is("--compress-level")) {
         const val = args.arg();
         config.complevel = std.fmt.parseInt(u8, val, 10) catch ui.die("Invalid number for --compress-level: {s}.\n", .{val});