Support writing zstd-compressed json, add --compress option

Yorhel 2024-10-26 19:30:09 +02:00
parent 0e6967498f
commit df5845baad
3 changed files with 71 additions and 12 deletions

ncdu.1

@@ -1,6 +1,6 @@
 .\" SPDX-FileCopyrightText: Yorhel <projects@yorhel.nl>
 .\" SPDX-License-Identifier: MIT
-.Dd September 27, 2024
+.Dd October 26, 2024
 .Dt NCDU 1
 .Os
 .Sh NAME
@@ -21,6 +21,7 @@
 .Op Fl L , \-follow\-symlinks , \-no\-follow\-symlinks
 .Op Fl \-include\-kernfs , \-exclude\-kernfs
 .Op Fl t , \-threads Ar num
+.Op Fl c , \-compress , \-no\-compress
 .Op Fl \-compress\-level Ar num
 .Op Fl 0 , 1 , 2
 .Op Fl q , \-slow\-ui\-updates , \-fast\-ui\-updates
@@ -97,6 +98,11 @@ uncompressed, or a little over 100 KiB when compressed with gzip.
 This scales linearly, so be prepared to handle a few tens of megabytes when
 dealing with millions of files.
 .Pp
+Consider enabling
+.Fl c
+to output Zstandard-compressed JSON, which can significantly reduce the size
+of the exported data.
+.Pp
 When running a multi-threaded scan or when scanning a directory tree that may
 not fit in memory, consider using
 .Fl O
@@ -187,10 +193,14 @@ The binary format (see
 .Fl O )
 does not have this problem and supports efficient exporting with any number of
 threads.
+.It Fl c , \-compress , \-no\-compress
+Enable or disable Zstandard compression when exporting to JSON (see
+.Fl o ) .
 .It Fl \-compress\-level Ar num
 Set the Zstandard compression level when using
 .Fl O
-to create a binary export.
+or
+.Fl c .
 Valid values are 1 (fastest) to 19 (slowest).
 Defaults to 4.
 .El
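The 1-19 range and the default of 4 above are ncdu's documented limits; libzstd itself accepts a wider range (ZSTD_minCLevel() to ZSTD_maxCLevel()). As a point of reference, here is a minimal C sketch, not part of the commit, of validating a level and applying it to a streaming compression context the same way the Zig code does; make_cstream is a hypothetical helper name:

    #include <stdio.h>
    #include <zstd.h>

    /* Create a compression stream with a validated level. The 1..19
     * bound matches the range documented above, not libzstd's own. */
    static ZSTD_CStream *make_cstream(int level)
    {
        if (level < 1 || level > 19) {
            fprintf(stderr, "invalid compression level: %d\n", level);
            return NULL;
        }
        ZSTD_CStream *cs = ZSTD_createCStream();
        if (cs == NULL) return NULL; /* allocation failure */
        /* Same parameter call the commit makes from Zig via the C bindings. */
        ZSTD_CCtx_setParameter(cs, ZSTD_c_compressionLevel, level);
        return cs;
    }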
@@ -495,9 +505,9 @@ you'll want to use
 Since scanning a large directory may take a while, you can scan a directory and
 export the results for later viewing:
 .Bd -literal -offset indent
-ncdu \-1xo\- / | gzip >export.gz
+ncdu \-1cxo export.json.zst /
 # ...some time later:
-zcat export.gz | ncdu \-f\-
+ncdu \-f export.json.zst
 .Ed
 To export from a cron job, make sure to replace
 .Fl 1
@@ -506,15 +516,10 @@ with
 to suppress any unnecessary output.
 .Pp
 You can also export a directory and browse it once scanning is done:
-.Dl ncdu \-o\- | tee export.file | ./ncdu \-f\-
-The same is possible with gzip compression, but is a bit kludgey:
-.Dl ncdu \-o\- | gzip | tee export.gz | gunzip | ./ncdu \-f\-
+.Dl ncdu \-co\- | tee export.json.zst | ./ncdu \-f\-
 .Pp
 To scan a system remotely, but browse through the files locally:
-.Dl ssh \-C user@system ncdu \-o\- / | ./ncdu \-f\-
-The
-.Fl C
-option to ssh enables compression, which will be very useful over slow links.
+.Dl ssh user@system ncdu \-co\- / | ./ncdu \-cf\-
 Remote scanning and local viewing has two major advantages when
 compared to running
 .Nm
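Since the exporter (see the Zig diff below) writes ordinary Zstandard frames via ZSTD_compressStream2, the export should be readable by any standard zstd decoder, e.g. `zstd -d export.json.zst`. A C sketch of the library-level equivalent, reusing the export.json.zst name from the examples above and keeping error handling minimal:

    #include <stdio.h>
    #include <zstd.h>

    /* Decompress a .json.zst export to stdout. */
    int main(void)
    {
        FILE *f = fopen("export.json.zst", "rb");
        if (!f) { perror("fopen"); return 1; }

        ZSTD_DStream *ds = ZSTD_createDStream();
        char inbuf[1 << 16], outbuf[1 << 16];
        size_t n;
        while ((n = fread(inbuf, 1, sizeof inbuf, f)) > 0) {
            ZSTD_inBuffer in = { inbuf, n, 0 };
            while (in.pos < in.size) {
                ZSTD_outBuffer out = { outbuf, sizeof outbuf, 0 };
                size_t r = ZSTD_decompressStream(ds, &out, &in);
                if (ZSTD_isError(r)) {
                    fprintf(stderr, "%s\n", ZSTD_getErrorName(r));
                    return 1;
                }
                fwrite(outbuf, 1, out.pos, stdout);
            }
        }
        ZSTD_freeDStream(ds);
        fclose(f);
        return 0;
    }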


@@ -7,6 +7,7 @@ const model = @import("model.zig");
 const sink = @import("sink.zig");
 const util = @import("util.zig");
 const ui = @import("ui.zig");
+const c = @import("c.zig").c;
 
 // JSON output is necessarily single-threaded and items MUST be added depth-first.
 
@@ -14,8 +15,55 @@ pub const global = struct {
     var writer: *Writer = undefined;
 };
 
+const ZstdWriter = struct {
+    ctx: ?*c.ZSTD_CStream,
+    out: c.ZSTD_outBuffer,
+    outbuf: [c.ZSTD_BLOCKSIZE_MAX + 64]u8,
+
+    fn create() *ZstdWriter {
+        const w = main.allocator.create(ZstdWriter) catch unreachable;
+        w.out = .{
+            .dst = &w.outbuf,
+            .size = w.outbuf.len,
+            .pos = 0,
+        };
+        while (true) {
+            w.ctx = c.ZSTD_createCStream();
+            if (w.ctx != null) break;
+            ui.oom();
+        }
+        _ = c.ZSTD_CCtx_setParameter(w.ctx, c.ZSTD_c_compressionLevel, main.config.complevel);
+        return w;
+    }
+
+    fn destroy(w: *ZstdWriter) void {
+        _ = c.ZSTD_freeCStream(w.ctx);
+        main.allocator.destroy(w);
+    }
+
+    fn write(w: *ZstdWriter, f: std.fs.File, in: []const u8, flush: bool) !void {
+        var arg = c.ZSTD_inBuffer{
+            .src = in.ptr,
+            .size = in.len,
+            .pos = 0,
+        };
+        while (true) {
+            const v = c.ZSTD_compressStream2(w.ctx, &w.out, &arg, if (flush) c.ZSTD_e_end else c.ZSTD_e_continue);
+            if (c.ZSTD_isError(v) != 0) return error.ZstdCompressError;
+            if (flush or w.out.pos > w.outbuf.len / 2) {
+                try f.writeAll(w.outbuf[0..w.out.pos]);
+                w.out.pos = 0;
+            }
+            if (!flush and arg.pos == arg.size) break;
+            if (flush and v == 0) break;
+        }
+    }
+};
+
 pub const Writer = struct {
     fd: std.fs.File,
+    zstd: ?*ZstdWriter = null,
+
     // Must be large enough to hold PATH_MAX*6 plus some overhead.
     // (The 6 is because, in the worst case, every byte expands to a "\u####"
     // escape, and we do pessimistic estimates here in order to avoid checking
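For readers unfamiliar with the streaming API used in ZstdWriter.write above: ZSTD_compressStream2 consumes as much input as it can per call, depositing compressed bytes in the output buffer; ZSTD_e_continue signals that more input will follow, while ZSTD_e_end finishes the frame and must be repeated until the call returns 0. A standalone C sketch of the same loop, simplified to flush the output buffer on every iteration rather than only when it is half full as the Zig code does; compress_chunk is a hypothetical name:

    #include <stdio.h>
    #include <zstd.h>

    /* Feed one chunk of input to the stream; when last != 0, end the
     * frame, looping until ZSTD_compressStream2() returns 0. */
    static int compress_chunk(ZSTD_CStream *cs, FILE *f,
                              const char *src, size_t len, int last)
    {
        char outbuf[1 << 16];
        ZSTD_inBuffer in = { src, len, 0 };
        for (;;) {
            ZSTD_outBuffer out = { outbuf, sizeof outbuf, 0 };
            size_t r = ZSTD_compressStream2(cs, &out, &in,
                                            last ? ZSTD_e_end : ZSTD_e_continue);
            if (ZSTD_isError(r)) return -1;
            if (fwrite(outbuf, 1, out.pos, f) != out.pos) return -1;
            if (!last && in.pos == in.size) break; /* all input consumed */
            if (last && r == 0) break;             /* frame fully ended  */
        }
        return 0;
    }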
@@ -29,7 +77,8 @@ pub const Writer = struct {
         // This can only really happen when the root path exceeds PATH_MAX,
         // in which case we would probably have error'ed out earlier anyway.
         if (bytes > ctx.buf.len) ui.die("Error writing JSON export: path too long.\n", .{});
-        ctx.fd.writeAll(ctx.buf[0..ctx.off]) catch |e|
+        const buf = ctx.buf[0..ctx.off];
+        (if (ctx.zstd) |z| z.write(ctx.fd, buf, bytes == 0) else ctx.fd.writeAll(buf)) catch |e|
             ui.die("Error writing to file: {s}.\n", .{ ui.errorString(e) });
         ctx.off = 0;
     }
@@ -92,6 +141,7 @@ pub const Writer = struct {
     fn init(out: std.fs.File) *Writer {
         var ctx = main.allocator.create(Writer) catch unreachable;
        ctx.* = .{ .fd = out };
+        if (main.config.compress) ctx.zstd = ZstdWriter.create();
         ctx.write("[1,2,{\"progname\":\"ncdu\",\"progver\":\"" ++ main.program_version ++ "\",\"timestamp\":");
         ctx.writeUint(@intCast(@max(0, std.time.timestamp())));
         ctx.writeByte('}');
@@ -210,6 +260,7 @@ pub fn createRoot(path: []const u8, stat: *const sink.Stat) Dir {
 pub fn done() void {
     global.writer.write("]\n");
     global.writer.flush(0);
+    if (global.writer.zstd) |z| z.destroy();
     global.writer.fd.close();
     main.allocator.destroy(global.writer);
 }


@@ -81,6 +81,7 @@ pub const config = struct {
     pub var exclude_patterns: std.ArrayList([:0]const u8) = std.ArrayList([:0]const u8).init(allocator);
     pub var threads: usize = 1;
     pub var complevel: u8 = 4;
+    pub var compress: bool = false;
     pub var update_delay: u64 = 100*std.time.ns_per_ms;
 
     pub var scan_ui: ?enum { none, line, full } = null;
@@ -276,6 +277,8 @@ fn argConfig(args: *Args, opt: Args.Option) bool {
     else if (opt.is("--include-caches")) config.exclude_caches = false
     else if (opt.is("--exclude-kernfs")) config.exclude_kernfs = true
     else if (opt.is("--include-kernfs")) config.exclude_kernfs = false
+    else if (opt.is("-c") or opt.is("--compress")) config.compress = true
+    else if (opt.is("--no-compress")) config.compress = false
     else if (opt.is("--compress-level")) {
         const val = args.arg();
         config.complevel = std.fmt.parseInt(u8, val, 10) catch ui.die("Invalid number for --compress-level: {s}.\n", .{val});
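For comparison, a hypothetical strtol-based C equivalent of this flag handling; it additionally enforces the 1-19 range the man page documents, whereas the Zig code above only parses a u8 and any further range check is outside this diff:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct config { int compress; int complevel; };

    /* Sketch: "-c"/"--compress" enable, "--no-compress" disables,
     * "--compress-level N" parses and range-checks the level. */
    static void parse_arg(struct config *cfg, const char *opt, const char *val)
    {
        if (!strcmp(opt, "-c") || !strcmp(opt, "--compress")) cfg->compress = 1;
        else if (!strcmp(opt, "--no-compress")) cfg->compress = 0;
        else if (!strcmp(opt, "--compress-level")) {
            char *end;
            long lvl = strtol(val, &end, 10);
            if (*val == '\0' || *end != '\0' || lvl < 1 || lvl > 19) {
                fprintf(stderr, "Invalid number for --compress-level: %s.\n", val);
                exit(1);
            }
            cfg->complevel = (int)lvl;
        }
    }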