mirror of https://code.blicky.net/yorhel/ncdu.git
Support writing zstd-compressed json, add --compress option
parent 0e6967498f
commit df5845baad
3 changed files with 71 additions and 12 deletions
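In day-to-day use the new option looks like this (the commands are taken from the updated example further down in the man page diff):

    ncdu -1cxo export.json.zst /
    # ...some time later:
    ncdu -f export.json.zst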
ncdu.1 (27 changed lines)
@@ -1,6 +1,6 @@
 .\" SPDX-FileCopyrightText: Yorhel <projects@yorhel.nl>
 .\" SPDX-License-Identifier: MIT
-.Dd September 27, 2024
+.Dd October 26, 2024
 .Dt NCDU 1
 .Os
 .Sh NAME
@@ -21,6 +21,7 @@
 .Op Fl L , \-follow\-symlinks , \-no\-follow\-symlinks
 .Op Fl \-include\-kernfs , \-exclude\-kernfs
 .Op Fl t , \-threads Ar num
+.Op Fl c , \-compress , \-no\-compress
 .Op Fl \-compress\-level Ar num
 .Op Fl 0 , 1 , 2
 .Op Fl q , \-slow\-ui\-updates , \-fast\-ui\-updates
@@ -97,6 +98,11 @@ uncompressed, or a little over 100 KiB when compressed with gzip.
 This scales linearly, so be prepared to handle a few tens of megabytes when
 dealing with millions of files.
+.Pp
+Consider enabling
+.Fl c
+to output Zstandard-compressed JSON, which can significantly reduce the size of
+the exported data.
 .Pp
 When running a multi-threaded scan or when scanning a directory tree that may
 not fit in memory, consider using
 .Fl O
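Since the compressed export is written as a regular Zstandard stream (see the ZSTD_compressStream2-based writer further down in this commit), it should also be possible to decompress it with the standalone zstd tool and feed the plain JSON back to ncdu, for instance:

    zstd -dc export.json.zst | ncdu -f-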
@@ -187,10 +193,14 @@ The binary format (see
 .Fl O )
 does not have this problem and supports efficient exporting with any number of
 threads.
+.It Fl c , \-compress , \-no\-compress
+Enable or disable Zstandard compression when exporting to JSON (see
+.Fl o )
 .It Fl \-compress\-level Ar num
 Set the Zstandard compression level when using
 .Fl O
-to create a binary export.
+or
+.Fl c .
 Valid values are 1 (fastest) to 19 (slowest).
 Defaults to 4.
 .El
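As a sketch of how the two options combine on the command line (using the space-separated argument form from the synopsis), a slower but smaller export would be:

    ncdu -1cxo export.json.zst --compress-level 19 /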
@@ -495,9 +505,9 @@ you'll want to use
 Since scanning a large directory may take a while, you can scan a directory and
 export the results for later viewing:
 .Bd -literal -offset indent
-ncdu \-1xo\- / | gzip >export.gz
+ncdu \-1cxo export.json.zst /
 # ...some time later:
-zcat export.gz | ncdu \-f\-
+ncdu \-f export.json.zst
 .Ed
 To export from a cron job, make sure to replace
 .Fl 1
@@ -506,15 +516,10 @@ with
 to suppress any unnecessary output.
 .Pp
 You can also export a directory and browse it once scanning is done:
-.Dl ncdu \-o\- | tee export.file | ./ncdu \-f\-
-The same is possible with gzip compression, but is a bit kludgey:
-.Dl ncdu \-o\- | gzip | tee export.gz | gunzip | ./ncdu \-f\-
+.Dl ncdu \-co\- | tee export.json.zst | ./ncdu \-f\-
 .Pp
 To scan a system remotely, but browse through the files locally:
-.Dl ssh \-C user@system ncdu \-o\- / | ./ncdu \-f\-
-The
-.Fl C
-option to ssh enables compression, which will be very useful over slow links.
+.Dl ssh user@system ncdu \-co\- / | ./ncdu \-cf\-
 Remote scanning and local viewing has two major advantages when
 compared to running
 .Nm
@@ -7,6 +7,7 @@ const model = @import("model.zig");
 const sink = @import("sink.zig");
 const util = @import("util.zig");
 const ui = @import("ui.zig");
+const c = @import("c.zig").c;
 
 // JSON output is necessarily single-threaded and items MUST be added depth-first.
@@ -14,8 +15,55 @@ pub const global = struct {
     var writer: *Writer = undefined;
 };
 
+
+const ZstdWriter = struct {
+    ctx: ?*c.ZSTD_CStream,
+    out: c.ZSTD_outBuffer,
+    outbuf: [c.ZSTD_BLOCKSIZE_MAX + 64]u8,
+
+    fn create() *ZstdWriter {
+        const w = main.allocator.create(ZstdWriter) catch unreachable;
+        w.out = .{
+            .dst = &w.outbuf,
+            .size = w.outbuf.len,
+            .pos = 0,
+        };
+        while (true) {
+            w.ctx = c.ZSTD_createCStream();
+            if (w.ctx != null) break;
+            ui.oom();
+        }
+        _ = c.ZSTD_CCtx_setParameter(w.ctx, c.ZSTD_c_compressionLevel, main.config.complevel);
+        return w;
+    }
+
+    fn destroy(w: *ZstdWriter) void {
+        _ = c.ZSTD_freeCStream(w.ctx);
+        main.allocator.destroy(w);
+    }
+
+    fn write(w: *ZstdWriter, f: std.fs.File, in: []const u8, flush: bool) !void {
+        var arg = c.ZSTD_inBuffer{
+            .src = in.ptr,
+            .size = in.len,
+            .pos = 0,
+        };
+        while (true) {
+            const v = c.ZSTD_compressStream2(w.ctx, &w.out, &arg, if (flush) c.ZSTD_e_end else c.ZSTD_e_continue);
+            if (c.ZSTD_isError(v) != 0) return error.ZstdCompressError;
+            if (flush or w.out.pos > w.outbuf.len / 2) {
+                try f.writeAll(w.outbuf[0..w.out.pos]);
+                w.out.pos = 0;
+            }
+            if (!flush and arg.pos == arg.size) break;
+            if (flush and v == 0) break;
+        }
+    }
+};
+
 pub const Writer = struct {
     fd: std.fs.File,
+    zstd: ?*ZstdWriter = null,
     // Must be large enough to hold PATH_MAX*6 plus some overhead.
     // (The 6 is because, in the worst case, every byte expands to a "\u####"
     // escape, and we do pessimistic estimates here in order to avoid checking
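A brief note on the streaming logic in the hunk above: write() feeds the pending JSON buffer to ZSTD_compressStream2 with ZSTD_e_continue, switching to ZSTD_e_end when flush is set, and only drains outbuf to the file once it is more than half full (or on flush). The flush case is reached at the end of the export, when done() calls flush(0) and the Writer passes bytes == 0 down; the loop then keeps calling ZSTD_compressStream2 until it returns 0, meaning the Zstandard frame is completely written out. The result should be an ordinary single-frame .zst file, so its integrity can be checked with the standard tool, e.g.:

    zstd -t export.json.zst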
@@ -29,7 +77,8 @@ pub const Writer = struct {
         // This can only really happen when the root path exceeds PATH_MAX,
         // in which case we would probably have error'ed out earlier anyway.
         if (bytes > ctx.buf.len) ui.die("Error writing JSON export: path too long.\n", .{});
-        ctx.fd.writeAll(ctx.buf[0..ctx.off]) catch |e|
+        const buf = ctx.buf[0..ctx.off];
+        (if (ctx.zstd) |z| z.write(ctx.fd, buf, bytes == 0) else ctx.fd.writeAll(buf)) catch |e|
             ui.die("Error writing to file: {s}.\n", .{ ui.errorString(e) });
         ctx.off = 0;
     }
@@ -92,6 +141,7 @@ pub const Writer = struct {
     fn init(out: std.fs.File) *Writer {
         var ctx = main.allocator.create(Writer) catch unreachable;
         ctx.* = .{ .fd = out };
+        if (main.config.compress) ctx.zstd = ZstdWriter.create();
         ctx.write("[1,2,{\"progname\":\"ncdu\",\"progver\":\"" ++ main.program_version ++ "\",\"timestamp\":");
         ctx.writeUint(@intCast(@max(0, std.time.timestamp())));
         ctx.writeByte('}');
@@ -210,6 +260,7 @@ pub fn createRoot(path: []const u8, stat: *const sink.Stat) Dir {
 pub fn done() void {
     global.writer.write("]\n");
     global.writer.flush(0);
+    if (global.writer.zstd) |z| z.destroy();
     global.writer.fd.close();
     main.allocator.destroy(global.writer);
 }
@@ -81,6 +81,7 @@ pub const config = struct {
     pub var exclude_patterns: std.ArrayList([:0]const u8) = std.ArrayList([:0]const u8).init(allocator);
     pub var threads: usize = 1;
     pub var complevel: u8 = 4;
+    pub var compress: bool = false;
 
     pub var update_delay: u64 = 100*std.time.ns_per_ms;
     pub var scan_ui: ?enum { none, line, full } = null;
@@ -276,6 +277,8 @@ fn argConfig(args: *Args, opt: Args.Option) bool {
     else if (opt.is("--include-caches")) config.exclude_caches = false
     else if (opt.is("--exclude-kernfs")) config.exclude_kernfs = true
     else if (opt.is("--include-kernfs")) config.exclude_kernfs = false
+    else if (opt.is("-c") or opt.is("--compress")) config.compress = true
+    else if (opt.is("--no-compress")) config.compress = false
     else if (opt.is("--compress-level")) {
         const val = args.arg();
         config.complevel = std.fmt.parseInt(u8, val, 10) catch ui.die("Invalid number for --compress-level: {s}.\n", .{val});
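Because these flags go through the same option handling in argConfig as the rest of the command line, they should also be usable from ncdu's configuration file (one command-line option per line); a hypothetical ~/.config/ncdu/config snippet:

    --compress
    --compress-level 10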