Support writing zstd-compressed json, add --compress option
parent 0e6967498f
commit df5845baad
3 changed files with 71 additions and 12 deletions
ncdu.1 (27 lines changed)
@@ -1,6 +1,6 @@
 .\" SPDX-FileCopyrightText: Yorhel <projects@yorhel.nl>
 .\" SPDX-License-Identifier: MIT
-.Dd September 27, 2024
+.Dd October 26, 2024
 .Dt NCDU 1
 .Os
 .Sh NAME
@@ -21,6 +21,7 @@
 .Op Fl L , \-follow\-symlinks , \-no\-follow\-symlinks
 .Op Fl \-include\-kernfs , \-exclude\-kernfs
 .Op Fl t , \-threads Ar num
+.Op Fl c , \-compress , \-no\-compress
 .Op Fl \-compress\-level Ar num
 .Op Fl 0 , 1 , 2
 .Op Fl q , \-slow\-ui\-updates , \-fast\-ui\-updates
@@ -97,6 +98,11 @@ uncompressed, or a little over 100 KiB when compressed with gzip.
 This scales linearly, so be prepared to handle a few tens of megabytes when
 dealing with millions of files.
 .Pp
+Consider enabling
+.Fl c
+to output Zstandard-compressed JSON, which can significantly reduce size of the
+exported data.
+.Pp
 When running a multi-threaded scan or when scanning a directory tree that may
 not fit in memory, consider using
 .Fl O
@@ -187,10 +193,14 @@ The binary format (see
 .Fl O )
 does not have this problem and supports efficient exporting with any number of
 threads.
+.It Fl c , \-compress , \-no\-compress
+Enable or disable Zstandard compression when exporting to JSON (see
+.Fl o )
 .It Fl \-compress\-level Ar num
 Set the Zstandard compression level when using
 .Fl O
-to create a binary export.
+or
+.Fl c .
 Valid values are 1 (fastest) to 19 (slowest).
 Defaults to 4.
 .El
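The 1 (fastest) to 19 (slowest) range and the default of 4 are ncdu's documented limits; libzstd itself accepts a wider range and reports its actual bounds at runtime. As a standalone illustration in C (not ncdu code; the helper name is made up, and it assumes libzstd 1.4 or newer), a caller could validate and apply a requested level like this:

#include <stdio.h>
#include <zstd.h>

/* Hypothetical helper, not part of ncdu: clamp a requested level to what
 * this libzstd build supports, then apply it to a compression context. */
static void set_level(ZSTD_CCtx *cctx, int level)
{
    const int lo = ZSTD_minCLevel(), hi = ZSTD_maxCLevel();
    if (level < lo) level = lo;
    if (level > hi) level = hi;
    const size_t r = ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level);
    if (ZSTD_isError(r))
        fprintf(stderr, "zstd: %s\n", ZSTD_getErrorName(r));
}

ZSTD_CCtx_setParameter() with ZSTD_c_compressionLevel is the same call the Zig implementation further down makes through its C bindings.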
@@ -495,9 +505,9 @@ you'll want to use
 Since scanning a large directory may take a while, you can scan a directory and
 export the results for later viewing:
 .Bd -literal -offset indent
-ncdu \-1xo\- / | gzip >export.gz
+ncdu \-1cxo export.json.zst /
 # ...some time later:
-zcat export.gz | ncdu \-f\-
+ncdu \-f export.json.zst
 .Ed
 To export from a cron job, make sure to replace
 .Fl 1
@@ -506,15 +516,10 @@ with
 to suppress any unnecessary output.
 .Pp
 You can also export a directory and browse it once scanning is done:
-.Dl ncdu \-o\- | tee export.file | ./ncdu \-f\-
-The same is possible with gzip compression, but is a bit kludgey:
-.Dl ncdu \-o\- | gzip | tee export.gz | gunzip | ./ncdu \-f\-
+.Dl ncdu \-co\- | tee export.json.zst | ./ncdu \-f\-
 .Pp
 To scan a system remotely, but browse through the files locally:
-.Dl ssh \-C user@system ncdu \-o\- / | ./ncdu \-f\-
-The
-.Fl C
-option to ssh enables compression, which will be very useful over slow links.
+.Dl ssh user@system ncdu \-co\- / | ./ncdu \-cf\-
 Remote scanning and local viewing has two major advantages when
 compared to running
 .Nm
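The updated examples write a regular Zstandard frame to export.json.zst and let ncdu read it back directly with
\-f .
Because the output is a standard zstd stream, any decoder can recover the plain JSON. A minimal C sketch of such a consumer (the file name and buffer sizes are illustrative, and this is not ncdu's importer):

#include <stdio.h>
#include <zstd.h>

/* Sketch: stream-decompress export.json.zst to stdout; in effect the same
 * as running zstdcat on the file. */
int main(void)
{
    FILE *f = fopen("export.json.zst", "rb");
    ZSTD_DStream *ds = ZSTD_createDStream();
    if (!f || !ds) { perror("init"); return 1; }

    char inbuf[1 << 16], outbuf[1 << 16];
    size_t n;
    while ((n = fread(inbuf, 1, sizeof inbuf, f)) > 0) {
        ZSTD_inBuffer in = { inbuf, n, 0 };
        while (in.pos < in.size) { /* drain this chunk completely */
            ZSTD_outBuffer out = { outbuf, sizeof outbuf, 0 };
            const size_t r = ZSTD_decompressStream(ds, &out, &in);
            if (ZSTD_isError(r)) {
                fprintf(stderr, "zstd: %s\n", ZSTD_getErrorName(r));
                return 1;
            }
            fwrite(outbuf, 1, out.pos, stdout);
        }
    }
    ZSTD_freeDStream(ds);
    fclose(f);
    return 0;
}

Piping through an external decoder, for example zstdcat export.json.zst | ncdu -f-, should work just as well. The hunks below are the Zig side of the commit: a streaming-compression wrapper in the JSON export code, the plumbing that routes buffered writes through it, and the new compress option in the configuration and argument parsing.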
@@ -7,6 +7,7 @@ const model = @import("model.zig");
 const sink = @import("sink.zig");
 const util = @import("util.zig");
 const ui = @import("ui.zig");
+const c = @import("c.zig").c;
 
 // JSON output is necessarily single-threaded and items MUST be added depth-first.
 
@@ -14,8 +15,55 @@ pub const global = struct {
     var writer: *Writer = undefined;
 };
 
+
+const ZstdWriter = struct {
+    ctx: ?*c.ZSTD_CStream,
+    out: c.ZSTD_outBuffer,
+    outbuf: [c.ZSTD_BLOCKSIZE_MAX + 64]u8,
+
+    fn create() *ZstdWriter {
+        const w = main.allocator.create(ZstdWriter) catch unreachable;
+        w.out = .{
+            .dst = &w.outbuf,
+            .size = w.outbuf.len,
+            .pos = 0,
+        };
+        while (true) {
+            w.ctx = c.ZSTD_createCStream();
+            if (w.ctx != null) break;
+            ui.oom();
+        }
+        _ = c.ZSTD_CCtx_setParameter(w.ctx, c.ZSTD_c_compressionLevel, main.config.complevel);
+        return w;
+    }
+
+    fn destroy(w: *ZstdWriter) void {
+        _ = c.ZSTD_freeCStream(w.ctx);
+        main.allocator.destroy(w);
+    }
+
+    fn write(w: *ZstdWriter, f: std.fs.File, in: []const u8, flush: bool) !void {
+        var arg = c.ZSTD_inBuffer{
+            .src = in.ptr,
+            .size = in.len,
+            .pos = 0,
+        };
+        while (true) {
+            const v = c.ZSTD_compressStream2(w.ctx, &w.out, &arg, if (flush) c.ZSTD_e_end else c.ZSTD_e_continue);
+            if (c.ZSTD_isError(v) != 0) return error.ZstdCompressError;
+            if (flush or w.out.pos > w.outbuf.len / 2) {
+                try f.writeAll(w.outbuf[0..w.out.pos]);
+                w.out.pos = 0;
+            }
+            if (!flush and arg.pos == arg.size) break;
+            if (flush and v == 0) break;
+        }
+    }
+};
+
 pub const Writer = struct {
     fd: std.fs.File,
+    zstd: ?*ZstdWriter = null,
     // Must be large enough to hold PATH_MAX*6 plus some overhead.
     // (The 6 is because, in the worst case, every byte expands to a "\u####"
     // escape, and we do pessimistic estimates here in order to avoid checking
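A note on the libzstd streaming API that ZstdWriter wraps: ZSTD_compressStream2() consumes input and appends compressed bytes to the caller's output buffer. With ZSTD_e_continue the loop may stop once all input has been consumed; with ZSTD_e_end the frame is finalized and the return value is the minimum number of bytes zstd still holds internally, so the loop must run until that reaches 0. Those are exactly the two break conditions in write() above. A condensed C sketch of the same loop (illustrative names; unlike the Zig version, which flushes its buffer only once it is half full, this writes the output buffer on every iteration):

#include <stdio.h>
#include <zstd.h>

/* Condensed C version of the write() loop above. `cctx` is assumed to be a
 * configured ZSTD_CCtx; `last` corresponds to the Zig `flush` argument. */
static int compress_chunk(ZSTD_CCtx *cctx, FILE *f,
                          const void *src, size_t len, int last)
{
    ZSTD_inBuffer in = { src, len, 0 };
    char outbuf[1 << 16];
    for (;;) {
        ZSTD_outBuffer out = { outbuf, sizeof outbuf, 0 };
        /* ZSTD_e_continue: just make progress; ZSTD_e_end: finish the frame,
         * returning how many bytes remain to be flushed (0 = frame done). */
        const size_t r = ZSTD_compressStream2(cctx, &out, &in,
                                              last ? ZSTD_e_end : ZSTD_e_continue);
        if (ZSTD_isError(r)) return -1;
        if (fwrite(outbuf, 1, out.pos, f) != out.pos) return -1;
        if (last ? r == 0 : in.pos == in.size) return 0;
    }
}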
@@ -29,7 +77,8 @@ pub const Writer = struct {
         // This can only really happen when the root path exceeds PATH_MAX,
         // in which case we would probably have error'ed out earlier anyway.
         if (bytes > ctx.buf.len) ui.die("Error writing JSON export: path too long.\n", .{});
-        ctx.fd.writeAll(ctx.buf[0..ctx.off]) catch |e|
+        const buf = ctx.buf[0..ctx.off];
+        (if (ctx.zstd) |z| z.write(ctx.fd, buf, bytes == 0) else ctx.fd.writeAll(buf)) catch |e|
             ui.die("Error writing to file: {s}.\n", .{ ui.errorString(e) });
         ctx.off = 0;
     }
@@ -92,6 +141,7 @@ pub const Writer = struct {
     fn init(out: std.fs.File) *Writer {
         var ctx = main.allocator.create(Writer) catch unreachable;
         ctx.* = .{ .fd = out };
+        if (main.config.compress) ctx.zstd = ZstdWriter.create();
         ctx.write("[1,2,{\"progname\":\"ncdu\",\"progver\":\"" ++ main.program_version ++ "\",\"timestamp\":");
         ctx.writeUint(@intCast(@max(0, std.time.timestamp())));
         ctx.writeByte('}');
@@ -210,6 +260,7 @@ pub fn createRoot(path: []const u8, stat: *const sink.Stat) Dir {
 pub fn done() void {
     global.writer.write("]\n");
     global.writer.flush(0);
+    if (global.writer.zstd) |z| z.destroy();
     global.writer.fd.close();
     main.allocator.destroy(global.writer);
 }
@@ -81,6 +81,7 @@ pub const config = struct {
     pub var exclude_patterns: std.ArrayList([:0]const u8) = std.ArrayList([:0]const u8).init(allocator);
     pub var threads: usize = 1;
     pub var complevel: u8 = 4;
+    pub var compress: bool = false;
 
     pub var update_delay: u64 = 100*std.time.ns_per_ms;
     pub var scan_ui: ?enum { none, line, full } = null;
@@ -276,6 +277,8 @@ fn argConfig(args: *Args, opt: Args.Option) bool {
     else if (opt.is("--include-caches")) config.exclude_caches = false
     else if (opt.is("--exclude-kernfs")) config.exclude_kernfs = true
     else if (opt.is("--include-kernfs")) config.exclude_kernfs = false
+    else if (opt.is("-c") or opt.is("--compress")) config.compress = true
+    else if (opt.is("--no-compress")) config.compress = false
     else if (opt.is("--compress-level")) {
         const val = args.arg();
         config.complevel = std.fmt.parseInt(u8, val, 10) catch ui.die("Invalid number for --compress-level: {s}.\n", .{val});