Split mem import and json export out of sink.zig

Mainly to make room for another export format, though that'll take a lot
more experimenting before it'll get anywhere.
Author: Yorhel, 2024-07-27 11:58:06 +02:00
parent f6bffa40c7
commit 3c055810d0
5 changed files with 531 additions and 481 deletions

src/browser.zig

@@ -5,6 +5,7 @@ const std = @import("std");
 const main = @import("main.zig");
 const model = @import("model.zig");
 const sink = @import("sink.zig");
+const mem_sink = @import("mem_sink.zig");
 const delete = @import("delete.zig");
 const ui = @import("ui.zig");
 const c = @cImport(@cInclude("time.h"));
@@ -845,7 +846,8 @@ pub fn keyInput(ch: i32) void {
             message = "Directory refresh feature disabled."
         else {
             main.state = .refresh;
-            sink.state.out = sink.state.Out{ .mem = dir_parent };
+            sink.global.sink = .mem;
+            mem_sink.global.root = dir_parent;
         }
     },
     'b' => {

src/json_export.zig (new file, 214 lines)

@@ -0,0 +1,214 @@
// SPDX-FileCopyrightText: Yorhel <projects@yorhel.nl>
// SPDX-License-Identifier: MIT

const std = @import("std");
const main = @import("main.zig");
const sink = @import("sink.zig");
const util = @import("util.zig");
const ui = @import("ui.zig");

// JSON output is necessarily single-threaded and items MUST be added depth-first.

pub const global = struct {
    var writer: *Writer = undefined;
};

pub const Writer = struct {
    fd: std.fs.File,
    // Must be large enough to hold PATH_MAX*6 plus some overhead.
    // (The 6 is because, in the worst case, every byte expands to a "\u####"
    // escape, and we do pessimistic estimates here in order to avoid checking
    // buffer lengths for each and every write operation)
    buf: [64*1024]u8 = undefined,
    off: usize = 0,
    dir_entry_open: bool = false,

    fn flush(ctx: *Writer, bytes: usize) void {
        @setCold(true);
        // This can only really happen when the root path exceeds PATH_MAX,
        // in which case we would probably have error'ed out earlier anyway.
        if (bytes > ctx.buf.len) ui.die("Error writing JSON export: path too long.\n", .{});
        ctx.fd.writeAll(ctx.buf[0..ctx.off]) catch |e|
            ui.die("Error writing to file: {s}.\n", .{ ui.errorString(e) });
        ctx.off = 0;
    }

    fn ensureSpace(ctx: *Writer, bytes: usize) void {
        if (bytes > ctx.buf.len - ctx.off) ctx.flush(bytes);
    }

    fn write(ctx: *Writer, s: []const u8) void {
        @memcpy(ctx.buf[ctx.off..][0..s.len], s);
        ctx.off += s.len;
    }

    fn writeByte(ctx: *Writer, b: u8) void {
        ctx.buf[ctx.off] = b;
        ctx.off += 1;
    }

    // Write escaped string contents, excluding the quotes.
    fn writeStr(ctx: *Writer, s: []const u8) void {
        for (s) |b| {
            if (b >= 0x20 and b != '"' and b != '\\' and b != 127) ctx.writeByte(b)
            else switch (b) {
                '\n' => ctx.write("\\n"),
                '\r' => ctx.write("\\r"),
                0x8 => ctx.write("\\b"),
                '\t' => ctx.write("\\t"),
                0xC => ctx.write("\\f"),
                '\\' => ctx.write("\\\\"),
                '"' => ctx.write("\\\""),
                else => {
                    ctx.write("\\u00");
                    const hexdig = "0123456789abcdef";
                    ctx.writeByte(hexdig[b>>4]);
                    ctx.writeByte(hexdig[b&0xf]);
                },
            }
        }
    }

    fn writeUint(ctx: *Writer, n: u64) void {
        // Based on std.fmt.formatInt
        var a = n;
        var buf: [24]u8 = undefined;
        var index: usize = buf.len;
        while (a >= 100) : (a = @divTrunc(a, 100)) {
            index -= 2;
            buf[index..][0..2].* = std.fmt.digits2(@as(usize, @intCast(a % 100)));
        }
        if (a < 10) {
            index -= 1;
            buf[index] = '0' + @as(u8, @intCast(a));
        } else {
            index -= 2;
            buf[index..][0..2].* = std.fmt.digits2(@as(usize, @intCast(a)));
        }
        ctx.write(buf[index..]);
    }

    fn init(out: std.fs.File) *Writer {
        var ctx = main.allocator.create(Writer) catch unreachable;
        ctx.* = .{ .fd = out };
        ctx.write("[1,2,{\"progname\":\"ncdu\",\"progver\":\"" ++ main.program_version ++ "\",\"timestamp\":");
        ctx.writeUint(@intCast(@max(0, std.time.timestamp())));
        ctx.writeByte('}');
        return ctx;
    }

    // A newly written directory entry is left "open", i.e. the '}' to close
    // the item object is not written, to allow for a setReadError() to be
    // caught if one happens before the first sub entry.
    // Any read errors after the first sub entry are thrown away, but that's
    // just a limitation of the JSON format.
    fn closeDirEntry(ctx: *Writer, rderr: bool) void {
        if (ctx.dir_entry_open) {
            ctx.dir_entry_open = false;
            if (rderr) ctx.write(",\"read_error\":true");
            ctx.writeByte('}');
        }
    }

    fn writeSpecial(ctx: *Writer, name: []const u8, t: sink.Special) void {
        ctx.closeDirEntry(false);
        ctx.ensureSpace(name.len*6 + 1000);
        // not necessarily correct, but mimics model.Entry.isDirectory()
        const isdir = switch (t) {
            .other_fs, .kernfs => true,
            .err, .excluded => false,
        };
        ctx.write(if (isdir) ",\n[{\"name\":\"" else ",\n{\"name\":\"");
        ctx.writeStr(name);
        ctx.write(switch (t) {
            .err => "\",\"read_error\":true}",
            .other_fs => "\",\"excluded\":\"otherfs\"}",
            .kernfs => "\",\"excluded\":\"kernfs\"}",
            .excluded => "\",\"excluded\":\"pattern\"}",
        });
        if (isdir) ctx.writeByte(']');
    }

    fn writeStat(ctx: *Writer, name: []const u8, stat: *const sink.Stat, parent_dev: u64) void {
        ctx.ensureSpace(name.len*6 + 1000);
        ctx.write(if (stat.dir) ",\n[{\"name\":\"" else ",\n{\"name\":\"");
        ctx.writeStr(name);
        ctx.writeByte('"');
        if (stat.size > 0) {
            ctx.write(",\"asize\":");
            ctx.writeUint(stat.size);
        }
        if (stat.blocks > 0) {
            ctx.write(",\"dsize\":");
            ctx.writeUint(util.blocksToSize(stat.blocks));
        }
        if (stat.dir and stat.dev != parent_dev) {
            ctx.write(",\"dev\":");
            ctx.writeUint(stat.dev);
        }
        if (stat.hlinkc) {
            ctx.write(",\"ino\":");
            ctx.writeUint(stat.ino);
            ctx.write(",\"hlnkc\":true,\"nlink\":");
            ctx.writeUint(stat.nlink);
        }
        if (!stat.dir and !stat.reg) ctx.write(",\"notreg\":true");
        if (main.config.extended) {
            ctx.write(",\"uid\":");
            ctx.writeUint(stat.ext.uid);
            ctx.write(",\"gid\":");
            ctx.writeUint(stat.ext.gid);
            ctx.write(",\"mode\":");
            ctx.writeUint(stat.ext.mode);
            ctx.write(",\"mtime\":");
            ctx.writeUint(stat.ext.mtime);
        }
    }
};

pub const Dir = struct {
    dev: u64,

    pub fn addSpecial(_: *Dir, name: []const u8, sp: sink.Special) void {
        global.writer.writeSpecial(name, sp);
    }

    pub fn addStat(_: *Dir, name: []const u8, stat: *const sink.Stat) void {
        global.writer.closeDirEntry(false);
        global.writer.writeStat(name, stat, undefined);
        global.writer.writeByte('}');
    }

    pub fn addDir(d: *Dir, name: []const u8, stat: *const sink.Stat) Dir {
        global.writer.closeDirEntry(false);
        global.writer.writeStat(name, stat, d.dev);
        global.writer.dir_entry_open = true;
        return .{ .dev = stat.dev };
    }

    pub fn setReadError(_: *Dir) void {
        global.writer.closeDirEntry(true);
    }

    pub fn final(_: *Dir) void {
        global.writer.ensureSpace(1000);
        global.writer.closeDirEntry(false);
        global.writer.writeByte(']');
    }
};

pub fn createRoot(path: []const u8, stat: *const sink.Stat) Dir {
    var root = Dir{ .dev = 0 };
    return root.addDir(path, stat);
}

pub fn done() void {
    global.writer.write("]\n");
    global.writer.flush(0);
    global.writer.fd.close();
    main.allocator.destroy(global.writer);
}

pub fn setupOutput(out: std.fs.File) void {
    global.writer = Writer.init(out);
}
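
The writer batches output through a fixed 64 KiB buffer and, thanks to the pessimistic 6x bound described in the buf comment, only checks for space once per item instead of once per write. A self-contained sketch of the same escaping rule follows; the escapeJson helper is hypothetical and not part of this commit:

const std = @import("std");

// Mirrors writeStr() above: control bytes, '"', '\\' and DEL are escaped;
// in the worst case every input byte becomes a 6-byte "\u00XX" sequence.
fn escapeJson(writer: anytype, s: []const u8) !void {
    for (s) |b| {
        if (b >= 0x20 and b != '"' and b != '\\' and b != 127) {
            try writer.writeByte(b);
        } else switch (b) {
            '\n' => try writer.writeAll("\\n"),
            '\r' => try writer.writeAll("\\r"),
            0x8 => try writer.writeAll("\\b"),
            '\t' => try writer.writeAll("\\t"),
            0xC => try writer.writeAll("\\f"),
            '\\' => try writer.writeAll("\\\\"),
            '"' => try writer.writeAll("\\\""),
            else => try writer.print("\\u00{x:0>2}", .{b}),
        }
    }
}

test "control bytes expand to \\u00XX escapes" {
    var buf: [64]u8 = undefined;
    var fbs = std.io.fixedBufferStream(&buf);
    try escapeJson(fbs.writer(), "a\x01b");
    try std.testing.expectEqualStrings("a\\u0001b", fbs.getWritten());
}

For reference, the stream this module produces is ncdu's existing JSON export format: a [1,2,{...}] header followed by nested arrays, where each directory is an array whose first element is the directory's own item object; that object is what closeDirEntry() leaves open until the first child arrives.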

src/main.zig

@@ -7,7 +7,10 @@ const std = @import("std");
 const model = @import("model.zig");
 const scan = @import("scan.zig");
 const json_import = @import("json_import.zig");
+const json_export = @import("json_export.zig");
 const sink = @import("sink.zig");
+const mem_src = @import("mem_src.zig");
+const mem_sink = @import("mem_sink.zig");
 const ui = @import("ui.zig");
 const browser = @import("browser.zig");
 const delete = @import("delete.zig");
@@ -19,7 +22,10 @@ test "imports" {
     _ = model;
     _ = scan;
     _ = json_import;
+    _ = json_export;
     _ = sink;
+    _ = mem_src;
+    _ = mem_sink;
     _ = ui;
     _ = browser;
     _ = delete;
@@ -518,11 +524,14 @@ pub fn main() void {
     event_delay_timer = std.time.Timer.start() catch unreachable;
     defer ui.deinit();
 
-    if (export_file) |f| sink.setupJsonOutput(
-        if (std.mem.eql(u8, f, "-")) stdout
-        else std.fs.cwd().createFileZ(f, .{})
-             catch |e| ui.die("Error opening export file: {s}.\n", .{ui.errorString(e)})
-    );
+    if (export_file) |f| {
+        const file =
+            if (std.mem.eql(u8, f, "-")) stdout
+            else std.fs.cwd().createFileZ(f, .{})
+                 catch |e| ui.die("Error opening export file: {s}.\n", .{ui.errorString(e)});
+        json_export.setupOutput(file);
+        sink.global.sink = .json;
+    }
 
     if (import_file) |f| {
         json_import.import(f);
@@ -551,10 +560,10 @@ pub fn main() void {
         .refresh => {
             var full_path = std.ArrayList(u8).init(allocator);
             defer full_path.deinit();
-            sink.state.out.mem.?.fmtPath(true, &full_path);
+            mem_sink.global.root.?.fmtPath(true, &full_path);
             scan.scan(util.arrayListBufZ(&full_path)) catch {
-                sink.state.last_error = allocator.dupeZ(u8, full_path.items) catch unreachable;
-                sink.state.status = .err;
+                sink.global.last_error = allocator.dupeZ(u8, full_path.items) catch unreachable;
+                sink.global.state = .err;
                 while (state == .refresh) handleEvent(true, true);
             };
             state = .browse;
src/mem_sink.zig (new file, 217 lines)

@@ -0,0 +1,217 @@
// SPDX-FileCopyrightText: Yorhel <projects@yorhel.nl>
// SPDX-License-Identifier: MIT

const std = @import("std");
const main = @import("main.zig");
const model = @import("model.zig");
const sink = @import("sink.zig");

pub const global = struct {
    pub var root: ?*model.Dir = null;
    pub var stats: bool = true; // calculate aggregate directory stats
};

pub const Thread = struct {
    // Arena allocator for model.Entry structs, these are never freed.
    arena: std.heap.ArenaAllocator = std.heap.ArenaAllocator.init(std.heap.page_allocator),
};

pub const Dir = struct {
    dir: *model.Dir,
    entries: Map,
    own_blocks: model.Blocks,
    own_bytes: u64,

    // Additional counts collected from subdirectories. Subdirs may run final()
    // from separate threads so these need to be protected.
    blocks: model.Blocks = 0,
    bytes: u64 = 0,
    items: u32 = 0,
    mtime: u64 = 0,
    suberr: bool = false,
    lock: std.Thread.Mutex = .{},

    const Map = std.HashMap(*model.Entry, void, HashContext, 80);

    const HashContext = struct {
        pub fn hash(_: @This(), e: *model.Entry) u64 {
            return std.hash.Wyhash.hash(0, e.name());
        }
        pub fn eql(_: @This(), a: *model.Entry, b: *model.Entry) bool {
            return a == b or std.mem.eql(u8, a.name(), b.name());
        }
    };

    const HashContextAdapted = struct {
        pub fn hash(_: @This(), v: []const u8) u64 {
            return std.hash.Wyhash.hash(0, v);
        }
        pub fn eql(_: @This(), a: []const u8, b: *model.Entry) bool {
            return std.mem.eql(u8, a, b.name());
        }
    };

    fn init(dir: *model.Dir) Dir {
        var self = Dir{
            .dir = dir,
            .entries = Map.initContext(main.allocator, HashContext{}),
            .own_blocks = dir.entry.pack.blocks,
            .own_bytes = dir.entry.size,
        };
        var count: Map.Size = 0;
        var it = dir.sub;
        while (it) |e| : (it = e.next) count += 1;
        self.entries.ensureUnusedCapacity(count) catch unreachable;
        it = dir.sub;
        while (it) |e| : (it = e.next)
            self.entries.putAssumeCapacity(e, {});
        return self;
    }

    fn getEntry(self: *Dir, t: *Thread, etype: model.EType, isext: bool, name: []const u8) *model.Entry {
        if (self.entries.getKeyAdapted(name, HashContextAdapted{})) |e| {
            // XXX: In-place conversion may be possible in some cases.
            if (e.pack.etype == etype and (!isext or e.pack.isext)) {
                e.pack.isext = isext;
                _ = self.entries.removeAdapted(name, HashContextAdapted{});
                return e;
            }
        }
        const e = model.Entry.create(t.arena.allocator(), etype, isext, name);
        e.next = self.dir.sub;
        self.dir.sub = e;
        return e;
    }

    pub fn addSpecial(self: *Dir, t: *Thread, name: []const u8, st: sink.Special) void {
        self.dir.items += 1;
        if (st == .err) self.dir.pack.suberr = true;
        const e = self.getEntry(t, .file, false, name);
        e.file().?.pack = switch (st) {
            .err => .{ .err = true },
            .other_fs => .{ .other_fs = true },
            .kernfs => .{ .kernfs = true },
            .excluded => .{ .excluded = true },
        };
    }

    pub fn addStat(self: *Dir, t: *Thread, name: []const u8, stat: *const sink.Stat) *model.Entry {
        if (global.stats) {
            self.dir.items +|= 1;
            if (!stat.hlinkc) {
                self.dir.entry.pack.blocks +|= stat.blocks;
                self.dir.entry.size +|= stat.size;
            }
            if (self.dir.entry.ext()) |e| {
                if (stat.ext.mtime > e.mtime) e.mtime = stat.ext.mtime;
            }
        }
        const etype = if (stat.dir) model.EType.dir
                      else if (stat.hlinkc) model.EType.link
                      else model.EType.file;
        const e = self.getEntry(t, etype, main.config.extended, name);
        e.pack.blocks = stat.blocks;
        e.size = stat.size;
        if (e.dir()) |d| {
            d.parent = self.dir;
            d.pack.dev = model.devices.getId(stat.dev);
        }
        if (e.file()) |f| f.pack = .{ .notreg = !stat.dir and !stat.reg };
        if (e.link()) |l| {
            l.parent = self.dir;
            l.ino = stat.ino;
            l.pack.nlink = stat.nlink;
            model.inodes.lock.lock();
            defer model.inodes.lock.unlock();
            l.addLink();
        }
        if (e.ext()) |ext| ext.* = stat.ext;
        return e;
    }

    pub fn addDir(self: *Dir, t: *Thread, name: []const u8, stat: *const sink.Stat) Dir {
        return init(self.addStat(t, name, stat).dir().?);
    }

    pub fn setReadError(self: *Dir) void {
        self.dir.pack.err = true;
    }

    pub fn final(self: *Dir, parent: ?*Dir) void {
        // Remove entries we've not seen
        if (self.entries.count() > 0) {
            var it = &self.dir.sub;
            while (it.*) |e| {
                if (self.entries.getKey(e) == e) it.* = e.next
                else it = &e.next;
            }
        }
        self.entries.deinit();

        if (!global.stats) return;

        // Grab counts collected from subdirectories
        self.dir.entry.pack.blocks +|= self.blocks;
        self.dir.entry.size +|= self.bytes;
        self.dir.items +|= self.items;
        if (self.suberr) self.dir.pack.suberr = true;
        if (self.dir.entry.ext()) |e| {
            if (self.mtime > e.mtime) e.mtime = self.mtime;
        }

        // Add own counts to parent
        if (parent) |p| {
            p.lock.lock();
            defer p.lock.unlock();
            p.blocks +|= self.dir.entry.pack.blocks - self.own_blocks;
            p.bytes +|= self.dir.entry.size - self.own_bytes;
            p.items +|= self.dir.items;
            if (self.dir.entry.ext()) |e| {
                if (e.mtime > p.mtime) p.mtime = e.mtime;
            }
            if (self.suberr or self.dir.pack.suberr or self.dir.pack.err) p.suberr = true;
        }
    }
};

pub fn createRoot(path: []const u8, stat: *const sink.Stat) Dir {
    const p = global.root orelse blk: {
        model.root = model.Entry.create(main.allocator, .dir, main.config.extended, path).dir().?;
        break :blk model.root;
    };
    sink.global.state = .zeroing;
    if (p.items > 10_000) main.handleEvent(false, true);
    // Do the zeroStats() here, after the "root" entry has been
    // stat'ed and opened, so that a fatal error on refresh won't
    // zero-out the requested directory.
    p.entry.zeroStats(p.parent);
    sink.global.state = .running;
    p.entry.pack.blocks = stat.blocks;
    p.entry.size = stat.size;
    p.pack.dev = model.devices.getId(stat.dev);
    return Dir.init(p);
}

pub fn done() void {
    if (!global.stats) return;
    sink.global.state = .hlcnt;
    main.handleEvent(false, true);
    const dir = global.root orelse model.root;
    var it: ?*model.Dir = dir;
    while (it) |p| : (it = p.parent) {
        p.updateSubErr();
        if (p != dir) {
            p.entry.pack.blocks +|= dir.entry.pack.blocks;
            p.entry.size +|= dir.entry.size;
            p.items +|= dir.items + 1;
        }
    }
    model.inodes.addAllStats();
}
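
On a refresh, init() seeds entries with the directory's existing children so getEntry() can reuse them by name, and final() unlinks whatever was never seen again. The by-name probe uses std.HashMap's adapted-context API, which avoids constructing a temporary model.Entry just to do a lookup. A self-contained sketch of that pattern, with a toy Entry type standing in for model.Entry:

const std = @import("std");

const Entry = struct { name: []const u8 };

// Primary context: keys are *Entry, hashed by their name.
const Ctx = struct {
    pub fn hash(_: @This(), e: *Entry) u64 {
        return std.hash.Wyhash.hash(0, e.name);
    }
    pub fn eql(_: @This(), a: *Entry, b: *Entry) bool {
        return a == b or std.mem.eql(u8, a.name, b.name);
    }
};

// Adapted context: lookups take a plain name slice, but must hash
// exactly like the primary context for probes to land on the same slot.
const AdaptedCtx = struct {
    pub fn hash(_: @This(), name: []const u8) u64 {
        return std.hash.Wyhash.hash(0, name);
    }
    pub fn eql(_: @This(), name: []const u8, e: *Entry) bool {
        return std.mem.eql(u8, name, e.name);
    }
};

test "adapted lookup finds entries by name" {
    var map = std.HashMap(*Entry, void, Ctx, 80).initContext(std.testing.allocator, .{});
    defer map.deinit();
    var e = Entry{ .name = "foo" };
    try map.put(&e, {});
    const found = map.getKeyAdapted(@as([]const u8, "foo"), AdaptedCtx{});
    try std.testing.expect(found.? == &e);
}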

src/sink.zig

@@ -5,18 +5,24 @@ const std = @import("std");
 const main = @import("main.zig");
 const model = @import("model.zig");
 const mem_src = @import("mem_src.zig");
+const mem_sink = @import("mem_sink.zig");
+const json_export = @import("json_export.zig");
 const ui = @import("ui.zig");
 const util = @import("util.zig");
 
-// "sink" in this case is where the scan/import results (from scan.zig and
-// json_import.zig) are being forwarded to and processed. This code handles
-// aggregating the tree structure into memory or exporting it as JSON. Also
-// handles progress display.
+// Terminology note:
+// "source" is where scan results come from, these are scan.zig, mem_src.zig
+// and json_import.zig.
+// "sink" is where scan results go to. This file provides a generic sink API
+// for sources to use. The API forwards the results to specific sink
+// implementations (mem_sink.zig or json_export.zig) and provides progress
+// updates.
 //
 // API for sources:
 //
 // Single-threaded:
 //
+//   createThreads(1)
 //   dir = createRoot(name, stat)
 //   dir.addSpecial(name, opt)
 //   dir.addFile(name, stat)
@@ -25,9 +31,11 @@ const util = @import("util.zig");
 //   sub.addstuff();
 //   sub.unref();
 //   dir.unref();
+//   done()
 //
 // Multi-threaded interleaving:
 //
+//   createThreads(n)
 //   dir = createRoot(name, stat)
 //   dir.addSpecial(name, opt)
 //   dir.addFile(name, stat)
@@ -38,6 +46,7 @@ const util = @import("util.zig");
 //   dir.unref(); // <- no more direct descendants for x, but subdirs could still be active
 //   sub2.addStuff();
 //   sub2.unref(); // <- this is where 'dir' is really done.
+//   done()
 //
 // Rule:
 //   No concurrent method calls on a single Dir object, but objects may be passed between threads.
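
Put differently, the interleaving rules reduce to one refcount invariant: a Dir is finalized, and its totals folded into its parent, only after its own unref() and the unref() of every subdirectory have run, regardless of which thread gets there last. A toy model of just that protocol (hypothetical stand-in types, not the actual sink.Dir):

const std = @import("std");

const Dir = struct {
    refcnt: std.atomic.Value(usize) = std.atomic.Value(usize).init(1),
    parent: ?*Dir = null,

    // Opening a subdirectory takes a reference on the parent.
    fn addDir(d: *Dir) Dir {
        _ = d.refcnt.fetchAdd(1, .monotonic);
        return .{ .parent = d };
    }

    // Dropping the last reference is where final() would run; it then
    // releases the reference held on the parent.
    fn unref(d: *Dir) void {
        if (d.refcnt.fetchSub(1, .acq_rel) != 1) return;
        if (d.parent) |p| p.unref();
    }
};

test "a directory outlives its active subdirectories" {
    var root = Dir{};
    var sub = root.addDir();
    root.unref(); // no more direct descendants, but sub still holds a ref
    try std.testing.expect(root.refcnt.load(.monotonic) == 1);
    sub.unref(); // finalizes sub, which releases root
    try std.testing.expect(root.refcnt.load(.monotonic) == 0);
}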
@@ -60,377 +69,29 @@ pub const Stat = struct {
 
 pub const Special = enum { err, other_fs, kernfs, excluded };
 
-// JSON output is necessarily single-threaded and items MUST be added depth-first.
-const JsonWriter = struct {
-    fd: std.fs.File,
-    // Must be large enough to hold PATH_MAX*6 plus some overhead.
-    // (The 6 is because, in the worst case, every byte expands to a "\u####"
-    // escape, and we do pessimistic estimates here in order to avoid checking
-    // buffer lengths for each and every write operation)
-    buf: [64*1024]u8 = undefined,
-    off: usize = 0,
-    dir_entry_open: bool = false,
-
-    fn flush(ctx: *JsonWriter, bytes: usize) void {
-        @setCold(true);
-        // This can only really happen when the root path exceeds PATH_MAX,
-        // in which case we would probably have error'ed out earlier anyway.
-        if (bytes > ctx.buf.len) ui.die("Error writing JSON export: path too long.\n", .{});
-        ctx.fd.writeAll(ctx.buf[0..ctx.off]) catch |e|
-            ui.die("Error writing to file: {s}.\n", .{ ui.errorString(e) });
-        ctx.off = 0;
-    }
-
-    fn ensureSpace(ctx: *JsonWriter, bytes: usize) void {
-        if (bytes > ctx.buf.len - ctx.off) ctx.flush(bytes);
-    }
-
-    fn write(ctx: *JsonWriter, s: []const u8) void {
-        @memcpy(ctx.buf[ctx.off..][0..s.len], s);
-        ctx.off += s.len;
-    }
-
-    fn writeByte(ctx: *JsonWriter, b: u8) void {
-        ctx.buf[ctx.off] = b;
-        ctx.off += 1;
-    }
-
-    // Write escaped string contents, excluding the quotes.
-    fn writeStr(ctx: *JsonWriter, s: []const u8) void {
-        for (s) |b| {
-            if (b >= 0x20 and b != '"' and b != '\\' and b != 127) ctx.writeByte(b)
-            else switch (b) {
-                '\n' => ctx.write("\\n"),
-                '\r' => ctx.write("\\r"),
-                0x8 => ctx.write("\\b"),
-                '\t' => ctx.write("\\t"),
-                0xC => ctx.write("\\f"),
-                '\\' => ctx.write("\\\\"),
-                '"' => ctx.write("\\\""),
-                else => {
-                    ctx.write("\\u00");
-                    const hexdig = "0123456789abcdef";
-                    ctx.writeByte(hexdig[b>>4]);
-                    ctx.writeByte(hexdig[b&0xf]);
-                },
-            }
-        }
-    }
-
-    fn writeUint(ctx: *JsonWriter, n: u64) void {
-        // Based on std.fmt.formatInt
-        var a = n;
-        var buf: [24]u8 = undefined;
-        var index: usize = buf.len;
-        while (a >= 100) : (a = @divTrunc(a, 100)) {
-            index -= 2;
-            buf[index..][0..2].* = std.fmt.digits2(@as(usize, @intCast(a % 100)));
-        }
-        if (a < 10) {
-            index -= 1;
-            buf[index] = '0' + @as(u8, @intCast(a));
-        } else {
-            index -= 2;
-            buf[index..][0..2].* = std.fmt.digits2(@as(usize, @intCast(a)));
-        }
-        ctx.write(buf[index..]);
-    }
-
-    fn init(out: std.fs.File) *JsonWriter {
-        var ctx = main.allocator.create(JsonWriter) catch unreachable;
-        ctx.* = .{ .fd = out };
-        ctx.write("[1,2,{\"progname\":\"ncdu\",\"progver\":\"" ++ main.program_version ++ "\",\"timestamp\":");
-        ctx.writeUint(@intCast(@max(0, std.time.timestamp())));
-        ctx.writeByte('}');
-        return ctx;
-    }
-
-    // A newly written directory entry is left "open", i.e. the '}' to close
-    // the item object is not written, to allow for a setReadError() to be
-    // caught if one happens before the first sub entry.
-    // Any read errors after the first sub entry are thrown away, but that's
-    // just a limitation of the JSON format.
-    fn closeDirEntry(ctx: *JsonWriter, rderr: bool) void {
-        if (ctx.dir_entry_open) {
-            ctx.dir_entry_open = false;
-            if (rderr) ctx.write(",\"read_error\":true");
-            ctx.writeByte('}');
-        }
-    }
-
-    fn addSpecial(ctx: *JsonWriter, name: []const u8, t: Special) void {
-        ctx.closeDirEntry(false);
-        ctx.ensureSpace(name.len*6 + 1000);
-        // not necessarily correct, but mimics model.Entry.isDirectory()
-        const isdir = switch (t) {
-            .other_fs, .kernfs => true,
-            .err, .excluded => false,
-        };
-        ctx.write(if (isdir) ",\n[{\"name\":\"" else ",\n{\"name\":\"");
-        ctx.writeStr(name);
-        ctx.write(switch (t) {
-            .err => "\",\"read_error\":true}",
-            .other_fs => "\",\"excluded\":\"otherfs\"}",
-            .kernfs => "\",\"excluded\":\"kernfs\"}",
-            .excluded => "\",\"excluded\":\"pattern\"}",
-        });
-        if (isdir) ctx.writeByte(']');
-    }
-
-    fn writeStat(ctx: *JsonWriter, name: []const u8, stat: *const Stat, parent_dev: u64) void {
-        ctx.ensureSpace(name.len*6 + 1000);
-        ctx.write(if (stat.dir) ",\n[{\"name\":\"" else ",\n{\"name\":\"");
-        ctx.writeStr(name);
-        ctx.writeByte('"');
-        if (stat.size > 0) {
-            ctx.write(",\"asize\":");
-            ctx.writeUint(stat.size);
-        }
-        if (stat.blocks > 0) {
-            ctx.write(",\"dsize\":");
-            ctx.writeUint(util.blocksToSize(stat.blocks));
-        }
-        if (stat.dir and stat.dev != parent_dev) {
-            ctx.write(",\"dev\":");
-            ctx.writeUint(stat.dev);
-        }
-        if (stat.hlinkc) {
-            ctx.write(",\"ino\":");
-            ctx.writeUint(stat.ino);
-            ctx.write(",\"hlnkc\":true,\"nlink\":");
-            ctx.writeUint(stat.nlink);
-        }
-        if (!stat.dir and !stat.reg) ctx.write(",\"notreg\":true");
-        if (main.config.extended) {
-            ctx.write(",\"uid\":");
-            ctx.writeUint(stat.ext.uid);
-            ctx.write(",\"gid\":");
-            ctx.writeUint(stat.ext.gid);
-            ctx.write(",\"mode\":");
-            ctx.writeUint(stat.ext.mode);
-            ctx.write(",\"mtime\":");
-            ctx.writeUint(stat.ext.mtime);
-        }
-    }
-
-    fn addStat(ctx: *JsonWriter, name: []const u8, stat: *const Stat) void {
-        ctx.closeDirEntry(false);
-        ctx.writeStat(name, stat, undefined);
-        ctx.writeByte('}');
-    }
-
-    fn addDir(ctx: *JsonWriter, name: []const u8, stat: *const Stat, parent_dev: u64) void {
-        ctx.closeDirEntry(false);
-        ctx.writeStat(name, stat, parent_dev);
-        ctx.dir_entry_open = true;
-    }
-
-    fn setReadError(ctx: *JsonWriter) void {
-        ctx.closeDirEntry(true);
-    }
-
-    fn close(ctx: *JsonWriter) void {
-        ctx.ensureSpace(1000);
-        ctx.closeDirEntry(false);
-        ctx.writeByte(']');
-    }
-
-    fn done(ctx: *JsonWriter) void {
-        ctx.write("]\n");
-        ctx.flush(0);
-        ctx.fd.close();
-        main.allocator.destroy(ctx);
-    }
-};
-
-const MemDir = struct {
-    dir: *model.Dir,
-    entries: Map,
-    own_blocks: model.Blocks,
-    own_bytes: u64,
-
-    // Additional counts collected from subdirectories. Subdirs may run final()
-    // from separate threads so these need to be protected.
-    blocks: model.Blocks = 0,
-    bytes: u64 = 0,
-    items: u32 = 0,
-    mtime: u64 = 0,
-    suberr: bool = false,
-    lock: std.Thread.Mutex = .{},
-
-    const Map = std.HashMap(*model.Entry, void, HashContext, 80);
-
-    const HashContext = struct {
-        pub fn hash(_: @This(), e: *model.Entry) u64 {
-            return std.hash.Wyhash.hash(0, e.name());
-        }
-        pub fn eql(_: @This(), a: *model.Entry, b: *model.Entry) bool {
-            return a == b or std.mem.eql(u8, a.name(), b.name());
-        }
-    };
-
-    const HashContextAdapted = struct {
-        pub fn hash(_: @This(), v: []const u8) u64 {
-            return std.hash.Wyhash.hash(0, v);
-        }
-        pub fn eql(_: @This(), a: []const u8, b: *model.Entry) bool {
-            return std.mem.eql(u8, a, b.name());
-        }
-    };
-
-    fn init(dir: *model.Dir) MemDir {
-        var self = MemDir{
-            .dir = dir,
-            .entries = Map.initContext(main.allocator, HashContext{}),
-            .own_blocks = dir.entry.pack.blocks,
-            .own_bytes = dir.entry.size,
-        };
-        var count: Map.Size = 0;
-        var it = dir.sub;
-        while (it) |e| : (it = e.next) count += 1;
-        self.entries.ensureUnusedCapacity(count) catch unreachable;
-        it = dir.sub;
-        while (it) |e| : (it = e.next)
-            self.entries.putAssumeCapacity(e, {});
-        return self;
-    }
-
-    fn getEntry(self: *MemDir, alloc: std.mem.Allocator, etype: model.EType, isext: bool, name: []const u8) *model.Entry {
-        if (self.entries.getKeyAdapted(name, HashContextAdapted{})) |e| {
-            // XXX: In-place conversion may be possible in some cases.
-            if (e.pack.etype == etype and (!isext or e.pack.isext)) {
-                e.pack.isext = isext;
-                _ = self.entries.removeAdapted(name, HashContextAdapted{});
-                return e;
-            }
-        }
-        const e = model.Entry.create(alloc, etype, isext, name);
-        e.next = self.dir.sub;
-        self.dir.sub = e;
-        return e;
-    }
-
-    fn addSpecial(self: *MemDir, alloc: std.mem.Allocator, name: []const u8, t: Special) void {
-        self.dir.items += 1;
-        if (t == .err) self.dir.pack.suberr = true;
-        const e = self.getEntry(alloc, .file, false, name);
-        e.file().?.pack = switch (t) {
-            .err => .{ .err = true },
-            .other_fs => .{ .other_fs = true },
-            .kernfs => .{ .kernfs = true },
-            .excluded => .{ .excluded = true },
-        };
-    }
-
-    fn addStat(self: *MemDir, alloc: std.mem.Allocator, name: []const u8, stat: *const Stat) *model.Entry {
-        if (state.defer_json == null) {
-            self.dir.items +|= 1;
-            if (!stat.hlinkc) {
-                self.dir.entry.pack.blocks +|= stat.blocks;
-                self.dir.entry.size +|= stat.size;
-            }
-            if (self.dir.entry.ext()) |e| {
-                if (stat.ext.mtime > e.mtime) e.mtime = stat.ext.mtime;
-            }
-        }
-        const etype = if (stat.dir) model.EType.dir
-                      else if (stat.hlinkc) model.EType.link
-                      else model.EType.file;
-        const e = self.getEntry(alloc, etype, main.config.extended, name);
-        e.pack.blocks = stat.blocks;
-        e.size = stat.size;
-        if (e.dir()) |d| {
-            d.parent = self.dir;
-            d.pack.dev = model.devices.getId(stat.dev);
-        }
-        if (e.file()) |f| f.pack = .{ .notreg = !stat.dir and !stat.reg };
-        if (e.link()) |l| {
-            l.parent = self.dir;
-            l.ino = stat.ino;
-            l.pack.nlink = stat.nlink;
-            model.inodes.lock.lock();
-            defer model.inodes.lock.unlock();
-            l.addLink();
-        }
-        if (e.ext()) |ext| ext.* = stat.ext;
-        return e;
-    }
-
-    fn setReadError(self: *MemDir) void {
-        self.dir.pack.err = true;
-    }
-
-    fn final(self: *MemDir, parent: ?*MemDir) void {
-        // Remove entries we've not seen
-        if (self.entries.count() > 0) {
-            var it = &self.dir.sub;
-            while (it.*) |e| {
-                if (self.entries.getKey(e) == e) it.* = e.next
-                else it = &e.next;
-            }
-        }
-        self.entries.deinit();
-
-        if (state.defer_json != null) return;
-
-        // Grab counts collected from subdirectories
-        self.dir.entry.pack.blocks +|= self.blocks;
-        self.dir.entry.size +|= self.bytes;
-        self.dir.items +|= self.items;
-        if (self.suberr) self.dir.pack.suberr = true;
-        if (self.dir.entry.ext()) |e| {
-            if (self.mtime > e.mtime) e.mtime = self.mtime;
-        }
-
-        // Add own counts to parent
-        if (parent) |p| {
-            p.lock.lock();
-            defer p.lock.unlock();
-            p.blocks +|= self.dir.entry.pack.blocks - self.own_blocks;
-            p.bytes +|= self.dir.entry.size - self.own_bytes;
-            p.items +|= self.dir.items;
-            if (self.dir.entry.ext()) |e| {
-                if (e.mtime > p.mtime) p.mtime = e.mtime;
-            }
-            if (self.suberr or self.dir.pack.suberr or self.dir.pack.err) p.suberr = true;
-        }
-    }
-};
 
 pub const Dir = struct {
     refcnt: std.atomic.Value(usize) = std.atomic.Value(usize).init(1),
+    // (XXX: This allocation can be avoided when scanning to a MemDir)
     name: []const u8,
     parent: ?*Dir,
     out: Out,
 
     const Out = union(enum) {
-        mem: MemDir,
-        json: struct {
-            dev: u64,
-            wr: *JsonWriter,
-        },
+        mem: mem_sink.Dir,
+        json: json_export.Dir,
     };
 
     pub fn addSpecial(d: *Dir, t: *Thread, name: []const u8, sp: Special) void {
         _ = t.files_seen.fetchAdd(1, .monotonic);
         switch (d.out) {
-            .mem => |*m| m.addSpecial(t.arena.allocator(), name, sp),
-            .json => |j| j.wr.addSpecial(name, sp),
+            .mem => |*m| m.addSpecial(&t.sink.mem, name, sp),
+            .json => |*j| j.addSpecial(name, sp),
         }
         if (sp == .err) {
-            state.last_error_lock.lock();
-            defer state.last_error_lock.unlock();
-            if (state.last_error) |p| main.allocator.free(p);
+            global.last_error_lock.lock();
+            defer global.last_error_lock.unlock();
+            if (global.last_error) |p| main.allocator.free(p);
             const p = d.path();
-            state.last_error = std.fs.path.joinZ(main.allocator, &.{ p, name }) catch unreachable;
+            global.last_error = std.fs.path.joinZ(main.allocator, &.{ p, name }) catch unreachable;
             main.allocator.free(p);
         }
     }
@@ -440,8 +101,8 @@ pub const Dir = struct {
         _ = t.addBytes((stat.blocks *| 512) / @max(1, stat.nlink));
         std.debug.assert(!stat.dir);
         switch (d.out) {
-            .mem => |*m| _ = m.addStat(t.arena.allocator(), name, stat),
-            .json => |j| j.wr.addStat(name, stat),
+            .mem => |*m| _ = m.addStat(&t.sink.mem, name, stat),
+            .json => |*j| j.addStat(name, stat),
         }
     }
@@ -449,20 +110,15 @@ pub const Dir = struct {
         _ = t.files_seen.fetchAdd(1, .monotonic);
         _ = t.addBytes(stat.blocks *| 512);
         std.debug.assert(stat.dir);
+        std.debug.assert(d.out != .json or d.refcnt.load(.monotonic) == 1);
 
         const s = main.allocator.create(Dir) catch unreachable;
         s.* = .{
             .name = main.allocator.dupe(u8, name) catch unreachable,
             .parent = d,
             .out = switch (d.out) {
-                .mem => |*m| .{
-                    .mem = MemDir.init(m.addStat(t.arena.allocator(), name, stat).dir().?)
-                },
-                .json => |j| blk: {
-                    std.debug.assert(d.refcnt.load(.monotonic) == 1);
-                    j.wr.addDir(name, stat, j.dev);
-                    break :blk .{ .json = .{ .wr = j.wr, .dev = stat.dev } };
-                },
+                .mem => |*m| .{ .mem = m.addDir(&t.sink.mem, name, stat) },
+                .json => |*j| .{ .json = j.addDir(name, stat) },
             },
         };
         d.ref();
@@ -473,12 +129,12 @@ pub const Dir = struct {
         _ = t;
         switch (d.out) {
             .mem => |*m| m.setReadError(),
-            .json => |j| j.wr.setReadError(),
+            .json => |*j| j.setReadError(),
         }
-        state.last_error_lock.lock();
-        defer state.last_error_lock.unlock();
-        if (state.last_error) |p| main.allocator.free(p);
-        state.last_error = d.path();
+        global.last_error_lock.lock();
+        defer global.last_error_lock.unlock();
+        if (global.last_error) |p| main.allocator.free(p);
+        global.last_error = d.path();
     }
 
     fn path(d: *Dir) [:0]u8 {
@@ -508,7 +164,7 @@ pub const Dir = struct {
         switch (d.out) {
             .mem => |*m| m.final(if (d.parent) |p| &p.out.mem else null),
-            .json => |j| j.wr.close(),
+            .json => |*j| j.final(),
         }
 
         if (d.parent) |p| p.unref();
@@ -524,8 +180,11 @@ pub const Thread = struct {
     // On 32-bit architectures, bytes_seen is protected by the above mutex instead.
     bytes_seen: std.atomic.Value(u64) = std.atomic.Value(u64).init(0),
     files_seen: std.atomic.Value(u32) = std.atomic.Value(u32).init(0),
 
-    // Arena allocator for model.Entry structs, these are never freed.
-    arena: std.heap.ArenaAllocator = std.heap.ArenaAllocator.init(std.heap.page_allocator),
+    sink: union {
+        mem: mem_sink.Thread,
+        json: void,
+    } = .{.mem = .{}},
 
     fn addBytes(t: *Thread, bytes: u64) void {
         if (@bitSizeOf(usize) >= 64) _ = t.bytes_seen.fetchAdd(bytes, .monotonic)
@@ -553,76 +212,46 @@ pub const Thread = struct {
 };
 
-pub const state = struct {
-    pub var status: enum { done, err, zeroing, hlcnt, running } = .running;
+pub const global = struct {
+    pub var state: enum { done, err, zeroing, hlcnt, running } = .running;
     pub var threads: []Thread = undefined;
-    pub var out: Out = .{ .mem = null };
-    pub var defer_json: ?*JsonWriter = null;
+    pub var sink: enum { json, mem } = .mem;
     pub var last_error: ?[:0]u8 = null;
     var last_error_lock = std.Thread.Mutex{};
     var need_confirm_quit = false;
-
-    pub const Out = union(enum) {
-        mem: ?*model.Dir,
-        json: *JsonWriter,
-    };
 };
 
-pub fn setupJsonOutput(out: std.fs.File) void {
-    state.out = state.Out{ .json = JsonWriter.init(out) };
-}
-
 // Must be the first thing to call from a source; initializes global state.
 pub fn createThreads(num: usize) []Thread {
-    switch (state.out) {
-        .mem => {},
-        .json => |j| {
-            if (num > 1) {
-                state.out = state.Out{ .mem = null };
-                state.defer_json = j;
-            }
-        },
+    // JSON export does not support multiple threads, scan into memory first.
+    if (global.sink == .json and num > 1) {
+        global.sink = .mem;
+        mem_sink.global.stats = false;
     }
-    state.status = .running;
-    if (state.last_error) |p| main.allocator.free(p);
-    state.last_error = null;
-    state.threads = main.allocator.alloc(Thread, num) catch unreachable;
-    for (state.threads) |*t| t.* = .{};
-    return state.threads;
+    global.state = .running;
+    if (global.last_error) |p| main.allocator.free(p);
+    global.last_error = null;
+    global.threads = main.allocator.alloc(Thread, num) catch unreachable;
+    for (global.threads) |*t| t.* = .{};
+    return global.threads;
 }
 
 // Must be the last thing to call from a source.
 pub fn done() void {
-    switch (state.out) {
-        .mem => if (state.defer_json == null) {
-            state.status = .hlcnt;
-            main.handleEvent(false, true);
-            const dir = state.out.mem orelse model.root;
-            var it: ?*model.Dir = dir;
-            while (it) |p| : (it = p.parent) {
-                p.updateSubErr();
-                if (p != dir) {
-                    p.entry.pack.blocks +|= dir.entry.pack.blocks;
-                    p.entry.size +|= dir.entry.size;
-                    p.items +|= dir.items + 1;
-                }
-            }
-            model.inodes.addAllStats();
-        },
-        .json => |j| j.done(),
+    switch (global.sink) {
+        .mem => mem_sink.done(),
+        .json => json_export.done(),
     }
-    state.status = .done;
-    main.allocator.free(state.threads);
+    global.state = .done;
+    main.allocator.free(global.threads);
 
     // We scanned into memory, now we need to scan from memory to JSON
-    if (state.defer_json) |j| {
-        state.out = state.Out{ .json = j };
-        state.defer_json = null;
+    if (global.sink == .mem and !mem_sink.global.stats) {
+        global.sink = .json;
         mem_src.run(model.root);
     }
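
A sketch of the scheduling decision made in createThreads() and done() above, written as a hypothetical helper rather than the inline control flow the commit actually uses: JSON must be written depth-first from a single thread, so a multi-threaded scan fills memory first and done() replays the tree through mem_src.run().

const std = @import("std");

const SinkKind = enum { json, mem };

// Hypothetical distillation of the two-pass rule; not part of this commit.
fn plan(requested: SinkKind, threads: usize) struct { scan: SinkKind, replay_to_json: bool } {
    if (requested == .json and threads > 1)
        return .{ .scan = .mem, .replay_to_json = true };
    return .{ .scan = requested, .replay_to_json = false };
}

test "multi-threaded JSON export scans to memory first" {
    try std.testing.expect(plan(.json, 4).replay_to_json);
    try std.testing.expect(!plan(.json, 1).replay_to_json);
    try std.testing.expect(!plan(.mem, 4).replay_to_json);
}

Note that mem_sink.global.stats doubles as the deferred-JSON flag: setting it false also skips the aggregate-count bookkeeping during the first pass, since only the tree structure is needed for the replay.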
@@ -632,35 +261,14 @@ pub fn done() void {
 
 pub fn createRoot(path: []const u8, stat: *const Stat) *Dir {
-    const out = switch (state.out) {
-        .mem => |parent| sw: {
-            const p = parent orelse blk: {
-                model.root = model.Entry.create(main.allocator, .dir, main.config.extended, path).dir().?;
-                break :blk model.root;
-            };
-            state.status = .zeroing;
-            if (p.items > 10_000) main.handleEvent(false, true);
-            // Do the zeroStats() here, after the "root" entry has been
-            // stat'ed and opened, so that a fatal error on refresh won't
-            // zero-out the requested directory.
-            p.entry.zeroStats(p.parent);
-            state.status = .running;
-            p.entry.pack.blocks = stat.blocks;
-            p.entry.size = stat.size;
-            p.pack.dev = model.devices.getId(stat.dev);
-            break :sw Dir.Out{ .mem = MemDir.init(p) };
-        },
-        .json => |ctx| sw: {
-            ctx.addDir(path, stat, 0);
-            break :sw Dir.Out{ .json = .{ .wr = ctx, .dev = stat.dev } };
-        },
-    };
     const d = main.allocator.create(Dir) catch unreachable;
     d.* = .{
         .name = main.allocator.dupe(u8, path) catch unreachable,
         .parent = null,
-        .out = out,
+        .out = switch (global.sink) {
+            .mem => .{ .mem = mem_sink.createRoot(path, stat) },
+            .json => .{ .json = json_export.createRoot(path, stat) },
+        },
     };
     return d;
 }
@@ -686,13 +294,13 @@ fn drawConsole() void {
         st.lines_written -= 1;
     }
 
-    if (state.status == .hlcnt) {
+    if (global.state == .hlcnt) {
         wr.writeAll("Counting hardlinks...\n") catch {};
-    } else if (state.status == .running) {
+    } else if (global.state == .running) {
         var bytes: u64 = 0;
         var files: u64 = 0;
-        for (state.threads) |*t| {
+        for (global.threads) |*t| {
            bytes +|= t.getBytes();
            files += t.files_seen.load(.monotonic);
        }
@@ -700,7 +308,7 @@ fn drawConsole() void {
        wr.print("{} files / {s}{s}\n", .{files, r.num(), r.unit}) catch {};
        st.lines_written += 1;
 
-       for (state.threads, 0..) |*t, i| {
+       for (global.threads, 0..) |*t, i| {
            const dir = blk: {
                t.lock.lock();
                defer t.lock.unlock();
@@ -721,14 +329,14 @@ fn drawProgress() void {
     var bytes: u64 = 0;
     var files: u64 = 0;
-    for (state.threads) |*t| {
+    for (global.threads) |*t| {
         bytes +|= t.getBytes();
         files += t.files_seen.load(.monotonic);
     }
 
     ui.init();
     const width = ui.cols -| 5;
-    const numthreads: u32 = @intCast(@min(state.threads.len, @max(1, ui.rows -| 10)));
+    const numthreads: u32 = @intCast(@min(global.threads.len, @max(1, ui.rows -| 10)));
     const box = ui.Box.create(8 + numthreads, width, "Scanning...");
     box.move(2, 2);
     ui.addstr("Total items: ");
@@ -743,7 +351,7 @@ fn drawProgress() void {
     for (0..numthreads) |i| {
         box.move(3+@as(u32, @intCast(i)), 4);
         const dir = blk: {
-            const t = &state.threads[i];
+            const t = &global.threads[i];
             t.lock.lock();
             defer t.lock.unlock();
             break :blk if (t.current_dir) |d| d.path() else null;
@@ -753,9 +361,9 @@ fn drawProgress() void {
     }
 
     blk: {
-        state.last_error_lock.lock();
-        defer state.last_error_lock.unlock();
-        const err = state.last_error orelse break :blk;
+        global.last_error_lock.lock();
+        defer global.last_error_lock.unlock();
+        const err = global.last_error orelse break :blk;
         box.move(4 + numthreads, 2);
         ui.style(.bold);
         ui.addstr("Warning: ");
@@ -766,7 +374,7 @@ fn drawProgress() void {
         ui.addstr("some directory sizes may not be correct.");
     }
 
-    if (state.need_confirm_quit) {
+    if (global.need_confirm_quit) {
         box.move(6 + numthreads, width -| 20);
         ui.addstr("Press ");
         ui.style(.key);
@@ -807,7 +415,7 @@ fn drawError() void {
     box.move(2, 2);
     ui.addstr("Unable to open directory:");
     box.move(3, 4);
-    ui.addstr(ui.shorten(ui.toUtf8(state.last_error.?), width -| 10));
+    ui.addstr(ui.shorten(ui.toUtf8(global.last_error.?), width -| 10));
     box.move(4, width -| 27);
     ui.addstr("Press any key to continue");
@@ -826,7 +434,7 @@ pub fn draw() void {
     switch (main.config.scan_ui.?) {
         .none => {},
         .line => drawConsole(),
-        .full => switch (state.status) {
+        .full => switch (global.state) {
            .done => {},
            .err => drawError(),
            .zeroing => {
@@ -846,7 +454,7 @@ pub fn draw() void {
 
 pub fn keyInput(ch: i32) void {
-    switch (state.status) {
+    switch (global.state) {
         .done => {},
         .err => main.state = .browse,
         .zeroing => {},
@@ -854,11 +462,11 @@ pub fn keyInput(ch: i32) void {
         .running => {
             switch (ch) {
                 'q' => {
-                    if (main.config.confirm_quit) state.need_confirm_quit = !state.need_confirm_quit
+                    if (main.config.confirm_quit) global.need_confirm_quit = !global.need_confirm_quit
                     else ui.quit();
                 },
-                'y', 'Y' => if (state.need_confirm_quit) ui.quit(),
-                else => state.need_confirm_quit = false,
+                'y', 'Y' => if (global.need_confirm_quit) ui.quit(),
+                else => global.need_confirm_quit = false,
             }
         },
     }