// ncdu-zig/src/model.zig

// SPDX-FileCopyrightText: Yorhel <projects@yorhel.nl>
// SPDX-License-Identifier: MIT
const std = @import("std");
const main = @import("main.zig");
const ui = @import("ui.zig");
const util = @import("util.zig");
// Numbers are used in the binfmt export, so must be stable.
pub const EType = enum(i3) {
dir = 0,
reg = 1,
nonreg = 2,
link = 3,
err = -1,
pattern = -2,
otherfs = -3,
kernfs = -4,
pub fn base(t: EType) EType {
return switch (t) {
.dir, .link => t,
else => .reg,
};
}
// Whether this entry should be displayed as a "directory".
// Some dirs are actually represented in this data model as a File for efficiency.
pub fn isDirectory(t: EType) bool {
return switch (t) {
.dir, .otherfs, .kernfs => true,
else => false,
};
}
};
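// A minimal usage sketch of the helpers above: base() collapses every special
// type onto .reg unless it is a dir or link, and isDirectory() also treats the
// excluded-filesystem markers as directories for display purposes.
test "EType helpers" {
    try std.testing.expectEqual(EType.dir, EType.dir.base());
    try std.testing.expectEqual(EType.link, EType.link.base());
    try std.testing.expectEqual(EType.reg, EType.err.base());
    try std.testing.expect(EType.kernfs.isDirectory());
    try std.testing.expect(!EType.link.isDirectory());
}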
// Type for the Entry.Packed.blocks field. Smaller than a u64 to make room for flags.
pub const Blocks = u60;
// Entries read from bin_reader may refer to other entries by itemref rather than pointer.
// This is a hack that allows browser.zig to use the same types for in-memory
// and bin_reader-backed directory trees. Most code can only deal with
// in-memory trees and accesses the .ptr field directly.
pub const Ref = extern union {
ptr: ?*Entry align(1),
ref: u64 align(1),
pub fn isNull(r: Ref) bool {
if (main.config.binreader) return r.ref == std.math.maxInt(u64)
else return r.ptr == null;
}
};
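// A minimal sketch of why isNull() must know which kind of tree it is looking
// at: the in-memory null (a zero pointer) and the bin_reader sentinel
// (maxInt(u64)) are different bit patterns in the same 8 bytes.
test "Ref null sentinels" {
    const mem_null: Ref = .{ .ptr = null };
    try std.testing.expectEqual(@as(u64, 0), mem_null.ref);
    const bin_null: Ref = .{ .ref = std.math.maxInt(u64) };
    try std.testing.expect(bin_null.ptr != null);
}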
// Memory layout:
// (Ext +) Dir + name
// or: (Ext +) Link + name
// or: (Ext +) File + name
//
// Entry is always the first part of Dir, Link and File, so a pointer cast to
// *Entry is always safe and an *Entry can be cast back to the full type. The
// Ext struct, if present, is placed directly before the Entry in memory.
// All fields are stored unaligned (align(1)), which is great for saving
// memory but perhaps not very great for code size or performance.
pub const Entry = extern struct {
pack: Packed align(1),
size: u64 align(1) = 0,
next: Ref = .{ .ptr = null },
pub const Packed = packed struct(u64) {
etype: EType,
isext: bool,
blocks: Blocks = 0, // 512-byte blocks
};
const Self = @This();
pub fn dir(self: *Self) ?*Dir {
return if (self.pack.etype == .dir) @ptrCast(self) else null;
}
pub fn link(self: *Self) ?*Link {
return if (self.pack.etype == .link) @ptrCast(self) else null;
}
pub fn file(self: *Self) ?*File {
return if (self.pack.etype != .dir and self.pack.etype != .link) @ptrCast(self) else null;
}
pub fn name(self: *const Self) [:0]const u8 {
const self_name = switch (self.pack.etype) {
.dir => &@as(*const Dir, @ptrCast(self)).name,
.link => &@as(*const Link, @ptrCast(self)).name,
else => &@as(*const File, @ptrCast(self)).name,
};
const name_ptr: [*:0]const u8 = @ptrCast(self_name);
return std.mem.sliceTo(name_ptr, 0);
}
pub fn nameHash(self: *const Self) u64 {
return std.hash.Wyhash.hash(0, self.name());
}
pub fn ext(self: *Self) ?*Ext {
if (!self.pack.isext) return null;
return @ptrCast(@as([*]Ext, @ptrCast(self)) - 1);
}
fn alloc(comptime T: type, allocator: std.mem.Allocator, etype: EType, isext: bool, ename: []const u8) *Entry {
const size = (if (isext) @as(usize, @sizeOf(Ext)) else 0) + @sizeOf(T) + ename.len + 1;
var ptr = blk: while (true) {
if (allocator.allocWithOptions(u8, size, 1, null)) |p| break :blk p
else |_| {}
ui.oom();
};
if (isext) {
@as(*Ext, @ptrCast(ptr)).* = .{};
ptr = ptr[@sizeOf(Ext)..];
}
const e: *T = @ptrCast(ptr);
e.* = .{ .entry = .{ .pack = .{ .etype = etype, .isext = isext } } };
const n = @as([*]u8, @ptrCast(&e.name))[0..ename.len+1];
@memcpy(n[0..ename.len], ename);
n[ename.len] = 0;
return &e.entry;
}
pub fn create(allocator: std.mem.Allocator, etype: EType, isext: bool, ename: []const u8) *Entry {
return switch (etype) {
.dir => alloc(Dir, allocator, etype, isext, ename),
.link => alloc(Link, allocator, etype, isext, ename),
else => alloc(File, allocator, etype, isext, ename),
};
}
pub fn destroy(self: *Self, allocator: std.mem.Allocator) void {
const ptr: [*]u8 = if (self.ext()) |e| @ptrCast(e) else @ptrCast(self);
const esize: usize = switch (self.pack.etype) {
.dir => @sizeOf(Dir),
.link => @sizeOf(Link),
else => @sizeOf(File),
};
const size = (if (self.pack.isext) @as(usize, @sizeOf(Ext)) else 0) + esize + self.name().len + 1;
allocator.free(ptr[0..size]);
}
fn hasErr(self: *Self) bool {
return
if (self.dir()) |d| d.pack.err or d.pack.suberr
else self.pack.etype == .err;
}
fn removeLinks(self: *Entry) void {
if (self.dir()) |d| {
var it = d.sub.ptr;
while (it) |e| : (it = e.next.ptr) e.removeLinks();
}
if (self.link()) |l| l.removeLink();
}
fn zeroStatsRec(self: *Entry) void {
self.pack.blocks = 0;
self.size = 0;
if (self.dir()) |d| {
d.items = 0;
d.pack.err = false;
d.pack.suberr = false;
var it = d.sub.ptr;
while (it) |e| : (it = e.next.ptr) e.zeroStatsRec();
}
}
// Recursively sets the stats of this item and its sub-items to zero and
// removes the counts from parent directories, as if this item does not
// exist in the tree.
// XXX: Does not update the 'suberr' flag of parent directories; make sure
// to call updateSubErr() afterwards.
pub fn zeroStats(self: *Entry, parent: ?*Dir) void {
self.removeLinks();
var it = parent;
while (it) |p| : (it = p.parent) {
p.entry.pack.blocks -|= self.pack.blocks;
p.entry.size -|= self.size;
p.items -|= 1 + (if (self.dir()) |d| d.items else 0);
}
self.zeroStatsRec();
}
};
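// A minimal sketch of the layout contract described above Entry: an entry
// created with isext == true carries an Ext directly in front of it,
// reachable via ext(), and name() reads the 0-terminated name stored after
// the struct.
test "entry with extended info" {
    const e = Entry.create(std.testing.allocator, .reg, true, "x");
    defer e.destroy(std.testing.allocator);
    try std.testing.expect(e.ext() != null);
    try std.testing.expect(e.ext().?.isEmpty());
    try std.testing.expectEqualStrings("x", e.name());
}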
const DevId = u30; // Can be reduced to make room for more flags in Dir.Packed.
pub const Dir = extern struct {
entry: Entry,
sub: Ref = .{ .ptr = null },
parent: ?*Dir align(1) = null,
// entry.{blocks,size}: Total size of all unique files + dirs. Non-shared hardlinks are counted only once.
// (i.e. the space you'll need if you created a filesystem with only this dir)
// shared_*: Unique hardlinks that still have references outside of this directory.
// (i.e. the space you won't reclaim by deleting this dir)
// (space reclaimed by deleting a dir =~ entry. - shared_)
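// Example: a 1 MiB file hardlinked both into this dir and into a dir outside
// of it adds 1 MiB to entry.size and 1 MiB to shared_size; deleting this dir
// would reclaim roughly entry.size - shared_size.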
shared_blocks: u64 align(1) = 0,
shared_size: u64 align(1) = 0,
items: u32 align(1) = 0,
pack: Packed align(1) = .{},
// Only used to find the @offsetOf; the name is written at this point as a 0-terminated string.
// (Old C habits die hard)
name: [0]u8 = undefined,
pub const Packed = packed struct {
// Indexes into the global 'devices.list' array
dev: DevId = 0,
err: bool = false,
suberr: bool = false,
};
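// Worked example, assuming the tree was scanned from "/": calling
// fmtPath(true, &out) on the Dir for /home/user appends "/home/user", while
// fmtPath(false, &out) appends "home/user" (the root component is skipped).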
pub fn fmtPath(self: *const @This(), withRoot: bool, out: *std.ArrayList(u8)) void {
if (!withRoot and self.parent == null) return;
var components = std.ArrayList([:0]const u8).init(main.allocator);
defer components.deinit();
var it: ?*const @This() = self;
while (it) |e| : (it = e.parent)
if (withRoot or e.parent != null)
components.append(e.entry.name()) catch unreachable;
var i: usize = components.items.len-1;
while (true) {
if (i != components.items.len-1 and !(out.items.len != 0 and out.items[out.items.len-1] == '/')) out.append('/') catch unreachable;
out.appendSlice(components.items[i]) catch unreachable;
if (i == 0) break;
i -= 1;
}
}
// Only updates the suberr of this Dir, assumes child dirs have already
// been updated and does not propagate to parents.
pub fn updateSubErr(self: *@This()) void {
self.pack.suberr = false;
var sub = self.sub.ptr;
while (sub) |e| : (sub = e.next.ptr) {
if (e.hasErr()) {
self.pack.suberr = true;
break;
}
}
}
};
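// A minimal sketch, using a hand-wired two-node tree rather than one built by
// the scanner, of how Entry.zeroStats() unwinds a child's contribution from
// the counters of its parent Dir.
test "zeroStats removes counts from parents" {
    const de = Entry.create(std.testing.allocator, .dir, false, "d");
    defer de.destroy(std.testing.allocator);
    const fe = Entry.create(std.testing.allocator, .reg, false, "f");
    defer fe.destroy(std.testing.allocator);
    const d = de.dir().?;
    fe.size = 512;
    fe.pack.blocks = 1;
    d.sub = .{ .ptr = fe };
    d.entry.size = 512;
    d.entry.pack.blocks = 1;
    d.items = 1;
    fe.zeroStats(d);
    try std.testing.expectEqual(@as(u64, 0), d.entry.size);
    try std.testing.expectEqual(@as(u32, 0), d.items);
    try std.testing.expectEqual(@as(u64, 0), fe.size);
}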
// File that's been hardlinked (i.e. nlink > 1)
pub const Link = extern struct {
entry: Entry,
parent: *Dir align(1) = undefined,
next: *Link align(1) = undefined, // circular linked list of all *Link nodes with the same dev,ino.
prev: *Link align(1) = undefined,
// dev is inherited from the parent Dir
ino: u64 align(1) = undefined,
pack: Pack align(1) = .{},
name: [0]u8 = undefined,
const Pack = packed struct(u32) {
// Whether this Inode is counted towards the parent directories.
// Is kept synchronized between all Link nodes with the same dev/ino.
counted: bool = false,
// Number of links for this inode. When set to '0', we don't know the
// actual nlink count, which happens for old JSON dumps.
nlink: u31 = undefined,
};
// Return value should be freed with main.allocator.
pub fn path(self: *const @This(), withRoot: bool) [:0]const u8 {
var out = std.ArrayList(u8).init(main.allocator);
self.parent.fmtPath(withRoot, &out);
out.append('/') catch unreachable;
out.appendSlice(self.entry.name()) catch unreachable;
return out.toOwnedSliceSentinel(0) catch unreachable;
}
// Add this link to the inodes map and mark it as 'uncounted'.
pub fn addLink(l: *@This()) void {
const d = inodes.map.getOrPut(l) catch unreachable;
if (!d.found_existing) {
l.next = l;
l.prev = l;
} else {
inodes.setStats(d.key_ptr.*, false);
l.next = d.key_ptr.*;
l.prev = d.key_ptr.*.prev;
l.next.prev = l;
l.prev.next = l;
}
inodes.addUncounted(l);
}
// Remove this link from the inodes map and remove its stats from parent directories.
fn removeLink(l: *@This()) void {
inodes.setStats(l, false);
const entry = inodes.map.getEntry(l) orelse return;
if (l.next == l) {
_ = inodes.map.remove(l);
_ = inodes.uncounted.remove(l);
} else {
// XXX: If this link is actually removed from the filesystem, then
// the nlink count of the existing links should be updated to
// reflect that. But we can't do that here, because this function
// is also called before doing a filesystem refresh - in which case
// the nlink count likely won't change. Best we can hope for is
// that a refresh will encounter another link to the same inode and
// trigger an nlink change.
if (entry.key_ptr.* == l)
entry.key_ptr.* = l.next;
inodes.addUncounted(l.next);
l.next.prev = l.prev;
l.prev.next = l.next;
}
}
};
// Anything that's not an (indexed) directory or hardlink. Excluded directories are also "Files".
pub const File = extern struct {
entry: Entry,
name: [0]u8 = undefined,
};
pub const Ext = extern struct {
pack: Pack = .{},
mtime: u64 align(1) = 0,
uid: u32 align(1) = 0,
gid: u32 align(1) = 0,
mode: u16 align(1) = 0,
pub const Pack = packed struct(u8) {
hasmtime: bool = false,
hasuid: bool = false,
hasgid: bool = false,
hasmode: bool = false,
_pad: u4 = 0,
};
pub fn isEmpty(e: *const Ext) bool {
return !e.pack.hasmtime and !e.pack.hasuid and !e.pack.hasgid and !e.pack.hasmode;
}
};
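// A minimal sketch: an Ext only carries information once at least one of the
// has* flags is set; the value fields alone don't matter.
test "Ext.isEmpty" {
    var e: Ext = .{};
    try std.testing.expect(e.isEmpty());
    e.pack.hasmtime = true;
    e.mtime = 1234567890;
    try std.testing.expect(!e.isEmpty());
}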
// List of st_dev entries. Those are typically 64 bits, but that's quite a waste
// of space when a typical scan won't cover many unique devices.
pub const devices = struct {
var lock = std.Thread.Mutex{};
// id -> dev
pub var list = std.ArrayList(u64).init(main.allocator);
// dev -> id
var lookup = std.AutoHashMap(u64, DevId).init(main.allocator);
pub fn getId(dev: u64) DevId {
lock.lock();
defer lock.unlock();
const d = lookup.getOrPut(dev) catch unreachable;
if (!d.found_existing) {
if (list.items.len >= std.math.maxInt(DevId)) ui.die("Maximum number of device identifiers exceeded.\n", .{});
d.value_ptr.* = @as(DevId, @intCast(list.items.len));
list.append(dev) catch unreachable;
}
return d.value_ptr.*;
}
};
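// Usage sketch (illustrative only; 'st' and 'dir' are assumed to come from
// the scanning code):
//   dir.pack.dev = devices.getId(st.dev);              // store the compact id
//   const real_dev = devices.list.items[dir.pack.dev]; // map back for export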
// Lookup table for ino -> *Link entries, used for hard link counting.
pub const inodes = struct {
// Keys are hashed by their (dev,ino), the *Link points to an arbitrary
// node in the list. Link entries with the same dev/ino are part of a
// circular linked list, so you can iterate through all of them with this
// single pointer.
const Map = std.HashMap(*Link, void, HashContext, 80);
pub var map = Map.init(main.allocator);
// List of nodes in 'map' with !counted, to speed up addAllStats().
// If this list grows large relative to the number of nodes in 'map', then
// this list is cleared and uncounted_full is set instead, so that
// addAllStats() will do a full iteration over 'map'.
var uncounted = std.HashMap(*Link, void, HashContext, 80).init(main.allocator);
var uncounted_full = true; // start with true for the initial scan
pub var lock = std.Thread.Mutex{};
const HashContext = struct {
pub fn hash(_: @This(), l: *Link) u64 {
var h = std.hash.Wyhash.init(0);
h.update(std.mem.asBytes(&@as(u32, l.parent.pack.dev)));
h.update(std.mem.asBytes(&l.ino));
return h.final();
}
pub fn eql(_: @This(), a: *Link, b: *Link) bool {
return a.ino == b.ino and a.parent.pack.dev == b.parent.pack.dev;
}
};
fn addUncounted(l: *Link) void {
if (uncounted_full) return;
if (uncounted.count() > map.count()/8) {
uncounted.clearAndFree();
uncounted_full = true;
} else
(uncounted.getOrPut(l) catch unreachable).key_ptr.* = l;
}
// Add/remove this inode from the parent Dir sizes. When removing stats,
// the list of *Links and their sizes and counts must be in the exact same
// state as when the stats were added. Hence, any modification to the Link
// state should be preceded by a setStats(.., false).
fn setStats(l: *Link, add: bool) void {
if (l.pack.counted == add) return;
var nlink: u31 = 0;
var inconsistent = false;
var dirs = std.AutoHashMap(*Dir, u32).init(main.allocator);
defer dirs.deinit();
var it = l;
while (true) {
it.pack.counted = add;
nlink += 1;
if (it.pack.nlink != l.pack.nlink) inconsistent = true;
var parent: ?*Dir = it.parent;
while (parent) |p| : (parent = p.parent) {
const de = dirs.getOrPut(p) catch unreachable;
if (de.found_existing) de.value_ptr.* += 1
else de.value_ptr.* = 1;
}
it = it.next;
if (it == l)
break;
}
// There aren't many sensible things we can do when we encounter
// inconsistent nlink counts. The current approach is to use the number of
// times we've seen this link in our tree as a fallback for when the
// nlink counts don't match. May want to add a warning of some
// sort to the UI at some point.
if (!inconsistent and l.pack.nlink >= nlink) nlink = l.pack.nlink;
// XXX: We're also not testing for inconsistent entry sizes, instead
// using the given 'l' size for all Links. Might warrant a warning as
// well.
var dir_iter = dirs.iterator();
if (add) {
while (dir_iter.next()) |de| {
de.key_ptr.*.entry.pack.blocks +|= l.entry.pack.blocks;
de.key_ptr.*.entry.size +|= l.entry.size;
if (de.value_ptr.* < nlink) {
de.key_ptr.*.shared_blocks +|= l.entry.pack.blocks;
de.key_ptr.*.shared_size +|= l.entry.size;
}
}
} else {
while (dir_iter.next()) |de| {
de.key_ptr.*.entry.pack.blocks -|= l.entry.pack.blocks;
de.key_ptr.*.entry.size -|= l.entry.size;
if (de.value_ptr.* < nlink) {
de.key_ptr.*.shared_blocks -|= l.entry.pack.blocks;
de.key_ptr.*.shared_size -|= l.entry.size;
}
}
}
}
// counters to track progress for addAllStats()
pub var add_total: usize = 0;
pub var add_done: usize = 0;
pub fn addAllStats() void {
if (uncounted_full) {
add_total = map.count();
add_done = 0;
var it = map.keyIterator();
while (it.next()) |e| {
setStats(e.*, true);
add_done += 1;
if ((add_done & 65) == 0) main.handleEvent(false, false);
}
} else {
add_total = uncounted.count();
add_done = 0;
var it = uncounted.keyIterator();
while (it.next()) |u| {
if (map.getKey(u.*)) |e| setStats(e, true);
add_done += 1;
if ((add_done & 65) == 0) main.handleEvent(false, false);
}
}
uncounted_full = false;
if (uncounted.count() > 0)
uncounted.clearAndFree();
}
};
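// Rough lifecycle sketch for hardlink accounting; the real driver lives in
// the scanning/refresh code, and 'l', 'e' and 'parent' are assumed to be
// fully initialized. Locking via inodes.lock is omitted here.
//   l.addLink();          // join the circular (dev,ino) list and queue the
//                         // inode as uncounted
//   inodes.addAllStats(); // after the scan: fold uncounted inodes into the
//                         // parent Dir totals via setStats()
//   e.zeroStats(parent);  // removal/refresh path; calls removeLink() through
//                         // removeLinks() before the counts are dropped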
pub var root: *Dir = undefined;
test "entry" {
const e = Entry.create(std.testing.allocator, .reg, false, "hello");
defer e.destroy(std.testing.allocator);
try std.testing.expectEqual(e.pack.etype, .reg);
try std.testing.expect(!e.pack.isext);
try std.testing.expectEqualStrings(e.name(), "hello");
}