From 705bd8907d8dc717e3f8000e80496c16d21358b1 Mon Sep 17 00:00:00 2001
From: Yorhel
Date: Wed, 17 Jul 2024 14:04:17 +0200
Subject: [PATCH] Move nlink count from inode map into Link node

This adds another +4 bytes* to Link nodes, but allows the in-memory
tree to be properly exported to JSON, which we'll need for
multithreaded export.

It's also slightly nicer conceptually, as we can now detect
inconsistencies without throwing away the actual data, so we have a
better chance of recovering on a partial refresh. Still unlikely,
anyway, but whatever.

(* but saves 4+ bytes per unique inode in the inode map, so the memory
increase is only noticeable when links are repeated in the scanned
tree. Admittedly, that may be the common case)
---
 src/browser.zig |  2 +-
 src/model.zig   | 83 ++++++++++++++++++++++++++-----------------------
 src/sink.zig    |  3 +-
 3 files changed, 47 insertions(+), 41 deletions(-)

diff --git a/src/browser.zig b/src/browser.zig
index 3c87a54..a073138 100644
--- a/src/browser.zig
+++ b/src/browser.zig
@@ -489,7 +489,7 @@ const info = struct {
         box.move(row.*, 3);
         ui.style(.bold);
         ui.addstr(" Link count: ");
-        ui.addnum(.default, model.inodes.map.get(l).?.nlink);
+        ui.addnum(.default, l.pack.nlink);
         box.move(row.*, 23);
         ui.style(.bold);
         ui.addstr(" Inode: ");
diff --git a/src/model.zig b/src/model.zig
index 957e462..9c37d01 100644
--- a/src/model.zig
+++ b/src/model.zig
@@ -210,8 +210,18 @@ pub const Link = extern struct {
     prev: *Link align(1) = undefined,
     // dev is inherited from the parent Dir
     ino: u64 align(1) = undefined,
+    pack: Pack align(1) = .{},
     name: [0]u8 = undefined,
 
+    const Pack = packed struct(u32) {
+        // Whether this Inode is counted towards the parent directories.
+        // Is kept synchronized between all Link nodes with the same dev/ino.
+        counted: bool = false,
+        // Number of links for this inode. When set to '0', we don't know the
+        // actual nlink count, which happens for old JSON dumps.
+        nlink: u31 = 0,
+    };
+
     // Return value should be freed with main.allocator.
     pub fn path(self: *const @This(), withRoot: bool) [:0]const u8 {
         var out = std.ArrayList(u8).init(main.allocator);
@@ -222,17 +232,13 @@ pub const Link = extern struct {
     }
 
     // Add this link to the inodes map and mark it as 'uncounted'.
-    pub fn addLink(l: *@This(), nlink: u31) void {
-        var d = inodes.map.getOrPut(l) catch unreachable;
+    pub fn addLink(l: *@This()) void {
+        const d = inodes.map.getOrPut(l) catch unreachable;
         if (!d.found_existing) {
-            d.value_ptr.* = .{ .counted = false, .nlink = nlink };
             l.next = l;
             l.prev = l;
         } else {
-            inodes.setStats(.{ .key_ptr = d.key_ptr, .value_ptr = d.value_ptr }, false);
-            // If the nlink counts are not consistent, reset to 0 so we calculate with what we have instead.
-            if (d.value_ptr.nlink != nlink)
-                d.value_ptr.nlink = 0;
+            inodes.setStats(d.key_ptr.*, false);
             l.next = d.key_ptr.*;
             l.prev = d.key_ptr.*.prev;
             l.next.prev = l;
@@ -243,8 +249,8 @@ pub const Link = extern struct {
 
     // Remove this link from the inodes map and remove its stats from parent directories.
     fn removeLink(l: *@This()) void {
+        inodes.setStats(l, false);
         const entry = inodes.map.getEntry(l) orelse return;
-        inodes.setStats(entry, false);
         if (l.next == l) {
            _ = inodes.map.remove(l);
            _ = inodes.uncounted.remove(l);
@@ -317,7 +323,7 @@ pub const inodes = struct {
     // node in the list. Link entries with the same dev/ino are part of a
     // circular linked list, so you can iterate through all of them with this
     // single pointer.
-    const Map = std.HashMap(*Link, Inode, HashContext, 80);
+    const Map = std.HashMap(*Link, void, HashContext, 80);
     pub var map = Map.init(main.allocator);
 
     // List of nodes in 'map' with !counted, to speed up addAllStats().
@@ -329,17 +335,6 @@ pub const inodes = struct {
 
     pub var lock = std.Thread.Mutex{};
 
-    const Inode = packed struct {
-        // Whether this Inode is counted towards the parent directories.
-        counted: bool,
-        // Number of links for this inode. When set to '0', we don't know the
-        // actual nlink count, either because it wasn't part of the imported
-        // JSON data or because we read inconsistent values from the
-        // filesystem. The count will then be updated by the actual number of
-        // links in our in-memory tree.
-        nlink: u31,
-    };
-
     const HashContext = struct {
         pub fn hash(_: @This(), l: *Link) u64 {
             var h = std.hash.Wyhash.init(0);
@@ -366,16 +361,18 @@ pub const inodes = struct {
     // the list of *Links and their sizes and counts must be in the exact same
     // state as when the stats were added. Hence, any modification to the Link
     // state should be preceded by a setStats(.., false).
-    fn setStats(entry: Map.Entry, add: bool) void {
-        if (entry.value_ptr.counted == add) return;
-        entry.value_ptr.counted = add;
+    fn setStats(l: *Link, add: bool) void {
+        if (l.pack.counted == add) return;
 
         var nlink: u31 = 0;
+        var inconsistent = false;
         var dirs = std.AutoHashMap(*Dir, u32).init(main.allocator);
         defer dirs.deinit();
-        var it = entry.key_ptr.*;
+        var it = l;
         while (true) {
+            it.pack.counted = add;
             nlink += 1;
+            if (it.pack.nlink != l.pack.nlink) inconsistent = true;
             var parent: ?*Dir = it.parent;
             while (parent) |p| : (parent = p.parent) {
                 const de = dirs.getOrPut(p) catch unreachable;
@@ -383,30 +380,38 @@ pub const inodes = struct {
                 else de.value_ptr.* = 1;
             }
             it = it.next;
-            if (it == entry.key_ptr.*)
+            if (it == l)
                 break;
         }
-        if (entry.value_ptr.nlink < nlink) entry.value_ptr.nlink = nlink
-        else nlink = entry.value_ptr.nlink;
+        // There are not many sensible things we can do when we encounter
+        // inconsistent nlink counts. The current approach is to fall back to
+        // the number of times we've seen this link in our tree when the
+        // nlink counts don't match. May want to add a warning of some sort
+        // to the UI at some point.
+        if (!inconsistent and l.pack.nlink >= nlink) nlink = l.pack.nlink;
+
+        // XXX: We're also not testing for inconsistent entry sizes, instead
+        // using the given 'l' size for all Links. Might warrant a warning as
+        // well.
 
         var dir_iter = dirs.iterator();
         if (add) {
             while (dir_iter.next()) |de| {
-                de.key_ptr.*.entry.pack.blocks +|= entry.key_ptr.*.entry.pack.blocks;
-                de.key_ptr.*.entry.size +|= entry.key_ptr.*.entry.size;
+                de.key_ptr.*.entry.pack.blocks +|= l.entry.pack.blocks;
+                de.key_ptr.*.entry.size +|= l.entry.size;
                 if (de.value_ptr.* < nlink) {
-                    de.key_ptr.*.shared_blocks +|= entry.key_ptr.*.entry.pack.blocks;
-                    de.key_ptr.*.shared_size +|= entry.key_ptr.*.entry.size;
+                    de.key_ptr.*.shared_blocks +|= l.entry.pack.blocks;
+                    de.key_ptr.*.shared_size +|= l.entry.size;
                 }
             }
         } else {
             while (dir_iter.next()) |de| {
-                de.key_ptr.*.entry.pack.blocks -|= entry.key_ptr.*.entry.pack.blocks;
-                de.key_ptr.*.entry.size -|= entry.key_ptr.*.entry.size;
+                de.key_ptr.*.entry.pack.blocks -|= l.entry.pack.blocks;
+                de.key_ptr.*.entry.size -|= l.entry.size;
                 if (de.value_ptr.* < nlink) {
-                    de.key_ptr.*.shared_blocks -|= entry.key_ptr.*.entry.pack.blocks;
-                    de.key_ptr.*.shared_size -|= entry.key_ptr.*.entry.size;
+                    de.key_ptr.*.shared_blocks -|= l.entry.pack.blocks;
+                    de.key_ptr.*.shared_size -|= l.entry.size;
                 }
             }
         }
     }
@@ -414,11 +419,11 @@ pub const inodes = struct {
 
     pub fn addAllStats() void {
         if (uncounted_full) {
-            var it = map.iterator();
-            while (it.next()) |e| setStats(e, true);
+            var it = map.keyIterator();
+            while (it.next()) |e| setStats(e.*, true);
         } else {
-            var it = uncounted.iterator();
-            while (it.next()) |u| if (map.getEntry(u.key_ptr.*)) |e| setStats(e, true);
+            var it = uncounted.keyIterator();
+            while (it.next()) |u| if (map.getKey(u.*)) |e| setStats(e, true);
         }
         uncounted_full = false;
         if (uncounted.count() > 0)
diff --git a/src/sink.zig b/src/sink.zig
index 6d746de..f0ff844 100644
--- a/src/sink.zig
+++ b/src/sink.zig
@@ -290,9 +290,10 @@ const MemDir = struct {
         if (e.link()) |l| {
             l.parent = self.dir;
             l.ino = stat.ino;
+            l.pack.nlink = stat.nlink;
             model.inodes.lock.lock();
             defer model.inodes.lock.unlock();
-            l.addLink(stat.nlink);
+            l.addLink();
         }
         if (e.ext()) |ext| ext.* = stat.ext;
         return e;