diff --git a/src/model.zig b/src/model.zig index deef87d..e99c8c6 100644 --- a/src/model.zig +++ b/src/model.zig @@ -101,12 +101,7 @@ pub const Entry = packed struct { } } - // Insert this entry into the tree at the given directory, updating parent sizes and item counts. - pub fn insert(self: *Entry, parents: *const Parents) void { - self.next = parents.top().sub; - parents.top().sub = self; - if (self.dir()) |d| std.debug.assert(d.sub == null); - + fn addStats(self: *Entry, parents: *const Parents) void { const dev = parents.top().dev; // Set if this is the first time we've found this hardlink in the bottom-most directory of the given dev. // Means we should count it for other-dev parent dirs, too. @@ -149,6 +144,19 @@ pub const Entry = packed struct { } } } + + // Insert this entry into the tree at the given directory, updating parent sizes and item counts. + pub fn insert(self: *Entry, parents: *const Parents) void { + self.next = parents.top().sub; + parents.top().sub = self; + if (self.dir()) |d| std.debug.assert(d.sub == null); + + // Links with nlink == 0 are counted after we're done scanning. + if (if (self.link()) |l| l.nlink == 0 else false) + link_count.add(parents.top().dev, self.link().?.ino) + else + self.addStats(parents); + } }; const DevId = u30; // Can be reduced to make room for more flags in Dir. @@ -181,8 +189,12 @@ pub const Dir = packed struct { // File that's been hardlinked (i.e. nlink > 1) pub const Link = packed struct { entry: Entry, - ino: u64, // dev is inherited from the parent Dir + ino: u64, + // Special value '0' means: "This link hasn't been counted in the parent + // sizes yet because we only know that it's a hard link but not how many + // links it has". These are added to the tree structure first and are + // counted after the scan is complete (see link_count below). nlink: u32, name: u8, }; @@ -311,6 +323,67 @@ pub const devices = struct { } }; +// Special hash table for counting hard links with nlink=0. +pub const link_count = struct { + var nodes = std.HashMap(Node, void, Node.hash, Node.eql, 80).init(main.allocator); + + const Node = struct { + ino: u64, + dev: u32, // DevId, but 32-bits for easier hashing + count: u32, + + const Self = @This(); + + fn hash(self: Self) u64 { + var h = std.hash.Wyhash.init(0); + h.update(std.mem.asBytes(&self.dev)); + h.update(std.mem.asBytes(&self.ino)); + return h.final(); + } + + fn eql(a: Self, b: Self) bool { + return a.ino == b.ino and a.dev == b.dev; + } + }; + + pub fn add(dev: DevId, ino: u64) void { + const n = Node{ .dev = dev, .ino = ino, .count = 1 }; + var d = nodes.getOrPut(n) catch unreachable; + if (d.found_existing) d.entry.key.count += 1; + } + + var final_dir: Parents = undefined; + + fn final_rec() void { + var it = final_dir.top().sub; + while (it) |e| : (it = e.next) { + if (e.dir()) |d| { + final_dir.push(d); + final_rec(); + final_dir.pop(); + continue; + } + const l = e.link() orelse continue; + if (l.nlink > 0) continue; + const s = Node{ .dev = final_dir.top().dev, .ino = l.ino, .count = 0 }; + if (nodes.getEntry(s)) |n| { + l.nlink = n.key.count; + e.addStats(&final_dir); + } + } + } + + // Called when all files have been added, will traverse the directory to + // find all links, update their nlink count and parent sizes. + pub fn final() void { + if (nodes.count() == 0) return; + final_dir = Parents{}; + final_rec(); + nodes.clearAndFree(); + final_dir.deinit(); + } +}; + pub var root: *Dir = undefined; // Stack of parent directories, convenient helper when constructing and traversing the tree. diff --git a/src/scan.zig b/src/scan.zig index a4341b3..c159330 100644 --- a/src/scan.zig +++ b/src/scan.zig @@ -151,6 +151,7 @@ const Context = struct { } fn final(self: *Self) void { + if (self.parents) |_| model.link_count.final(); if (self.wr) |wr| { wr.writer().writeByte(']') catch |e| writeErr(e); wr.flush() catch |e| writeErr(e); @@ -261,8 +262,6 @@ const Context = struct { e.size = self.stat.size; if (e.dir()) |d| d.dev = model.devices.getId(self.stat.dev); if (e.file()) |f| f.notreg = !self.stat.dir and !self.stat.reg; - // TODO: Handle the scenario where we don't know the hard link count - // (i.e. on imports from old ncdu versions that don't have the "nlink" field) if (e.link()) |l| { l.ino = self.stat.ino; l.nlink = self.stat.nlink; @@ -342,8 +341,8 @@ fn scanDir(ctx: *Context, dir: std.fs.Dir, dir_dev: u64) void { ctx.stat = nstat; // Symlink targets may reside on different filesystems, // this will break hardlink detection and counting so let's disable it. - if (ctx.stat.nlink > 1 and ctx.stat.dev != dir_dev) - ctx.stat.nlink = 1; + if (ctx.stat.hlinkc and ctx.stat.dev != dir_dev) + ctx.stat.hlinkc = false; } } else |_| {} } @@ -431,6 +430,7 @@ const Import = struct { self.ch = self.rd.reader().readByte() catch |e| switch (e) { error.EndOfStream => 0, error.InputOutput => self.die("I/O error"), + error.IsDir => self.die("not a file"), // should be detected at open() time, but no flag for that... // TODO: This one can be retried error.SystemResources => self.die("out of memory"), else => unreachable, @@ -628,8 +628,8 @@ const Import = struct { } }, 'h' => { - if (eq(u8, key, "hlinkc")) { - self.ctx.stat.hlinkc = true; + if (eq(u8, key, "hlnkc")) { + self.ctx.stat.hlinkc = self.boolean(); return; } },