mirror of
https://code.blicky.net/yorhel/ncdu.git
synced 2026-01-12 17:08:39 -09:00
Experimental new export format
The goals of this format being: - Streaming parallel export with minimal mandatory buffering. - Exported data includes cumulative directory stats, so reader doesn't have to go through the entire tree to calculate these. - Fast-ish directory listings without reading the entire file. - Built-in compression. Current implementation is missing compression, hardlink counting and actually reading the file. Also need to tune and measure stuff.
This commit is contained in:
parent
87d336baeb
commit
f25bc5cbf4
7 changed files with 627 additions and 19 deletions
240
ncdubinexp.pl
Executable file
240
ncdubinexp.pl
Executable file
|
|
@ -0,0 +1,240 @@
|
|||
#!/usr/bin/perl
|
||||
# SPDX-FileCopyrightText: Yorhel <projects@yorhel.nl>
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
|
||||
# Usage: ncdubinexp.pl [options] <export.ncdu
|
||||
# Or: ncdu -O- | ncdubinexp.pl [options]
|
||||
#
|
||||
# Reads and validates a binary ncdu export file and optionally prints out
|
||||
# various diagnostic data and statistics.
|
||||
#
|
||||
# Options:
|
||||
# blocks - print a listing of all blocks as they are read
|
||||
# items - print a listing of all items as they are read
|
||||
# dirs - print out dir listing stats
|
||||
# stats - print some overview stats
|
||||
#
|
||||
# This script is highly inefficient in both RAM and CPU, not suitable for large
|
||||
# exports.
|
||||
# This script does not permit unknown blocks or item keys, although that is
|
||||
# technically valid.
|
||||
|
||||
|
||||
use v5.36;
|
||||
use autodie;
|
||||
use bytes;
|
||||
no warnings 'portable';
|
||||
use List::Util 'min', 'max';
|
||||
use CBOR::XS; # Does not officially support recent perl versions, but it's the only CPAN module that supports streaming.
|
||||
|
||||
my $printblocks = grep $_ eq 'blocks', @ARGV;
|
||||
my $printitems = grep $_ eq 'items', @ARGV;
|
||||
my $printdirs = grep $_ eq 'dirs', @ARGV;
|
||||
my $printstats = grep $_ eq 'stats', @ARGV;
|
||||
|
||||
my %datablocks;
|
||||
my %items;
|
||||
my $root_itemref;
|
||||
my $datablock_len = 0;
|
||||
my $rawdata_len = 0;
|
||||
my $minitemsperblock = 1e10;
|
||||
my $maxitemsperblock = 0;
|
||||
|
||||
{
|
||||
die "Input too short\n" if 8 != read STDIN, my $sig, 8;
|
||||
die "Invalid file signature\n" if $sig ne "\xbfncduEX1";
|
||||
}
|
||||
|
||||
my @itemkeys = qw/
|
||||
type
|
||||
name
|
||||
prev
|
||||
asize
|
||||
dsize
|
||||
dev
|
||||
rderr
|
||||
cumasize
|
||||
cumdsize
|
||||
shrasize
|
||||
shrdsize
|
||||
items
|
||||
sub
|
||||
ino
|
||||
nlink
|
||||
prevlnk
|
||||
uid
|
||||
gid
|
||||
mode
|
||||
mtime
|
||||
/;
|
||||
|
||||
|
||||
sub datablock($prefix, $off, $blklen, $content) {
|
||||
die "$prefix: Data block too small\n" if length $content < 8;
|
||||
|
||||
my($num, $rawlen) = unpack 'NN', $content;
|
||||
die sprintf "%s: Duplicate block id %d (first at %010x)", $prefix, $num, $datablocks{$num}>>24 if $datablocks{$num};
|
||||
$datablocks{$num} = ($off << 24) | $blklen;
|
||||
|
||||
$printblocks && printf "%s: data block %d rawlen %d (%.2f)\n", $prefix, $num, $rawlen, $rawlen/(length($content)-8)*100;
|
||||
|
||||
$datablock_len += length($content)-8;
|
||||
$rawdata_len += $rawlen;
|
||||
|
||||
# TODO: Decompress
|
||||
cbordata($num, substr $content, 8);
|
||||
}
|
||||
|
||||
|
||||
sub fmtitem($val) {
|
||||
join ' ', map "$_:$val->{$_}", grep exists $val->{$_}, @itemkeys;
|
||||
}
|
||||
|
||||
|
||||
sub cbordata($blknum, $data) {
|
||||
my $cbor = CBOR::XS->new_safe;
|
||||
my $off = 0;
|
||||
my $nitems = 0;
|
||||
while ($off < length $data) { # This substr madness is prolly quite slow
|
||||
my($val, $len) = $cbor->decode_prefix(substr $data, $off);
|
||||
my $itemref = ($blknum << 24) | $off;
|
||||
$off += $len;
|
||||
next if !defined $val;
|
||||
$nitems++;
|
||||
|
||||
# Basic validation of the CBOR data. Doesn't validate that every value
|
||||
# has the correct CBOR type or that integers are within range.
|
||||
$val = { _itemref => $itemref, map {
|
||||
die sprintf "#%010x: Invalid CBOR key '%s'\n", $itemref, $_ if !/^[0-9]+$/ || !$itemkeys[$_];
|
||||
my($k, $v) = ($itemkeys[$_], $val->{$_});
|
||||
die sprintf "#%010x: Invalid value for key '%s': '%s'\n", $itemref, $k, $v
|
||||
if ref $v eq 'ARRAY' || ref $v eq 'HASH' || !defined $v || !(
|
||||
$k eq 'type' ? ($v =~ /^(-[1-4]|[0-3])$/) :
|
||||
$k eq 'prev' || $k eq 'sub' || $k eq 'prevlnk' ? 1 : # itemrefs are validated separately
|
||||
$k eq 'name' ? length $v :
|
||||
$k eq 'rderr' ? Types::Serialiser::is_bool($v) :
|
||||
/^[0-9]+$/
|
||||
);
|
||||
($k,$v)
|
||||
} keys %$val };
|
||||
|
||||
$printitems && printf "#%010x: %s\n", $itemref, fmtitem $val;
|
||||
$items{$itemref} = $val;
|
||||
}
|
||||
$minitemsperblock = $nitems if $minitemsperblock > $nitems;
|
||||
$maxitemsperblock = $nitems if $maxitemsperblock < $nitems;
|
||||
}
|
||||
|
||||
|
||||
sub indexblock($prefix, $content) {
|
||||
$printblocks && print "$prefix: index block\n";
|
||||
|
||||
my $maxnum = max keys %datablocks;
|
||||
die "$prefix: index block size incorrect for $maxnum+1 data blocks\n" if length($content) != 8*($maxnum+1) + 8;
|
||||
|
||||
my @ints = unpack 'Q>*', $content;
|
||||
$root_itemref = pop @ints;
|
||||
|
||||
for my $i (0..$#ints-1) {
|
||||
if (!$datablocks{$i}) {
|
||||
die "$prefix: index entry for missing block (#$i) must be 0\n" if $ints[$i] != 0;
|
||||
} else {
|
||||
die sprintf "%s: invalid index entry for block #%d (got %016x expected %016x)\n",
|
||||
$prefix, $i, $ints[$i], $datablocks{$i}
|
||||
if $ints[$i] != $datablocks{$i};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
while (1) {
|
||||
my $off = tell STDIN;
|
||||
my $prefix = sprintf '%010x', $off;
|
||||
die "$prefix Input too short, expected block header\n" if 4 != read STDIN, my $blkhead, 4;
|
||||
$blkhead = unpack 'N', $blkhead;
|
||||
my $blkid = $blkhead >> 24;
|
||||
my $blklen = $blkhead & 0xffffff;
|
||||
|
||||
$prefix .= "[$blklen]";
|
||||
die "$prefix: Short read on block content\n" if $blklen - 8 != read STDIN, my $content, $blklen - 8;
|
||||
die "$prefix: Input too short, expected block footer\n" if 4 != read STDIN, my $blkfoot, 4;
|
||||
die "$prefix: Block footer does not match header\n" if $blkhead != unpack 'N', $blkfoot;
|
||||
|
||||
if ($blkid == 1) {
|
||||
datablock($prefix, $off, $blklen, $content);
|
||||
} elsif ($blkid == 2) {
|
||||
indexblock($prefix, $content);
|
||||
last;
|
||||
} else {
|
||||
die "$prefix Unknown block id $blkid\n";
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
die sprintf "0x%08x: Data after index block\n", tell(STDIN) if 0 != read STDIN, my $x, 1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
# Each item must be referenced exactly once from either a 'prev' or 'sub' key,
|
||||
# $nodup verifies the "at most once" part.
|
||||
sub resolve($cur, $key, $nodup) {
|
||||
my $ref = exists $cur->{$key} ? $cur->{$key} : return;
|
||||
my $item = $ref < 0
|
||||
? ($items{ $cur->{_itemref} + $ref } || die sprintf "#%010x: Invalid relative itemref %s: %d\n", $cur->{_itemref}, $key, $ref)
|
||||
: ($items{$ref} || die sprintf "#%010x: Invalid reference %s to #%010x\n", $cur->{_itemref}, $key, $ref);
|
||||
die sprintf "Item #%010x referenced more than once, from #%010x and #%010x\n", $item->{_itemref}, $item->{_lastseen}, $cur->{_itemref}
|
||||
if $nodup && defined $item->{_lastseen};
|
||||
$item->{_lastseen} = $cur->{_itemref} if $nodup;
|
||||
return $item;
|
||||
}
|
||||
|
||||
my @dirblocks; # [ path, nitems, nblocks ]
|
||||
my %dirblocks; # nblocks => ndirs
|
||||
|
||||
sub traverse($parent, $path) {
|
||||
my $sub = resolve($parent, 'sub', 1);
|
||||
my %blocks;
|
||||
my $items = 0;
|
||||
while ($sub) {
|
||||
$items++;
|
||||
$blocks{ $sub->{_itemref} >> 24 }++;
|
||||
traverse($sub, "$path/$sub->{name}") if $sub->{type} == 0;
|
||||
$sub = resolve($sub, 'prev', 1);
|
||||
}
|
||||
push @dirblocks, [ $path, $items, scalar keys %blocks ] if scalar keys %blocks > 1;
|
||||
$dirblocks{ keys %blocks }++ if $items > 0;
|
||||
$items && $printdirs && printf "#%010x: %d items in %d blocks (%d .. %d) %s\n",
|
||||
$parent->{_itemref}, $items, scalar keys %blocks,
|
||||
min(values %blocks), max(values %blocks), $path;
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
my $root = $items{$root_itemref} || die sprintf "Invalid root itemref: %010x\n", $root_itemref;
|
||||
$root->{_lastseen} = 0xffffffffff;
|
||||
traverse($root, $root->{name});
|
||||
|
||||
my($noref) = grep !$_->{_lastseen}, values %items;
|
||||
die sprintf "No reference found to #%010x\n", $noref->{_itemref} if $noref;
|
||||
|
||||
# TODO: Verify 'link' references
|
||||
}
|
||||
|
||||
if ($printstats) {
|
||||
my $nblocks = keys %datablocks;
|
||||
my $nitems = keys %items;
|
||||
printf " Total items: %d\n", $nitems;
|
||||
printf " Total blocks: %d\n", $nblocks;
|
||||
printf " Items per block: %.1f (%d .. %d)\n", $nitems / $nblocks, $minitemsperblock, $maxitemsperblock;
|
||||
printf " Avg block size: %d compressed, %d raw (%.1f)\n", $datablock_len/$nblocks, $rawdata_len/$nblocks, $rawdata_len/$datablock_len*100;
|
||||
printf " Avg item size: %.1f compressed, %.1f raw\n", $datablock_len/$nitems, $rawdata_len/$nitems;
|
||||
|
||||
@dirblocks = sort { $b->[2] <=> $a->[2] } @dirblocks;
|
||||
print "\nBlocks per directory listing histogram\n";
|
||||
printf " %5d %6d\n", $_, $dirblocks{$_} for sort { $a <=> $b } keys %dirblocks;
|
||||
print "\nMost blocks per directory listing\n";
|
||||
print " items blks path\n";
|
||||
printf "%10d %4d %s\n", @{$dirblocks[$_]}[1,2,0] for (0..min 9, $#dirblocks);
|
||||
}
|
||||
340
src/bin_export.zig
Normal file
340
src/bin_export.zig
Normal file
|
|
@ -0,0 +1,340 @@
|
|||
// SPDX-FileCopyrightText: Yorhel <projects@yorhel.nl>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
const std = @import("std");
|
||||
const main = @import("main.zig");
|
||||
const sink = @import("sink.zig");
|
||||
const util = @import("util.zig");
|
||||
const ui = @import("ui.zig");
|
||||
|
||||
pub const global = struct {
|
||||
var fd: std.fs.File = undefined;
|
||||
var index = std.ArrayList(u8).init(main.allocator);
|
||||
var file_off: u64 = 0;
|
||||
var lock: std.Thread.Mutex = .{};
|
||||
var root_itemref: u64 = 0;
|
||||
// TODO:
|
||||
// var links: Map dev -> ino -> (last_offset, size, blocks, nlink)
|
||||
};
|
||||
|
||||
const BLOCK_SIZE: usize = 64*1024;
|
||||
|
||||
const ItemType = enum(i3) {
|
||||
dir = 0,
|
||||
reg = 1,
|
||||
nonreg = 2,
|
||||
link = 3,
|
||||
err = -1,
|
||||
pattern = -2,
|
||||
otherfs = -3,
|
||||
kernfs = -4
|
||||
};
|
||||
|
||||
const ItemKey = enum(u5) {
|
||||
// all items
|
||||
type = 0, // ItemType
|
||||
name = 1, // bytes
|
||||
prev = 2, // itemref
|
||||
// Only for non-specials
|
||||
asize = 3, // u64
|
||||
dsize = 4, // u64
|
||||
// Only for .dir
|
||||
dev = 5, // u64 only if different from parent dir
|
||||
rderr = 6, // bool true = error reading directory list, false = error in sub-item, absent = no error
|
||||
cumasize = 7, // u64
|
||||
cumdsize = 8, // u64
|
||||
shrasize = 9, // u64
|
||||
shrdsize = 10, // u64
|
||||
items = 11, // u32
|
||||
sub = 12, // itemref only if dir is not empty
|
||||
// Only for .link
|
||||
ino = 13, // u64
|
||||
nlink = 14, // u32
|
||||
prevlnk = 15, // itemref
|
||||
// Extended mode
|
||||
uid = 16, // u32
|
||||
gid = 17, // u32
|
||||
mode = 18, // u16
|
||||
mtime = 19, // u64
|
||||
};
|
||||
|
||||
// Pessimistic upper bound on the encoded size of an item, excluding the name field.
|
||||
// 2 bytes for map start/end, 10 per field (2 for the key, 9 for a full u64).
|
||||
const MAX_ITEM_LEN = 2 + 11 * @typeInfo(ItemKey).Enum.fields.len;
|
||||
|
||||
const CborMajor = enum(u3) { pos, neg, bytes, text, array, map, tag, simple };
|
||||
|
||||
inline fn bigu16(v: u16) [2]u8 { return @bitCast(std.mem.nativeToBig(u16, v)); }
|
||||
inline fn bigu32(v: u32) [4]u8 { return @bitCast(std.mem.nativeToBig(u32, v)); }
|
||||
inline fn bigu64(v: u64) [8]u8 { return @bitCast(std.mem.nativeToBig(u64, v)); }
|
||||
|
||||
inline fn blockHeader(id: u8, len: u32) [4]u8 { return bigu32((@as(u32, id) << 24) | len); }
|
||||
|
||||
inline fn cborByte(major: CborMajor, arg: u5) u8 { return (@as(u8, @intFromEnum(major)) << 5) | arg; }
|
||||
|
||||
|
||||
pub const Thread = struct {
|
||||
buf: [BLOCK_SIZE]u8 = undefined,
|
||||
off: usize = BLOCK_SIZE,
|
||||
block_num: u32 = std.math.maxInt(u32),
|
||||
itemref: u64 = 0, // ref of item currently being written
|
||||
|
||||
// Temporary buffer for headers and compression
|
||||
tmp: [BLOCK_SIZE+128]u8 = undefined,
|
||||
|
||||
fn createBlock(t: *Thread) []const u8 {
|
||||
if (t.block_num == std.math.maxInt(u32) or t.off <= 1) return "";
|
||||
|
||||
// TODO: Compression
|
||||
const blocklen: u32 = @intCast(t.off + 16);
|
||||
t.tmp[0..4].* = blockHeader(1, blocklen);
|
||||
t.tmp[4..8].* = bigu32(t.block_num);
|
||||
t.tmp[8..12].* = bigu32(@intCast(t.off));
|
||||
@memcpy(t.tmp[12..][0..t.off], t.buf[0..t.off]);
|
||||
t.tmp[12+t.off..][0..4].* = blockHeader(1, blocklen);
|
||||
return t.tmp[0..blocklen];
|
||||
}
|
||||
|
||||
fn flush(t: *Thread, expected_len: usize) void {
|
||||
@setCold(true);
|
||||
const block = createBlock(t);
|
||||
|
||||
global.lock.lock();
|
||||
defer global.lock.unlock();
|
||||
// This can only really happen when the root path exceeds BLOCK_SIZE,
|
||||
// in which case we would probably have error'ed out earlier anyway.
|
||||
if (expected_len > t.buf.len) ui.die("Error writing data: path too long.\n", .{});
|
||||
|
||||
if (block.len > 0) {
|
||||
global.index.items[4..][t.block_num*8..][0..8].* = bigu64((global.file_off << 24) + block.len);
|
||||
global.file_off += block.len;
|
||||
global.fd.writeAll(block) catch |e|
|
||||
ui.die("Error writing to file: {s}.\n", .{ ui.errorString(e) });
|
||||
}
|
||||
|
||||
t.off = 0;
|
||||
t.block_num = @intCast((global.index.items.len - 4) / 8);
|
||||
global.index.appendSlice(&[1]u8{0}**8) catch unreachable;
|
||||
// Start the first block with a CBOR 'null', so that itemrefs can never be 0.
|
||||
if (t.block_num == 0) t.cborHead(.simple, 22);
|
||||
}
|
||||
|
||||
fn cborHead(t: *Thread, major: CborMajor, arg: u64) void {
|
||||
if (arg <= 23) {
|
||||
t.buf[t.off] = cborByte(major, @intCast(arg));
|
||||
t.off += 1;
|
||||
} else if (arg <= std.math.maxInt(u8)) {
|
||||
t.buf[t.off] = cborByte(major, 24);
|
||||
t.buf[t.off+1] = @truncate(arg);
|
||||
t.off += 2;
|
||||
} else if (arg <= std.math.maxInt(u16)) {
|
||||
t.buf[t.off] = cborByte(major, 25);
|
||||
t.buf[t.off+1..][0..2].* = bigu16(@intCast(arg));
|
||||
t.off += 3;
|
||||
} else if (arg <= std.math.maxInt(u32)) {
|
||||
t.buf[t.off] = cborByte(major, 26);
|
||||
t.buf[t.off+1..][0..4].* = bigu32(@intCast(arg));
|
||||
t.off += 5;
|
||||
} else {
|
||||
t.buf[t.off] = cborByte(major, 27);
|
||||
t.buf[t.off+1..][0..8].* = bigu64(arg);
|
||||
t.off += 9;
|
||||
}
|
||||
}
|
||||
|
||||
fn cborIndef(t: *Thread, major: CborMajor) void {
|
||||
t.buf[t.off] = cborByte(major, 31);
|
||||
t.off += 1;
|
||||
}
|
||||
|
||||
fn itemKey(t: *Thread, key: ItemKey) void {
|
||||
t.cborHead(.pos, @intFromEnum(key));
|
||||
}
|
||||
|
||||
fn itemRef(t: *Thread, key: ItemKey, ref: u64) void {
|
||||
if (ref == 0) return;
|
||||
t.itemKey(key);
|
||||
// Full references compress like shit and most of the references point
|
||||
// into the same block, so optimize that case by using a negative
|
||||
// offset instead.
|
||||
if ((ref >> 24) == t.block_num) t.cborHead(.neg, t.itemref - ref - 1)
|
||||
else t.cborHead(.pos, ref);
|
||||
}
|
||||
|
||||
// Reserve space for a new item, write out the type, prev and name fields and return the itemref.
|
||||
fn itemStart(t: *Thread, itype: ItemType, prev_item: u64, name: []const u8) u64 {
|
||||
const min_len = name.len + MAX_ITEM_LEN;
|
||||
if (t.off + min_len > t.buf.len) t.flush(min_len);
|
||||
|
||||
t.itemref = (@as(u64, t.block_num) << 24) | t.off;
|
||||
t.cborIndef(.map);
|
||||
t.itemKey(.type);
|
||||
if (@intFromEnum(itype) >= 0) t.cborHead(.pos, @intCast(@intFromEnum(itype)))
|
||||
else t.cborHead(.neg, @intCast(-1 - @intFromEnum(itype)));
|
||||
t.itemKey(.name);
|
||||
t.cborHead(.bytes, name.len);
|
||||
@memcpy(t.buf[t.off..][0..name.len], name);
|
||||
t.off += name.len;
|
||||
t.itemRef(.prev, prev_item);
|
||||
return t.itemref;
|
||||
}
|
||||
|
||||
fn itemExt(t: *Thread, stat: *const sink.Stat) void {
|
||||
if (!main.config.extended) return;
|
||||
t.itemKey(.uid);
|
||||
t.cborHead(.pos, stat.ext.uid);
|
||||
t.itemKey(.gid);
|
||||
t.cborHead(.pos, stat.ext.gid);
|
||||
t.itemKey(.mode);
|
||||
t.cborHead(.pos, stat.ext.mode);
|
||||
t.itemKey(.mtime);
|
||||
t.cborHead(.pos, stat.ext.mtime);
|
||||
}
|
||||
|
||||
fn itemEnd(t: *Thread) void {
|
||||
t.cborIndef(.simple);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
pub const Dir = struct {
|
||||
// TODO: When items are written out into blocks depth-first, parent dirs
|
||||
// will end up getting their items distributed over many blocks, which will
|
||||
// significantly slow down reading that dir's listing. It may be worth
|
||||
// buffering some items at the Dir level before flushing them out to the
|
||||
// Thread buffer.
|
||||
|
||||
// The lock protects all of the below, and is necessary because final()
|
||||
// accesses the parent dir and may be called from other threads.
|
||||
// I'm not expecting much lock contention, but it's possible to turn
|
||||
// last_item into an atomic integer and other fields could be split up for
|
||||
// subdir use.
|
||||
lock: std.Thread.Mutex = .{},
|
||||
last_sub: u64 = 0,
|
||||
stat: sink.Stat,
|
||||
items: u32 = 0,
|
||||
size: u64 = 0,
|
||||
blocks: u64 = 0,
|
||||
err: bool = false,
|
||||
suberr: bool = false,
|
||||
// TODO: set of links
|
||||
//shared_size: u64,
|
||||
//shared_blocks: u64,
|
||||
|
||||
pub fn addSpecial(d: *Dir, t: *Thread, name: []const u8, sp: sink.Special) void {
|
||||
d.lock.lock();
|
||||
defer d.lock.unlock();
|
||||
d.items += 1;
|
||||
if (sp == .err) d.suberr = true;
|
||||
const it: ItemType = switch (sp) {
|
||||
.err => .err,
|
||||
.other_fs => .otherfs,
|
||||
.kernfs => .kernfs,
|
||||
.excluded => .pattern,
|
||||
};
|
||||
d.last_sub = t.itemStart(it, d.last_sub, name);
|
||||
t.itemEnd();
|
||||
}
|
||||
|
||||
pub fn addStat(d: *Dir, t: *Thread, name: []const u8, stat: *const sink.Stat) void {
|
||||
d.lock.lock();
|
||||
defer d.lock.unlock();
|
||||
d.items += 1;
|
||||
if (!stat.hlinkc) {
|
||||
d.size +|= stat.size;
|
||||
d.blocks +|= stat.blocks;
|
||||
}
|
||||
const it: ItemType = if (stat.hlinkc) .link else if (stat.reg) .reg else .nonreg;
|
||||
d.last_sub = t.itemStart(it, d.last_sub, name);
|
||||
t.itemKey(.asize);
|
||||
t.cborHead(.pos, stat.size);
|
||||
t.itemKey(.dsize);
|
||||
t.cborHead(.pos, util.blocksToSize(stat.blocks));
|
||||
// TODO: hardlink stuff
|
||||
t.itemExt(stat);
|
||||
t.itemEnd();
|
||||
}
|
||||
|
||||
pub fn addDir(d: *Dir, stat: *const sink.Stat) Dir {
|
||||
d.lock.lock();
|
||||
defer d.lock.unlock();
|
||||
d.items += 1;
|
||||
d.size +|= stat.size;
|
||||
d.blocks +|= stat.blocks;
|
||||
return .{ .stat = stat.* };
|
||||
}
|
||||
|
||||
pub fn setReadError(d: *Dir) void {
|
||||
d.lock.lock();
|
||||
defer d.lock.unlock();
|
||||
d.err = true;
|
||||
}
|
||||
|
||||
pub fn final(d: *Dir, t: *Thread, name: []const u8, parent: ?*Dir) void {
|
||||
if (parent) |p| p.lock.lock();
|
||||
defer if (parent) |p| p.lock.unlock();
|
||||
|
||||
// TODO: hardlink stuff
|
||||
if (parent) |p| {
|
||||
p.items += d.items;
|
||||
p.size +|= d.size;
|
||||
p.blocks +|= d.blocks;
|
||||
if (d.suberr or d.err) p.suberr = true;
|
||||
|
||||
p.last_sub = t.itemStart(.dir, p.last_sub, name);
|
||||
} else
|
||||
global.root_itemref = t.itemStart(.dir, 0, name);
|
||||
|
||||
t.itemKey(.asize);
|
||||
t.cborHead(.pos, d.stat.size);
|
||||
t.itemKey(.dsize);
|
||||
t.cborHead(.pos, util.blocksToSize(d.stat.blocks));
|
||||
if (parent == null or parent.?.stat.dev != d.stat.dev) {
|
||||
t.itemKey(.dev);
|
||||
t.cborHead(.pos, d.stat.dev);
|
||||
}
|
||||
if (d.err or d.suberr) {
|
||||
t.itemKey(.rderr);
|
||||
t.cborHead(.simple, if (d.err) 21 else 20);
|
||||
}
|
||||
t.itemKey(.cumasize);
|
||||
t.cborHead(.pos, d.size);
|
||||
t.itemKey(.cumdsize);
|
||||
t.cborHead(.pos, util.blocksToSize(d.blocks));
|
||||
t.itemKey(.items);
|
||||
t.cborHead(.pos, d.items);
|
||||
t.itemRef(.sub, d.last_sub);
|
||||
t.itemExt(&d.stat);
|
||||
t.itemEnd();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
pub fn createRoot(stat: *const sink.Stat) Dir {
|
||||
return .{ .stat = stat.* };
|
||||
}
|
||||
|
||||
pub fn done(threads: []sink.Thread) void {
|
||||
for (threads) |*t| t.sink.bin.flush(0);
|
||||
|
||||
while (std.mem.endsWith(u8, global.index.items, &[1]u8{0}**8))
|
||||
global.index.shrinkRetainingCapacity(global.index.items.len - 8);
|
||||
global.index.appendSlice(&bigu64(global.root_itemref)) catch unreachable;
|
||||
global.index.appendSlice(&blockHeader(2, @intCast(global.index.items.len + 4))) catch unreachable;
|
||||
global.index.items[0..4].* = blockHeader(2, @intCast(global.index.items.len));
|
||||
global.fd.writeAll(global.index.items) catch |e|
|
||||
ui.die("Error writing to file: {s}.\n", .{ ui.errorString(e) });
|
||||
global.index.clearAndFree();
|
||||
|
||||
global.fd.close();
|
||||
}
|
||||
|
||||
pub fn setupOutput(fd: std.fs.File) void {
|
||||
global.fd = fd;
|
||||
fd.writeAll("\xbfncduEX1") catch |e|
|
||||
ui.die("Error writing to file: {s}.\n", .{ ui.errorString(e) });
|
||||
global.file_off = 8;
|
||||
|
||||
// Placeholder for the index block header.
|
||||
global.index.appendSlice("aaaa") catch unreachable;
|
||||
}
|
||||
|
|
@ -448,7 +448,7 @@ fn item(ctx: *Ctx, parent: ?*sink.Dir, dev: u64) void {
|
|||
if (ctx.special == .err) dir.setReadError(ctx.sink);
|
||||
while (ctx.p.elem(false)) item(ctx, dir, ndev);
|
||||
ctx.sink.setDir(parent);
|
||||
dir.unref();
|
||||
dir.unref(ctx.sink);
|
||||
} else if (ctx.special) |s| {
|
||||
parent.?.addSpecial(ctx.sink, name, s);
|
||||
if (ctx.stat.dir and ctx.p.elem(false)) ctx.p.die("unexpected contents in an excluded directory");
|
||||
|
|
|
|||
28
src/main.zig
28
src/main.zig
|
|
@ -8,6 +8,7 @@ const model = @import("model.zig");
|
|||
const scan = @import("scan.zig");
|
||||
const json_import = @import("json_import.zig");
|
||||
const json_export = @import("json_export.zig");
|
||||
const bin_export = @import("bin_export.zig");
|
||||
const sink = @import("sink.zig");
|
||||
const mem_src = @import("mem_src.zig");
|
||||
const mem_sink = @import("mem_sink.zig");
|
||||
|
|
@ -23,6 +24,7 @@ test "imports" {
|
|||
_ = scan;
|
||||
_ = json_import;
|
||||
_ = json_export;
|
||||
_ = bin_export;
|
||||
_ = sink;
|
||||
_ = mem_src;
|
||||
_ = mem_sink;
|
||||
|
|
@ -475,7 +477,8 @@ pub fn main() void {
|
|||
|
||||
var scan_dir: ?[:0]const u8 = null;
|
||||
var import_file: ?[:0]const u8 = null;
|
||||
var export_file: ?[:0]const u8 = null;
|
||||
var export_json: ?[:0]const u8 = null;
|
||||
var export_bin: ?[:0]const u8 = null;
|
||||
var quit_after_scan = false;
|
||||
{
|
||||
const arglist = std.process.argsAlloc(allocator) catch unreachable;
|
||||
|
|
@ -491,8 +494,10 @@ pub fn main() void {
|
|||
}
|
||||
if (opt.is("-h") or opt.is("-?") or opt.is("--help")) help()
|
||||
else if (opt.is("-v") or opt.is("-V") or opt.is("--version")) version()
|
||||
else if (opt.is("-o") and export_file != null) ui.die("The -o flag can only be given once.\n", .{})
|
||||
else if (opt.is("-o")) export_file = allocator.dupeZ(u8, args.arg()) catch unreachable
|
||||
else if (opt.is("-o") and (export_json != null or export_bin != null)) ui.die("The -o flag can only be given once.\n", .{})
|
||||
else if (opt.is("-o")) export_json = allocator.dupeZ(u8, args.arg()) catch unreachable
|
||||
else if (opt.is("-O") and (export_json != null or export_bin != null)) ui.die("The -O flag can only be given once.\n", .{})
|
||||
else if (opt.is("-O")) export_bin = allocator.dupeZ(u8, args.arg()) catch unreachable
|
||||
else if (opt.is("-f") and import_file != null) ui.die("The -f flag can only be given once.\n", .{})
|
||||
else if (opt.is("-f")) import_file = allocator.dupeZ(u8, args.arg()) catch unreachable
|
||||
else if (opt.is("--ignore-config")) {}
|
||||
|
|
@ -512,25 +517,32 @@ pub fn main() void {
|
|||
const out_tty = stdout.isTty();
|
||||
const in_tty = stdin.isTty();
|
||||
if (config.scan_ui == null) {
|
||||
if (export_file) |f| {
|
||||
if (export_json orelse export_bin) |f| {
|
||||
if (!out_tty or std.mem.eql(u8, f, "-")) config.scan_ui = .none
|
||||
else config.scan_ui = .line;
|
||||
} else config.scan_ui = .full;
|
||||
}
|
||||
if (!in_tty and import_file == null and export_file == null and !quit_after_scan)
|
||||
if (!in_tty and import_file == null and export_json == null and export_bin == null and !quit_after_scan)
|
||||
ui.die("Standard input is not a TTY. Did you mean to import a file using '-f -'?\n", .{});
|
||||
config.nc_tty = !in_tty or (if (export_file) |f| std.mem.eql(u8, f, "-") else false);
|
||||
config.nc_tty = !in_tty or (if (export_json orelse export_bin) |f| std.mem.eql(u8, f, "-") else false);
|
||||
|
||||
event_delay_timer = std.time.Timer.start() catch unreachable;
|
||||
defer ui.deinit();
|
||||
|
||||
if (export_file) |f| {
|
||||
if (export_json) |f| {
|
||||
const file =
|
||||
if (std.mem.eql(u8, f, "-")) stdout
|
||||
else std.fs.cwd().createFileZ(f, .{})
|
||||
catch |e| ui.die("Error opening export file: {s}.\n", .{ui.errorString(e)});
|
||||
json_export.setupOutput(file);
|
||||
sink.global.sink = .json;
|
||||
} else if (export_bin) |f| {
|
||||
const file =
|
||||
if (std.mem.eql(u8, f, "-")) stdout
|
||||
else std.fs.cwd().createFileZ(f, .{})
|
||||
catch |e| ui.die("Error opening export file: {s}.\n", .{ui.errorString(e)});
|
||||
bin_export.setupOutput(file);
|
||||
sink.global.sink = .bin;
|
||||
}
|
||||
|
||||
if (import_file) |f| {
|
||||
|
|
@ -543,7 +555,7 @@ pub fn main() void {
|
|||
else |_| (scan_dir orelse ".");
|
||||
scan.scan(path) catch |e| ui.die("Error opening directory: {s}.\n", .{ui.errorString(e)});
|
||||
}
|
||||
if (quit_after_scan or export_file != null) return;
|
||||
if (quit_after_scan or export_json != null or export_bin != null) return;
|
||||
|
||||
config.can_shell = config.can_shell orelse !config.imported;
|
||||
config.can_delete = config.can_delete orelse !config.imported;
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ fn rec(ctx: *Ctx, dir: *sink.Dir, entry: *model.Entry) void {
|
|||
var it = d.sub;
|
||||
while (it) |e| : (it = e.next) rec(ctx, ndir, e);
|
||||
ctx.sink.setDir(dir);
|
||||
ndir.unref();
|
||||
ndir.unref(ctx.sink);
|
||||
return;
|
||||
}
|
||||
if (entry.file()) |f| {
|
||||
|
|
@ -74,6 +74,6 @@ pub fn run(d: *model.Dir) void {
|
|||
var it = d.sub;
|
||||
while (it) |e| : (it = e.next) rec(&ctx, root, e);
|
||||
|
||||
root.unref();
|
||||
root.unref(ctx.sink);
|
||||
sink.done();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -162,10 +162,10 @@ const Dir = struct {
|
|||
return d;
|
||||
}
|
||||
|
||||
fn destroy(d: *Dir) void {
|
||||
fn destroy(d: *Dir, t: *Thread) void {
|
||||
d.pat.deinit();
|
||||
d.fd.close();
|
||||
d.sink.unref();
|
||||
d.sink.unref(t.sink);
|
||||
main.allocator.destroy(d);
|
||||
}
|
||||
};
|
||||
|
|
@ -231,7 +231,7 @@ const Thread = struct {
|
|||
var edir = dir.fd.openDirZ(name, .{ .no_follow = true, .iterate = true }) catch {
|
||||
const s = dir.sink.addDir(t.sink, name, &stat);
|
||||
s.setReadError(t.sink);
|
||||
s.unref();
|
||||
s.unref(t.sink);
|
||||
return;
|
||||
};
|
||||
|
||||
|
|
@ -275,7 +275,7 @@ const Thread = struct {
|
|||
if (entry) |e| t.scanOne(d, e.name)
|
||||
else {
|
||||
t.sink.setDir(null);
|
||||
t.stack.pop().destroy();
|
||||
t.stack.pop().destroy(t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
24
src/sink.zig
24
src/sink.zig
|
|
@ -7,6 +7,7 @@ const model = @import("model.zig");
|
|||
const mem_src = @import("mem_src.zig");
|
||||
const mem_sink = @import("mem_sink.zig");
|
||||
const json_export = @import("json_export.zig");
|
||||
const bin_export = @import("bin_export.zig");
|
||||
const ui = @import("ui.zig");
|
||||
const util = @import("util.zig");
|
||||
|
||||
|
|
@ -78,6 +79,7 @@ pub const Dir = struct {
|
|||
const Out = union(enum) {
|
||||
mem: mem_sink.Dir,
|
||||
json: json_export.Dir,
|
||||
bin: bin_export.Dir,
|
||||
};
|
||||
|
||||
pub fn addSpecial(d: *Dir, t: *Thread, name: []const u8, sp: Special) void {
|
||||
|
|
@ -85,6 +87,7 @@ pub const Dir = struct {
|
|||
switch (d.out) {
|
||||
.mem => |*m| m.addSpecial(&t.sink.mem, name, sp),
|
||||
.json => |*j| j.addSpecial(name, sp),
|
||||
.bin => |*b| b.addSpecial(&t.sink.bin, name, sp),
|
||||
}
|
||||
if (sp == .err) {
|
||||
global.last_error_lock.lock();
|
||||
|
|
@ -103,6 +106,7 @@ pub const Dir = struct {
|
|||
switch (d.out) {
|
||||
.mem => |*m| _ = m.addStat(&t.sink.mem, name, stat),
|
||||
.json => |*j| j.addStat(name, stat),
|
||||
.bin => |*b| b.addStat(&t.sink.bin, name, stat),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -119,6 +123,7 @@ pub const Dir = struct {
|
|||
.out = switch (d.out) {
|
||||
.mem => |*m| .{ .mem = m.addDir(&t.sink.mem, name, stat) },
|
||||
.json => |*j| .{ .json = j.addDir(name, stat) },
|
||||
.bin => |*b| .{ .bin = b.addDir(stat) },
|
||||
},
|
||||
};
|
||||
d.ref();
|
||||
|
|
@ -130,6 +135,7 @@ pub const Dir = struct {
|
|||
switch (d.out) {
|
||||
.mem => |*m| m.setReadError(),
|
||||
.json => |*j| j.setReadError(),
|
||||
.bin => |*b| b.setReadError(),
|
||||
}
|
||||
global.last_error_lock.lock();
|
||||
defer global.last_error_lock.unlock();
|
||||
|
|
@ -158,16 +164,17 @@ pub const Dir = struct {
|
|||
_ = d.refcnt.fetchAdd(1, .monotonic);
|
||||
}
|
||||
|
||||
pub fn unref(d: *Dir) void {
|
||||
pub fn unref(d: *Dir, t: *Thread) void {
|
||||
if (d.refcnt.fetchSub(1, .release) != 1) return;
|
||||
d.refcnt.fence(.acquire);
|
||||
|
||||
switch (d.out) {
|
||||
.mem => |*m| m.final(if (d.parent) |p| &p.out.mem else null),
|
||||
.json => |*j| j.final(),
|
||||
.bin => |*b| b.final(&t.sink.bin, d.name, if (d.parent) |p| &p.out.bin else null),
|
||||
}
|
||||
|
||||
if (d.parent) |p| p.unref();
|
||||
if (d.parent) |p| p.unref(t);
|
||||
if (d.name.len > 0) main.allocator.free(d.name);
|
||||
main.allocator.destroy(d);
|
||||
}
|
||||
|
|
@ -184,6 +191,7 @@ pub const Thread = struct {
|
|||
sink: union {
|
||||
mem: mem_sink.Thread,
|
||||
json: void,
|
||||
bin: bin_export.Thread,
|
||||
} = .{.mem = .{}},
|
||||
|
||||
fn addBytes(t: *Thread, bytes: u64) void {
|
||||
|
|
@ -215,7 +223,7 @@ pub const Thread = struct {
|
|||
pub const global = struct {
|
||||
pub var state: enum { done, err, zeroing, hlcnt, running } = .running;
|
||||
pub var threads: []Thread = undefined;
|
||||
pub var sink: enum { json, mem } = .mem;
|
||||
pub var sink: enum { json, mem, bin } = .mem;
|
||||
|
||||
pub var last_error: ?[:0]u8 = null;
|
||||
var last_error_lock = std.Thread.Mutex{};
|
||||
|
|
@ -235,7 +243,13 @@ pub fn createThreads(num: usize) []Thread {
|
|||
if (global.last_error) |p| main.allocator.free(p);
|
||||
global.last_error = null;
|
||||
global.threads = main.allocator.alloc(Thread, num) catch unreachable;
|
||||
for (global.threads) |*t| t.* = .{};
|
||||
for (global.threads) |*t| t.* = .{
|
||||
.sink = switch (global.sink) {
|
||||
.mem => .{ .mem = .{} },
|
||||
.json => .{ .json = {} },
|
||||
.bin => .{ .bin = .{} },
|
||||
},
|
||||
};
|
||||
return global.threads;
|
||||
}
|
||||
|
||||
|
|
@ -245,6 +259,7 @@ pub fn done() void {
|
|||
switch (global.sink) {
|
||||
.mem => mem_sink.done(),
|
||||
.json => json_export.done(),
|
||||
.bin => bin_export.done(global.threads),
|
||||
}
|
||||
global.state = .done;
|
||||
main.allocator.free(global.threads);
|
||||
|
|
@ -268,6 +283,7 @@ pub fn createRoot(path: []const u8, stat: *const Stat) *Dir {
|
|||
.out = switch (global.sink) {
|
||||
.mem => .{ .mem = mem_sink.createRoot(path, stat) },
|
||||
.json => .{ .json = json_export.createRoot(path, stat) },
|
||||
.bin => .{ .bin = bin_export.createRoot(stat) },
|
||||
},
|
||||
};
|
||||
return d;
|
||||
|
|
|
|||
Loading…
Reference in a new issue