#!/usr/bin/perl # SPDX-FileCopyrightText: Yorhel # SPDX-License-Identifier: MIT # Usage: ncdubinexp.pl [options] >24 if $datablocks{$num}; $datablocks{$num} = ($off << 24) | $blklen; my $compressed = substr $content, 8; $printblocks && printf "%s: data block %d rawlen %d (%.2f)\n", $prefix, $num, $rawlen, $rawlen/(length($compressed))*100; $datablock_len += length($compressed); $rawdata_len += $rawlen; my $rawdata = decompress($compressed); die "$prefix: Block id $num failed decompression\n" if !defined $rawdata; die sprintf "%s: Block id %d decompressed to %d bytes but expected %d\n", $prefix, $num, length($rawdata), $rawlen if $rawlen != length $rawdata; cbordata($num, $rawdata); } sub fmtitem($val) { join ' ', map "$_:$val->{$_}", grep exists $val->{$_}, @itemkeys; } sub cbordata($blknum, $data) { my $cbor = CBOR::XS->new_safe; my $off = 0; my $nitems = 0; while ($off < length $data) { # This substr madness is prolly quite slow my($val, $len) = $cbor->decode_prefix(substr $data, $off); my $itemref = ($blknum << 24) | $off; $off += $len; next if !defined $val; $nitems++; # Basic validation of the CBOR data. Doesn't validate that every value # has the correct CBOR type or that integers are within range. $val = { _itemref => $itemref, map { die sprintf "#%010x: Invalid CBOR key '%s'\n", $itemref, $_ if !/^[0-9]+$/ || !$itemkeys[$_]; my($k, $v) = ($itemkeys[$_], $val->{$_}); die sprintf "#%010x: Invalid value for key '%s': '%s'\n", $itemref, $k, $v if ref $v eq 'ARRAY' || ref $v eq 'HASH' || !defined $v || !( $k eq 'type' ? ($v =~ /^(-[1-4]|[0-3])$/) : $k eq 'prev' || $k eq 'sub' || $k eq 'prevlnk' ? 1 : # itemrefs are validated separately $k eq 'name' ? length $v : $k eq 'rderr' ? Types::Serialiser::is_bool($v) : /^[0-9]+$/ ); ($k,$v) } keys %$val }; $printitems && printf "#%010x: %s\n", $itemref, fmtitem $val; $items{$itemref} = $val; } $minitemsperblock = $nitems if $minitemsperblock > $nitems; $maxitemsperblock = $nitems if $maxitemsperblock < $nitems; } sub indexblock($prefix, $content) { $printblocks && print "$prefix: index block\n"; my $maxnum = max keys %datablocks; die "$prefix: index block size incorrect for $maxnum+1 data blocks\n" if length($content) != 8*($maxnum+1) + 8; my @ints = unpack 'Q>*', $content; $root_itemref = pop @ints; for my $i (0..$#ints-1) { if (!$datablocks{$i}) { die "$prefix: index entry for missing block (#$i) must be 0\n" if $ints[$i] != 0; } else { die sprintf "%s: invalid index entry for block #%d (got %016x expected %016x)\n", $prefix, $i, $ints[$i], $datablocks{$i} if $ints[$i] != $datablocks{$i}; } } } while (1) { my $off = tell STDIN; my $prefix = sprintf '%010x', $off; die "$prefix Input too short, expected block header\n" if 4 != read STDIN, my $blkhead, 4; $blkhead = unpack 'N', $blkhead; my $blkid = $blkhead >> 24; my $blklen = $blkhead & 0xffffff; $prefix .= "[$blklen]"; die "$prefix: Short read on block content\n" if $blklen - 8 != read STDIN, my $content, $blklen - 8; die "$prefix: Input too short, expected block footer\n" if 4 != read STDIN, my $blkfoot, 4; die "$prefix: Block footer does not match header\n" if $blkhead != unpack 'N', $blkfoot; if ($blkid == 1) { datablock($prefix, $off, $blklen, $content); } elsif ($blkid == 2) { indexblock($prefix, $content); last; } else { die "$prefix Unknown block id $blkid\n"; } } { die sprintf "0x%08x: Data after index block\n", tell(STDIN) if 0 != read STDIN, my $x, 1; } # Each item must be referenced exactly once from either a 'prev' or 'sub' key, # $nodup verifies the "at most once" part. sub resolve($cur, $key, $nodup) { my $ref = exists $cur->{$key} ? $cur->{$key} : return; my $item = $ref < 0 ? ($items{ $cur->{_itemref} + $ref } || die sprintf "#%010x: Invalid relative itemref %s: %d\n", $cur->{_itemref}, $key, $ref) : ($items{$ref} || die sprintf "#%010x: Invalid reference %s to #%010x\n", $cur->{_itemref}, $key, $ref); die sprintf "Item #%010x referenced more than once, from #%010x and #%010x\n", $item->{_itemref}, $item->{_lastseen}, $cur->{_itemref} if $nodup && defined $item->{_lastseen}; $item->{_lastseen} = $cur->{_itemref} if $nodup; return $item; } my @dirblocks; # [ path, nitems, nblocks ] my %dirblocks; # nblocks => ndirs sub traverse($parent, $path) { my $sub = resolve($parent, 'sub', 1); my %blocks; my $items = 0; while ($sub) { $items++; $blocks{ $sub->{_itemref} >> 24 }++; traverse($sub, "$path/$sub->{name}") if $sub->{type} == 0; $sub = resolve($sub, 'prev', 1); } push @dirblocks, [ $path, $items, scalar keys %blocks ] if scalar keys %blocks > 1; $dirblocks{ keys %blocks }++ if $items > 0; $items && $printdirs && printf "#%010x: %d items in %d blocks (%d .. %d) %s\n", $parent->{_itemref}, $items, scalar keys %blocks, min(values %blocks), max(values %blocks), $path; } { my $root = $items{$root_itemref} || die sprintf "Invalid root itemref: %010x\n", $root_itemref; $root->{_lastseen} = 0xffffffffff; traverse($root, $root->{name}); my($noref) = grep !$_->{_lastseen}, values %items; die sprintf "No reference found to #%010x\n", $noref->{_itemref} if $noref; } if ($printstats) { my $nblocks = keys %datablocks; my $nitems = keys %items; printf " Total items: %d\n", $nitems; printf " Total blocks: %d\n", $nblocks; printf " Items per block: %.1f (%d .. %d)\n", $nitems / $nblocks, $minitemsperblock, $maxitemsperblock; printf " Avg block size: %d compressed, %d raw (%.1f)\n", $datablock_len/$nblocks, $rawdata_len/$nblocks, $rawdata_len/$datablock_len*100; printf " Avg item size: %.1f compressed, %.1f raw\n", $datablock_len/$nitems, $rawdata_len/$nitems; @dirblocks = sort { $b->[2] <=> $a->[2] } @dirblocks; print "\nBlocks per directory listing histogram\n"; printf " %5d %6d\n", $_, $dirblocks{$_} for sort { $a <=> $b } keys %dirblocks; print "\nMost blocks per directory listing\n"; print " items blks path\n"; printf "%10d %4d %s\n", @{$dirblocks[$_]}[1,2,0] for (0..min 9, $#dirblocks); }