From 2fcd7f370cdc3efabc68147eb750ebaf502a9b77 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sun, 3 Nov 2024 13:13:52 +0100 Subject: [PATCH] Drop ncdubinexp.pl Was a useful testing tool during development, but now been replaced with a more robust 'ncdutils validate' in https://code.blicky.net/yorhel/ncdutils --- ncdubinexp.pl | 242 -------------------------------------------------- 1 file changed, 242 deletions(-) delete mode 100755 ncdubinexp.pl diff --git a/ncdubinexp.pl b/ncdubinexp.pl deleted file mode 100755 index aa8d30f..0000000 --- a/ncdubinexp.pl +++ /dev/null @@ -1,242 +0,0 @@ -#!/usr/bin/perl -# SPDX-FileCopyrightText: Yorhel -# SPDX-License-Identifier: MIT - - -# Usage: ncdubinexp.pl [options] = (1<<24); - - my $num = unpack 'N', $content; - die sprintf "%s: Duplicate block id %d (first at %010x)", $prefix, $num, $datablocks{$num}>>24 if $datablocks{$num}; - $datablocks{$num} = ($off << 24) | $blklen; - - my $compressed = substr $content, 4; - my $rawdata = decompress($compressed); - die "$prefix: Block id $num failed decompression\n" if !defined $rawdata; - die "$prefix: Uncompressed data block size too large\n" if length $rawdata >= (1<<24); - - $printblocks && printf "%s: data block %d rawlen %d (%.2f)\n", $prefix, $num, length($rawdata), length($compressed)/length($rawdata)*100; - - $datablock_len += length($compressed); - $rawdata_len += length($rawdata); - - cbordata($num, $rawdata); -} - - -sub fmtitem($val) { - join ' ', map "$_:$val->{$_}", grep exists $val->{$_}, @itemkeys; -} - - -sub cbordata($blknum, $data) { - my $cbor = CBOR::XS->new_safe; - my $off = 0; - my $nitems = 0; - while ($off < length $data) { # This substr madness is prolly quite slow - my($val, $len) = $cbor->decode_prefix(substr $data, $off); - my $itemref = ($blknum << 24) | $off; - $off += $len; - $nitems++; - - # Basic validation of the CBOR data. Doesn't validate that every value - # has the correct CBOR type or that integers are within range. - $val = { _itemref => $itemref, map { - die sprintf "#%010x: Invalid CBOR key '%s'\n", $itemref, $_ if !/^[0-9]+$/ || !$itemkeys[$_]; - my($k, $v) = ($itemkeys[$_], $val->{$_}); - die sprintf "#%010x: Invalid value for key '%s': '%s'\n", $itemref, $k, $v - if ref $v eq 'ARRAY' || ref $v eq 'HASH' || !defined $v || !( - $k eq 'type' ? ($v =~ /^(-[1-4]|[0-3])$/) : - $k eq 'prev' || $k eq 'sub' || $k eq 'prevlnk' ? 1 : # itemrefs are validated separately - $k eq 'name' ? length $v : - $k eq 'rderr' ? Types::Serialiser::is_bool($v) : - /^[0-9]+$/ - ); - ($k,$v) - } keys %$val }; - - $printitems && printf "#%010x: %s\n", $itemref, fmtitem $val; - $items{$itemref} = $val; - } - $minitemsperblock = $nitems if $minitemsperblock > $nitems; - $maxitemsperblock = $nitems if $maxitemsperblock < $nitems; -} - - -sub indexblock($prefix, $content) { - $printblocks && print "$prefix: index block\n"; - - my $maxnum = max keys %datablocks; - die "$prefix: index block size incorrect for $maxnum+1 data blocks\n" if length($content) != 8*($maxnum+1) + 8; - - my @ints = unpack 'Q>*', $content; - $root_itemref = pop @ints; - - for my $i (0..$#ints-1) { - if (!$datablocks{$i}) { - die "$prefix: index entry for missing block (#$i) must be 0\n" if $ints[$i] != 0; - } else { - die sprintf "%s: invalid index entry for block #%d (got %016x expected %016x)\n", - $prefix, $i, $ints[$i], $datablocks{$i} - if $ints[$i] != $datablocks{$i}; - } - } -} - - -while (1) { - my $off = tell STDIN; - my $prefix = sprintf '%010x', $off; - die "$prefix Input too short, expected block header\n" if 4 != read STDIN, my $blkhead, 4; - $blkhead = unpack 'N', $blkhead; - my $blkid = $blkhead >> 28; - my $blklen = $blkhead & 0x0fffffff; - - $prefix .= "[$blklen]"; - die "$prefix: Short read on block content\n" if $blklen - 8 != read STDIN, my $content, $blklen - 8; - die "$prefix: Input too short, expected block footer\n" if 4 != read STDIN, my $blkfoot, 4; - die "$prefix: Block footer does not match header\n" if $blkhead != unpack 'N', $blkfoot; - - if ($blkid == 0) { - datablock($prefix, $off, $blklen, $content); - } elsif ($blkid == 1) { - indexblock($prefix, $content); - last; - } else { - die "$prefix Unknown block id $blkid\n"; - } -} - -{ - die sprintf "0x%08x: Data after index block\n", tell(STDIN) if 0 != read STDIN, my $x, 1; -} - - - -# Each item must be referenced exactly once from either a 'prev' or 'sub' key, -# $nodup verifies the "at most once" part. -sub resolve($cur, $key, $nodup) { - my $ref = exists $cur->{$key} ? $cur->{$key} : return; - my $item = $ref < 0 - ? ($items{ $cur->{_itemref} + $ref } || die sprintf "#%010x: Invalid relative itemref %s: %d\n", $cur->{_itemref}, $key, $ref) - : ($items{$ref} || die sprintf "#%010x: Invalid reference %s to #%010x\n", $cur->{_itemref}, $key, $ref); - die sprintf "Item #%010x referenced more than once, from #%010x and #%010x\n", $item->{_itemref}, $item->{_lastseen}, $cur->{_itemref} - if $nodup && defined $item->{_lastseen}; - $item->{_lastseen} = $cur->{_itemref} if $nodup; - return $item; -} - -my @dirblocks; # [ path, nitems, nblocks ] -my %dirblocks; # nblocks => ndirs - -sub traverse($parent, $path) { - my $sub = resolve($parent, 'sub', 1); - my %blocks; - my $items = 0; - while ($sub) { - $items++; - $blocks{ $sub->{_itemref} >> 24 }++; - traverse($sub, "$path/$sub->{name}") if $sub->{type} == 0; - $sub = resolve($sub, 'prev', 1); - } - push @dirblocks, [ $path, $items, scalar keys %blocks ] if scalar keys %blocks > 1; - $dirblocks{ keys %blocks }++ if $items > 0; - $items && $printdirs && printf "#%010x: %d items in %d blocks (%d .. %d) %s\n", - $parent->{_itemref}, $items, scalar keys %blocks, - min(values %blocks), max(values %blocks), $path; -} - - -{ - my $root = $items{$root_itemref} || die sprintf "Invalid root itemref: %010x\n", $root_itemref; - $root->{_lastseen} = 0xffffffffff; - traverse($root, $root->{name}); - - my($noref) = grep !$_->{_lastseen}, values %items; - die sprintf "No reference found to #%010x\n", $noref->{_itemref} if $noref; -} - -if ($printstats) { - my $nblocks = keys %datablocks; - my $nitems = keys %items; - printf " Total items: %d\n", $nitems; - printf " Total blocks: %d\n", $nblocks; - printf " Items per block: %.1f (%d .. %d)\n", $nitems / $nblocks, $minitemsperblock, $maxitemsperblock; - printf " Avg block size: %d compressed, %d raw (%.1f)\n", $datablock_len/$nblocks, $rawdata_len/$nblocks, $datablock_len/$rawdata_len*100; - printf " Avg item size: %.1f compressed, %.1f raw\n", $datablock_len/$nitems, $rawdata_len/$nitems; - - @dirblocks = sort { $b->[2] <=> $a->[2] } @dirblocks; - print "\nBlocks per directory listing histogram\n"; - printf " %5d %6d\n", $_, $dirblocks{$_} for sort { $a <=> $b } keys %dirblocks; - print "\nMost blocks per directory listing\n"; - print " items blks path\n"; - printf "%10d %4d %s\n", @{$dirblocks[$_]}[1,2,0] for (0..min 9, $#dirblocks); -}