binfmt: Remove "rawlen" field, require use of ZSTD_getFrameContentSize()

The zstd frame format already supports this functionality and I don't really see a benefit in not making use of that.
2026-03-13 15:08:39 -08:00 · 2024-08-11 15:56:12 +02:00 · 2024-08-11 15:56:12 +02:00 · 26229d7a63
commit 26229d7a63
parent 4ef9c3e817
3 changed files with 26 additions and 23 deletions
--- a/ncdubinexp.pl
+++ b/ncdubinexp.pl
@@ -74,21 +74,20 @@ sub datablock($prefix, $off, $blklen, $content) {
    die "$prefix: Data block too small\n" if length $content < 8;
    die "$prefix: Data block too large\n" if length $content >= (1<<24);
-    my($num, $rawlen) = unpack 'NN', $content;
+    my $num = unpack 'N', $content;
-    die "$prefix: Uncompressed data block size too large\n" if $rawlen >= (1<<24);
    die sprintf "%s: Duplicate block id %d (first at %010x)", $prefix, $num, $datablocks{$num}>>24 if $datablocks{$num};
    $datablocks{$num} = ($off << 24) | $blklen;
-    my $compressed = substr $content, 8;
+    my $compressed = substr $content, 4;
-    $printblocks && printf "%s: data block %d  rawlen %d (%.2f)\n", $prefix, $num, $rawlen, $rawlen/(length($compressed))*100;
-    $datablock_len += length($compressed);
-    $rawdata_len += $rawlen;
    my $rawdata = decompress($compressed);
    die "$prefix: Block id $num failed decompression\n" if !defined $rawdata;
-    die sprintf "%s: Block id %d decompressed to %d bytes but expected %d\n",
+    die "$prefix: Uncompressed data block size too large\n" if length $rawdata >= (1<<24);
-        $prefix, $num, length($rawdata), $rawlen if $rawlen != length $rawdata;
+
+    $printblocks && printf "%s: data block %d  rawlen %d (%.2f)\n", $prefix, $num, length($rawdata), length($compressed)/length($rawdata)*100;
+    $datablock_len += length($compressed);
+    $rawdata_len += length($rawdata);
    cbordata($num, $rawdata);
 }
@@ -231,7 +230,7 @@ if ($printstats) {
    printf "     Total items: %d\n", $nitems;
    printf "    Total blocks: %d\n", $nblocks;
    printf " Items per block: %.1f (%d .. %d)\n", $nitems / $nblocks, $minitemsperblock, $maxitemsperblock;
-    printf "  Avg block size: %d compressed, %d raw (%.1f)\n", $datablock_len/$nblocks, $rawdata_len/$nblocks, $rawdata_len/$datablock_len*100;
+    printf "  Avg block size: %d compressed, %d raw (%.1f)\n", $datablock_len/$nblocks, $rawdata_len/$nblocks, $datablock_len/$rawdata_len*100;
    printf "   Avg item size: %.1f compressed, %.1f raw\n", $datablock_len/$nitems, $rawdata_len/$nitems;
    @dirblocks = sort { $b->[2] <=> $a->[2] } @dirblocks;
--- a/src/bin_export.zig
+++ b/src/bin_export.zig
@@ -110,15 +110,14 @@ pub const Thread = struct {
        var out = std.ArrayList(u8).init(main.allocator);
        if (t.block_num == std.math.maxInt(u32) or t.off == 0) return out;
-        out.ensureTotalCapacityPrecise(16 + compressBound(t.off)) catch unreachable;
+        out.ensureTotalCapacityPrecise(12 + compressBound(t.off)) catch unreachable;
        out.items.len = out.capacity;
-        const bodylen = compressZstd(t.buf[0..t.off], out.items[12..]);
+        const bodylen = compressZstd(t.buf[0..t.off], out.items[8..]);
-        out.items.len = 16 + bodylen;
+        out.items.len = 12 + bodylen;
        out.items[0..4].* = blockHeader(0, @intCast(out.items.len));
        out.items[4..8].* = bigu32(t.block_num);
-        out.items[8..12].* = bigu32(@intCast(t.off));
+        out.items[8+bodylen..][0..4].* = blockHeader(0, @intCast(out.items.len));
-        out.items[12+bodylen..][0..4].* = blockHeader(0, @intCast(out.items.len));
        return out;
    }
--- a/src/bin_reader.zig
+++ b/src/bin_reader.zig
@@ -10,6 +10,8 @@ const ui = @import("ui.zig");
 const bin_export = @import("bin_export.zig");
 extern fn ZSTD_decompress(dst: ?*anyopaque, dstCapacity: usize, src: ?*const anyopaque, compressedSize: usize) usize;
 extern fn ZSTD_getFrameContentSize(src: ?*const anyopaque, srcSize: usize) c_ulonglong;
 const CborMajor = bin_export.CborMajor;
 const ItemKey = bin_export.ItemKey;
@@ -90,19 +92,22 @@ fn readBlock(num: u32) []const u8 {
    if (num > global.index.len/8 - 1) die();
    const offlen = bigu64(global.index[num*8..][0..8].*);
-    if ((offlen & 0xffffff) < 16) die();
+    const off = offlen >> 24;
+    const len = offlen & 0xffffff;
+    if (len <= 12) die();
-    const buf = main.allocator.alloc(u8, @intCast((offlen & 0xffffff) - 12)) catch unreachable;
+    // Only read the compressed data part, assume block header, number and footer are correct.
+    const buf = main.allocator.alloc(u8, @intCast(len - 12)) catch unreachable;
    defer main.allocator.free(buf);
-    const rdlen = global.fd.preadAll(buf, (offlen >> 24) + 8)
+    const rdlen = global.fd.preadAll(buf, off + 8)
        catch |e| ui.die("Error reading from file: {s}\n", .{ui.errorString(e)});
    if (rdlen != buf.len) die();
-    const rawlen = bigu32(buf[0..4].*);
+    const rawlen = ZSTD_getFrameContentSize(buf.ptr, buf.len);
-    if (rawlen >= (1<<24)) die();
+    if (rawlen <= 0 or rawlen >= (1<<24)) die();
-    block.data = main.allocator.alloc(u8, rawlen) catch unreachable;
+    block.data = main.allocator.alloc(u8, @intCast(rawlen)) catch unreachable;
-    const res = ZSTD_decompress(block.data.ptr, block.data.len, buf[4..].ptr, buf.len - 4);
+    const res = ZSTD_decompress(block.data.ptr, block.data.len, buf.ptr, buf.len);
    if (res != block.data.len) ui.die("Error decompressing block {} (expected {} got {})\n", .{ num, block.data.len, res });
    return block.data;