binfmt: Support larger (non-data) block sizes

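I realized that the 16 MiB block size limit implied that the index block could only hold ((2^24)-16)/8 ≈ 2 million data block pointers. At the default 64 KiB data block size, that means an export can only reference up to ~128 GiB of uncompressed data. That's pretty limiting. This change increases the maximum size of the index block to 256 MiB, supporting ~33 million data block pointers and ~2 TiB of uncompressed data with the default data block size. To make room, the 32-bit block header now uses the top 4 bits for the block type and the low 28 bits for the block length (previously 8 and 24 bits), and the block type ids are renumbered: data blocks are now 0 and the index block is 1. Data blocks themselves remain limited to 16 MiB.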
2026-03-14 07:18:40 -08:00 · 2024-08-09 09:29:26 +02:00 · 2024-08-09 09:29:26 +02:00 · 6b7983b2f5
commit 6b7983b2f5
parent 9418079da3
3 changed files with 14 additions and 12 deletions
--- a/ncdubinexp.pl
+++ b/ncdubinexp.pl
@ -72,8 +72,10 @@ my @itemkeys = qw/
 sub datablock($prefix, $off, $blklen, $content) {
    die "$prefix: Data block too small\n" if length $content < 8;
    die "$prefix: Data block too large\n" if length $content >= (1<<24);
    my($num, $rawlen) = unpack 'NN', $content;
    die "$prefix: Uncompressed data block size too large\n" if $rawlen >= (1<<24);
    die sprintf "%s: Duplicate block id %d (first at %010x)", $prefix, $num, $datablocks{$num}>>24 if $datablocks{$num};
    $datablocks{$num} = ($off << 24) | $blklen;
@ -156,17 +158,17 @@ while (1) {
    my $prefix = sprintf '%010x', $off;
    die "$prefix Input too short, expected block header\n" if 4 != read STDIN, my $blkhead, 4;
    $blkhead = unpack 'N', $blkhead;
-    my $blkid = $blkhead >> 24;
+    my $blkid = $blkhead >> 28;
-    my $blklen = $blkhead & 0xffffff;
+    my $blklen = $blkhead & 0x0fffffff;
    $prefix .= "[$blklen]";
    die "$prefix: Short read on block content\n" if $blklen - 8 != read STDIN, my $content, $blklen - 8;
    die "$prefix: Input too short, expected block footer\n" if 4 != read STDIN, my $blkfoot, 4;
    die "$prefix: Block footer does not match header\n" if $blkhead != unpack 'N', $blkfoot;
-    if ($blkid == 1) {
+    if ($blkid == 0) {
        datablock($prefix, $off, $blklen, $content);
-    } elsif ($blkid == 2) {
+    } elsif ($blkid == 1) {
        indexblock($prefix, $content);
        last;
    } else {
--- a/src/bin_export.zig
+++ b/src/bin_export.zig
@ -62,7 +62,7 @@ inline fn bigu16(v: u16) [2]u8 { return @bitCast(std.mem.nativeToBig(u16, v)); }
 inline fn bigu32(v: u32) [4]u8 { return @bitCast(std.mem.nativeToBig(u32, v)); }
 inline fn bigu64(v: u64) [8]u8 { return @bitCast(std.mem.nativeToBig(u64, v)); }
-inline fn blockHeader(id: u8, len: u32) [4]u8 { return bigu32((@as(u32, id) << 24) | len); }
+inline fn blockHeader(id: u4, len: u28) [4]u8 { return bigu32((@as(u32, id) << 28) | len); }
 inline fn cborByte(major: CborMajor, arg: u5) u8 { return (@as(u8, @intFromEnum(major)) << 5) | arg; }
@ -92,11 +92,11 @@ pub const Thread = struct {
        if (t.block_num == std.math.maxInt(u32) or t.off <= 1) return "";
        const bodylen = compressZstd(t.buf[0..t.off], t.tmp[12..]);
-        const blocklen: u32 = @intCast(bodylen + 16);
+        const blocklen: u28 = @intCast(bodylen + 16);
-        t.tmp[0..4].* = blockHeader(1, blocklen);
+        t.tmp[0..4].* = blockHeader(0, blocklen);
        t.tmp[4..8].* = bigu32(t.block_num);
        t.tmp[8..12].* = bigu32(@intCast(t.off));
-        t.tmp[12+bodylen..][0..4].* = blockHeader(1, blocklen);
+        t.tmp[12+bodylen..][0..4].* = blockHeader(0, blocklen);
        return t.tmp[0..blocklen];
    }
@ -416,8 +416,8 @@ pub fn done(threads: []sink.Thread) void {
    while (std.mem.endsWith(u8, global.index.items, &[1]u8{0}**8))
        global.index.shrinkRetainingCapacity(global.index.items.len - 8);
    global.index.appendSlice(&bigu64(global.root_itemref)) catch unreachable;
-    global.index.appendSlice(&blockHeader(2, @intCast(global.index.items.len + 4))) catch unreachable;
+    global.index.appendSlice(&blockHeader(1, @intCast(global.index.items.len + 4))) catch unreachable;
-    global.index.items[0..4].* = blockHeader(2, @intCast(global.index.items.len));
+    global.index.items[0..4].* = blockHeader(1, @intCast(global.index.items.len));
    global.fd.writeAll(global.index.items) catch |e|
        ui.die("Error writing to file: {s}.\n", .{ ui.errorString(e) });
    global.index.clearAndFree();
--- a/src/bin_reader.zig
+++ b/src/bin_reader.zig
@ -508,8 +508,8 @@ pub fn open(fd: std.fs.File) !void {
    var buf: [4]u8 = undefined;
    if (try fd.preadAll(&buf, size - 4) != 4) return error.EndOfStream;
    const index_header = bigu32(buf);
-    if ((index_header >> 24) != 2 or (index_header & 7) != 0) die();
+    if ((index_header >> 28) != 1 or (index_header & 7) != 0) die();
-    const len = (index_header & 0x00ffffff) - 8; // excluding block header & footer
+    const len = (index_header & 0x0fffffff) - 8; // excluding block header & footer
    if (len >= size) die();
    global.index = main.allocator.alloc(u8, len) catch unreachable;
    if (try fd.preadAll(global.index, size - len - 4) != global.index.len) return error.EndOfStream;