binfmt: Support larger (non-data) block sizes

I realized that the 16 MiB block size limit implied that the index block
could only hold ((2^24)-16)/8 ≈ 2 million data block pointers. At the
default 64 KiB data block size, that means an export can only reference
up to ~128 GiB of uncompressed data. That's pretty limiting.

This change increases the maximum size of the index block to 256 MiB,
supporting ~33 million data block pointers and ~2 TiB of uncompressed
data with the default data block size.
This commit is contained in:
Yorhel 2024-08-09 09:29:26 +02:00
parent 9418079da3
commit 6b7983b2f5
3 changed files with 14 additions and 12 deletions

View file

@ -72,8 +72,10 @@ my @itemkeys = qw/
sub datablock($prefix, $off, $blklen, $content) {
die "$prefix: Data block too small\n" if length $content < 8;
die "$prefix: Data block too large\n" if length $content >= (1<<24);
my($num, $rawlen) = unpack 'NN', $content;
die "$prefix: Uncompressed data block size too large\n" if $rawlen >= (1<<24);
die sprintf "%s: Duplicate block id %d (first at %010x)", $prefix, $num, $datablocks{$num}>>24 if $datablocks{$num};
$datablocks{$num} = ($off << 24) | $blklen;
@ -156,17 +158,17 @@ while (1) {
my $prefix = sprintf '%010x', $off;
die "$prefix Input too short, expected block header\n" if 4 != read STDIN, my $blkhead, 4;
$blkhead = unpack 'N', $blkhead;
my $blkid = $blkhead >> 24;
my $blklen = $blkhead & 0xffffff;
my $blkid = $blkhead >> 28;
my $blklen = $blkhead & 0x0fffffff;
$prefix .= "[$blklen]";
die "$prefix: Short read on block content\n" if $blklen - 8 != read STDIN, my $content, $blklen - 8;
die "$prefix: Input too short, expected block footer\n" if 4 != read STDIN, my $blkfoot, 4;
die "$prefix: Block footer does not match header\n" if $blkhead != unpack 'N', $blkfoot;
if ($blkid == 1) {
if ($blkid == 0) {
datablock($prefix, $off, $blklen, $content);
} elsif ($blkid == 2) {
} elsif ($blkid == 1) {
indexblock($prefix, $content);
last;
} else {

View file

@ -62,7 +62,7 @@ inline fn bigu16(v: u16) [2]u8 { return @bitCast(std.mem.nativeToBig(u16, v)); }
inline fn bigu32(v: u32) [4]u8 { return @bitCast(std.mem.nativeToBig(u32, v)); }
inline fn bigu64(v: u64) [8]u8 { return @bitCast(std.mem.nativeToBig(u64, v)); }
inline fn blockHeader(id: u8, len: u32) [4]u8 { return bigu32((@as(u32, id) << 24) | len); }
inline fn blockHeader(id: u4, len: u28) [4]u8 { return bigu32((@as(u32, id) << 28) | len); }
inline fn cborByte(major: CborMajor, arg: u5) u8 { return (@as(u8, @intFromEnum(major)) << 5) | arg; }
@ -92,11 +92,11 @@ pub const Thread = struct {
if (t.block_num == std.math.maxInt(u32) or t.off <= 1) return "";
const bodylen = compressZstd(t.buf[0..t.off], t.tmp[12..]);
const blocklen: u32 = @intCast(bodylen + 16);
t.tmp[0..4].* = blockHeader(1, blocklen);
const blocklen: u28 = @intCast(bodylen + 16);
t.tmp[0..4].* = blockHeader(0, blocklen);
t.tmp[4..8].* = bigu32(t.block_num);
t.tmp[8..12].* = bigu32(@intCast(t.off));
t.tmp[12+bodylen..][0..4].* = blockHeader(1, blocklen);
t.tmp[12+bodylen..][0..4].* = blockHeader(0, blocklen);
return t.tmp[0..blocklen];
}
@ -416,8 +416,8 @@ pub fn done(threads: []sink.Thread) void {
while (std.mem.endsWith(u8, global.index.items, &[1]u8{0}**8))
global.index.shrinkRetainingCapacity(global.index.items.len - 8);
global.index.appendSlice(&bigu64(global.root_itemref)) catch unreachable;
global.index.appendSlice(&blockHeader(2, @intCast(global.index.items.len + 4))) catch unreachable;
global.index.items[0..4].* = blockHeader(2, @intCast(global.index.items.len));
global.index.appendSlice(&blockHeader(1, @intCast(global.index.items.len + 4))) catch unreachable;
global.index.items[0..4].* = blockHeader(1, @intCast(global.index.items.len));
global.fd.writeAll(global.index.items) catch |e|
ui.die("Error writing to file: {s}.\n", .{ ui.errorString(e) });
global.index.clearAndFree();

View file

@ -508,8 +508,8 @@ pub fn open(fd: std.fs.File) !void {
var buf: [4]u8 = undefined;
if (try fd.preadAll(&buf, size - 4) != 4) return error.EndOfStream;
const index_header = bigu32(buf);
if ((index_header >> 24) != 2 or (index_header & 7) != 0) die();
const len = (index_header & 0x00ffffff) - 8; // excluding block header & footer
if ((index_header >> 28) != 1 or (index_header & 7) != 0) die();
const len = (index_header & 0x0fffffff) - 8; // excluding block header & footer
if (len >= size) die();
global.index = main.allocator.alloc(u8, len) catch unreachable;
if (try fd.preadAll(global.index, size - len - 4) != global.index.len) return error.EndOfStream;