mirror of
https://code.blicky.net/yorhel/ncdu.git
synced 2026-01-13 01:08:41 -09:00
binfmt: Support larger (non-data) block sizes
I realized that the 16 MiB block size limit implied that the index block could only hold (2^24 − 16) / 8 ≈ 2 million data block pointers. At the default 64 KiB data block size, that means an export can only reference up to ~128 GiB of uncompressed data, which is pretty limiting. This change increases the maximum size of the index block to 256 MiB, supporting ~33 million data block pointers and ~2 TiB of uncompressed data with the default data block size. To make room, the block header now uses 4 bits for the block type and 28 bits for the block length (previously 8 and 24); data blocks themselves remain limited to 16 MiB.
This commit is contained in:
parent
9418079da3
commit
6b7983b2f5
3 changed files with 14 additions and 12 deletions
|
|
@ -72,8 +72,10 @@ my @itemkeys = qw/
|
||||||
|
|
||||||
sub datablock($prefix, $off, $blklen, $content) {
|
sub datablock($prefix, $off, $blklen, $content) {
|
||||||
die "$prefix: Data block too small\n" if length $content < 8;
|
die "$prefix: Data block too small\n" if length $content < 8;
|
||||||
|
die "$prefix: Data block too large\n" if length $content >= (1<<24);
|
||||||
|
|
||||||
my($num, $rawlen) = unpack 'NN', $content;
|
my($num, $rawlen) = unpack 'NN', $content;
|
||||||
|
die "$prefix: Uncompressed data block size too large\n" if $rawlen >= (1<<24);
|
||||||
die sprintf "%s: Duplicate block id %d (first at %010x)", $prefix, $num, $datablocks{$num}>>24 if $datablocks{$num};
|
die sprintf "%s: Duplicate block id %d (first at %010x)", $prefix, $num, $datablocks{$num}>>24 if $datablocks{$num};
|
||||||
$datablocks{$num} = ($off << 24) | $blklen;
|
$datablocks{$num} = ($off << 24) | $blklen;
|
||||||
|
|
||||||
|
|
@ -156,17 +158,17 @@ while (1) {
|
||||||
my $prefix = sprintf '%010x', $off;
|
my $prefix = sprintf '%010x', $off;
|
||||||
die "$prefix Input too short, expected block header\n" if 4 != read STDIN, my $blkhead, 4;
|
die "$prefix Input too short, expected block header\n" if 4 != read STDIN, my $blkhead, 4;
|
||||||
$blkhead = unpack 'N', $blkhead;
|
$blkhead = unpack 'N', $blkhead;
|
||||||
my $blkid = $blkhead >> 24;
|
my $blkid = $blkhead >> 28;
|
||||||
my $blklen = $blkhead & 0xffffff;
|
my $blklen = $blkhead & 0x0fffffff;
|
||||||
|
|
||||||
$prefix .= "[$blklen]";
|
$prefix .= "[$blklen]";
|
||||||
die "$prefix: Short read on block content\n" if $blklen - 8 != read STDIN, my $content, $blklen - 8;
|
die "$prefix: Short read on block content\n" if $blklen - 8 != read STDIN, my $content, $blklen - 8;
|
||||||
die "$prefix: Input too short, expected block footer\n" if 4 != read STDIN, my $blkfoot, 4;
|
die "$prefix: Input too short, expected block footer\n" if 4 != read STDIN, my $blkfoot, 4;
|
||||||
die "$prefix: Block footer does not match header\n" if $blkhead != unpack 'N', $blkfoot;
|
die "$prefix: Block footer does not match header\n" if $blkhead != unpack 'N', $blkfoot;
|
||||||
|
|
||||||
if ($blkid == 1) {
|
if ($blkid == 0) {
|
||||||
datablock($prefix, $off, $blklen, $content);
|
datablock($prefix, $off, $blklen, $content);
|
||||||
} elsif ($blkid == 2) {
|
} elsif ($blkid == 1) {
|
||||||
indexblock($prefix, $content);
|
indexblock($prefix, $content);
|
||||||
last;
|
last;
|
||||||
} else {
|
} else {
|
||||||
|
|
|
||||||
|
|
@ -62,7 +62,7 @@ inline fn bigu16(v: u16) [2]u8 { return @bitCast(std.mem.nativeToBig(u16, v)); }
|
||||||
inline fn bigu32(v: u32) [4]u8 { return @bitCast(std.mem.nativeToBig(u32, v)); }
|
inline fn bigu32(v: u32) [4]u8 { return @bitCast(std.mem.nativeToBig(u32, v)); }
|
||||||
inline fn bigu64(v: u64) [8]u8 { return @bitCast(std.mem.nativeToBig(u64, v)); }
|
inline fn bigu64(v: u64) [8]u8 { return @bitCast(std.mem.nativeToBig(u64, v)); }
|
||||||
|
|
||||||
inline fn blockHeader(id: u8, len: u32) [4]u8 { return bigu32((@as(u32, id) << 24) | len); }
|
inline fn blockHeader(id: u4, len: u28) [4]u8 { return bigu32((@as(u32, id) << 28) | len); }
|
||||||
|
|
||||||
inline fn cborByte(major: CborMajor, arg: u5) u8 { return (@as(u8, @intFromEnum(major)) << 5) | arg; }
|
inline fn cborByte(major: CborMajor, arg: u5) u8 { return (@as(u8, @intFromEnum(major)) << 5) | arg; }
|
||||||
|
|
||||||
|
|
@ -92,11 +92,11 @@ pub const Thread = struct {
|
||||||
if (t.block_num == std.math.maxInt(u32) or t.off <= 1) return "";
|
if (t.block_num == std.math.maxInt(u32) or t.off <= 1) return "";
|
||||||
|
|
||||||
const bodylen = compressZstd(t.buf[0..t.off], t.tmp[12..]);
|
const bodylen = compressZstd(t.buf[0..t.off], t.tmp[12..]);
|
||||||
const blocklen: u32 = @intCast(bodylen + 16);
|
const blocklen: u28 = @intCast(bodylen + 16);
|
||||||
t.tmp[0..4].* = blockHeader(1, blocklen);
|
t.tmp[0..4].* = blockHeader(0, blocklen);
|
||||||
t.tmp[4..8].* = bigu32(t.block_num);
|
t.tmp[4..8].* = bigu32(t.block_num);
|
||||||
t.tmp[8..12].* = bigu32(@intCast(t.off));
|
t.tmp[8..12].* = bigu32(@intCast(t.off));
|
||||||
t.tmp[12+bodylen..][0..4].* = blockHeader(1, blocklen);
|
t.tmp[12+bodylen..][0..4].* = blockHeader(0, blocklen);
|
||||||
return t.tmp[0..blocklen];
|
return t.tmp[0..blocklen];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -416,8 +416,8 @@ pub fn done(threads: []sink.Thread) void {
|
||||||
while (std.mem.endsWith(u8, global.index.items, &[1]u8{0}**8))
|
while (std.mem.endsWith(u8, global.index.items, &[1]u8{0}**8))
|
||||||
global.index.shrinkRetainingCapacity(global.index.items.len - 8);
|
global.index.shrinkRetainingCapacity(global.index.items.len - 8);
|
||||||
global.index.appendSlice(&bigu64(global.root_itemref)) catch unreachable;
|
global.index.appendSlice(&bigu64(global.root_itemref)) catch unreachable;
|
||||||
global.index.appendSlice(&blockHeader(2, @intCast(global.index.items.len + 4))) catch unreachable;
|
global.index.appendSlice(&blockHeader(1, @intCast(global.index.items.len + 4))) catch unreachable;
|
||||||
global.index.items[0..4].* = blockHeader(2, @intCast(global.index.items.len));
|
global.index.items[0..4].* = blockHeader(1, @intCast(global.index.items.len));
|
||||||
global.fd.writeAll(global.index.items) catch |e|
|
global.fd.writeAll(global.index.items) catch |e|
|
||||||
ui.die("Error writing to file: {s}.\n", .{ ui.errorString(e) });
|
ui.die("Error writing to file: {s}.\n", .{ ui.errorString(e) });
|
||||||
global.index.clearAndFree();
|
global.index.clearAndFree();
|
||||||
|
|
|
||||||
|
|
@ -508,8 +508,8 @@ pub fn open(fd: std.fs.File) !void {
|
||||||
var buf: [4]u8 = undefined;
|
var buf: [4]u8 = undefined;
|
||||||
if (try fd.preadAll(&buf, size - 4) != 4) return error.EndOfStream;
|
if (try fd.preadAll(&buf, size - 4) != 4) return error.EndOfStream;
|
||||||
const index_header = bigu32(buf);
|
const index_header = bigu32(buf);
|
||||||
if ((index_header >> 24) != 2 or (index_header & 7) != 0) die();
|
if ((index_header >> 28) != 1 or (index_header & 7) != 0) die();
|
||||||
const len = (index_header & 0x00ffffff) - 8; // excluding block header & footer
|
const len = (index_header & 0x0fffffff) - 8; // excluding block header & footer
|
||||||
if (len >= size) die();
|
if (len >= size) die();
|
||||||
global.index = main.allocator.alloc(u8, len) catch unreachable;
|
global.index = main.allocator.alloc(u8, len) catch unreachable;
|
||||||
if (try fd.preadAll(global.index, size - len - 4) != global.index.len) return error.EndOfStream;
|
if (try fd.preadAll(global.index, size - len - 4) != global.index.len) return error.EndOfStream;
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue