diff --git a/ncdubinexp.pl b/ncdubinexp.pl index 4acaa61..aa8d30f 100755 --- a/ncdubinexp.pl +++ b/ncdubinexp.pl @@ -74,21 +74,20 @@ sub datablock($prefix, $off, $blklen, $content) { die "$prefix: Data block too small\n" if length $content < 8; die "$prefix: Data block too large\n" if length $content >= (1<<24); - my($num, $rawlen) = unpack 'NN', $content; - die "$prefix: Uncompressed data block size too large\n" if $rawlen >= (1<<24); + my $num = unpack 'N', $content; die sprintf "%s: Duplicate block id %d (first at %010x)", $prefix, $num, $datablocks{$num}>>24 if $datablocks{$num}; $datablocks{$num} = ($off << 24) | $blklen; - my $compressed = substr $content, 8; - $printblocks && printf "%s: data block %d rawlen %d (%.2f)\n", $prefix, $num, $rawlen, $rawlen/(length($compressed))*100; - - $datablock_len += length($compressed); - $rawdata_len += $rawlen; - + my $compressed = substr $content, 4; my $rawdata = decompress($compressed); die "$prefix: Block id $num failed decompression\n" if !defined $rawdata; - die sprintf "%s: Block id %d decompressed to %d bytes but expected %d\n", - $prefix, $num, length($rawdata), $rawlen if $rawlen != length $rawdata; + die "$prefix: Uncompressed data block size too large\n" if length $rawdata >= (1<<24); + + $printblocks && printf "%s: data block %d rawlen %d (%.2f)\n", $prefix, $num, length($rawdata), length($compressed)/length($rawdata)*100; + + $datablock_len += length($compressed); + $rawdata_len += length($rawdata); + cbordata($num, $rawdata); } @@ -231,7 +230,7 @@ if ($printstats) { printf " Total items: %d\n", $nitems; printf " Total blocks: %d\n", $nblocks; printf " Items per block: %.1f (%d .. %d)\n", $nitems / $nblocks, $minitemsperblock, $maxitemsperblock; - printf " Avg block size: %d compressed, %d raw (%.1f)\n", $datablock_len/$nblocks, $rawdata_len/$nblocks, $rawdata_len/$datablock_len*100; + printf " Avg block size: %d compressed, %d raw (%.1f)\n", $datablock_len/$nblocks, $rawdata_len/$nblocks, $datablock_len/$rawdata_len*100; printf " Avg item size: %.1f compressed, %.1f raw\n", $datablock_len/$nitems, $rawdata_len/$nitems; @dirblocks = sort { $b->[2] <=> $a->[2] } @dirblocks; diff --git a/src/bin_export.zig b/src/bin_export.zig index 0767fff..08be2c6 100644 --- a/src/bin_export.zig +++ b/src/bin_export.zig @@ -110,15 +110,14 @@ pub const Thread = struct { var out = std.ArrayList(u8).init(main.allocator); if (t.block_num == std.math.maxInt(u32) or t.off == 0) return out; - out.ensureTotalCapacityPrecise(16 + compressBound(t.off)) catch unreachable; + out.ensureTotalCapacityPrecise(12 + compressBound(t.off)) catch unreachable; out.items.len = out.capacity; - const bodylen = compressZstd(t.buf[0..t.off], out.items[12..]); - out.items.len = 16 + bodylen; + const bodylen = compressZstd(t.buf[0..t.off], out.items[8..]); + out.items.len = 12 + bodylen; out.items[0..4].* = blockHeader(0, @intCast(out.items.len)); out.items[4..8].* = bigu32(t.block_num); - out.items[8..12].* = bigu32(@intCast(t.off)); - out.items[12+bodylen..][0..4].* = blockHeader(0, @intCast(out.items.len)); + out.items[8+bodylen..][0..4].* = blockHeader(0, @intCast(out.items.len)); return out; } diff --git a/src/bin_reader.zig b/src/bin_reader.zig index 06a792a..2dce8e6 100644 --- a/src/bin_reader.zig +++ b/src/bin_reader.zig @@ -10,6 +10,8 @@ const ui = @import("ui.zig"); const bin_export = @import("bin_export.zig"); extern fn ZSTD_decompress(dst: ?*anyopaque, dstCapacity: usize, src: ?*const anyopaque, compressedSize: usize) usize; +extern fn ZSTD_getFrameContentSize(src: ?*const anyopaque, srcSize: usize) c_ulonglong; + const CborMajor = bin_export.CborMajor; const ItemKey = bin_export.ItemKey; @@ -90,19 +92,22 @@ fn readBlock(num: u32) []const u8 { if (num > global.index.len/8 - 1) die(); const offlen = bigu64(global.index[num*8..][0..8].*); - if ((offlen & 0xffffff) < 16) die(); + const off = offlen >> 24; + const len = offlen & 0xffffff; + if (len <= 12) die(); - const buf = main.allocator.alloc(u8, @intCast((offlen & 0xffffff) - 12)) catch unreachable; + // Only read the compressed data part, assume block header, number and footer are correct. + const buf = main.allocator.alloc(u8, @intCast(len - 12)) catch unreachable; defer main.allocator.free(buf); - const rdlen = global.fd.preadAll(buf, (offlen >> 24) + 8) + const rdlen = global.fd.preadAll(buf, off + 8) catch |e| ui.die("Error reading from file: {s}\n", .{ui.errorString(e)}); if (rdlen != buf.len) die(); - const rawlen = bigu32(buf[0..4].*); - if (rawlen >= (1<<24)) die(); - block.data = main.allocator.alloc(u8, rawlen) catch unreachable; + const rawlen = ZSTD_getFrameContentSize(buf.ptr, buf.len); + if (rawlen <= 0 or rawlen >= (1<<24)) die(); + block.data = main.allocator.alloc(u8, @intCast(rawlen)) catch unreachable; - const res = ZSTD_decompress(block.data.ptr, block.data.len, buf[4..].ptr, buf.len - 4); + const res = ZSTD_decompress(block.data.ptr, block.data.len, buf.ptr, buf.len); if (res != block.data.len) ui.die("Error decompressing block {} (expected {} got {})\n", .{ num, block.data.len, res }); return block.data;