Improve performance of bin format import by 30%

By calling die() instead of propagating error unions. Not surprising
that error propagation has a performance impact, but I was hoping it
wasn't this bad.

Import performance was already quite good, but now it's even better!
With the one test case I have it's faster than JSON import, but I expect
that some dir structures will be much slower.
This commit is contained in:
Yorhel 2024-08-02 14:07:02 +02:00
parent 025e5ee99e
commit 85e12beb1c

View file

@ -41,6 +41,11 @@ pub const global = struct {
var index: []u8 = undefined; var index: []u8 = undefined;
var blocks: [8]Block = [1]Block{.{}}**8; var blocks: [8]Block = [1]Block{.{}}**8;
var counter: u64 = 0; var counter: u64 = 0;
// Last itemref being read/parsed. This is a hack to provide *some* context on error.
// Providing more context mainly just bloats the binary and decreases
// performance for fairly little benefit. Nobody's going to debug a corrupted export.
var lastitem: ?u64 = null;
}; };
@ -55,8 +60,10 @@ inline fn bigu16(v: [2]u8) u16 { return std.mem.bigToNative(u16, @bitCast(v)); }
inline fn bigu32(v: [4]u8) u32 { return std.mem.bigToNative(u32, @bitCast(v)); } inline fn bigu32(v: [4]u8) u32 { return std.mem.bigToNative(u32, @bitCast(v)); }
inline fn bigu64(v: [8]u8) u64 { return std.mem.bigToNative(u64, @bitCast(v)); } inline fn bigu64(v: [8]u8) u64 { return std.mem.bigToNative(u64, @bitCast(v)); }
fn die(comptime s: []const u8, arg: anytype) noreturn { fn die() noreturn {
ui.die("Error reading from file: "++s++"\n", arg); @setCold(true);
if (global.lastitem) |e| ui.die("Error reading item {x} from file\n", .{e})
else ui.die("Error reading from file\n", .{});
} }
@ -79,18 +86,18 @@ fn readBlock(num: u32) []const u8 {
global.counter += 1; global.counter += 1;
block.last = global.counter; block.last = global.counter;
if (num > global.index.len/8 - 1) die("invalid reference to block {}", .{num}); if (num > global.index.len/8 - 1) die();
const offlen = bigu64(global.index[num*8..][0..8].*); const offlen = bigu64(global.index[num*8..][0..8].*);
if ((offlen & 0xffffff) < 16) die("block {} is missing or too small", .{num}); if ((offlen & 0xffffff) < 16) die();
const buf = main.allocator.alloc(u8, (offlen & 0xffffff) - 12) catch unreachable; const buf = main.allocator.alloc(u8, (offlen & 0xffffff) - 12) catch unreachable;
defer main.allocator.free(buf); defer main.allocator.free(buf);
const rdlen = global.fd.preadAll(buf, (offlen >> 24) + 8) const rdlen = global.fd.preadAll(buf, (offlen >> 24) + 8)
catch |e| die("{s}", .{ui.errorString(e)}); catch |e| ui.die("Error reading from file: {s}\n", .{ui.errorString(e)});
if (rdlen != buf.len) die("short read for block {}", .{num}); if (rdlen != buf.len) die();
const rawlen = bigu32(buf[0..4].*); const rawlen = bigu32(buf[0..4].*);
if (rawlen >= (1<<24)) die("too large uncompressed size for {}", .{num}); if (rawlen >= (1<<24)) die();
block.data = main.allocator.alloc(u8, rawlen) catch unreachable; block.data = main.allocator.alloc(u8, rawlen) catch unreachable;
// TODO: decompress // TODO: decompress
@ -102,8 +109,8 @@ fn readBlock(num: u32) []const u8 {
const CborReader = struct { const CborReader = struct {
buf: []const u8, buf: []const u8,
fn head(r: *CborReader) !CborVal { fn head(r: *CborReader) CborVal {
if (r.buf.len < 0) return error.EndOfStream; if (r.buf.len < 0) die();
var v = CborVal{ var v = CborVal{
.rd = r, .rd = r,
.major = @enumFromInt(r.buf[0] >> 5), .major = @enumFromInt(r.buf[0] >> 5),
@ -116,22 +123,22 @@ const CborReader = struct {
r.buf = r.buf[1..]; r.buf = r.buf[1..];
}, },
0x18 => { 0x18 => {
if (r.buf.len < 2) return error.EndOfStream; if (r.buf.len < 2) die();
v.arg = r.buf[1]; v.arg = r.buf[1];
r.buf = r.buf[2..]; r.buf = r.buf[2..];
}, },
0x19 => { 0x19 => {
if (r.buf.len < 3) return error.EndOfStream; if (r.buf.len < 3) die();
v.arg = bigu16(r.buf[1..3].*); v.arg = bigu16(r.buf[1..3].*);
r.buf = r.buf[3..]; r.buf = r.buf[3..];
}, },
0x1a => { 0x1a => {
if (r.buf.len < 5) return error.EndOfStream; if (r.buf.len < 5) die();
v.arg = bigu32(r.buf[1..5].*); v.arg = bigu32(r.buf[1..5].*);
r.buf = r.buf[5..]; r.buf = r.buf[5..];
}, },
0x1b => { 0x1b => {
if (r.buf.len < 9) return error.EndOfStream; if (r.buf.len < 9) die();
v.arg = bigu64(r.buf[1..9].*); v.arg = bigu64(r.buf[1..9].*);
r.buf = r.buf[9..]; r.buf = r.buf[9..];
}, },
@ -140,17 +147,17 @@ const CborReader = struct {
v.indef = true; v.indef = true;
r.buf = r.buf[1..]; r.buf = r.buf[1..];
}, },
else => return error.Invalid, else => die(),
}, },
else => return error.Invalid, else => die(),
} }
return v; return v;
} }
// Read the next CBOR value, skipping any tags // Read the next CBOR value, skipping any tags
fn next(r: *CborReader) !CborVal { fn next(r: *CborReader) CborVal {
while (true) { while (true) {
const v = try r.head(); const v = r.head();
if (v.major != .tag) return v; if (v.major != .tag) return v;
} }
} }
@ -166,65 +173,65 @@ const CborVal = struct {
return v.major == .simple and v.indef; return v.major == .simple and v.indef;
} }
fn int(v: *const CborVal, T: type) !T { fn int(v: *const CborVal, T: type) T {
switch (v.major) { switch (v.major) {
.pos => return std.math.cast(T, v.arg) orelse return error.IntegerOutOfRange, .pos => return std.math.cast(T, v.arg) orelse die(),
.neg => { .neg => {
if (std.math.minInt(T) == 0) return error.IntegerOutOfRange; if (std.math.minInt(T) == 0) die();
if (v.arg > std.math.maxInt(T)) return error.IntegerOutOfRange; if (v.arg > std.math.maxInt(T)) die();
return -@as(T, @intCast(v.arg)) + (-1); return -@as(T, @intCast(v.arg)) + (-1);
}, },
else => return error.InvalidValue, else => die(),
} }
} }
// Read either a byte or text string. // Read either a byte or text string.
// Doesn't validate UTF-8 strings, doesn't support indefinite-length strings. // Doesn't validate UTF-8 strings, doesn't support indefinite-length strings.
fn bytes(v: *const CborVal) ![]const u8 { fn bytes(v: *const CborVal) []const u8 {
if (v.indef or (v.major != .bytes and v.major != .text)) return error.InvalidValue; if (v.indef or (v.major != .bytes and v.major != .text)) die();
if (v.rd.buf.len < v.arg) return error.EndOfStream; if (v.rd.buf.len < v.arg) die();
defer v.rd.buf = v.rd.buf[v.arg..]; defer v.rd.buf = v.rd.buf[v.arg..];
return v.rd.buf[0..v.arg]; return v.rd.buf[0..v.arg];
} }
// Skip current value. // Skip current value.
fn skip(v: *const CborVal) !void { fn skip(v: *const CborVal) void {
// indefinite-length bytes, text, array or map; skip till break marker. // indefinite-length bytes, text, array or map; skip till break marker.
if (v.major != .simple and v.indef) { if (v.major != .simple and v.indef) {
while (true) { while (true) {
const n = try v.rd.next(); const n = v.rd.next();
if (n.end()) return; if (n.end()) return;
try n.skip(); n.skip();
} }
} }
switch (v.major) { switch (v.major) {
.bytes, .text => { .bytes, .text => {
if (v.rd.buf.len < v.arg) return error.EndOfStream; if (v.rd.buf.len < v.arg) die();
v.rd.buf = v.rd.buf[v.arg..]; v.rd.buf = v.rd.buf[v.arg..];
}, },
.array => { .array => {
for (0..v.arg) |_| try (try v.rd.next()).skip(); for (0..v.arg) |_| v.rd.next().skip();
}, },
.map => { .map => {
for (0..v.arg*|2) |_| try (try v.rd.next()).skip(); for (0..v.arg*|2) |_| v.rd.next().skip();
}, },
else => {}, else => {},
} }
} }
fn etype(v: *const CborVal) !model.EType { fn etype(v: *const CborVal) model.EType {
const n = try v.int(i32); const n = v.int(i32);
return std.meta.intToEnum(model.EType, n) return std.meta.intToEnum(model.EType, n)
catch if (n < 0) .pattern else .nonreg; catch if (n < 0) .pattern else .nonreg;
} }
fn itemref(v: *const CborVal, cur: u64) !u64 { fn itemref(v: *const CborVal, cur: u64) u64 {
if (v.major == .pos) return v.arg; if (v.major == .pos) return v.arg;
if (v.major == .neg) { if (v.major == .neg) {
if (v.arg > (1<<24)) return error.InvalidBackReference; if (v.arg > (1<<24)) die();
return cur - v.arg - 1; return cur - v.arg - 1;
} }
return error.InvalidReference; return die();
} }
}; };
@ -249,27 +256,19 @@ test "CBOR int parsing" {
.{ .in = "\x3b\xff\xff\xff\xff\xff\xff\xff\xff", .t = i65, .exp = std.math.minInt(i65) }, .{ .in = "\x3b\xff\xff\xff\xff\xff\xff\xff\xff", .t = i65, .exp = std.math.minInt(i65) },
}) |t| { }) |t| {
var r = CborReader{.buf = t.in}; var r = CborReader{.buf = t.in};
try std.testing.expectEqual(@as(t.t, t.exp), (try r.next()).int(t.t)); try std.testing.expectEqual(@as(t.t, t.exp), r.next().int(t.t));
try std.testing.expectEqual(0, r.buf.len); try std.testing.expectEqual(0, r.buf.len);
} }
var r = CborReader{.buf="\x02"};
try std.testing.expectError(error.IntegerOutOfRange, (try r.next()).int(u1));
r.buf = "\x18";
try std.testing.expectError(error.EndOfStream, r.next());
r.buf = "\x1e";
try std.testing.expectError(error.Invalid, r.next());
r.buf = "\x1f";
try std.testing.expectError(error.Invalid, r.next());
} }
test "CBOR string parsing" { test "CBOR string parsing" {
var r = CborReader{.buf="\x40"}; var r = CborReader{.buf="\x40"};
try std.testing.expectEqualStrings("", try (try r.next()).bytes()); try std.testing.expectEqualStrings("", r.next().bytes());
r.buf = "\x45\x00\x01\x02\x03\x04x"; r.buf = "\x45\x00\x01\x02\x03\x04x";
try std.testing.expectEqualStrings("\x00\x01\x02\x03\x04", try (try r.next()).bytes()); try std.testing.expectEqualStrings("\x00\x01\x02\x03\x04", r.next().bytes());
try std.testing.expectEqualStrings("x", r.buf); try std.testing.expectEqualStrings("x", r.buf);
r.buf = "\x78\x241234567890abcdefghijklmnopqrstuvwxyz-end"; r.buf = "\x78\x241234567890abcdefghijklmnopqrstuvwxyz-end";
try std.testing.expectEqualStrings("1234567890abcdefghijklmnopqrstuvwxyz", try (try r.next()).bytes()); try std.testing.expectEqualStrings("1234567890abcdefghijklmnopqrstuvwxyz", r.next().bytes());
try std.testing.expectEqualStrings("-end", r.buf); try std.testing.expectEqualStrings("-end", r.buf);
} }
@ -291,7 +290,7 @@ test "CBOR skip parsing" {
"\xbf\xc0\x00\x9f\xff\xff", "\xbf\xc0\x00\x9f\xff\xff",
}) |s| { }) |s| {
var r = CborReader{.buf = s ++ "garbage"}; var r = CborReader{.buf = s ++ "garbage"};
try (try r.next()).skip(); r.next().skip();
try std.testing.expectEqualStrings(r.buf, "garbage"); try std.testing.expectEqualStrings(r.buf, "garbage");
} }
} }
@ -305,34 +304,34 @@ const ItemParser = struct {
val: CborVal, val: CborVal,
}; };
fn init(buf: []const u8) !ItemParser { fn init(buf: []const u8) ItemParser {
var r = ItemParser{.r = .{.buf = buf}}; var r = ItemParser{.r = .{.buf = buf}};
const head = try r.r.next(); const head = r.r.next();
if (head.major != .map) return error.Invalid; if (head.major != .map) die();
if (!head.indef) r.len = head.arg; if (!head.indef) r.len = head.arg;
return r; return r;
} }
fn key(r: *ItemParser) !?CborVal { fn key(r: *ItemParser) ?CborVal {
if (r.len) |*l| { if (r.len) |*l| {
if (l.* == 0) return null; if (l.* == 0) return null;
l.* -= 1; l.* -= 1;
return try r.r.next(); return r.r.next();
} else { } else {
const v = try r.r.next(); const v = r.r.next();
return if (v.end()) null else v; return if (v.end()) null else v;
} }
} }
// Skips over any fields that don't fit into an ItemKey. // Skips over any fields that don't fit into an ItemKey.
fn next(r: *ItemParser) !?Field { fn next(r: *ItemParser) ?Field {
while (try r.key()) |k| { while (r.key()) |k| {
if (k.int(@typeInfo(ItemKey).Enum.tag_type)) |n| return .{ if (k.major == .pos and k.arg <= std.math.maxInt(@typeInfo(ItemKey).Enum.tag_type)) return .{
.key = @enumFromInt(n), .key = @enumFromInt(k.arg),
.val = try r.r.next(), .val = r.r.next(),
} else |_| { } else {
try k.skip(); k.skip();
try (try r.r.next()).skip(); r.r.next().skip();
} }
} }
return null; return null;
@ -341,10 +340,11 @@ const ItemParser = struct {
// Returned buffer is valid until the next readItem(). // Returned buffer is valid until the next readItem().
fn readItem(ref: u64) ItemParser { fn readItem(ref: u64) ItemParser {
if (ref >= (1 << (24 + 32))) die("invalid itemref {x}", .{ref}); global.lastitem = ref;
if (ref >= (1 << (24 + 32))) die();
const block = readBlock(@intCast(ref >> 24)); const block = readBlock(@intCast(ref >> 24));
if ((ref & 0xffffff) > block.len) die("itemref {x} exceeds block size", .{ref}); if ((ref & 0xffffff) > block.len) die();
return ItemParser.init(block[(ref & 0xffffff)..]) catch die("invalid item at ref {x}", .{ref}); return ItemParser.init(block[(ref & 0xffffff)..]);
} }
const Import = struct { const Import = struct {
@ -360,39 +360,39 @@ const Import = struct {
sub: ?u64 = null, sub: ?u64 = null,
}; };
fn readFields(ctx: *Import, ref: u64) !void { fn readFields(ctx: *Import, ref: u64) void {
ctx.p = readItem(ref); ctx.p = readItem(ref);
var hastype = false; var hastype = false;
while (try ctx.p.next()) |kv| switch (kv.key) { while (ctx.p.next()) |kv| switch (kv.key) {
.type => { .type => {
ctx.stat.etype = try kv.val.etype(); ctx.stat.etype = kv.val.etype();
hastype = true; hastype = true;
}, },
.name => ctx.fields.name = try kv.val.bytes(), .name => ctx.fields.name = kv.val.bytes(),
.prev => ctx.fields.prev = try kv.val.itemref(ref), .prev => ctx.fields.prev = kv.val.itemref(ref),
.asize => ctx.stat.size = try kv.val.int(u64), .asize => ctx.stat.size = kv.val.int(u64),
.dsize => ctx.stat.blocks = @intCast(try kv.val.int(u64)/512), .dsize => ctx.stat.blocks = @intCast(kv.val.int(u64)/512),
.dev => ctx.stat.dev = try kv.val.int(u64), .dev => ctx.stat.dev = kv.val.int(u64),
.rderr => ctx.fields.rderr = kv.val.major == .simple and kv.val.arg == 21, .rderr => ctx.fields.rderr = kv.val.major == .simple and kv.val.arg == 21,
.sub => ctx.fields.sub = try kv.val.itemref(ref), .sub => ctx.fields.sub = kv.val.itemref(ref),
.ino => ctx.stat.ino = try kv.val.int(u64), .ino => ctx.stat.ino = kv.val.int(u64),
.nlink => ctx.stat.nlink = try kv.val.int(u31), .nlink => ctx.stat.nlink = kv.val.int(u31),
.uid => ctx.stat.ext.uid = try kv.val.int(u32), .uid => ctx.stat.ext.uid = kv.val.int(u32),
.gid => ctx.stat.ext.gid = try kv.val.int(u32), .gid => ctx.stat.ext.gid = kv.val.int(u32),
.mode => ctx.stat.ext.mode = try kv.val.int(u16), .mode => ctx.stat.ext.mode = kv.val.int(u16),
.mtime => ctx.stat.ext.mtime = try kv.val.int(u64), .mtime => ctx.stat.ext.mtime = kv.val.int(u64),
else => try kv.val.skip(), else => kv.val.skip(),
}; };
if (!hastype) return error.MissingType; if (!hastype) die();
if (ctx.fields.name.len == 0) return error.MissingName; if (ctx.fields.name.len == 0) die();
} }
fn import(ctx: *Import, ref: u64, parent: ?*sink.Dir, dev: u64) void { fn import(ctx: *Import, ref: u64, parent: ?*sink.Dir, dev: u64) void {
ctx.stat = .{ .dev = dev }; ctx.stat = .{ .dev = dev };
ctx.fields = .{}; ctx.fields = .{};
ctx.readFields(ref) catch |e| die("item {x}: {}", .{ref, e}); ctx.readFields(ref);
if (ctx.stat.etype == .dir) { if (ctx.stat.etype == .dir) {
const prev = ctx.fields.prev; const prev = ctx.fields.prev;
@ -410,7 +410,7 @@ const Import = struct {
ctx.fields.prev = prev; ctx.fields.prev = prev;
} else { } else {
const p = parent orelse die("root item must be a directory", .{}); const p = parent orelse die();
if (@intFromEnum(ctx.stat.etype) < 0) if (@intFromEnum(ctx.stat.etype) < 0)
p.addSpecial(ctx.sink, ctx.fields.name, ctx.stat.etype) p.addSpecial(ctx.sink, ctx.fields.name, ctx.stat.etype)
else else
@ -444,10 +444,9 @@ pub fn open(fd: std.fs.File) !void {
var buf: [4]u8 = undefined; var buf: [4]u8 = undefined;
if (try fd.preadAll(&buf, size - 4) != 4) return error.EndOfStream; if (try fd.preadAll(&buf, size - 4) != 4) return error.EndOfStream;
const index_header = bigu32(buf); const index_header = bigu32(buf);
if ((index_header >> 24) != 2 or (index_header & 7) != 0) if ((index_header >> 24) != 2 or (index_header & 7) != 0) die();
die("invalid index block ({x})", .{index_header});
const len = (index_header & 0x00ffffff) - 8; // excluding block header & footer const len = (index_header & 0x00ffffff) - 8; // excluding block header & footer
if (len >= size) die("invalid index block size", .{}); if (len >= size) die();
global.index = main.allocator.alloc(u8, len) catch unreachable; global.index = main.allocator.alloc(u8, len) catch unreachable;
if (try fd.preadAll(global.index, size - len - 4) != global.index.len) return error.EndOfStream; if (try fd.preadAll(global.index, size - len - 4) != global.index.len) return error.EndOfStream;
} }