From 85e12beb1c064cbe73a819b5a4dbad3c4216bc02 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Fri, 2 Aug 2024 14:07:02 +0200 Subject: [PATCH] Improve performance of bin format import by 30% By calling die() instead of propagating error unions. Not surprising that error propagation has a performance impact, but I was hoping it wasn't this bad. Import performance was already quite good, but now it's even better! With the one test case I have it's faster than JSON import, but I expect that some dir structures will be much slower. --- src/bin_reader.zig | 177 ++++++++++++++++++++++----------------------- 1 file changed, 88 insertions(+), 89 deletions(-) diff --git a/src/bin_reader.zig b/src/bin_reader.zig index fc22000..9cc415f 100644 --- a/src/bin_reader.zig +++ b/src/bin_reader.zig @@ -41,6 +41,11 @@ pub const global = struct { var index: []u8 = undefined; var blocks: [8]Block = [1]Block{.{}}**8; var counter: u64 = 0; + + // Last itemref being read/parsed. This is a hack to provide *some* context on error. + // Providing more context mainly just bloats the binary and decreases + // performance for fairly little benefit. Nobody's going to debug a corrupted export. + var lastitem: ?u64 = null; }; @@ -55,8 +60,10 @@ inline fn bigu16(v: [2]u8) u16 { return std.mem.bigToNative(u16, @bitCast(v)); } inline fn bigu32(v: [4]u8) u32 { return std.mem.bigToNative(u32, @bitCast(v)); } inline fn bigu64(v: [8]u8) u64 { return std.mem.bigToNative(u64, @bitCast(v)); } -fn die(comptime s: []const u8, arg: anytype) noreturn { - ui.die("Error reading from file: "++s++"\n", arg); +fn die() noreturn { + @setCold(true); + if (global.lastitem) |e| ui.die("Error reading item {x} from file\n", .{e}) + else ui.die("Error reading from file\n", .{}); } @@ -79,18 +86,18 @@ fn readBlock(num: u32) []const u8 { global.counter += 1; block.last = global.counter; - if (num > global.index.len/8 - 1) die("invalid reference to block {}", .{num}); + if (num > global.index.len/8 - 1) die(); const offlen = bigu64(global.index[num*8..][0..8].*); - if ((offlen & 0xffffff) < 16) die("block {} is missing or too small", .{num}); + if ((offlen & 0xffffff) < 16) die(); const buf = main.allocator.alloc(u8, (offlen & 0xffffff) - 12) catch unreachable; defer main.allocator.free(buf); const rdlen = global.fd.preadAll(buf, (offlen >> 24) + 8) - catch |e| die("{s}", .{ui.errorString(e)}); - if (rdlen != buf.len) die("short read for block {}", .{num}); + catch |e| ui.die("Error reading from file: {s}\n", .{ui.errorString(e)}); + if (rdlen != buf.len) die(); const rawlen = bigu32(buf[0..4].*); - if (rawlen >= (1<<24)) die("too large uncompressed size for {}", .{num}); + if (rawlen >= (1<<24)) die(); block.data = main.allocator.alloc(u8, rawlen) catch unreachable; // TODO: decompress @@ -102,8 +109,8 @@ fn readBlock(num: u32) []const u8 { const CborReader = struct { buf: []const u8, - fn head(r: *CborReader) !CborVal { - if (r.buf.len < 0) return error.EndOfStream; + fn head(r: *CborReader) CborVal { + if (r.buf.len < 0) die(); var v = CborVal{ .rd = r, .major = @enumFromInt(r.buf[0] >> 5), @@ -116,22 +123,22 @@ const CborReader = struct { r.buf = r.buf[1..]; }, 0x18 => { - if (r.buf.len < 2) return error.EndOfStream; + if (r.buf.len < 2) die(); v.arg = r.buf[1]; r.buf = r.buf[2..]; }, 0x19 => { - if (r.buf.len < 3) return error.EndOfStream; + if (r.buf.len < 3) die(); v.arg = bigu16(r.buf[1..3].*); r.buf = r.buf[3..]; }, 0x1a => { - if (r.buf.len < 5) return error.EndOfStream; + if (r.buf.len < 5) die(); v.arg = bigu32(r.buf[1..5].*); r.buf = r.buf[5..]; }, 0x1b => { - if (r.buf.len < 9) return error.EndOfStream; + if (r.buf.len < 9) die(); v.arg = bigu64(r.buf[1..9].*); r.buf = r.buf[9..]; }, @@ -140,17 +147,17 @@ const CborReader = struct { v.indef = true; r.buf = r.buf[1..]; }, - else => return error.Invalid, + else => die(), }, - else => return error.Invalid, + else => die(), } return v; } // Read the next CBOR value, skipping any tags - fn next(r: *CborReader) !CborVal { + fn next(r: *CborReader) CborVal { while (true) { - const v = try r.head(); + const v = r.head(); if (v.major != .tag) return v; } } @@ -166,65 +173,65 @@ const CborVal = struct { return v.major == .simple and v.indef; } - fn int(v: *const CborVal, T: type) !T { + fn int(v: *const CborVal, T: type) T { switch (v.major) { - .pos => return std.math.cast(T, v.arg) orelse return error.IntegerOutOfRange, + .pos => return std.math.cast(T, v.arg) orelse die(), .neg => { - if (std.math.minInt(T) == 0) return error.IntegerOutOfRange; - if (v.arg > std.math.maxInt(T)) return error.IntegerOutOfRange; + if (std.math.minInt(T) == 0) die(); + if (v.arg > std.math.maxInt(T)) die(); return -@as(T, @intCast(v.arg)) + (-1); }, - else => return error.InvalidValue, + else => die(), } } // Read either a byte or text string. // Doesn't validate UTF-8 strings, doesn't support indefinite-length strings. - fn bytes(v: *const CborVal) ![]const u8 { - if (v.indef or (v.major != .bytes and v.major != .text)) return error.InvalidValue; - if (v.rd.buf.len < v.arg) return error.EndOfStream; + fn bytes(v: *const CborVal) []const u8 { + if (v.indef or (v.major != .bytes and v.major != .text)) die(); + if (v.rd.buf.len < v.arg) die(); defer v.rd.buf = v.rd.buf[v.arg..]; return v.rd.buf[0..v.arg]; } // Skip current value. - fn skip(v: *const CborVal) !void { + fn skip(v: *const CborVal) void { // indefinite-length bytes, text, array or map; skip till break marker. if (v.major != .simple and v.indef) { while (true) { - const n = try v.rd.next(); + const n = v.rd.next(); if (n.end()) return; - try n.skip(); + n.skip(); } } switch (v.major) { .bytes, .text => { - if (v.rd.buf.len < v.arg) return error.EndOfStream; + if (v.rd.buf.len < v.arg) die(); v.rd.buf = v.rd.buf[v.arg..]; }, .array => { - for (0..v.arg) |_| try (try v.rd.next()).skip(); + for (0..v.arg) |_| v.rd.next().skip(); }, .map => { - for (0..v.arg*|2) |_| try (try v.rd.next()).skip(); + for (0..v.arg*|2) |_| v.rd.next().skip(); }, else => {}, } } - fn etype(v: *const CborVal) !model.EType { - const n = try v.int(i32); + fn etype(v: *const CborVal) model.EType { + const n = v.int(i32); return std.meta.intToEnum(model.EType, n) catch if (n < 0) .pattern else .nonreg; } - fn itemref(v: *const CborVal, cur: u64) !u64 { + fn itemref(v: *const CborVal, cur: u64) u64 { if (v.major == .pos) return v.arg; if (v.major == .neg) { - if (v.arg > (1<<24)) return error.InvalidBackReference; + if (v.arg > (1<<24)) die(); return cur - v.arg - 1; } - return error.InvalidReference; + return die(); } }; @@ -249,27 +256,19 @@ test "CBOR int parsing" { .{ .in = "\x3b\xff\xff\xff\xff\xff\xff\xff\xff", .t = i65, .exp = std.math.minInt(i65) }, }) |t| { var r = CborReader{.buf = t.in}; - try std.testing.expectEqual(@as(t.t, t.exp), (try r.next()).int(t.t)); + try std.testing.expectEqual(@as(t.t, t.exp), r.next().int(t.t)); try std.testing.expectEqual(0, r.buf.len); } - var r = CborReader{.buf="\x02"}; - try std.testing.expectError(error.IntegerOutOfRange, (try r.next()).int(u1)); - r.buf = "\x18"; - try std.testing.expectError(error.EndOfStream, r.next()); - r.buf = "\x1e"; - try std.testing.expectError(error.Invalid, r.next()); - r.buf = "\x1f"; - try std.testing.expectError(error.Invalid, r.next()); } test "CBOR string parsing" { var r = CborReader{.buf="\x40"}; - try std.testing.expectEqualStrings("", try (try r.next()).bytes()); + try std.testing.expectEqualStrings("", r.next().bytes()); r.buf = "\x45\x00\x01\x02\x03\x04x"; - try std.testing.expectEqualStrings("\x00\x01\x02\x03\x04", try (try r.next()).bytes()); + try std.testing.expectEqualStrings("\x00\x01\x02\x03\x04", r.next().bytes()); try std.testing.expectEqualStrings("x", r.buf); r.buf = "\x78\x241234567890abcdefghijklmnopqrstuvwxyz-end"; - try std.testing.expectEqualStrings("1234567890abcdefghijklmnopqrstuvwxyz", try (try r.next()).bytes()); + try std.testing.expectEqualStrings("1234567890abcdefghijklmnopqrstuvwxyz", r.next().bytes()); try std.testing.expectEqualStrings("-end", r.buf); } @@ -291,7 +290,7 @@ test "CBOR skip parsing" { "\xbf\xc0\x00\x9f\xff\xff", }) |s| { var r = CborReader{.buf = s ++ "garbage"}; - try (try r.next()).skip(); + r.next().skip(); try std.testing.expectEqualStrings(r.buf, "garbage"); } } @@ -305,34 +304,34 @@ const ItemParser = struct { val: CborVal, }; - fn init(buf: []const u8) !ItemParser { + fn init(buf: []const u8) ItemParser { var r = ItemParser{.r = .{.buf = buf}}; - const head = try r.r.next(); - if (head.major != .map) return error.Invalid; + const head = r.r.next(); + if (head.major != .map) die(); if (!head.indef) r.len = head.arg; return r; } - fn key(r: *ItemParser) !?CborVal { + fn key(r: *ItemParser) ?CborVal { if (r.len) |*l| { if (l.* == 0) return null; l.* -= 1; - return try r.r.next(); + return r.r.next(); } else { - const v = try r.r.next(); + const v = r.r.next(); return if (v.end()) null else v; } } // Skips over any fields that don't fit into an ItemKey. - fn next(r: *ItemParser) !?Field { - while (try r.key()) |k| { - if (k.int(@typeInfo(ItemKey).Enum.tag_type)) |n| return .{ - .key = @enumFromInt(n), - .val = try r.r.next(), - } else |_| { - try k.skip(); - try (try r.r.next()).skip(); + fn next(r: *ItemParser) ?Field { + while (r.key()) |k| { + if (k.major == .pos and k.arg <= std.math.maxInt(@typeInfo(ItemKey).Enum.tag_type)) return .{ + .key = @enumFromInt(k.arg), + .val = r.r.next(), + } else { + k.skip(); + r.r.next().skip(); } } return null; @@ -341,10 +340,11 @@ const ItemParser = struct { // Returned buffer is valid until the next readItem(). fn readItem(ref: u64) ItemParser { - if (ref >= (1 << (24 + 32))) die("invalid itemref {x}", .{ref}); + global.lastitem = ref; + if (ref >= (1 << (24 + 32))) die(); const block = readBlock(@intCast(ref >> 24)); - if ((ref & 0xffffff) > block.len) die("itemref {x} exceeds block size", .{ref}); - return ItemParser.init(block[(ref & 0xffffff)..]) catch die("invalid item at ref {x}", .{ref}); + if ((ref & 0xffffff) > block.len) die(); + return ItemParser.init(block[(ref & 0xffffff)..]); } const Import = struct { @@ -360,39 +360,39 @@ const Import = struct { sub: ?u64 = null, }; - fn readFields(ctx: *Import, ref: u64) !void { + fn readFields(ctx: *Import, ref: u64) void { ctx.p = readItem(ref); var hastype = false; - while (try ctx.p.next()) |kv| switch (kv.key) { + while (ctx.p.next()) |kv| switch (kv.key) { .type => { - ctx.stat.etype = try kv.val.etype(); + ctx.stat.etype = kv.val.etype(); hastype = true; }, - .name => ctx.fields.name = try kv.val.bytes(), - .prev => ctx.fields.prev = try kv.val.itemref(ref), - .asize => ctx.stat.size = try kv.val.int(u64), - .dsize => ctx.stat.blocks = @intCast(try kv.val.int(u64)/512), - .dev => ctx.stat.dev = try kv.val.int(u64), + .name => ctx.fields.name = kv.val.bytes(), + .prev => ctx.fields.prev = kv.val.itemref(ref), + .asize => ctx.stat.size = kv.val.int(u64), + .dsize => ctx.stat.blocks = @intCast(kv.val.int(u64)/512), + .dev => ctx.stat.dev = kv.val.int(u64), .rderr => ctx.fields.rderr = kv.val.major == .simple and kv.val.arg == 21, - .sub => ctx.fields.sub = try kv.val.itemref(ref), - .ino => ctx.stat.ino = try kv.val.int(u64), - .nlink => ctx.stat.nlink = try kv.val.int(u31), - .uid => ctx.stat.ext.uid = try kv.val.int(u32), - .gid => ctx.stat.ext.gid = try kv.val.int(u32), - .mode => ctx.stat.ext.mode = try kv.val.int(u16), - .mtime => ctx.stat.ext.mtime = try kv.val.int(u64), - else => try kv.val.skip(), + .sub => ctx.fields.sub = kv.val.itemref(ref), + .ino => ctx.stat.ino = kv.val.int(u64), + .nlink => ctx.stat.nlink = kv.val.int(u31), + .uid => ctx.stat.ext.uid = kv.val.int(u32), + .gid => ctx.stat.ext.gid = kv.val.int(u32), + .mode => ctx.stat.ext.mode = kv.val.int(u16), + .mtime => ctx.stat.ext.mtime = kv.val.int(u64), + else => kv.val.skip(), }; - if (!hastype) return error.MissingType; - if (ctx.fields.name.len == 0) return error.MissingName; + if (!hastype) die(); + if (ctx.fields.name.len == 0) die(); } fn import(ctx: *Import, ref: u64, parent: ?*sink.Dir, dev: u64) void { ctx.stat = .{ .dev = dev }; ctx.fields = .{}; - ctx.readFields(ref) catch |e| die("item {x}: {}", .{ref, e}); + ctx.readFields(ref); if (ctx.stat.etype == .dir) { const prev = ctx.fields.prev; @@ -410,7 +410,7 @@ const Import = struct { ctx.fields.prev = prev; } else { - const p = parent orelse die("root item must be a directory", .{}); + const p = parent orelse die(); if (@intFromEnum(ctx.stat.etype) < 0) p.addSpecial(ctx.sink, ctx.fields.name, ctx.stat.etype) else @@ -444,10 +444,9 @@ pub fn open(fd: std.fs.File) !void { var buf: [4]u8 = undefined; if (try fd.preadAll(&buf, size - 4) != 4) return error.EndOfStream; const index_header = bigu32(buf); - if ((index_header >> 24) != 2 or (index_header & 7) != 0) - die("invalid index block ({x})", .{index_header}); + if ((index_header >> 24) != 2 or (index_header & 7) != 0) die(); const len = (index_header & 0x00ffffff) - 8; // excluding block header & footer - if (len >= size) die("invalid index block size", .{}); + if (len >= size) die(); global.index = main.allocator.alloc(u8, len) catch unreachable; if (try fd.preadAll(global.index, size - len - 4) != global.index.len) return error.EndOfStream; }