// SPDX-FileCopyrightText: Yorhel
// SPDX-License-Identifier: MIT

const std = @import("std");
const main = @import("main.zig");
const model = @import("model.zig");
const util = @import("util.zig");
const sink = @import("sink.zig");
const ui = @import("ui.zig");
const bin_export = @import("bin_export.zig");

extern fn ZSTD_decompress(dst: ?*anyopaque, dstCapacity: usize, src: ?*const anyopaque, compressedSize: usize) usize;

const CborMajor = bin_export.CborMajor;
const ItemKey = bin_export.ItemKey;

// Two ways to read a bin export:
//
// 1. Streaming import
//    - Read blocks sequentially, assemble items into model.Entry's and stitch
//      them together on the go.
//    - Does not use the sink.zig API, since sub-level items are read before their parent dirs.
//    - Useful when:
//      - User attempts to do a refresh or delete while browsing a file through (2)
//      - Reading from a stream
//
// 2. Random access browsing
//    - Read final block first to get the root item, then have browser.zig fetch
//      dir listings from this file.
//    - The default reader mode, requires much less memory than (1) and provides
//      a snappier first-browsing experience.
//
// The approach from (2) can also be used to walk through the entire directory
// tree and stream it to sink.zig (either for importing or converting to JSON).
// That would allow for better code reuse and low-memory conversion, but
// performance will not be as good as a direct streaming read. Needs
// benchmarks.
//
// This file only implements (2) at the moment.

pub const global = struct {
    var fd: std.fs.File = undefined;
    // Raw index block contents: one big-endian u64 (offset<<24 | length) per
    // data block, followed by the 8-byte root itemref.
    var index: []u8 = undefined;
    // Small cache of decompressed blocks, evicted in LRU order.
    var blocks: [8]Block = [1]Block{.{}}**8;
    // Monotonic counter used as the LRU clock for `blocks`.
    var counter: u64 = 0;
    // Last itemref being read/parsed. This is a hack to provide *some* context on error.
    // Providing more context mainly just bloats the binary and decreases
    // performance for fairly little benefit. Nobody's going to debug a corrupted export.
    var lastitem: ?u64 = null;
};

// One slot in the in-memory block cache.
const Block = struct {
    num: u32 = std.math.maxInt(u32), // block number; maxInt(u32) marks an unused slot
    last: u64 = 0, // LRU timestamp, taken from global.counter
    data: []u8 = undefined, // decompressed contents; only valid when num is set
};

// Big-endian decoding helpers for fixed-size byte arrays.
inline fn bigu16(v: [2]u8) u16 { return std.mem.bigToNative(u16, @bitCast(v)); }
inline fn bigu32(v: [4]u8) u32 { return std.mem.bigToNative(u32, @bitCast(v)); }
inline fn bigu64(v: [8]u8) u64 { return std.mem.bigToNative(u64, @bitCast(v)); }

// Fatal error on a corrupted or truncated export; never returns.
fn die() noreturn {
    @setCold(true);
    if (global.lastitem) |e| ui.die("Error reading item {x} from file\n", .{e})
    else ui.die("Error reading from file\n", .{});
}

// Return the decompressed contents of the given block, reading and caching it
// if necessary. Returned slice is valid until the block is evicted, i.e. until
// up to 8 other blocks have been read.
fn readBlock(num: u32) []const u8 {
    // Simple linear search, only suitable if we keep the number of in-memory blocks small.
    var block: *Block = &global.blocks[0];
    for (&global.blocks) |*b| {
        if (b.num == num) {
            // Cache hit; refresh the LRU timestamp.
            if (b.last != global.counter) {
                global.counter += 1;
                b.last = global.counter;
            }
            return b.data;
        }
        // Remember the least-recently-used slot for eviction.
        if (block.last > b.last) block = b;
    }
    if (block.num != std.math.maxInt(u32)) main.allocator.free(block.data);
    block.num = num;
    global.counter += 1;
    block.last = global.counter;

    if (num > global.index.len/8 - 1) die();
    // Index entry: (file offset << 24) | on-disk block length.
    const offlen = bigu64(global.index[num*8..][0..8].*);
    if ((offlen & 0xffffff) < 16) die();
    // Read the block without its 8-byte header and 4-byte footer.
    const buf = main.allocator.alloc(u8, @intCast((offlen & 0xffffff) - 12)) catch unreachable;
    defer main.allocator.free(buf);
    const rdlen = global.fd.preadAll(buf, (offlen >> 24) + 8) catch |e|
        ui.die("Error reading from file: {s}\n", .{ui.errorString(e)});
    if (rdlen != buf.len) die();

    // First 4 bytes: uncompressed length; remainder: zstd-compressed data.
    const rawlen = bigu32(buf[0..4].*);
    if (rawlen >= (1<<24)) die();
    block.data = main.allocator.alloc(u8, rawlen) catch unreachable;
    const res = ZSTD_decompress(block.data.ptr, block.data.len, buf[4..].ptr, buf.len - 4);
    if (res != block.data.len)
        ui.die("Error decompressing block {} (expected {} got {})\n", .{ num, block.data.len, res });
    return block.data;
}

// Minimal CBOR (RFC 8949) pull parser over an in-memory buffer.
// Any malformed input is treated as a corrupted export and aborts via die().
const CborReader = struct {
    buf: []const u8,

    // Parse the next value header (major type + argument), advancing the buffer.
    fn head(r: *CborReader) CborVal {
        // Was `r.buf.len < 0`, which is always false for a usize and allowed an
        // out-of-bounds read of r.buf[0] on a truncated buffer.
        if (r.buf.len == 0) die();
        var v = CborVal{
            .rd = r,
            .major = @enumFromInt(r.buf[0] >> 5),
            .indef = false,
            .arg = 0,
        };
        // Low 5 bits select how the argument is encoded.
        switch (r.buf[0] & 0x1f) {
            0x00...0x17 => |n| { v.arg = n; r.buf = r.buf[1..]; },
            0x18 => { if (r.buf.len < 2) die(); v.arg = r.buf[1]; r.buf = r.buf[2..]; },
            0x19 => { if (r.buf.len < 3) die(); v.arg = bigu16(r.buf[1..3].*); r.buf = r.buf[3..]; },
            0x1a => { if (r.buf.len < 5) die(); v.arg = bigu32(r.buf[1..5].*); r.buf = r.buf[5..]; },
            0x1b => { if (r.buf.len < 9) die(); v.arg = bigu64(r.buf[1..9].*); r.buf = r.buf[9..]; },
            0x1f => switch (v.major) {
                // Indefinite-length string/container, or the 'break' marker (.simple).
                .bytes, .text, .array, .map, .simple => { v.indef = true; r.buf = r.buf[1..]; },
                else => die(),
            },
            else => die(),
        }
        return v;
    }

    // Read the next CBOR value, skipping any tags
    fn next(r: *CborReader) CborVal {
        while (true) {
            const v = r.head();
            if (v.major != .tag) return v;
        }
    }
};

// A single decoded CBOR value header; string/container payloads are read
// lazily through the backing CborReader.
const CborVal = struct {
    rd: *CborReader,
    major: CborMajor,
    indef: bool,
    arg: u64,

    // True if this is the 'break' marker ending an indefinite-length item.
    fn end(v: *const CborVal) bool {
        return v.major == .simple and v.indef;
    }

    // Interpret as an integer of type T; dies on non-integers or out-of-range values.
    fn int(v: *const CborVal, T: type) T {
        switch (v.major) {
            .pos => return std.math.cast(T, v.arg) orelse die(),
            .neg => {
                // Negative values require a signed T; encoded value is -1 - arg.
                if (std.math.minInt(T) == 0) die();
                if (v.arg > std.math.maxInt(T)) die();
                return -@as(T, @intCast(v.arg)) + (-1);
            },
            else => die(),
        }
    }

    // CBOR simple value 21 is 'true'.
    fn isTrue(v: *const CborVal) bool {
        return v.major == .simple and v.arg == 21;
    }

    // Read either a byte or text string.
    // Doesn't validate UTF-8 strings, doesn't support indefinite-length strings.
    fn bytes(v: *const CborVal) []const u8 {
        if (v.indef or (v.major != .bytes and v.major != .text)) die();
        if (v.rd.buf.len < v.arg) die();
        defer v.rd.buf = v.rd.buf[@intCast(v.arg)..];
        return v.rd.buf[0..@intCast(v.arg)];
    }

    // Skip current value.
    fn skip(v: *const CborVal) void {
        // indefinite-length bytes, text, array or map; skip till break marker.
        if (v.major != .simple and v.indef) {
            while (true) {
                const n = v.rd.next();
                if (n.end()) return;
                n.skip();
            }
        }
        switch (v.major) {
            .bytes, .text => {
                if (v.rd.buf.len < v.arg) die();
                v.rd.buf = v.rd.buf[@intCast(v.arg)..];
            },
            .array => {
                if (v.arg > (1<<24)) die();
                for (0..@intCast(v.arg)) |_| v.rd.next().skip();
            },
            .map => {
                if (v.arg > (1<<24)) die();
                // A map holds arg key/value *pairs*; saturating mul guards overflow.
                for (0..@intCast(v.arg*|2)) |_| v.rd.next().skip();
            },
            else => {},
        }
    }

    // Interpret as a model.EType; unknown negative values map to .pattern,
    // unknown positive ones to .nonreg, so newer exports still load.
    fn etype(v: *const CborVal) model.EType {
        const n = v.int(i32);
        return std.meta.intToEnum(model.EType, n) catch if (n < 0) .pattern else .nonreg;
    }

    // Interpret as an itemref: either absolute (.pos) or relative to `cur` (.neg).
    fn itemref(v: *const CborVal, cur: u64) u64 {
        if (v.major == .pos) return v.arg;
        if (v.major == .neg) {
            if (v.arg > (1<<24)) die();
            return cur - v.arg - 1;
        }
        return die();
    }
};

test "CBOR int parsing" {
    inline for (.{
        .{ .in = "\x00", .t = u1, .exp = 0 },
        .{ .in = "\x01", .t = u1, .exp = 1 },
        .{ .in = "\x18\x18", .t = u8, .exp = 0x18 },
        .{ .in = "\x18\xff", .t = u8, .exp = 0xff },
        .{ .in = "\x19\x07\xff", .t = u64, .exp = 0x7ff },
        .{ .in = "\x19\xff\xff", .t = u64, .exp = 0xffff },
        .{ .in = "\x1a\x00\x01\x00\x00", .t = u64, .exp = 0x10000 },
        .{ .in = "\x1b\x7f\xff\xff\xff\xff\xff\xff\xff", .t = i64, .exp = std.math.maxInt(i64) },
        .{ .in = "\x1b\xff\xff\xff\xff\xff\xff\xff\xff", .t = u64, .exp = std.math.maxInt(u64) },
        .{ .in = "\x1b\xff\xff\xff\xff\xff\xff\xff\xff", .t = i65, .exp = std.math.maxInt(u64) },
        .{ .in = "\x20", .t = i1, .exp = -1 },
        .{ .in = "\x38\x18", .t = i8, .exp = -0x19 },
        .{ .in = "\x39\x01\xf3", .t = i16, .exp = -500 },
        .{ .in = "\x3a\xfe\xdc\xba\x97", .t = i33, .exp = -0xfedc_ba98 },
        .{ .in = "\x3b\x7f\xff\xff\xff\xff\xff\xff\xff", .t = i64, .exp = std.math.minInt(i64) },
        .{ .in = "\x3b\xff\xff\xff\xff\xff\xff\xff\xff", .t = i65, .exp = std.math.minInt(i65) },
    }) |t| {
        var r = CborReader{.buf = t.in};
        try std.testing.expectEqual(@as(t.t, t.exp), r.next().int(t.t));
        try std.testing.expectEqual(0, r.buf.len);
    }
}

test "CBOR string parsing" {
    var r = CborReader{.buf="\x40"};
    try std.testing.expectEqualStrings("", r.next().bytes());
    r.buf = "\x45\x00\x01\x02\x03\x04x";
    try std.testing.expectEqualStrings("\x00\x01\x02\x03\x04", r.next().bytes());
    try std.testing.expectEqualStrings("x", r.buf);
    r.buf = "\x78\x241234567890abcdefghijklmnopqrstuvwxyz-end";
    try std.testing.expectEqualStrings("1234567890abcdefghijklmnopqrstuvwxyz", r.next().bytes());
    try std.testing.expectEqualStrings("-end", r.buf);
}

test "CBOR skip parsing" {
    inline for (.{
        "\x00",
        "\x40",
        "\x41a",
        "\x5f\xff",
        "\x5f\x41a\xff",
        "\x80",
        "\x81\x00",
        "\x9f\xff",
        "\x9f\x9f\xff\xff",
        "\x9f\x9f\x81\x00\xff\xff",
        "\xa0",
        "\xa1\x00\x01",
        "\xbf\xff",
        "\xbf\xc0\x00\x9f\xff\xff",
    }) |s| {
        var r = CborReader{.buf = s ++ "garbage"};
        r.next().skip();
        try std.testing.expectEqualStrings(r.buf, "garbage");
    }
}

// Iterator over the key/value fields of a single exported item (a CBOR map,
// either fixed- or indefinite-length).
const ItemParser = struct {
    r: CborReader,
    len: ?u64 = null, // remaining pairs for a fixed-length map; null = indefinite

    const Field = struct {
        key: ItemKey,
        val: CborVal,
    };

    fn init(buf: []const u8) ItemParser {
        var r = ItemParser{.r = .{.buf = buf}};
        const head = r.r.next();
        if (head.major != .map) die();
        if (!head.indef) r.len = head.arg;
        return r;
    }

    // Next raw key, or null at end of map.
    fn key(r: *ItemParser) ?CborVal {
        if (r.len) |*l| {
            if (l.* == 0) return null;
            l.* -= 1;
            return r.r.next();
        } else {
            const v = r.r.next();
            return if (v.end()) null else v;
        }
    }

    // Skips over any fields that don't fit into an ItemKey.
    fn next(r: *ItemParser) ?Field {
        while (r.key()) |k| {
            if (k.major == .pos and k.arg <= std.math.maxInt(@typeInfo(ItemKey).Enum.tag_type)) return .{
                .key = @enumFromInt(k.arg),
                .val = r.r.next(),
            } else {
                k.skip();
                r.r.next().skip();
            }
        }
        return null;
    }
};

// Returned buffer is valid until the next readItem().
// Locate an item by its itemref and return a field parser for it.
// An itemref is (block_number << 24) | offset_within_block.
fn readItem(ref: u64) ItemParser {
    global.lastitem = ref;
    if (ref >= (1 << (24 + 32))) die();
    const block = readBlock(@intCast(ref >> 24));
    if ((ref & 0xffffff) > block.len) die();
    return ItemParser.init(block[@intCast(ref & 0xffffff)..]);
}

// State for a depth-first walk of the exported tree, feeding sink.zig.
// A single Import struct is reused across recursive calls, so `stat` and
// `fields` are clobbered by each readFields()/import() invocation.
const Import = struct {
    sink: *sink.Thread,
    stat: sink.Stat = .{},
    fields: Fields = .{},
    p: ItemParser = undefined,

    // Fields that don't fit in sink.Stat.
    const Fields = struct {
        name: []const u8 = "",
        rderr: bool = false,
        prev: ?u64 = null, // itemref of the previous sibling
        sub: ?u64 = null, // itemref of the last sub-item, for dirs
    };

    // Parse all known fields of the given item into ctx.stat / ctx.fields.
    // Dies if the item lacks a type or a name.
    fn readFields(ctx: *Import, ref: u64) void {
        ctx.p = readItem(ref);
        var hastype = false;
        while (ctx.p.next()) |kv| switch (kv.key) {
            .type => { ctx.stat.etype = kv.val.etype(); hastype = true; },
            .name => ctx.fields.name = kv.val.bytes(),
            .prev => ctx.fields.prev = kv.val.itemref(ref),
            .asize => ctx.stat.size = kv.val.int(u64),
            .dsize => ctx.stat.blocks = @intCast(kv.val.int(u64)/512),
            .dev => ctx.stat.dev = kv.val.int(u64),
            .rderr => ctx.fields.rderr = kv.val.isTrue(),
            .sub => ctx.fields.sub = kv.val.itemref(ref),
            .ino => ctx.stat.ino = kv.val.int(u64),
            .nlink => ctx.stat.nlink = kv.val.int(u31),
            .uid => ctx.stat.ext.uid = kv.val.int(u32),
            .gid => ctx.stat.ext.gid = kv.val.int(u32),
            .mode => ctx.stat.ext.mode = kv.val.int(u16),
            .mtime => ctx.stat.ext.mtime = kv.val.int(u64),
            else => kv.val.skip(),
        };
        if (!hastype) die();
        if (ctx.fields.name.len == 0) die();
    }

    // Import the item at `ref` (and, for dirs, its subtree) into the sink.
    // `dev` is the device id inherited from the parent dir.
    fn import(ctx: *Import, ref: u64, parent: ?*sink.Dir, dev: u64) void {
        ctx.stat = .{ .dev = dev };
        ctx.fields = .{};
        ctx.readFields(ref);
        if (ctx.stat.etype == .dir) {
            const prev = ctx.fields.prev;
            const dir = if (parent) |d| d.addDir(ctx.sink, ctx.fields.name, &ctx.stat)
                        else sink.createRoot(ctx.fields.name, &ctx.stat);
            ctx.sink.setDir(dir);
            if (ctx.fields.rderr) dir.setReadError(ctx.sink);
            // Snapshot this dir's device id before recursing: recursive
            // import() calls overwrite ctx.stat, so re-reading ctx.stat.dev
            // inside the loop would pass a stale descendant's dev to siblings
            // that follow a sub-directory.
            const dir_dev = ctx.stat.dev;
            // Walk the sub-item chain; each readFields() call in the recursion
            // advances ctx.fields.prev to the next sibling.
            ctx.fields.prev = ctx.fields.sub;
            while (ctx.fields.prev) |n| ctx.import(n, dir, dir_dev);
            ctx.sink.setDir(parent);
            dir.unref(ctx.sink);
            ctx.fields.prev = prev;
        } else {
            const p = parent orelse die();
            // Negative etypes are "special" entries without meaningful stat data.
            if (@intFromEnum(ctx.stat.etype) < 0) p.addSpecial(ctx.sink, ctx.fields.name, ctx.stat.etype)
            else p.addStat(ctx.sink, ctx.fields.name, &ctx.stat);
        }
        // Cheap sparse throttle for UI event handling.
        // NOTE(review): '& 65' fires on ~1/4 of counter values; presumably
        // intentional as a cheap approximation — confirm against scan.zig.
        if ((ctx.sink.files_seen.load(.monotonic) & 65) == 0) main.handleEvent(false, false);
    }
};

// Resolve an itemref and return a newly allocated entry.
// Dir.parent and Link.next/prev are left uninitialized.
pub fn get(ref: u64, alloc: std.mem.Allocator) *model.Entry {
    const parser = readItem(ref);
    var etype: ?model.EType = null;
    var name: []const u8 = "";
    var p = parser;
    // First pass: find type and name, needed to allocate the Entry.
    // Assumes the exporter writes these before any other field; fields
    // encountered earlier are skipped and lost.
    while (p.next()) |kv| {
        switch (kv.key) {
            .type => etype = kv.val.etype(),
            .name => name = kv.val.bytes(),
            else => kv.val.skip(),
        }
        if (etype != null and name.len != 0) break;
    }
    if (etype == null or name.len == 0) die();
    // XXX: 'extended' should really depend on whether the info is in the file.
    var entry = model.Entry.create(alloc, etype.?, main.config.extended, name);
    entry.next = .{ .ref = std.math.maxInt(u64) };
    if (entry.dir()) |d| d.sub = .{ .ref = std.math.maxInt(u64) };
    // Second pass: fill in the remaining fields.
    while (p.next()) |kv| switch (kv.key) {
        .prev => entry.next = .{ .ref = kv.val.itemref(ref) },
        .asize => { if (entry.pack.etype != .dir) entry.size = kv.val.int(u64); },
        .dsize => { if (entry.pack.etype != .dir) entry.pack.blocks = @intCast(kv.val.int(u64)/512); },
        .rderr => { if (entry.dir()) |d| { if (kv.val.isTrue()) d.pack.err = true else d.pack.suberr = true; } },
        .dev => { if (entry.dir()) |d| d.pack.dev = model.devices.getId(kv.val.int(u64)); },
        .cumasize => entry.size = kv.val.int(u64),
        .cumdsize => entry.pack.blocks = @intCast(kv.val.int(u64)/512),
        .shrasize => { if (entry.dir()) |d| d.shared_size = kv.val.int(u64); },
        .shrdsize => { if (entry.dir()) |d| d.shared_blocks = kv.val.int(u64)/512; },
        .items => { if (entry.dir()) |d| d.items = kv.val.int(u32); },
        .sub => { if (entry.dir()) |d| d.sub = .{ .ref = kv.val.itemref(ref) }; },
        .ino => { if (entry.link()) |l| l.ino = kv.val.int(u64); },
        .nlink => { if (entry.link()) |l| l.pack.nlink = kv.val.int(u31); },
        .uid => { if (entry.ext()) |e| e.uid = kv.val.int(u32); },
        .gid => { if (entry.ext()) |e| e.gid = kv.val.int(u32); },
        .mode => { if (entry.ext()) |e| e.mode = kv.val.int(u16); },
        .mtime => { if (entry.ext()) |e| e.mtime = kv.val.int(u64); },
        else => kv.val.skip(),
    };
    return entry;
}

// The root itemref is stored in the last 8 bytes of the index block.
pub fn getRoot() u64 {
    return bigu64(global.index[global.index.len-8..][0..8].*);
}

// Walk through the directory tree in depth-first order and pass results to sink.zig.
// Depth-first is required for JSON export, but more efficient strategies are
// possible for other sinks. Parallel import is also an option, but that's more
// complex and likely less efficient than a streaming import.
pub fn import() void {
    const sink_threads = sink.createThreads(1);
    var ctx = Import{.sink = &sink_threads[0]};
    ctx.import(getRoot(), null, 0);
    sink.done();
}

// Open an export file for random-access browsing: locate and read the index
// block from the end of the file into global.index.
// Assumes that the file signature has already been read and validated.
pub fn open(fd: std.fs.File) !void {
    global.fd = fd;
    const size = try fd.getEndPos();
    if (size < 16) return error.EndOfStream;

    // Read index block
    var buf: [4]u8 = undefined;
    if (try fd.preadAll(&buf, size - 4) != 4) return error.EndOfStream;
    const index_header = bigu32(buf);
    // Footer: top byte is the block type (2 = index), low 24 bits its total
    // length, which must be a multiple of 8.
    if ((index_header >> 24) != 2 or (index_header & 7) != 0) die();
    const len = (index_header & 0x00ffffff) - 8; // excluding block header & footer
    // Must at least hold the root itemref (8 bytes) and fit within the file;
    // otherwise getRoot() would index out of bounds.
    if (len < 8 or len >= size) die();
    global.index = main.allocator.alloc(u8, len) catch unreachable;
    if (try fd.preadAll(global.index, size - len - 4) != global.index.len) return error.EndOfStream;
}