// SPDX-FileCopyrightText: Yorhel // SPDX-License-Identifier: MIT const std = @import("std"); const main = @import("main.zig"); const util = @import("util.zig"); const model = @import("model.zig"); const sink = @import("sink.zig"); const ui = @import("ui.zig"); // Using a custom JSON parser here because, while std.json is great, it does // perform strict UTF-8 validation. Which is correct, of course, but ncdu dumps // are not always correct JSON as they may contain non-UTF-8 paths encoded as // strings. const Parser = struct { rd: std.fs.File, rdoff: usize = 0, rdsize: usize = 0, byte: u64 = 1, line: u64 = 1, buf: [16*1024]u8 = undefined, fn die(p: *Parser, str: []const u8) noreturn { ui.die("Error importing file on line {}:{}: {s}.\n", .{ p.line, p.byte, str }); } // Feed back a byte that has just been returned by nextByte() fn undoNextByte(p: *Parser, b: u8) void { p.byte -= 1; p.rdoff -= 1; p.buf[p.rdoff] = b; } fn fill(p: *Parser) void { @setCold(true); p.rdoff = 0; p.rdsize = p.rd.read(&p.buf) catch |e| switch (e) { error.IsDir => p.die("not a file"), // should be detected at open() time, but no flag for that... error.SystemResources => p.die("out of memory"), else => p.die("I/O error"), }; } // Returns 0 on EOF. // (or if the file contains a 0 byte, but that's invalid anyway) // (Returning a '?u8' here is nicer but kills performance by about +30%) fn nextByte(p: *Parser) u8 { if (p.rdoff == p.rdsize) { p.fill(); if (p.rdsize == 0) return 0; } p.byte += 1; defer p.rdoff += 1; return (&p.buf)[p.rdoff]; } // next non-whitespace byte fn nextChr(p: *Parser) u8 { while (true) switch (p.nextByte()) { '\n' => { p.line += 1; p.byte = 1; }, ' ', '\t', '\r' => {}, else => |b| return b, }; } fn expectLit(p: *Parser, lit: []const u8) void { for (lit) |b| if (b != p.nextByte()) p.die("invalid JSON"); } fn hexdig(p: *Parser) u16 { const b = p.nextByte(); return switch (b) { '0'...'9' => b - '0', 'a'...'f' => b - 'a' + 10, 'A'...'F' => b - 'A' + 10, else => p.die("invalid hex digit"), }; } // Read a string (after the ") into buf. // Any characters beyond the size of the buffer are consumed but otherwise discarded. fn stringContent(p: *Parser, buf: []u8) []u8 { var n: usize = 0; while (true) switch (p.nextByte()) { '"' => break, '\\' => switch (p.nextByte()) { '"' => if (n < buf.len) { buf[n] = '"'; n += 1; }, '\\'=> if (n < buf.len) { buf[n] = '\\';n += 1; }, '/' => if (n < buf.len) { buf[n] = '/'; n += 1; }, 'b' => if (n < buf.len) { buf[n] = 0x8; n += 1; }, 'f' => if (n < buf.len) { buf[n] = 0xc; n += 1; }, 'n' => if (n < buf.len) { buf[n] = 0xa; n += 1; }, 'r' => if (n < buf.len) { buf[n] = 0xd; n += 1; }, 't' => if (n < buf.len) { buf[n] = 0x9; n += 1; }, 'u' => { const char = (p.hexdig()<<12) + (p.hexdig()<<8) + (p.hexdig()<<4) + p.hexdig(); if (n + 6 < buf.len) n += std.unicode.utf8Encode(char, buf[n..n+5]) catch unreachable; }, else => p.die("invalid escape sequence"), }, 0x20, 0x21, 0x23...0x5b, 0x5d...0xff => |b| if (n < buf.len) { buf[n] = b; n += 1; }, else => p.die("invalid character in string"), }; return buf[0..n]; } fn string(p: *Parser, buf: []u8) []u8 { if (p.nextChr() != '"') p.die("expected string"); return p.stringContent(buf); } fn uintTail(p: *Parser, head: u8, T: anytype) T { if (head == '0') return 0; var v: T = head - '0'; // Assumption: T >= u8 // Assumption: we don't parse JSON "documents" that are a bare uint. while (true) switch (p.nextByte()) { '0'...'9' => |b| { const newv = v *% 10 +% (b - '0'); if (newv < v) p.die("integer out of range"); v = newv; }, else => |b| break p.undoNextByte(b), }; if (v == 0) p.die("expected number"); return v; } fn uint(p: *Parser, T: anytype) T { switch (p.nextChr()) { '0'...'9' => |b| return p.uintTail(b, T), else => p.die("expected number"), } } fn boolean(p: *Parser) bool { switch (p.nextChr()) { 't' => { p.expectLit("rue"); return true; }, 'f' => { p.expectLit("alse"); return false; }, else => p.die("expected boolean"), } } fn obj(p: *Parser) void { if (p.nextChr() != '{') p.die("expected object"); } fn key(p: *Parser, first: bool, buf: []u8) ?[]u8 { const k = switch (p.nextChr()) { ',' => blk: { if (first) p.die("invalid JSON"); break :blk p.string(buf); }, '"' => blk: { if (!first) p.die("invalid JSON"); break :blk p.stringContent(buf); }, '}' => return null, else => p.die("invalid JSON"), }; if (p.nextChr() != ':') p.die("invalid JSON"); return k; } fn array(p: *Parser) void { if (p.nextChr() != '[') p.die("expected array"); } fn elem(p: *Parser, first: bool) bool { switch (p.nextChr()) { ',' => if (first) p.die("invalid JSON") else return true, ']' => return false, else => |b| { if (!first) p.die("invalid JSON"); p.undoNextByte(b); return true; }, } } fn skipContent(p: *Parser, head: u8) void { switch (head) { 't' => p.expectLit("rue"), 'f' => p.expectLit("alse"), 'n' => p.expectLit("ull"), '-', '0'...'9' => // Numbers are kind of annoying, this "parsing" is invalid and ultra-lazy. while (true) switch (p.nextByte()) { '-', '+', 'e', 'E', '.', '0'...'9' => {}, else => |b| return p.undoNextByte(b), }, '"' => _ = p.stringContent(&[0]u8{}), '[' => { var first = true; while (p.elem(first)) { first = false; p.skip(); } }, '{' => { var first = true; while (p.key(first, &[0]u8{})) |_| { first = false; p.skip(); } }, else => p.die("invalid JSON"), } } fn skip(p: *Parser) void { p.skipContent(p.nextChr()); } fn eof(p: *Parser) void { if (p.nextChr() != 0) p.die("trailing garbage"); } }; // Should really add some invalid JSON test cases as well, but I'd first like // to benchmark the performance impact of using error returns instead of // calling ui.die(). test "JSON parser" { const json = \\{ \\ "null": null, \\ "true": true, \\ "false": false, \\ "zero":0 ,"uint": 123, \\ "emptyObj": {}, \\ "emptyArray": [], \\ "emptyString": "", \\ "encString": "\"\\\/\b\f\n\uBe3F", \\ "numbers": [0,1,20,-300, 3.4 ,0e-10 , -100.023e+13 ] \\} ; var p = Parser{ .rd = undefined, .rdsize = json.len }; @memcpy(p.buf[0..json.len], json); p.skip(); p = Parser{ .rd = undefined, .rdsize = json.len }; @memcpy(p.buf[0..json.len], json); var buf: [128]u8 = undefined; p.obj(); try std.testing.expectEqualStrings(p.key(true, &buf).?, "null"); p.skip(); try std.testing.expectEqualStrings(p.key(false, &buf).?, "true"); try std.testing.expect(p.boolean()); try std.testing.expectEqualStrings(p.key(false, &buf).?, "false"); try std.testing.expect(!p.boolean()); try std.testing.expectEqualStrings(p.key(false, &buf).?, "zero"); try std.testing.expectEqual(0, p.uint(u8)); try std.testing.expectEqualStrings(p.key(false, &buf).?, "uint"); try std.testing.expectEqual(123, p.uint(u8)); try std.testing.expectEqualStrings(p.key(false, &buf).?, "emptyObj"); p.obj(); try std.testing.expect(p.key(true, &buf) == null); try std.testing.expectEqualStrings(p.key(false, &buf).?, "emptyArray"); p.array(); try std.testing.expect(!p.elem(true)); try std.testing.expectEqualStrings(p.key(false, &buf).?, "emptyString"); try std.testing.expectEqualStrings(p.string(&buf), ""); try std.testing.expectEqualStrings(p.key(false, &buf).?, "encString"); try std.testing.expectEqualStrings(p.string(&buf), "\"\\/\x08\x0c\n\u{be3f}"); try std.testing.expectEqualStrings(p.key(false, &buf).?, "numbers"); p.skip(); try std.testing.expect(p.key(true, &buf) == null); } const Ctx = struct { p: *Parser, sink: *sink.Thread, items_seen: u64 = 0, stat: sink.Stat = .{}, special: ?sink.Special = null, namelen: usize = 0, namebuf: [32*1024]u8 = undefined, }; fn itemkey(ctx: *Ctx, key: []const u8) void { const eq = std.mem.eql; switch (if (key.len > 0) key[0] else @as(u8,0)) { 'a' => { if (eq(u8, key, "asize")) { ctx.stat.size = ctx.p.uint(u64); return; } }, 'd' => { if (eq(u8, key, "dsize")) { ctx.stat.blocks = @intCast(ctx.p.uint(u64)>>9); return; } if (eq(u8, key, "dev")) { ctx.stat.dev = ctx.p.uint(u64); return; } }, 'e' => { if (eq(u8, key, "excluded")) { var buf: [32]u8 = undefined; const typ = ctx.p.string(&buf); // "frmlnk" is also possible, but currently considered equivalent to "pattern". if (eq(u8, typ, "otherfs")) ctx.special = .other_fs else if (eq(u8, typ, "kernfs")) ctx.special = .kernfs else ctx.special = .excluded; return; } }, 'g' => { if (eq(u8, key, "gid")) { ctx.stat.ext.gid = ctx.p.uint(u32); return; } }, 'h' => { if (eq(u8, key, "hlnkc")) { ctx.stat.hlinkc = ctx.p.boolean(); return; } }, 'i' => { if (eq(u8, key, "ino")) { ctx.stat.ino = ctx.p.uint(u64); return; } }, 'm' => { if (eq(u8, key, "mode")) { ctx.stat.ext.mode = ctx.p.uint(u16); return; } if (eq(u8, key, "mtime")) { ctx.stat.ext.mtime = ctx.p.uint(u64); // Accept decimal numbers, but discard the fractional part because our data model doesn't support it. switch (ctx.p.nextByte()) { '.' => while (true) switch (ctx.p.nextByte()) { '0'...'9' => {}, else => |b| return ctx.p.undoNextByte(b), }, else => |b| return ctx.p.undoNextByte(b), } } }, 'n' => { if (eq(u8, key, "name")) { if (ctx.namelen != 0) ctx.p.die("duplicate key"); ctx.namelen = ctx.p.string(&ctx.namebuf).len; if (ctx.namelen > ctx.namebuf.len-5) ctx.p.die("too long file name"); return; } if (eq(u8, key, "nlink")) { ctx.stat.nlink = ctx.p.uint(u31); if (!ctx.stat.dir and ctx.stat.nlink > 1) ctx.stat.hlinkc = true; return; } if (eq(u8, key, "notreg")) { ctx.stat.reg = !ctx.p.boolean(); return; } }, 'r' => { if (eq(u8, key, "read_error")) { if (ctx.p.boolean()) ctx.special = .err; return; } }, 'u' => { if (eq(u8, key, "uid")) { ctx.stat.ext.uid = ctx.p.uint(u32); return; } }, else => {}, } ctx.p.skip(); } fn item(ctx: *Ctx, parent: ?*sink.Dir, dev: u64) void { ctx.stat = .{ .dev = dev }; ctx.namelen = 0; ctx.special = null; ctx.stat.dir = switch (ctx.p.nextChr()) { '[' => blk: { ctx.p.obj(); break :blk true; }, '{' => false, else => ctx.p.die("expected object or array"), }; if (parent == null and !ctx.stat.dir) ctx.p.die("parent item must be a directory"); var keybuf: [32]u8 = undefined; var first = true; while (ctx.p.key(first, &keybuf)) |k| { first = false; itemkey(ctx, k); } if (ctx.namelen == 0) ctx.p.die("missing \"name\" field"); const name = (&ctx.namebuf)[0..ctx.namelen]; if (ctx.stat.dir and (ctx.special == null or ctx.special == .err)) { const ndev = ctx.stat.dev; const dir = if (parent) |d| d.addDir(ctx.sink, name, &ctx.stat) else sink.createRoot(name, &ctx.stat); ctx.sink.setDir(dir); if (ctx.special == .err) dir.setReadError(ctx.sink); while (ctx.p.elem(false)) item(ctx, dir, ndev); ctx.sink.setDir(parent); dir.unref(); } else if (ctx.special) |s| { parent.?.addSpecial(ctx.sink, name, s); if (ctx.stat.dir and ctx.p.elem(false)) ctx.p.die("unexpected contents in an excluded directory"); } else { parent.?.addStat(ctx.sink, name, &ctx.stat); } ctx.items_seen += 1; if ((ctx.items_seen & 1023) == 0) main.handleEvent(false, false); } pub fn import(path: [:0]const u8) void { const sink_threads = sink.createThreads(1); defer sink.done(); const fd = if (std.mem.eql(u8, "-", path)) std.io.getStdIn() else std.fs.cwd().openFileZ(path, .{}) catch |e| ui.die("Error reading file: {s}.\n", .{ui.errorString(e)}); defer fd.close(); var p = Parser{.rd = fd}; p.array(); if (p.uint(u16) != 1) p.die("incompatible major format version"); if (!p.elem(false)) p.die("expected array element"); _ = p.uint(u16); // minor version, ignored for now if (!p.elem(false)) p.die("expected array element"); // metadata object p.obj(); p.skipContent('{'); // Items if (!p.elem(false)) p.die("expected array element"); var ctx = Ctx{.p = &p, .sink = &sink_threads[0]}; item(&ctx, null, 0); // accept more trailing elements while (p.elem(false)) p.skip(); p.eof(); }