Improve JSON import performance by another 10%

Profiling showed that string parsing was a bottleneck. We rarely need the full power of JSON strings, though, so we can optimize for the common case of plain strings without escape codes, while keeping the slower string parser as a fallback, of course.
2026-03-13 23:08:40 -08:00 · 2024-07-16 17:36:37 +02:00 · 2024-07-16 17:36:37 +02:00 · 1e56c8604e
commit 1e56c8604e
parent d2e8dd8a90
1 changed files with 40 additions and 22 deletions
--- a/src/json_import.zig
+++ b/src/json_import.zig
@@ -82,32 +82,50 @@ const Parser = struct {
        };
    }
    // Slow path for reading string content: handles backslash escapes and
    // rejects raw control bytes.
    //
    // `head` is a byte that has already been read from the input and is
    // processed first; `off` is the number of bytes already stored in `buf`.
    // Reading continues until the closing '"'. Returns the filled part of
    // `buf`; bytes that do not fit are still consumed but are discarded.
    fn stringContentSlow(p: *Parser, buf: []u8, head: u8, off: usize) []u8 {
        @setCold(true);
        var b = head;
        var n = off;
        while (true) {
            switch (b) {
                '"' => break,
                '\\' => switch (p.nextByte()) {
                    '"' => if (n < buf.len) { buf[n] = '"'; n += 1; },
                    '\\'=> if (n < buf.len) { buf[n] = '\\';n += 1; },
                    '/' => if (n < buf.len) { buf[n] = '/'; n += 1; },
                    'b' => if (n < buf.len) { buf[n] = 0x8; n += 1; },
                    'f' => if (n < buf.len) { buf[n] = 0xc; n += 1; },
                    'n' => if (n < buf.len) { buf[n] = 0xa; n += 1; },
                    'r' => if (n < buf.len) { buf[n] = 0xd; n += 1; },
                    't' => if (n < buf.len) { buf[n] = 0x9; n += 1; },
                    'u' => {
                        var char: u21 = (p.hexdig()<<12) + (p.hexdig()<<8) + (p.hexdig()<<4) + p.hexdig();
                        // A \uXXXX escape is a UTF-16 code unit. Surrogate halves
                        // cannot be encoded as UTF-8 on their own (utf8Encode()
                        // returns an error for them), so a high surrogate must be
                        // combined with the low surrogate escape that follows it.
                        // NOTE(review): the checks below rely on p.die() not
                        // returning - confirm against its declaration.
                        if (char >= 0xdc00 and char <= 0xdfff) p.die("unexpected low surrogate in string");
                        if (char >= 0xd800 and char <= 0xdbff) {
                            if (p.nextByte() != '\\' or p.nextByte() != 'u') p.die("expected low surrogate after high surrogate");
                            const low: u21 = (p.hexdig()<<12) + (p.hexdig()<<8) + (p.hexdig()<<4) + p.hexdig();
                            if (low < 0xdc00 or low > 0xdfff) p.die("invalid low surrogate in string");
                            char = 0x10000 + ((char - 0xd800) << 10) + (low - 0xdc00);
                        }
                        // The encoded character is dropped entirely when the
                        // buffer does not have enough room left for it.
                        if (n + 6 < buf.len)
                            n += std.unicode.utf8Encode(char, buf[n..n+5]) catch unreachable;
                    },
                    else => p.die("invalid escape sequence"),
                },
                // Any byte other than '"', '\\' and the control range 0x00-0x1f is copied verbatim.
                0x20, 0x21, 0x23...0x5b, 0x5d...0xff => if (n < buf.len) { buf[n] = b; n += 1; },
                else => p.die("invalid character in string"),
            }
            b = p.nextByte();
        }
        return buf[0..n];
    }
    // Read a string (after the opening ") into buf and return the filled part of buf.
    // Any characters beyond the size of the buffer are consumed but otherwise discarded.
    fn stringContent(p: *Parser, buf: []u8) []u8 {
        // The common case (for ncdu dumps): string fits in the given buffer and does not contain any escapes.
        var n: usize = 0;
        var b = p.nextByte();
        // Copy plain bytes until the buffer is full or a byte needs special
        // handling: the closing quote, a backslash, or a control byte (< 0x20).
        while (n < buf.len and b >= 0x20 and b != '"' and b != '\\') {
            buf[n] = b;
            n += 1;
            b = p.nextByte();
        }
        if (b == '"') return buf[0..n];
        // Everything else goes to the slow path, which resumes with the byte
        // that stopped the loop and the number of bytes already stored.
        return p.stringContentSlow(buf, b, n);
    }
    fn string(p: *Parser, buf: []u8) []u8 {