From 1e56c8604ea0c15d22ac299394031e3c1e58d46f Mon Sep 17 00:00:00 2001
From: Yorhel
Date: Tue, 16 Jul 2024 17:36:37 +0200
Subject: [PATCH] Improve JSON import performance by another 10%

Profiling showed that string parsing was a bottleneck. We rarely need
the full power of JSON strings, though, so we can optimize for the
common case of plain strings without escape codes. Keeping the slower
string parser as fallback, of course.
---
Review note: compared to the original commit, stringContentSlow() rejects
surrogate code points in \u escapes before calling utf8Encode() (they made
`catch unreachable` reachable from input data), and comments were added.
The blob ids on the "index" line predate these edits.

 src/json_import.zig | 69 ++++++++++++++++++++++++++++++---------------
 1 file changed, 47 insertions(+), 22 deletions(-)

diff --git a/src/json_import.zig b/src/json_import.zig
index bd29fe6..8b7b5c4 100644
--- a/src/json_import.zig
+++ b/src/json_import.zig
@@ -82,32 +82,57 @@ const Parser = struct {
         };
     }
 
+    // Slow path of stringContent(): supports escape sequences and truncation.
+    // `head` is the first byte still to be processed, `off` the number of bytes
+    // already stored in buf. Bytes that do not fit in buf are read and dropped.
+    fn stringContentSlow(p: *Parser, buf: []u8, head: u8, off: usize) []u8 {
+        @setCold(true);
+        var b = head;
+        var n = off;
+        while (true) {
+            switch (b) {
+                '"' => break,
+                '\\' => switch (p.nextByte()) {
+                    '"' => if (n < buf.len) { buf[n] = '"'; n += 1; },
+                    '\\'=> if (n < buf.len) { buf[n] = '\\';n += 1; },
+                    '/' => if (n < buf.len) { buf[n] = '/'; n += 1; },
+                    'b' => if (n < buf.len) { buf[n] = 0x8; n += 1; },
+                    'f' => if (n < buf.len) { buf[n] = 0xc; n += 1; },
+                    'n' => if (n < buf.len) { buf[n] = 0xa; n += 1; },
+                    'r' => if (n < buf.len) { buf[n] = 0xd; n += 1; },
+                    't' => if (n < buf.len) { buf[n] = 0x9; n += 1; },
+                    'u' => {
+                        const char = (p.hexdig()<<12) + (p.hexdig()<<8) + (p.hexdig()<<4) + p.hexdig();
+                        // utf8Encode() returns an error for surrogate halves (U+D800..U+DFFF), so
+                        // reject those here; otherwise input data could reach `catch unreachable`.
+                        if (char >= 0xd800 and char <= 0xdfff) p.die("unsupported surrogate code point in unicode escape");
+                        if (n + 6 < buf.len)
+                            n += std.unicode.utf8Encode(char, buf[n..n+5]) catch unreachable;
+                    },
+                    else => p.die("invalid escape sequence"),
+                },
+                0x20, 0x21, 0x23...0x5b, 0x5d...0xff => if (n < buf.len) { buf[n] = b; n += 1; },
+                else => p.die("invalid character in string"),
+            }
+            b = p.nextByte();
+        }
+        return buf[0..n];
+    }
+
     // Read a string (after the ") into buf.
     // Any characters beyond the size of the buffer are consumed but otherwise discarded.
     fn stringContent(p: *Parser, buf: []u8) []u8 {
+        // Fast path for the common case (for ncdu dumps): the string fits in buf
+        // and contains no escapes. Everything else goes to stringContentSlow().
         var n: usize = 0;
-        while (true) switch (p.nextByte()) {
-            '"' => break,
-            '\\' => switch (p.nextByte()) {
-                '"' => if (n < buf.len) { buf[n] = '"'; n += 1; },
-                '\\'=> if (n < buf.len) { buf[n] = '\\';n += 1; },
-                '/' => if (n < buf.len) { buf[n] = '/'; n += 1; },
-                'b' => if (n < buf.len) { buf[n] = 0x8; n += 1; },
-                'f' => if (n < buf.len) { buf[n] = 0xc; n += 1; },
-                'n' => if (n < buf.len) { buf[n] = 0xa; n += 1; },
-                'r' => if (n < buf.len) { buf[n] = 0xd; n += 1; },
-                't' => if (n < buf.len) { buf[n] = 0x9; n += 1; },
-                'u' => {
-                    const char = (p.hexdig()<<12) + (p.hexdig()<<8) + (p.hexdig()<<4) + p.hexdig();
-                    if (n + 6 < buf.len)
-                        n += std.unicode.utf8Encode(char, buf[n..n+5]) catch unreachable;
-                },
-                else => p.die("invalid escape sequence"),
-            },
-            0x20, 0x21, 0x23...0x5b, 0x5d...0xff => |b| if (n < buf.len) { buf[n] = b; n += 1; },
-            else => p.die("invalid character in string"),
-        };
-        return buf[0..n];
+        var b = p.nextByte();
+        while (n < buf.len and b >= 0x20 and b != '"' and b != '\\') {
+            buf[n] = b;
+            n += 1;
+            b = p.nextByte();
+        }
+        if (b == '"') return buf[0..n];
+        return p.stringContentSlow(buf, b, n);
     }
 
     fn string(p: *Parser, buf: []u8) []u8 {