Improve JSON import performance by another 10%

Profiling showed that string parsing was a bottleneck. We rarely need
the full power of JSON strings, though, so we can optimize for the
common case of plain strings without escape codes. The slower string
parser is kept as a fallback, of course.
This commit is contained in:
Yorhel 2024-07-16 17:36:37 +02:00
parent d2e8dd8a90
commit 1e56c8604e

View file

@ -82,32 +82,50 @@ const Parser = struct {
}; };
} }
// Slow path for reading string content: handles escape sequences, rejects
// bytes that are not allowed inside a string, and keeps consuming input even
// once buf is full (excess output is discarded).
//   buf:  destination buffer.
//   head: first byte to process; it has already been read from the input.
//   off:  number of bytes at the start of buf that are already filled.
// Returns buf[0..n], n being the number of bytes stored (including off).
fn stringContentSlow(p: *Parser, buf: []u8, head: u8, off: usize) []u8 {
    @setCold(true);
    var b = head;
    var n = off;
    while (true) {
        switch (b) {
            // Unescaped quote terminates the string.
            '"' => break,
            '\\' => switch (p.nextByte()) {
                '"' => if (n < buf.len) { buf[n] = '"'; n += 1; },
                '\\'=> if (n < buf.len) { buf[n] = '\\';n += 1; },
                '/' => if (n < buf.len) { buf[n] = '/'; n += 1; },
                'b' => if (n < buf.len) { buf[n] = 0x8; n += 1; },
                'f' => if (n < buf.len) { buf[n] = 0xc; n += 1; },
                'n' => if (n < buf.len) { buf[n] = 0xa; n += 1; },
                'r' => if (n < buf.len) { buf[n] = 0xd; n += 1; },
                't' => if (n < buf.len) { buf[n] = 0x9; n += 1; },
                'u' => {
                    const first = (p.hexdig()<<12) + (p.hexdig()<<8) + (p.hexdig()<<4) + p.hexdig();
                    var char: u21 = first;
                    if (first >= 0xd800 and first <= 0xdbff) {
                        // High surrogate: only valid as the first half of a
                        // \uXXXX\uXXXX pair, so the low half must follow.
                        if (p.nextByte() != '\\' or p.nextByte() != 'u')
                            p.die("expected low surrogate after high surrogate");
                        const second = (p.hexdig()<<12) + (p.hexdig()<<8) + (p.hexdig()<<4) + p.hexdig();
                        if (second < 0xdc00 or second > 0xdfff)
                            p.die("invalid low surrogate");
                        // Combine both halves into one code point (at most 0x10ffff).
                        char = 0x10000 + ((char - 0xd800) << 10) + (@as(u21, second) - 0xdc00);
                    } else if (first >= 0xdc00 and first <= 0xdfff) {
                        p.die("unexpected low surrogate");
                    }
                    // utf8Encode() only fails for surrogate halves and values
                    // above 0x10ffff; both are excluded above (assuming
                    // p.die() does not return), so the catch cannot trigger.
                    // The space check is conservative: the character is
                    // dropped unless more than 6 bytes remain in buf.
                    if (n + 6 < buf.len)
                        n += std.unicode.utf8Encode(char, buf[n..n+5]) catch unreachable;
                },
                else => p.die("invalid escape sequence"),
            },
            // Every byte >= 0x20 other than '"' and '\\' is copied verbatim.
            0x20, 0x21, 0x23...0x5b, 0x5d...0xff => if (n < buf.len) { buf[n] = b; n += 1; },
            // What remains are bytes below 0x20.
            else => p.die("invalid character in string"),
        }
        b = p.nextByte();
    }
    return buf[0..n];
}
// Read a string (after the ") into buf. // Read a string (after the ") into buf.
// Any characters beyond the size of the buffer are consumed but otherwise discarded. // Any characters beyond the size of the buffer are consumed but otherwise discarded.
// NOTE(review): this span appears to be a side-by-side diff rendering; each line
// holds the removed switch-based loop on the left and its replacement on the right.
// The replacement copies bytes into buf while there is room and the byte is
// >= 0x20 and neither '"' nor '\\'. If the loop stops on '"' it returns
// buf[0..n] directly; for any other stop condition it passes the pending byte
// and the current length to stringContentSlow().
fn stringContent(p: *Parser, buf: []u8) []u8 { fn stringContent(p: *Parser, buf: []u8) []u8 {
// The common case (for ncdu dumps): string fits in the given buffer and does not contain any escapes.
var n: usize = 0; var n: usize = 0;
while (true) switch (p.nextByte()) { var b = p.nextByte();
'"' => break, while (n < buf.len and b >= 0x20 and b != '"' and b != '\\') {
'\\' => switch (p.nextByte()) { buf[n] = b;
'"' => if (n < buf.len) { buf[n] = '"'; n += 1; }, n += 1;
'\\'=> if (n < buf.len) { buf[n] = '\\';n += 1; }, b = p.nextByte();
'/' => if (n < buf.len) { buf[n] = '/'; n += 1; }, }
'b' => if (n < buf.len) { buf[n] = 0x8; n += 1; }, if (b == '"') return buf[0..n];
'f' => if (n < buf.len) { buf[n] = 0xc; n += 1; }, return p.stringContentSlow(buf, b, n);
'n' => if (n < buf.len) { buf[n] = 0xa; n += 1; },
'r' => if (n < buf.len) { buf[n] = 0xd; n += 1; },
't' => if (n < buf.len) { buf[n] = 0x9; n += 1; },
'u' => {
const char = (p.hexdig()<<12) + (p.hexdig()<<8) + (p.hexdig()<<4) + p.hexdig();
if (n + 6 < buf.len)
n += std.unicode.utf8Encode(char, buf[n..n+5]) catch unreachable;
},
else => p.die("invalid escape sequence"),
},
0x20, 0x21, 0x23...0x5b, 0x5d...0xff => |b| if (n < buf.len) { buf[n] = b; n += 1; },
else => p.die("invalid character in string"),
};
return buf[0..n];
} }
fn string(p: *Parser, buf: []u8) []u8 { fn string(p: *Parser, buf: []u8) []u8 {