JSON import: support reading escaped UTF-16 surrogate pairs

Fixes #245

json/scanner.zig in Zig's standard library notes inconsistencies in the
standard as to whether unpaired surrogate halves are allowed. That
implementation disallows them, and so does this commit.
This commit is contained in:
Yorhel 2024-11-03 10:36:49 +01:00
parent bdc730f1e5
commit 232a4f8741

View file

@ -151,9 +151,16 @@ const Parser = struct {
'r' => if (n < buf.len) { buf[n] = 0xd; n += 1; },
't' => if (n < buf.len) { buf[n] = 0x9; n += 1; },
'u' => {
const char = (p.hexdig()<<12) + (p.hexdig()<<8) + (p.hexdig()<<4) + p.hexdig();
const first = (p.hexdig()<<12) + (p.hexdig()<<8) + (p.hexdig()<<4) + p.hexdig();
var unit = @as(u21, first);
if (std.unicode.utf16IsLowSurrogate(first)) p.die("Unexpected low surrogate");
if (std.unicode.utf16IsHighSurrogate(first)) {
p.expectLit("\\u");
const second = (p.hexdig()<<12) + (p.hexdig()<<8) + (p.hexdig()<<4) + p.hexdig();
unit = std.unicode.utf16DecodeSurrogatePair(&.{first, second}) catch p.die("Invalid low surrogate");
}
if (n + 6 < buf.len)
n += std.unicode.utf8Encode(char, buf[n..n+5]) catch unreachable;
n += std.unicode.utf8Encode(unit, buf[n..n+5]) catch unreachable;
},
else => p.die("invalid escape sequence"),
},