mirror of
https://code.blicky.net/yorhel/ncdu.git
synced 2026-01-12 17:08:39 -09:00
JSON import: support reading escaped UTF-16 surrogate pairs
Fixes #245. json/scanner.zig in std notes inconsistencies in the standard as to whether unpaired surrogate halves are allowed. That implementation disallows them, and so does this commit.
This commit is contained in:
parent
bdc730f1e5
commit
232a4f8741
1 changed file with 9 additions and 2 deletions
|
|
@@ -151,9 +151,16 @@ const Parser = struct {
|
|||
'r' => if (n < buf.len) { buf[n] = 0xd; n += 1; },
|
||||
't' => if (n < buf.len) { buf[n] = 0x9; n += 1; },
|
||||
'u' => {
|
||||
const char = (p.hexdig()<<12) + (p.hexdig()<<8) + (p.hexdig()<<4) + p.hexdig();
|
||||
const first = (p.hexdig()<<12) + (p.hexdig()<<8) + (p.hexdig()<<4) + p.hexdig();
|
||||
var unit = @as(u21, first);
|
||||
if (std.unicode.utf16IsLowSurrogate(first)) p.die("Unexpected low surrogate");
|
||||
if (std.unicode.utf16IsHighSurrogate(first)) {
|
||||
p.expectLit("\\u");
|
||||
const second = (p.hexdig()<<12) + (p.hexdig()<<8) + (p.hexdig()<<4) + p.hexdig();
|
||||
unit = std.unicode.utf16DecodeSurrogatePair(&.{first, second}) catch p.die("Invalid low surrogate");
|
||||
}
|
||||
if (n + 6 < buf.len)
|
||||
n += std.unicode.utf8Encode(char, buf[n..n+5]) catch unreachable;
|
||||
n += std.unicode.utf8Encode(unit, buf[n..n+5]) catch unreachable;
|
||||
},
|
||||
else => p.die("invalid escape sequence"),
|
||||
},
|
||||
|
|
|
|||
Loading…
Reference in a new issue