mirror of
https://code.blicky.net/yorhel/ncdu.git
synced 2026-01-12 17:08:39 -09:00
JSON import: support reading escaped UTF-16 surrogate pairs
Fixes #245. The json/scanner.zig implementation in std notes inconsistencies in the standard as to whether unpaired surrogate halves are allowed. That implementation disallows them, and so does this commit.
This commit is contained in:
parent
bdc730f1e5
commit
232a4f8741
1 changed file with 9 additions and 2 deletions
|
|
@@ -151,9 +151,16 @@ const Parser = struct {
|
||||||
'r' => if (n < buf.len) { buf[n] = 0xd; n += 1; },
|
'r' => if (n < buf.len) { buf[n] = 0xd; n += 1; },
|
||||||
't' => if (n < buf.len) { buf[n] = 0x9; n += 1; },
|
't' => if (n < buf.len) { buf[n] = 0x9; n += 1; },
|
||||||
'u' => {
|
'u' => {
|
||||||
const char = (p.hexdig()<<12) + (p.hexdig()<<8) + (p.hexdig()<<4) + p.hexdig();
|
const first = (p.hexdig()<<12) + (p.hexdig()<<8) + (p.hexdig()<<4) + p.hexdig();
|
||||||
|
var unit = @as(u21, first);
|
||||||
|
if (std.unicode.utf16IsLowSurrogate(first)) p.die("Unexpected low surrogate");
|
||||||
|
if (std.unicode.utf16IsHighSurrogate(first)) {
|
||||||
|
p.expectLit("\\u");
|
||||||
|
const second = (p.hexdig()<<12) + (p.hexdig()<<8) + (p.hexdig()<<4) + p.hexdig();
|
||||||
|
unit = std.unicode.utf16DecodeSurrogatePair(&.{first, second}) catch p.die("Invalid low surrogate");
|
||||||
|
}
|
||||||
if (n + 6 < buf.len)
|
if (n + 6 < buf.len)
|
||||||
n += std.unicode.utf8Encode(char, buf[n..n+5]) catch unreachable;
|
n += std.unicode.utf8Encode(unit, buf[n..n+5]) catch unreachable;
|
||||||
},
|
},
|
||||||
else => p.die("invalid escape sequence"),
|
else => p.die("invalid escape sequence"),
|
||||||
},
|
},
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue