JSON import: Fix parsing of escaped UTF-16 surrogate pairs

Fixes #245
This commit is contained in:
Yorhel 2024-11-03 11:05:47 +01:00
parent 88c9b8718e
commit bc8129cad1

View file

@@ -187,7 +187,7 @@ static int cons(void) {
static int rstring_esc(char **dest, int *destlen) { static int rstring_esc(char **dest, int *destlen) {
unsigned int n; unsigned int n, s;
C(rfill1); C(rfill1);
@@ -204,8 +204,22 @@ static int rstring_esc(char **dest, int *destlen) {
case 'u': case 'u':
C(rfill(5)); C(rfill(5));
#define hn(n) (n >= '0' && n <= '9' ? n-'0' : n >= 'A' && n <= 'F' ? n-'A'+10 : n >= 'a' && n <= 'f' ? n-'a'+10 : 1<<16) #define hn(n) (n >= '0' && n <= '9' ? n-'0' : n >= 'A' && n <= 'F' ? n-'A'+10 : n >= 'a' && n <= 'f' ? n-'a'+10 : 1<<16)
n = (hn(ctx->buf[1])<<12) + (hn(ctx->buf[2])<<8) + (hn(ctx->buf[3])<<4) + hn(ctx->buf[4]); #define h4(b) (hn((b)[0])<<12) + (hn((b)[1])<<8) + (hn((b)[2])<<4) + hn((b)[3])
#undef hn n = h4(ctx->buf+1);
con(5);
E(n >= (1<<16), "Invalid \\u escape");
E((n & 0xfc00) == 0xdc00, "Unexpected low surrogate");
if((n & 0xfc00) == 0xd800) { /* high surrogate */
C(rfill(7));
E(ctx->buf[0] != '\\', "Expected low surrogate");
E(ctx->buf[1] != 'u', "Expected low surrogate");
s = h4(ctx->buf+2);
con(6);
E(s >= (1<<16), "Invalid \\u escape");
E((s & 0xfc00) != 0xdc00, "Expected low surrogate");
n = 0x10000 + (((n & 0x03ff) << 10) | (s & 0x03ff));
}
if(n <= 0x007F) { if(n <= 0x007F) {
ap(n); ap(n);
} else if(n <= 0x07FF) { } else if(n <= 0x07FF) {
@@ -215,9 +229,14 @@ static int rstring_esc(char **dest, int *destlen) {
ap(0xE0 | (n>>12)); ap(0xE0 | (n>>12));
ap(0x80 | ((n>>6) & 0x3F)); ap(0x80 | ((n>>6) & 0x3F));
ap(0x80 | (n & 0x3F)); ap(0x80 | (n & 0x3F));
} else /* this happens if there was an invalid character (n >= (1<<16)) */ } else {
E(1, "Invalid character in \\u escape"); ap(0xF0 | (n>>18));\
con(5); ap(0x80 | ((n>>12) & 0x3F));
ap(0x80 | ((n>>6) & 0x3F));
ap(0x80 | (n & 0x3F));
}
#undef hn
#undef h4
break; break;
default: default:
E(1, "Invalid escape sequence"); E(1, "Invalid escape sequence");