JSON import: Fix parsing of escaped UTF-16 surrogate pairs

Fixes #245
This commit is contained in:
Yorhel 2024-11-03 11:05:47 +01:00
parent 88c9b8718e
commit bc8129cad1

View file

@ -187,7 +187,7 @@ static int cons(void) {
static int rstring_esc(char **dest, int *destlen) {
unsigned int n;
unsigned int n, s;
C(rfill1);
@ -204,8 +204,22 @@ static int rstring_esc(char **dest, int *destlen) {
case 'u':
C(rfill(5));
#define hn(n) (n >= '0' && n <= '9' ? n-'0' : n >= 'A' && n <= 'F' ? n-'A'+10 : n >= 'a' && n <= 'f' ? n-'a'+10 : 1<<16)
n = (hn(ctx->buf[1])<<12) + (hn(ctx->buf[2])<<8) + (hn(ctx->buf[3])<<4) + hn(ctx->buf[4]);
#undef hn
#define h4(b) (hn((b)[0])<<12) + (hn((b)[1])<<8) + (hn((b)[2])<<4) + hn((b)[3])
n = h4(ctx->buf+1);
con(5);
E(n >= (1<<16), "Invalid \\u escape");
E((n & 0xfc00) == 0xdc00, "Unexpected low surrogate");
if((n & 0xfc00) == 0xd800) { /* high surrogate */
C(rfill(7));
E(ctx->buf[0] != '\\', "Expected low surrogate");
E(ctx->buf[1] != 'u', "Expected low surrogate");
s = h4(ctx->buf+2);
con(6);
E(s >= (1<<16), "Invalid \\u escape");
E((s & 0xfc00) != 0xdc00, "Expected low surrogate");
n = 0x10000 + (((n & 0x03ff) << 10) | (s & 0x03ff));
}
if(n <= 0x007F) {
ap(n);
} else if(n <= 0x07FF) {
@ -215,9 +229,14 @@ static int rstring_esc(char **dest, int *destlen) {
ap(0xE0 | (n>>12));
ap(0x80 | ((n>>6) & 0x3F));
ap(0x80 | (n & 0x3F));
} else /* this happens if there was an invalid character (n >= (1<<16)) */
E(1, "Invalid character in \\u escape");
con(5);
} else {
ap(0xF0 | (n>>18));\
ap(0x80 | ((n>>12) & 0x3F));
ap(0x80 | ((n>>6) & 0x3F));
ap(0x80 | (n & 0x3F));
}
#undef hn
#undef h4
break;
default:
E(1, "Invalid escape sequence");