From bc8129cad192f05198d01219ab35d85d6d333b43 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sun, 3 Nov 2024 11:05:47 +0100 Subject: [PATCH] JSON import: Fix parsing of escaped UTF-16 surrogate pairs Fixes #245 --- src/dir_import.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/src/dir_import.c b/src/dir_import.c index 1425fed..4bc4789 100644 --- a/src/dir_import.c +++ b/src/dir_import.c @@ -187,7 +187,7 @@ static int cons(void) { static int rstring_esc(char **dest, int *destlen) { - unsigned int n; + unsigned int n, s; C(rfill1); @@ -204,8 +204,22 @@ static int rstring_esc(char **dest, int *destlen) { case 'u': C(rfill(5)); #define hn(n) (n >= '0' && n <= '9' ? n-'0' : n >= 'A' && n <= 'F' ? n-'A'+10 : n >= 'a' && n <= 'f' ? n-'a'+10 : 1<<16) - n = (hn(ctx->buf[1])<<12) + (hn(ctx->buf[2])<<8) + (hn(ctx->buf[3])<<4) + hn(ctx->buf[4]); -#undef hn +#define h4(b) (hn((b)[0])<<12) + (hn((b)[1])<<8) + (hn((b)[2])<<4) + hn((b)[3]) + n = h4(ctx->buf+1); + con(5); + E(n >= (1<<16), "Invalid \\u escape"); + E((n & 0xfc00) == 0xdc00, "Unexpected low surrogate"); + if((n & 0xfc00) == 0xd800) { /* high surrogate */ + C(rfill(7)); + E(ctx->buf[0] != '\\', "Expected low surrogate"); + E(ctx->buf[1] != 'u', "Expected low surrogate"); + s = h4(ctx->buf+2); + con(6); + E(s >= (1<<16), "Invalid \\u escape"); + E((s & 0xfc00) != 0xdc00, "Expected low surrogate"); + n = 0x10000 + (((n & 0x03ff) << 10) | (s & 0x03ff)); + } + if(n <= 0x007F) { ap(n); } else if(n <= 0x07FF) { @@ -215,9 +229,14 @@ static int rstring_esc(char **dest, int *destlen) { ap(0xE0 | (n>>12)); ap(0x80 | ((n>>6) & 0x3F)); ap(0x80 | (n & 0x3F)); - } else /* this happens if there was an invalid character (n >= (1<<16)) */ - E(1, "Invalid character in \\u escape"); - con(5); + } else { + ap(0xF0 | (n>>18));\ + ap(0x80 | ((n>>12) & 0x3F)); + ap(0x80 | ((n>>6) & 0x3F)); + ap(0x80 | (n & 0x3F)); + } +#undef hn +#undef h4 break; default: E(1, "Invalid escape sequence");