mirror of
https://code.blicky.net/yorhel/ncdu.git
synced 2026-01-15 02:08:40 -09:00
Fix panic when shortening strings with unicode variation selectors
Fixes #199. That's not to say it handles variation selectors or combining marks well, though. This is kind of messy. :(
This commit is contained in:
parent
2e4f0f0bce
commit
0fc14173f2
1 changed file with 8 additions and 5 deletions
13
src/ui.zig
13
src/ui.zig
|
|
@ -128,20 +128,21 @@ pub fn shorten(in: [:0]const u8, max_width: u32) [:0] const u8 {
|
||||||
var total_width: u32 = 0;
|
var total_width: u32 = 0;
|
||||||
var prefix_width: u32 = 0;
|
var prefix_width: u32 = 0;
|
||||||
var prefix_end: u32 = 0;
|
var prefix_end: u32 = 0;
|
||||||
|
var prefix_done = false;
|
||||||
var it = std.unicode.Utf8View.initUnchecked(in).iterator();
|
var it = std.unicode.Utf8View.initUnchecked(in).iterator();
|
||||||
while (it.nextCodepoint()) |cp| {
|
while (it.nextCodepoint()) |cp| {
|
||||||
// XXX: libc assumption: wchar_t is a Unicode point. True for most modern libcs?
|
// XXX: libc assumption: wchar_t is a Unicode point. True for most modern libcs?
|
||||||
// (The "proper" way is to use mbtowc(), but I'd rather port the musl wcwidth implementation to Zig so that I *know* it'll be Unicode.
|
// (The "proper" way is to use mbtowc(), but I'd rather port the musl wcwidth implementation to Zig so that I *know* it'll be Unicode.
|
||||||
// On the other hand, ncurses also use wcwidth() so that would cause duplicated code. Ugh)
|
// On the other hand, ncurses also use wcwidth() so that would cause duplicated code. Ugh)
|
||||||
const cp_width_ = c.wcwidth(cp);
|
const cp_width_ = c.wcwidth(cp);
|
||||||
const cp_width = @intCast(u32, if (cp_width_ < 0) 1 else cp_width_);
|
const cp_width = @intCast(u32, if (cp_width_ < 0) 0 else cp_width_);
|
||||||
const cp_len = std.unicode.utf8CodepointSequenceLength(cp) catch unreachable;
|
const cp_len = std.unicode.utf8CodepointSequenceLength(cp) catch unreachable;
|
||||||
total_width += cp_width;
|
total_width += cp_width;
|
||||||
if (prefix_width + cp_width <= @divFloor(max_width-1, 2)-1) {
|
if (!prefix_done and prefix_width + cp_width <= @divFloor(max_width-1, 2)-1) {
|
||||||
prefix_width += cp_width;
|
prefix_width += cp_width;
|
||||||
prefix_end += cp_len;
|
prefix_end += cp_len;
|
||||||
continue;
|
} else
|
||||||
}
|
prefix_done = true;
|
||||||
}
|
}
|
||||||
if (total_width <= max_width) return in;
|
if (total_width <= max_width) return in;
|
||||||
|
|
||||||
|
|
@ -154,7 +155,7 @@ pub fn shorten(in: [:0]const u8, max_width: u32) [:0] const u8 {
|
||||||
it = std.unicode.Utf8View.initUnchecked(in[prefix_end..]).iterator();
|
it = std.unicode.Utf8View.initUnchecked(in[prefix_end..]).iterator();
|
||||||
while (it.nextCodepoint()) |cp| {
|
while (it.nextCodepoint()) |cp| {
|
||||||
const cp_width_ = c.wcwidth(cp);
|
const cp_width_ = c.wcwidth(cp);
|
||||||
const cp_width = @intCast(u32, if (cp_width_ < 0) 1 else cp_width_);
|
const cp_width = @intCast(u32, if (cp_width_ < 0) 0 else cp_width_);
|
||||||
const cp_len = std.unicode.utf8CodepointSequenceLength(cp) catch unreachable;
|
const cp_len = std.unicode.utf8CodepointSequenceLength(cp) catch unreachable;
|
||||||
start_width += cp_width;
|
start_width += cp_width;
|
||||||
start_len += cp_len;
|
start_len += cp_len;
|
||||||
|
|
@ -185,6 +186,8 @@ test "shorten" {
|
||||||
try t("AaBCDEFGH", 8, "A...H"); // could optimize this, but w/e
|
try t("AaBCDEFGH", 8, "A...H"); // could optimize this, but w/e
|
||||||
try t("ABCDEFGaH", 8, "A...aH");
|
try t("ABCDEFGaH", 8, "A...aH");
|
||||||
try t("ABCDEFGH", 15, "ABC...FGH");
|
try t("ABCDEFGH", 15, "ABC...FGH");
|
||||||
|
try t("❤︎a❤︎a❤︎a", 5, "❤︎...︎a"); // Variation selectors; not great, there's an additional U+FE0E before 'a'.
|
||||||
|
try t("ą́ą́ą́ą́ą́ą́", 5, "ą́...̨́ą́"); // Combining marks, similarly bad.
|
||||||
}
|
}
|
||||||
|
|
||||||
// ncurses_refs.c
|
// ncurses_refs.c
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue