From 83f251bc5c13b400a260f2d5125f8f4652c8c57b Mon Sep 17 00:00:00 2001 From: Ding Li Date: Thu, 18 Jan 2024 18:08:58 +0800 Subject: [PATCH 1/4] Update common.c fix: unicode_from_qutf8 ignores 0xED and 0xEF cases --- src/common.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/common.c b/src/common.c index 1d9fb56..0a2d27f 100644 --- a/src/common.c +++ b/src/common.c @@ -232,9 +232,21 @@ ic_private unicode_t unicode_from_qutf8(const uint8_t* s, ssize_t len, ssize_t* return (((c0 & 0x1F) << 6) | (s[1] & 0x3F)); } // 3 bytes: reject overlong and surrogate halves + /* + Fix: unicode_from_qutf8 ignores 0xED and 0xEF cases; + + see https://github.com/koka-lang/koka/blob/master/kklib/src/string.c + line 252 (kk_utf8_read_validate) + + // 3 byte encoding; reject overlong and utf-16 surrogate halves (0xD800 - 0xDFFF) + if ((b == 0xE0 && s[1] >= 0xA0 && s[1] <= 0xBF && kk_utf8_is_cont(s[2])) + || ((b >= 0xE1 && b <= 0xEF && b != 0xED) && kk_utf8_is_cont(s[1]) && kk_utf8_is_cont(s[2])) + || (b == 0xED && s[1] >= 0x80 && s[1] <= 0x9F && kk_utf8_is_cont(s[2]))) + */ else if (len >= 3 && ((c0 == 0xE0 && s[1] >= 0xA0 && s[1] <= 0xBF && utf8_is_cont(s[2])) || - (c0 >= 0xE1 && c0 <= 0xEC && utf8_is_cont(s[1]) && utf8_is_cont(s[2])) + (c0 >= 0xE1 && c0 <= 0xEF && c0 != 0xED && utf8_is_cont(s[1]) && utf8_is_cont(s[2]) || + (c0 == 0xED && s[1] > 0x80 && s[1] <= 0x9F && utf8_is_cont(s[2]))) )) { if (count != NULL) *count = 3; From 033b9b494b7e53780ca51981caa781eebd4e0479 Mon Sep 17 00:00:00 2001 From: Ding Li Date: Thu, 18 Jan 2024 18:32:17 +0800 Subject: [PATCH 2/4] Update CMakeLists.txt feature: add IC_BUILD_TESTS option --- CMakeLists.txt | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d3d1522..de0a759 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,6 +12,7 @@ option(IC_DEBUG_UBSAN "Build with undefined behaviour sanitizer" OFF) option(IC_DEBUG_ASAN "Build with 
address sanitizer" OFF) option(IC_DEBUG_MSG "Enable printing debug messages stderr (only if also ISOCLINE_DEBUG=1 is set in the environment)" ON) option(IC_SEPARATE_OBJS "Compile with separate object files instead of one (warning: exports internal symbols)" OFF) +option(IC_BUILD_TESTS "Build test binaries (example & test_colors)" ON) set(ic_version "0.1") set(ic_sources src/isocline.c) @@ -151,12 +152,14 @@ target_include_directories(isocline PUBLIC $ ) -add_executable(example test/example.c) -target_compile_options(example PRIVATE ${ic_cflags}) -target_include_directories(example PRIVATE include) -target_link_libraries(example PRIVATE isocline) - -add_executable(test_colors test/test_colors.c) -target_compile_options(test_colors PRIVATE ${ic_cflags}) -target_include_directories(test_colors PRIVATE include) -target_link_libraries(test_colors PRIVATE isocline) +if (IC_BUILD_TESTS) + add_executable(example test/example.c) + target_compile_options(example PRIVATE ${ic_cflags}) + target_include_directories(example PRIVATE include) + target_link_libraries(example PRIVATE isocline) + + add_executable(test_colors test/test_colors.c) + target_compile_options(test_colors PRIVATE ${ic_cflags}) + target_include_directories(test_colors PRIVATE include) + target_link_libraries(test_colors PRIVATE isocline) +end_if() \ No newline at end of file From 690c9b0f26d96d69ef8633a930c626978325793d Mon Sep 17 00:00:00 2001 From: Ding Li Date: Thu, 18 Jan 2024 18:49:09 +0800 Subject: [PATCH 3/4] Update CMakeLists.txt fix: typo endif --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index de0a759..ece8dbe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -162,4 +162,4 @@ if (IC_BUILD_TESTS) target_compile_options(test_colors PRIVATE ${ic_cflags}) target_include_directories(test_colors PRIVATE include) target_link_libraries(test_colors PRIVATE isocline) -end_if() \ No newline at end of file +endif() \ No newline at end of 
file From daee8f04c6b81adcdc9c3e7b933eeadc8d2dde97 Mon Sep 17 00:00:00 2001 From: Ding Li Date: Thu, 18 Jan 2024 19:00:15 +0800 Subject: [PATCH 4/4] Update common.c fix: parentheses; also accept 0x80 as the second byte after 0xED (ED 80..9F xx encodes U+D000..U+D7FF, only ED A0..BF xx are surrogate halves) --- src/common.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/common.c b/src/common.c index 0a2d27f..f8ccd91 100644 --- a/src/common.c +++ b/src/common.c @@ -245,9 +245,8 @@ ic_private unicode_t unicode_from_qutf8(const uint8_t* s, ssize_t len, ssize_t* */ else if (len >= 3 && ((c0 == 0xE0 && s[1] >= 0xA0 && s[1] <= 0xBF && utf8_is_cont(s[2])) || - (c0 >= 0xE1 && c0 <= 0xEF && c0 != 0xED && utf8_is_cont(s[1]) && utf8_is_cont(s[2]) || - (c0 == 0xED && s[1] > 0x80 && s[1] <= 0x9F && utf8_is_cont(s[2]))) - )) + (c0 >= 0xE1 && c0 <= 0xEF && c0 != 0xED && utf8_is_cont(s[1]) && utf8_is_cont(s[2])) || + (c0 == 0xED && s[1] >= 0x80 && s[1] <= 0x9F && utf8_is_cont(s[2])))) { if (count != NULL) *count = 3; return (((c0 & 0x0F) << 12) | ((unicode_t)(s[1] & 0x3F) << 6) | (s[2] & 0x3F));