Review notes. Text ahead of the first "diff --git" line is commentary, not part of any hunk.
  * src/common.c: the new 0xED branch accepts s[1] == 0x80 (`>=` instead of `>`), so it
    agrees with the kk_utf8_read_validate condition quoted in the added comment.
  * CMakeLists.txt: the patched file ends with a newline after endif().
  * NOTE(review): line breaks, indentation and blank context lines were rebuilt from a
    whitespace-collapsed copy of this patch; confirm them against the target files.
  * NOTE(review): the lone `$` context line in the second CMakeLists.txt hunk looks
    truncated (presumably a `$<...>` generator expression); confirm before applying.
  * NOTE(review): the post-image hashes on the `index` lines predate these edits.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index d3d1522..ece8dbe 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -12,6 +12,7 @@ option(IC_DEBUG_UBSAN "Build with undefined behaviour sanitizer" OFF)
 option(IC_DEBUG_ASAN "Build with address sanitizer" OFF)
 option(IC_DEBUG_MSG "Enable printing debug messages stderr (only if also ISOCLINE_DEBUG=1 is set in the environment)" ON)
 option(IC_SEPARATE_OBJS "Compile with separate object files instead of one (warning: exports internal symbols)" OFF)
+option(IC_BUILD_TESTS "Build test binaries (example & test_colors)" ON)
 
 set(ic_version "0.1")
 set(ic_sources src/isocline.c)
@@ -151,12 +152,14 @@ target_include_directories(isocline PUBLIC
   $
 )
 
-add_executable(example test/example.c)
-target_compile_options(example PRIVATE ${ic_cflags})
-target_include_directories(example PRIVATE include)
-target_link_libraries(example PRIVATE isocline)
-
-add_executable(test_colors test/test_colors.c)
-target_compile_options(test_colors PRIVATE ${ic_cflags})
-target_include_directories(test_colors PRIVATE include)
-target_link_libraries(test_colors PRIVATE isocline)
+if (IC_BUILD_TESTS)
+  add_executable(example test/example.c)
+  target_compile_options(example PRIVATE ${ic_cflags})
+  target_include_directories(example PRIVATE include)
+  target_link_libraries(example PRIVATE isocline)
+
+  add_executable(test_colors test/test_colors.c)
+  target_compile_options(test_colors PRIVATE ${ic_cflags})
+  target_include_directories(test_colors PRIVATE include)
+  target_link_libraries(test_colors PRIVATE isocline)
+endif()
diff --git a/src/common.c b/src/common.c
index 1d9fb56..f8ccd91 100644
--- a/src/common.c
+++ b/src/common.c
@@ -232,10 +232,21 @@ ic_private unicode_t unicode_from_qutf8(const uint8_t* s, ssize_t len, ssize_t*
     return (((c0 & 0x1F) << 6) | (s[1] & 0x3F));
   }
   // 3 bytes: reject overlong and surrogate halves
+  /*
+    Fix: unicode_from_qutf8 used to ignore the 3-byte lead bytes 0xED..0xEF;
+
+    see https://github.com/koka-lang/koka/blob/master/kklib/src/string.c
+    line 252 (kk_utf8_read_validate)
+
+    // 3 byte encoding; reject overlong and utf-16 surrogate halves (0xD800 - 0xDFFF)
+    if ((b == 0xE0 && s[1] >= 0xA0 && s[1] <= 0xBF && kk_utf8_is_cont(s[2]))
+    || ((b >= 0xE1 && b <= 0xEF && b != 0xED) && kk_utf8_is_cont(s[1]) && kk_utf8_is_cont(s[2]))
+    || (b == 0xED && s[1] >= 0x80 && s[1] <= 0x9F && kk_utf8_is_cont(s[2])))
+  */
   else if (len >= 3 &&
            ((c0 == 0xE0 && s[1] >= 0xA0 && s[1] <= 0xBF && utf8_is_cont(s[2])) ||
-            (c0 >= 0xE1 && c0 <= 0xEC && utf8_is_cont(s[1]) && utf8_is_cont(s[2]))
-          ))
+            (c0 >= 0xE1 && c0 <= 0xEF && c0 != 0xED && utf8_is_cont(s[1]) && utf8_is_cont(s[2])) ||
+            (c0 == 0xED && s[1] >= 0x80 && s[1] <= 0x9F && utf8_is_cont(s[2]))))
   {
     if (count != NULL) *count = 3;
     return (((c0 & 0x0F) << 12) | ((unicode_t)(s[1] & 0x3F) << 6) | (s[2] & 0x3F));