1 file changed: +6 -1 lines changed
Columns: original file line number | diff line number | diff line change
@@ -608,7 +608,12 @@ static std::vector<size_t> unicode_regex_split_custom_kimi_k2(const std::string
608
608
// Pattern 2 & 3: Letter words excluding Han characters with optional contractions
609
609
// [^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+(?:'s|'t|'re|'ve|'m|'ll|'d)?
610
610
// [^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*(?:'s|'t|'re|'ve|'m|'ll|'d)?
611
- if (flags.is_letter && !unicode_cpt_is_han (cpt)) {
611
+ // Check if current char is a letter OR if current char could be a leading char and next char is a letter
612
+ bool is_letter_pattern = (flags.is_letter && !unicode_cpt_is_han (cpt)) ||
613
+ (!(cpt == ' \r ' || cpt == ' \n ' || flags.is_letter || flags.is_number ) &&
614
+ _get_flags (pos + 1 ).is_letter && !unicode_cpt_is_han (_get_cpt (pos + 1 )));
615
+
616
+ if (is_letter_pattern) {
612
617
// Handle optional leading non-letter/non-number character
613
618
bool has_leading_char = false ;
614
619
if (!(cpt == ' \r ' || cpt == ' \n ' || flags.is_letter || flags.is_number )) {
You can’t perform that action at this time.
0 commit comments