Skip to content

Commit 2e1d05f

Browse files
authored
This closes #2205, fix string counting and truncation for cell length limitation (#2206)
1 parent 845a274 commit 2e1d05f

File tree

7 files changed

+68
-25
lines changed

7 files changed

+68
-25
lines changed

calc.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
"sync"
3030
"time"
3131
"unicode"
32+
"unicode/utf16"
3233
"unicode/utf8"
3334
"unsafe"
3435

@@ -14366,7 +14367,7 @@ func (fn *formulaFuncs) TEXTJOIN(argsList *list.List) formulaArg {
1436614367
return ok
1436714368
}
1436814369
result := strings.Join(args, delimiter.Value())
14369-
if len(result) > TotalCellChars {
14370+
if len(utf16.Encode([]rune(result))) > TotalCellChars {
1437014371
return newErrorFormulaArg(formulaErrorVALUE, fmt.Sprintf("TEXTJOIN function exceeds %d characters", TotalCellChars))
1437114372
}
1437214373
return newStringFormulaArg(result)

calc_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3969,6 +3969,7 @@ func TestCalcCellValue(t *testing.T) {
39693969
"TEXTJOIN(\"\",TRUE,NA())": {"#N/A", "#N/A"},
39703970
"TEXTJOIN(\"\",TRUE," + strings.Repeat("0,", 250) + ",0)": {"#VALUE!", "TEXTJOIN accepts at most 252 arguments"},
39713971
"TEXTJOIN(\",\",FALSE,REPT(\"*\",32768))": {"#VALUE!", "TEXTJOIN function exceeds 32767 characters"},
3972+
"TEXTJOIN(\"\",FALSE,REPT(\"\U0001F600\",16384))": {"#VALUE!", "TEXTJOIN function exceeds 32767 characters"},
39723973
// TRIM
39733974
"TRIM()": {"#VALUE!", "TRIM requires 1 argument"},
39743975
"TRIM(1,2)": {"#VALUE!", "TRIM requires 1 argument"},

cell.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ import (
2121
"strconv"
2222
"strings"
2323
"time"
24-
"unicode/utf8"
24+
"unicode/utf16"
2525

2626
"github.com/xuri/efp"
2727
)
@@ -447,8 +447,8 @@ func (f *File) SetCellStr(sheet, cell, value string) error {
447447

448448
// setCellString provides a function to set string type to shared string table.
449449
func (f *File) setCellString(value string) (t, v string, err error) {
450-
if utf8.RuneCountInString(value) > TotalCellChars {
451-
value = string([]rune(value)[:TotalCellChars])
450+
if len(utf16.Encode([]rune(value))) > TotalCellChars {
451+
value = truncateUTF16Units(value, TotalCellChars)
452452
}
453453
t = "s"
454454
var si int
@@ -510,8 +510,8 @@ func (f *File) setSharedString(val string) (int, error) {
510510

511511
// trimCellValue provides a function to set string type to cell.
512512
func trimCellValue(value string, escape bool) (v string, ns xml.Attr) {
513-
if utf8.RuneCountInString(value) > TotalCellChars {
514-
value = string([]rune(value)[:TotalCellChars])
513+
if len(utf16.Encode([]rune(value))) > TotalCellChars {
514+
value = truncateUTF16Units(value, TotalCellChars)
515515
}
516516
if value != "" {
517517
prefix, suffix := value[0], value[len(value)-1]
@@ -1211,7 +1211,7 @@ func setRichText(runs []RichTextRun) ([]xlsxR, error) {
12111211
totalCellChars int
12121212
)
12131213
for _, textRun := range runs {
1214-
totalCellChars += utf8.RuneCountInString(textRun.Text)
1214+
totalCellChars += len(utf16.Encode([]rune(textRun.Text)))
12151215
if totalCellChars > TotalCellChars {
12161216
return textRuns, ErrCellCharsLength
12171217
}

lib.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"regexp"
2626
"strconv"
2727
"strings"
28+
"unicode/utf16"
2829
)
2930

3031
// ReadZipReader extract spreadsheet with given options.
@@ -940,6 +941,18 @@ func setPtrFieldsVal(fields []string, immutable, mutable reflect.Value) {
940941
}
941942
}
942943

944+
// truncateUTF16Units truncates a string to a maximum number of UTF-16 code
945+
// units.
946+
func truncateUTF16Units(s string, length int) string {
947+
var cnt int
948+
for i, r := range s {
949+
if cnt += utf16.RuneLen(r); cnt > length {
950+
return s[:i]
951+
}
952+
}
953+
return s
954+
}
955+
943956
// Stack defined an abstract data type that serves as a collection of elements.
944957
type Stack struct {
945958
list *list.List

lib_test.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"strings"
1212
"sync"
1313
"testing"
14+
"unicode/utf16"
1415

1516
"github.com/stretchr/testify/assert"
1617
"github.com/stretchr/testify/require"
@@ -353,6 +354,28 @@ func TestBstrMarshal(t *testing.T) {
353354
}
354355
}
355356

357+
func TestTruncateUTF16Units(t *testing.T) {
358+
assertTrunc := func(s string, max int, expected string) {
359+
assert.Equal(t, expected, truncateUTF16Units(s, max), "src=%q max=%d", s, max)
360+
assert.LessOrEqual(t, len(utf16.Encode([]rune(truncateUTF16Units(s, max)))), max)
361+
}
362+
// No truncation
363+
assertTrunc("ABC", 3, "ABC")
364+
assertTrunc("A\U0001F600B", 4, "A\U0001F600B")
365+
// Truncate cutting before BMP rune
366+
assertTrunc("ABCDE", 3, "ABC")
367+
// Truncate with surrogate pair boundary: keep pair intact
368+
assertTrunc("A\U0001F600B", 3, "A\U0001F600") // 1 + 2 units
369+
assertTrunc("A\U0001F600B", 2, "A") // pair would overflow
370+
assertTrunc("\U0001F600B", 1, "") // first rune (2 units) exceeds limit
371+
assertTrunc("\U0001F600B", 2, "\U0001F600") // exact fit
372+
assertTrunc("\U0001F600B", 3, "\U0001F600B") // allow extra
373+
// Multiple surrogate pairs
374+
assertTrunc("\U0001F600\U0001F600B", 2, "\U0001F600") // corrected expectation per logic
375+
assertTrunc("\U0001F600\U0001F600B", 3, "\U0001F600") // 2 units kept, next pair would exceed
376+
assertTrunc("\U0001F600\U0001F600B", 4, "\U0001F600\U0001F600") // both pairs (4 units)
377+
}
378+
356379
func TestReadBytes(t *testing.T) {
357380
f := &File{tempFiles: sync.Map{}}
358381
sheet := "xl/worksheets/sheet1.xml"

sheet.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ import (
2525
"strconv"
2626
"strings"
2727
"unicode/utf16"
28-
"unicode/utf8"
2928

3029
"github.com/tiendc/go-deepcopy"
3130
)
@@ -1485,7 +1484,7 @@ func checkSheetName(name string) error {
14851484
if name == "" {
14861485
return ErrSheetNameBlank
14871486
}
1488-
if utf8.RuneCountInString(name) > MaxSheetNameLength {
1487+
if len(utf16.Encode([]rune(name))) > MaxSheetNameLength {
14891488
return ErrSheetNameLength
14901489
}
14911490
if strings.HasPrefix(name, "'") || strings.HasSuffix(name, "'") {

sheet_test.go

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -762,22 +762,28 @@ func TestSetSheetBackgroundFromBytes(t *testing.T) {
762762
}
763763

764764
func TestCheckSheetName(t *testing.T) {
765-
// Test valid sheet name
766-
assert.NoError(t, checkSheetName("Sheet1"))
767-
assert.NoError(t, checkSheetName("She'et1"))
768-
// Test invalid sheet name, empty name
769-
assert.EqualError(t, checkSheetName(""), ErrSheetNameBlank.Error())
770-
// Test invalid sheet name, include :\/?*[]
771-
assert.EqualError(t, checkSheetName("Sheet:"), ErrSheetNameInvalid.Error())
772-
assert.EqualError(t, checkSheetName(`Sheet\`), ErrSheetNameInvalid.Error())
773-
assert.EqualError(t, checkSheetName("Sheet/"), ErrSheetNameInvalid.Error())
774-
assert.EqualError(t, checkSheetName("Sheet?"), ErrSheetNameInvalid.Error())
775-
assert.EqualError(t, checkSheetName("Sheet*"), ErrSheetNameInvalid.Error())
776-
assert.EqualError(t, checkSheetName("Sheet["), ErrSheetNameInvalid.Error())
777-
assert.EqualError(t, checkSheetName("Sheet]"), ErrSheetNameInvalid.Error())
778-
// Test invalid sheet name, single quotes at the front or at the end
779-
assert.EqualError(t, checkSheetName("'Sheet"), ErrSheetNameSingleQuote.Error())
780-
assert.EqualError(t, checkSheetName("Sheet'"), ErrSheetNameSingleQuote.Error())
765+
for expected, name := range map[error]string{
766+
// Test valid sheet name
767+
nil: "Sheet1",
768+
nil: "She'et1",
769+
// Test invalid sheet name, empty name
770+
ErrSheetNameBlank: "",
771+
// Test invalid sheet name, include :\/?*[]
772+
ErrSheetNameInvalid: "Sheet:",
773+
ErrSheetNameInvalid: `Sheet\`,
774+
ErrSheetNameInvalid: "Sheet/",
775+
ErrSheetNameInvalid: "Sheet?",
776+
ErrSheetNameInvalid: "Sheet*",
777+
ErrSheetNameInvalid: "Sheet[",
778+
ErrSheetNameInvalid: "Sheet]",
779+
// Test invalid sheet name, single quotes at the front or at the end
780+
ErrSheetNameSingleQuote: "'Sheet",
781+
ErrSheetNameSingleQuote: "Sheet'",
782+
// Test invalid sheet name, exceed max length
783+
ErrSheetNameLength: "Sheet" + strings.Repeat("\U0001F600", 14),
784+
} {
785+
assert.Equal(t, expected, checkSheetName(name))
786+
}
781787
}
782788

783789
func TestSheetDimension(t *testing.T) {

0 commit comments

Comments
 (0)