1+ // Copyright 2021-present StarRocks, Inc. All rights reserved.
2+ //
3+ // Licensed under the Apache License, Version 2.0 (the "License");
4+ // you may not use this file except in compliance with the License.
5+ // You may obtain a copy of the License at
6+ //
7+ // https://www.apache.org/licenses/LICENSE-2.0
8+ //
9+ // Unless required by applicable law or agreed to in writing, software
10+ // distributed under the License is distributed on an "AS IS" BASIS,
11+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+ // See the License for the specific language governing permissions and
13+ // limitations under the License.
14+
15+ #include " formats/csv/csv_reader.h"
16+
17+ #include < gtest/gtest.h>
18+
19+ #include " runtime/types.h"
20+
21+ namespace starrocks ::csv {
22+
23+ // Mock CSVReader for testing - implements the pure virtual function
24+ class MockCSVReader : public starrocks ::CSVReader {
25+ public:
26+ explicit MockCSVReader (const starrocks::CSVParseOptions& parse_options) : CSVReader(parse_options) {}
27+
28+ protected:
29+ starrocks::Status _fill_buffer () override {
30+ // Mock implementation - not needed for split_record tests
31+ return starrocks::Status::OK ();
32+ }
33+
34+ char * _find_line_delimiter (starrocks::CSVBuffer& buffer, size_t pos) override {
35+ // Mock implementation - not needed for split_record tests
36+ return nullptr ;
37+ }
38+ };
39+
40+ class CSVReaderTest : public ::testing::Test {
41+ public:
42+ CSVReaderTest () = default ;
43+
44+ protected:
45+ void SetUp () override {
46+ _parse_options.column_delimiter = " ," ;
47+ _parse_options.row_delimiter = " \n " ;
48+ _parse_options.trim_space = false ;
49+ }
50+
51+ starrocks::CSVParseOptions _parse_options;
52+ };
53+
54+ // NOLINTNEXTLINE
55+ TEST_F (CSVReaderTest, test_split_record_single_delimiter) {
56+ MockCSVReader reader (_parse_options);
57+
58+ // Test basic splitting
59+ starrocks::CSVReader::Record record1{" a,b,c" , 5 };
60+ starrocks::CSVReader::Fields fields1;
61+ reader.split_record (record1, &fields1);
62+
63+ EXPECT_EQ (3 , fields1.size ());
64+ EXPECT_EQ (" a" , fields1[0 ].to_string ());
65+ EXPECT_EQ (" b" , fields1[1 ].to_string ());
66+ EXPECT_EQ (" c" , fields1[2 ].to_string ());
67+ }
68+
69+ // NOLINTNEXTLINE
70+ TEST_F (CSVReaderTest, test_split_record_empty_fields) {
71+ MockCSVReader reader (_parse_options);
72+
73+ // Test empty fields
74+ starrocks::CSVReader::Record record1{" ,," , 2 };
75+ starrocks::CSVReader::Fields fields1;
76+ reader.split_record (record1, &fields1);
77+
78+ EXPECT_EQ (3 , fields1.size ());
79+ EXPECT_EQ (" " , fields1[0 ].to_string ());
80+ EXPECT_EQ (" " , fields1[1 ].to_string ());
81+ EXPECT_EQ (" " , fields1[2 ].to_string ());
82+ }
83+
84+ // NOLINTNEXTLINE
85+ TEST_F (CSVReaderTest, test_split_record_ends_with_delimiter) {
86+ MockCSVReader reader (_parse_options);
87+
88+ // Test string ending with delimiter
89+ starrocks::CSVReader::Record record1{" a,b," , 4 };
90+ starrocks::CSVReader::Fields fields1;
91+ reader.split_record (record1, &fields1);
92+
93+ EXPECT_EQ (3 , fields1.size ());
94+ EXPECT_EQ (" a" , fields1[0 ].to_string ());
95+ EXPECT_EQ (" b" , fields1[1 ].to_string ());
96+ EXPECT_EQ (" " , fields1[2 ].to_string ());
97+ }
98+
99+ // NOLINTNEXTLINE
100+ TEST_F (CSVReaderTest, test_split_record_starts_with_delimiter) {
101+ MockCSVReader reader (_parse_options);
102+
103+ // Test string starting with delimiter
104+ starrocks::CSVReader::Record record1{" ,a,b" , 4 };
105+ starrocks::CSVReader::Fields fields1;
106+ reader.split_record (record1, &fields1);
107+
108+ EXPECT_EQ (3 , fields1.size ());
109+ EXPECT_EQ (" " , fields1[0 ].to_string ());
110+ EXPECT_EQ (" a" , fields1[1 ].to_string ());
111+ EXPECT_EQ (" b" , fields1[2 ].to_string ());
112+ }
113+
114+ // NOLINTNEXTLINE
115+ TEST_F (CSVReaderTest, test_split_record_single_field) {
116+ MockCSVReader reader (_parse_options);
117+
118+ // Test single field (no delimiters)
119+ starrocks::CSVReader::Record record1{" single_field" , 12 };
120+ starrocks::CSVReader::Fields fields1;
121+ reader.split_record (record1, &fields1);
122+
123+ EXPECT_EQ (1 , fields1.size ());
124+ EXPECT_EQ (" single_field" , fields1[0 ].to_string ());
125+ }
126+
127+ // NOLINTNEXTLINE
128+ TEST_F (CSVReaderTest, test_split_record_empty_string) {
129+ MockCSVReader reader (_parse_options);
130+
131+ // Test empty string
132+ starrocks::CSVReader::Record record1{" " , 0 };
133+ starrocks::CSVReader::Fields fields1;
134+ reader.split_record (record1, &fields1);
135+
136+ EXPECT_EQ (1 , fields1.size ());
137+ EXPECT_EQ (" " , fields1[0 ].to_string ());
138+ }
139+
140+ // NOLINTNEXTLINE
141+ TEST_F (CSVReaderTest, test_split_record_multi_character_delimiter) {
142+ starrocks::CSVParseOptions options;
143+ options.column_delimiter = " ||" ;
144+ options.row_delimiter = " \n " ;
145+ options.trim_space = false ;
146+
147+ MockCSVReader reader (options);
148+
149+ // Test multi-character delimiter
150+ starrocks::CSVReader::Record record1{" a||b||c" , 7 };
151+ starrocks::CSVReader::Fields fields1;
152+ reader.split_record (record1, &fields1);
153+
154+ EXPECT_EQ (3 , fields1.size ());
155+ EXPECT_EQ (" a" , fields1[0 ].to_string ());
156+ EXPECT_EQ (" b" , fields1[1 ].to_string ());
157+ EXPECT_EQ (" c" , fields1[2 ].to_string ());
158+ }
159+
160+ // NOLINTNEXTLINE
161+ TEST_F (CSVReaderTest, test_split_record_with_trim_space) {
162+ starrocks::CSVParseOptions options;
163+ options.column_delimiter = " ," ;
164+ options.row_delimiter = " \n " ;
165+ options.trim_space = true ;
166+
167+ MockCSVReader reader (options);
168+
169+ // Test with trim_space enabled
170+ starrocks::CSVReader::Record record1{" a , b , c " , 11 };
171+ starrocks::CSVReader::Fields fields1;
172+ reader.split_record (record1, &fields1);
173+
174+ EXPECT_EQ (3 , fields1.size ());
175+ EXPECT_EQ (" a" , fields1[0 ].to_string ());
176+ EXPECT_EQ (" b" , fields1[1 ].to_string ());
177+ EXPECT_EQ (" c" , fields1[2 ].to_string ());
178+ }
179+
180+ // NOLINTNEXTLINE
181+ TEST_F (CSVReaderTest, test_split_record_large_data) {
182+ MockCSVReader reader (_parse_options);
183+
184+ // Test with larger data to verify performance optimization
185+ std::string large_data;
186+ for (int i = 0 ; i < 1000 ; ++i) {
187+ if (i > 0 ) large_data += " ," ;
188+ large_data += " field" + std::to_string (i);
189+ }
190+
191+ starrocks::CSVReader::Record record1{large_data.c_str (), large_data.size ()};
192+ starrocks::CSVReader::Fields fields1;
193+ reader.split_record (record1, &fields1);
194+
195+ EXPECT_EQ (1000 , fields1.size ());
196+ EXPECT_EQ (" field0" , fields1[0 ].to_string ());
197+ EXPECT_EQ (" field999" , fields1[999 ].to_string ());
198+ }
199+
200+ } // namespace starrocks::csv
0 commit comments